2021-06-14 2주차 스파르타 파이썬 데이터분석 첫걸음

Hyeonu_Chun·2021년 6월 21일
0

2주차 강의

import pandas as pd

# Load the commercial (store) dataset.
commercial = pd.read_csv('./data/commercial.csv')
# Notebook-style inspection: rows per store number, largest first.
# The result is not stored anywhere.
commercial.groupby('상가업소번호')['상권업종소분류명'].count().sort_values(ascending=False)


# Split the road-name address on its first two spaces into
# city / district (gu) / remaining address columns.
commercial[['시', '구', '상세주소']] = commercial['도로명'].str.split(' ', n=2, expand=True)
# Keep only Seoul rows, then only fried/seasoned-chicken stores.
seoul_data = commercial[commercial['시'] == '서울특별시']
seoul_chicken_data = seoul_data[seoul_data['상권업종소분류명'] == '후라이드/양념치킨']
# Number of chicken stores per gu. The unsorted counts are kept under their
# own name in addition to the descending view used for the bar chart.
chicken_count_gu = seoul_chicken_data.groupby('구')['상권업종소분류명'].count()
sorted_chicken_count_gu = chicken_count_gu.sort_values(ascending=False)

import matplotlib.pyplot as plt

# Font family for all text in the figures (presumably chosen so the Korean
# labels render; confirm the font is installed on the target machine).
plt.rcParams['font.family'] = "Malgun Gothic"

# Bar chart: one bar per gu, height = number of chicken stores.
plt.figure(figsize=(10, 5))
plt.bar(
    x=sorted_chicken_count_gu.index,
    height=sorted_chicken_count_gu,
)
# Rotate the gu names so they do not overlap.
plt.xticks(rotation=90)
plt.title('구에 따른 치킨가게 수의 합계')
plt.show()

# Install hint: conda install -c conda-forge folium
import folium
import json

# GeoJSON with the Seoul district boundaries.
seoul_state_geo = './data/seoul_geo.json'
# Use a context manager so the file handle is closed after parsing.
with open(seoul_state_geo, encoding='utf-8') as geo_file:
    geo_data = json.load(geo_file)

# Named chicken_map rather than map to avoid shadowing the builtin map().
chicken_map = folium.Map(location=[37.5502, 126.982], zoom_start=11)
# Color each district by its chicken-store count; features are matched to the
# series through the GeoJSON 'properties.name' field.
folium.Choropleth(
    geo_data=geo_data,
    data=sorted_chicken_count_gu,
    columns=[sorted_chicken_count_gu.index, sorted_chicken_count_gu],
    fill_color='PuRd',
    key_on='properties.name',
).add_to(chicken_map)

# Load the population07 dataset and total the floating population per gu.
population = pd.read_csv('./data/population07.csv')
sum_of_groupdata_by_gu = population.groupby('군구')['유동인구수'].sum()
# Ascending order (the sort_values default) for the bar chart below.
sorted_sum_of_groupdata_by_gu = sum_of_groupdata_by_gu.sort_values()

# Bar chart: one bar per gu, height = summed floating population.
plt.figure(figsize=(10, 5))
plt.bar(
    x=sorted_sum_of_groupdata_by_gu.index,
    height=sorted_sum_of_groupdata_by_gu,
)
plt.xticks(rotation=-45)
plt.ylabel('유동인구 수(명)')
plt.xlabel('군구')
plt.title('2020년 7월 서울 군구별 유동인구 수')
plt.show()

# Restrict to the Gangnam-gu rows and total the floating population per date.
population_gangnam = population.loc[population['군구'] == '강남구']
population_gangnam_daily = population_gangnam.groupby('일자')['유동인구수'].sum()

# Convert the date index values to strings for use as x-axis labels.
date = list(map(str, population_gangnam_daily.index))

# Line chart of the daily totals.
plt.figure(figsize=(10, 5))
plt.plot(date, population_gangnam_daily)
plt.xticks(rotation=-90)
plt.ylabel('유동인구 수(천만명)')
plt.xlabel('날짜')
plt.title('2020년 7월 서울 강남구 날짜별 유동인구 수')
plt.show()

# Named population_map rather than map to avoid shadowing the builtin map().
# NOTE(review): the 'stamentoner' tile name may not be accepted by newer
# folium releases; confirm against the installed version.
population_map = folium.Map(location=[37.5502, 126.982], zoom_start=11, tiles='stamentoner')
# GeoJSON with the Seoul district boundaries.
seoul_state_geo = './data/seoul_geo.json'
# Use a context manager so the file handle is closed after parsing.
with open(seoul_state_geo, encoding='utf-8') as geo_file:
    geo_data = json.load(geo_file)
# Color each district by its summed floating population; features are matched
# to the series through the GeoJSON 'properties.name' field.
folium.Choropleth(
    geo_data=geo_data,
    data=sum_of_groupdata_by_gu,
    columns=[sum_of_groupdata_by_gu.index, sum_of_groupdata_by_gu],
    fill_color='PuRd',
    key_on='properties.name',
).add_to(population_map)

# Turn both per-gu series into frames with the gu name as a regular column.
# The chicken counts come from sorted_chicken_count_gu; row order does not
# matter here because the joined frame is re-sorted by the ratio below.
new_chicken_count_gu = pd.DataFrame(sorted_chicken_count_gu).reset_index()
new_sum_of_groupdata_by_gu = pd.DataFrame(sum_of_groupdata_by_gu).reset_index()
# Left-join the population totals onto the chicken counts by gu name
# ('구' on the left, '군구' on the right).
gu_chicken = new_chicken_count_gu.join(new_sum_of_groupdata_by_gu.set_index('군구'), on='구')
# Floating population per chicken store; the '상권업종소분류명' column holds the
# per-gu store count produced by the earlier groupby/count.
gu_chicken['유동인구수/치킨집수'] = gu_chicken['유동인구수'] / gu_chicken['상권업종소분류명']
gu_chicken = gu_chicken.sort_values(by='유동인구수/치킨집수')

# Bar chart of the ratio, smallest first.
plt.figure(figsize=(10, 5))
plt.bar(gu_chicken['구'], gu_chicken['유동인구수/치킨집수'])
plt.xlabel('구')
plt.ylabel('유동인구수/치킨집수')
plt.xticks(rotation=90)
plt.title('치킨집 당 유동인구수')
plt.show()

##################################################################

2주차 과제

#1
import matplotlib.pyplot as plt
import pandas as pd

# Font family for all figure text (presumably for the Korean labels).
plt.rcParams['font.family'] = "Malgun Gothic"

# Load the population04 dataset and total the floating population per gu.
population_4 = pd.read_csv('./data/population04.csv')
sum_of_groupdata_by_gu_4 = population_4.groupby('군구')['유동인구수'].sum()
# Ascending order (the sort_values default) for the bar chart.
sorted_sum_of_groupdata_by_gu_4 = sum_of_groupdata_by_gu_4.sort_values()

# Bar chart: one bar per gu, height = summed floating population.
plt.figure(figsize=(10, 5))
plt.bar(
    x=sorted_sum_of_groupdata_by_gu_4.index,
    height=sorted_sum_of_groupdata_by_gu_4,
)
plt.xticks(rotation=-45)
plt.ylabel('유동인구 수(명)')
plt.xlabel('군구')
plt.title('2020년 4월 서울 군구별 유동인구 수')
plt.show()

#2
import pandas as pd
import matplotlib.pyplot as plt

# population07 dataset: per-gu totals and the Gangnam-gu daily totals.
population_7 = pd.read_csv('./data/population07.csv')
sum_of_groupdata_by_gu_7 = population_7.groupby('군구')['유동인구수'].sum()
population_gangnam_7 = population_7[population_7['군구'] == '강남구']
population_gangnam_daily_7 = population_gangnam_7.groupby('일자')['유동인구수'].sum()

# population04 dataset: the same aggregates.
population_4 = pd.read_csv('./data/population04.csv')
sum_of_groupdata_by_gu_4 = population_4.groupby('군구')['유동인구수'].sum()
population_gangnam_4 = population_4[population_4['군구'] == '강남구']
population_gangnam_daily_4 = population_gangnam_4.groupby('일자')['유동인구수'].sum()

# Font family for all figure text (presumably for the Korean labels).
plt.rcParams['font.family'] = "Malgun Gothic"
plt.figure(figsize=(20, 5))

# Convert the date index values to strings for use as x-axis labels.
date = [str(x) for x in population_gangnam_daily_4.index]
date1 = [str(y) for y in population_gangnam_daily_7.index]

# Both series are drawn on the same axes, so each line gets a label and the
# title names both months; previously the title mentioned July only and the
# two lines could not be told apart.
plt.plot(date, population_gangnam_daily_4, label='4월')
plt.plot(date1, population_gangnam_daily_7, label='7월')
plt.title('2020년 4월, 7월 서울 강남구 날짜별 유동인구 수')
plt.xlabel('날짜')
plt.ylabel('유동인구 수(천만명)')
plt.xticks(rotation=-90)
plt.legend()
plt.show()
profile
Stay hungry, stay foolish

0개의 댓글