# Load the commercial-district dataset.
import pandas as pd

commercial = pd.read_csv('./data/commercial.csv')

# Inspect the header: the column names and how many columns there are.
# (Bare expressions like this one only display a value in a notebook cell.)
list(commercial), len(list(commercial))

# Collect the distinct values of the '상권업종소분류명' column in order to
# find the one that identifies chicken restaurants.
category_range = set(commercial['상권업종소분류명'])
category_range, len(category_range)
# Split the '도로명' string on spaces into at most three pieces and attach
# them to the table as the new columns '시', '구' and '상세주소' (the third
# piece holds everything after the second space).
commercial[['시', '구', '상세주소']] = commercial['도로명'].str.split(' ', n=2, expand=True)

# Drop the '상세주소' column.
commercial = commercial.drop(["상세주소"], axis=1)

# Keep only the rows whose '시' value is '서울특별시'.
seoul_data = commercial[commercial['시'] == '서울특별시']

# Latitude/longitude coordinates for other metropolitan cities:
# https://github.com/vuski/admdongkor

# Data check: list the distinct '시' values left after filtering.
city_type = set(seoul_data['시'])
city_type, len(city_type)
# From the '서울특별시' rows, keep only the '후라이드/양념치킨' category.
seoul_chicken_data = seoul_data[seoul_data['상권업종소분류명'] == '후라이드/양념치킨']
seoul_chicken_data

# Data check: list the distinct category values left after filtering.
set(seoul_chicken_data['상권업종소분류명'])

# Number of chicken restaurants per '구': group by '구', count the non-null
# category entries in each group, then sort from largest to smallest.
groupdata = seoul_chicken_data.groupby('구')
group_by_category = groupdata['상권업종소분류명']
chicken_count_gu = group_by_category.count()
sorted_chicken_count_gu = chicken_count_gu.sort_values(ascending=False)
sorted_chicken_count_gu
# Draw a bar chart of the sorted per-'구' chicken restaurant counts.
import matplotlib.pyplot as plt

# NOTE(review): 'AppleGothic' is presumably chosen so the Korean labels
# render; confirm the font is available on the machine running this.
plt.rcParams['font.family'] = 'AppleGothic'
plt.figure(figsize=(10, 5))
plt.bar(sorted_chicken_count_gu.index, sorted_chicken_count_gu)
# NOTE(review): the title reads '구에 다른'; '구에 따른' ("by district") looks
# like the intended wording. The string is left unchanged here.
plt.title('구에 다른 치킨가게 수')
# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation=45)
plt.show()
# Show the per-'구' chicken restaurant counts on a choropleth map.
import folium
import json

seoul_state_geo = './data/seoul_geo.json'
# Load the GeoJSON through a context manager so the file handle is closed
# as soon as parsing finishes instead of being left open.
with open(seoul_state_geo, encoding='utf-8') as geo_file:
    geo_data = json.load(geo_file)

# NOTE: the name `map` shadows the builtin of the same name from here on;
# it is kept because other cells may refer to it.
map = folium.Map(location=[37.5502, 126.982], zoom_start=11)

# Bind each count to the GeoJSON feature whose 'properties.name' matches the
# index label of `chicken_count_gu`.
folium.Choropleth(
    geo_data=geo_data,
    data=chicken_count_gu,
    columns=[chicken_count_gu.index, chicken_count_gu],
    fill_color='PuRd',
    key_on='properties.name',
).add_to(map)
map
# Floating population per '군구' in Seoul, July 2020.
import pandas as pd

population = pd.read_csv('./data/population07.csv')

# Inspect the header and the distinct '군구' values.
list(population), len(list(population))
set(population['군구']), len(set(population['군구']))

# Total '유동인구수' per '군구', sorted from smallest to largest.
sum_of_population_by_gu = population.groupby('군구')['유동인구수'].sum()
sorted_sum_of_population_by_gu = sum_of_population_by_gu.sort_values(ascending=True)

import matplotlib.pyplot as plt

plt.rcParams['font.family'] = 'AppleGothic'
plt.figure(figsize=(10, 5))
plt.bar(sorted_sum_of_population_by_gu.index, sorted_sum_of_population_by_gu)
plt.title('2020년 7월 서울 군구별 유동인구 수')
plt.xlabel('군구')
plt.ylabel('유동인구 수(명)')
# Rotate the tick labels and show the figure explicitly, matching the other
# bar charts in this file; without show() the figure is only displayed when a
# later section happens to call it.
plt.xticks(rotation=45)
plt.show()
# Show the per-'군구' floating population totals on a choropleth map.
import folium
import json

# NOTE(review): the 'stamentoner' tile set may not be available in newer
# folium releases — confirm against the installed version.
# NOTE: the name `map` shadows the builtin of the same name from here on;
# it is kept because other cells may refer to it.
map = folium.Map(location=[37.5502, 126.982], zoom_start=11, tiles='stamentoner')

seoul_state_geo = './data/seoul_geo.json'
# Load the GeoJSON through a context manager so the file handle is closed
# as soon as parsing finishes instead of being left open.
with open(seoul_state_geo, encoding='utf-8') as geo_file:
    geo_data = json.load(geo_file)

# Bind each total to the GeoJSON feature whose 'properties.name' matches the
# index label of the population series.
folium.Choropleth(
    geo_data=geo_data,
    data=sorted_sum_of_population_by_gu,
    columns=[sorted_sum_of_population_by_gu.index, sorted_sum_of_population_by_gu],
    fill_color="PuRd",
    key_on='properties.name',
).add_to(map)
map
# Floating population per date in '강남구', Seoul, July 2020.
import pandas as pd
import matplotlib.pyplot as plt

population = pd.read_csv('./data/population07.csv')

# Inspect the distinct '일자' values.
set(population['일자']), len(set(population['일자']))

# Total '유동인구수' per '일자' for the '강남구' rows only.
population_gangnam = population[population['군구'] == '강남구']
population_gangnam_daily = population_gangnam.groupby('일자')['유동인구수'].sum()
# Value-sorted copy; the plot below uses the date-indexed series, not this one.
sorted_population_gangnam_daily = population_gangnam_daily.sort_values(ascending=True)

plt.figure(figsize=(10, 5))
# Convert the index labels to strings before plotting them on the x axis.
date = [str(day) for day in population_gangnam_daily.index]
plt.plot(date, population_gangnam_daily)
plt.title('2020년 7월 서울 강남구 날짜별 유동인구 수')
plt.xlabel('날짜')
plt.ylabel('유동인구 수(천만명)')
plt.xticks(rotation=-45)
plt.show()
# Floating population per chicken restaurant for each '구' in Seoul.
import pandas as pd

# Rebuild the per-'구' restaurant counts from the raw commercial data.
commercial = pd.read_csv('./data/commercial.csv')
address_parts = commercial['도로명'].str.split(' ', n=2, expand=True)
commercial[['시', '구', '상세주소']] = address_parts

is_seoul = commercial['시'] == '서울특별시'
seoul_data = commercial[is_seoul]
is_chicken = seoul_data['상권업종소분류명'] == '후라이드/양념치킨'
seoul_chicken_data = seoul_data[is_chicken]

chicken_by_gu = seoul_chicken_data.groupby('구')
chicken_count_gu = chicken_by_gu['상권업종소분류명'].count()

# Total '유동인구수' per '군구' from the July file.
population = pd.read_csv('./data/population07.csv')
population_by_gu = population.groupby('군구')
sum_of_population_by_gu = population_by_gu['유동인구수'].sum()

# Turn both series into frames with the group key as an ordinary column.
new_chicken_count_gu = chicken_count_gu.to_frame().reset_index()
new_sum_of_population_by_gu = sum_of_population_by_gu.to_frame().reset_index()

# Attach the population total to each '구' row, matching '구' against '군구'.
population_indexed_by_gu = new_sum_of_population_by_gu.set_index('군구')
gu_chicken = new_chicken_count_gu.join(population_indexed_by_gu, on='구')

# The '상권업종소분류명' column holds the restaurant count at this point.
population_per_shop = gu_chicken['유동인구수'] / gu_chicken['상권업종소분류명']
gu_chicken['유동인구수/치킨집수'] = population_per_shop
gu_chicken = gu_chicken.sort_values(by='유동인구수/치킨집수')

import matplotlib.pyplot as plt

plt.rcParams['font.family'] = 'AppleGothic'
plt.figure(figsize=(10, 5))
plt.bar(
    gu_chicken['구'],
    gu_chicken['유동인구수/치킨집수'],
)
plt.title('서울특별시 구별 치킨집 당 유동인구수')
plt.xlabel('구')
plt.ylabel('유동인구수/치킨집수')
plt.xticks(rotation=45)
plt.show()
# Floating population per '군구' from the April file, as a sorted bar chart.
import pandas as pd

population04 = pd.read_csv('./data/population04.csv')

# Total '유동인구수' per '군구', then ordered from smallest to largest.
april_by_gu = population04.groupby('군구')
population04_gu = april_by_gu['유동인구수'].sum()
sorted_population04_gu = population04_gu.sort_values(ascending=True)

import matplotlib.pyplot as plt

plt.rcParams['font.family'] = 'AppleGothic'
plt.figure(figsize=(10, 5))
plt.bar(
    sorted_population04_gu.index,
    sorted_population04_gu,
)
plt.title('2020년 4월 서울 구군별 유동인구 수')
plt.xlabel('군구')
plt.ylabel('유동인구 수(명)')
plt.xticks(rotation=45)
plt.show()
# Compare the per-date floating population of '강남구' between the April and
# July files on a single figure.
gangnam04 = population04[population04['군구'] == '강남구']
daily04 = gangnam04.groupby('일자')['유동인구수'].sum()

population07 = pd.read_csv('./data/population07.csv')
gangnam07 = population07[population07['군구'] == '강남구']
daily07 = gangnam07.groupby('일자')['유동인구수'].sum()

plt.figure(figsize=(20, 5))

# Convert the index labels to strings before plotting them on the x axis.
date_list = [str(day) for day in daily04.index]
plt.plot(date_list, daily04, label='2020년 4월')

date_list2 = [str(day) for day in daily07.index]
plt.plot(date_list2, daily07, label='2020년 7월')

plt.title('2020년 4월과 7월 서울 강남구 유동인구 수')
plt.xlabel('날짜')
plt.ylabel('유동인구 수')
plt.xticks(rotation=45)
# Two series share the axes, so a legend is needed to tell them apart.
plt.legend()
plt.show()