import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import platform
import seaborn as sns
from matplotlib import font_manager, rc
# %matplotlib inline
get_ipython().run_line_magic('matplotlib', 'inline')

# Choose matplotlib's default font family based on the host operating system.
# `path` is only consulted on Windows, where the family name is read from the
# font file itself.
path = 'C:/Windows/Fonts/malgun.ttf'
system_name = platform.system()
if system_name == 'Windows':
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
elif system_name == 'Darwin':
    rc('font', family='Arial Unicode MS')
else:
    # No font is configured on any other platform; just report it.
    print('Unknown system. sorry')
import warnings

# Suppress every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Load the raw population sheet; the second spreadsheet row (header=1) holds
# the column names.
population = pd.read_excel('../data/07_population_raw_data.xlsx', header=1)

# Forward-fill blank cells from the nearest non-null value above them.
# `fillna(method='pad')` is deprecated in recent pandas releases; `ffill()` is
# the equivalent supported spelling.
population.ffill(inplace=True)
population
# Reshape the data above into a new DataFrame, then compute the extinction ratio and add an extinction-ratio column and an extinction-risk-region column.
# Pivot the raw rows so that each (광역시도, 시도) pair becomes one row and the
# three value columns are split by the categories found in '구분'.
pop = population.pivot_table(
    index=['광역시도', '시도'],
    columns=['구분'],
    values=['인구수', '20-39세', '65세이상'],
)
pop

# Extinction ratio: the ('20-39세', '여자') column divided by half of the
# ('65세이상', '합계') column.
pop['소멸비율'] = pop[('20-39세', '여자')].div(pop[('65세이상', '합계')].div(2))
pop.tail()

# Flag a region as at risk when its extinction ratio is below 1.0.
pop['소멸위기지역'] = pop['소멸비율'].lt(1.0)
pop
# A '광역시도' value whose last three characters are one of these is handled
# by the "city" branches below; everything else is treated as a province.
metro_suffixes = ('광역시', '특별시', '자치시')

# Pass 1: give every row a base display name.
for idx, row in pop.iterrows():
    province = row['광역시도']
    if province[-3:] not in metro_suffixes:
        # Province row: the '시도' name without its final character.
        si_name[idx] = row['시도'][:-1]
    elif province == '세종특별자치시':
        si_name[idx] = '세종'
    else:
        # City row: first two characters of the city, a space, then the
        # district (two-character districts are kept whole, longer ones
        # lose their final character).
        district = row['시도']
        shortened = district if len(district) == 2 else district[:-1]
        si_name[idx] = province[:2] + ' ' + shortened

# Pass 2: province rows whose '시도' is listed in tmp_gu_dict are renamed to
# "<dict key> <district>".
for idx, row in pop.iterrows():
    if row['광역시도'][-3:] in metro_suffixes:
        continue
    for keys, values in tmp_gu_dict.items():
        district = row['시도']
        if district not in values:
            continue
        if len(district) == 2:
            shortened = district
        elif district in ('마산합포구', '마산회원구'):
            # Drop the first two characters as well as the final one.
            shortened = district[2:-1]
        else:
            shortened = district[:-1]
        si_name[idx] = keys + ' ' + shortened

# Pass 3: '고성' appears under two provinces, so tag each with its province.
for idx, row in pop.iterrows():
    province = row['광역시도']
    if province[-3:] in metro_suffixes:
        continue
    if row['시도'][:-1] != '고성':
        continue
    if province == '강원도':
        si_name[idx] = '고성(강원)'
    elif province == '경상남도':
        si_name[idx] = '고성(경남)'
# Draw the city and province border lines directly onto the Korean-peninsula-shaped grid data above.
# Border polylines drawn on top of the cartogram grid. Each inner list is one
# polyline; the drawing functions unpack every point as (y, x) before plotting.
# The trailing labels are the original author's region names.
BORDER_LINES = [
    [(5, 1), (5, 2), (7, 2), (7, 3), (11, 3), (11, 0)], # Incheon
    [(5, 4), (5, 5), (2, 5), (2, 7), (4, 7), (4, 9), (7, 9), (7, 7), (9, 7), (9, 5), (10, 5), (10, 4), (5, 4)], # Seoul
    [(1, 7), (1, 8), (3, 8), (3, 10), (10, 10), (10, 7), (12, 7), (12, 6), (11, 6), (11, 5), (12, 5), (12, 4), (11, 4), (11, 3)], # Gyeonggi-do
    [(8, 10), (8, 11), (6, 11), (6, 12)], # Gangwon-do
    [(12, 5), (13, 5), (13, 4), (14, 4), (14, 5), (15, 5), (15, 4), (16, 4), (16, 2)], # Chungcheongbuk-do
    [(16, 4), (17, 4), (17, 5), (16, 5), (16, 6), (19, 6), (19, 5), (20, 5), (20, 4), (21, 4), (21, 3), (19, 3), (19, 1)], # Jeollabuk-do
    [(13, 5), (13, 6), (16, 6)], # Daejeon
    [(13, 5), (14, 5)], # Sejong
    [(21, 2), (21, 3), (22, 3), (22, 4) ,(24, 4), (24, 2), (21, 2)], # Gwangju
    [(20, 5), (21, 5), (21, 6), (23, 6)], # Jeollanam-do
    [(10, 8), (12, 8), (12, 9), (14, 9), (14, 8), (16, 8), (16, 6)], # Chungcheongbuk-do (label repeats the one above; NOTE(review): confirm which province this line bounds)
    [(14, 9), (14, 11), (14, 12), (13, 12), (13, 13)], # Gyeongsangbuk-do
    [(15, 8), (17, 8), (17, 10), (16, 10), (16, 11), (14, 11)], # Daegu
    [(17, 9), (18, 9), (18, 8), (19, 8), (19, 9), (20, 9), (20, 10), (21, 10)], # Busan
    [(16, 11), (16, 13)],
    [(27, 5), (27, 6), (25, 6)]
]
# Cartogram test
def plot_text_simple(draw_korea):
    """Write each region's name onto the current matplotlib axes.

    Args:
        draw_korea: DataFrame whose rows provide an ``ID`` string and the
            ``x`` / ``y`` grid position of the region's cell.

    Each label is centred at ``(x + 0.5, y + 0.5)``.
    """
    for _, region in draw_korea.iterrows():
        region_id = region['ID']
        words = region_id.split()

        if len(words) == 2:
            # Two-word IDs are stacked on two lines.
            dispname = '{}\n{}'.format(*words)
        elif region_id[:2] == '고성':
            # IDs beginning with '고성' are shown as the bare name.
            dispname = '고성'
        else:
            dispname = region_id

        # A last line of three or more characters gets a smaller font with
        # wider line spacing.
        long_label = len(dispname.split()[-1]) >= 3
        fontsize = 9.5 if long_label else 11
        linespacing = 1.5 if long_label else 1.2

        plt.annotate(
            dispname,
            (region['x'] + 0.5, region['y'] + 0.5),
            weight='bold',
            fontsize=fontsize,
            linespacing=linespacing,
            ha='center',  # horizontal alignment
            va='center',  # vertical alignment
        )
def simpleDraw(draw_korea):
    """Show a text-only cartogram: region labels plus the border polylines.

    Args:
        draw_korea: DataFrame passed straight to ``plot_text_simple``.
    """
    plt.figure(figsize=(8, 11))
    plot_text_simple(draw_korea)

    # BORDER_LINES points are (y, x) pairs, so split them before plotting.
    for border in BORDER_LINES:
        row_coords, col_coords = zip(*border)
        plt.plot(col_coords, row_coords, color='black', linewidth=1.5)

    # Flip the y axis so that y grows downwards.
    plt.gca().invert_yaxis()
    plt.axis('off')
    plt.tight_layout()
    plt.show()
# Render the text-only cartogram (labels and border lines) for draw_korea.
simpleDraw(draw_korea)
# Now finish the cartogram by colouring the cells according to the values in the pop data.
def get_data_info(targetData, blockedMap):
    """Return the pivoted grid and colour-scale bounds for one column.

    Args:
        targetData: Name of the column of ``blockedMap`` to visualise.
        blockedMap: DataFrame containing ``x``, ``y`` and ``targetData``.

    Returns:
        A tuple ``(mapdata, vmax, vmin, whitelabelmin)``: ``mapdata`` is
        ``blockedMap`` pivoted to rows ``y`` and columns ``x``; ``vmax`` and
        ``vmin`` are the largest and smallest values of the column; and
        ``whitelabelmin`` sits 25% of the way from ``vmin`` to ``vmax``.
    """
    values = blockedMap[targetData]
    # Series.min()/max() skip NaN. The builtin min()/max() compare element by
    # element and yield NaN or an order-dependent result when NaN is present.
    vmin = values.min()
    vmax = values.max()
    whitelabelmin = (vmax - vmin) * 0.25 + vmin
    mapdata = blockedMap.pivot_table(index='y', columns='x', values=targetData)
    return mapdata, vmax, vmin, whitelabelmin
def get_data_info_for_zero_center(targetData, blockedMap):
    """Return the pivoted grid and a colour scale symmetric around zero.

    Args:
        targetData: Name of the column of ``blockedMap`` to visualise.
        blockedMap: DataFrame containing ``x``, ``y`` and ``targetData``.

    Returns:
        A tuple ``(mapdata, vmax, vmin, whitelabelmin)``: ``mapdata`` is
        ``blockedMap`` pivoted to rows ``y`` and columns ``x``; ``vmax`` is
        the largest absolute value of the column and ``vmin`` is its
        negation; ``whitelabelmin`` is the fixed value 5.
    """
    whitelabelmin = 5
    # The largest magnitude equals max(|min|, |max|). Series.max() skips NaN,
    # whereas the builtin min()/max() would let a NaN poison the bounds.
    tmp_max = blockedMap[targetData].abs().max()
    vmin, vmax = -tmp_max, tmp_max
    mapdata = blockedMap.pivot_table(index='y', columns='x', values=targetData)
    return mapdata, vmax, vmin, whitelabelmin
def plot_text(targetData, blockedMap, whitelabelmin):
    """Write each region's name onto the current axes, coloured for contrast.

    Args:
        targetData: Column whose value decides the label colour.
        blockedMap: DataFrame whose rows provide ``ID``, ``x``, ``y`` and
            ``targetData``.
        whitelabelmin: Threshold; a row whose absolute ``targetData`` value
            exceeds it is labelled in white, otherwise in black.

    Each label is centred at ``(x + 0.5, y + 0.5)``.
    """
    for _, region in blockedMap.iterrows():
        region_id = region['ID']
        words = region_id.split()

        if len(words) == 2:
            # Two-word IDs are stacked on two lines.
            dispname = '{}\n{}'.format(*words)
        elif region_id[:2] == '고성':
            # IDs beginning with '고성' are shown as the bare name.
            dispname = '고성'
        else:
            dispname = region_id

        # A last line of three or more characters gets a smaller font with
        # wider line spacing.
        long_label = len(dispname.split()[-1]) >= 3
        fontsize = 9.5 if long_label else 11
        linespacing = 1.5 if long_label else 1.2

        if np.abs(region[targetData]) > whitelabelmin:
            annocolor = 'white'
        else:
            annocolor = 'black'

        plt.annotate(
            dispname,
            (region['x'] + 0.5, region['y'] + 0.5),
            weight='bold',
            color=annocolor,
            fontsize=fontsize,
            linespacing=linespacing,
            ha='center',  # horizontal alignment
            va='center',  # vertical alignment
        )
def drawKorea(targetData, blockedMap, cmapname, zeroCenter=False):
    """Draw and show the coloured cartogram for one column of ``blockedMap``.

    Args:
        targetData: Column to colour the cells by; also the colour-bar label.
        blockedMap: DataFrame with ``ID``, ``x``, ``y`` and ``targetData``.
        cmapname: Colormap passed to ``plt.pcolor``.
        zeroCenter: When true, the colour scale comes from
            ``get_data_info_for_zero_center`` instead of ``get_data_info``.
    """
    info_func = get_data_info_for_zero_center if zeroCenter else get_data_info
    masked_mapdata, vmax, vmin, whitelabelmin = info_func(targetData, blockedMap)

    plt.figure(figsize=(8, 11))
    plt.pcolor(
        masked_mapdata,
        vmin=vmin,
        vmax=vmax,
        cmap=cmapname,
        edgecolor='#aaaaaa',
        linewidth=0.5,
    )
    plot_text(targetData, blockedMap, whitelabelmin)

    # BORDER_LINES points are (y, x) pairs, so split them before plotting.
    for border in BORDER_LINES:
        row_coords, col_coords = zip(*border)
        plt.plot(col_coords, row_coords, color='black', linewidth=1.5)

    # Flip the y axis so that y grows downwards.
    plt.gca().invert_yaxis()
    plt.axis('off')
    plt.tight_layout()

    colorbar = plt.colorbar(shrink=0.1, aspect=10)
    colorbar.set_label(targetData)
    plt.show()
# Cartogram coloured by the '인구수합계' column.
drawKorea('인구수합계', pop, 'Blues')

# Turn the at-risk flag into 1 / 0 so it can be used as a colour value.
pop['소멸위기지역'] = [int(bool(flag)) for flag in pop['소멸위기지역']]
drawKorea('소멸위기지역', pop, 'Reds')
import folium
import json
# Index the regions by ID so each Series below is keyed by region ID, which
# the choropleth matches against 'feature.id' in the GeoJSON.
pop_folium = pop.set_index('ID')
pop_folium.head()

geo_path = '../data/07_skorea_municipalities_geo_simple.json'
# Read the GeoJSON inside a context manager so the file handle is closed.
with open(geo_path, encoding='utf-8') as geo_file:
    geo_str = json.load(geo_file)

# Choropleth of the '인구수합계' column.
# `Map.choropleth` is deprecated (and absent from newer folium releases);
# the `folium.Choropleth` layer takes the same arguments.
mymap = folium.Map(location=[36.002, 127.054], zoom_start=7)
folium.Choropleth(
    geo_data=geo_str,
    data=pop_folium['인구수합계'],
    key_on='feature.id',
    columns=[pop_folium.index, pop_folium['인구수합계']],
    fill_color='YlGnBu',
).add_to(mymap)
mymap

# Choropleth of the '소멸위기지역' flag.
mymap = folium.Map(location=[36.002, 127.054], zoom_start=7)
folium.Choropleth(
    geo_data=geo_str,
    data=pop_folium['소멸위기지역'],
    key_on='feature.id',
    columns=[pop_folium.index, pop_folium['소멸위기지역']],
    fill_color='PuRd',
).add_to(mymap)
mymap