미니 프로젝트
1. 배경
- 목표
  1. 인구 소멸 위기 지역 파악
  2. 인구 소멸 위기 지역을 지도로 표현
  3. 지도를 카르토그램으로 표현
2. 데이터 읽고 인구 소멸 지역 계산하기
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import platform
from matplotlib import rc, font_manager
# Render matplotlib figures inline in the notebook.
get_ipython().run_line_magic('matplotlib', 'inline')

# Pick a font able to render Hangul labels. 'Malgun Gothic' is a Windows
# font; macOS provides 'AppleGothic'. Every other system keeps the original
# setting, so behaviour on Windows is unchanged.
plt.rcParams['font.family'] = (
    'AppleGothic' if platform.system() == 'Darwin' else 'Malgun Gothic'
)
# Draw negative numbers with the ASCII hyphen instead of U+2212.
# NOTE(review): Hangul fonts commonly lack the U+2212 glyph, which would
# break negative tick labels on the zero-centred colour bars - confirm.
plt.rcParams['axes.unicode_minus'] = False
warnings.simplefilter(action='ignore', category=FutureWarning)

# header=1: the second spreadsheet row supplies the column names.
population = pd.read_excel(".//07_population_raw_data.xlsx", header=1)
# Forward-fill missing cells from the row above. DataFrame.ffill() replaces
# the deprecated fillna(method="pad") spelling.
# NOTE(review): presumably the region-name cells are blank on continuation
# rows in the workbook - confirm against the file.
population = population.ffill()
population

# Rename the two region columns and the total column to shorter labels.
region_column_names = {
    "행정구역(동읍면)별(1)": "광역시도",
    "행정구역(동읍면)별(2)": "시도",
    "계": "인구수",
}
population.rename(columns=region_column_names, inplace=True)
population.tail()

# Remove the subtotal rows ("소계"). Taking an explicit copy makes the result
# an independent DataFrame, so the in-place edits that follow do not act on a
# slice of the original. This replaces the old `population.is_copy = False`
# workaround, which relied on an attribute that pandas removed in 1.0.
population = population[population["시도"] != "소계"].copy()
population.head()

# Rename the category column ("항목" -> "구분").
population.rename(columns={"항목": "구분"}, inplace=True)
population.head()

# Replace the long category labels with short ones (total / male / female).
category_labels = {
    "총인구수 (명)": "합계",
    "남자인구수 (명)": "남자",
    "여자인구수 (명)": "여자",
}
for long_label, short_label in category_labels.items():
    population.loc[population["구분"] == long_label, "구분"] = short_label
population

# Aggregate the five-year age columns into the two bands used for the
# extinction-risk ratio: ages 20-39 and ages 65 and over.
young_columns = ["20 - 24세", "25 - 29세", "30 - 34세", "35 - 39세"]
elderly_columns = [
    "65 - 69세", "70 - 74세", "75 - 79세", "80 - 84세",
    "85 - 89세", "90 - 94세", "95 - 99세", "100+",
]

# Add the columns one by one, left to right, exactly like a chained `+`.
young_total = population[young_columns[0]]
for column in young_columns[1:]:
    young_total = young_total + population[column]
population["20-39세"] = young_total

elderly_total = population[elderly_columns[0]]
for column in elderly_columns[1:]:
    elderly_total = elderly_total + population[column]
population["65세이상"] = elderly_total
population

# Pivot to one row per (광역시도, 시도) pair, with the three value columns
# split by category ("구분") into a two-level column index.
pop = population.pivot_table(
    index=["광역시도", "시도"],
    columns=["구분"],
    values=["인구수", "20-39세", "65세이상"],
)
pop

# Extinction ratio: women aged 20-39 divided by half of the 65+ population.
young_women = pop[("20-39세", "여자")]
half_of_elderly = pop[("65세이상", "합계")] / 2
pop["소멸비율"] = young_women / half_of_elderly
pop.tail()

# Flag a region as at risk when the ratio is below 1.0.
at_risk = pop["소멸비율"] < 1.0
pop["소멸위기지역"] = at_risk
pop

# List the second index level (시도) of the flagged regions.
flagged_rows = pop[pop["소멸위기지역"] == True]
flagged_rows.index.get_level_values(1)
# Move the (광역시도, 시도) index back into ordinary columns.
pop.reset_index(inplace=True)
pop.head()

# Flatten the two-level column index by concatenating both level labels,
# e.g. ("인구수", "합계") -> "인구수합계".
tmp_columns = [
    upper_label + lower_label
    for upper_label, lower_label in zip(
        pop.columns.get_level_values(0), pop.columns.get_level_values(1)
    )
]
pop.columns = tmp_columns
pop.head()

3. 지도 시각화를 위한 지역별 ID 만들기
pop['시도'].unique()

# One placeholder per row of pop; the loops that follow fill in the map ID.
si_name = [None for _ in range(len(pop))]
si_name

# City name -> names of the administrative districts (gu) that belong to it.
# Used to prefix a district with its city when building the map ID.
tmp_gu_dict = {
    "수원": ["장안구", "권선구", "팔달구", "영통구"],
    "성남": ["수정구", "중원구", "분당구"],
    "안양": ["만안구", "동안구"],
    "안산": ["상록구", "단원구"],
    "고양": ["덕양구", "일산동구", "일산서구"],
    "용인": ["처인구", "기흥구", "수지구"],
    "청주": ["상당구", "서원구", "흥덕구", "청원구"],
    "천안": ["동남구", "서북구"],
    "전주": ["완산구", "덕진구"],
    "포항": ["남구", "북구"],
    "창원": ["의창구", "성산구", "진해구", "마산합포구", "마산회원구"],
    "부천": ["오정구", "원미구", "소사구"],
}
pop['광역시도'].unique()

(1) 일반 시 이름과 세종시, 광역시도 일반 구 정리
# First pass: assign a base ID to every row.
#   * province-level rows -> district name without its last character
#   * Sejong              -> "세종"
#   * other metro rows    -> "<first two chars of metro name> <district>"
metro_suffixes = ("광역시", "특별시", "자치시")
for idx, province, district in zip(pop.index, pop["광역시도"], pop["시도"]):
    if province[-3:] not in metro_suffixes:
        si_name[idx] = district[:-1]
        continue
    if province == "세종특별자치시":
        si_name[idx] = "세종"
        continue
    # Two-character district names are kept whole; longer ones lose the
    # trailing character.
    district_label = district if len(district) == 2 else district[:-1]
    si_name[idx] = province[:2] + " " + district_label
si_name

(2) 행정구
# Second pass: for province-level rows whose name is a district listed in
# tmp_gu_dict, overwrite the ID with "<city> <district>".
metro_suffixes = ("광역시", "특별시", "자치시")
for idx, province, district in zip(pop.index, pop["광역시도"], pop["시도"]):
    if province[-3:] in metro_suffixes:
        continue
    for city, gu_names in tmp_gu_dict.items():
        if district not in gu_names:
            continue
        if len(district) == 2:
            # Two-character names are kept whole.
            district_label = district
        elif district in ("마산합포구", "마산회원구"):
            # Drop the leading two characters and the trailing one.
            district_label = district[2:-1]
        else:
            district_label = district[:-1]
        si_name[idx] = city + " " + district_label
si_name

(3) 고성군
# Third pass: "고성" exists in two provinces, so tag each with its province.
goseong_ids = {"강원도": "고성(강원)", "경상남도": "고성(경남)"}
metro_suffixes = ("광역시", "특별시", "자치시")
for idx, province, district in zip(pop.index, pop["광역시도"], pop["시도"]):
    if province[-3:] in metro_suffixes:
        continue
    if district[:-1] == "고성" and province in goseong_ids:
        si_name[idx] = goseong_ids[province]
si_name

# Attach the generated IDs and drop the three columns deleted by the
# original code.
pop["ID"] = si_name
unused_columns = ["20-39세남자", "65세이상남자", "65세이상여자"]
pop.drop(columns=unused_columns, inplace=True)
pop.head()

4. 지도 그리기(카르토그램)
# Load the hand-drawn cartogram grid: each spreadsheet cell holds a region ID.
draw_korea_raw = pd.read_excel("./07_draw_korea_raw.xlsx")
draw_korea_raw

# Turn the grid into long form: one row per cell, with the original row
# label as "y", the column label as "x" and the cell content as "ID".
draw_korea_raw_stacked = (
    pd.DataFrame(draw_korea_raw.stack())
    .reset_index()
    .rename(columns={"level_0": "y", "level_1": "x", 0: "ID"})
)
draw_korea_raw_stacked
draw_korea = draw_korea_raw_stacked

# Border polylines drawn on top of the cartogram. Each entry is a list of
# points; the drawing code unpacks every point as (y, x) before plotting.
# The region named after an entry is the original author's label.
BORDER_LINES = [
[(5, 1), (5, 2), (7, 2), (7, 3), (11, 3), (11, 0)], # Incheon
[(5, 4), (5, 5), (2, 5), (2, 7), (4, 7), (4, 9), (7, 9), (7, 7), (9, 7), (9, 5), (10, 5), (10, 4), (5, 4)], # Seoul
[(1, 7), (1, 8), (3, 8), (3, 10), (10, 10), (10, 7), (12, 7), (12, 6), (11, 6), (11, 5), (12, 5), (12, 4), (11, 4), (11, 3)], # Gyeonggi-do
[(8, 10), (8, 11), (6, 11), (6, 12)], # Gangwon-do
[(12, 5), (13, 5), (13, 4), (14, 4), (14, 5), (15, 5), (15, 4), (16, 4), (16, 2)], # Chungcheongbuk-do
[(16, 4), (17, 4), (17, 5), (16, 5), (16, 6), (19, 6), (19, 5), (20, 5), (20, 4), (21, 4), (21, 3), (19, 3), (19, 1)], # Jeollabuk-do
[(13, 5), (13, 6), (16, 6)],
[(13, 5), (14, 5)], # Daejeon / Sejong (original note; presumably Daejeon is the entry above and Sejong this one - TODO confirm)
[(21, 2), (21, 3), (22, 3), (22, 4), (24, 4), (24, 2), (21, 2)], # Gwangju
[(20, 5), (21, 5), (21, 6), (23, 6)], # Jeollanam-do
[(10, 8), (12, 8), (12, 9), (14, 9), (14, 8), (16, 8), (16, 6)], # Chungcheongbuk-do (label repeated in the original - TODO confirm)
[(14, 9), (14, 11), (14, 12), (13, 12), (13, 13)], # Gyeongsangbuk-do
[(15, 8), (17, 8), (17, 10), (16, 10), (16, 11), (14, 11)], # Daegu
[(17, 9), (18, 9), (18, 8), (19, 8), (19, 9), (20, 9), (20, 10), (21, 10)], # Busan
[(16, 11), (16, 13)],
[(27, 5), (27, 6), (25, 6)]
]
def plot_text_simple(draw_korea):
    """Write every region name at the centre of its grid cell.

    Args:
        draw_korea: DataFrame with an ``ID`` column (region name) and ``x`` /
            ``y`` columns (grid position of the cell).
    """
    for _, cell in draw_korea.iterrows():
        region_id = cell["ID"]
        words = region_id.split()

        if len(words) == 2:
            # "<city> <district>" IDs are shown on two lines.
            dispname = "\n".join(words)
        elif region_id[:2] == "고성":
            # Both "고성(...)" IDs are displayed without the province tag.
            dispname = "고성"
        else:
            dispname = region_id

        # Use a smaller font when the last displayed line has 3+ characters.
        last_line = dispname.splitlines()[-1]
        fontsize, linespacing = (9.5, 1.5) if len(last_line) >= 3 else (11, 1.2)

        # +0.5 moves the label from the cell corner to the cell centre.
        label_position = (cell["x"] + 0.5, cell["y"] + 0.5)
        plt.annotate(
            dispname,
            label_position,
            weight="bold",
            fontsize=fontsize,
            linespacing=linespacing,
            ha="center",  # horizontal alignment
            va="center",  # vertical alignment
        )
def simpleDraw(draw_korea):
    """Draw the bare cartogram: region labels plus the border polylines.

    Args:
        draw_korea: DataFrame of grid cells, passed on to
            ``plot_text_simple``.
    """
    plt.figure(figsize=(8, 11))
    plot_text_simple(draw_korea)

    for border in BORDER_LINES:
        # Points are stored as (y, x); split them into two coordinate lists.
        y_coords = tuple(point[0] for point in border)
        x_coords = tuple(point[1] for point in border)
        plt.plot(x_coords, y_coords, c="black", lw=1.5)

    # Flip the y axis so that row 0 is at the top.
    plt.gca().invert_yaxis()
    plt.axis("off")
    plt.tight_layout()
    plt.show()


simpleDraw(draw_korea)

검증 작업
# IDs present on the cartogram grid but missing from the population table
# (displayed for inspection).
set(draw_korea["ID"].unique()) - set(pop["ID"].unique())

# IDs present in the population table but missing from the cartogram grid.
tmp_list = list(set(pop["ID"].unique()) - set(draw_korea["ID"].unique()))
# Remove all of those rows with one boolean filter instead of rebuilding the
# DataFrame once per ID.
pop = pop[~pop["ID"].isin(tmp_list)]
# Should now print an empty set.
print(set(pop["ID"].unique()) - set(draw_korea["ID"].unique()))

# Attach the grid coordinates (x, y) to every remaining population row.
pop = pd.merge(pop, draw_korea, how="left", on="ID")
pop.head()

그림을 그리기 위한 데이터를 계산하는 함수
- 색상을 만들 때, 최소값을 흰색
- blockedMap : 인구현황(pop)
- targetData : 그리고 싶은 컬럼
def get_data_info(targetData, blockedMap):
    """Return the heat-map grid and colour-scale bounds for one column.

    Args:
        targetData: Name of the column in ``blockedMap`` to visualise.
        blockedMap: DataFrame holding that column plus ``x`` and ``y`` grid
            coordinates.

    Returns:
        A tuple ``(mapdata, vmax, vmin, whitelabelmin)``: ``mapdata`` is
        ``blockedMap`` pivoted into a ``y``-by-``x`` grid of ``targetData``;
        ``vmin`` / ``vmax`` are the smallest and largest values of the
        column; ``whitelabelmin`` lies a quarter of the way from ``vmin`` to
        ``vmax``.
    """
    values = blockedMap[targetData]
    # Series.min()/max() skip NaN. The builtin min()/max() compare element by
    # element, so a NaN in the column made the result depend on row order.
    vmin = values.min()
    vmax = values.max()
    whitelabelmin = (vmax - vmin) * 0.25 + vmin
    mapdata = blockedMap.pivot_table(index='y', columns='x', values=targetData)
    return mapdata, vmax, vmin, whitelabelmin
def get_data_info_for_zero_center(targetData, blockedMap):
    """Return the heat-map grid and a colour scale symmetric around zero.

    Args:
        targetData: Name of the column in ``blockedMap`` to visualise.
        blockedMap: DataFrame holding that column plus ``x`` and ``y`` grid
            coordinates.

    Returns:
        A tuple ``(mapdata, vmax, vmin, whitelabelmin)``: ``mapdata`` is
        ``blockedMap`` pivoted into a ``y``-by-``x`` grid of ``targetData``;
        ``vmax`` is the largest absolute value in the column and ``vmin`` is
        ``-vmax``; ``whitelabelmin`` is the fixed value 5.
    """
    whitelabelmin = 5
    # The largest absolute value equals max(|min|, |max|). Series methods
    # skip NaN, whereas the builtin min()/max() gave order-dependent results
    # when the column contained NaN.
    tmp_max = blockedMap[targetData].abs().max()
    vmin, vmax = -tmp_max, tmp_max
    mapdata = blockedMap.pivot_table(index='y', columns='x', values=targetData)
    return mapdata, vmax, vmin, whitelabelmin
def plot_text(targetData, blockedMap, whitelabelmin):
    """Write every region name on its cell, coloured for contrast.

    Args:
        targetData: Name of the column whose value decides the label colour.
        blockedMap: DataFrame with ``ID``, ``x``, ``y`` and ``targetData``
            columns.
        whitelabelmin: Labels are white when the absolute cell value is
            greater than this threshold, black otherwise.
    """
    for _, cell in blockedMap.iterrows():
        region_id = cell["ID"]
        words = region_id.split()

        if len(words) == 2:
            # "<city> <district>" IDs are shown on two lines.
            dispname = "\n".join(words)
        elif region_id[:2] == "고성":
            # Both "고성(...)" IDs are displayed without the province tag.
            dispname = "고성"
        else:
            dispname = region_id

        # Use a smaller font when the last displayed line has 3+ characters.
        last_line = dispname.splitlines()[-1]
        fontsize, linespacing = (9.5, 1.5) if len(last_line) >= 3 else (11, 1.2)

        if np.abs(cell[targetData]) > whitelabelmin:
            annocolor = 'white'
        else:
            annocolor = 'black'

        # +0.5 moves the label from the cell corner to the cell centre.
        label_position = (cell["x"] + 0.5, cell["y"] + 0.5)
        plt.annotate(
            dispname,
            label_position,
            weight="bold",
            color=annocolor,
            fontsize=fontsize,
            linespacing=linespacing,
            ha="center",  # horizontal alignment
            va="center",  # vertical alignment
        )
def drawKorea(targetData, blockedMap, cmapname, zeroCenter=False):
    """Draw the cartogram coloured by one column of ``blockedMap``.

    Args:
        targetData: Name of the column to colour the cells by.
        blockedMap: DataFrame with ``ID``, ``x``, ``y`` and ``targetData``
            columns.
        cmapname: Matplotlib colormap name passed to ``plt.pcolor``.
        zeroCenter: When true, use the colour scale that is symmetric around
            zero (``get_data_info_for_zero_center``); otherwise use
            ``get_data_info``.
    """
    compute_info = get_data_info_for_zero_center if zeroCenter else get_data_info
    masked_mapdata, vmax, vmin, whitelabelmin = compute_info(targetData, blockedMap)

    plt.figure(figsize=(8, 11))
    plt.pcolor(
        masked_mapdata,
        vmin=vmin,
        vmax=vmax,
        cmap=cmapname,
        edgecolor="#aaaaaa",
        linewidth=0.5,
    )
    plot_text(targetData, blockedMap, whitelabelmin)

    for border in BORDER_LINES:
        # Points are stored as (y, x); split them into two coordinate lists.
        y_coords = tuple(point[0] for point in border)
        x_coords = tuple(point[1] for point in border)
        plt.plot(x_coords, y_coords, c="black", lw=1.5)

    # Flip the y axis so that row 0 is at the top.
    plt.gca().invert_yaxis()
    plt.axis("off")
    plt.tight_layout()

    colorbar = plt.colorbar(shrink=0.1, aspect=10)
    colorbar.set_label(targetData)
    plt.show()


drawKorea('인구수합계', pop, "Blues")

# Convert the at-risk flag to 1 / 0 so it can be drawn as a colour value.
pop['소멸위기지역'] = [int(bool(flag)) for flag in pop['소멸위기지역']]
drawKorea('소멸위기지역', pop, 'Reds')

# Female share of the total population, as percentage points above 50 %.
female_share = pop['인구수여자'] / pop['인구수합계']
pop['여성비'] = (female_share - 0.5) * 100
drawKorea('여성비', pop, 'RdBu', zeroCenter=True)

# The same measure restricted to the 20-39 age band.
young_female_share = pop['20-39세여자'] / pop['20-39세합계']
pop['2030여성비'] = (young_female_share - 0.5) * 100
drawKorea('2030여성비', pop, 'RdBu', zeroCenter=True)

import json
import folium
# Index the population table by region ID for the choropleth lookups.
pop_folium = pop.set_index('ID')
pop_folium.head()
geo_path = './07_skorea_municipalities_geo_simple.json'
# Read the GeoJSON inside a context manager so the file handle is closed.
with open(geo_path, encoding='utf-8') as geo_file:
    geo_str = json.load(geo_file)
# Choropleth of the total population per region.
mymap = folium.Map(location=[36.2002, 127.054], zoom_start=7)
# Map.choropleth() is deprecated and missing from newer folium releases; the
# Choropleth layer class is its replacement. `data` is a Series indexed by
# region ID, so each GeoJSON feature is matched on its `id` and no `columns`
# argument is needed.
folium.Choropleth(
    geo_data=geo_str,
    data=pop_folium['인구수합계'],
    key_on='feature.id',
    fill_color='YlGnBu',
).add_to(mymap)
mymap

# Choropleth of the at-risk flag per region.
mymap = folium.Map(location=[36.2002, 127.054], zoom_start=7)
# Map.choropleth() is deprecated and missing from newer folium releases; the
# Choropleth layer class is its replacement. `data` is a Series indexed by
# region ID, so each GeoJSON feature is matched on its `id` and no `columns`
# argument is needed.
folium.Choropleth(
    geo_data=geo_str,
    data=pop_folium['소멸위기지역'],
    key_on='feature.id',
    fill_color='PuRd',
).add_to(mymap)
mymap
