📌 목표 | 서울시 지역별, 브랜드별 주유 가격 분석 및 시각화
📌 크롤링 출처
목표데이터
- 서울시 각 구별 데이터(브랜드, 가격, 셀프 주유 여부, 위치) 엑셀 파일
from selenium import webdriver
from selenium.webdriver.common.by import By

# Open the Opinet region-search page in a Chrome session.
url = "https://www.opinet.co.kr/searRgSelect.do"
# NOTE(review): newer Selenium releases no longer accept the driver path as a
# positional argument -- confirm the installed version still supports this call.
driver = webdriver.Chrome("../driver/chromedriver.exe")
driver.get(url)

# Read the `value` attribute of every <option> under the element with
# id "SIDO_NM0".
sido_list_raw = driver.find_element(By.ID, "SIDO_NM0")
sido_list = sido_list_raw.find_elements(By.TAG_NAME, "option")
# Skip the first option (presumably a placeholder entry -- verify on the page).
sido_names = [option.get_attribute("value") for option in sido_list][1:]

# Type the first remaining name into the dropdown to select it.
sido_list_raw.send_keys(sido_names[0])
sido_names[0]
# Value input: collect the option labels under the element with id
# "SIGUNGU_NM0".
gu_list_raw = driver.find_element(By.ID, "SIGUNGU_NM0")  # parent tag
gu_list = gu_list_raw.find_elements(By.TAG_NAME, "option")  # child tags
# Skip the first option (presumably a placeholder entry -- verify on the page).
gu_name = [option.text for option in gu_list][1:]
# Download the gas-station data for each of Seoul's 25 districts.
import time

from tqdm import tqdm_notebook

try:
    for gu in tqdm_notebook(gu_name):
        # Send the district name to the dropdown.
        driver.find_element(By.ID, "SIGUNGU_NM0").send_keys(gu)
        # Fixed pause; presumably gives the page time to react -- tune if flaky.
        time.sleep(2)
        # Click the Excel download button.
        driver.find_element(By.ID, "glopopd_excel").click()
        time.sleep(2)
finally:
    # Close the browser window even when one of the steps above raises.
    driver.close()
๋ชฉํ
ํฌ๋กค๋ง์ผ๋ก ๋ค์ด๋ก๋ ๋ฐ์ ๋ฐ์ดํฐ ํ Data Frame์ ์ ๋ฆฌ
import pandas as pd
from glob import glob

# Collect every downloaded Excel file whose name starts with "지역_".
stations_files = glob("../data/지역_*.xls")
if not stations_files:
    # Fail with a clear message instead of the opaque error pd.concat raises
    # when it is handed an empty list.
    raise FileNotFoundError("no files match ../data/지역_*.xls")

# header=2: the column names are taken from the third row of each sheet.
tmp_raw = [pd.read_excel(file, header=2) for file in stations_files]
tmp_raw

# Stack the per-file frames; the index is not reset here.
station_raw = pd.concat(tmp_raw)
๐ concat : ํ์์ด ๋์ผํ๊ณ ์ฐ๋ฌ์ ๋ถ์ด๊ธฐ๋ง ํ ๋ ์ฌ์ฉ
โก๏ธ index๋ฒํธ reset ํ์
# Keep only the columns needed for the analysis, under shorter names:
# 상호 (station name), 주소 (address), 가격 (price, taken from the 휘발유 /
# gasoline column), 셀프 (self-service flag), 상표 (brand).
station = pd.DataFrame({
    "상호": station_raw["상호"],
    "주소": station_raw["주소"],
    "가격": station_raw["휘발유"],
    "셀프": station_raw["셀프여부"],
    "상표": station_raw["상표"],
})

# The second whitespace-separated token of the address is used as the
# district (구).
station["구"] = [each_address.split()[1] for each_address in station["주소"]]

# Drop rows whose price is the "-" marker, then convert prices to float.
# .copy() makes the filtered frame independent so the column assignment below
# does not write into a slice of the unfiltered frame.
station = station[station["가격"] != "-"].copy()
station["가격"] = station["가격"].astype("float")

# Renumber the rows 0..n-1 and discard the old index.
station = station.reset_index(drop=True)
import matplotlib as mpl
from matplotlib import font_manager as fm
from matplotlib import pyplot as plt

# Run the IPython "matplotlib inline" line magic.
get_ipython().run_line_magic("matplotlib", "inline")

# Fix broken Hangul glyphs by selecting the "Malgun Gothic" font family.
plt.rcParams["font.family"] = "Malgun Gothic"

# Fix broken minus signs.
plt.rc("axes", unicode_minus=False)
# NOTE(review): `sns` is not imported anywhere in the visible part of this
# file -- confirm that `import seaborn as sns` exists before these calls.

# Price distribution grouped by the self-service flag (pandas boxplot).
station.boxplot(column="가격", by="셀프", figsize=(12, 8))

# Same comparison drawn with seaborn.
plt.figure(figsize=(12, 8))
sns.boxplot(x="셀프", y="가격", data=station, palette="Set3")

# Price distribution per brand, split by the self-service flag.
plt.figure(figsize=(12, 8))
sns.boxplot(x="상표", y="가격", hue="셀프", data=station, palette="Set3")
import json

# NOTE(review): `folium` is not imported anywhere in the visible part of this
# file -- confirm that `import folium` exists before the map is built.

# Mean price per district (구). The string "mean" selects the same aggregation
# as the numpy callable without requiring numpy in this block.
gu_data = pd.pivot_table(data=station, index="구", values="가격", aggfunc="mean")

# Load the GeoJSON file; the context manager closes the file after parsing.
geo_path = "../data/02. skorea_municipalities_geo_simple.json"
with open(geo_path, encoding="utf-8") as geo_file:
    geo_str = json.load(geo_file)

my_map = folium.Map(
    location=[37.5502, 126.982],
    zoom_start=10,
    tiles="CartoDB Positron",
)

# Colour each GeoJSON feature by the mean price of the district whose name
# equals the feature's `id`.
folium.Choropleth(
    geo_data=geo_str,
    data=gu_data,
    columns=[gu_data.index, "가격"],
    key_on="feature.id",
    fill_color="PuRd",
).add_to(my_map)

my_map