📌 개요 : 서울시 범죄현황 분석
📌 목표
- 경찰서별 범죄별 데이터 피봇팅
- 경찰서명 <> 구별 데이터 정리
- 범죄 데이터 정규화
- 상관관계 시각화
- 지도 시각화
- 데이터 : 서울시 관서별 5대 범죄현황
- 출처 : 공공데이터포털
프로젝트 시작 전 한글 및 마이너스 부호 깨짐현상 세팅
from matplotlib import font_manager as fm
from matplotlib import pyplot as plt
# Render figures inline in the notebook.
get_ipython().run_line_magic("matplotlib", "inline")
# Fix broken Hangul glyphs by selecting a Korean-capable font.
plt.rcParams["font.family"] = "Malgun Gothic"
# Fix the broken minus sign on axis ticks.
import matplotlib as mpl
mpl.rc("axes", unicode_minus=False)
# Load the raw crime CSV. The file is EUC-KR encoded, and "," is treated as a
# thousands separator so that counts such as "1,234" are parsed as numbers.
crime_raw = pd.read_csv(
    "../data/02. crime_in_Seoul.csv",
    encoding="euc-kr",
    thousands=",",
)
thousands = ","
천 단위를 , 로 구분 (숫자값이 문자값으로 인식될 수 있어서 숫자값으로 나타날 수 있도록 세팅)
# Print column dtypes and non-null counts of the raw table.
crime_raw.info()
# Show the distinct values of the crime-category column ("죄종").
crime_raw["죄종"].unique()
# Keep only the rows whose crime category is present (drops rows where it is null).
crime_raw_data = crime_raw[crime_raw["죄종"].notnull()]
# Pivot to one row per police station ("구분") and one column per
# (crime category "죄종", occurrence/arrest "발생검거") pair, summing the
# remaining value column. The string "sum" is used instead of np.sum because
# recent pandas versions warn when a NumPy callable is passed as aggfunc.
crime_station = crime_raw_data.pivot_table(
    index="구분",
    columns=["죄종", "발생검거"],
    aggfunc=["sum"],
)
# With aggfunc given as a list and `values` left unset, the result carries two
# extra column levels on top ("sum" and the value column name). Drop them so
# only (죄종, 발생검거) remain; the later column-flattening step that joins
# level 0 and level 1 relies on exactly these two levels.
crime_station.columns = crime_station.columns.droplevel([0, 1])
crime_station.head()
pandas multi index ์ ๊ฑฐ
์ ๋ฐ์ดํฐํ๋ผ์์ columns์ ์ค๋ณต๊ฐ์ธ 'sum','๊ฑด์' ์ปฌ๋ผ์ droplevel๋ฉ์๋๋ก ์ญ์ ํ๋ค.
์ ๊ณผ์ ๊น์ง ์งํํ ๊ฐ๊ณต์์
index๋ ๊ฒฝ์ฐฐ์ ์ด๋ฆ์ผ๋ก ๋์ด์๊ณ , ๊ฒฝ์ฐฐ์ ์ด๋ฆ์ผ๋ก ๊ตฌ ์ด๋ฆ์ ์์๋ด์ผํ๋ค.
import googlemaps
gmaps_key = "googlemaps key๊ฐ"
gmaps = googlemaps.Client(key = gmaps_key)
google maps 모듈 설치
# Example lookup: geocode one station name and ask for Korean-language results.
gmaps.geocode("서울영등포경찰서", language="ko")
실행 시
[{'address_components': [{'long_name': '영등포구',
'short_name': '영등포구',
'types': ['political', 'sublocality', 'sublocality_level_1']},
{'long_name': '서울특별시',
'short_name': '서울특별시',
'types': ['administrative_area_level_1', 'political']},
{'long_name': '대한민국',
'short_name': 'KR',
'types': ['country', 'political']}],
'formatted_address': '대한민국 서울특별시 영등포구',
'geometry': {'bounds': {'northeast': {'lat': 37.556286, 'lng': 126.9498867},
'southwest': {'lat': 37.4850011, 'lng': 126.8779285}},
'location': {'lat': 37.5223245, 'lng': 126.9101692},
'location_type': 'APPROXIMATE',
'viewport': {'northeast': {'lat': 37.556286, 'lng': 126.9498867},
'southwest': {'lat': 37.4850011, 'lng': 126.8779285}}},
'partial_match': True,
'place_id': 'ChIJk_FAvx2ffDURQAuECsjy0Cw',
'types': ['political', 'sublocality', 'sublocality_level_1']}]
ํ์ํ ์ ๋ณด์ธ lat
lng
formatted_address
๋ฐ์ดํฐ๋ฅผ ๋ฝ์์ crime_station์ ์ถ๊ฐํด์ผํจ.
# Columns to be filled from the geocoding results. "구별" (district) holds
# text, so it starts as an object column (None) instead of float NaN; writing
# strings into a float column is deprecated in recent pandas.
crime_station["구별"] = None
crime_station["lat"] = np.nan
crime_station["lng"] = np.nan

for cnt, idx in enumerate(crime_station.index):
    # Build the query "서울<station>경찰서" from the station name in the index.
    crime_name = "서울" + str(idx) + "경찰서"
    tmp = gmaps.geocode(crime_name, language="ko")
    if not tmp:
        # No match (e.g. a station that no longer exists): leave this row's
        # placeholders untouched and carry on instead of failing on tmp[0].
        print(cnt, "no geocoding result for", crime_name)
        continue

    # Use the first hit. The third whitespace-separated token of
    # formatted_address is taken as the district name
    # (e.g. '대한민국 서울특별시 영등포구' -> '영등포구').
    tmp_gu = tmp[0].get("formatted_address").split()[2]
    tmp_lat = tmp[0].get("geometry")["location"]["lat"]
    tmp_lng = tmp[0].get("geometry")["location"]["lng"]

    crime_station.loc[idx, "lat"] = tmp_lat
    crime_station.loc[idx, "lng"] = tmp_lng
    crime_station.loc[idx, "구별"] = tmp_gu

    # Progress indicator: number of stations processed so far.
    print(cnt)
๋ฐ์ดํฐ ์ถ๊ฐ
์ด ๊ณผ์ ์์ ์ด๋ฏธ ์ฌ๋ผ์ง ๊ฒฝ์ฐฐ์์ ๊ฒฝ์ฐ ๋ฐ์ดํฐ๊ฐ ๋ถ๋ฌ์ ์ง์ง ์์ crime_station์์ ํด๋น ๊ตฌ๋ถ(๊ฒฝ์ฐฐ์์ด๋ฆ) drop์ ์ด์ฉํ์ฌ ์ญ์ ํ์์
# Flatten the two-level column index into single names by concatenating the
# level-0 label with the level-1 label at each position.
tmp = [
    upper + lower
    for upper, lower in zip(
        crime_station.columns.get_level_values(0),
        crime_station.columns.get_level_values(1),
    )
]
tmp
위 데이터에서 병합된 상단 컬럼명과 하단 컬럼명 합치기
# Replace the MultiIndex columns with the flattened names built above.
crime_station.columns = tmp

# Persist the station-level table, then reload it with the station name
# (first CSV column) as the index.
crime_station.to_csv("../data/02. crime_in_Seoul_raw.csv", sep=",", encoding="utf-8")
crime_anal_station = pd.read_csv(
    "../data/02. crime_in_Seoul_raw.csv", index_col=0, encoding="utf-8"
)

# Aggregate the stations into districts ("구별") by summing every column.
# The string "sum" replaces np.sum, which recent pandas warns about.
crime_anal_gu = pd.pivot_table(crime_anal_station, index="구별", aggfunc="sum")

# Summed coordinates are meaningless at district level, so remove them.
crime_anal_gu.drop(columns=["lat", "lng"], inplace=True)
crime_anal_gu.head()
pivot table๋ก ๊ตฌ๋ณ๋ก index ๋ณ๊ฒฝ ํ ํ์์๋ ๊ฒฝ๋ ์๋ ์ปฌ๋ผ ์ญ์
# Output columns (arrest rate per crime type), with the matching numerator
# (arrests) and denominator (occurrences) columns in the same order.
target = ["강간검거율", "강도검거율", "살인검거율", "절도검거율", "폭력검거율"]
num = ["강간검거", "강도검거", "살인검거", "절도검거", "폭력검거"]
den = ["강간발생", "강도발생", "살인발생", "절도발생", "폭력발생"]

# Divide position-by-position: `.values` strips the denominator's column
# labels so pandas does not try to align the differently named columns.
crime_anal_gu[target] = crime_anal_gu[num].div(crime_anal_gu[den].values) * 100
crime_anal_gu.head()
각 검거율 컬럼 추가
컬럼 나누기
1. 하나의 컬럼을 다른 컬럼으로 나누기
# Example 1: divide a single column by another single column.
crime_anal_gu["강도검거"] / crime_anal_gu["강도발생"]
2. 다수의 컬럼을 다른 컬럼으로 나누기
# Example 2: divide several columns by one column; axis=0 matches the divisor
# Series to the frame's row index.
crime_anal_gu[["강도검거", "살인검거"]].div(crime_anal_gu["강도발생"], axis=0)
3. 다수의 컬럼을 다수의 컬럼으로 각각 나누기
# Example 3: divide several columns by several other columns, pairwise by
# position. `.values` removes the denominator's labels so no alignment happens.
num = ["강간검거", "강도검거", "살인검거", "절도검거", "폭력검거"]
den = ["강간발생", "강도발생", "살인발생", "절도발생", "폭력발생"]
crime_anal_gu[num].div(crime_anal_gu[den].values)
# The raw arrest-count columns are no longer needed once the arrest rates
# exist, so remove all five in one call.
crime_anal_gu.drop(
    columns=["강간검거", "강도검거", "살인검거", "절도검거", "폭력검거"],
    inplace=True,
)
필요없는 검거 컬럼 삭제
# Cap every arrest rate at 100 (values above 100 are replaced by 100).
crime_anal_gu[target] = crime_anal_gu[target].clip(upper=100)

# Shorten the occurrence column names to the bare crime type.
crime_anal_gu.rename(
    columns={
        "강간발생": "강간",
        "강도발생": "강도",
        "살인발생": "살인",
        "절도발생": "절도",
        "폭력발생": "폭력",
    },
    inplace=True,
)

col = ["강간", "강도", "살인", "절도", "폭력"]
col2 = ["강간검거율", "강도검거율", "살인검거율", "절도검거율", "폭력검거율"]

# Scale each crime count by its column maximum so the largest value is 1.
crime_anal_norm = crime_anal_gu[col] / crime_anal_gu[col].max()
# Arrest rates are copied over unchanged (they are not max-scaled).
crime_anal_norm[col2] = crime_anal_gu[col2]

# Attach population ("인구수") and CCTV count ("소계", stored as "CCTV") from
# the CCTV result file; rows are matched on the district index.
result_CCTV = pd.read_csv("../data/01. CCTV_result.csv", index_col="구별", encoding="utf-8")
crime_anal_norm[["인구수", "CCTV"]] = result_CCTV[["인구수", "소계"]]

# Per-district summaries: mean of the scaled crime counts and mean of the
# arrest rates (axis=1 -> one value per row).
crime_anal_norm["범죄"] = np.mean(crime_anal_norm[col], axis=1)
crime_anal_norm["검거"] = np.mean(crime_anal_norm[col2], axis=1)
np.mean 평균값
# axis=1 averages along each row (one mean per row); axis=0 would average down
# each column. The original note remarks that this reads as the opposite of
# `drop`, where axis=1 refers to columns.
np.mean(
    np.array(
        [
            [1.000000, 1.000000, 0.357143, 0.977118, 0.733773],
            [0.310078, 0.358974, 0.285714, 0.477799, 0.463880],
        ]
    ),
    axis=1,
)
1. 강도, 살인, 폭력에 대한 상관관계 확인
# Pairwise scatter plots with regression lines for the murder ("살인"),
# robbery ("강도") and violence ("폭력") columns.
sns.pairplot(
    data=crime_anal_norm,
    vars=["살인", "강도", "폭력"],
    kind="reg",
    height=3,
)
2. ์ธ๊ตฌ์, CCTV์ ์ด์ธ, ๊ฐ๋์ ์๊ด๊ด๊ณ ํ์ธ
def drawGraph():
    """Plot murder and robbery against population and CCTV count.

    Draws a grid of regression scatter plots from ``crime_anal_norm`` with
    "인구수" and "CCTV" on the x axes and "살인" and "강도" on the y axes,
    then shows the figure.
    """
    sns.pairplot(
        data=crime_anal_norm,
        x_vars=["인구수", "CCTV"],
        y_vars=["살인", "강도"],
        kind="reg",
        height=4,
    )
    plt.show()


drawGraph()
3. ์ธ๊ตฌ์ cctv์ ์ด์ธ๊ฒ๊ฑฐ์จ, ํญ๋ ฅ๊ฒ๊ฑฐ์จ์ ์๊ด๊ด๊ณํ์ธ
def drawGraph():
    """Plot murder and violence arrest rates against population and CCTV count.

    Draws a grid of regression scatter plots from ``crime_anal_norm`` with
    "인구수" and "CCTV" on the x axes and "살인검거율" and "폭력검거율" on the
    y axes, then shows the figure.
    """
    sns.pairplot(
        data=crime_anal_norm,
        x_vars=["인구수", "CCTV"],
        y_vars=["살인검거율", "폭력검거율"],
        kind="reg",
        height=4,
    )
    plt.show()


drawGraph()
4. ์ธ๊ตฌ์ cctv์ ์ ๋๊ฒ๊ฑฐ์จ, ๊ฐ๋๊ฒ๊ฑฐ์จ์ ์๊ด๊ด๊ณํ์ธ
def drawGraph():
    """Plot theft and robbery arrest rates against population and CCTV count.

    Draws a grid of regression scatter plots from ``crime_anal_norm`` with
    "인구수" and "CCTV" on the x axes and "절도검거율" and "강도검거율" on the
    y axes, then shows the figure.
    """
    sns.pairplot(
        data=crime_anal_norm,
        x_vars=["인구수", "CCTV"],
        y_vars=["절도검거율", "강도검거율"],
        kind="reg",
        height=4,
    )
    plt.show()


drawGraph()
1. 검거율 heatmap
def drawGraph():
    """Draw a heatmap of the five arrest-rate columns, one row per district.

    Rows of ``crime_anal_norm`` are ordered by the "검거" column, highest first.
    """
    # Columns to display and the row order.
    target_column = ["강간검거율", "강도검거율", "살인검거율", "절도검거율", "폭력검거율"]
    crime_anal_norm_sort = crime_anal_norm.sort_values(by="검거", ascending=False)  # descending

    # Build the figure.
    plt.figure(figsize=(10, 10))
    sns.heatmap(
        data=crime_anal_norm_sort[target_column],
        annot=True,  # print the value in each cell
        fmt="f",  # format values as floats
        linewidths=5,  # gap between cells
        cmap="RdPu",
    )
    plt.title("범죄검거비율 (정규화된 검거의 합으로 정렬)")
    plt.show()


drawGraph()
2. 범죄발생건수 heatmap
def drawGraph():
    """Draw a heatmap of the five max-scaled crime-count columns per district.

    Rows of ``crime_anal_norm`` are ordered by the "범죄" column, highest first.
    """
    # Columns to display and the row order.
    target_column = ["강간", "강도", "살인", "절도", "폭력"]
    crime_anal_norm_sort = crime_anal_norm.sort_values(by="범죄", ascending=False)  # descending

    # Build the figure.
    plt.figure(figsize=(10, 10))
    sns.heatmap(
        data=crime_anal_norm_sort[target_column],
        annot=True,  # print the value in each cell
        fmt="f",  # format values as floats
        linewidths=5,  # gap between cells
        cmap="RdPu",
    )
    # The rows are sorted by "범죄" (mean of the scaled occurrence counts), so
    # the title names that key; it previously claimed sorting by arrests.
    plt.title("범죄 비율 (정규화된 발생 건수의 평균으로 정렬)")
    plt.show()


drawGraph()