[EDA] Chapter03. 서울시 범죄 현황 데이터 분석(2/2)

황성미·2023년 8월 4일

제로베이스 데이터스쿨

목록 보기

22/58

✍🏻 4일 공부 이야기.

👀 오늘 공부한 자세한 내용의 코드는 아래 깃허브에 올라와있습니다!!
https://github.com/nabi4442/ZeroBaseDataSchool/commit/e6477c2c4878210482114903737bd62b6cdbf67f

데이터 시각화

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rc

plt.rcParams['axes.unicode_minus'] = False  # draw minus signs with the ASCII hyphen so they still render under the Korean font set below
rc("font", family = "Malgun Gothic")  # Korean-capable font for labels; NOTE(review): presumably only installed on Windows — confirm on other platforms
get_ipython().run_line_magic("matplotlib", "inline")  # programmatic form of the `%matplotlib inline` notebook magic

Pairplot

강도, 살인, 폭력에 대한 상관관계를 살펴보자.

# Pairwise relationships among the murder, robbery and violence columns.
violent_crime_cols = ["살인", "강도", "폭력"]
sns.pairplot(
    crime_anal_gu_norm,
    vars=violent_crime_cols,
    kind="reg",  # regression line per panel (alternatives: scatter, kde, hist)
    height=3,
)

인구수, CCTV 수와 살인, 강도와의 관계를 살펴보자.

# Population and CCTV count versus murder and robbery.
def drawGraph():
    """Show a regression pairplot of population/CCTV (x) against murder/robbery (y)."""
    pair_options = dict(
        x_vars=["인구수", "CCTV"],
        y_vars=["살인", "강도"],
        kind="reg",
        height=4,
    )
    sns.pairplot(crime_anal_gu_norm, **pair_options)
    plt.show()


drawGraph()

: 데이터가 흩어져있는 것을 보아 큰 관련은 없어보인다. 하지만 눈에 띄게 CCTV가 적은 경우 강도가 적게 발생한 데이터가 보이는데 이것만 보고 CCTV를 줄여야한다!고 판단하기엔 이르다.

인구수, CCTV수와 살인/폭력 검거율과의 관계를 살펴보자.

# Population and CCTV count versus the murder and violence arrest-rate columns.
def drawGraph():
    """Show a regression pairplot of population/CCTV (x) against two arrest-rate columns (y)."""
    predictors = ["인구수", "CCTV"]
    arrest_rates = ["살인검거율", "폭력검거율"]
    sns.pairplot(
        crime_anal_gu_norm,
        x_vars=predictors,
        y_vars=arrest_rates,
        kind="reg",
        height=4,
    )
    plt.show()


drawGraph()

인구수, CCTV수와 절도/강도 검거율과의 관계를 살펴보자.

# Population and CCTV count versus the theft and robbery arrest-rate columns.
def drawGraph():
    """Show a regression pairplot of population/CCTV (x) against theft/robbery arrest rates (y)."""
    axes_spec = {
        "x_vars": ["인구수", "CCTV"],
        "y_vars": ["절도검거율", "강도검거율"],
    }
    sns.pairplot(crime_anal_gu_norm, kind="reg", height=4, **axes_spec)
    plt.show()


drawGraph()

: 눈에 띄게 CCTV가 적은 경우 살인/강도 검거율이 높은 데이터들이 많이 있었다. CCTV 수가 적을수록 살인/강도 검거율이 높을까? 하지만 인구수와 CCTV 대비 범죄 발생 상관관계 시각화를 보면, CCTV가 적은 경우 살인/강도 건수 또한 많았었다.

그러므로 CCTV 수가 적을수록 살인/강도 검거율이 높다고 판단하려면 다른 분석이 더 필요하다.

Heatmap

검거율 컬럼들

# Arrest-rate heatmap, rows ordered by the "검거" column.
def drawGraph():
    """Draw an annotated heatmap of the arrest-rate columns.

    Rows of ``crime_anal_gu_norm`` are sorted by the "검거" column in
    descending order before plotting, so the highest values appear at the top.
    """
    # Columns to display; "검거" is also the sort key.
    target_col = ["강간검거율", "강도검거율", "살인검거율", "절도검거율", "폭력검거율", "검거"]
    crime_anal_gu_norm_sort = crime_anal_gu_norm.sort_values(by="검거", ascending=False)

    plt.figure(figsize=(10, 10))
    sns.heatmap(
        data=crime_anal_gu_norm_sort[target_col],
        annot=True,  # print the value inside each cell
        fmt="f",  # format annotations as floats
        linewidths=0.5,  # gap between cells
        cmap="RdPu",
    )
    plt.title("범죄 검거 비율(정규화된 검거의 합으로 정렬)")
    plt.show()


# Invoke right away: the name drawGraph is redefined further down the file,
# so without this call the figure would never be rendered.
drawGraph()

범죄발생 건수 컬럼들

# Crime-count heatmap, rows ordered by the "범죄" column.
def drawGraph():
    """Draw an annotated heatmap of the per-crime columns.

    Rows of ``crime_anal_gu_norm`` are sorted by the "범죄" column in
    descending order before plotting, so the highest values appear at the top.
    """
    # Columns to display; "범죄" is also the sort key.
    target_col = ["강간", "강도", "살인", "절도", "폭력", "범죄"]
    crime_anal_gu_norm_sort = crime_anal_gu_norm.sort_values(by="범죄", ascending=False)

    plt.figure(figsize=(10, 10))
    sns.heatmap(
        data=crime_anal_gu_norm_sort[target_col],
        annot=True,  # print the value inside each cell
        fmt="f",  # format annotations as floats
        linewidths=0.5,  # gap between cells
        cmap="RdPu",
    )
    plt.title("범죄 발생 비율(정규화된 발생 건수로 정렬)")
    plt.show()


# Invoke right away: the name drawGraph is reused by a later definition,
# so without this call the figure would never be rendered.
drawGraph()

결론

강남 3구의 범죄 발생 건수가 결코 낮지 않고, 강남 3구의 범죄 검거율 조차 높지 않음을 확인했다.
단, 인구 대비 현황 등을 좀 더 고려해봐야할 듯!

지도 시각화 Folium

!pip install folium 로 설치하기.

< Folium 장점 >
크롬 브라우저에서 잘 작동됨
지도를 html로 저장 가능 m.save('파일경로/파일명.html')
지도 스타일 설정 가능 tiles
마커 추가 가능 folium.Marker()
다양한 모양의 아이콘 지원
마커 클릭시, 타이틀 또는 위도 경도 표시 가능
원 도형 추가 가능 folium.Circle(), folium.CircleMarker()
지도에 colormap 표현

folium.Map()

기본 형태

변수값 = folium.Map(location = [위도값, 경도값]) #(위도값, 경도값) 도 가능

해당 지도가 처음부터 크게 나오기 위한 형태

변수값 = folium.Map(location = [위도값, 경도값], zoom_start = 값) # 0-18 사이 값

지도 스타일 : tiles.option

    - "OpenStreetMap"
    - "Mapbox Bright" (Limited levels of zoom for free tiles)
    - "Mapbox Control Room" (Limited levels of zoom for free tiles)
    - "Stamen" (Terrain, Toner, and Watercolor)
    - "Cloudmade" (Must pass API key)
    - "Mapbox" (Must pass API key)
    - "CartoDB" (positron and dark_matter)

# Map drawn with a non-default tile style.
# NOTE(review): the built-in "Stamen Terrain" tile name may not be available
# in newer folium releases — confirm against the installed version.
map_options = {
    "location": [37.4697, 126.6908],
    "zoom_start": 15,
    "tiles": "Stamen Terrain",
}
m = folium.Map(**map_options)
m

default(OpenStreetMap)	Stamen Terrain

지도 마커 생성 : folium.Marker()

popup : 마커를 마우스로 클릭하면 나타나는 팝업창(HTML 문법을 따름)
tooltip : 마커를 마우스 위에 갖다대면 나타나는 팝업창(HTML 문법을 따름)
icon : 마커 아이콘 변경

# Marker demo: plain marker, popup/tooltip HTML, and a hyperlink popup.
m = folium.Map(
    location=[37.4697, 126.6908],
    zoom_start=15,  # accepted range is 0-18
)

# Ganseok station: marker with no popup or tooltip.
folium.Marker(location=(37.4647, 126.6934)).add_to(m)

# Daiso: popup is shown on click, tooltip on hover; both accept HTML.
folium.Marker(
    location=(37.4655, 126.6941),
    popup="<b>Daiso</b>",
    tooltip="<i>다이소</i>",
).add_to(m)

# Hospital: the popup contains a hyperlink.
# target='_blank' opens the link in a new tab; the earlier value
# (target=_'blink') was malformed HTML and not a valid target keyword.
folium.Marker(
    location=(37.4655, 126.6934),
    popup="<a href = 'http://www.smartbaro.com/' target='_blank'>국제바로병원</a>",
    tooltip="<i>국제바로병원</i>",
).add_to(m)
m

기본 형태

folium.Marker(
	icon = folium.Icon(
    	color = '원하는 마커 컬러',
        icon_color = '아이콘 컬러',
        icon = '아이콘 모양',
        angle = '아이콘 모양 각도',
        prefix = '아이콘 출처' #간혹 이 옵션을 작성해주어야지 아이콘 모양이 뜸
    )
)

📄 아이콘 모양 사이트
- https://fontawesome.com/icons
- https://icons.getbootstrap.com/

# Three markers demonstrating folium.Icon options.
m = folium.Map(location=[37.4697, 126.6908], zoom_start=15)  # zoom_start: 0-18

# Basic icon: only the marker colour is set.
basic_icon = folium.Icon(color="black")
folium.Marker(
    location=(37.4647, 126.6934),
    tooltip="<i>icon_basic</i>",
    icon=basic_icon,
).add_to(m)

# Coloured icon: marker colour, glyph colour and glyph shape.
cloud_icon = folium.Icon(color="red", icon_color="pink", icon="cloud")
folium.Marker(
    location=(37.4655, 126.6941),
    popup="<b>Daiso</b>",
    tooltip="<i>icon_color</i>",
    icon=cloud_icon,
).add_to(m)

# Custom icon: rotated glyph, with an explicit icon-set prefix.
medical_icon = folium.Icon(
    color="blue",
    icon_color="white",
    icon="notes-medical",
    angle=50,
    prefix='fa',
)
folium.Marker(
    location=(37.4655, 126.6934),
    popup="<b>국제바로병원</b>",
    tooltip="<i>icon_custom</i>",
    icon=medical_icon,
).add_to(m)
m

지도 위에 마우스로 클릭한 자리 마커 생성 : folium.ClickForMarker()

기본 형태

변수값 = folium.Map(~)
변수값.add_child(folium.ClickForMarker()) #popup 옵션이 없다면 위도, 경도를 보여줌.

# Map on which a mouse click drops a new marker.
m = folium.Map(
    location=[37.4697, 126.6908],
    zoom_start=15,
    tiles="OpenStreetMap",
)
# Without a popup argument the created marker shows its latitude/longitude.
click_handler = folium.ClickForMarker()
m.add_child(click_handler)
# Alternative noted by the author for showing coordinates in a popup:
# folium.LatLngPopup(), added with m.add_child(...) in the same way.

원 모양 생성 : folium.Circle(), folium.CircleMarker()

기본 형태

변수값 = folium.Map(~)
folium.Circle(
	location = (위도값, 경도값),
    radius = 반경값,
    fill = True, #원 안쪽의 색깔을 채울건지
    color = 색상값, #원 둘레 색상
    fill_color = 색상값, #원 안쪽 색상
).add_to(변수값)
#Circle -> CircleMarker만 변경해서 작성할 수도 있음

# Compare folium.Circle and folium.CircleMarker drawn with the same radius value.
m = folium.Map(location=[37.4697, 126.6908], zoom_start=15, tiles="OpenStreetMap")

# Circle
circle_color = "#e3a48d"
folium.Circle(
    location=(37.4647, 126.6934),
    radius=100,  # Circle radius is given in metres
    fill=True,
    color=circle_color,
    fill_color=circle_color,
    popup="Circle Popup",
    tooltip="Circle Tooltip",
).add_to(m)

# CircleMarker
marker_color = "#8d9ee3"
folium.CircleMarker(
    location=(37.4696, 126.689),
    radius=100,  # CircleMarker radius is given in pixels, hence the different on-screen size
    fill=True,
    color=marker_color,
    fill_color=marker_color,
    popup="CircleMarker Popup",
    tooltip="CircleMarker Tooltip",
).add_to(m)

m

위도,경도 데이터로 지도 만들기 : folium.Choropleth

Parameters
----------
geo_data: string/object
    URL, file path, or data (json, dict, geopandas, etc) to your GeoJSON
    geometries
data: Pandas DataFrame or Series, default None
    Data to bind to the GeoJSON.
columns: dict or tuple, default None
    If the data is a Pandas DataFrame, the columns of data to be bound.
    Must pass column 1 as the key, and column 2 the values.
key_on: string, default None
    Variable in the `geo_data` GeoJSON file to bind the data to. Must
    start with 'feature' and be in JavaScript object notation.
    Ex: 'feature.id' or 'feature.properties.statename'.
fill_color: string, optional
    Area fill color, defaults to blue. Can pass a hex code, color name,
    or if you are binding data, one of the following color brewer palettes:
    'BuGn', 'BuPu', 'GnBu', 'OrRd', 'PuBu', 'PuBuGn', 'PuRd', 'RdPu',
    'YlGn', 'YlGnBu', 'YlOrBr', and 'YlOrRd'.
fill_opacity: float, default 0.6
    Area fill opacity, range 0-1.   
line_opacity: float, default 1
    GeoJSON geopath line opacity, range 0-1.
legend_name: string, default empty string
    Title for data legend.

import json

# US unemployment table bound to state boundaries as a choropleth layer.
state_data = pd.read_csv("../data/02. US_Unemployment_Oct2012.csv")

m = folium.Map(location=[43, -102], zoom_start=3)

unemployment_layer = folium.Choropleth(
    # The boundary file is passed as a path; no separate json loading is done here.
    geo_data="../data/02. us-states.json",
    data=state_data,  # Series or DataFrame
    columns=["State", "Unemployment"],  # key column, value column
    key_on="feature.id",
    fill_color="BuPu",
    fill_opacity=1,
    line_opacity=1,
    legend_name="Unemployment rate (%)",
)
unemployment_layer.add_to(m)

m

예제. 서울 동작구 아파트 유형을 지도로 시각화해보자.

[공공데이터포털]

# folium 

m = folium.Map(location=[37.50589466533131, 126.93450729567374], zoom_start=13)

for _, apt in df.iterrows():
    lat, lng = apt.위도, apt.경도
    households = apt.세대수

    # Marker colours switch at 199 households (cut-off picked from describe()).
    if households >= 199:
        marker_color, glyph_color = "lightred", "darkred"
    else:
        marker_color, glyph_color = "lightblue", "darkblue"

    home_icon = folium.Icon(icon="home", color=marker_color, icon_color=glyph_color)
    folium.Marker(
        location=[lat, lng],
        popup=apt.주소,
        tooltip=apt.분류,
        icon=home_icon,
    ).add_to(m)

    # Circle sized by household count; colour switches at 518 households
    # (cut-off picked from describe()).
    ring_color = "pink" if households >= 518 else "green"
    folium.Circle(
        location=[lat, lng],
        radius=households * 0.5,
        fill=True,
        color=ring_color,
        fill_color=ring_color,
    ).add_to(m)

m

서울 구별 범죄 현황 지도 시각화

💡 위 명령어를 사용하기 위해선, 지도 시각화에 필요한 json 파일을 구해야함!!

📄 한국 json 파일
https://github.com/southkorea/southkorea-maps

위 json파일에서 서울시만 추출

'''
추가로 한국 위도 경도를 찾을 수 있는 방법을 구글링하다가
아래 사이트를 발견했다.

https://fhaktj8-18.tistory.com/entry/open-api

카카오 API를 이용하여 위도, 경도를 추출하는 방식인데
일단 도움이 될까 싶어 이곳에 첨부해둔다 :)
'''

geo_path = "../data/02. skorea_municipalities_geo_simple.json"
# Read the GeoJSON explicitly as UTF-8 (the author hit an encoding error
# otherwise), and use `with` so the file handle is closed after parsing.
with open(geo_path, encoding="utf-8") as geo_file:
    geo_str = json.load(geo_file)

# Choropleth of the "살인" (murder) column.
murder_values = crime_anal_gu_norm["살인"]

my_map = folium.Map(location=[37.5502, 126.982], zoom_start=11, tiles="Stamen Toner")

murder_layer = folium.Choropleth(
    geo_data=geo_str,  # boundary coordinates loaded from the GeoJSON file
    data=murder_values,
    columns=[crime_anal_gu_norm.index, murder_values],
    fill_color="PuRd",
    key_on="feature.id",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="정규화된 살인 발생 건수",
)
murder_layer.add_to(my_map)

my_map

위 코드의 컬럼명만 바꾸어서 시각화를 진행한 결과는 아래와 같다.

살인 발생 건수 지도 시각화	성범죄 발생 건수 지도 시각화	5대 범죄 발생 건수 지도 시각화

🤔 인구수에 따라 범죄 발생 건수의 차이가 있을 수 있지 않을까?

# Choropleth of the "범죄" column divided by the "인구수" (population) column.
tmp_criminal = crime_anal_gu_norm["범죄"] / crime_anal_gu_norm["인구수"]

my_map = folium.Map(location=[37.5502, 126.982], zoom_start=11, tiles="Stamen Toner")

per_capita_layer = folium.Choropleth(
    geo_data=geo_str,  # boundary coordinates loaded from the GeoJSON file
    data=tmp_criminal,
    columns=[crime_anal_gu_norm.index, tmp_criminal],
    fill_color="PuRd",
    key_on="feature.id",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="정규화된 인구 대비 5대 범죄 발생 건수",
)
per_capita_layer.add_to(my_map)

my_map

구별 범죄 현황과 경찰서별 검거율을 함께 지도에 표시해보자.

# Crime choropleth overlaid with one circle per row of crime_anal_station.
my_map = folium.Map(location=[37.5502, 126.982], zoom_start=11)

# Build the layer with the folium.Choropleth class, matching the other maps
# in this file, instead of the legacy Map.choropleth method.
folium.Choropleth(
    geo_data=geo_str,
    data=crime_anal_gu_norm["범죄"],
    columns=[crime_anal_gu_norm.index, crime_anal_gu_norm["범죄"]],
    fill_color="PuRd",
    key_on="feature.id",
    fill_opacity=0.7,
    line_opacity=0.2,
).add_to(my_map)

for _, station in crime_anal_station.iterrows():
    folium.CircleMarker(
        location=[station["lat"], station["lng"]],
        radius=station["검거"] * 50,  # scale the "검거" value up to a visible size
        popup=f"{station['구분']} : {station['검거']:.2f}",
        color="#3186cc",
        fill=True,
        fill_color="#3186cc",
    ).add_to(my_map)

my_map

지금까지 시각화한 것을 바탕으로 생각해본다면, 딱히 강남 3구가 범죄로부터 안전하다고는 못 할 것 같다. 인구수에 영향을 받는 게 아닌가 해서 살펴본 인구 대비 범죄 현황 시각화에서도 큰 차이는 없었다. (물론 고려 사항이 많겠지만, 지금 있는 데이터로만 보았을 땐 그러하다!)

서울시 범죄 현황 발생 장소 분석

🤔 혹시 유흥업소가 많아서 범죄가 많이 발생하는 것은 아닌가? 다른 장소들은 낮지 않을까?

[공공데이터포털]

위 데이터를 전처리 한 후, 앞서 사용했던 범죄 현황 데이터를 합쳐 Heatmap으로 시각화한 결과는 아래와 같다.

# Heatmap of crime_loc_norm, rows sorted by the "종합" column in descending order.
crime_loc_norm_sort = crime_loc_norm.sort_values(by="종합", ascending=False)


def drawGraph():
    """Draw an annotated heatmap of the sorted ``crime_loc_norm_sort`` frame."""
    heatmap_style = {
        "annot": True,
        "fmt": "f",
        "linewidths": 0.5,
        "cmap": "RdPu",
    }
    plt.figure(figsize=(10, 10))
    sns.heatmap(crime_loc_norm_sort, **heatmap_style)
    plt.title("범죄 발생 장소")
    plt.show()


drawGraph()

위 히트맵을 보았을 때, 유흥업소의 범죄 발생율이 그리 높아보이진 않는다. 유흥업소에서 시비가 붙어 노상이나 기타 장소에서 범죄가 발생할 수도 있겠지만, 일단 데이터에서 보여지는 바로는 <유흥업소가 많아서 범죄가 많다>가 성립되려면 더 추가적인 분석이 필요해 보인다.
위 시각화에서 눈에 띄는건 살인이 단독주택에서 많이 발생한다는 것, 그리고 많은 범죄들이 노상에서 발생한다는 것은 좀 더 분석을 하고 싶은 호기심이 들었다.