import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
# Open the Starbucks Korea store locator in a Chrome session driven by the
# local chromedriver binary.
driver = webdriver.Chrome('../driver/chromedriver.exe')
driver.get('https://www.starbucks.co.kr/store/store_map.do?disp=locale')

# CSS selectors for the three elements clicked below, in click order.
popup_selector = 'body > div.holiday_notice.addBtnHoliday > p > a'
region_selector = '#container > div > form > fieldset > div > section > article.find_store_cont > article > article:nth-child(4) > div.loca_step1 > div.loca_step1_cont > ul > li:nth-child(1) > a'
seoul_selector = '#mCSB_2_container > ul > li:nth-child(1) > a'

# Each element is looked up only after the previous click has been issued,
# because the later elements may not exist until the earlier step has run.
# Presumably: holiday-notice popup link -> region search -> Seoul entry
# (inferred from the selector/variable names; confirm against the page).
popup = driver.find_element(By.CSS_SELECTOR, popup_selector)
popup.click()

region_select = driver.find_element(By.CSS_SELECTOR, region_selector)
region_select.click()

seoul_select = driver.find_element(By.CSS_SELECTOR, seoul_selector)
seoul_select.click()
# Parse the rendered page with BeautifulSoup.
req = driver.page_source
soup = BeautifulSoup(req, 'html.parser')
cafe_info_raw = soup.select('.mCSB_container > .quickSearchResultBoxSidoGugun > .quickResultLstCon')

# Preview the first result entry: split its text on whitespace, keep token 0
# as the name and join tokens 2-4 as the address.
first_entry_tokens = cafe_info_raw[0].text.split()
name_info_raw = first_entry_tokens[0]
address_info_raw = ' '.join(first_entry_tokens[2:5])
address_info_raw
# Output: '강남구 언주로 425'
# Collect name, district and address for every result entry, then assemble
# the three parallel lists into the starbucks_info DataFrame.
names_list = []
addresses_list = []
gu_info_list = []

cafe_info_raw = soup.select('.mCSB_container > .quickSearchResultBoxSidoGugun > .quickResultLstCon')
for cafe_entry in cafe_info_raw:
    entry_tokens = cafe_entry.text.split()
    # Token 0 is used as the store name.
    names_list.append(entry_tokens[0])
    # Tokens 2-4 are used as the address; the first of them is the district.
    address_info_raw = entry_tokens[2:5]
    gu_info_list.append(address_info_raw[0])
    addresses_list.append(' '.join(address_info_raw))

data = {
    'District': gu_info_list,
    'Name': names_list,
    'Address': addresses_list,
}
starbucks_info = pd.DataFrame(data)
# Show the rows where the generic token slice put the city name ('서울특별시')
# into 'District', i.e. where the address tokens start one position later
# than in the other entries.
starbucks_info[starbucks_info['District']=='서울특별시']

# Re-parse every such row one token further along (tokens 3-5 of the entry
# text). The rows are found by value rather than by hard-coded positions so
# that each row is always rebuilt from its own entry, and the address is
# joined into a single string so the 'Address' column stays a plain string
# like every other row.
# Assumes starbucks_info still has its default 0..n-1 index, so a row label is
# also the position of the matching entry in cafe_info_raw.
misparsed_rows = starbucks_info.index[starbucks_info['District'] == '서울특별시']
for row_label in misparsed_rows:
    shifted_tokens = cafe_info_raw[row_label].text.split()[3:6]
    starbucks_info.loc[row_label, 'District'] = shifted_tokens[0]
    starbucks_info.loc[row_label, 'Address'] = ' '.join(shifted_tokens)

# Display the distinct districts after the correction.
starbucks_info['District'].unique()
# Output:
# array(['강남구', '강북구', '강서구', '관악구', '광진구', '금천구', '노원구', '도봉구', '동작구',
#        '마포구', '서대문구', '서초구', '성북구', '송파구', '양천구', '영등포구', '은평구', '종로구',
#        '중구', '강동구', '구로구', '동대문구', '성동구', '용산구', '중랑구'], dtype=object)
# The Starbucks listing has already been read from the first browser, so shut
# that session down before starting another one; otherwise its Chrome and
# chromedriver processes stay alive for the rest of the run.
driver.quit()
driver = webdriver.Chrome('../driver/chromedriver.exe')
driver.get('https://members.ediya.com/store')

# Districts to search for: the distinct districts found in the Starbucks data.
gu_list = starbucks_info['District'].unique()
len(gu_list)

# Click the second tab of the search panel (presumably the address-search
# tab, judging by the variable name; confirm against the page).
address_button = driver.find_element(By.CSS_SELECTOR,'#contents > div > div > div.store_wrap > div.srch_wrap > div.srch_tab_wrap > a:nth-child(2)')
address_button.click()

# Trial search with a single district to check the selectors.
search = driver.find_element(By.CSS_SELECTOR,'#keyword')
search.clear()
search.send_keys(gu_list[2])
search_button = driver.find_element(By.CSS_SELECTOR,'#contents > div > div > div.store_wrap > div.srch_wrap > div.form_search > button.btn_search')
search_button.click()

# Parse the result page with BeautifulSoup and preview the first store name.
req = driver.page_source
soup = BeautifulSoup(req,'html.parser')
name_info_raw = soup.select('.info_txt > .name')
name_info = name_info_raw[0].text
name_info
# Output: '등촌중앙점'
# Search the EDIYA store finder once per district in gu_list and collect the
# name, district and address of every Seoul store in the results.
search = driver.find_element(By.CSS_SELECTOR,'#keyword')
search_button = driver.find_element(By.CSS_SELECTOR,'#contents > div > div > div.store_wrap > div.srch_wrap > div.form_search > button.btn_search')

names_list = []
addresses_list = []
gu_info_list = []

for gu in gu_list:
    search.clear()
    search.send_keys(gu)
    search_button.click()

    # NOTE(review): the page source is read immediately after the click. If
    # the result list is filled in asynchronously this could capture the
    # previous search's results - confirm, and add an explicit wait if so.
    req = driver.page_source
    soup = BeautifulSoup(req,'html.parser')
    name_info_raw = soup.select('.info_txt > .name')
    address_info_raw = soup.select('.info_txt > .addr')

    for name_tag, address_tag in zip(name_info_raw, address_info_raw):
        address_tokens = address_tag.text.split()
        # The first token is treated as the city and dropped below. District
        # names such as '중구' or '강서구' also exist in other cities, so a
        # keyword search can presumably return stores outside Seoul (verify
        # against the site). Keep only addresses whose city token starts with
        # '서울', and skip addresses too short to contain a district.
        if len(address_tokens) < 2 or not address_tokens[0].startswith('서울'):
            continue
        name_info = name_tag.text
        names_list.append(name_info)
        address_info = address_tokens[1:]
        gu_info_list.append(address_info[0])
        addresses_list.append(' '.join(address_info))

data = {
    'District' : gu_info_list,
    'Name' : names_list,
    'Address' : addresses_list
}
ediya_info = pd.DataFrame(data)
# 03. Starbucks VS EDIYA 비교 분석 (Starbucks vs. EDIYA comparative analysis)
# Imports
import googlemaps
import folium
import json
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc
# Plot configuration: use the 'Malgun Gothic' font family for text, turn off
# matplotlib's Unicode minus sign, and render figures inline in the notebook.
rc('font', family='Malgun Gothic')
plt.rcParams['axes.unicode_minus'] = False
get_ipython().run_line_magic('matplotlib', 'inline')
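# 서울시 STARBUCKS VS EDIYA 매장 분포 확인 (Check the distribution of Starbucks vs. EDIYA stores in Seoul)
# - 구별 매장 개수를 구하여 분포 비교하기 (Count the stores in each district and compare the distributions)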
import os

# Read the Google Maps API key from the environment instead of embedding it
# in the source, so the secret is not committed or shared with the notebook.
# Any key that was previously stored in this file should be treated as
# exposed and revoked.
gmaps_key = os.environ.get("GOOGLE_MAPS_API_KEY")
if not gmaps_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable to a Google Maps API key."
    )
gmaps = googlemaps.Client(key=gmaps_key)
# Geocode every Starbucks address and store the coordinates of the first
# result in new 'lat' / 'lng' columns.
# NOTE(review): an address with no geocoding result makes tmp[0] raise
# IndexError and aborts the whole loop.
lat = []
lng = []
for target_name in starbucks_info['Address']:
    tmp = gmaps.geocode(target_name, language="ko")
    first_location = tmp[0]['geometry']['location']
    lat.append(first_location['lat'])
    lng.append(first_location['lng'])

starbucks_info['lat'] = lat
starbucks_info['lng'] = lng
# Geocode every EDIYA address on a best-effort basis: an address that cannot
# be geocoded gets the placeholder coordinates (0, 0), which later code uses
# to find and drop those rows.
lat = []
lng = []
for target_name in ediya_info['Address']:
    try:
        tmp = gmaps.geocode(target_name, language="ko")
        first_location = tmp[0]['geometry']['location']
        coordinates = (first_location['lat'], first_location['lng'])
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt /
        # SystemExit still stop a long run. Both coordinates are resolved
        # before either list is touched, so a failure can never leave the
        # two lists with different lengths.
        coordinates = (0, 0)
    lat.append(coordinates[0])
    lng.append(coordinates[1])

ediya_info['lat'] = lat
ediya_info['lng'] = lng
ediya_info.head()

# Rows whose latitude is still the 0 placeholder were not geocoded; collect
# their index labels and remove them from ediya_info in place.
not_geocoded = ediya_info['lat'] == 0
delete_row = ediya_info.index[not_geocoded]
delete_row
ediya_info.drop(index=delete_row, inplace=True)
ediya_info.head()
# Count the stores of each chain per district (districts taken from gu_list)
# and gather the counts into the Gu_cnt_diff comparison table.
gu_list_starbucks = starbucks_info['District'].unique()
starbucks_cnt = [
    starbucks_info.loc[starbucks_info['District'] == district, 'District'].count()
    for district in gu_list
]

gu_list_ediya = ediya_info['District'].unique()
ediya_cnt = [
    ediya_info.loc[ediya_info['District'] == district, 'District'].count()
    for district in gu_list
]

comparison_columns = {
    'District': gu_list_starbucks,
    'Starbucks Count': starbucks_cnt,
    'Ediya Count': ediya_cnt,
}
Gu_cnt_diff = pd.DataFrame(comparison_columns)
Gu_cnt_diff

# Absolute difference: EDIYA count minus Starbucks count.
Gu_cnt_diff['Difference'] = Gu_cnt_diff['Ediya Count'].sub(Gu_cnt_diff['Starbucks Count'])
Gu_cnt_diff

# Relative difference: EDIYA stores per Starbucks store; the table is then
# sorted in place by this ratio, smallest first.
Gu_cnt_diff['Difference Ratio'] = Gu_cnt_diff['Ediya Count'].div(Gu_cnt_diff['Starbucks Count'])
Gu_cnt_diff.sort_values('Difference Ratio', inplace=True)
Gu_cnt_diff
Gu_cnt_diff['Difference Ratio'].mean()
# 매장 개수의 분포를 CircleMarker로 시각화 (Visualise the store-count distribution with CircleMarker)
# Mean latitude/longitude of the Starbucks stores in each district, used as
# that district's marker position.
district_centres = starbucks_info.groupby('District')[['lat', 'lng']].mean()

# Gu_cnt_diff is sorted by 'Difference Ratio' at this point, so its row order
# no longer matches gu_list. Assigning a plain list would be positional and
# attach coordinates to the wrong districts; look each row's centre up by
# district name instead.
Gu_cnt_diff['lat'] = Gu_cnt_diff['District'].map(district_centres['lat'])
Gu_cnt_diff['lng'] = Gu_cnt_diff['District'].map(district_centres['lng'])

# Keep the plain-list names available, now in Gu_cnt_diff row order.
lat = Gu_cnt_diff['lat'].tolist()
lng = Gu_cnt_diff['lng'].tolist()
Gu_cnt_diff
# One map with two circle markers per district, both placed at the district's
# 'lat'/'lng' position.
map_cnt_diff = folium.Map(location=[37.558, 126.983], zoom_start=11)

# Red circles: radius and tooltip are the district's Starbucks store count.
for _, district_row in Gu_cnt_diff.iterrows():
    starbucks_count = district_row['Starbucks Count']
    starbucks_circle = folium.CircleMarker(
        location=[district_row['lat'], district_row['lng']],
        radius=starbucks_count,
        tooltip=starbucks_count,
        color="#FF0000",
        fill_color="#FF0000",
    )
    starbucks_circle.add_to(map_cnt_diff)

# Blue circles, added after all red ones: radius is the EDIYA store count
# scaled by 0.36 (presumably to keep both chains' circles comparable in size;
# TODO confirm), tooltip is the count ratio, popup is the district name.
for _, district_row in Gu_cnt_diff.iterrows():
    ediya_circle = folium.CircleMarker(
        location=[district_row['lat'], district_row['lng']],
        radius=district_row['Ediya Count']*0.36,
        tooltip=district_row['Difference Ratio'],
        popup=district_row['District'],
        color="#3186cc",
        fill_color="#3186cc",
    )
    ediya_circle.add_to(map_cnt_diff)

map_cnt_diff
# folium.Marker 시각화 (Visualisation with folium.Marker)
# Renumber ediya_info's rows in place; the previous labels are kept as a new
# 'index' column because the old index is not dropped.
ediya_info.reset_index(inplace=True)

mapping = folium.Map(location=[37.558, 126.983], zoom_start=11)
mapping

# Layers are added in a fixed order: EDIYA markers, Starbucks markers,
# EDIYA circles, Starbucks circles.

# EDIYA stores: coffee icon in the default icon colour.
for _, ediya_store in ediya_info.iterrows():
    ediya_icon = folium.Icon(icon='coffee', prefix='fa')
    folium.Marker(
        location=[ediya_store['lat'], ediya_store['lng']],
        popup=ediya_store['Name'],
        tooltip=ediya_store['Address'],
        icon=ediya_icon,
    ).add_to(mapping)

# Starbucks stores: the same coffee icon in red.
for _, starbucks_store in starbucks_info.iterrows():
    starbucks_icon = folium.Icon(icon='coffee', prefix='fa', color='red')
    folium.Marker(
        location=[starbucks_store['lat'], starbucks_store['lng']],
        popup=starbucks_store['Name'],
        tooltip=starbucks_store['Address'],
        icon=starbucks_icon,
    ).add_to(mapping)

# A radius-100 circle around every EDIYA store (blue) ...
for _, ediya_store in ediya_info.iterrows():
    ediya_area = folium.Circle(
        location=[ediya_store['lat'], ediya_store['lng']],
        radius=100,
        color='blue',
        fill_color='blue',
    )
    ediya_area.add_to(mapping)

# ... and around every Starbucks store (red).
for _, starbucks_store in starbucks_info.iterrows():
    starbucks_area = folium.Circle(
        location=[starbucks_store['lat'], starbucks_store['lng']],
        radius=100,
        color='red',
        fill_color='red',
    )
    starbucks_area.add_to(mapping)

mapping
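# 도로명으로 비교하기 (Comparison by road name)
# 같은 도로명을 가진 지점만 비교 (숫자 제거) (Compare only stores that share a road name, with digits removed)
# 스타벅스 도로명 리스트 추가 (Add the Starbucks road-name list)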
import re

# Derive a road-name stem for every Starbucks store: take the second address
# token, remove all digits, and strip one trailing '길', so that e.g. a main
# road and its numbered side streets reduce to the same stem.
digit_pattern = re.compile(r'\d')
street_list = []
for address in starbucks_info['Address']:
    # An address cell may hold a list of tokens instead of a string; use the
    # tokens directly in that case, because splitting the list's repr would
    # yield quoted, comma-suffixed fragments.
    if isinstance(address, (list, tuple)):
        address_tokens = [str(token) for token in address]
    else:
        address_tokens = str(address).split()
    street_name = digit_pattern.sub('', address_tokens[1])
    # endswith() is safe even when nothing is left after removing the digits,
    # where indexing the last character would raise IndexError.
    if street_name.endswith('길'):
        street_name = street_name[:-1]
    street_list.append(street_name)
street_list
starbucks_info['Street'] = street_list
starbucks_info
import re

# Derive a road-name stem for every EDIYA store: take the second address
# token, remove all digits, and strip one trailing '길'.
digit_pattern = re.compile(r'\d')
street_list = []
for address in ediya_info['Address']:
    street_name = digit_pattern.sub('', str(address).split()[1])
    # endswith() is safe even when nothing is left after removing the digits,
    # where indexing the last character would raise IndexError.
    if street_name.endswith('길'):
        street_name = street_name[:-1]
    street_list.append(street_name)
street_list
ediya_info['Street'] = street_list
ediya_info
# Road-name stems that occur in both chains, in the order they first appear
# in the Starbucks data. The EDIYA stems are collected into a set once so
# each membership test is a constant-time lookup.
ediya_streets = set(ediya_info['Street'].unique())
common_street_list = [
    street for street in starbucks_info['Street'].unique()
    if street in ediya_streets
]
common_street_list

# Number of stores of each chain located on one of the common road names.
# Counting matching rows directly does not depend on every other column
# being hashable and non-null, unlike counting via DataFrame.value_counts().
sum_ediya = ediya_info['Street'].isin(common_street_list).sum()
sum_starbucks = starbucks_info['Street'].isin(common_street_list).sum()
# Work on copies so ediya_info / starbucks_info keep every store.
ediya_copy = ediya_info.copy()
ediya_copy

# Remove, in a single in-place drop, every EDIYA row whose road-name stem is
# not shared with Starbucks. This avoids mutating the frame while looping
# over its own column.
ediya_off_common = ~ediya_copy['Street'].isin(common_street_list)
ediya_copy.drop(index=ediya_copy.index[ediya_off_common], inplace=True)
ediya_copy

starbucks_copy = starbucks_info.copy()
starbucks_copy

# Same filter for the Starbucks copy.
starbucks_off_common = ~starbucks_copy['Street'].isin(common_street_list)
starbucks_copy.drop(index=starbucks_copy.index[starbucks_off_common], inplace=True)
starbucks_copy
# 스타벅스의 407개 매장이 공통된 도로명에 위치해 있다. (407 Starbucks stores are located on road names shared with EDIYA.)
# Fresh map showing only the stores that sit on a road name shared by both
# chains: a radius-100 circle per store, EDIYA in blue first, then Starbucks
# in red.
mapping = folium.Map(location=[37.558, 126.983], zoom_start=11)
mapping

for _, ediya_store in ediya_copy.iterrows():
    ediya_area = folium.Circle(
        location=[ediya_store['lat'], ediya_store['lng']],
        radius=100,
        color='blue',
        fill_color='blue',
    )
    ediya_area.add_to(mapping)

for _, starbucks_store in starbucks_copy.iterrows():
    starbucks_area = folium.Circle(
        location=[starbucks_store['lat'], starbucks_store['lng']],
        radius=100,
        color='red',
        fill_color='red',
    )
    starbucks_area.add_to(mapping)

mapping