# Open the Opinet station-search page in a Chrome session.
# NOTE(review): the find_element_by_* helpers used in this script were removed
# in Selenium 4.3 -- presumably this targets Selenium 3; confirm the pinned version.
url = 'https://www.opinet.co.kr/searRgSelect.do'
driver = webdriver.Chrome("../driver/chromedriver.exe")
driver.get(url)

# Choose the second entry of the province dropdown (bound to `seoul`, so
# presumably Seoul -- verify against the live page).
seoul = driver.find_element_by_css_selector('#SIDO_NM0 > option:nth-child(2)')
seoul.click()

# Collect the district values offered once the province is selected.
gu_list_raw = driver.find_element_by_css_selector('#SIGUNGU_NM0')  # parent element of the <option> tags
gu_list = gu_list_raw.find_elements_by_css_selector('option')
# Read each option's value exactly once (one WebDriver call per option) and
# drop options whose value is empty.
gu_names = [
    value
    for value in (option.get_attribute('value') for option in gu_list)
    if value
]
import pandas as pd #최종저장
import time
import googlemaps
import os

# Google Maps client used to geocode station addresses.
# SECURITY: an API key is committed in this file. Prefer supplying it through
# the GOOGLE_MAPS_API_KEY environment variable; the literal below is kept only
# as a backward-compatible fallback and should be rotated and removed.
gmaps_key = os.environ.get("GOOGLE_MAPS_API_KEY", "AIzaSyC-aSVFR4qL3Jphc-MVw4Nmv_5YkjzQ7_k")
gmaps = googlemaps.Client(key=gmaps_key)
def _service_flag(element_id):
    """Return 'N' if the service icon's image file name contains '_off', else 'Y'.

    element_id: id of the icon element inside the `.service` container.
    """
    icon_src = driver.find_element_by_css_selector(f'.service #{element_id}').get_attribute('src')
    return 'N' if '_off' in icon_src.split('/')[-1] else 'Y'


# Scrape every station of every district into `datas` (one dict per station).
# NOTE(review): the loop nesting was reconstructed -- the 0.2 s pause is taken
# per station and the 0.5 s pause per district; confirm against the notebook.
datas = []
try:
    # District options are addressed by 1-based nth-child, offset by one more
    # position -- presumably for the empty placeholder option that was filtered
    # out when gu_names was built.
    for gu_child in range(2, len(gu_names) + 2):
        gu_selecter = f'#SIGUNGU_NM0 > option:nth-child({gu_child})'
        driver.find_element_by_css_selector(gu_selecter).click()
        station_items = driver.find_elements_by_css_selector('#body1 > tr')  # station rows

        for row_child in range(1, len(station_items) + 1):
            # Open the detail view of this row.
            # NOTE(review): nothing waits for the detail view to refresh before
            # it is read -- confirm the fields cannot lag behind the click.
            detail_selector = f'#body1 > tr:nth-child({row_child}) > td.rlist > a'
            driver.find_element_by_css_selector(detail_selector).click()

            # Main fields of the detail view.
            name = driver.find_element_by_css_selector('.header #os_nm').get_attribute('innerText')
            gasoline = driver.find_element_by_css_selector('#b027_p').get_attribute('innerText')
            diesel = driver.find_element_by_css_selector('#d047_p').get_attribute('innerText')
            address = driver.find_element_by_css_selector('#rd_addr').get_attribute('innerText')
            brand = driver.find_element_by_css_selector('#poll_div_nm').get_attribute('innerText')

            # Service icons: an image whose file name contains '_off' means "not offered".
            cwsh_yn = _service_flag('cwsh_yn')
            lpg_yn = _service_flag('lpg_yn')
            maint_yn = _service_flag('maint_yn')
            cvs_yn = _service_flag('cvs_yn')
            sel24_yn = _service_flag('sel24_yn')

            # Self-service is signalled by the presence of #self_icon.
            # find_elements returns an empty list instead of raising when
            # nothing matches, so no exception handling is needed.
            is_self = 'Y' if driver.find_elements_by_css_selector('#self_icon') else 'N'

            # District, latitude and longitude are derived together from the
            # address so each record is complete before it is stored.
            address_parts = address.split()
            gu = address_parts[1] if len(address_parts) > 1 else None

            # Geocode by address; an address with no match yields no coordinates
            # rather than aborting the whole scrape.
            geocode_results = gmaps.geocode(address, language='ko')
            if geocode_results:
                location = geocode_results[0]['geometry']['location']
                lat, lng = location['lat'], location['lng']
            else:
                lat = lng = None

            # Fields are bound to variables above and only referenced here, so
            # a selector change needs to be made in one place.
            datas.append({
                'name': name,
                'address': address,
                'brand': brand,
                'is_self': is_self,
                'gasoline': gasoline,
                'diesel': diesel,
                'car_wash': cwsh_yn,
                'charging_station': lpg_yn,
                'car_maintenance': maint_yn,
                'convenience_store': cvs_yn,
                '24_hour': sel24_yn,
                'gu': gu,
                'lat': lat,
                'lng': lng,
            })
            time.sleep(0.2)
        time.sleep(0.5)
finally:
    # Always close the browser, even when scraping fails part-way through.
    driver.quit()
import datetime

# Tabulate the scraped records; tail()/info() give a quick look at the result
# (the tail() value is only displayed when run as a notebook cell).
df = pd.DataFrame(datas)
df.tail()
df.info()

# Save a snapshot named after today's date (YYYYMMDD).
now = datetime.datetime.now()
nowDate = now.strftime('%Y%m%d')
csv_path = f'./oilstation_oneday_{nowDate}.csv'
df.to_csv(csv_path, encoding='utf-8')

# Reload the snapshot that was just written, instead of a hard-coded date that
# only matches on the day it was typed in. thousands=',' lets pandas parse
# comma-grouped numbers -- presumably the scraped price text uses them.
stations = pd.read_csv(csv_path, encoding='utf-8', thousands=',', index_col=0)
stations
import matplotlib.pyplot as plt
import seaborn as sns
import platform
from matplotlib import font_manager, rc
# Render figures inline when running under IPython/Jupyter. A plain Python run
# has no get_ipython(), so the magic is skipped there instead of crashing.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Use the family name reported by the font file itself, so the rc setting
# stays correct if f_path is pointed at a different font.
f_path = r"C:\Windows\Fonts\NanumGothic.ttf"
font_name = font_manager.FontProperties(fname=f_path).get_name()
rc("font", family=font_name)
# Gasoline-price box plots, drawn one figure at a time in this order:
# by self-service flag, by brand split on self-service, and by district.
# Each entry is (figure size, column arguments forwarded to sns.boxplot).
boxplot_specs = [
    ((12, 8), {'x': 'is_self', 'y': 'gasoline'}),
    ((12, 8), {'x': 'brand', 'y': 'gasoline', 'hue': 'is_self'}),
    ((18, 8), {'x': 'gu', 'y': 'gasoline'}),
]
for fig_size, column_args in boxplot_specs:
    plt.figure(figsize=fig_size)
    sns.boxplot(data=stations, palette='Set1', **column_args)
    plt.grid(True)
    plt.show()
import json
import folium
# Columns shown in both price rankings.
price_columns = ['gu', 'name', 'is_self', 'gasoline']
# Ten most expensive stations by gasoline price (descending order).
stations[price_columns].sort_values(by='gasoline', ascending=False).head(10).reset_index(drop=True)
# Ten cheapest stations by gasoline price (ascending order, the sort_values default).
stations[price_columns].sort_values(by='gasoline').head(10).reset_index(drop=True)