import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
driver = webdriver.Chrome('../driver/chromedriver.exe')


def main_page_access():
    """Load the Opinet home page, then the regional station-search page.

    Operates on the module-level ``driver``. The home page is requested
    first, followed by a one-second pause, before the search page is opened.
    """
    driver.get('https://www.opinet.co.kr')
    time.sleep(1)
    driver.get('https://www.opinet.co.kr/searRgSelect.do')


# NOTE(review): the original indentation was lost; maximize_window() is kept
# as a top-level statement here — confirm it was not part of the function.
driver.maximize_window()  # maximize the browser window
main_page_access()  # navigate to the search page
# Region: province/city (si/do) dropdown.
# Uses find_element(By.ID, ...) to match the locator style used for the
# CSS-selector lookups later in this file.
sido_list_raw = driver.find_element(By.ID, 'SIDO_NM0')
sido_list_raw.text  # notebook display: raw text of the whole dropdown
# Output:
# ' 시/도\n \n \n \n 서울\n \n \n \n 부산\n \n \n \n \n \n 대구\n \n \n \n \n 인천\n \n \n \n \n 광주\n \n \n \n \n 대전\n \n \n \n \n 울산\n \n \n \n \n 세종\n \n \n \n \n 경기\n \n \n \n \n 충북\n \n \n \n \n 충남\n \n \n \n \n 전북\n \n \n \n \n 전남\n \n \n \n \n 경북\n \n \n \n \n 경남\n \n \n \n \n 제주\n \n \n \n \n 강원\n \n \n '
# Each <option> tag inside the dropdown is one selectable province/city.
sido_list = sido_list_raw.find_elements(By.TAG_NAME, 'option')
sido_list[0].text, len(sido_list)  # notebook display: first entry and count
# Iterate the options directly instead of indexing through range(len(...)).
sido_names = [option.text for option in sido_list]
sido_element = driver.find_element(By.ID, 'SIDO_NM0')
# Index 1 is chosen because Seoul is the region being surveyed.
sido_element.send_keys(sido_names[1])
# District (gu) dropdown.
gu_list_raw = driver.find_element(By.ID, 'SIGUNGU_NM0')
gu_list = gu_list_raw.find_elements(By.TAG_NAME, 'option')
len(gu_list), gu_list  # notebook display: check how many <option> tags exist
# Collect the "value" attribute of every option.
gu_names = [option.get_attribute('value') for option in gu_list]
gu_names[:5]
# Output: ['', '강남구', '강동구', '강북구', '강서구']
# Remove the blank ('') entry that sits at the head of the list.
gu_names = gu_names[1:]
gu_names
import time
from tqdm import tqdm_notebook
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
# Trial run on a single district: pick Gangnam-gu, tick the extra-service
# filters, open the first station's detail view and list the result rows.
driver.find_element(By.ID, 'SIGUNGU_NM0').send_keys('강남구')

# click() returns None, so its result is not bound to a name.
driver.find_element(By.CSS_SELECTOR, '#CWSH_YN').click()   # car wash filter
driver.find_element(By.CSS_SELECTOR, '#MAINT_YN').click()  # light maintenance filter
driver.find_element(By.CSS_SELECTOR, '#CVS_YN').click()    # convenience store filter
driver.find_element(By.CSS_SELECTOR, '#SEL24_YN').click()  # open-24-hours filter

# Show the detail view of the first row in the result table.
driver.find_element(By.CSS_SELECTOR, '#body1 > tr:nth-child(1) > td.rlist > a').click()

req = driver.page_source  # snapshot of the current page
soup = BeautifulSoup(req, 'html.parser')

# Extract the station rows listed for the district.
oilStation = soup.select_one('#os_price1')
oilStation_list = oilStation.select('.rlist')
for idx, val in enumerate(oilStation_list):
    print(idx)
# Inspect the details of one station from the selected district.
req = driver.page_source
soup = BeautifulSoup(req, 'html.parser')

# Selectors, in order: name, brand, gasoline price, diesel price, address.
name, brand, gasoline, diesel, address = [
    soup.select_one(selector).text
    for selector in ('#os_nm', '#poll_div_nm', '#b027_p', '#d047_p', '#rd_addr')
]
name, brand, gasoline, diesel, address
# Output:
# ('HD현대오일뱅크㈜직영 도곡셀프주유소', 'HD현대오일뱅크', '1,784', '1,719', '서울 강남구 남부순환로 2718 (도곡2동)')
# Service flags are recorded as 'N' when the service is not offered and 'Y'
# when it is.
# Check the service information.
service_info = soup.select('.service')


def _service_flag(page, selector):
    """Return 'N' if the selected icon's src contains 'off', otherwise 'Y'."""
    return 'N' if 'off' in page.select_one(selector).get('src') else 'Y'


carWash = _service_flag(soup, '#cwsh_yn')
# Fix: '#maint_yn' is the maintenance icon (the matching search filter
# '#MAINT_YN' is the light-maintenance checkbox); it was previously stored in
# `charging`, and '#lpg_yn' in `maintenance`.
# NOTE(review): '#lpg_yn' is presumed to be the charging-station icon — confirm
# against the page markup.
maintenance = _service_flag(soup, '#maint_yn')
charging = _service_flag(soup, '#lpg_yn')
cvsStore = _service_flag(soup, '#cvs_yn')
hours24 = _service_flag(soup, '#sel24_yn')
carWash, charging, maintenance, cvsStore, hours24
# Output: ('Y', 'Y', 'N', 'N', 'N')
# Storage for the scraped data: one list per output column.
# Station information
name_list = []
brand_list = []
gasoline_list = []
diesel_list = []
address_list = []
oilSelf_list = []
# Service information
carWash_list = []
charging_list = []
maintenance_list = []
cvsStore_list = []
hours24_list = []
# District name
guName_list = []


def _service_flag(page, selector):
    """Return 'N' if the selected icon's src contains 'off', otherwise 'Y'."""
    return 'N' if 'off' in page.select_one(selector).get('src') else 'Y'


for gu in tqdm_notebook(gu_names):
    # Select the district in the dropdown and give the result list time to load.
    element = driver.find_element(By.ID, 'SIGUNGU_NM0')
    element.send_keys(gu)
    time.sleep(3)

    # Extract the station rows listed for this district.
    req = driver.page_source
    soup = BeautifulSoup(req, 'html.parser')
    oilStation = soup.select_one('#os_price1')
    oilStation_list = oilStation.select('.rlist')

    for idx, information in enumerate(oilStation_list):
        # Open the detail view of row idx + 1 (nth-child is 1-based).
        driver.find_element(
            By.CSS_SELECTOR,
            '#body1 > tr:nth-child({}) > td.rlist > a'.format(idx + 1),
        ).click()
        time.sleep(2)

        # Re-parse the page now that the detail view is showing.
        req = driver.page_source
        soup = BeautifulSoup(req, 'html.parser')

        # Self-service is detected from the row text captured in the listing.
        oilSelf = 'Y' if '셀프' in information.text.strip() else 'N'

        # Station information
        name = soup.select_one('#os_nm').text            # name
        brand = soup.select_one('#poll_div_nm').text     # brand
        gasoline = soup.select_one('#b027_p').text       # gasoline price
        diesel = soup.select_one('#d047_p').text         # diesel price
        address = soup.select_one('#rd_addr').text       # address

        # Service flags ('N' when the icon src contains 'off', else 'Y').
        carWash = _service_flag(soup, '#cwsh_yn')
        # Fix: '#maint_yn' is the maintenance icon and was previously stored
        # in `charging`, with '#lpg_yn' stored in `maintenance`.
        # NOTE(review): '#lpg_yn' is presumed to be the charging-station icon
        # — confirm against the page markup.
        maintenance = _service_flag(soup, '#maint_yn')
        charging = _service_flag(soup, '#lpg_yn')
        cvsStore = _service_flag(soup, '#cvs_yn')
        hours24 = _service_flag(soup, '#sel24_yn')

        # Append one row to every column list.
        guName_list.append(gu)
        name_list.append(name)
        brand_list.append(brand)
        gasoline_list.append(gasoline)
        diesel_list.append(diesel)
        address_list.append(address)
        oilSelf_list.append(oilSelf)
        carWash_list.append(carWash)
        charging_list.append(charging)
        maintenance_list.append(maintenance)
        cvsStore_list.append(cvsStore)
        hours24_list.append(hours24)

        # NOTE(review): the original indentation was lost; this pause is kept
        # at the same level as the appends above (once per station) — confirm
        # it was not meant to run once per district instead.
        time.sleep(3)
import pandas as pd
# Assemble the scraped column lists into one table; column order follows
# the order of the names below.
column_names = [
    'name', 'brand', 'gasolinePrice', 'dieselPrice', 'address', 'oilSelf',
    'carWash', 'charging', 'maintenance', 'cvsStore', 'hours24',
]
column_values = [
    name_list, brand_list, gasoline_list, diesel_list, address_list, oilSelf_list,
    carWash_list, charging_list, maintenance_list, cvsStore_list, hours24_list,
]
data = pd.DataFrame(dict(zip(column_names, column_values)))
data
import googlemaps
import numpy as np
# The Google Maps API key is read from the environment so that the credential
# is not stored in the source file.
gmaps_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not gmaps_key:
    raise RuntimeError(
        'Set the GOOGLE_MAPS_API_KEY environment variable to a Google Maps API key.'
    )
gmaps = googlemaps.Client(key=gmaps_key)

# Coordinate columns start as NaN and are filled in row by row below.
data['lat'] = np.nan
data['lon'] = np.nan
data

for idx, rows in data.iterrows():
    station_address = rows['address']
    tmp = gmaps.geocode(station_address, language='ko')
    if not tmp:
        # No geocoding result for this address: leave the row's values unset
        # instead of failing on tmp[0].
        continue

    lat = tmp[0]['geometry']['location']['lat']
    lon = tmp[0]['geometry']['location']['lng']

    # The third whitespace-separated token of the formatted address is used
    # as the district.
    # NOTE(review): presumably the address reads "<country> <city> <district> ..."
    # — confirm against real responses.
    address_parts = tmp[0]['formatted_address'].split()
    if len(address_parts) > 2:
        address = address_parts[2]
        data.loc[idx, 'gu'] = address

    data.loc[idx, 'lat'] = lat
    data.loc[idx, 'lon'] = lon
data
# Save the data to a CSV file.
data.to_csv('Oil Price Analysis.csv', sep=',', encoding='utf-8')