from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Start Chrome through an explicit Service object; passing the driver path
# positionally to webdriver.Chrome is deprecated in Selenium 4.
driver = webdriver.Chrome(service=Service(executable_path='../driver/chromedriver.exe'))
driver.get('https://www.naver.com')
# Locate the element with id "query" and type the first keyword into it.
keyword = driver.find_element(By.CSS_SELECTOR, '#query')
keyword.send_keys('파이썬')
⇊
# CSS selectors for the query box and the search button.
query_css = '#query'
button_css = '#search_btn'

# Look the query box up again, clear whatever text it holds, then type the
# new keyword.
keyword = driver.find_element(By.CSS_SELECTOR, query_css)
keyword.clear()
keyword.send_keys('딥러닝')

# Click the search button.
search_btn = driver.find_element(By.CSS_SELECTOR, button_css)
search_btn.click()
'//': 문서 전체에서 위치에 상관없이 검색 (최상위에서 시작해 모든 자손을 대상으로 함)
'*': 모든 태그 (태그 이름에 상관없이 선택)
'/': 자식 태그 (바로 밑에 있는 태그)
'div[1]': div 중에서 1번째 태그
# Same interaction expressed with XPath locators: type into the element whose
# id is "query", then click the second <span> under the element whose id is
# "search_btn".
query_box = driver.find_element(By.XPATH, '//*[@id="query"]')
query_box.send_keys('xpath')
button_span = driver.find_element(By.XPATH, '//*[@id="search_btn"]/span[2]')
button_span.click()
from selenium import webdriver
from selenium.webdriver.common.by import By
# Start Chrome through an explicit Service object; passing the driver path
# positionally to webdriver.Chrome is deprecated in Selenium 4.
driver = webdriver.Chrome(service=Service(executable_path='../driver/chromedriver.exe'))
driver.get('https://pinkwink.kr')
# 1. Click the magnifier (search) button via an ActionChains click.
from selenium.webdriver import ActionChains
search_tag = driver.find_element(By.CSS_SELECTOR, '.search')
action = ActionChains(driver)
action.click(search_tag)
action.perform()
# 2. Type the search keyword into the text input inside the search box.
driver.find_element(By.CSS_SELECTOR, '#header > div.search > input[type=text]').send_keys('딥러닝')
# 3. Click the search button.
driver.find_element(By.CSS_SELECTOR, '#header > div.search > button').click()
# HTML of the page the browser is currently showing.
driver.page_source
from bs4 import BeautifulSoup
# Hand the HTML currently held by the browser to BeautifulSoup and collect
# every element carrying the class "post-item".
req = driver.page_source
soup = BeautifulSoup(markup=req, features='html.parser')
post_selector = '.post-item'
soup.select(post_selector)
contents = soup.select(post_selector)
# Number of matched elements.
len(contents)
c
8
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import time
# Page to scrape; it is requested again later, so keep it in a variable.
url = 'https://www.opinet.co.kr/searRgSelect.do'
# Start Chrome through an explicit Service object; passing the driver path
# positionally to webdriver.Chrome is deprecated in Selenium 4.
driver = webdriver.Chrome(service=Service(executable_path='../driver/chromedriver.exe'))
driver.get(url)
def main_page():
    """Close the pop-up window, if one opened, and request ``url`` again.

    Uses the module-level ``driver`` and ``url``. After a fixed pause the
    most recently opened window is treated as the pop-up and closed, focus
    returns to the remaining window, and ``url`` is loaded again.

    When only one window exists nothing is closed, so the main window (and
    with it the browser session) is never closed by mistake.
    """
    # Selenium-driven pages load slowly; give a pop-up time to appear.
    time.sleep(3)

    handles = driver.window_handles
    if len(handles) > 1:
        # Switch to the pop-up (the last handle) and close it.
        driver.switch_to.window(handles[-1])
        driver.close()
        time.sleep(3)

    # Return focus to the main window.
    driver.switch_to.window(driver.window_handles[-1])
    # Request the target URL again.
    driver.get(url)
# Get past the pop-up and reload the target page before touching the form.
main_page()
# Province/city (sido) list: the element with id "SIDO_NM0".
# find_element(By.ID, ...) replaces the removed find_element_by_id helper.
sido_list_raw = driver.find_element(By.ID, 'SIDO_NM0')
sido_list_raw.text
⇊
# Every <option> element under the sido element.
# find_elements(By.TAG_NAME, ...) replaces the removed
# find_elements_by_tag_name helper.
sido_list = sido_list_raw.find_elements(By.TAG_NAME, 'option')
# Number of options and the visible text of the one at index 17.
len(sido_list), sido_list[17].text
⇊
(18, '제주')
# Read the "value" attribute of the option at index 1.
second_option = sido_list[1]
second_option.get_attribute("value")
⇊
'서울특별시'
# Gather the "value" attribute of every option, in list order.
sido_names = []
sido_names.extend(entry.get_attribute('value') for entry in sido_list)
sido_names
⇊
['',
'서울특별시',
'부산광역시',
'대구광역시',
'인천광역시',
'광주광역시',
'대전광역시',
'울산광역시',
'세종특별자치시',
'경기도',
'강원도',
'충청북도',
'충청남도',
'전라북도',
'전라남도',
'경상북도',
'경상남도',
'제주특별자치도']
# Same list as above, built in a single expression.
sido_names = list(map(lambda option: option.get_attribute('value'), sido_list))
# Peek at the first five entries.
sido_names[:5]
⇊
['', '서울특별시', '부산광역시', '대구광역시', '인천광역시']
# Drop the entry at position 0 and keep everything after it.
sido_names = [name for position, name in enumerate(sido_names) if position > 0]
sido_names
⇊
['서울특별시',
'부산광역시',
'대구광역시',
'인천광역시',
'광주광역시',
'대전광역시',
'울산광역시',
'세종특별자치시',
'경기도',
'강원도',
'충청북도',
'충청남도',
'전라북도',
'전라남도',
'경상북도',
'경상남도',
'제주특별자치도']
# Send the name stored at index 16 of sido_names to the sido element.
target_sido = sido_names[16]
sido_list_raw.send_keys(target_sido)
⇊
제주도로 변경
# Click the element with id "glopopd_excel".
# find_element(By.CSS_SELECTOR, ...) replaces the removed
# find_element_by_css_selector helper.
driver.find_element(By.CSS_SELECTOR, '#glopopd_excel').click()
엑셀로 저장
import time
from tqdm import tqdm_notebook
# For each district name: send it to the element with id "SIGUNGU_NM0", wait,
# then click the element with id "glopopd_excel".
# NOTE(review): gu_names is not defined in the visible part of this file —
# confirm it is built before this loop runs.
for gu in tqdm_notebook(gu_names):
    # Look the element up on every pass rather than reusing an earlier handle.
    element = driver.find_element(By.ID, 'SIGUNGU_NM0')
    element.send_keys(gu)
    # Fixed pause after choosing the district.
    time.sleep(3)
    # click() returns None, so its result is not kept.
    driver.find_element(By.ID, 'glopopd_excel').click()
    # Fixed pause after the click before moving to the next district.
    time.sleep(2)
import pandas as pd
from glob import glob
# Paths under ../data whose file name starts with "지역_" and ends in ".xls".
station_file_pattern = '../data/지역_*.xls'
glob(station_file_pattern)
⇊
['../data\\지역_위치별(주유소) (1).xls',
'../data\\지역_위치별(주유소) (10).xls',
'../data\\지역_위치별(주유소) (11).xls',
'../data\\지역_위치별(주유소) (12).xls',
'../data\\지역_위치별(주유소) (13).xls',
'../data\\지역_위치별(주유소) (14).xls',
'../data\\지역_위치별(주유소) (15).xls',
'../data\\지역_위치별(주유소) (16).xls',
'../data\\지역_위치별(주유소) (17).xls',
'../data\\지역_위치별(주유소) (18).xls',
'../data\\지역_위치별(주유소) (19).xls',
'../data\\지역_위치별(주유소) (2).xls',
'../data\\지역_위치별(주유소) (20).xls',
'../data\\지역_위치별(주유소) (21).xls',
'../data\\지역_위치별(주유소) (22).xls',
'../data\\지역_위치별(주유소) (23).xls',
'../data\\지역_위치별(주유소) (24).xls',
'../data\\지역_위치별(주유소) (3).xls',
'../data\\지역_위치별(주유소) (4).xls',
'../data\\지역_위치별(주유소) (5).xls',
'../data\\지역_위치별(주유소) (6).xls',
'../data\\지역_위치별(주유소) (7).xls',
'../data\\지역_위치별(주유소) (8).xls',
'../data\\지역_위치별(주유소) (9).xls',
'../data\\지역_위치별(주유소).xls']
'지역_'로 시작하고 '.xls'로 끝나는 모든 파일을 가져옴
glob: 패턴과 일치하는 파일 경로의 목록을 반환
# Collect the matching Excel files.
stations_files = glob('../data/지역_*.xls')
stations_files
# Read each workbook (header=2: the row at index 2 supplies the column names)
# and stack the resulting frames into one.
tmp_raw = [pd.read_excel(file_name, header=2) for file_name in stations_files]
station_raw = pd.concat(tmp_raw)
-concat: 구조가 동일한 pandas 데이터프레임이 여러 개 있을 때, 하나로 이어 붙여줌
# Output column name -> column of station_raw that supplies its values.
column_sources = {
    '상호': '상호',
    '주소': '주소',
    '가격': '휘발유',
    '셀프': '셀프여부',
    '상표': '상표',
}
# Build the working frame from just those columns, in the order listed above.
stations = pd.DataFrame(
    {target: station_raw[source] for target, source in column_sources.items()}
)
stations.head()
⇊
# The second whitespace-separated token of each address becomes the '구' value.
gu_per_station = []
for address in stations['주소']:
    address_tokens = address.split()
    gu_per_station.append(address_tokens[1])
stations['구'] = gu_per_station
stations.head()
⇊
# Convert the price column to floating point.
price_as_float = stations['가격'].astype(float)
stations['가격'] = price_as_float
⇊
# Renumber the rows from 0. drop=True discards the old index instead of
# inserting it as an 'index' column that then has to be deleted.
stations.reset_index(drop=True, inplace=True)
stations.head()
⇊
import matplotlib.pyplot as plt
import seaborn as sns
import platform
from matplotlib import font_manager, rc
# Render matplotlib figures inline in the notebook.
get_ipython().run_line_magic('matplotlib', 'inline')

# Font file used on Windows.
path = 'c:/Windows/Fonts/malgun.ttf'

# Choose the matplotlib font family according to the operating system.
system_name = platform.system()
if system_name == 'Windows':
    # Resolve the family name from the font file, then register it.
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
elif system_name == 'Darwin':
    rc('font', family='AppleGothic')
else:
    print('Unknown system')
# Box plot of the price column, one box per value of the '셀프' column.
stations.boxplot(by='셀프', column='가격', figsize=(12, 8));
⇊
# Seaborn box plot of price for each value of the '셀프' column.
figure_size = (12, 8)
plt.figure(figsize=figure_size)
sns.boxplot(data=stations, x='셀프', y='가격', palette='Set3')
plt.grid()
plt.show()
⇊
# Seaborn box plot of price per brand ('상표'), split by the '셀프' column.
figure_size = (12, 8)
plt.figure(figsize=figure_size)
sns.boxplot(data=stations, x='상표', y='가격', hue='셀프', palette='Set3')
plt.grid()
plt.show()
⇊
import json
import folium
import warnings
# Silence the FutureWarning messages that would otherwise be printed below.
warnings.simplefilter(category=FutureWarning, action='ignore')
# Ten rows with the highest price.
priciest_first = stations.sort_values(by='가격', ascending=False)
priciest_first.head(10)
⇊
# Ten rows with the lowest price (ascending order is the default).
cheapest_first = stations.sort_values(by='가격')
cheapest_first.head(10)
⇊
import numpy as np
# Mean price per '구'. The aggregation is named as the string 'mean' rather
# than passed as np.mean: recent pandas emits a FutureWarning for the callable
# form, and the result is the same.
gu_data = pd.pivot_table(stations, index=['구'], values=['가격'], aggfunc='mean')
gu_data.head()
⇊
# GeoJSON file with the boundaries used for the choropleth.
geo_path = '../data/02. skorea_municipalities_geo_simple.json'
# Load it inside a context manager so the file handle is closed afterwards.
with open(geo_path, encoding='utf-8') as geo_file:
    geo_str = json.load(geo_file)

my_map = folium.Map(location=[37.5502, 126.982], zoom_start=10.5, tiles='Stamen Toner')
# Colour each GeoJSON feature by the mean price in gu_data; features are
# matched to rows through the feature's "id" field (key_on='feature.id').
# folium.Choropleth(...).add_to(map) replaces the deprecated Map.choropleth.
folium.Choropleth(
    geo_data=geo_str,
    data=gu_data,
    columns=[gu_data.index, '가격'],
    fill_color='PuRd',
    key_on='feature.id',
).add_to(my_map)
my_map
⇊