import os
import sys
import time
import warnings

# Kept ahead of the third-party imports so warnings raised while importing
# them are silenced as well.
warnings.filterwarnings('ignore')

import chromedriver_autoinstaller
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.common.by import By
from tqdm import tqdm_notebook
# ---------------------------------------------------------------------------
# Scrape the Kyobo bestseller list.
#
# Opens the Kyobo home page, follows the "베스트" link and walks the numbered
# pagination links. The result is three parallel module-level lists holding
# one entry per book: title_list, info_list and price_list.
# ---------------------------------------------------------------------------

FIRST_PAGE = 1
LAST_PAGE = 10        # highest numbered pagination link to visit
HOME_LOAD_DELAY = 2   # seconds to wait after opening the home page
PAGE_LOAD_DELAY = 1   # seconds to wait after each navigation click

BOOK_SELECTOR = "div.detail"
TITLE_SELECTOR = "div.detail > div.title > a"
AUTHOR_SELECTOR = "div.detail > div.author"
PRICE_SELECTOR = "div.detail > div.price > strong"


def _first_text(container, selector):
    """Return the text of the first *selector* match inside *container*.

    Returns None when nothing matches. An element-scoped CSS lookup matches
    the selector against the whole document and keeps only descendants of
    *container*, so the full page-level selectors can be passed unchanged.
    """
    matches = container.find_elements(By.CSS_SELECTOR, selector)
    return matches[0].text if matches else None


def _scrape_current_page(driver):
    """Return a list of (title, author, price) tuples for the loaded page.

    Each book is read from its own ``div.detail`` container so the three
    fields always belong to the same book. Containers without a title link
    are skipped; a missing author or price is recorded as an empty string.
    """
    books = []
    for container in driver.find_elements(By.CSS_SELECTOR, BOOK_SELECTOR):
        title = _first_text(container, TITLE_SELECTOR)
        if title is None:
            continue
        author = _first_text(container, AUTHOR_SELECTOR)
        price = _first_text(container, PRICE_SELECTOR)
        books.append((title, author or "", price or ""))
    return books


title_list = []
info_list = []
price_list = []

# install() downloads a matching chromedriver when needed and adds its
# directory to PATH, so Chrome() finds it without an explicit path argument
# (the positional path argument is not accepted by recent Selenium releases).
path = chromedriver_autoinstaller.install()
driver = webdriver.Chrome()
try:
    driver.get("http://www.kyobobook.co.kr/")
    time.sleep(HOME_LOAD_DELAY)
    driver.find_element(By.LINK_TEXT, "베스트").click()
    time.sleep(PAGE_LOAD_DELAY)

    for page in tqdm_notebook(range(FIRST_PAGE, LAST_PAGE + 1)):
        try:
            # Read the page first and navigate afterwards, so the last page
            # is scraped too and no click is issued once it has been read.
            books = _scrape_current_page(driver)
            for title, author, price in books:
                title_list.append(title)
                info_list.append(author)
                price_list.append(price)
            print(page, books[-1][0] if books else None)

            if page < LAST_PAGE:
                driver.find_element(By.LINK_TEXT, str(page + 1)).click()
                # Give the next page time to render before it is read.
                time.sleep(PAGE_LOAD_DELAY)
        except NoSuchElementException:
            # Only find_element() raises this here: there is no link to the
            # next page, so keep what has been collected and stop.
            print(f"No link to page {page + 1}; stopping after page {page}.")
            break
        except WebDriverException as exc:
            # Best effort: report the browser error and keep the rows
            # gathered so far instead of discarding them.
            print(f"Stopping on page {page}: {exc!r}")
            break
finally:
    # Always close the browser and the chromedriver process, even on error.
    driver.quit()
# Show how many values each column holds; DataFrame() below raises
# ValueError if the three lists differ in length.
print(len(title_list), len(info_list), len(price_list))
df = pd.DataFrame({'title': title_list, 'info': info_list, 'price': price_list})
# No `encoding` argument: to_excel() deprecated it in pandas 1.5 and removed
# it in 2.0, where passing it raises TypeError. It never affected .xlsx
# output, which stores text as Unicode.
df.to_excel("best_books.xlsx")