# Standard library
import time
import warnings

warnings.filterwarnings('ignore')

# Third-party
import chromedriver_autoinstaller
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from tqdm import tqdm_notebook
# --- Launch Chrome and search Saramin for the keyword ---
search = "데이터 분석 파이썬"  # search keyword (runtime string — used in the output filename too)

# Install a chromedriver matching the local Chrome and start the browser.
# BUGFIX: Selenium 4 removed webdriver.Chrome(executable_path); wrap the
# driver path in a Service object instead.
path = chromedriver_autoinstaller.install()
driver = webdriver.Chrome(service=Service(path))

driver.get("http://www.saramin.co.kr")
time.sleep(2)  # crude wait for page load; WebDriverWait would be more robust

# Open the search box, type the keyword and submit.
# BUGFIX: the find_element_by_* helpers were removed in Selenium 4;
# use find_element(By.<STRATEGY>, selector).
driver.find_element(By.CSS_SELECTOR, ".btn_search").click()
time.sleep(2)
element = driver.find_element(By.ID, "ipt_keyword_recruit")
element.send_keys(search)
driver.find_element(By.ID, "btn_search_recruit").click()
time.sleep(2)

# Best-effort switch to the full recruit-list view: the "view more" control
# is not always present, so a failure here is deliberately non-fatal.
try:
    driver.find_element(By.CSS_SELECTOR, ".view_more.track_event").click()
    time.sleep(2)
    driver.find_element(
        By.XPATH, "//*[@id='recruit_info_list']/div[2]/div/a[1]"
    ).click()
except Exception:
    # narrowed from a bare `except:` (which also swallowed KeyboardInterrupt)
    pass
# --- Walk the result pages and collect title / condition / URL per posting ---
repeat = 8  # number of result pages to scrape
title_list = []      # posting titles
condition_list = []  # "location/experience/..." strings, '/'-joined
url_list = []        # posting detail-page URLs

# Pagination links are numbered starting at 2 for the second page, hence
# range(2, repeat + 2).
for i in tqdm_notebook(range(2, repeat + 2)):
    try:
        # Title anchors on the current page: title attribute + href.
        # BUGFIX: find_elements_by_css_selector was removed in Selenium 4.
        article_raw = driver.find_elements(
            By.CSS_SELECTOR, "div.area_job > h2 > a"
        )
        for article in article_raw:
            title_list.append(article.get_attribute('title'))
            url_list.append(article.get_attribute('href'))
        time.sleep(1)

        # Matching job-condition blocks; newlines collapsed to '/'.
        infos_raw = driver.find_elements(
            By.CSS_SELECTOR, "div.area_job > div.job_condition"
        )
        for info in infos_raw:
            condition_list.append(info.text.replace("\n", "/"))
        time.sleep(1)

        # Progress marker: page index + last title scraped.
        # BUGFIX: the original printed a bare `title`, which raised NameError
        # (masked by the bare except) when a page had zero articles.
        if title_list:
            print(i - 1, title_list[-1])

        # Saramin shows page links in blocks of ten; every tenth step we must
        # click "다음" (next block) instead of a numbered link.
        if i % 10 == 1:
            driver.find_element(By.LINK_TEXT, "다음").click()
        else:
            driver.find_element(By.LINK_TEXT, str(i)).click()
        time.sleep(1)
    except Exception:
        # narrowed from a bare `except:`; no further page link -> stop
        break
# --- Report counts and persist the scraped rows to Excel ---
# All three lists should have equal length (one entry per posting); a
# mismatch here means a page was only partially scraped.
print(len(title_list), len(url_list), len(condition_list))
df = pd.DataFrame({'title': title_list, 'condition': condition_list, 'url': url_list})
# BUGFIX: filename typo "recuriut" -> "recruit".
# BUGFIX: pandas >= 1.2 removed the `encoding` kwarg from to_excel (it raised
# TypeError); .xlsx files are always UTF-8 internally, so it is simply dropped.
df.to_excel("recruit({}).xlsx".format(search))