# 1. 라이브러리 임포트 (Import libraries)
import time
import warnings

warnings.filterwarnings('ignore')

import chromedriver_autoinstaller
import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
# Download a chromedriver matching the installed Chrome. install() returns the
# driver's path and also adds its directory to PATH, so Chrome() can locate it
# without being handed the path. Passing the path positionally (as the
# original did) is rejected by newer Selenium 4 releases.
path = chromedriver_autoinstaller.install()
driver = webdriver.Chrome()

# Open the HRD-Net landing page and follow the "#K-Digital Training" link.
driver.get("https://www.hrd.go.kr/")
time.sleep(2)  # fixed pause, presumably to let the page finish loading
# find_element(By.LINK_TEXT, ...) replaces find_element_by_link_text, which
# has been removed from Selenium 4; this form works on old and new releases.
driver.find_element(By.LINK_TEXT, "#K-Digital Training").click()

# Parallel result lists, one entry per scraped list item.
name_list = []    # text of each item's p.zone element
local_list = []   # text of each item's p.school element
title_list = []   # course title taken from the link's title attribute
period_list = []  # text of the second <dl>'s <dd> inside div.content
# 2. 크롤링 for 문 (Crawling for-loop)
# Scrape the result list page by page.
#
# Each pass of the outer loop reads the ten list items of the page currently
# shown, then clicks the pagination link whose text is ``j`` to move on.
# With j running 2..10 this scrapes pages 1-9 (90 rows); the page reached by
# the final click is opened but not scraped, as in the original script.
#
# NOTE(review): 'name' is read from p.zone and 'local' from p.school; confirm
# that this mapping is the intended one.
try:
    for j in range(2, 11):
        for i in range(1, 11):
            name_select = f"#contentArea > div.detailListWrap > ul > li:nth-child({i}) >div.title > a >p.zone"
            local_select = f"#contentArea > div.detailListWrap > ul > li:nth-child({i}) > div.title > a > p.school"
            title_select = f"#contentArea > div.detailListWrap > ul > li:nth-child({i}) > div.content > p > a"
            period_select = f'#contentArea > div.detailListWrap > ul > li:nth-child({i}) > div.content > div > dl:nth-child(2) > dd'

            name = driver.find_element(By.CSS_SELECTOR, name_select).text
            local = driver.find_element(By.CSS_SELECTOR, local_select).text

            # get_attribute() returns None when the attribute is missing;
            # fall back to "" so the replace() below cannot fail.
            title = driver.find_element(By.CSS_SELECTOR, title_select).get_attribute('title') or ""
            # Drop the " 새창" ("new window") suffix from the title attribute.
            title = title.replace(" 새창", "")

            period = driver.find_element(By.CSS_SELECTOR, period_select).text

            # Append only after all four lookups succeeded, so the lists
            # always stay the same length even if a later item fails.
            name_list.append(name)
            local_list.append(local)
            title_list.append(title)
            period_list.append(period)

        # Move to the next result page.
        driver.find_element(By.LINK_TEXT, str(j)).click()
        time.sleep(2)  # fixed pause, presumably to let the next page load
except WebDriverException as exc:
    # Best effort: keep whatever was collected so far, but report why the
    # crawl ended instead of swallowing the error silently.
    print(f"Crawling stopped early at page {j - 1}: {exc!r}")
# 3. CSV 파일로 변환 (Convert to a CSV file)
# Sanity check: the four lists must have equal lengths, otherwise building
# the DataFrame from them below raises a ValueError.
print(len(name_list), len(local_list), len(title_list), len(period_list))
# Recorded output of an earlier run: 90 90 90 90

# Combine the parallel lists into one table, one column per list.
df = pd.DataFrame({'name':name_list, 'local':local_list, 'title':title_list, 'period':period_list})
# 'utf-8-sig' writes a UTF-8 byte-order mark at the start of the file,
# presumably so spreadsheet tools detect the encoding of the Korean text.
df.to_csv("K-digital.csv", encoding='utf-8-sig', index=False)