Web Crawling(웹 크롤링)_Selenium_해커랭크 SQL 목록 (python)

juyeon·2022년 8월 21일
0

크롤링

목록 보기
12/13
post-thumbnail

Selenium

절차

라이브러리 불러오기 (사전 설치: pip install selenium pandas)

from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time

실행

# Crawl the HackerRank SQL challenge list with Selenium and collect one row
# per challenge card into the ``hackerrank_sql`` DataFrame.

# Number of times to scroll to the bottom of the page, and the pause (in
# seconds) after each navigation/scroll step.
# NOTE(review): the repeated scrolling presumably triggers lazy loading of
# further challenge cards -- confirm against the live page.
SCROLL_COUNT = 5
SCROLL_PAUSE_SECONDS = 2
SCROLL_TO_BOTTOM_JS = "window.scrollTo(0, document.body.scrollHeight);"

driver = webdriver.Chrome()
try:
    driver.get("https://www.hackerrank.com/domains/sql")
    time.sleep(SCROLL_PAUSE_SECONDS)

    for _ in range(SCROLL_COUNT):
        driver.execute_script(SCROLL_TO_BOTTOM_JS)
        time.sleep(SCROLL_PAUSE_SECONDS)

    elements = driver.find_elements(
        By.CSS_SELECTOR,
        "#contest-challenges-problem div.challenge-name-details",
    )

    # Read every text value while the browser session is still alive; the
    # WebElement handles cannot be queried after driver.quit().
    # "title" holds the whole <h4> text, which also contains the text of the
    # nested difficulty/subdomain spans selected below.
    data = [
        {
            "title": element.find_element(By.CSS_SELECTOR, "div > h4").text,
            "difficulty": element.find_element(
                By.CSS_SELECTOR, "div > h4 > div > span"
            ).text,
            "subdomains": element.find_element(
                By.CSS_SELECTOR, "div > h4 > div > span:nth-child(2)"
            ).text,
        }
        for element in elements
    ]
finally:
    # Always shut the browser down, even when navigation or scraping fails,
    # so no Chrome/chromedriver process is left running.
    driver.quit()

hackerrank_sql = pd.DataFrame(data)

데이터프레임 수정

# Clean up the scraped frame: the raw "title" text spans several lines, so
# keep only its first line as the challenge title.
# Taking element 0 of the split works for any number of lines, unlike
# unpacking into a fixed two-column helper frame. The previous
# ``hackerrank_sql.drop('title', axis=1)`` call discarded its result and is
# therefore dropped as a no-op.
hackerrank_sql['title'] = hackerrank_sql['title'].str.split('\n', n=1).str[0]

# Present the columns in reading order: title, subdomains, difficulty.
hackerrank_sql = hackerrank_sql[['title', 'subdomains', 'difficulty']]

출력

profile
내 인생의 주연

0개의 댓글