# Scrape the Gmarket best-seller page: click through every category tab and
# collect (category, product name, price) rows into a single DataFrame.
gmarket_url = "http://corners.gmarket.co.kr/Bestsellers"
driver = wb.Chrome()
driver.get(gmarket_url)

# Every child of #categoryTabG except a first-child <li>.
tab_selector = "#categoryTabG > :not(li:first-child)"
tabs = driver.find_elements(By.CSS_SELECTOR, value=tab_selector)

# Accumulators are created once, before the tab loop, so that rows from every
# category end up in the same table instead of being reset on each tab.
itemlist = []
pricelist = []
itemCatelist = []

# An index loop is intentional here: the elements are re-queried on every
# iteration, so only the position of the tab stays valid across iterations.
for i in range(len(tabs)):
    # The page is dynamic: the HTML after a click differs from the HTML
    # before it, so the tab elements have to be looked up again each time.
    tabs = driver.find_elements(By.CSS_SELECTOR, value=tab_selector)
    time.sleep(1)

    # Read the label before clicking; the element may not be usable afterwards.
    # NOTE(review): assumes the tab's visible text is the category name - confirm.
    category = tabs[i].text
    tabs[i].click()

    # These lookups must happen inside the tab loop, after the click, so they
    # see the items of the category that was just selected.
    itemNms = driver.find_elements(By.CLASS_NAME, value="itemname")
    itemPrices = driver.find_elements(
        By.CSS_SELECTOR, value="div.s-price > strong > span > span"
    )

    # zip() pairs names with prices and stops at the shorter list, so a
    # missing price element cannot raise IndexError or misalign the columns.
    for name_element, price_element in zip(itemNms, itemPrices):
        itemlist.append(name_element.text)
        pricelist.append(price_element.text)
        itemCatelist.append(category)

item_dic = {
    "카테고리": itemCatelist,
    "상품명": itemlist,
    "가격": pricelist,
}
item_df = pd.DataFrame(item_dic)
# Bare expression: displays the table when run as a notebook cell.
item_df
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
# 키보드의 값을 가지고 있는 라이브러리
from selenium.webdriver.common.keys import Keys
from tqdm.notebook import tqdm
import pandas as pd
import time
# Open the channel's video list and extract title, URL and view count of the
# first listed video as a quick check of the parsing logic.
yt_url = "https://www.youtube.com/c/gimongcho/videos"
driver = wb.Chrome()
driver.get(yt_url)
videos = driver.find_elements(By.ID, value="video-title")
title = videos[0].text
url = videos[0].get_attribute("href")
view = videos[0].get_attribute("aria-label")
# Locate the number between the "조회수" marker and the trailing "회".
# rfind() on the full marker is used because the aria-label also contains the
# video title, which may itself contain the character "조".
# NOTE(review): assumes the label contains "조회수 <number>회" with no later "회" - confirm.
start = view.rfind("조회수") + len("조회수")
end = view.rfind("회")
# int() tolerates the space left after the marker; commas are removed first.
# Bare expression: displays the tuple when run as a notebook cell.
title, url, int(view[start:end].replace(",", ""))
# document.body.scrollHeight
# : the total scrollable height of the page, i.e. the position of its bottom.
# Keep scrolling to the bottom until the page height stops growing, which
# means no further content was loaded by the last scroll.
height_script = "return document.body.scrollHeight"
last_height = driver.execute_script(height_script)
reached_bottom = False
while not reached_bottom:
    # Jump to the current bottom of the page.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1)
    # Measure the height again after the scroll.
    new_height = driver.execute_script(height_script)
    # An unchanged height ends the loop; otherwise remember it and go again.
    reached_bottom = new_height == last_height
    last_height = new_height
# ActionChains.move_to_element
# : when a specific element is already known, scrolls the page to that element's position.
# The import is required first!
from selenium.webdriver import ActionChains

# Look the element up with the By-based API used elsewhere in this file;
# the find_element_by_* helpers were removed in Selenium 4.3.
# NOTE(review): '요소 명' looks like a placeholder for a real element id - confirm.
some_tag = driver.find_element(By.ID, '요소 명')
action = ActionChains(driver)
# Move the pointer to the element, which scrolls it into view.
action.move_to_element(some_tag).perform()
# Collect title, URL and view count of every loaded video into a DataFrame.
videos = driver.find_elements(By.ID, value="video-title")
titleList = []
urlList = []
viewList = []
for video in tqdm(videos):
    try:
        title = video.text
        url = video.get_attribute("href")
        view = video.get_attribute("aria-label")
        # The count sits between the "조회수" marker and the trailing "회".
        # rfind() is used because the label also contains the video title,
        # which could itself contain the marker text.
        # NOTE(review): assumes the label contains "조회수 <number>회" - confirm.
        start = view.rfind("조회수") + len("조회수")
        end = view.rfind("회")
        # strip() drops the space that follows the marker.
        view_count = view[start:end].strip()
    except Exception:
        # Covers Selenium lookup errors and a missing aria-label (None), while
        # letting KeyboardInterrupt / SystemExit through so the run can be
        # stopped by the user.
        print("수집실패")
        continue
    # Append only after every field was read, so the three lists stay aligned.
    titleList.append(title)
    urlList.append(url)
    viewList.append(view_count)

video_dic = {
    "제목": titleList,
    "주소": urlList,
    "조회수": viewList,
}
video_df = pd.DataFrame(video_dic)
# Bare expression: displays the table when run as a notebook cell.
video_df