
New formula:
find and rfind in text
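→ used to find the index (position) of a substring in text; rfind returns the index of the last occurrence, or -1 if it is absent. Ex: label.rfind('조회수')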
1. import library
from selenium import webdriver as wb
from bs4 import BeautifulSoup as bs
import requests as req
import os
import pandas as pd
from urllib.request import urlretrieve
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
2. Get link
# URL of the channel's "videos" page that the following steps open and scrape.
yt_url = "https://www.youtube.com/@hoseobiiiiiii._.0410/videos"
3. Process link
# Start a Chrome browser session and load the channel page stored in yt_url.
driver = wb.Chrome()
driver.get(yt_url)
4. Get title, hyper link, view
# Look up the video elements currently rendered on the page.
# find_elements takes a locator strategy (a By constant) followed by the
# value to match.
title = driver.find_elements(By.ID, 'video-title')
link = driver.find_elements(By.ID, 'video-title-link')
view = driver.find_elements(By.CSS_SELECTOR, 'div#separator+span')
5. Scroll to the end of the page, then get the list of videos
# --- Scroll until the page stops growing -----------------------------------
# The page grows vertically as more videos are loaded, so the scroll *height*
# (read from the root element) is the value that tells us whether pressing END
# brought in new content.
height_script = "return document.documentElement.scrollHeight"
body = driver.find_element(By.TAG_NAME, 'body')
last_height = driver.execute_script(height_script)
while True:
    body.send_keys(Keys.END)
    time.sleep(1)  # give the page a moment to append the next batch
    current_height = driver.execute_script(height_script)
    if current_height == last_height:
        # Nothing new was appended: the end of the list has been reached.
        break
    print(last_height, current_height)  # progress: previous -> new height
    last_height = current_height

# --- Collect title, link and view count of every video ---------------------
# These three lists are what the DataFrame step consumes.
titles = []
links = []
views = []

yt_video_link = driver.find_elements(By.ID, 'video-title-link')
for a_link in yt_video_link:
    title = a_link.text
    href = a_link.get_attribute('href')
    # get_attribute returns None when the attribute is missing; fall back to
    # an empty string so the string searches below cannot raise.
    label = a_link.get_attribute('aria-label') or ''

    # The view count sits between the last '조회수 ' marker and the '회' that
    # follows it. '조회수' plus the separating character is 4 characters long.
    marker_index = label.rfind('조회수')
    if marker_index == -1:
        view = ''
    else:
        start_index = marker_index + 4
        # Search only after the marker so the '회' inside '조회수' is ignored.
        end_index = label.rfind('회', start_index)
        view = label[start_index:end_index] if end_index != -1 else ''

    print(title)
    print(label)
    print(view)

    titles.append(title)
    links.append(href)
    views.append(view)
6. Make dataframe
# Pair up title, link and view count row by row (zip stops at the shortest
# list) and load the rows into a three-column table.
column_names = ["Title", "Link", "View"]
rows = list(zip(titles, links, views))
data = pd.DataFrame(rows, columns=column_names)
# Bare expression: shows the table when run as the last line of a notebook
# cell; it has no effect in a plain script.
data
7. Export to excel
# Write the table to 'name.xlsx'; index=False leaves the row index out of the file.
data.to_excel('name.xlsx', index=False)
🍏Collect comment data
# Open a single video's watch page so its comments can be collected.
# Note: this rebinds `driver` to a new Chrome session.
url = "https://www.youtube.com/watch?v=bRrUbFPcygA"
driver = wb.Chrome()
driver.get(url)
# Find every element with id 'content-text' on the watch page.
# The locator strategy must be the By constant itself, not a list around it.
cmt = driver.find_elements(By.ID, 'content-text')

# Keep the text of each element, trimming leading/trailing newlines.
comment = [element.text.strip('\n') for element in cmt]
# Bare expression: shows the list when run as the last line of a notebook
# cell; it has no effect in a plain script.
comment

# One comment per row, exported without the row index.
comment = pd.DataFrame(comment)
comment.to_excel('Yt_comment.xlsx', index=False)
# Save the raw comment text to a UTF-8 text file, one comment per line.
# The with-block closes the file even if reading an element's text fails, and
# the trailing newline keeps consecutive comments from running together.
# The loop variable is deliberately not named `comment`, so the DataFrame bound
# to that name is left intact.
with open('yt_review.txt', 'w', encoding='utf-8') as f:
    f.writelines(f"{comment_element.text}\n" for comment_element in cmt)