
1. IMPORT LIBRARY
import os
import time
from datetime import datetime
from urllib.request import urlretrieve

import pandas as pd
import requests as req
from bs4 import BeautifulSoup as bs
from selenium import webdriver as wb
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
2. GET WEB URL
# Address of the page to scrape. TODO: fill this in before running; an empty
# string is only a placeholder and requests will reject it.
url = ""
# Send a desktop-browser User-Agent with the HTTP request.
head_option = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
}
# Static route: download the page once and parse the returned markup.
res = req.get(url, headers=head_option)
html = bs(res.text, 'html')
# Browser route: open the same address in a Chrome session driven by Selenium.
driver = wb.Chrome()
driver.get(url)
3. SEARCH ON WEB TO COLLECT DATA
# select() returns every element of the parsed page matching the CSS selector.
# NOTE(review): presumably rank01 holds the song titles and rank02 the artist
# names of a chart page - confirm against the target page's markup.
songs = html.select('div.ellipsis.rank01')
singers = html.select('div.ellipsis.rank02 > span.checkEllipsis')
# select_one() returns only the first matching element.
html.select_one('div.se-main-container')
# Selenium counterpart: query the page loaded in the browser session.
driver.find_elements(By.CSS_SELECTOR, value='h4.h.fz_03')
4. GET LIST DATA
# Print the entries as "NNN title / singer", one line each, numbered from 001.
# `songs` and `singers` are expected to be parallel sequences of parsed
# elements exposing `.text`; both must be defined before this loop runs.
# Slicing keeps the 100-entry cap, while zip() stops early instead of raising
# IndexError when fewer than 100 entries are available.
for idx, (song, artist) in enumerate(zip(songs[:100], singers[:100])):
    title = song.text.strip('\n')
    singer = artist.text
    print("{:03} {} / {}".format(idx+1, title, singer))
# Gather plain values from three parallel element lists (`title`, `view`,
# `link`), which must be defined before this point: the visible text of each
# title and view element, and the href attribute of each link element.
# zip() walks the lists in lockstep and stops at the shortest one, so a
# length mismatch cannot raise IndexError.
titles = []
links = []
views = []
for title_el, view_el, link_el in zip(title, view, link):
    titles.append(title_el.text)
    views.append(view_el.text)
    links.append(link_el.get_attribute('href'))
5. MAKE DATA FRAME
# Build one row per item by pairing the three lists positionally, then label
# the columns in the same Title / Link / View order.
rows = list(zip(titles, links, views))
data = pd.DataFrame(rows, columns=["Title", "Link", "View"])
6. EXPORT TO FILE(TEXT/EXCEL)
# Embed the current local date and time (to the minute) in the file name.
now = datetime.now()
filename = now.strftime('Melon_Top100_at_%Y%m%d_%Hh%Mm.xlsx')
# index=False keeps the DataFrame's row index out of the exported sheet.
data.to_excel(filename, index=False)
# Write every entry of `cmt` to blog_review.txt back to back; no separator is
# added between entries. `cmt` is expected to be an iterable of strings
# defined before this point.
# Two specific character sequences are stripped from each entry before it is
# written. The `with` block closes the file even if a write raises.
with open('blog_review.txt', 'w', encoding='utf-8') as f:
    for COMMENT in cmt:
        f.write(COMMENT.replace(' \u57b5', '').replace('\U0001f388', ''))