기사 제목에서 우클릭 → 검사 → Copy → Copy selector 순서로 CSS 선택자를 복사한다.
from bs4 import BeautifulSoup
from selenium import webdriver

# NOTE(review): passing the driver path positionally is deprecated in
# Selenium 4 (use Service('chromedriver')); kept as-is to match the post.
driver = webdriver.Chrome('chromedriver')
url = "https://search.naver.com/search.naver?where=news&sm=tab_jum&query=추석"
driver.get(url)

# Feed the fully rendered page source to BeautifulSoup.
req = driver.page_source
soup = BeautifulSoup(req, 'html.parser')

# One <li> element per news article in the search-result list.
articles = soup.select('#main_pack > section.sc_new.sp_nnews._prs_nws > div > div.group_news > ul > li')

for article in articles:
    # Fix: loop body must be indented — the original raised IndentationError.
    title = article.select_one('div.news_wrap.api_ani_send > div > a').text
    print(title)

driver.quit()
# Article link: the href attribute of the same title anchor.
url = article.select_one('div.news_wrap.api_ani_send > div > a')['href']
결과
# Press/outlet name: first space-separated token of the a.info.press text,
# with the literal '언론사' ("press") label stripped out.
com = article.select_one('a.info.press').text.split(' ')[0].replace('언론사','')
결과
from bs4 import BeautifulSoup
from selenium import webdriver

# NOTE(review): positional driver path is deprecated in Selenium 4
# (use Service('chromedriver')); kept as-is to match the post.
driver = webdriver.Chrome('chromedriver')
url = "https://search.naver.com/search.naver?where=news&sm=tab_jum&query=추석"
driver.get(url)

# Parse the rendered page once; all fields are extracted from this soup.
req = driver.page_source
soup = BeautifulSoup(req, 'html.parser')

# One <li> element per news article in the search-result list.
articles = soup.select('#main_pack > section.sc_new.sp_nnews._prs_nws > div > div.group_news > ul > li')

for article in articles:
    # Fix: loop body must be indented — the original raised IndentationError.
    title = article.select_one('div.news_wrap.api_ani_send > div > a').text
    url = article.select_one('div.news_wrap.api_ani_send > div > a')['href']
    # Press name: first token of a.info.press text, '언론사' label removed.
    com = article.select_one('a.info.press').text.split(' ')[0].replace('언론사','')
    print(title, url, com)

driver.quit()
title, url, com이 전부 잘 스크래핑 되었다!
from openpyxl import Workbook  # library for saving to an Excel file
from bs4 import BeautifulSoup
from selenium import webdriver

# NOTE(review): positional driver path is deprecated in Selenium 4
# (use Service('chromedriver')); kept as-is to match the post.
driver = webdriver.Chrome('chromedriver')
url = "https://search.naver.com/search.naver?where=news&sm=tab_jum&query=추석"
driver.get(url)

# Parse the rendered page once; all fields are extracted from this soup.
req = driver.page_source
soup = BeautifulSoup(req, 'html.parser')

# One <li> element per news article in the search-result list.
articles = soup.select('#main_pack > section.sc_new.sp_nnews._prs_nws > div > div.group_news > ul > li')

# Set up the workbook: one sheet named "articles" with a header row.
wb = Workbook()
ws1 = wb.active
ws1.title = "articles"
ws1.append(["제목", "링크", "신문사"])

for article in articles:
    # Fix: loop body must be indented — the original raised IndentationError.
    title = article.select_one('div.news_wrap.api_ani_send > div > a').text
    url = article.select_one('div.news_wrap.api_ani_send > div > a')['href']
    # Press name: first token of a.info.press text, '언론사' label removed.
    com = article.select_one('a.info.press').text.split(' ')[0].replace('언론사','')
    ws1.append([title, url, com])  # write one row per article inside the loop

driver.quit()
wb.save(filename='articles.xlsx')
결과
엑셀 파일에 잘 들어간 것을 확인할 수 있다!
진짜 편리
연습삼아 썸네일도 추가해봤다.
재밌다 ╰(°▽°)╯
# Thumbnail image URL: the src attribute of the article's <img> tag.
thum = article.select_one('div.news_wrap.api_ani_send > a > img')['src']