처음 해본 곳은 다음 뉴스이다.
네이버 뉴스를 해보고 싶었으나 권한 문제로 할 수 없었다.
내가 가져오고 싶은 정보?
import requests
from bs4 import BeautifulSoup
# Fetch the Daum "popular" ranking page and print headlines from it.
# A timeout keeps the script from hanging forever on an unresponsive server.
webpage = requests.get("https://news.daum.net/ranking/popular", timeout=10)
# Parse the raw bytes (as the article fetch below does) so BeautifulSoup
# detects the page encoding from the document itself.
soup = BeautifulSoup(webpage.content, "html.parser")
# Run the selector once instead of once per iteration. The slice takes every
# second match among the first 20 and simply yields fewer items when the page
# has fewer matches, instead of raising IndexError.
for link in soup.select(".link_txt")[0:20:2]:
    title = link.get_text()
    print(title)
print("===========================")

# Fetch a single article page and print its body paragraphs.
webpage2 = requests.get("https://news.v.daum.net/v/20210608151412944", timeout=10)
soup2 = BeautifulSoup(webpage2.content, "html.parser")
# Only <p> elements carrying the attribute dmcf-ptype="general" are printed.
for el in soup2.find_all('p', attrs={'dmcf-ptype': 'general'}):
    txt = el.get_text()
    print(txt)
다음으로 해본 곳은 해외 뉴스 CNET
import requests
from bs4 import BeautifulSoup
# Fetch the CNET news listing page; the timeout prevents an indefinite hang.
webpage = requests.get("https://www.cnet.com/news/", timeout=10)
soup = BeautifulSoup(webpage.content, "html.parser")
title = soup.find(class_="fdListingContainer")
if title is None:
    # find() returns None when nothing matches; stop with a clear message
    # rather than failing later with an AttributeError on None.
    raise SystemExit("cnet: 'fdListingContainer' element not found on the listing page")

# href values of the <a class="assetHed"> links inside the listing container.
url = title.find_all('a', attrs={'class': 'assetHed'})
templist = [a.attrs['href'] for a in url]

# Run the selector once instead of once per iteration. Slicing plus zip()
# handles at most 10 entries and stops early when either list is shorter,
# instead of raising IndexError.
headlines = title.select(".assetHed")[:10]
for x, (headline, href) in enumerate(zip(headlines, templist)):
    titles = headline.get_text()
    # Even positions are labelled as titles, odd positions as sub-titles.
    label = "title : " if x % 2 == 0 else "sub_title : "
    print(label + titles.lstrip())

    # Fetch the linked article and print its body paragraphs.
    webpage2 = requests.get("https://www.cnet.com" + href, timeout=10)
    soup2 = BeautifulSoup(webpage2.content, "html.parser")
    soup3 = soup2.select("#article-body > div.col-7.article-main-body.row > p")
    print("content")
    for j in soup3:
        # Paragraphs that begin with "Read more:" are not printed.
        if j.text.startswith("Read more:"):
            continue
        print(j.text)
    print("=" * 20)