# 오늘은 네이버 뉴스 기사 검색을 했다.
import csv
import re
import urllib

import requests
from bs4 import BeautifulSoup
# Endpoint and fixed query parameters of the Naver news search page.
NEWS_SEARCH_URL = 'https://search.naver.com/search.naver'
# The `start` offsets advance by 10, so at most 10 rows are kept per page.
RESULTS_PER_PAGE = 10
# Anything that is not an ASCII letter, a digit or a Hangul syllable.
NON_WORD_PATTERN = re.compile('[^A-Za-z0-9가-힣]')


def clean_text(text):
    """Return *text* with every non-alphanumeric, non-Hangul character
    replaced by a single space (one space per removed character)."""
    return NON_WORD_PATTERN.sub(' ', text)


def build_query(keyword, start):
    """Return the query parameters for one result page.

    The keyword is passed unencoded; ``requests`` percent-encodes it when
    the dictionary is supplied through ``params=``.
    """
    return {
        'where': 'news',
        'sm': 'tab_pge',
        'query': keyword,
        'start': start,
    }


def pair_rows(titles, descriptions, limit=RESULTS_PER_PAGE):
    """Pair titles with descriptions by position as ``[title, desc]`` rows.

    Pairing stops at the shorter sequence and at *limit* rows, so a page
    with fewer entries than expected no longer raises ``IndexError``.
    """
    return [[title, desc] for title, desc in zip(titles, descriptions)][:limit]


def parse_news_page(html):
    """Extract cleaned ``[title, description]`` rows from one result page.

    Returns an empty list when the ``main_pack`` container is missing
    (for example when the response is not a regular result page).
    """
    soup = BeautifulSoup(html, 'lxml')
    main_pack = soup.find('div', {'id': 'main_pack'})
    if main_pack is None:
        return []

    titles = []
    for area in main_pack.find_all('div', {'class': 'news_area'}):
        link = area.find('a', {'class': 'news_tit'})
        if link is None:
            # Keep a placeholder so later titles stay aligned with
            # their descriptions.
            raw_title = ''
        else:
            # Fall back to the visible link text if `title` is absent.
            raw_title = link.get('title') or link.get_text()
        titles.append(clean_text(raw_title))

    # NOTE(review): titles and descriptions are matched purely by position,
    # as in the original script; this assumes every article on the page has
    # a description link -- confirm against the live markup.
    descriptions = [
        clean_text(anchor.text)
        for anchor in main_pack.find_all(
            'a', {'class': 'api_txt_lines dsc_txt_wrap'}
        )
    ]
    return pair_rows(titles, descriptions)


def collect_news(keyword, starts=range(1, 101, 10), timeout=10):
    """Fetch every result page for *keyword* and return all rows.

    Rows are accumulated across pages; previously only a single page
    survived because the result lists / output file were reset.

    Raises:
        requests.HTTPError: if a page responds with an error status.
    """
    rows = []
    for start in starts:
        page = requests.get(
            NEWS_SEARCH_URL,
            params=build_query(keyword, start),
            timeout=timeout,
        )
        page.raise_for_status()
        rows.extend(parse_news_page(page.text))
    return rows


def save_csv(path, rows):
    """Write *rows* to *path* as CSV under a title/content header row.

    The ``csv`` module handles quoting, and the file is closed even if a
    write fails. UTF-8 with a BOM is used so the Korean text does not
    depend on the platform's default encoding.
    """
    with open(path, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['제목', '내용'])
        writer.writerows(rows)


def main():
    """Prompt for a keyword, scrape the news results and save them."""
    keyword = input('검색하실 키워드를 입력하세요(ex:커피,코로나...) : ')
    result = collect_news(keyword)
    save_csv('{0}뉴스기사검색.csv'.format(keyword), result)


if __name__ == '__main__':
    main()