-- 03.Daum 검색 페이지.ipynb --
import requests
from bs4 import BeautifulSoup
# Exploratory walkthrough: fetch one Daum blog-search result page and pick
# apart the first result card by hand before wrapping the steps in a function.
keyword = '손흥민'
page = 1
# The keyword is interpolated into the `q` query parameter and the page number
# into `p`. NOTE(review): `w=fusion&col=blog` presumably restricts the search
# to blog results -- confirm against the site.
url = f'https://search.daum.net/search?w=fusion&col=blog&q={keyword}&DA=TWA&p={page}'
response = requests.get(url)
# Bare expression: has no effect as a script; presumably shown as notebook cell output.
response
# Parse the response body with the 'html.parser' backend.
dom = BeautifulSoup(response.text, 'html.parser')
# Select every <c-card> element that is a direct child of a <c-container>.
posts = dom.select('c-container > c-card')
len(posts)
# Use the first matched card as the sample; raises IndexError if nothing matched.
post = posts[0]
post.select_one('c-menu-share').attrs['data-title'].strip() # blog title
post.select_one('c-footer-desc').contents[0].strip() # try .contents[0] when .text does not work; blog post date
post.select_one('c-menu-share').attrs['data-link'].strip() # blog URL
def crawl_daum_blog(keyword, page=1, timeout=10):
    """Fetch one page of Daum blog search results for *keyword*.

    Args:
        keyword: Search term, sent as the ``q`` query parameter. It is passed
            through ``params`` so that characters such as ``&``, ``#`` or
            spaces are URL-encoded instead of corrupting the query string.
        page: Result page number, sent as the ``p`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawl indefinitely.

    Returns:
        A list of dicts, one per result card, each with the keys ``"title"``,
        ``"date"`` and ``"url"``. Cards that lack the expected elements or
        attributes are skipped rather than aborting the whole page.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(
        'https://search.daum.net/search',
        params={
            'w': 'fusion',
            'col': 'blog',
            'q': keyword,
            'DA': 'TWA',
            'p': page,
        },
        timeout=timeout,
    )
    # Fail loudly on an error status instead of parsing an error page and
    # silently returning an empty list.
    response.raise_for_status()
    dom = BeautifulSoup(response.text, 'html.parser')

    result = []
    for post in dom.select('c-container > c-card'):
        # Title and link both live on the same <c-menu-share> element, so
        # look it up once per card.
        share = post.select_one('c-menu-share')
        footer = post.select_one('c-footer-desc')
        if share is None or footer is None or not footer.contents:
            # Card does not have the expected layout; skip it.
            continue

        title = share.attrs.get('data-title')
        link = share.attrs.get('data-link')
        if title is None or link is None:
            continue

        result.append({
            "title": title.strip(),
            # The first child node of the footer is used as the date text.
            "date": footer.contents[0].strip(),
            "url": link.strip(),
        })
    return result
# Try the crawler on a single page (page 2) first.
result = crawl_daum_blog('손흥민', 2)
result

# Start again from an empty list and collect pages 1 through 10 in order.
result = []
for page in range(1, 11):
    print(f'{page} page 크롤링...')  # progress message for each page
    result.extend(crawl_daum_blog('손흥민', page))

# Total number of collected posts, followed by the last collected post.
len(result)
result[-1]