import requests
from bs4 import BeautifulSoup
# Fetch the Naver movie ranking page and print rank, title and star for each
# movie row. A browser-like User-Agent header is sent with the request.
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}

# timeout: without it requests waits indefinitely on an unresponsive server.
data = requests.get(
    'https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=pnt&date=20200303',
    headers=headers,
    timeout=10,
)
# Fail fast on an HTTP error instead of silently parsing an error page.
data.raise_for_status()

soup = BeautifulSoup(data.text, 'html.parser')
trs = soup.select('#old_content > table > tbody > tr')

for tr in trs:
    a_tag = tr.select_one('td.title > div > a')
    rank_img = tr.select_one('td:nth-child(1) > img')
    point_td = tr.select_one('td.point')
    # Rows missing the title link are skipped (presumably spacer rows --
    # verify against the page). The rank image and point cell are checked
    # too, so an incomplete row is skipped rather than crashing the loop.
    if a_tag is None or rank_img is None or point_td is None:
        continue
    rank = rank_img['alt']  # the rank is read from the image's alt text
    title = a_tag.text
    star = point_td.text
    print(rank, title, star)
위 코드는 지난 시간에 배운 네이버 영화 랭킹 크롤링 코드이며, 실행하면 크롤링 결과(rank, title, star)가 출력된다.
insert를 사용해 이 결과를 DB에 저장해보자.
from pymongo import MongoClient

# Create a client for the MongoDB server at localhost:27017 and select the
# dbsparta database. Each statement must be on its own line.
client = MongoClient('localhost', 27017)
db = client.dbsparta
상단에 pymongo 시작코드 적기
# Bundle the values scraped from one row into a single dictionary.
doc = {
    'rank': rank,
    'title': title,
    'star': star,
}
print(rank, title, star) 대신 doc(dictionary) 만들기
# Insert the dictionary as one document into the movies collection.
db['movies'].insert_one(doc)
위에서 만든 doc을 insert_one을 사용해 movies(collection)에 넣어줌
import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient
# Scrape the Naver movie ranking page and store each movie row as one
# document in the movies collection of the dbsparta database.
# NOTE: nothing here removes or de-duplicates earlier documents, so every
# run inserts the rows again.
client = MongoClient('localhost', 27017)
db = client.dbsparta

headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}

# timeout: without it requests waits indefinitely on an unresponsive server.
data = requests.get(
    'https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=pnt&date=20200303',
    headers=headers,
    timeout=10,
)
# Fail fast on an HTTP error instead of storing data parsed from an error page.
data.raise_for_status()

soup = BeautifulSoup(data.text, 'html.parser')
trs = soup.select('#old_content > table > tbody > tr')

for tr in trs:
    a_tag = tr.select_one('td.title > div > a')
    rank_img = tr.select_one('td:nth-child(1) > img')
    point_td = tr.select_one('td.point')
    # Rows missing the title link are skipped (presumably spacer rows --
    # verify against the page). The rank image and point cell are checked
    # too, so an incomplete row is skipped rather than crashing the loop.
    if a_tag is None or rank_img is None or point_td is None:
        continue
    rank = rank_img['alt']  # the rank is read from the image's alt text
    title = a_tag.text
    star = point_td.text
    doc = {
        'rank': rank,
        'title': title,
        'star': star,
    }
    db.movies.insert_one(doc)
정리하면 이런 코드가 나온다.
👇🏻 Refresh를 누르면 movies 컬렉션 안에 크롤링한 값이 insert된 것을 확인할 수 있다.