-- 02.음원차트 크롤링.ipynb --
import requests # 외부 서버에 요청/응답을 처리하는 모듈
from bs4 import BeautifulSoup
# Exploratory notebook cells: fetch the Bugs chart page, parse it, and pull
# the rank / title / artist out of the first chart row. A bare expression on
# its own line is there so the notebook displays that value.
# NOTE(review): no timeout is passed here, so this call can block
# indefinitely if the server never answers — consider adding one.
response = requests.get('https://music.bugs.co.kr/chart')
response # shows the Response object; per the original note the status seen here was 200
response.status_code # 200: the server processed the request normally
response.text
dom = BeautifulSoup(response.text, 'html.parser') # the parsed page object; named "dom" following the instructor
# dom  (uncomment to display the whole parsed document)
# Every <tr> directly under the <tbody> of a table with class "list".
rank_lists = dom.select('table.list > tbody > tr')
len(rank_lists)
rank_list = rank_lists[0] # the first row
# The rank is read from the <strong> inside div.ranking and converted to int.
ranking = int(rank_list.select_one('td > div.ranking > strong').text.strip())
ranking
# The title is the text of the link inside p.title in the row's <th>.
m_title = rank_list.select_one('th > p.title > a').text.strip()
m_title
# select_one returns only the first matching link under the .artist element.
m_artist = rank_list.select_one('td > .artist > a').text.strip()
m_artist
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Crawl the Bugs music chart: download the chart page, extract the rank,
# title and artist of every chart row, and save them to 'bugs.xlsx'.

# Without a timeout requests waits forever on an unresponsive server.
response = requests.get('https://music.bugs.co.kr/chart', timeout=10)
# Stop on 4xx/5xx instead of parsing an error page into an empty sheet.
response.raise_for_status()

dom = BeautifulSoup(response.text, 'html.parser')
# Every <tr> directly under the <tbody> of a table with class "list".
rank_lists = dom.select('table.list > tbody > tr')

result = []
for rank in rank_lists:
    # select_one returns the first match only, so a row with several
    # artist links contributes just the first one.
    ranking = int(rank.select_one('td > div.ranking > strong').text.strip())
    m_title = rank.select_one('th > p.title > a').text.strip()
    m_artist = rank.select_one('td > .artist > a').text.strip()
    result.append({
        'rank': ranking,
        'title': m_title,
        'artist': m_artist,
    })

# One spreadsheet row per chart entry; columns follow the dict keys above.
df = pd.DataFrame(result)
df.to_excel('bugs.xlsx')