파이썬의 HTML 파싱 라이브러리인 "BeautifulSoup"를 이용해서
빌보드 핫 100 차트의 순위/가수 이름/곡명을 크롤링해 보자 :-)

import requests
from bs4 import BeautifulSoup
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.sql import *
# SQLite engine backed by the file music.db (a relative path in the URL).
engine = create_engine('sqlite:///music.db')
# Declarative base class that the ORM model in this script inherits from.
Base = declarative_base()
class Music(Base):
    """ORM model for one scraped chart entry, stored in the ``musics`` table.

    All scraped fields are kept as text exactly as they appear on the page.
    """

    __tablename__ = 'musics'

    # Surrogate primary key; the script never sets it explicitly.
    id = Column(Integer, primary_key=True)
    # Chart position, stored as the string scraped from the page.
    rank = Column(String(50))
    # Song title.
    song = Column(String(50))
    # Performing artist(s).
    singer = Column(String(50))

    def __repr__(self):
        """Return a readable representation showing every column value."""
        return '{}(id={!r}, rank={!r}, song={!r}, singer={!r})'.format(
            type(self).__name__, self.id, self.rank, self.song, self.singer
        )
# Create the musics table; checkfirst=True skips creation when it already exists.
Music.__table__.create(bind=engine, checkfirst=True)
# Session factory bound to the engine, and the single session instance that
# the rest of the script uses for inserts and queries.
Session = sessionmaker(bind=engine)
session = Session()
# Download the Hot 100 chart page. The timeout keeps the script from blocking
# forever on a stalled connection, and raise_for_status() aborts on an HTTP
# error instead of parsing an error page as if it were the chart.
req = requests.get('https://www.billboard.com/charts/hot-100', timeout=10)
req.raise_for_status()
html = req.text
soup = BeautifulSoup(html, 'html.parser')

# Select the rank, song-title and artist elements with CSS selectors. The three
# result lists are paired up positionally later, so they are expected to have
# one element per chart entry each.
rank = soup.select(
    'li > button > span.chart-element__rank.flex--column.flex--xy-center.flex--no-shrink > span.chart-element__rank__number'
)
song = soup.select(
    'li > button > span.chart-element__information > span.chart-element__information__song.text--truncate.color--primary'
)
singer = soup.select(
    'li > button > span.chart-element__information > span.chart-element__information__artist.text--truncate.color--secondary'
)
# Selector for the chart-element image span, currently disabled:
#image = soup.select(
#    'li > button > span.chart-element__image.flex--no-shrink'
#)
#print(image)
# Merge the three parallel selections into one dict per chart entry.
# zip() stops at the shortest list, so any unmatched trailing elements
# are dropped.
music_chart = [
    {
        'rank': rank_tag.text,
        'song': song_tag.text,
        'singer': singer_tag.text,
    }
    for rank_tag, song_tag, singer_tag in zip(rank, song, singer)
]
# Stage one Music row per chart entry, then commit once so the whole chart is
# written in a single transaction rather than one commit per row (which is
# slower and can leave a partially written chart if a later row fails).
session.add_all([
    Music(
        rank=element['rank'],
        song=element['song'],
        singer=element['singer'],
    )
    for element in music_chart
])
session.commit()
# Read back every row in the musics table (the query is unfiltered) and print
# each one as "rank | song | singer".
request = session.query(Music).all()
for entry in request:
    print(entry.rank, entry.song, entry.singer, sep=' | ')
profile
#의식의흐름 #순간순간 #생각의스냅샷

0개의 댓글