트위터 크롤링 하는 방법? 쉽다. 많다.
하지만 나에게도 쉬울거란 보장은 없음.
여러가지 방법 시도 후 가장 쉽고 오류가 안나는 방법으로
트위터 크롤링 가이드를 가지고 왔습니다.
*네이버의 어떤 분 블로그 참고하였음. 감사합니다!
트위터 개발자 계정 신청 후 api key 받기
https://developer.twitter.com/en
import 해줄 것
import tweepy
import snscrape.modules.twitter as sntwitter
import pandas as pd
# Personal keys used to access the Twitter API. These are placeholders:
# replace them with the values issued for your developer account, and avoid
# committing real keys to version control.
consumer_key = "blahblah"
consumer_secret = "blahblah"
access_token = "blahblah"
access_token_secret = "blahblah"

# Create the OAuth handler from the consumer key pair.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)

# Attach the access token pair to the handler.
auth.set_access_token(access_token, access_token_secret)

# Create the API instance.
# NOTE(review): `api` is not referenced by the snscrape-based scraping code
# visible in this file -- confirm whether it is still needed.
api = tweepy.API(auth)
# Maximum number of tweets to collect. The original author's note says the
# free version is limited to 5000 tweets or fewer.
MAX_TWEETS = 5000

# Search query in the form '<term> since:<start-date> until:<end-date>'.
# NOTE(review): the search term looks mis-encoded in this copy of the file;
# it is kept byte-for-byte -- confirm the intended keyword before running.
SEARCH_QUERY = 'λ¨μ§ λ¨μ§ since:2020-01-01 until:2022-08-18'

# Rows of [date, text, like count], one per scraped tweet.
tweets_list = []

# Use TwitterSearchScraper to fetch the matching tweets and append each one
# to the list.
scraper = sntwitter.TwitterSearchScraper(SEARCH_QUERY)
for i, tweet in enumerate(scraper.get_items()):
    # `>=` rather than `>`: `i` is zero-based, so `i > MAX_TWEETS` would let
    # MAX_TWEETS + 1 tweets through before stopping.
    if i >= MAX_TWEETS:
        break
    tweets_list.append([tweet.date, tweet.content, tweet.likeCount])

# Store the collected rows as a DataFrame.
tweets_df = pd.DataFrame(tweets_list, columns=['Datetime', 'Text', 'Like'])

# Bare expression: shows the frame when run in a notebook/REPL cell; it has
# no effect when the file is executed as a plain script.
tweets_df
이 방법으로 누구나 손쉽게 크롤링 할 수 있기를-!