API.user_timeline(user_id, screen_name, since_id, count, max_id, trim_user, exclude_replies, include_rts)
def get_tweet(api, user):
    """Return the text of the first status on a user's timeline.

    Args:
        api: Object exposing ``user_timeline`` (e.g. a ``tweepy.API``).
        user: Screen name of the account to query.

    Returns:
        The ``text`` attribute of the first status returned.

    Raises:
        IndexError: If the timeline request returns no statuses.
    """
    # count=1 requests a single status, so index 0 is the only entry.
    statuses = api.user_timeline(screen_name=user, count=1)
    return statuses[0].text
screen_name을 이용해서 타임라인 데이터 불러오기
def get_tweet(api, user):
    """Return the creation timestamp of the first status on a user's timeline.

    Args:
        api: Object exposing ``user_timeline`` (e.g. a ``tweepy.API``).
        user: Screen name of the account to query.

    Returns:
        The ``created_at`` attribute of the first status returned.

    Raises:
        IndexError: If the timeline request returns no statuses.
    """
    # count=1 requests a single status, so index 0 is the only entry.
    statuses = api.user_timeline(screen_name=user, count=1)
    return statuses[0].created_at
def get_tweet(api, user, limit=400):
    """Return a lazy iterator over a user's timeline statuses.

    Args:
        api: ``tweepy.API`` client whose ``user_timeline`` is paginated.
        user: Screen name of the account to query.
        limit: Maximum number of statuses to yield (defaults to 400).

    Returns:
        The item iterator produced by ``tweepy.Cursor(...).items(limit)``;
        statuses are fetched as the iterator is consumed.
    """
    # Pass the account as ``screen_name``: ``id`` is not part of the
    # documented ``API.user_timeline`` parameters.
    cursor = tweepy.Cursor(api.user_timeline, screen_name=user)
    return cursor.items(limit)
tweepy.Cursor에 api.user_timeline을 넘겨 주고, 트윗을 최대 400개까지 수집합니다.
# import pandas
import pandas as pd
# Crawl the account's tweets and keep one row per status:
# [text, creation time, retweet count, like count].
api = connect_api()
tweet_list = [
    [status.text, status.created_at, status.retweet_count, status.favorite_count]
    for status in get_tweet(api, 'SamsungNewsroom')
]

# list -> DataFrame, indexed by the tweet text.
df = pd.DataFrame(tweet_list, columns=['Tweets', 'Created_Date', '#_of_Retweets', '#_of_Likes'])
df = df.set_index('Tweets')

# Preview the first rows of the DataFrame.
print(df.head())
# Import the tweepy package.
import tweepy
# Create the Twitter API client.
def connect_api(consumer_key='', consumer_secret='',
                access_token='', access_token_secret=''):
    """Build a tweepy API client from OAuth 1 credentials.

    All four credentials default to empty strings, matching the original
    placeholders; supply real values to make authenticated requests.

    Args:
        consumer_key: Application (consumer) key.
        consumer_secret: Application (consumer) secret.
        access_token: User access token.
        access_token_secret: User access token secret.

    Returns:
        A ``tweepy.API`` instance bound to the configured auth handler.
    """
    # Create the handler from the application credentials.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    # Attach the user's access token pair to the handler.
    auth.set_access_token(access_token, access_token_secret)
    # Build the API client on top of the authenticated handler.
    return tweepy.API(auth)
# Fetch tweets through the API.
def get_tweet(api, user, limit=400):
    """Return a lazy iterator over a user's timeline statuses.

    Args:
        api: ``tweepy.API`` client whose ``user_timeline`` is paginated.
        user: Screen name of the account to query.
        limit: Maximum number of statuses to yield (defaults to 400).

    Returns:
        The item iterator produced by ``tweepy.Cursor(...).items(limit)``;
        statuses are fetched as the iterator is consumed.
    """
    # Pass the account as ``screen_name``: ``id`` is not part of the
    # documented ``API.user_timeline`` parameters.
    cursor = tweepy.Cursor(api.user_timeline, screen_name=user)
    return cursor.items(limit)
# Organize the fetched data into a DataFrame.
import pandas as pd
# Crawl the account's tweets and keep one row per status:
# [text, creation time, retweet count, like count].
api = connect_api()
tweet_list = [
    [status.text, status.created_at, status.retweet_count, status.favorite_count]
    for status in get_tweet(api, 'SamsungNewsroom')
]

# list -> DataFrame, indexed by the tweet text.
df = pd.DataFrame(tweet_list, columns=['Tweets', 'Created_Date', '#_of_Retweets', '#_of_Likes'])
df = df.set_index('Tweets')

# Preview the first rows of the DataFrame.
print(df.head())