ํ์ ํ๋ฉด์ TIL๊น์ง ์ฐ๋ ๊ฑฐ ๋์ฒด ์ด์ผํจ???
WIL๋ก ๋ฏธ๋ฃจ๊ฒ ์ต๋๋ค...^^;;
ํ๋ก์ ํธ ์ฝ๋ ์์ฑํ๊ฑฐ ๋ ๋ค ๋ณต๋ถํ๊ธฐ
https://teamsparta.notion.site/NBA-1bc2dc3ef5148120aeecfa26008ccba1
import pandas as pd
# Load the five raw CSV tables from the data/ directory.
# NOTE: the bare expressions below only show output when each one is run as a
# notebook cell; in a plain script they are evaluated and discarded.
games = pd.read_csv('data/games.csv')
games_details = pd.read_csv('data/games_details.csv')
players = pd.read_csv('data/players.csv')
ranking = pd.read_csv('data/ranking.csv')
teams = pd.read_csv('data/teams.csv')

# Missing-value counts and size of the unfiltered games table.
games.isna().sum()
games.shape

# Rows of games where PTS_home is missing.
games.loc[games['PTS_home'].isna()]

# Missing-value counts for the other two large tables.
games_details.isna().sum()
ranking.isna().sum()

games.head()
games.columns

# Keep rows with SEASON >= 2014, then drop the two team-id columns.
games = games.loc[games['SEASON'] >= 2014].drop(
    columns=['HOME_TEAM_ID', 'VISITOR_TEAM_ID']
)
games.shape
games.isna().sum()

# Quick look at the remaining tables.
ranking.head()
ranking.shape
games_details.head()
games_details.shape
NBA 분석 예시
https://western-sky.tistory.com/42
시즌별 상위 5팀 추출 (승률이 동률인 경우 그냥 랜덤으로 하나만)
games 테이블에서 각 팀의 평균치 계산
시즌별 하위 5팀 추출
games 테이블에서 각 팀의 평균치 계산
우리팀(Oklahoma city)의 기량 계산
부족한 역량 보완을 위한 후보 영입 전략
# --- Top-5 teams per season, taken from each season's final standings row ---
ranking = pd.read_csv('data/ranking.csv')

# Keep SEASON_ID values from 22014 up to (but not including) 22022.
# NOTE(review): presumably the leading "2" marks regular-season standings --
# confirm against the dataset description.
rank_post2014 = ranking[(ranking['SEASON_ID'] >= 22014) & (ranking['SEASON_ID'] < 22022)]
rank_post2014.groupby('SEASON_ID')['G'].max()

# Latest standings date recorded for each season (kept for inspection).
end_date = rank_post2014.groupby('SEASON_ID')['STANDINGSDATE'].max().to_list()
end_date

# Compare every row with the last date of its OWN season.  Pooling all end
# dates with isin() would also keep a row from one season that merely shares a
# date with another season's final day.
season_last_date = rank_post2014.groupby('SEASON_ID')['STANDINGSDATE'].transform('max')

# .copy() makes this an independent frame, so adding the rank column below
# does not write into a slice of `ranking` (SettingWithCopyWarning).
rank_last_date = rank_post2014[rank_post2014['STANDINGSDATE'] == season_last_date].copy()

# Rank teams inside each season by W_PCT, best first.  method='first' breaks
# ties by row order so exactly five teams are kept per season.
rank_last_date['group rank'] = (
    rank_last_date.groupby('SEASON_ID')['W_PCT'].rank(method='first', ascending=False)
)

rank_top5 = rank_last_date[rank_last_date['group rank'] <= 5].sort_values(
    ['SEASON_ID', 'group rank'], ascending=[False, True]
)
rank_top5 = rank_top5[
    ['TEAM_ID', 'SEASON_ID', 'CONFERENCE', 'TEAM', 'HOME_RECORD', 'ROAD_RECORD', 'group rank']
]
rank_top5
# --- Reload the games table and trim it to the 2014+ seasons ---
games = pd.read_csv('data/games.csv')
games.head()
games = games.drop(columns=['HOME_TEAM_ID', 'VISITOR_TEAM_ID', 'GAME_STATUS_TEXT'])
games = games.loc[games['SEASON'] >= 2014]
games.isna().sum()
games.shape

# Games of the 2014 season only.
games_2014 = games.loc[games['SEASON'] == 2014]
games_2014.head()

# Top-5 rows whose SEASON_ID is 22014.
rank_top5_2014 = rank_top5.loc[rank_top5['SEASON_ID'] == 22014]
rank_top5_2014

# Inner-join the 2014 games to the top-5 teams, once on the home-side team id
# and once on the away-side team id.
merged_games2014_home = games_2014.merge(
    rank_top5_2014, how='inner', left_on='TEAM_ID_home', right_on='TEAM_ID'
)
merged_games2014_home.describe()

merged_games2014_away = games_2014.merge(
    rank_top5_2014, how='inner', left_on='TEAM_ID_away', right_on='TEAM_ID'
)
merged_games2014_away.describe()

# Mean of the FG3_PCT column per team, home games and away games separately.
merged_games2014_home.groupby("TEAM")['FG3_PCT_home'].mean()
merged_games2014_away.groupby("TEAM")['FG3_PCT_away'].mean()
games_details

# First 2000 rows that have no FGM value and whose TEAM_CITY appears among the
# TEAM values of the 2014 top-5 teams.
# NOTE(review): assumes ranking TEAM and games_details TEAM_CITY spell team
# names the same way -- confirm.
fgm_missing = games_details['FGM'].isna()
in_top5_2014 = games_details['TEAM_CITY'].isin(rank_top5_2014['TEAM'])
games_details.loc[fgm_missing & in_top5_2014].head(2000)

# Look up a single game by id.
games.loc[games['GAME_ID'] == 41900222]

# Number of games_details rows recorded for each game.
games_details.groupby('GAME_ID')['GAME_ID'].count()
games_details ๊ฒฐ์ธก์น -> ๊ฐ ๊ฒ์ ๋น ์ฝ์น ์ ๋ณด
games_details MIN๊ฐ(๋ด ์๊ฐ)์ด ๋ง์ด๋์ค์ธ ์ ์๋ค ์กด์ฌ (์ฝ 20row??) -> ์ด์์น??(์ด์์น ใ ใ )
๊ฒฝ๊ธฐ ๋ด ์๊ฐ์ด 0์ด์์ธ ์ ์๋ค๋ง ์ถ์ถํด์ผํ ๋ฏ
์ ์ฒ๋ฆฌ ๋นจ๋ฆฌ ํ๊ณ ํ ์ด๋ธ ํฉ์ณ์ ๊ณ์ฐํด๋ณด๊ณ 2์ฐจ ์คํฏ(?)๋ ๊ณ์ฐํด๋ณด๊ณ ,,,
# --- Reload games_details and restrict it to the games kept in `games` ---
games_details = pd.read_csv('data/games_details.csv')
games_details['MIN'].isna().sum()
games_details.head()
games_details.shape

# Drop columns that are not used in the rest of the analysis.
games_details = games_details.drop(
    columns=['TEAM_ABBREVIATION', 'NICKNAME', 'COMMENT', 'FG3_PCT', 'FT_PCT']
)

# Keep only detail rows whose GAME_ID is present in `games`.
game_ids = games['GAME_ID'].unique().tolist()
len(game_ids)
games_details = games_details.loc[games_details['GAME_ID'].isin(game_ids)]
print(games_details.shape)
# `display` is supplied by the IPython/Jupyter session, not by this file.
display(games_details.head())
games_details.isna().sum()

# Remove rows that have no FGM value.
games_details = games_details.dropna(subset=['FGM'])
games_details.shape
games_details.isna().sum()
# --- Convert the raw MIN text to playing time in minutes (float) ---
# Two conversions used to run back to back here: a timedelta parse that turned
# MIN into floats, followed by a string-splitting loop that then failed with
# TypeError on `':' in i` and would otherwise have stored [minute, second]
# lists.  A single parser now covers every format the loop handled and yields
# one numeric column.


def _minutes_played(raw):
    """Convert one raw ``MIN`` value to playing time in minutes.

    Accepted text forms are ``"MM:SS"`` and a bare minute count; either part
    may be written as a decimal such as ``"18.000000"`` and is truncated to a
    whole number, as the original loop did.  Values that are already numeric
    are passed through, so re-running this cell does not corrupt the column.

    Args:
        raw: A single cell of the ``MIN`` column (text, number or missing).

    Returns:
        Minutes as a float rounded to two decimals, or NaN when the value is
        missing or cannot be parsed.
    """
    if not isinstance(raw, str):
        # Missing values and already-converted numbers.
        return float('nan') if pd.isna(raw) else round(float(raw), 2)

    minute_text, _, second_text = raw.strip().partition(':')
    try:
        whole_minutes = int(float(minute_text))
        whole_seconds = int(float(second_text)) if second_text else 0
    except (ValueError, OverflowError):
        return float('nan')

    # Apply the seconds in the same direction as the minutes so that a
    # negative entry such as "-1:30" becomes -1.5 rather than -0.5.
    sign = -1 if minute_text.startswith('-') else 1
    return round(sign * (abs(whole_minutes) + whole_seconds / 60), 2)


# Raw value at index label 422 before conversion; .get returns None instead of
# raising KeyError if that row was filtered out.
games_details['MIN'].get(422)

# assign() returns a new frame, so the write never lands on a filtered slice.
games_details = games_details.assign(MIN=games_details['MIN'].map(_minutes_played))
games_details
games_details['MIN']

# Values that could not be parsed show up as NaN.
games_details['MIN'].isna().sum()
games_details[games_details['MIN'].isna()].index

# Same row after conversion.
games_details['MIN'].get(422)
# Number of distinct games each team appears in.
games_details.groupby('TEAM_ID')['GAME_ID'].nunique()

# Same count, with the team's city alongside the id.  The second grouping key
# used to be an empty string, which raises KeyError because no such column
# exists.
# NOTE(review): TEAM_CITY is a guess at the intended second key -- confirm.
games_details.groupby(['TEAM_ID', 'TEAM_CITY'])['GAME_ID'].nunique()
일단 팀원들끼리 전처리하는 방식을 통일하고,
동일한 코드를 공유하기
# Collapse the per-player rows into one row per (game, team) by summing the
# box-score counting columns.
team_keys = ['GAME_ID', 'TEAM_ID', 'TEAM_CITY']
box_score_cols = [
    'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA',
    'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TO', 'PF', 'PTS',
]
team_totals = games_details.groupby(team_keys)[box_score_cols].sum()
team_details = team_totals.reset_index()
team_details