파이썬 데이터 핸들링 기본(2)
1. Time_Series : 시계열 데이터
# Parse the Yr_Mo_Dy column into datetime values and store the result back on df.
df.Yr_Mo_Dy = pd.to_datetime(df.Yr_Mo_Dy)
# List the distinct calendar years that occur in the parsed column.
df.Yr_Mo_Dy.dt.year.unique()
def fix_century(x):
    """Shift a date whose year is 2061 or later back by one century.

    Args:
        x: An object exposing ``year``, ``month`` and ``day`` (for example a
            ``pandas.Timestamp``), or a missing value such as ``pandas.NaT``.

    Returns:
        A ``pandas.Timestamp`` at midnight of the (possibly corrected) date;
        any time-of-day carried by ``x`` is discarded. ``pandas.NaT`` is
        returned unchanged when ``x`` is missing.
    """
    import datetime

    # NaT reports NaN for year/month/day, which datetime.date() rejects with
    # a TypeError; pass missing values through instead of crashing.
    if pd.isna(x):
        return pd.NaT

    year = x.year - 100 if x.year >= 2061 else x.year
    return pd.to_datetime(datetime.date(year, x.month, x.day))
# Fill gaps with the previous row's value first, then back-fill whatever is
# still missing (e.g. leading rows that have no earlier value).
# DataFrame.ffill()/bfill() are used because fillna(method=...) is deprecated
# in recent pandas releases.
df.ffill().bfill()
# Mean of each column per calendar month, grouping on the month period of Yr_Mo_Dy.
df.groupby(df.Yr_Mo_Dy.dt.to_period('M')).mean()
# Rolling mean of RPT and VAL over a 7-row window; the first six rows are NaN
# because the window is not yet full.
df[['RPT','VAL']].rolling(7).mean()
def change_date(x):
    """Convert a ``'<date>:<hour>'`` string into a pandas timestamp.

    The text before the first colon is taken as the date and the text between
    the first and second colon as the hour. An hour of ``'24'`` is mapped to
    ``00:00:00`` on the following day; any other hour becomes
    ``<hour>:00:00`` on the given date.

    Args:
        x: String containing at least one ``':'`` separator.

    Returns:
        The timestamp produced by ``pd.to_datetime`` for the parsed value.
    """
    import datetime

    pieces = x.split(':')
    hour_part = pieces[1]
    day_part = pieces[0]

    # Hour 24 is not a valid clock time: parse it as midnight and move the
    # result forward by one day afterwards.
    rolls_over = hour_part == '24'
    clock = '00:00:00' if rolls_over else hour_part + ':00:00'

    stamp = pd.to_datetime(day_part + " " + clock)
    if rolls_over:
        stamp = stamp + datetime.timedelta(days=1)
    return stamp
# Reshape Ans1 to one row per dayName and one column per 'PM10등급' value,
# with cells taken from 'size'; NaN cells in the result are replaced by 0.
Ans1.pivot(index='dayName',columns='PM10등급',values='size').fillna(0)
# Distinct differences between consecutive '(년-월-일:시)' values
# (the first row has no predecessor, so its difference is missing).
df['(년-월-일:시)'].diff().unique()
# Make the '(년-월-일:시)' column the index of df, modifying df in place.
df.set_index('(년-월-일:시)',inplace=True)
2. Pivot
# Remove the 'Indicator' column from df in place.
df.drop('Indicator',axis=1,inplace=True)
# Wide table: one row per Location, one column per Period, cells from
# 'First Tooltip'. pivot() raises if a (Location, Period) pair occurs twice.
target.pivot(index='Location',columns='Period',values='First Tooltip')
# Same kind of reshape keyed on Dim1, but duplicate (Dim1, Period) pairs are
# combined by taking the mean of 'First Tooltip'.
df.pivot_table(index='Dim1',columns='Period',values='First Tooltip',aggfunc='mean')
3. Merge, Concat
Concat은 SQL의 UNION ALL(중복 행을 제거하지 않음), Merge는 SQL의 JOIN과 유사
# Stack df1 and df2 vertically: the rows of df2 follow the rows of df1.
pd.concat([df1,df2], axis=0)
# Row-wise concatenation keeping only the columns present in both frames.
pd.concat([df3,df4],join='inner')
# Row-wise concatenation keeping the union of columns; every NaN in the
# result (including cells for columns absent from one frame) is set to 0.
pd.concat([df3,df4],join='outer').fillna(0)
# Transpose df, then take the first 7 rows and first 3 columns of the result.
df.T.iloc[:7,:3]
# Inner join of df5 and df6 on the 'Algeria' column: only rows whose
# 'Algeria' value appears in both frames are kept.
pd.merge(df5,df6,on='Algeria',how='inner')
4. Statistics
# Delete the 'Unnamed: 0' column from df.
del df['Unnamed: 0']
# Rows of names whose Count equals the largest Count.
names[names.Count == names.Count.max()]
# Rows of names whose Count equals the median exactly; this can be empty
# when the median falls between two observed values.
names[names.Count == names.Count.median()]
# Standard deviation of Count (pandas uses the sample form, ddof=1, by default).
names.Count.std()
# Summary statistics for names (count, mean, std, min, quartiles, max for
# numeric columns under the default settings).
names.describe()