날짜처리
# --- Building timestamps from epoch offsets -------------------------------
# An integer is an offset from 1970-01-01 counted in `unit`.
pd.Timestamp(0)
pd.Timestamp(10, unit='D')
# NOTE(review): unit codes are case-sensitive ('M' = months, 'm' = minutes);
# confirm that months is what is wanted here.
pd.Timestamp(10, unit='M')

# --- Parsing strings and Series -------------------------------------------
pd.to_datetime('2020-05-01')

# A numeric Series is read as epoch offsets, here counted in days.
s = pd.Series([10, 100, 1000, 10000])
pd.to_datetime(s, unit='D')

# '2018-13-01' has no valid month, so it exercises the error handling below.
s = pd.Series(['2015-12-05', '2020-11-08', '2018-13-01'])

# With the default errors='raise' an unparseable entry raises ValueError.
# Falling back to the untouched input reproduces what errors='ignore' used to
# do; that option is deprecated, so the fallback is spelled out explicitly.
try:
    s_parsed = pd.to_datetime(s)
except ValueError:
    s_parsed = s

# errors='coerce' turns unparseable entries into NaT instead of raising.
pd.to_datetime(s, errors='coerce')

# --- Explicit formats ------------------------------------------------------
# Literal text (including non-ASCII) in the format must match the input
# exactly; only the % directives are interpreted.
pd.to_datetime('2020년 5월 1일', format='%Y년 %m월 %d일')
d = '시작일자 : 11월 8일, 2020 시작시간: 09:15 am'
f = '시작일자 : %m월 %d일, %Y 시작시간: %I:%M %p'
pd.to_datetime(d, format=f)
# Parse the 'Birth' column using an explicit minute-resolution format.
birth = pd.to_datetime(df['Birth'], format='%Y-%m-%d %H:%M')

# --- Component accessors ---------------------------------------------------
birth.dt.date
birth.dt.year
birth.dt.month
birth.dt.day
birth.dt.time
birth.dt.quarter
birth.dt.day_name()
# NOTE(review): locale names are platform-specific; 'Korean' looks like a
# Windows locale name (other systems usually need e.g. 'ko_KR.UTF-8') - confirm.
birth.dt.day_name('Korean')
birth.dt.weekday

# --- Weekend detection -----------------------------------------------------
# weekday counts Monday=0 ... Sunday=6, so the weekend is 5 and 6 (>= 5);
# this now agrees with the name-based check on the next line.
birth.dt.weekday >= 5
birth.dt.day_name('Korean').isin(['토요일','일요일'])

# --- Calendar position and flags -------------------------------------------
birth.dt.isocalendar().week
birth.dt.dayofyear
birth.dt.days_in_month
birth.dt.is_leap_year
birth.dt.is_month_end
birth.dt.is_quarter_start
birth.dt.is_quarter_end

# --- Date arithmetic ---------------------------------------------------------
birth + pd.Timedelta(days=100)
birth - pd.Timedelta(weeks=7)

# Elapsed time up to a fixed reference date, expressed in several units.
elapsed = pd.to_datetime('2020-05-01') - birth
# Whole days.
elapsed.dt.days
# Whole months of average Gregorian length (365.2425 / 12 days); a month is
# not a fixed-length unit, so the timedelta cannot be cast to it directly.
elapsed // pd.Timedelta(days=365.2425 / 12)
# Hours, fractional; total_seconds on a Series lives under the .dt accessor.
elapsed.dt.total_seconds() / 3600.0
# Seven consecutive daily stamps starting on the given date.
pd.date_range('2020-05-01', periods=7, freq='D')
# Seven weekly stamps; 'W' is anchored on Sundays, so the first stamp is the
# first Sunday on or after the start date.
pd.date_range('2020-05-01', periods=7, freq='W')

# Earliest and latest timestamps in train.TIME. min()/max() avoid sorting the
# column twice and skip missing values.
first_time = train.TIME.min()
last_time = train.TIME.max()

# Five daily dates from the earliest timestamp, rendered as 'YYYYMMDD' strings.
pd.date_range(first_time, periods=5, freq='D').strftime('%Y%m%d').tolist()
time-series handling
# Upsample to one-second bins, carrying the last observation forward.
# Lowercase 's' / 'h' are used because the uppercase aliases are deprecated.
data.resample('1s').ffill()

# Keep only rows where col1 differs from the previous row. The first row is
# kept as well, because its diff is NaN and NaN != 0 evaluates to True.
data.loc[data['col1'].diff() != 0, 'col1']

# Put df on a complete hourly index and fill the gaps that appear.
new_idx = pd.date_range(start_date, end_date, freq='h')
df = df.set_index('TIME_STAMP')
df = df.reindex(new_idx)  # timestamps absent from df become all-NaN rows
df = df.interpolate()

# Hourly means with bins anchored at the first timestamp ...
df.resample('1h', origin='start').mean()
# ... or anchored at the last timestamp, with right-closed, right-labelled bins.
df.resample('1h', origin='end', closed='right', label='right').mean()

# Index label of the row whose TIME is closest to a target timestamp
# (two equivalent spellings: operator form and method form).
idx = (df['TIME'] - pd.Timestamp('2020-10-20 00:00:00')).abs().idxmin()
idx = df['TIME'].sub(pd.Timestamp('2020-10-20 00:00:00')).abs().idxmin()
def longest_period_above_mean(data):
    """Return the length of the longest run of consecutive values above the mean.

    Args:
        data: A numeric pandas Series.

    Returns:
        The number of elements in the longest unbroken stretch whose values
        are strictly greater than ``data.mean()``, as an ``int``. Returns 0
        when the series is empty or no value exceeds the mean.
    """
    above_mean = data > data.mean()
    if not above_mean.any():
        return 0

    # Every value that is not above the mean starts a new group, so each
    # group holds at most one unbroken run of above-mean values.
    run_id = (~above_mean).cumsum()
    run_lengths = above_mean.astype(int).groupby(run_id).sum()
    return int(run_lengths.max())
def longest_period_decreasing(data):
    """Return the length of the longest run of consecutive decreases.

    Args:
        data: A numeric pandas Series.

    Returns:
        The largest number of back-to-back steps in which each value is
        strictly smaller than the one before it, as an ``int``. Returns 0
        when the series is empty or never decreases.
    """
    is_decreasing = data.diff() < 0
    if not is_decreasing.any():
        return 0

    # Every non-decreasing step starts a new group, so the sum inside a group
    # is the length of one unbroken run of decreasing steps.
    run_id = (~is_decreasing).cumsum()
    run_lengths = is_decreasing.astype(int).groupby(run_id).sum()
    return int(run_lengths.max())