import matplotlib
import missingno as msno
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Load the dataset into a DataFrame; the path is relative to the working directory.
df = pd.read_csv('bike_sharing_daily.csv')
# Visualize the distribution of missing values.
# Each figure is shown before the next one is drawn.
msno.matrix(df)
plt.show()
msno.bar(df)
plt.show()
# Per column: does any cell hold an empty string? NaN detection would not
# catch these. The result is not stored (notebook-style inspection).
df.apply(lambda column: (column == '').any())

# Drop rows in which every value is missing.
df_drop_all = df.dropna(how='all')

# Drop rows containing at least one missing value.
df_drop_any = df.dropna(how='any')

# Keep only rows that have at least 10 non-missing values.
df_drop_thresh = df.dropna(thresh=10)

# Drop rows whose 'atemp' value is missing; other columns are not considered.
df_drop_subset = df.dropna(subset=['atemp'])
# Fill missing 'temp' values with that column's maximum; the dict form
# restricts the fill to the listed column.
df_fill_with_column = df.fillna({'temp': df['temp'].max()})

# Directional fills. ffill()/bfill() replace the deprecated
# fillna(method='ffill'/'bfill') spelling. The results below are not stored
# (notebook-style inspection).
df.ffill()
df.bfill()
# limit=1 fills at most one consecutive missing value per gap.
df.bfill(limit=1)

# Column selection: forward-fill a single column.
df['temp'].ffill()

# Interpolation using the index values as the x-coordinates.
df.interpolate(method='values')
# method='time' interpolation needs a DatetimeIndex, so parse 'dteday' into
# datetimes and make it the index first.
df['dteday'] = pd.to_datetime(df['dteday'])
df = df.set_index('dteday')

# Interpolate missing values weighted by the time gap between index entries.
df_fill_interpolate_time = df.interpolate(method='time')