House Price Prediction EDA (updated 2019.03.12)
# Report the skewness and kurtosis of the target column `price`.
price_column = df_train['price']
price_skew = price_column.skew()
print("Skewness: %f" % price_skew)
price_kurt = price_column.kurt()
print("Kurtosis: %f" % price_kurt)
# Apply the same column clean-up to both the train and the test frame.
for df in [df_train, df_test]:
    # Keep only the first 8 characters of each date string
    # (presumably the YYYYMMDD prefix -- confirm against the raw data).
    df['date'] = df['date'].str[:8]
    # A yr_renovated of 0 is treated as missing and then filled from
    # yr_built. replace() + fillna() work on the whole column at once
    # instead of calling a Python lambda for every row.
    df['yr_renovated'] = df['yr_renovated'].replace(0, np.nan).fillna(df['yr_built'])
`np.nan`으로 0값을 바꾸고, `fillna()`를 이용한 trick이 인상적이다.
#prepare fit model with cross-validation
# Cross-validation scaffolding used by the training loop.
# 5 shuffled folds with a fixed seed so the splits are reproducible.
folds = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)
# Zero-initialised buffers: one slot per training row (out-of-fold
# predictions) and one slot per test row (test predictions).
oof = np.zeros(df_train.shape[0])
predictions = np.zeros(df_test.shape[0])
# Starts empty; presumably filled with per-fold importances later on.
feature_importance_df = pd.DataFrame()
#run model
for fold_, (trn_idx, val_idx) in enumerate(folds.split(df_train)):
trn_data = lgb.Dataset(df_train.iloc[trn_idx][train_columns], label=y_reg.iloc[trn_idx])#, categorical_feature=categorical_feats)
val_data = lgb.Dataset(df_train.iloc[val_idx][train_columns], label=y_reg.iloc[val_idx])#, categorical_feature=categorical_feats