
5 Modeling
Setup
from sklearn.model_selection import train_test_split
# Hold out 20% of (X, y) for validation; the fixed seed keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Install Optuna
!pip install optuna
# Optuna
import optuna
from optuna import Trial
from optuna.samplers import TPESampler
5.1.1 Optuna Hyperparameter Search
# Single Data Set
# TPE (Tree-structured Parzen Estimator) sampler; the fixed seed makes the
# sequence of suggested hyperparameters reproducible across runs.
sampler = TPESampler(seed=10)
# define function
def objective(trial):
    """Optuna objective: fit a LightGBM regressor and return its validation MSE.

    Samples one hyperparameter configuration from ``trial``, trains an
    ``LGBMRegressor`` on the module-level ``X_train``/``y_train`` with early
    stopping monitored on ``X_val``/``y_val``, and scores the fitted model on
    that same validation split.

    Args:
        trial: The ``optuna.trial.Trial`` used to sample hyperparameters.

    Returns:
        Mean squared error of the model's predictions on ``X_val``.
    """
    # Local import: the early-stopping callback lives in the lightgbm package
    # itself, and only LGBMRegressor is referenced elsewhere in this block.
    import lightgbm

    # suggest_float(...) replaces the deprecated suggest_uniform(...) and
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform(...);
    # the sampled distributions are the same.
    lgbm_param = {
        'objective': 'regression',
        'verbose': -1,
        'metric': 'mse',
        'num_leaves': trial.suggest_int('num_leaves', 2, 1024, step=1, log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 10.0),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 1e-8, 1e-2, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 100, 3000),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        # NOTE(review): LightGBM only applies row subsampling (bagging) when
        # subsample_freq > 0; it is left at its default of 0 here, so this
        # value currently has no effect on training. Tune/set subsample_freq
        # too (and pass it to the final model) if bagging is intended.
        'subsample': trial.suggest_float('subsample', 0.4, 1.0, log=True),
    }

    # Generate model
    model_lgbm = LGBMRegressor(**lgbm_param)

    # The `early_stopping_rounds` / `verbose` keyword arguments of fit() were
    # removed in LightGBM 4.0; the callback form works on old and new releases.
    model_lgbm.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        callbacks=[lightgbm.early_stopping(stopping_rounds=25, verbose=False)],
    )

    val_mse = mean_squared_error(y_val, model_lgbm.predict(X_val))
    return val_mse
# Run 100 trials, minimising the value returned by objective() (validation MSE).
optuna_lgbm = optuna.create_study(sampler=sampler, direction='minimize')
optuna_lgbm.optimize(objective, n_trials=100)

# Keep the best trial and its sampled hyperparameters for the final model.
lgbm_trial = optuna_lgbm.best_trial
lgbm_trial_params = lgbm_trial.params
print(
    'Best Trial: score {},\nparams {}'.format(
        lgbm_trial.value,
        lgbm_trial_params,
    )
)
Best Trial: score 30.43495607441355,
{'class_weight': None,
 'colsample_bytree': 0.8423168945999394,
 'learning_rate': 0.00724401703273698,
 'max_depth': 9,
 'min_child_samples': 7,
 'n_estimators': 584,
 'num_leaves': 42,
 'reg_alpha': 0.2697752337668521,
 'reg_lambda': 0.3664981899835943,
 'subsample': 0.4583503212208177}
lgbm_trial_params
{'class_weight': None,
 'colsample_bytree': 0.8423168945999394,
 'learning_rate': 0.00724401703273698,
 'max_depth': 9,
 'min_child_samples': 7,
 'n_estimators': 584,
 'num_leaves': 42,
 'reg_alpha': 0.2697752337668521,
 'reg_lambda': 0.3664981899835943,
 'subsample': 0.4583503212208177}
5.1.2 Modeling Study & Submission
# Single data set
# Output directory for submission files
sub_path = '.../submission/'

# Fit a LightGBM regressor on the training split using the best Optuna parameters
lgbm = LGBMRegressor(**lgbm_trial_params)
lgbm_study = lgbm.fit(X_train, y_train)

# Predict on X_test and store the result under 'INVC_CONT'
submission['INVC_CONT'] = lgbm_study.predict(X_test)

# Write the result to CSV
submission.to_csv(
    sub_path + 'lgbmSub15.csv',
    sep=',',
    index=False,
)