from pycaret.regression import *
# Initialise the PyCaret regression experiment on `train_data` with 'IXX' as
# the target column. The returned experiment handle is kept in `s`.
# NOTE(review): `train_data`, `ordinal` and `categorical` are not defined in
# the visible part of the file — confirm they are created before this call.
s = setup(
    # --- data, target and feature roles ---
    data=train_data,
    target='IXX',
    train_size=0.8,
    test_data=None,
    ordinal_features=ordinal,
    categorical_features=categorical,
    ignore_features=None,
    keep_features=None,
    # --- runtime settings ---
    n_jobs=-1,
    use_gpu=False,
    session_id=69,
    # --- data split and cross-validation ---
    data_split_shuffle=True,
    data_split_stratify=False,
    fold_strategy='kfold',
    fold=4,
    fold_shuffle=True,
    # --- preprocessing: imputation ---
    preprocess=True,
    # NOTE(review): imputation_type is None, so the imputation settings
    # below are presumably inactive — confirm against the PyCaret docs.
    imputation_type=None,
    numeric_imputation='mean',
    categorical_imputation='mode',
    iterative_imputation_iters=5,
    # --- preprocessing: categorical encoding ---
    max_encoding_ohe=25,
    encoding_method=None,
    rare_to_value=None,
    rare_value='rare',
    # --- preprocessing: scaling ---
    normalize=True,
    normalize_method='zscore',
    # --- optional steps, all passed as False; their tuning values are
    # --- still supplied alongside the switches ---
    remove_outliers=False,
    outliers_method='iforest',
    outliers_threshold=0.05,
    remove_multicollinearity=False,
    multicollinearity_threshold=0.9,
    polynomial_features=False,
    polynomial_degree=2,
    transformation=False,
    transformation_method='yeo-johnson',
    pca=False,
    pca_method='linear',
    pca_components=None,
    feature_selection=False,
    feature_selection_method='classic',
    n_features_to_select=0.9,
    bin_numeric_features=None,
    low_variance_threshold=None,
    # --- estimators named for iterative imputation / feature selection ---
    numeric_iterative_imputer='lightgbm',
    categorical_iterative_imputer='lightgbm',
    feature_selection_estimator='lightgbm',
)
# Drop the listed metric ids from the experiment `s`, then register three
# custom metrics. All three are registered with greater_is_better=False.
metric_remove = ['mae', 'mse', 'r2', 'rmsle', 'mape', 'rmse']
for metric in metric_remove:
    s.remove_metric(metric)

# NOTE(review): calc_normalized_rmse, calc_mae_percentage and calc_max_error
# are not defined in the visible part of the file — they are expected to be
# defined or imported before this point.
s.add_metric(
    id='nrmse',
    name='NRMSE',
    score_func=calc_normalized_rmse,
    greater_is_better=False,
)
s.add_metric(
    id='maep',
    name='MAE_P',
    score_func=calc_mae_percentage,
    greater_is_better=False,
)
s.add_metric(
    id='maxerr',
    name='MAX_E',
    score_func=calc_max_error,
    greater_is_better=False,
)
# Rank candidate models by the 'nrmse' metric and keep the top one in `best`.
# NOTE(review): `best` from the first call is overwritten immediately by the
# second call, so the `exclude` list has no effect on the model used below
# and the comparison is run twice — confirm which of the two is intended.
best = compare_models(exclude=['lr', 'lar'], sort='nrmse')
best = compare_models(sort='nrmse')

# Run the selected model on `test` through the experiment handle.
# NOTE(review): `test` is not defined in the visible part of the file.
y_pred = s.predict_model(best, data=test)

# Bare expression: its value is only shown when run interactively
# (e.g. as the last statement of a notebook cell); in a script it is a no-op.
y_pred['prediction_label']
# Exploration and diagnostics for the selected model `best`.
# Each call below is made purely for its side effect: every return value is
# discarded. The names are presumably provided by the
# `from pycaret.regression import *` at the top of the file — TODO confirm.
# NOTE(review): some of these helpers likely need optional extras installed
# and an interactive (notebook) front end — verify before running as a script.
eda(display_format='svg')
evaluate_model(best)
interpret_model(best)
dashboard(best)
deep_check(best)
# Tune `best` against the 'nrmse' metric using the 'optuna' search library
# with n_iter=50. choose_better=True is passed, so `tune` is presumably the
# better of the tuned and the original model — confirm against PyCaret docs.
tune = tune_model(
    best,
    optimize='nrmse',
    choose_better=True,
    n_iter=50,
    search_library='optuna',
)
# Take the three best models by 'nrmse' and derive tuned, boosted, bagged,
# blended and stacked variants from them.
# NOTE(review): this is another full compare_models run in addition to the
# earlier ones in this file — confirm the repeated training is intended.
top3 = compare_models(sort='nrmse', n_select=3)

tuned_top3 = [
    tune_model(
        model,
        optimize='nrmse',
        search_library='optuna',
        choose_better=True,
    )
    for model in top3
]

# NOTE(review): boosting_top3 is not referenced again in the visible code;
# only the bagged variants are passed on to blend_models.
boosting_top3 = [
    ensemble_model(
        model,
        optimize='nrmse',
        method='Boosting',
        n_estimators=100,
        choose_better=True,
    )
    for model in tuned_top3
]
bagging_top3 = [
    ensemble_model(
        model,
        optimize='nrmse',
        method='Bagging',
        n_estimators=100,
        choose_better=True,
    )
    for model in tuned_top3
]

# NOTE(review): all three blend weights are equal — confirm that explicit
# equal weights are intended rather than distinct per-model weights.
blender = blend_models(
    bagging_top3,
    optimize='nrmse',
    choose_better=True,
    weights=[0.5, 0.5, 0.5],
)

# Unlike the calls above, stack_models is invoked with no optimize /
# choose_better arguments, so it runs with the library defaults.
stacker = stack_models(tuned_top3)

# Fetch the experiment leaderboard and look at the 'Model' entry of its
# first row. The last line is a bare expression: it only displays a value
# when run interactively.
lb = get_leaderboard()
lb.iloc[0]['Model']