import pandas as pd
# --- Data loading -----------------------------------------------------------
# All files are whitespace-delimited with no header row. The separator is
# written as a raw string: '\s' inside a normal string literal is an invalid
# escape sequence (DeprecationWarning, SyntaxWarning on Python 3.12+).

# Feature-name table: one row per feature, (column_index, column_name).
feature_name_df = pd.read_csv(
    url, sep=r'\s+', header=None, names=['column_index', 'column_name']
)
feature_name = feature_name_df['column_name'].tolist()

# Feature matrices; column labels are attached after loading.
X_train = pd.read_csv(X_train_url, sep=r'\s+', header=None)
X_test = pd.read_csv(X_test_url, sep=r'\s+', header=None)
X_train.columns = feature_name
X_test.columns = feature_name

# Targets: single-column DataFrames whose column is named 'action'.
y_train = pd.read_csv(y_train_url, sep=r'\s+', header=None, names=['action'])
y_test = pd.read_csv(y_test_url, sep=r'\s+', header=None, names=['action'])
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import time
import warnings
# Silence every warning for the rest of the script (process-wide side effect).
warnings.filterwarnings('ignore')

# --- Baseline: gradient boosting with default hyper-parameters --------------
gb_clf = GradientBoostingClassifier(random_state=13)

# Time only the training call so the number printed as 'Fit time' really is
# the fit time (prediction and scoring are excluded).
start_time = time.time()
# y_train is a one-column DataFrame; flatten it to the 1-D target array that
# scikit-learn estimators expect instead of relying on implicit conversion.
gb_clf.fit(X_train, y_train.values.ravel())
fit_seconds = time.time() - start_time

gb_pred = gb_clf.predict(X_test)
print('ACC: ', accuracy_score(y_test, gb_pred))
print('Fit time: ', fit_seconds)

# Output recorded in the original script (that timing also covered
# prediction and scoring):
#   ACC: 0.9389209365456397
#   Fit time: 1094.129744052887
from sklearn.model_selection import GridSearchCV
# --- Hyper-parameter search -------------------------------------------------
# 2 x 2 grid over the number of boosting stages and the learning rate,
# scored with 2-fold cross-validation on all available cores.
params = {'n_estimators': [100, 500], 'learning_rate': [0.05, 0.1]}

start_time = time.time()
grid = GridSearchCV(gb_clf, param_grid=params, cv=2, verbose=1, n_jobs=-1)
# Pass the target as a 1-D array rather than a one-column DataFrame.
grid.fit(X_train, y_train.values.ravel())
# NOTE: gb_clf is not refit by the search (GridSearchCV fits clones of the
# estimator), so predicting with gb_clf here would only re-score the baseline
# model and inflate the reported time. The tuned model is
# grid.best_estimator_, evaluated below.
print('Fit time: ', time.time() - start_time)

print(grid.best_score_)
print(grid.best_params_)
# Test-set accuracy of the estimator refit with the best parameters.
print(accuracy_score(y_test, grid.best_estimator_.predict(X_test)))
# Reference
# 1) Zerobase Data School lecture materials (제로베이스 데이터 스쿨 강의자료)