스태킹(Stacking)과 배깅, 부스팅 비교
스태킹 모델은 다음 두 종류의 모델이 필요하다: (1) 개별적인 기반 모델(base model)들, (2) 기반 모델들의 예측 결과를 학습 데이터로 삼아 학습하는 최종 메타 모델(meta model).
즉, 스태킹 모델의 핵심은 여러 개별 모델의 예측 데이터를 각각 스태킹 (다시 합하는) 형태로 결합하여 최종 메타 모델의 학습용 피처 데이터셋과 테스트용 피처 데이터셋을 만든다는 것!
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
# Load the breast-cancer dataset bundled with scikit-learn and pull out the
# feature matrix and the target labels.
cancer_data = load_breast_cancer()
X_data, y_label = cancer_data.data, cancer_data.target

# Hold out 20% of the samples as the test set; the fixed seed keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_label, test_size=0.2, random_state=0
)
# Base (level-0) learners whose predictions become the meta-model's features.
knn_clf = KNeighborsClassifier(n_neighbors=4)
rf_clf = RandomForestClassifier(n_estimators=100, random_state=0)
# Seeded like rf_clf and the train/test split so that the reported
# accuracies are reproducible between runs.
dt_clf = DecisionTreeClassifier(random_state=0)
ada_clf = AdaBoostClassifier(n_estimators=100, random_state=0)

# Meta (level-1) learner that is fitted on the stacked base-model predictions.
lr_final = LogisticRegression()
# Fit every base learner on the training split (same order as declared).
base_models = (knn_clf, rf_clf, dt_clf, ada_clf)
for base_model in base_models:
    base_model.fit(X_train, y_train)

# Predict the held-out test split with each fitted base learner.
knn_pred, rf_pred, dt_pred, ada_pred = (
    base_model.predict(X_test) for base_model in base_models
)

# Test-set accuracy of each individual base learner.
accuracy_score(y_test, knn_pred)
accuracy_score(y_test, rf_pred)
accuracy_score(y_test, dt_pred)
accuracy_score(y_test, ada_pred)
# Stack the base models' test-set predictions row-wise (one row per model),
# then transpose so that each row is a test sample and each column is one
# base model's prediction. These are the meta-model's TEST features.
pred = np.array([knn_pred, rf_pred, dt_pred, ada_pred])
pred.shape
pred = np.transpose(pred)
pred.shape

# Build the meta-model's TRAINING features from out-of-fold predictions on
# the training split. Fitting the meta-model on the test-set predictions with
# y_test (and then scoring on that same data) leaks the test labels and only
# measures training accuracy, so the test split is kept strictly for the
# final evaluation. cross_val_predict fits clones of each estimator, so the
# already-fitted base models used for `pred` are left untouched.
train_pred = np.column_stack([
    cross_val_predict(base_clf, X_train, y_train, cv=5)
    for base_clf in (knn_clf, rf_clf, dt_clf, ada_clf)
])
train_pred.shape

# Fit the meta-model on training-split features/labels only, then predict
# the unseen test-split features.
lr_final.fit(train_pred, y_train)
final = lr_final.predict(pred)
## Test-set accuracy of the final meta-model
accuracy_score(y_test, final)
... 이어서 정리하기