Pipeline
- pipeline 이란?
- 코드로 구현하기
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
# Ordered (name, estimator) steps: the scaler runs first, the classifier last.
# The step names are what `set_params` keys are prefixed with later on.
estimators = [
    ("scaler", StandardScaler()),
    ("clf", DecisionTreeClassifier()),
]

# Chain the steps into a single estimator object.
pipe = Pipeline(steps=estimators)
- 구조확인하기
pipe.steps
>>>
[('scaler', StandardScaler()), ('clf', DecisionTreeClassifier())]
pipe.steps[0]
>>>
('scaler', StandardScaler())
- 파라미터 지정
# Configure the 'clf' step in one call; each key is
# <step name>__<parameter name>.
pipe.set_params(
    clf__max_depth=2,
    clf__random_state=13,
)
- 스텝 이름 + 언더바 두 개(__) + 파라미터 이름 = 값 (예: clf__max_depth=2)
- 학습 및 평가
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing. `stratify=y` splits in a
# stratified fashion using y as the class labels, and the fixed seed makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=13,
    stratify=y,
)

# Fit every pipeline step on the training split only.
pipe.fit(X_train, y_train)
from sklearn.metrics import accuracy_score
# Predict on both splits so train and test accuracy can be compared.
y_pred_train = pipe.predict(X_train)
y_pred_test = pipe.predict(X_test)

# accuracy_score's documented signature is (y_true, y_pred): pass the ground
# truth first. Accuracy itself is symmetric, but keeping the documented order
# avoids wrong results if this pattern is reused with asymmetric metrics.
print(f'Train acc : {accuracy_score(y_train, y_pred_train)}')
print(f'Test acc : {accuracy_score(y_test, y_pred_test)}')