
2번째 강의일기 ~
์ค๋์ iris๋ฐ์ดํฐ์
์ผ๋ก ํ์ดํ๋ผ์ธ+GridSeachCV ์ฐ๊ฒฐํด์ ํด๋ณผ๊น ์ฝ๋๋ฅผ ์์ฑํด๋ดค๋ค
# Load the iris dataset and wrap the feature matrix in a DataFrame so the
# columns carry the dataset's feature names.
# NOTE(review): the imports for pd, load_iris and train_test_split are not
# visible in this excerpt — confirm they exist at the top of the script.
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target
target_names = iris.target_names
print(X.head())

# Hold out 20% of the rows for testing. random_state pins the shuffle and
# stratify=y asks for the split to follow the class labels in y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
print(f"Train: {X_train.shape}")
+결과값

# Single-step pipeline: its only stage is a random-forest classifier
# registered under the step name 'clf'. That name is the prefix used by the
# 'clf__*' keys of the hyper-parameter grid.
pipe_li = Pipeline([
    ('clf', RandomForestClassifier(random_state=42)),
])
# The status message was mis-encoded in the source; restored to the
# intended Korean text ("pipeline setup complete").
print("Pipeline 구성완료")
+결과값

# Hyper-parameter grid for the 'clf' pipeline step.
# 4 * 3 * 2 * 2 = 48 candidate settings; with 5 CV folds that is 240 fits
# before the final refit.
param_grid = {
    'clf__n_estimators': [10, 50, 100, 200],
    'clf__max_depth': [None, 10, 20],
    'clf__min_samples_split': [2, 5],
    'clf__min_samples_leaf': [1, 2],
}

# Exhaustive search over the grid, scored by accuracy on stratified 5-fold
# splits of the training data, using all available cores (n_jobs=-1).
grid_clf = GridSearchCV(
    pipe_li,
    param_grid,
    cv=StratifiedKFold(5),
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
)
grid_clf.fit(X_train, y_train)
# The completion message was mis-encoded in the source; restored to the
# intended Korean text ("GridSearch done!").
print('GridSearch 완료!')
+결과값

# Evaluate the best pipeline found by the grid search on the held-out test
# split: overall accuracy first, then the confusion matrix.
best_model = grid_clf.best_estimator_
y_pred = best_model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# sep="\n" puts the matrix on the line after its heading, matching two
# separate print calls.
print("Confusion_matrix:", confusion_matrix(y_test, y_pred), sep="\n")
+결과값

# Map integer class ids back to species names by indexing the name array
# with the id arrays, then show the first five predicted and true labels.
class_names = iris.target_names
predicted_flowers = class_names[y_pred]
true_flowers = class_names[y_test]

for labels in (predicted_flowers, true_flowers):
    print(labels[:5])
+결과값

# Predict the species of one new measurement.
# The pipeline was fitted on a DataFrame with named columns, so the sample
# is wrapped in a DataFrame with the same column names. Passing a bare
# nested list makes scikit-learn warn that X has no valid feature names.
new_flower = pd.DataFrame(
    [[5.1, 3.5, 1.4, 0.2]],
    columns=iris.feature_names,
)
pred = grid_clf.best_estimator_.predict(new_flower)

# pred holds a single class id; index the name array with it and unwrap.
predicted_flowers_name = target_names[pred][0]
# The label was mis-encoded in the source; restored to the intended Korean
# text ("predicted flower:").
print("예측된 꽃:", predicted_flowers_name)
+결과값

공부 정리＿〆(。。)
์ค๋ฌด์์๋ ์ฝ๋ฉ์ ์์์ฐ๊ณ vscode๋ฅผ ์์ฃผ์ด๋ค๋๋ง์ ๊น์์ํ๋๋ฐ ํ์ด์ฌ์ด ๊น๋ ค์๋๋ฐ ์๊พธ ์๋๋ค๊ณ ๋จ๊ณ ์ฌ๊ธฐ์ ๊ธฐ ์์น๋ฅผํด๋ ์๋์์. ๊ฒฐ๋ก ์ ๊ฐ์์๋ง๋ค์ด์ ํ์ผ์ ์
๋ก๋ํ๊ธฐ๋กํจ.

동기분의 도움으로 코드불러오고 그전 단계,전전단계를 왔다리갔다리도 할수있게됨.
그뒤로는 아주 잘되었음.
์ค๋ ๋ด๊ฐ ์ํ๋ ์ฝ๋ฉ ์์๋
์์ด๋ฆฌ์ค ->ํ์ดํ๋ผ์ธ->๋๋คํฌ๋ ์คํธ(์์๋ธ)->GridSearchCV->ํ๊ฐ->์๊ฝ์์ธก ์์ผ๋ก ์ด์ด๊ฐ๊ณ ์ถ์์.
์ ์ฒ๋ฆฌ์ ๋ชจ๋ธ์ pipeline์ผ๋ก ๊ตฌ์ฑํด์ GridSearchCV๋ฅผ ํตํด์ ์ ์ฒด ํ์ดํ๋ผ์ธ ๋จ์๋ฅผ ํ๋ํ๋๊ฑธ ํด๋ณด๊ณ ์ถ์๊ธฐ ๋๋ฌธ.
๐์ค์ค๋ก ์ค์ต ํด๋ณธ๊ฒฐ๊ณผ๐
1. RandomForest์์ ์ค์ผ์ผ๋ง
์์ง์ ๋๋ฌด ์ด๋ ต๊ณ ํ์ง๋ง ์ฐจ์ฐจ ์ค๋ ฅ์ดํค์์ ธ๋๊ฐ๊ฒ ๋ ธ๋ ฅํ ๊ฑฐ๊ณ ๊ทธ๋ ธ๋ ฅ์ด ๊ผญ ๋น์๋ฐ๋๊ฑฐ๋ผ ๋๋ ๋ฏฟ์ !!! ๋๋ฆฌ์ง๋ง ๋์ํ ์์์ผ๋๊น!!!!!๋ ๋ฏฟ์ด๋ณด์์์์์