e.g.)
# Fit a decision tree on columns 2 onward of the iris data, then score it
# on the very same samples it was trained on (training accuracy only).
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

X = iris.data[:, 2:]
y = iris.target
clf = DecisionTreeClassifier().fit(X, y)  # fit() returns the estimator itself

# Predictions on the training samples; no held-out data is involved here.
y_pred_tr = clf.predict(X)
accuracy_score(y_true=y, y_pred=y_pred_tr)
# Plot the decision tree held in `iris_clf` on a 12x8 figure.
# NOTE(review): `iris_clf` and `plt` are not defined in this snippet (the
# model fitted above is named `clf`) — confirm both are defined earlier.
from sklearn.tree import plot_tree
plt.figure(figsize=(12,8))
plot_tree(iris_clf);  # trailing ';' presumably suppresses the notebook echo of the return value
# Plot the decision regions of `iris_clf` over columns 2 onward of the
# iris data on a 14x8 figure.
# NOTE(review): `iris_clf` and `plt` are not defined in this snippet —
# confirm both are defined earlier in the notes.
from mlxtend.plotting import plot_decision_regions

plt.figure(figsize=(14, 8))
plot_decision_regions(
    X=iris.data[:, 2:],
    y=iris.target,
    clf=iris_clf,
    legend=2,  # presumably a legend location code — TODO confirm
)
plt.show()
Train 데이터와 Test 데이터를 분리
Validation 개념은 뒤에서!
Scikit-learn Module
# Split the samples into train and test sets: 20% is held out for testing,
# and `stratify=labels` asks for a split stratified on the class labels.
from sklearn.model_selection import train_test_split

feature = iris.data[:, 2:]
labels = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    feature,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=13,  # fixed seed so the same split is reproduced on every run
)
# Pair each feature name with the importance the fitted tree reports for it.
# NOTE(review): zip() stops at the shorter input, so a `clf` fitted on only
# two columns would give two entries keyed by the first two feature names.
# The four-entry output recorded below implies `clf` was fitted on all four
# features at this point — confirm.
iris_clf_model = {
    name: importance
    for name, importance in zip(iris.feature_names, clf.feature_importances_)
}
iris_clf_model
>>>
{'sepal length (cm)': 0.0,
'sepal width (cm)': 0.033898305084745756,
'petal length (cm)': 0.3958012326656394,
'petal width (cm)': 0.5703004622496148}