# 시각화
# 혼동행렬 시각화
# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2,
# so it is no longer imported; ConfusionMatrixDisplay.from_estimator replaces it.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Plain matrix lookup for the training-set predictions.
confusion_matrix(y_train, pred_train)

# Visualization 1: build the display straight from an estimator and test data.
# NOTE(review): assumes dummy_model has already been fitted - confirm upstream.
ConfusionMatrixDisplay.from_estimator(
    dummy_model,
    x_test,
    y_test,
    display_labels=["Neg", "Pos"],
    cmap='Blues',
)

# Visualization 2: build the display from a precomputed confusion matrix.
cm = confusion_matrix(y_test, pred_test)
disp = ConfusionMatrixDisplay(cm)
disp.plot()
# Graphviz를 통한 tree 구조 시각화
# https://velog.io/@dcafplz/Scikit-learn-Graphviz%EB%A5%BC-%ED%86%B5%ED%95%9C-tree%EA%B5%AC%EC%A1%B0-%EC%8B%9C%EA%B0%81%ED%99%94
# 평가
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
def print_metrics_classification(y, pred, title=None):
    """
    Print accuracy, recall, precision and F1 score for a set of predictions.

    Each metric is computed by the corresponding scikit-learn scorer called
    with its default arguments and written to standard output on its own line.

    [parameter]
        y: ndarray - ground-truth labels (target)
        pred: ndarray - labels inferred by the model
        title: str - heading printed before the metrics.
               Default: None = no heading is printed (any falsy value is skipped).
    [return value]
        None - the results are only printed.
    [exception]
        Nothing is raised here directly; errors from the scikit-learn
        scorers propagate to the caller unchanged.
    """
    if title:
        print(title)
    print('정확도(accuracy):', accuracy_score(y, pred))
    print('재현율(recall):', recall_score(y, pred))
    # The trailing colon was missing on this label only; added so all four
    # lines share the same "name: value" format.
    print('정밀도(precision):', precision_score(y, pred))
    print('F1점수:', f1_score(y, pred))
from sklearn.metrics import classification_report

# Build the text report comparing y_train with pred_train_tree, then print it.
result = classification_report(y_true=y_train, y_pred=pred_train_tree)
print(result)
# precision_recall_curve is imported here because it is called below; the
# original relied on an import that only appears later in the file.
from sklearn.metrics import (
    PrecisionRecallDisplay,
    average_precision_score,
    precision_recall_curve,
)
import matplotlib.pyplot as plt

# Column 1 of predict_proba - presumably the positive-class probability.
model_pos = model.predict_proba(x_test)[:, 1]
precisions, recalls, thresh = precision_recall_curve(y_test, model_pos)

# PR Curve
disp = PrecisionRecallDisplay(precisions, recalls)
disp.plot()
plt.show()

# AP Score: draw the same curve again, this time passing the average
# precision to the display.
ap_score = average_precision_score(y_test, model_pos)
disp = PrecisionRecallDisplay(precisions, recalls, average_precision=ap_score)
disp.plot()
plt.show()
from sklearn.metrics import RocCurveDisplay, roc_auc_score, roc_curve

# ROC curve points and the area under the curve, both from the same scores.
fprs, tprs, thresh = roc_curve(y_true=y_test, y_score=model_pos)
model_auc = roc_auc_score(y_true=y_test, y_score=model_pos)

# Draw the curve, passing the AUC and the name 'Model' to the display.
disp = RocCurveDisplay(
    fpr=fprs,
    tpr=tprs,
    roc_auc=model_auc,
    estimator_name='Model',
)
disp.plot()
plt.show()
# 임계값(Threshold) 변경을 통한 재현율, 정밀도 변환
# Compute the precision and recall obtained at each candidate threshold.
from sklearn.metrics import precision_recall_curve
precision_list, recall_list, threshold_list = precision_recall_curve(y_test, pred_test_proba[:, 1])

# Print one tab-separated row per threshold: threshold, precision, recall.
# zip stops at the shortest input, so the extra final precision/recall entry
# (which has no matching threshold) is not printed.
for thresh, pre, recall in zip(threshold_list, precision_list, recall_list):
    print(round(thresh, 3), round(pre, 3), round(recall, 3), sep='\t')

# Visualization
import matplotlib.pyplot as plt
plt.figure(figsize=(8, 7))
# precision_recall_curve returns one more precision/recall value than
# thresholds; the last entry is dropped so x and y have equal length
# (plotting the unsliced arrays raises a dimension-mismatch ValueError).
plt.plot(threshold_list, precision_list[:-1], marker='o', label='정밀도')
plt.plot(threshold_list, recall_list[:-1], marker='x', label='재현율')
plt.grid(True)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.show()