[TIL] 250131

혜·2025년 1월 31일

목록 보기

52/88

오늘 프로젝트 진행 내용

새로운 매개변수 추가해서 모델(LightGBM, XGBoost) 학습시키기
Optuna를 이용해서 하이퍼 파라미터 튜닝하기
분류 threshold를 조절하면서 recall-precision 확인하기

# Predict class probabilities, then keep only the positive-class column
# (index 1) so later steps can apply arbitrary decision thresholds.
class_probabilities = final_model.predict_proba(X_test)
xgb_proba = class_probabilities[:, 1]

# Baseline: turn probabilities into 0/1 labels with the default 0.5 cut-off.
default_threshold = 0.5
xgb_pred = (xgb_proba >= default_threshold).astype(int)

get_score('xgb with best params, threshold 0.5 |', y_test, xgb_pred)

# Find the optimal threshold - ROC curve.
# The cut-off is chosen by maximising Youden's J statistic (TPR - FPR).
from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y_test, xgb_proba)
# thresholds[0] is a sentinel placed above every score (np.inf in recent
# scikit-learn releases) meaning "predict no positives". Skip it so the
# selected cut-off is always a finite, usable probability, even when no
# real threshold achieves TPR > FPR.
optimal_idx = (tpr - fpr)[1:].argmax() + 1
optimal_threshold = thresholds[optimal_idx]

print(f"최적 임계값: {optimal_threshold}")
# Recorded output -> 최적 임계값: 0.21580682694911957
# NOTE(review): the cut-off is tuned on y_test, the same split that get_score
# later reports metrics on; consider tuning on a validation split instead.

# Find the optimal threshold - precision-recall curve.
# The cut-off is the one whose precision * recall product is largest.
# This rebinds thresholds / optimal_idx / optimal_threshold from the ROC step.
from sklearn.metrics import precision_recall_curve

precisions, recalls, thresholds = precision_recall_curve(y_test, xgb_proba)
# NOTE(review): per the scikit-learn docs, precisions/recalls hold one more
# entry than thresholds (a trailing precision=1, recall=0 point). Its product
# is 0, so argmax should never select it -- confirm for the installed version.
pr_product = precisions * recalls
optimal_idx = pr_product.argmax()
optimal_threshold = thresholds[optimal_idx]

print(f"최적 임계값: {optimal_threshold}")
# Recorded output -> 최적 임계값: 0.3881194591522217

# Threshold 1: the ROC-derived cut-off, rounded to two decimals.
threshold = 0.22
above_cutoff = xgb_proba >= threshold
xgb_pred_adjusted1 = above_cutoff.astype(int)

get_score('xgb with best params, threshold 0.22 |', y_test, xgb_pred_adjusted1)
# Recorded output:
# xgb with best params, threshold 0.22 | accuracy:  0.8378 f1_score:  0.6824 AUC:  0.8325 recall:  0.8234 precision:  0.5826
# NOTE(review): the recorded AUC differs between the threshold runs in this
# file, which suggests get_score computes AUC from the hard 0/1 labels rather
# than from xgb_proba -- confirm in get_score.

# Threshold 2: the precision-recall-derived cut-off, rounded to two decimals.
threshold = 0.39
above_cutoff = xgb_proba >= threshold
xgb_pred_adjusted2 = above_cutoff.astype(int)

get_score('xgb with best params, threshold 0.39 |', y_test, xgb_pred_adjusted2)
# Recorded output:
# xgb with best params, threshold 0.39 | accuracy:  0.8793 f1_score:  0.7105 AUC:  0.8137 recall:  0.7001 precision:  0.7211
# NOTE(review): the recorded AUC differs between the threshold runs in this
# file, which suggests get_score computes AUC from the hard 0/1 labels rather
# than from xgb_proba -- confirm in get_score.