서포트 벡터 머신(SVM) : 데이터 분류를 위하여 마진(margin)이 최대가 되는 결정 경계선(decision boundary)을 찾아내는 머신러닝 방법
장점 : 고차원 데이터에서도 잘 동작하며, 커널 트릭(kernel trick)을 이용해 비선형 분류가 가능하다
단점 : 데이터가 많을수록 학습 속도가 느려지고, 커널과 하이퍼파라미터(C, gamma) 선택에 따라 성능 차이가 크다
# 패키지 및 데이터셋 추가
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import numpy as np
import pandas as pd
# Load the basketball player stats dataset and hold out 20% of rows for testing.
df = pd.read_csv("https://raw.githubusercontent.com/wikibook/machinelearning/2.0/data/csv/basketball_stat.csv")
# NOTE(review): no random_state is given, so the split — and every printed result
# below — is not reproducible run-to-run; pass random_state=<int> to pin it.
train, test = train_test_split(df, test_size=0.2)
# Find the best SVM hyperparameters via cross-validated grid search.
def svc_param_selection(X, y, nfolds):
    """Grid-search an RBF-kernel SVC over (C, gamma) and return the fitted search.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training feature matrix.
    y : array-like of shape (n_samples,)
        Training class labels.
    nfolds : int
        Number of cross-validation folds.

    Returns
    -------
    GridSearchCV
        The fitted search object; use its ``predict`` / ``best_params_``.
    """
    # 6 gamma values x 6 C values = 36 candidates, each cross-validated nfolds times.
    svm_parameters = [{'kernel': ['rbf'],
                       'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
                       'C': [0.01, 0.1, 1, 10, 100, 1000]}]
    clf = GridSearchCV(SVC(), svm_parameters, cv=nfolds)
    clf.fit(X, y)
    print(clf.best_params_)
    return clf
# Train on two features — 3-point shots made ('3P') and blocks ('BLK');
# the target is the player's position ('Pos').
feature_cols = ['3P', 'BLK']
X_train = train[feature_cols]
y_train = train[['Pos']]
# 10-fold cross-validated grid search; labels flattened to 1-D as sklearn expects.
clf = svc_param_selection(X_train, y_train.values.ravel(), 10)
{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}
# Evaluate the tuned model on the held-out test split.
X_test = test[['3P', 'BLK']]
y_test = test[['Pos']]
y_true = y_test
y_pred = clf.predict(X_test)

# Per-class precision/recall/F1, then overall accuracy.
print(classification_report(y_true, y_pred))
print()
acc = accuracy_score(y_true, y_pred)
print(f"accuracy : {acc}")
precision recall f1-score support
C 1.00 0.89 0.94 9
SG 0.92 1.00 0.96 11
accuracy 0.95 20
macro avg 0.96 0.94 0.95 20
weighted avg 0.95 0.95 0.95 20
accuracy : 0.95
# Build a side-by-side table of predicted vs. actual positions for each test sample.
comparison = pd.DataFrame(
    {
        'prediction': y_pred,
        'ground_truth': y_true.values.ravel(),
    }
)
comparison
prediction ground_truth
0 SG SG
1 SG SG
2 C C
3 SG SG
4 C C
5 C C
6 SG SG
7 SG SG
8 SG C
9 SG SG
10 SG SG
11 SG SG
12 C C
13 C C
14 SG SG
15 C C
16 C C
17 C C
18 SG SG
19 SG SG