import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import svm
from sklearn.metrics import accuracy_score
# --- Load -------------------------------------------------------------------
# Only the first 50,000 rows are used for training and evaluation; nrows stops
# parsing at that point instead of loading the whole file and slicing it.
data = pd.read_csv('bmi.csv', nrows=50000)
print(data.head(2))

# --- Features / target ------------------------------------------------------
feature = data.drop(columns=['label'])
label = data['label']

# Rescale the predictors with fixed divisors (vectorised, no per-element
# lambda). Any sample passed to model.predict later must be divided by the
# same constants.
feature['weight'] = feature['weight'] / 100
feature['height'] = feature['height'] / 200
print(feature.head(2))

# Encode the class names as integers: thin -> 0, normal -> 1, fat -> 2.
# Series.map turns every value missing from the dict into NaN.
label = label.map({'thin': 0, 'normal': 1, 'fat': 2})
print(label.unique())
if label.isna().any():
    # Fail here with a clear message rather than letting NaN labels flow
    # into the train/test split and the classifier.
    raise ValueError(
        "bmi.csv contains labels other than 'thin', 'normal' and 'fat'"
    )

# --- Hold-out split ---------------------------------------------------------
# 70 % train / 30 % test; random_state pins the shuffle for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    feature, label, test_size=0.3, random_state=10
)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# --- Train and evaluate on the hold-out set ---------------------------------
model = svm.SVC(C=0.01)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
print('예측값 : ', y_pred[:10])
print('실제값 : ', y_test[:10])

acc = accuracy_score(y_test, y_pred)
# The printed value is the accuracy score, so the caption says "accuracy"
# (it previously read "predicted value").
print('모델 정확도 : ', acc)

# --- 3-fold cross-validation over the full (scaled) data set ----------------
# cross_val_score fits clones of the estimator, so `model` itself remains the
# classifier fitted on x_train above.
cross_vali = cross_val_score(model, feature, label, cv=3)
print('각각의 검증 정확도 : ', cross_vali)
print('평균 검증 정확도 : ', np.mean(cross_vali))

# --- Data for plotting ------------------------------------------------------
# Unscaled copy of the whole file, indexed by class name so that one class can
# be selected with label_data.loc[<class name>]. The column is addressed by
# name, consistent with data['label'] above, rather than by position.
label_data = pd.read_csv('bmi.csv', index_col='label')
def scatter_func(label, color):
    """Add the weight/height points of one class to the current plot.

    Args:
        label: Index key looked up in the module-level ``label_data`` frame
            (the script calls this with 'fat', 'normal' and 'thin'); also
            used as the legend entry for the series.
        color: Colour handed to ``plt.scatter`` for these points.

    Raises:
        KeyError: If ``label`` is not present in ``label_data``'s index.
    """
    rows = label_data.loc[label]
    plt.scatter(rows['weight'], rows['height'], c=color, label=label)
# --- Plot every class in its own colour, then show the combined figure ------
for class_name, class_color in (
    ('fat', 'red'),
    ('normal', 'green'),
    ('thin', 'blue'),
):
    scatter_func(class_name, class_color)
plt.legend()
plt.show()

# --- Classify two new samples -----------------------------------------------
new_data = pd.DataFrame({'weight': [60, 55], 'height': [180, 170]})

# Apply the same divisors that were used on the training features.
new_data['weight'] = new_data['weight'] / 100
new_data['height'] = new_data['height'] / 200

# The classifier was fitted on the columns of `feature` in the order they
# appear in the CSV. Reorder the new samples to match, so weight and height
# cannot be swapped silently (older scikit-learn) or rejected for mismatched
# feature-name order (newer scikit-learn).
new_data = new_data[list(feature.columns)]

# Predictions are the integer codes from the label mapping:
# 0 = thin, 1 = normal, 2 = fat.
new_pred = model.predict(new_data)
print(new_pred)