# ![]()
# ![]()
from sklearn import svm, metrics
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random
import time # 실행시간 측정 용도
from time import strftime
# 2만명의 키, 몸무게 데이터 생성 (Generate height and weight data for 20,000 people)
# Directory that holds the generated dataset CSV.
# NOTE(review): the path looks like a Google Colab Drive mount — confirm and
# adjust when running elsewhere.
base_path = r'/content/drive/MyDrive/dataset'
def calc_bmi(h: float, w: float) -> str:
    """Classify a person by body-mass index.

    BMI is computed as weight divided by the square of height in metres
    (the height argument is divided by 100 first).

    Args:
        h: Height in centimetres.
        w: Weight in kilograms.

    Returns:
        "thin" when BMI < 18.5, "normal" when 18.5 <= BMI < 25,
        otherwise "fat".

    Raises:
        ZeroDivisionError: If ``h`` is 0.
    """
    bmi = w / (h / 100) ** 2
    if bmi < 18.5:
        return "thin"  # underweight
    if bmi < 25:
        return "normal"  # normal weight
    return "fat"
# Spot-check the classifier on three hand-picked (height, weight) pairs.
tuple(calc_bmi(*pair) for pair in ((170, 80), (180, 54), (166, 65)))
# Full path of the CSV file that the generated samples are written to.
file_path = os.path.join(base_path, "bmi.csv")
file_path
# Per-label tally of the generated samples.
cnt = {
    "thin": 0,
    "normal": 0,
    "fat": 0,
}
random.seed(10)  # fixed seed so the same dataset is produced on every run
# The with-block closes the file even if a write or calc_bmi() raises
# part-way through the loop.
with open(file_path, "w", encoding="utf-8") as fp:
    fp.write("height,weight,label\n")  # CSV header row
    for i in range(20000):
        h = random.randint(120, 200)  # height: 120-200 cm
        w = random.randint(35, 80)  # weight: 35-80 kg
        label = calc_bmi(h, w)
        cnt[label] += 1
        fp.write(f'{h},{w},{label}\n')
print("ok", cnt)
# Read the generated samples back into a DataFrame.
df_bmi = pd.read_csv(file_path)
df_bmi
# Scale both features by fixed constants so they land in the 0-1 range:
# weight assumes a maximum of 100 kg, height a maximum of 200 cm.
w = df_bmi['weight'].div(100)
h = df_bmi['height'].div(200)
# Feature table with columns in the order weight, height.
wh = pd.concat([w, h], axis="columns")
wh
label = df_bmi['label']
label
# Split features and labels into train and test sets with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    wh,
    label,
    random_state=24,
)
X_train.shape
X_test.shape
# Fit a support vector classifier (constructed with no arguments) on the
# training split; fit() returns the estimator itself.
clf = svm.SVC().fit(X_train, y_train)
# Predict labels for the held-out samples.
predict = clf.predict(X_test)
predict
# Overall accuracy, followed by the per-class classification report.
metrics.accuracy_score(y_true=y_test, y_pred=predict)
report = metrics.classification_report(y_true=y_test, y_pred=predict)
print(report)
# Re-index the samples by label so each class can be selected with .loc.
df = df_bmi.set_index(keys='label')
df
df.loc['normal']
# A figure with a single subplot (1 row, 1 column, first slot).
fig = plt.figure()
ax = fig.add_subplot(111)
def scatter(lbl, color):
    """Plot every sample carrying one label as a scatter series.

    Reads the module-level ``df`` (selected with ``df.loc[lbl]``) and draws
    onto the module-level ``ax``; weight goes on the x-axis and height on
    the y-axis.

    Args:
        lbl: Index value to select from ``df``; also used as the series'
            legend label.
        color: Colour passed to ``ax.scatter`` for the selected points.
    """
    b = df.loc[lbl]
    ax.scatter(b['weight'], b['height'], c=color, label=lbl)
# Draw one scatter series per BMI class (same order as before), then add a
# legend built from the series labels.
for class_name, point_color in (
    ("fat", "red"),
    ("normal", "yellow"),
    ("thin", "purple"),
):
    scatter(class_name, point_color)
ax.legend()
# SVC / NuSVC / LinearSVC
# Time the fit / predict / score cycle of LinearSVC.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()  # start of the measured interval
clf = svm.LinearSVC()
clf.fit(X_train, y_train)
predict = clf.predict(X_test)
acc_score = metrics.accuracy_score(y_test, predict)
c1_report = metrics.classification_report(y_test, predict)
end_time = time.perf_counter()  # end of the measured interval
print(f'정답률 = {acc_score}')
print(c1_report)
print(f'경과시간 {end_time - start_time} sec')
# Time the fit / predict / score cycle of SVC.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()  # start of the measured interval
clf = svm.SVC()
clf.fit(X_train, y_train)
predict = clf.predict(X_test)
acc_score = metrics.accuracy_score(y_test, predict)
c1_report = metrics.classification_report(y_test, predict)
end_time = time.perf_counter()  # end of the measured interval
print(f'정답률 = {acc_score}')
print(c1_report)
print(f'경과시간 {end_time - start_time} sec')