This post was written after taking a FastCampus deep learning course ('[skill-up]' series), with additional material added from my own follow-up study.

ŷ = σ(xW + b)
Predict True if ŷ >= 0.5, else False.
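As a quick illustration (a minimal sketch of my own, not from the lecture), the model and the 0.5 decision rule look like this in PyTorch; the weights, bias, and inputs are arbitrary placeholders:

import torch

W = torch.randn(3, 1)               # placeholder weights (input_dim=3, output_dim=1)
b = torch.zeros(1)                  # placeholder bias
x = torch.randn(4, 3)               # a batch of 4 samples

y_hat = torch.sigmoid(x @ W + b)    # y_hat = sigmoid(xW + b), values in (0, 1)
pred = y_hat >= 0.5                 # True if y_hat >= 0.5 else False
print(y_hat.squeeze(), pred.squeeze())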
Regression vs. Classification
| Item | Regression | Classification |
|---|---|---|
| Output | Real-valued vector | Categorical value |
| Loss function | MSE Loss | BCE / Cross Entropy |
| Last layer | Linear | Sigmoid / Softmax |
| Example | Salary prediction | Predicting whether a patient is infected |
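The "last layer" and "loss function" rows of the table map directly onto PyTorch modules. A rough sketch of the two setups (illustrative only, the dimensions are arbitrary):

import torch.nn as nn

# Regression: linear output + MSE loss
reg_model = nn.Linear(10, 1)
reg_loss = nn.MSELoss()

# Binary classification: linear output squashed by sigmoid + BCE loss
clf_model = nn.Sequential(nn.Linear(10, 1), nn.Sigmoid())
clf_loss = nn.BCELoss()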

BCE (Binary Cross Entropy) is a loss function designed so that the predicted probability ŷ is pushed toward 1 when the true label is 1, and toward 0 when the true label is 0:

BCE(y, ŷ) = -(1/N) Σᵢ [ yᵢ · log(ŷᵢ) + (1 − yᵢ) · log(1 − ŷᵢ) ]

The first term of the formula is active only for the True (yᵢ = 1) samples, and the second term only for the False (yᵢ = 0) samples. Because of the leading minus sign, the loss is something we minimize.
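As a sanity check (a small sketch of my own, not part of the lecture code), the formula can be computed by hand and compared with PyTorch's nn.BCELoss; the labels and probabilities below are arbitrary examples:

import torch
import torch.nn as nn

y = torch.tensor([1., 0., 1., 0.])            # true labels
y_hat = torch.tensor([0.9, 0.2, 0.6, 0.4])    # arbitrary predicted probabilities

# manual BCE: -(1/N) * sum(y * log(y_hat) + (1 - y) * log(1 - y_hat))
manual = -(y * y_hat.log() + (1 - y) * (1 - y_hat).log()).mean()
print(manual, nn.BCELoss()(y_hat, y))         # the two values match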

Because the sigmoid output is a probability between 0 and 1, BCE is used as the loss function (MSE would still work, but it is not optimal here).
BCELoss is closely tied to probability/statistics and information theory.
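To get a feel for why MSE is a weaker fit here (again a small illustrative sketch of my own): for a confidently wrong prediction, BCE grows without bound while MSE is capped at 1, so BCE gives a much stronger signal to correct the mistake.

import torch
import torch.nn as nn

y = torch.tensor([1.])          # true label is 1
y_hat = torch.tensor([0.01])    # confidently wrong prediction

print(nn.BCELoss()(y_hat, y))   # ~4.61, grows to infinity as y_hat -> 0
print(nn.MSELoss()(y_hat, y))   # ~0.98, can never exceed 1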


The training loop below (forward pass → loss → backward pass → parameter update) is repeated for 200,000 iterations of full-batch gradient descent.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
# print(cancer.DESCR)
df = pd.DataFrame(cancer.data, columns=cancer.feature_names)
df['class'] = cancer.target
# df.tail()
### Pair plot with mean features
# sns.pairplot(df[['class'] + list(df.columns[:10])])
# plt.show()
### Pair plot with std features
# sns.pairplot(df[['class'] + list(df.columns[10:20])])
# plt.show()
### Pair plot with worst features
# sns.pairplot(df[['class'] + list(df.columns[20:30])])
# plt.show()
### Select features
cols = ["mean radius", "mean texture",
        "mean smoothness", "mean compactness", "mean concave points",
        "worst radius", "worst texture",
        "worst smoothness", "worst compactness", "worst concave points",
        "class"]

for c in cols[:-1]:
    sns.histplot(df, x=c, hue=cols[-1], bins=50, stat='probability')
    plt.show()
## Train Model with PyTorch
data = torch.from_numpy(df[cols].values).float()
print(data.shape)
x = data[:, :-1]
y = data[:, -1:]
print(x.shape, y.shape)
n_epochs = 200000
learning_rate = 1e-2
print_interval = 10000
class MyModel(nn.Module):  # build a custom model by subclassing nn.Module

    def __init__(self, input_dim, output_dim):
        super().__init__()

        self.input_dim = input_dim
        self.output_dim = output_dim

        self.linear = nn.Linear(input_dim, output_dim)
        self.act = nn.Sigmoid()

    def forward(self, x):
        # |x| = (batch_size, input_dim)
        # |y| = (batch_size, output_dim)
        y = self.act(self.linear(x))

        return y
model = MyModel(input_dim=x.size(-1),
                output_dim=y.size(-1))

crit = nn.BCELoss()  # Define BCELoss instead of MSELoss.
optimizer = optim.SGD(model.parameters(),  # register the parameters whose gradients will be computed later
                      lr=learning_rate)
for i in range(n_epochs):
    y_hat = model(x)

    loss = crit(y_hat, y)

    optimizer.zero_grad()
    loss.backward()  # automatically computes the gradient (∂loss/∂parameter) for every tensor (W, b, ...) registered in model.parameters()

    optimizer.step()

    if (i + 1) % print_interval == 0:
        print('Epoch %d: loss=%.4e' % (i + 1, loss))
# Accuracy with a 0.5 decision threshold
correct_cnt = (y == (y_hat > .5)).sum()
total_cnt = float(y.size(0))

print('Accuracy: %.4f' % (correct_cnt / total_cnt))

# Plot the distribution of predicted probabilities, colored by the true class
df = pd.DataFrame(torch.cat([y, y_hat], dim=1).detach().numpy(),
                  columns=["y", "y_hat"])

sns.histplot(df, x='y_hat', hue='y', bins=50, stat='probability')
plt.show()