This post was written after taking the FastCampus course '[skill-up] 처음부터 시작하는 딥러닝 유치원',
with additional notes from my own follow-up study.
Gradient descent formula
: Learning rate
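
In gradient descent, a parameter θ is updated as θ ← θ − η · ∇θL(θ), where the learning rate η scales each step: too small and training crawls, too large and it overshoots or diverges. Below is a minimal sketch of this trade-off on a 1-D quadratic (my own illustration, not course code; the toy function and step count are arbitrary choices):

```python
def f(x):       # toy loss: (x - 3)^2, minimized at x = 3
    return (x - 3.0) ** 2

def grad_f(x):  # analytic gradient: 2 * (x - 3)
    return 2.0 * (x - 3.0)

for lr in (0.01, 0.1, 1.1):  # too small / reasonable / too large
    x = 0.0
    for _ in range(50):
        x -= lr * grad_f(x)  # theta <- theta - eta * grad
    print(f"lr={lr}: x={x:.4f}, f(x)={f(x):.3g}")
```

With `lr=0.01` the iterate is still far from 3 after 50 steps, with `lr=0.1` it has essentially converged, and with `lr=1.1` it diverges.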

| Optimizer | Characteristics |
|---|---|
| SGD | Basic and stable, but the learning rate needs manual tuning |
| Momentum | Adds inertia to the updates; speeds up convergence |
| AdaGrad | Automatically adapts the learning rate per parameter |
| RMSProp | Adapts the learning rate based on recent gradients |
| Adam | Momentum + RMSProp-style adaptive rates; strong performance |
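
For reference, each row in the table maps directly to a class in `torch.optim`. A minimal sketch (the model and hyperparameter values here are illustrative placeholders, not course settings):

```python
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(8, 1)  # placeholder model, just to have parameters

sgd      = optim.SGD(model.parameters(), lr=1e-2)                # plain SGD
momentum = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)  # SGD + momentum
adagrad  = optim.Adagrad(model.parameters())                     # per-parameter learning rates
rmsprop  = optim.RMSprop(model.parameters())                     # recent-gradient scaling
adam     = optim.Adam(model.parameters())                        # momentum + adaptive rates
```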

The example below puts this into practice: a deep regression network for the California housing dataset, trained with mini-batch updates under the Adam optimizer.

```python
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Load the California housing dataset into a DataFrame.
california = fetch_california_housing()
df = pd.DataFrame(california.data, columns=california.feature_names)
df["Target"] = california.target
# df.tail()
# sns.pairplot(df.sample(1000))
# plt.show()

# Standardize the input features (every column except the target).
# Note: writing into df.values[:, :-1] is unreliable because df.values
# may return a copy, so assign through .iloc instead.
scaler = StandardScaler()
scaler.fit(df.values[:, :-1])
df.iloc[:, :-1] = scaler.transform(df.values[:, :-1])
# sns.pairplot(df.sample(1000))
# plt.show()

data = torch.from_numpy(df.values).float()
print(data.shape)

x = data[:, :-1]
y = data[:, -1:]
print(x.shape, y.shape)
n_epochs = 4000
batch_size = 128
print_interval = 200
# learning_rate = 1e-5 is not needed here: Adam adapts per-parameter
# step sizes on its own (its default lr is 1e-3).

# A funnel-shaped MLP: 8 input features narrowed down to 1 regression output.
model = nn.Sequential(
    nn.Linear(x.size(-1), 10),
    nn.LeakyReLU(),
    nn.Linear(10, 9),
    nn.LeakyReLU(),
    nn.Linear(9, 8),
    nn.LeakyReLU(),
    nn.Linear(8, 7),
    nn.LeakyReLU(),
    nn.Linear(7, 6),
    nn.LeakyReLU(),
    nn.Linear(6, 5),
    nn.LeakyReLU(),
    nn.Linear(5, 4),
    nn.LeakyReLU(),
    nn.Linear(4, 3),
    nn.LeakyReLU(),
    nn.Linear(3, y.size(-1)),
)
print(model)

optimizer = optim.Adam(model.parameters())

# |x| = (total_size, input_dim)
# |y| = (total_size, output_dim)
for i in range(n_epochs):
    # Shuffle the dataset each epoch; x and y must be permuted with the
    # same indices so that input-target pairs stay aligned.
    indices = torch.randperm(x.size(0))
    x_ = torch.index_select(x, dim=0, index=indices)
    y_ = torch.index_select(y, dim=0, index=indices)

    # Split into mini-batches.
    x_ = x_.split(batch_size, dim=0)
    y_ = y_.split(batch_size, dim=0)
    # |x_[i]| = (batch_size, input_dim)
    # |y_[i]| = (batch_size, output_dim)

    y_hat = []
    total_loss = 0

    for x_i, y_i in zip(x_, y_):
        # |x_i| = |x_[i]|
        # |y_i| = |y_[i]|
        y_hat_i = model(x_i)
        loss = F.mse_loss(y_hat_i, y_i)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Convert to float to cut the gradient graph; accumulating the raw
        # tensor would keep every batch's graph alive (memory leak).
        total_loss += float(loss)
        y_hat += [y_hat_i]

    total_loss = total_loss / len(x_)
    if (i + 1) % print_interval == 0:
        print('Epoch %d: loss=%.4e' % (i + 1, total_loss))
# Concatenate the last epoch's per-batch predictions, and rebuild y in the
# same (shuffled) order so that predictions and targets stay aligned.
y_hat = torch.cat(y_hat, dim=0)
y = torch.cat(y_, dim=0)
# |y_hat| = (total_size, output_dim)
# |y| = (total_size, output_dim)

df = pd.DataFrame(torch.cat([y, y_hat], dim=1).detach().numpy(),
                  columns=["y", "y_hat"])

# Plot the predictions against the targets.
sns.pairplot(df, height=5)
plt.show()
```
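
Beyond eyeballing the pairplot, a quick quantitative check is easy to add. A small sketch (my addition, not part of the course code) computing RMSE on the same tensors:

```python
# Root-mean-squared error between targets and the last epoch's predictions.
rmse = torch.sqrt(F.mse_loss(y_hat, y))
print(f"RMSE: {float(rmse):.4f}")
```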