
# Imports for the lab: numpy/pandas, scikit-learn, the torch stack, and ISLP helpers
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import TensorDataset
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchinfo import summary
from torchmetrics import MeanAbsoluteError
from matplotlib.pyplot import subplots
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import CSVLogger
from ISLP import load_data
from ISLP.models import ModelSpec as MS
from ISLP.torch import (SimpleDataModule, SimpleModule,
                        ErrorTracker, rec_num_workers)

# Hitters data
Hitters = load_data('Hitters').dropna()
n = Hitters.shape[0]
print(n) # > 263
# Set up the model matrix and the response
model = MS(Hitters.columns.drop('Salary'), intercept=False)
X = model.fit_transform(Hitters).to_numpy()
Y = Hitters['Salary'].to_numpy()
# Split the data into test and training
X_train, X_test, Y_train, Y_test = train_test_split(
X, Y, test_size=1/3, random_state=1
)
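With n = 263 observations and test_size=1/3, the split assigns 88 observations to the test set and 175 to training; a quick shape check (the 19 columns come from the model matrix built above):
print(X_train.shape, X_test.shape)  # > (175, 19) (88, 19)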
# Fit the linear model and evaluate the test error
hit_lm = LinearRegression().fit(X_train, Y_train)
Yhat_test = hit_lm.predict(X_test)
print(np.abs(Yhat_test - Y_test).mean()) # > 259.7152883314631
# Fit the lasso using sklearn
# Encode a pipeline with two steps: we first normalize
# the features using a StandardScaler() transform, and
# then fit the lasso without further normalization.
scaler = StandardScaler(with_mean=True, with_std=True)
lasso = Lasso(warm_start=True, max_iter=30000)
standard_lasso = Pipeline(steps=[
('scaler', scaler),
('lasso', lasso)
])
# Create a grid of values for λ
X_s = scaler.fit_transform(X_train)
n = X_s.shape[0]
print(n)
lam_max = np.fabs(X_s.T.dot(Y_train - Y_train.mean())).max() / n
print(lam_max)
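By construction, lam_max should be the smallest penalty at which the lasso solution is identically zero; a quick check on the standardized training data (a sketch using plain scikit-learn, not part of the original lab):
check = Lasso(alpha=lam_max).fit(X_s, Y_train)
print(np.allclose(check.coef_, 0))  # > True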
# The 'lasso__' prefix targets the lasso step inside the pipeline
param_grid = {
    'lasso__alpha': np.exp(np.linspace(0, np.log(0.01), 100)) * lam_max
}
print(param_grid)
# Perform cross-validation over this sequence of λ values; we search over the
# pipeline so that the scaler is re-fit within each training fold
cv = KFold(10, shuffle=True, random_state=1)
grid = GridSearchCV(
    standard_lasso,
    param_grid,
    cv=cv,
    scoring='neg_mean_absolute_error'
)
grid.fit(X_train, Y_train)
trained_lasso = grid.best_estimator_
Yhat_test = trained_lasso.predict(X_test)
print(np.fabs(Yhat_test - Y_test).mean()) # > 257.23820107995
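The penalty that cross-validation selected can be read off the fitted grid via the standard scikit-learn attribute:
print(grid.best_params_)  # the chosen 'lasso__alpha' value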
# Make a class and object for Hitters data
class HittersModel(nn.Module):

    def __init__(self, input_size):
        super(HittersModel, self).__init__()
        self.flatten = nn.Flatten()
        self.sequential = nn.Sequential(
            nn.Linear(input_size, 50),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(50, 1)
        )

    def forward(self, x):
        x = self.flatten(x)
        return torch.flatten(self.sequential(x))

hit_model = HittersModel(X.shape[1])
summary(
hit_model,
input_size=X_train.shape,
col_names=['input_size', 'output_size', 'num_params']
)
The self.sequential object is a model composed of four operations (maps). The first is a linear layer that maps the 19 features of the Hitters data into 50 dimensions, using 50×19 weights plus 50 biases, i.e. 1,000 parameters. It is followed by a ReLU activation and a Dropout layer, neither of which has trainable parameters, and finally by a linear layer mapping the 50 dimensions to a single output with 50 weights plus 1 bias. Putting it all together, the number of trainable parameters is 50×19 + 50 + 50 + 1 = 1,051.
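That count can be verified directly by summing the parameter tensors of hit_model (a quick check using standard PyTorch):
print(sum(p.numel() for p in hit_model.parameters()))  # > 1051, i.e. (19*50 + 50) + (50*1 + 1)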
# Transform our training data into a form for torch
X_train_t = torch.tensor(X_train.astype(np.float32))
Y_train_t = torch.tensor(Y_train.astype(np.float32))
hit_train = TensorDataset(X_train_t, Y_train_t)
# Transform our test data into a form for torch
X_test_t = torch.tensor(X_test.astype(np.float32))
Y_test_t = torch.tensor(Y_test.astype(np.float32))
hit_test = TensorDataset(X_test_t, Y_test_t)
# ISLP provides the helper SimpleDataModule() to wrap the datasets into a data
# module whose loaders can serve batches on different machines (CPU or GPU)
max_num_workers = rec_num_workers()
hit_dm = SimpleDataModule(hit_train, hit_test,
batch_size=32,
num_workers=min(4, max_num_workers),
validation=hit_test)
# Use the SimpleModule.regression() method
hit_module = SimpleModule.regression(hit_model,
metrics={'mae': MeanAbsoluteError()})
# Log our results via CSVLogger() within 'logs/hitters'
hit_logger = CSVLogger('logs', name='hitters')
# Train our model using the Trainer() object
hit_trainer = Trainer(deterministic=True,
max_epochs=50,
log_every_n_steps=5,
logger=hit_logger,
callbacks=[ErrorTracker()])
hit_trainer.fit(hit_module, datamodule=hit_dm)
In SGD, each step selects 32 training observations at random and computes the gradient on them. Since the training set has 175 observations and the batch size is 32, one epoch consists of about 5.5 SGD steps (175 / 32 ≈ 5.5). In other words, one epoch is the number of mini-batches needed to process the full training set once.
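The number of mini-batch steps per epoch can be computed from the dataset itself (the batch size of 32 matches hit_dm above):
import math
print(math.ceil(len(hit_train) / 32))  # > 6, the last batch being partial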
# Evaluate performance on our test data
hit_trainer.test(hit_module, datamodule=hit_dm)
hit_results = pd.read_csv(hit_logger.experiment.metrics_file_path)
print(hit_results)
# Simple generic function to produce this plot
def summary_plot(results, ax, col='loss',
                 valid_legend='Validation',
                 training_legend='Training',
                 ylabel='Loss', fontsize=20):
    for (column, color, label) in zip([f'train_{col}_epoch', f'valid_{col}'],
                                      ['black', 'red'],
                                      [training_legend, valid_legend]):
        results.plot(x='epoch',
                     y=column,
                     label=label,
                     marker='o',
                     color=color,
                     ax=ax)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    return ax
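With the logger output in hand, summary_plot() can be applied to the Hitters results, for example to plot the MAE traces (the axis limits below are illustrative choices, not from the original):
fig, ax = subplots(1, 1, figsize=(6, 6))
ax = summary_plot(hit_results, ax, col='mae', ylabel='MAE')
ax.set_ylim([0, 400])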
# Delete all references to the torch objects so their memory can be freed
del(Hitters,
hit_model, hit_dm,
hit_logger,
hit_test, hit_train,
X, Y,
X_test, X_train,
Y_test, Y_train,
X_test_t, Y_test_t,
hit_trainer, hit_module)
# The MNIST() function in torchvision.datasets retrieves the training and test datasets
(mnist_train, mnist_test) = [
MNIST(
root='data',
train=train,
download=True,
transform=ToTensor()
)
for train in [True, False]
]
print(mnist_train)
The training data contains 60,000 images and the test data 10,000. Each image is stored as a 28×28 matrix of pixels.
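These sizes can be confirmed directly from the Dataset objects:
print(len(mnist_train), len(mnist_test))  # > 60000 10000
print(mnist_train[0][0].shape)            # > torch.Size([1, 28, 28])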
# Form a data module from training and test datasets
mnist_dm = SimpleDataModule(
mnist_train,
mnist_test,
validation=0.2,
num_workers=max_num_workers,
batch_size=256
)
for idx, (X_, Y_) in enumerate(mnist_dm.train_dataloader()):
    print('X: ', X_.shape)
    print('Y: ', Y_.shape)
    if idx >= 1:
        break
Each batch X_ holds 256 images, and each image is a tensor of size 1×28×28.
# Specify our neural network.
class MNISTModel(nn.Module):

    def __init__(self):
        super(MNISTModel, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28 * 28, 256),
            nn.ReLU(),
            nn.Dropout(0.4)
        )
        self.layer2 = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3)
        )
        self._forward = nn.Sequential(
            self.layer1,
            self.layer2,
            nn.Linear(128, 10)
        )

    def forward(self, x):
        return self._forward(x)
MNISTModel structure:
- Input: each image is a 1×28×28 grayscale image; the first layer flattens it into a 784-dimensional vector.
- Layer 1: a linear map from 784 to 256 dimensions, a ReLU activation, and Dropout(0.4), which randomly zeroes 40% of the units.
- Layer 2: a linear map from 256 to 128 dimensions, a ReLU activation, and Dropout(0.3), which randomly zeroes 30% of the units.
- Output layer: a linear map from 128 to 10 dimensions, producing logits for the 10 classes (digits 0-9).
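Tallying parameters layer by layer gives 784×256 + 256 = 200,960 for layer 1, 256×128 + 128 = 32,896 for layer 2, and 128×10 + 10 = 1,290 for the output layer, 235,146 in total; a quick check:
print(sum(p.numel() for p in MNISTModel().parameters()))  # > 235146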
# Check that the model produces output of expected size based on our existing batch X_ above.
mnist_model = MNISTModel()
mnist_model(X_).size()
# > torch.Size([256, 10])
# The SimpleModule.classification() method uses the cross-entropy loss function instead of mean squared error
mnist_module = SimpleModule.classification(mnist_model, num_classes=10)
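As a rough illustration of what that loss computes (not part of the original lab), torch's CrossEntropyLoss can be applied directly to the logits for the batch X_, Y_ left over from the loop above:
loss_fn = nn.CrossEntropyLoss()      # expects raw logits and integer class labels
print(loss_fn(mnist_model(X_), Y_))  # scalar loss for one batch (dropout is active in train mode)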
mnist_logger = CSVLogger('logs', name='MNIST')
# Supply training data, and fit the model
mnist_trainer = Trainer(
deterministic=True,
max_epochs=30,
logger=mnist_logger,
callbacks=[ErrorTracker()]
)
mnist_trainer.fit(mnist_module, datamodule=mnist_dm)
# Display accuracy across epochs.
mnist_results = pd.read_csv(mnist_logger.experiment.metrics_file_path)
fig, ax = subplots(1, 1, figsize=(6, 6))
summary_plot(
mnist_results,
ax,
col='accuracy',
ylabel='Accuracy'
)
ax.set_ylim([0.5, 1])
ax.set_ylabel('Accuracy')
ax.set_xticks(np.linspace(0, 30, 7).astype(int))
# Evaluate the accuracy using the test() method
mnist_trainer.test(mnist_module, datamodule=mnist_dm)
# Multiclass logistic regression
class MNIST_MLR(nn.Module):

    def __init__(self):
        super(MNIST_MLR, self).__init__()
        self.linear = nn.Sequential(
            nn.Flatten(),
            nn.Linear(784, 10)
        )

    def forward(self, x):
        return self.linear(x)

mlr_model = MNIST_MLR()
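Since this model is a single linear map from the 784 pixels to the 10 logits, it has only 784×10 + 10 = 7,850 parameters, far fewer than the two-layer network above:
print(sum(p.numel() for p in mlr_model.parameters()))  # > 7850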
mlr_module = SimpleModule.classification(mlr_model, num_classes=10)
mlr_logger = CSVLogger('logs', name='MNIST_MLR')
mlr_trainer = Trainer(
    deterministic=True,
    max_epochs=30,
    logger=mlr_logger,
    callbacks=[ErrorTracker()]
)
mlr_trainer.fit(mlr_module, datamodule=mnist_dm)
# Fit the model just as before and compute the test error
mlr_trainer.test(mlr_module, datamodule=mnist_dm)
# Delete some of the objects
del (
mnist_test, mnist_train, mnist_model, mnist_dm,
mnist_trainer, mnist_module, mnist_results,
mlr_model, mlr_module, mlr_trainer
)