NN_only code 💻

창슈 · April 4, 2025

Deep Learning


📦 Lab: Single Layer Network on Hitters Data
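The snippets below assume the usual imports for this lab; here is a minimal sketch of them (package layout per ISLP, scikit-learn, PyTorch, Lightning, torchinfo, and torchmetrics):

```python
import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots
import torch
from torch import nn
from torch.utils.data import TensorDataset
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchinfo import summary
from torchmetrics import MeanAbsoluteError
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import CSVLogger
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from ISLP import load_data
from ISLP.models import ModelSpec as MS
from ISLP.torch import (SimpleDataModule, SimpleModule,
                        ErrorTracker, rec_num_workers)
```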

# Hitters data
Hitters = load_data('Hitters').dropna()
n = Hitters.shape[0]
print(n)  # > 263

# Set up the model matrix and the response
model = MS(Hitters.columns.drop('Salary'), intercept=False)
X = model.fit_transform(Hitters).to_numpy()
Y = Hitters['Salary'].to_numpy()

# Split the data into test and training
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=1/3, random_state=1
)
# Fit the linear model and evaluate the test error
hit_lm = LinearRegression().fit(X_train, Y_train)
Yhat_test = hit_lm.predict(X_test)
print(np.abs(Yhat_test - Y_test).mean())  # > 259.7152883314631

# Fit the lasso using sklearn
# Encode a pipeline with two steps: we first normalize
# the features using a StandardScaler() transform, and
# then fit the lasso without further normalization.
scaler = StandardScaler(with_mean=True, with_std=True)
lasso = Lasso(warm_start=True, max_iter=30000)
standard_lasso = Pipeline(steps=[
    ('scaler', scaler),
    ('lasso', lasso)
])
# Create a grid of 100 values for λ, uniform on the log scale from
# lam_max down to 0.01 * lam_max, where lam_max is the smallest λ
# at which all the lasso coefficients are zero
X_s = scaler.fit_transform(X_train)
n = X_s.shape[0]
print(n)

lam_max = np.fabs(X_s.T.dot(Y_train - Y_train.mean())).max() / n
print(lam_max)

param_grid = {
    'alpha': np.exp(np.linspace(0, np.log(0.01), 100)) * lam_max
}
print(param_grid)

# Perform cross-validation using this sequence of λ
cv = KFold(10, shuffle=True, random_state=1)
grid = GridSearchCV(
    lasso,
    param_grid,
    cv=cv,
    scoring='neg_mean_absolute_error'
)
grid.fit(X_train, Y_train)

trained_lasso = grid.best_estimator_
Yhat_test = trained_lasso.predict(X_test)
print(np.fabs(Yhat_test - Y_test).mean())  # > 257.23820107995
# Make a class and object for Hitters data
class HittersModel(nn.Module):
    def __init__(self, input_size):
        super(HittersModel, self).__init__()
        self.flatten = nn.Flatten()
        self.sequential = nn.Sequential(
            nn.Linear(input_size, 50),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(50, 1)
        )

    def forward(self, x):
        x = self.flatten(x)
        return torch.flatten(self.sequential(x))

hit_model = HittersModel(X.shape[1])

summary(
    hit_model,
    input_size=X_train.shape,
    col_names=['input_size', 'output_size', 'num_params']
)

The self.sequential object is a model composed of four maps.
The first is a linear layer that transforms the 19 features of the Hitters data into 50 dimensions,
using 50×19 weights + 50 biases = 1,000 parameters.

It is followed by:

  • a ReLU activation,
  • dropout with rate 40%,
  • and a final linear layer down to one dimension (50 weights + 1 bias).

Putting it all together, the number of trainable parameters is 50×19 + 50 + 50 + 1 = 1,051.
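As a quick check of that count, a minimal sketch that sums the parameter tensors of hit_model defined above:

```python
# 19*50 + 50 (first linear layer) + 50*1 + 1 (output layer) = 1,051 trainable parameters
n_params = sum(p.numel() for p in hit_model.parameters() if p.requires_grad)
print(n_params)  # 1051
```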

# Transform our training data into a form for torch
X_train_t = torch.tensor(X_train.astype(np.float32))
Y_train_t = torch.tensor(Y_train.astype(np.float32))
hit_train  = TensorDataset(X_train_t, Y_train_t)

# Transform our test data into a form for torch
X_test_t  = torch.tensor(X_test.astype(np.float32))
Y_test_t  = torch.tensor(Y_test.astype(np.float32))
hit_test  = TensorDataset(X_test_t, Y_test_t)

# ISLP provides the helper SimpleDataModule() to wrap the datasets for training;
# rec_num_workers() recommends how many worker processes to use for data loading
max_num_workers = rec_num_workers()
hit_dm = SimpleDataModule(hit_train, hit_test,
                          batch_size=32,
                          num_workers=min(4, max_num_workers),
                          validation=hit_test)
# Use the SimpleModule.regression() method
hit_module = SimpleModule.regression(hit_model,
                                     metrics={'mae': MeanAbsoluteError()})

# Log our results via CSVLogger() within 'logs/hitters'
hit_logger = CSVLogger('logs', name='hitters')

# Train our model using the Trainer() object
hit_trainer = Trainer(deterministic=True,
                      max_epochs=50,
                      log_every_n_steps=5,
                      logger=hit_logger,
                      callbacks=[ErrorTracker()])

hit_trainer.fit(hit_module, datamodule=hit_dm)

In SGD, each step randomly selects 32 training samples to compute the gradient. Since there are 175 training observations and the batch size is 32, one epoch consists of about 5.5 SGD steps.
→ That is, 1 epoch = the number of mini-batches needed to pass once over the entire training set.
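A minimal sketch of that arithmetic (X_train_t is the training tensor built above):

```python
import math
# Steps per epoch = ceil(n_train / batch_size); the last mini-batch is smaller than 32
print(math.ceil(X_train_t.shape[0] / 32))  # ceil(175 / 32) = 6
```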

# Evaluate performance on our test data
hit_trainer.test(hit_module, datamodule=hit_dm)

hit_results = pd.read_csv(hit_logger.experiment.metrics_file_path)
print(hit_results)


```python
# Simple generic function to produce this plot
def summary_plot(results, ax, col='loss',
                 valid_legend='Validation',
                 training_legend='Training',
                 ylabel='Loss', fontsize=20):
    
    for (column, color, label) in zip([f'train_{col}_epoch', f'valid_{col}'],
                                      ['black', 'red'],
                                      [training_legend, valid_legend]):
        results.plot(x='epoch',
                     y=column,
                     label=label,
                     marker='o',
                     color=color,
                     ax=ax)
    
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    
    return ax
```
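For example, summary_plot() can display the training and validation MAE recorded in hit_results above (a sketch, assuming the logged metric columns follow the same train_mae_epoch / valid_mae naming as the loss columns):

```python
fig, ax = subplots(1, 1, figsize=(6, 6))
ax = summary_plot(hit_results, ax, col='mae', ylabel='MAE')
ax.set_ylim([0, 400])
ax.set_xticks(np.linspace(0, 50, 11).astype(int))
```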

# Delete all references to the torch objects
del(Hitters,
hit_model, hit_dm,
hit_logger,
hit_test, hit_train,
X, Y,
X_test, X_train,
Y_test, Y_train,
X_test_t, Y_test_t,
hit_trainer, hit_module)

📦 Lab: Multilayer Network on the MNIST Digit Data

# The MNIST() function in torchvision.datasets retrieves the training and test data sets
(mnist_train, mnist_test) = [
    MNIST(
        root='data',
        train=train,
        download=True,
        transform=ToTensor()
    )
    for train in [True, False]
]

mnist_train

The training data contains 60,000 images and the test data 10,000. Each image is stored as a 28×28 matrix of pixels.
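A quick sketch confirming the dataset sizes and per-image shape:

```python
print(len(mnist_train), len(mnist_test))  # 60000 10000
img, label = mnist_train[0]
print(img.shape)  # torch.Size([1, 28, 28]) after the ToTensor() transform
```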

# Form a data module from training and test datasets
mnist_dm = SimpleDataModule(
    mnist_train,
    mnist_test,
    validation=0.2,
    num_workers=max_num_workers,
    batch_size=256
)

for idx, (X_, Y_) in enumerate(mnist_dm.train_dataloader()):
    print('X: ', X_.shape)
    print('Y: ', Y_.shape)
    if idx >= 1:
        break

Each batch of X contains 256 images, and each image is a tensor of size 1×28×28.
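Since the model below starts by flattening each image, here is a one-line check on the batch just inspected (a sketch):

```python
print(nn.Flatten()(X_).shape)  # torch.Size([256, 784]): each 1×28×28 image becomes a 784-vector
```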

# Specify our neural network.
class MNISTModel(nn.Module):
    def __init__(self):
        super(MNISTModel, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28 * 28, 256),
            nn.ReLU(),
            nn.Dropout(0.4)
        )

        self.layer2 = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3)
        )

        self._forward = nn.Sequential(
            self.layer1,
            self.layer2,
            nn.Linear(128, 10)
        )

    def forward(self, x):
        return self._forward(x)

✅ MNISTModel structure

  • Input: each image is a 1×28×28 grayscale image
    → the first layer flattens it into a 784-dimensional vector.

  • Layer 1:
    linear map from 784 to 256 dimensions

    • ReLU activation
    • Dropout(0.4), randomly zeroing 40% of the units
  • Layer 2:
    linear map from 256 to 128 dimensions

    • ReLU activation
    • Dropout(0.3), randomly zeroing 30% of the units
  • Output layer:
    linear map from 128 to 10 dimensions
    → logits for the 10 MNIST classes (digits 0~9); a parameter-count check follows the size check below.

# Check that the model produces output of expected size based on our existing batch X_ above.
mnist_model = MNISTModel()
mnist_model(X_).size()
# > torch.Size([256, 10])
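A quick parameter-count check for this architecture (a minimal sketch):

```python
# layer1: 784*256 + 256 = 200,960; layer2: 256*128 + 128 = 32,896; output: 128*10 + 10 = 1,290
print(sum(p.numel() for p in mnist_model.parameters() if p.requires_grad))  # 235146
```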

# SimpleModule.classification() method uses the cross-entropy loss function instead of mean squared error

mnist_module = SimpleModule.classification(mnist_model, num_classes=10)
mnist_logger = CSVLogger('logs', name='MNIST')
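Since classification uses cross-entropy on the logits, here is a minimal sketch of evaluating that loss directly on the batch X_, Y_ inspected above (assuming the standard nn.CrossEntropyLoss; for the still-untrained model the value should be near log 10 ≈ 2.3):

```python
loss_fn = nn.CrossEntropyLoss()
# Average of -log softmax(logits)[true class] over the 256 images in the batch
print(loss_fn(mnist_model(X_), Y_))
```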

# Supply training data, and fit the model
mnist_trainer = Trainer(
    deterministic=True,
    max_epochs=30,
    logger=mnist_logger,
    callbacks=[ErrorTracker()]
)

mnist_trainer.fit(mnist_module, datamodule=mnist_dm)

# Display accuracy across epochs.
mnist_results = pd.read_csv(mnist_logger.experiment.metrics_file_path)

fig, ax = subplots(1, 1, figsize=(6, 6))
summary_plot(
    mnist_results,
    ax,
    col='accuracy',
    ylabel='Accuracy'
)
ax.set_ylim([0.5, 1])
ax.set_ylabel('Accuracy')
ax.set_xticks(np.linspace(0, 30, 7).astype(int))

# Evaluate the accuracy using the test() method
mnist_trainer.test(mnist_module, datamodule=mnist_dm)

# Multiclass logistic regression
class MNIST_MLR(nn.Module):
    def __init__(self):
        super(MNIST_MLR, self).__init__()
        self.linear = nn.Sequential(
            nn.Flatten(),
            nn.Linear(784, 10)
        )

    def forward(self, x):
        return self.linear(x)

mlr_model   = MNIST_MLR()
mlr_module  = SimpleModule.classification(mlr_model, num_classes=10)
mlr_logger  = CSVLogger('logs', name='MNIST_MLR')
mlr_trainer = Trainer(
    deterministic=True,
    max_epochs=30,
    callbacks=[ErrorTracker()]
)

mlr_trainer.fit(mlr_module, datamodule=mnist_dm)

# Compute the test accuracy, just as before
mlr_trainer.test(mlr_module, datamodule=mnist_dm)

# Delete some of the objects
del (
    mnist_test, mnist_train, mnist_model, mnist_dm,
    mnist_trainer, mnist_module, mnist_results,
    mlr_model, mlr_module, mlr_trainer
)

0๊ฐœ์˜ ๋Œ“๊ธ€