
# Load the CIFAR100 training and test splits through torchvision's CIFAR100
# dataset class (root="data", download=True for both splits).
cifar_train = CIFAR100(root="data", train=True, download=True)
cifar_test = CIFAR100(root="data", train=False, download=True)

# Run every raw image through ToTensor and stack the results into a single
# tensor per split.
transform = ToTensor()
cifar_train_X = torch.stack(list(map(transform, cifar_train.data)))
cifar_test_X = torch.stack(list(map(transform, cifar_test.data)))

# Re-bind the split names to TensorDatasets pairing the image tensor with the
# integer targets. The targets are read before each name is re-bound.
cifar_train = TensorDataset(cifar_train_X, torch.tensor(cifar_train.targets))
cifar_test = TensorDataset(cifar_test_X, torch.tensor(cifar_test.targets))
print(cifar_train_X.shape)  # torch.Size([50000, 3, 32, 32])
# The CIFAR100 dataset consists of:
# - images of shape (3, 32, 32), where:
#   - 3 is the number of color channels (RGB)
#   - 32 x 32 is the height and width in pixels
# In other words, each image is an RGB color image, and the images are
# classified into a total of 100 classes.
# Create the data module that serves the CIFAR100 tensors in batches of 128,
# with validation=0.2 and the recommended number of loader workers.
max_num_workers = rec_num_workers()
print(max_num_workers)  # 2
cifar_dm = SimpleDataModule(
    cifar_train,
    cifar_test,
    validation=0.2,
    num_workers=max_num_workers,
    batch_size=128,
)

# Print the shapes of the first two training batches and stop.
# X_ and Y_ keep the last batch seen; X_ is reused later as example input
# for the model summary.
for idx, (X_, Y_) in enumerate(cifar_dm.train_dataloader()):
    print('X: ', X_.shape)
    print('Y: ', Y_.shape)
    if idx >= 1:
        break
# Show 25 distinct, randomly chosen training images in a 5x5 grid.
# The generator is seeded with 4 so the same images are picked on every run.
fig, axes = subplots(5, 5, figsize=(10, 10))
rng = np.random.default_rng(4)
indices = rng.choice(np.arange(len(cifar_train)), 25, replace=False).reshape((5, 5))
for i, j in np.ndindex(5, 5):
    idx = indices[i, j]
    # Images are stored channel-first (3, 32, 32); move the channel axis last
    # so that imshow receives (height, width, channel).
    image = np.transpose(cifar_train[idx][0], [1, 2, 0])
    panel = axes[i, j]
    panel.imshow(image, interpolation=None)
    # Hide the tick marks on both axes.
    panel.set_xticks([])
    panel.set_yticks([])
# Specify a moderately-sized CNN
class BuildingBlock(nn.Module):
    """One convolutional stage: 3x3 convolution, ReLU, then 2x2 max-pooling.

    The convolution uses ``padding='same'``, so only the pooling step changes
    the spatial size of the feature map.
    """

    def __init__(self, in_channels, out_channels):
        """Create the stage.

        Args:
            in_channels: Number of channels of the incoming feature map.
            out_channels: Number of channels produced by the convolution.
        """
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=(3, 3),
            padding='same',
        )
        self.activation = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=(2, 2))

    def forward(self, x):
        """Apply convolution, ReLU and max-pooling to ``x``, in that order."""
        return self.pool(self.activation(self.conv(x)))
# The padding='same' argument of nn.Conv2d() keeps the spatial dimensions
# (height, width) of the output identical to those of the input.
# That is, it automatically applies as much zero-padding as needed so that the
# kernel does not run off the edges of the image.
# -> As a result, the feature map does not shrink after the convolution, which
#    simplifies the network design and makes the feature dimensions easier to track.
# Form the deep learning model for CIFAR100 data
# We use several BuildingBlock() modules sequentially
class CIFARModel(nn.Module):
    """CNN classifier: four BuildingBlock stages followed by a dense head.

    The stages map 3 -> 32 -> 64 -> 128 -> 256 channels. The head is
    dropout (p=0.5), a 1024 -> 512 linear layer, ReLU, and a final
    512 -> 100 linear layer.
    """

    def __init__(self):
        super().__init__()
        # Consecutive entries give the (in, out) channels of each stage.
        channels = [3, 32, 64, 128, 256]
        stages = [
            BuildingBlock(n_in, n_out)
            for n_in, n_out in zip(channels[:-1], channels[1:])
        ]
        self.conv = nn.Sequential(*stages)
        # The first linear layer expects 2 * 2 * 256 flattened features.
        self.output = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(2 * 2 * 256, 512),
            nn.ReLU(),
            nn.Linear(512, 100),
        )

    def forward(self, x):
        """Run the convolutional stages, flatten per sample, apply the head."""
        features = self.conv(x)
        flattened = torch.flatten(features, start_dim=1)
        return self.output(flattened)
# Instantiate the CNN and summarise it layer by layer, using the batch X_ as
# example input and reporting input size, output size and parameter count.
cifar_model = CIFARModel()
summary(
    cifar_model,
    input_data=X_,
    col_names=['input_size', 'output_size', 'num_params'],
)
# The total number of trainable parameters in this model is 964,516.
# Set up training: an RMSprop optimizer over the model parameters, a
# classification module for the 100 classes, a CSV logger and the trainer.
cifar_optimizer = RMSprop(cifar_model.parameters(), lr=0.001)
cifar_module = SimpleModule.classification(
    cifar_model,
    num_classes=100,
    optimizer=cifar_optimizer,
)
cifar_logger = CSVLogger('logs', name='CIFAR100')
cifar_trainer = Trainer(
    deterministic=True,
    max_epochs=30,
    logger=cifar_logger,
    callbacks=[ErrorTracker()],
)

# Fit the module on the batches served by the CIFAR100 data module.
cifar_trainer.fit(cifar_module, datamodule=cifar_dm)
# Check validation and training accuracy
# Read the metrics CSV written by the logger and plot the accuracy columns.
log_path = cifar_logger.experiment.metrics_file_path
cifar_results = pd.read_csv(log_path)
fig, ax = subplots(1, 1, figsize=(6, 6))
summary_plot(
    cifar_results,
    ax,
    col='accuracy',
    ylabel='Accuracy',
)
# The trainer runs for max_epochs=30, so place the ticks over 0..30
# (0, 5, ..., 30) rather than only over the first ten epochs.
ax.set_xticks(np.linspace(0, 30, 7).astype(int))
ax.set_ylabel('Accuracy')
ax.set_ylim([0, 1])

# Evaluate our model on our test data
cifar_trainer.test(cifar_module, datamodule=cifar_dm)
# Hardware Acceleration
# Best-effort re-run of fit and test with the 'mps' accelerator. The module's
# metrics are moved to the 'mps' device first. If any step fails, the run is
# skipped, but the reason is reported instead of being silently discarded.
try:
    for name, metric in cifar_module.metrics.items():
        cifar_module.metrics[name] = metric.to('mps')
    cifar_trainer_mps = Trainer(
        accelerator='mps',
        deterministic=True,
        max_epochs=30,
    )
    cifar_trainer_mps.fit(cifar_module, datamodule=cifar_dm)
    cifar_trainer_mps.test(cifar_module, datamodule=cifar_dm)
except Exception as err:
    # Catch Exception rather than using a bare except, so that
    # KeyboardInterrupt and SystemExit still propagate.
    print(f'Skipping the MPS run: {err!r}')
# By changing how the Trainer is called and how the evaluated metrics are set
# up, training can become roughly 2-3 times faster.
# -> That is, the computational efficiency per epoch improves considerably, so
#    even the same model can be trained more quickly.
# Use Pretrained CNN Models
# Mount Google Drive (Colab only) and list its top-level folder.
from google.colab import drive
drive.mount('/content/drive')
import os
print(os.listdir('/content/drive/MyDrive'))

# Preprocessing steps: resize to 232x232, centre-crop to 224x224, then
# normalise each channel with the given means and standard deviations.
resize = Resize((232, 232), antialias=True)
crop = CenterCrop(224)
normalize = Normalize([0.485, 0.456, 0.406],
                      [0.229, 0.224, 0.225])

# imagenet_class_index.json is stored in this same folder; skip JSON files so
# that read_image is only called on image files.
imgfiles = sorted(
    f for f in glob('/content/drive/MyDrive/book_images/*')
    if not f.lower().endswith('.json')
)

# Read each image, resize and crop it, divide by 255, and stack the results
# into one batch before normalising.
imgs = torch.stack([
    torch.div(crop(resize(read_image(f))), 255)
    for f in imgfiles
])
imgs = normalize(imgs)
print(imgs.size())  # torch.Size([6, 3, 224, 224])
# Build ResNet50 with its default pretrained weights and summarise it, using
# the image batch as example input.
resnet_model = resnet50(weights=ResNet50_Weights.DEFAULT)
summary(
    resnet_model,
    input_data=imgs,
    col_names=['input_size', 'output_size', 'num_params'],
)
# Set the mode to eval() to ensure that the model is ready to predict on new data.
resnet_model.eval()

# Feed our six images through the fitted network.
# Gradients are not needed for prediction, so run the forward pass under
# no_grad() to avoid building the autograd graph.
with torch.no_grad():
    img_preds = resnet_model(imgs)

# Convert the raw outputs to per-image probabilities that sum to 1 across the
# classes. torch.softmax computes the same exp / sum(exp) normalisation in a
# numerically stable way. The result is kept as a NumPy array.
img_probs = torch.softmax(img_preds, dim=1).numpy()
# Load the index file associated with ImageNet from Drive.
# The with-block closes the file handle as soon as the JSON has been parsed.
with open('/content/drive/MyDrive/book_images/imagenet_class_index.json',
          encoding='utf-8') as index_file:
    labs = json.load(index_file)

# Each key is a class index stored as a string; the second item of each value
# is used as the label. Build a label table indexed and sorted by class index.
class_labels = pd.DataFrame(
    [(int(k), v[1]) for k, v in labs.items()],
    columns=['idx', 'label'],
)
class_labels = class_labels.set_index('idx').sort_index()
# Construct a data frame for each image file with the
# labels with the three highest probabilities as estimated
# by the model above.
# For every image, attach its class probabilities to a copy of the label
# table, keep the three most probable labels and print them.
for i, imgfile in enumerate(imgfiles):
    img_df = class_labels.assign(prob=img_probs[i])
    img_df = img_df.sort_values(by='prob', ascending=False).head(3)
    print(f'Image: {imgfile}')
    # Replace the class-index row labels with a fresh 0..2 index for display.
    print(img_df.reset_index(drop=True))
