Receptive field를 넓혀 성능을 향상시킨 모델들
def conv_relu(in_channels, out_channels, kernel_size=3, rate=1):
    """Conv2d + ReLU block with "same" padding for any odd kernel size.

    Padding is derived as ``rate * (kernel_size - 1) // 2`` so the spatial
    size is preserved for dilated 3x3 convs (padding == dilation, as DeepLab
    uses) AND for 1x1 convs (padding == 0). The original hard-coded
    ``padding=rate``, which wrongly grew the feature map for 1x1 convs.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: conv kernel size (odd), default 3.
        rate: dilation rate; also determines the padding.

    Returns:
        nn.Sequential of Conv2d followed by ReLU (the original was missing
        the ReLU and did not close the Sequential).
    """
    padding = rate * (kernel_size - 1) // 2
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size,
                  stride=1, padding=padding, dilation=rate),
        nn.ReLU(inplace=True),
    )
# conv1 stage: two 3x3 convs then 3x3 max pool (changed from 2x2), input size -> 1/2.
# Fixed typos: nn.PaxPool2d -> nn.MaxPool2d, streide -> stride.
self.features1 = nn.Sequential(
    conv_relu(3, 64, 3, 1),
    conv_relu(64, 64, 3, 1),
    nn.MaxPool2d(3, stride=2, padding=1)
)
# conv2 stage: two 3x3 convs then 3x3/stride-2 max pool, input size -> 1/4.
# Fixed typos: nn.PaxPool2d -> nn.MaxPool2d, streide -> stride.
self.features2 = nn.Sequential(
    conv_relu(64, 128, 3, 1),
    conv_relu(128, 128, 3, 1),
    nn.MaxPool2d(3, stride=2, padding=1)
)
# conv3 stage: three 3x3 convs then 3x3/stride-2 max pool, input size -> 1/8.
# Fixed typos: nn.PaxPool2d -> nn.MaxPool2d, streide -> stride.
self.features3 = nn.Sequential(
    conv_relu(128, 256, 3, 1),
    conv_relu(256, 256, 3, 1),
    conv_relu(256, 256, 3, 1),
    nn.MaxPool2d(3, stride=2, padding=1)
)
# conv4 stage: stride-1 pool keeps spatial size fixed (DeepLab keeps 1/8 resolution here).
# Fixed typos: nn.PaxPool2d -> nn.MaxPool2d, streide -> stride.
self.features4 = nn.Sequential(
    conv_relu(256, 512, 3, 1),
    conv_relu(512, 512, 3, 1),
    conv_relu(512, 512, 3, 1),
    nn.MaxPool2d(3, stride=1, padding=1)
)
# conv5 stage: dilated (rate=2) convs; stride-1 max pool + avg pool keep spatial size fixed.
# Fixed typos: nn.PaxPool2d -> nn.MaxPool2d, streide -> stride.
self.features5 = nn.Sequential(
    conv_relu(512, 512, 3, rate=2),
    conv_relu(512, 512, 3, rate=2),
    conv_relu(512, 512, 3, rate=2),
    nn.MaxPool2d(3, stride=1, padding=1),
    nn.AvgPool2d(3, stride=1, padding=1)  # last two pooling layers preserve size
)
# Large-FOV classification head (DeepLab-LargeFOV): rate-12 dilated conv, then 1x1 convs.
self.classifier = nn.Sequential(
    conv_relu(512, 1024, 3, rate=12),
    nn.Dropout2d(0.5),
    # fixed typo: in_channels was 1204, must match the 1024 channels produced above
    conv_relu(1024, 1024, 1, 1),
    nn.Dropout2d(0.5),
    nn.Conv2d(1024, num_classes, 1)  # per-class score maps
)
class DeepLabV1(nn.Module):
    """DeepLab v1: backbone + classifier, then bilinear upsampling of the
    coarse score map back up by ``upsampling`` (default 8, matching the
    backbone's 1/8 output stride).

    Args:
        backbone: feature extractor (conv1~conv5).
        classifier: head producing per-class score maps.
        upsampling: integer scale factor applied to the feature-map size.
    """

    def __init__(self, backbone, classifier, upsampling=8):
        super().__init__()
        # fixed typo: was `self.bacbone`, which forward() could never find
        self.backbone = backbone
        self.classifier = classifier
        self.upsampling = upsampling

    def forward(self, x):
        x = self.backbone(x)  # conv1~conv5
        _, _, feature_map_h, feature_map_w = x.size()
        x = self.classifier(x)
        # fixed: `torch.nn.F.interpolate` does not exist; interpolate lives in
        # torch.nn.functional. Also added the missing return.
        x = torch.nn.functional.interpolate(
            x,
            size=(feature_map_h * self.upsampling, feature_map_w * self.upsampling),
            mode="bilinear",
            align_corners=False,
        )
        return x
conv4와 conv5의 MaxPool, AvgPool 제거
Bilinear Interpolation으로 Up Sampling 하지 않고 Deconvolution으로 원본 사이즈 복원
conv4
# DilatedNet front-end conv4: three 3x3 convs, pooling removed so the
# spatial resolution is preserved at this stage.
self.features4 = nn.Sequential(
    *[conv_relu(c_in, 512, 3, 1) for c_in in (256, 512, 512)]
)
# DilatedNet front-end conv5: three identical dilated (rate=2) 3x3 convs,
# pooling removed so the spatial resolution is preserved at this stage.
self.features5 = nn.Sequential(
    *(conv_relu(512, 512, 3, rate=2) for _ in range(3))
)
class DilatedNetFron(nn.Module):
    """DilatedNet front end: backbone + classifier, then a learned
    ConvTranspose2d (deconvolution) that upsamples the score map x8 instead
    of bilinear interpolation.

    NOTE(review): the class name looks like a typo for ``DilatedNetFront``
    (the original even called ``super(DilatedNetFront, ...)`` — an undefined
    name that crashed at construction). Kept as-is so existing callers work;
    the super() call is fixed with the argument-free form.

    Args:
        backbone: feature extractor.
        classifier: head producing per-class score maps.
        num_classes: channel count of the score map (default 11, the
            original hard-coded value — generalized to a parameter).
    """

    def __init__(self, backbone, classifier, num_classes=11):
        super().__init__()  # fixed NameError: was super(DilatedNetFront, self)
        self.backbone = backbone
        self.classifier = classifier
        # kernel 16 / stride 8 / padding 4 => output size = 8 * input size
        self.deconv = nn.ConvTranspose2d(in_channels=num_classes,
                                         out_channels=num_classes,
                                         kernel_size=16,
                                         stride=8,
                                         padding=4)

    def forward(self, x):
        x = self.backbone(x)
        x = self.classifier(x)
        out = self.deconv(x)
        return out
7) DilatedNet(Front + Basic Context module)
class BasicContextModule(nn.Module):
    """DilatedNet basic context module: a stack of 3x3 convs over the
    class-score maps with dilations 1, 1, 2, 4, 8, 16, 1, 1, keeping the
    channel count equal to ``num_classes`` throughout.

    NOTE(review): the paper's final layer is a plain 1x1 conv (no ReLU);
    layer8 here is a 3x3 conv_relu as in the original — confirm intent.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.layer1 = nn.Sequential(conv_relu(num_classes, num_classes, 3, 1))
        self.layer2 = nn.Sequential(conv_relu(num_classes, num_classes, 3, 1))
        self.layer3 = nn.Sequential(conv_relu(num_classes, num_classes, 3, 2))
        self.layer4 = nn.Sequential(conv_relu(num_classes, num_classes, 3, 4))
        self.layer5 = nn.Sequential(conv_relu(num_classes, num_classes, 3, 8))
        self.layer6 = nn.Sequential(conv_relu(num_classes, num_classes, 3, 16))
        self.layer7 = nn.Sequential(conv_relu(num_classes, num_classes, 3, 1))
        self.layer8 = nn.Sequential(conv_relu(num_classes, num_classes, 3, 1))

    def forward(self, x):
        # Added: the original had no forward(), so nn.Module raised
        # NotImplementedError on any call. Layers are applied in order.
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.layer8(x)
        return x
https://arxiv.org/abs/1505.04366 ("Learning Deconvolution Network for Semantic Segmentation")
https://arxiv.org/abs/1611.09326 ("The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for Semantic Segmentation")
https://arxiv.org/abs/1505.04597 ("U-Net: Convolutional Networks for Biomedical Image Segmentation")
https://arxiv.org/abs/1606.00915 ("DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs")