FCN의 한계와 Encoder, Decoder 구조로 극복한 모델들
https://velog.io/@choihj94/Boostcampers-TIL-13#4-transposed-convolution
def CBR(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
    """Build a Conv2d -> BatchNorm2d -> ReLU block.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Convolution kernel size (default 3).
        stride: Convolution stride (default 1).
        padding: Padding applied by the convolution (default 1).

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
    )
def DCB(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
    """Build a ConvTranspose2d -> BatchNorm2d -> ReLU block.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the transposed convolution.
        kernel_size: Transposed-convolution kernel size (default 3).
        stride: Transposed-convolution stride (default 1).
        padding: Padding argument of the transposed convolution (default 1).

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    return nn.Sequential(
        nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
    )
# Encoder: five stages of Conv-BN-ReLU blocks (64, 128, 256, 512, 512 channels),
# each followed by a 2x2, stride-2 max-pool.
# return_indices=True makes every pooling layer also return the positions of the
# selected maxima, which the matching nn.MaxUnpool2d needs for unpooling.

# Stage 1
self.conv1_1 = CBR(3, 64, 3, 1, 1)
self.conv1_2 = CBR(64, 64, 3, 1, 1)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True, return_indices=True)

# Stage 2
self.conv2_1 = CBR(64, 128, 3, 1, 1)
self.conv2_2 = CBR(128, 128, 3, 1, 1)
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True, return_indices=True)

# Stage 3
self.conv3_1 = CBR(128, 256, 3, 1, 1)
self.conv3_2 = CBR(256, 256, 3, 1, 1)
self.conv3_3 = CBR(256, 256, 3, 1, 1)
# NOTE(review): the attribute is spelled `poo3` (cf. `pool1`/`pool2`); the same
# applies to `poo4` and `poo5` below. The names are kept because code outside
# this view may reference them -- confirm and rename consistently.
self.poo3 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True, return_indices=True)

# Stage 4
self.conv4_1 = CBR(256, 512, 3, 1, 1)
self.conv4_2 = CBR(512, 512, 3, 1, 1)
self.conv4_3 = CBR(512, 512, 3, 1, 1)
self.poo4 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True, return_indices=True)

# Stage 5
self.conv5_1 = CBR(512, 512, 3, 1, 1)
self.conv5_2 = CBR(512, 512, 3, 1, 1)
self.conv5_3 = CBR(512, 512, 3, 1, 1)
self.poo5 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True, return_indices=True)

# "Fully connected" layers expressed as convolutions: a 7x7 conv without padding
# followed by a 1x1 conv, each with channel-wise dropout (p=0.5).
self.fc6 = CBR(512, 4096, 7, 1, 0)
self.drop6 = nn.Dropout2d(0.5)
self.fc7 = CBR(4096, 4096, 1, 1, 0)
self.drop7 = nn.Dropout2d(0.5)
# Decoder: mirrors the encoder with max-unpooling followed by
# ConvTranspose2d -> BatchNorm2d -> ReLU blocks (DCB).
# The spatial sizes in the comments are those given in the original notes and
# correspond to a 224x224 final output.

# 7x7 transposed conv without padding: undoes the spatial reduction of fc6.
self.fc_deconv = DCB(4096, 512, 7, 1, 0)

# Stage 5
self.unpool5 = nn.MaxUnpool2d(2, stride=2)  # size x2 (14x14)
self.deconv5_1 = DCB(512, 512, 3, 1, 1)
self.deconv5_2 = DCB(512, 512, 3, 1, 1)
self.deconv5_3 = DCB(512, 512, 3, 1, 1)

# Stage 4
self.unpool4 = nn.MaxUnpool2d(2, stride=2)  # size x2 (28x28)
self.deconv4_1 = DCB(512, 512, 3, 1, 1)
self.deconv4_2 = DCB(512, 512, 3, 1, 1)
self.deconv4_3 = DCB(512, 256, 3, 1, 1)

# Stage 3
self.unpool3 = nn.MaxUnpool2d(2, stride=2)  # size x2 (56x56)
self.deconv3_1 = DCB(256, 256, 3, 1, 1)
self.deconv3_2 = DCB(256, 256, 3, 1, 1)
self.deconv3_3 = DCB(256, 128, 3, 1, 1)

# Stage 2
self.unpool2 = nn.MaxUnpool2d(2, stride=2)  # size x2 (112x112)
self.deconv2_1 = DCB(128, 128, 3, 1, 1)
self.deconv2_2 = DCB(128, 64, 3, 1, 1)

# Stage 1
self.unpool1 = nn.MaxUnpool2d(2, stride=2)  # size x2 (224x224)
self.deconv1_1 = DCB(64, 64, 3, 1, 1)
self.deconv1_2 = DCB(64, 64, 3, 1, 1)

# 1x1 convolution mapping the 64 decoder channels to `num_classes` output
# channels; `num_classes` must be provided by the enclosing scope.
# The class is `nn.Conv2d` -- `torch.nn` has no lowercase `conv2d` attribute.
self.score_fr = nn.Conv2d(64, num_classes, kernel_size=1, stride=1, padding=0, dilation=1)
https://arxiv.org/abs/1505.04366 (Learning Deconvolution Network for Semantic Segmentation)