Detailed explanation of ResNet code

The most important piece of the ResNet code is the residual block; its forward pass looks like this:

def forward(self, x):
    identity = x                    # keep the input for the skip connection

    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)

    out = self.conv2(out)
    out = self.bn2(out)

    if self.downsample is not None:
        # project the identity so its shape matches out
        identity = self.downsample(x)

    out += identity                 # the residual (shortcut) addition
    out = self.relu(out)

    return out
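
The snippets in this post come from torchvision's resnet.py and rely on two small helpers, conv3x3 and conv1x1. So that the code can run standalone, here are the imports and the standard definitions of those helpers:

import torch
import torch.nn as nn


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                     bias=False)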

Residual block structure:

ResNet uses two kinds of residual blocks: the shallower design is called BasicBlock (two 3 x 3 convolutions), and the deeper one is called Bottleneck (a 1 x 1, 3 x 3, 1 x 1 stack).

BasicBlock

class BasicBlock(nn.Module):
    expansion = 1  # BasicBlock keeps the channel count: output channels = planes

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)  # stride applied here
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out
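
A minimal smoke test (my own example, not from the original post): a stride-2 BasicBlock widening 64 to 128 channels, with the matching 1 x 1 projection on the identity branch, as in the first block of layer2 in ResNet-18/34:

downsample = nn.Sequential(
    conv1x1(64, 128, stride=2),   # match the stride and channels of the main path
    nn.BatchNorm2d(128),
)
block = BasicBlock(64, 128, stride=2, downsample=downsample)
x = torch.randn(1, 64, 56, 56)
print(block(x).shape)  # torch.Size([1, 128, 28, 28])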

 

Bottleneck

Note that ResNet-18 and ResNet-34 use BasicBlock, while ResNet-50/101/152 use Bottleneck:

resnet18: ResNet(BasicBlock, [2, 2, 2, 2])

resnet34: ResNet(BasicBlock, [3, 4, 6, 3])

resnet50: ResNet(Bottleneck, [3, 4, 6, 3])

resnet101: ResNet(Bottleneck, [3, 4, 23, 3])

resnet152: ResNet(Bottleneck, [3, 8, 36, 3])
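
As a rough sketch (simplified; torchvision's real factory functions also handle loading pretrained weights), these constructors are typically exposed like this:

def resnet18(**kwargs):
    """Construct a ResNet-18 model (simplified sketch)."""
    return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)


def resnet50(**kwargs):
    """Construct a ResNet-50 model (simplified sketch)."""
    return ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)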

expansion = 4, because the number of output channels of each Bottleneck is 4 times its planes argument (for example, planes=64 gives 64 * 4 = 256 output channels).

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)        # 1x1: reduce channels
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)  # 3x3: spatial conv, stride applied here
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)  # 1x1: expand channels
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out
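
Another quick sketch (my own example): the first Bottleneck of layer1 in ResNet-50, where planes=64 but the output has 64 * expansion = 256 channels, so the identity branch needs a 1 x 1 projection even at stride 1:

downsample = nn.Sequential(
    conv1x1(64, 256),   # 64 -> 256 channels to match the expanded output
    nn.BatchNorm2d(256),
)
block = Bottleneck(64, 64, downsample=downsample)
x = torch.randn(1, 64, 56, 56)
print(block(x).shape)  # torch.Size([1, 256, 56, 56])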

 

ResNet class

A few key points:

1. Before the residual stages, the original 224 x 224 image is processed by a 7 x 7 convolution, BN, ReLU, and max pooling, yielding a 56 x 56 x 64 feature map (verified in the sketch after this list).
2. From the definitions of layer1, layer2, layer3, and layer4 you can see that the first stage does not shrink the feature map, while each remaining stage halves the feature map's height and width with a stride-2 3 x 3 convolution in its first block.
3. In the _make_layer function, downsample projects the block's input with a 1 x 1 convolution plus BN whenever the stride or channel count changes; it is passed to the first BasicBlock or Bottleneck of the stage so that the identity branch matches the output shape.
4. The last pooling layer is nn.AdaptiveAvgPool2d((1, 1)), which behaves as the global average pooling described in the paper while working for any input size.
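
To check point 1 concretely, here is a small shape trace of the stem on its own (my sketch; the layers mirror the ones defined in ResNet.__init__ below):

stem = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),  # 224 -> 112
    nn.BatchNorm2d(64),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),                  # 112 -> 56
)
x = torch.randn(1, 3, 224, 224)
print(stem(x).shape)  # torch.Size([1, 64, 56, 56])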

class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(ResNet, self).__init__()
        self.inplanes = 64
        # Stem: 7x7 stride-2 conv -> BN -> ReLU -> 3x3 stride-2 max pool
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four residual stages; all but the first halve the spatial size
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # global average pooling
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        # torchvision's version also initializes weights here and, when
        # zero_init_residual=True, zeroes the last BN of each block; omitted.

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        # The identity branch needs a projection whenever the first block
        # changes the spatial size (stride != 1) or the channel count
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        # Only the first block of a stage downsamples; the rest keep the shape
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, 512 * block.expansion)
        x = self.fc(x)

        return x
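
Putting it all together, a quick end-to-end check (my own sketch): build ResNet-50 from the pieces above and run a dummy batch through it:

model = ResNet(Bottleneck, [3, 4, 6, 3])   # resnet50
x = torch.randn(2, 3, 224, 224)
print(model(x).shape)  # torch.Size([2, 1000])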


Origin: blog.csdn.net/qq_16792139/article/details/114314100