The most important part of ResNet is the residual block:
def forward(self, x):
    """Residual-block forward pass: two conv/BN stages plus a skip connection.

    NOTE(review): fragment shown without its enclosing class — assumes
    self.conv1/bn1/conv2/bn2/relu/downsample are configured as in the
    BasicBlock class defined later in this document.
    """
    identity = x

    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)

    out = self.conv2(out)
    out = self.bn2(out)

    if self.downsample is not None:
        # Bug fix: downsample is a module and must be CALLED on x;
        # the original assigned the module object itself to identity.
        identity = self.downsample(x)

    out += identity  # the skip connection
    out = self.relu(out)
    return out
Residual block structure:
In the figure, the structure on the left is called BasicBlock, and the one on the right is called Bottleneck.
BasicBlock
class BasicBlock(nn.Module):
    """Two-layer residual block used by ResNet-18/34.

    Structure: 3x3 conv -> BN -> ReLU -> 3x3 conv -> BN, plus a skip
    connection; ``downsample`` (if given) projects the input so its shape
    matches the residual branch's output.

    Args:
        inplanes: number of input channels.
        planes: number of channels in both 3x3 convolutions.
        stride: stride of the first convolution (2 halves the feature map).
        downsample: optional module applied to x for the shortcut path.
    """

    # Output channels = planes * expansion; BasicBlock does not widen.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Only conv1 may downsample spatially (stride may be 2).
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):  # bug fix: the original `def` line lacked a colon
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            # Bug fix: apply the downsample module to x; the original
            # assigned the module object itself to identity.
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)
        return out
bottleneck
Note that Res18 and Res34 use BasicBlock, and the rest use Bottleneck
resnet18: ResNet(BasicBlock, [2, 2, 2, 2])
resnet34: ResNet(BasicBlock, [3, 4, 6, 3])
resnet50:ResNet(Bottleneck, [3, 4, 6, 3])
resnet101:ResNet(Bottleneck, [3, 4, 23, 3])
resnet152:ResNet(Bottleneck, [3, 8, 36, 3])
expansion = 4, because in Bottleneck the number of output channels of each residual structure is 4 times its intermediate channel count (planes).
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual block used by ResNet-50/101/152.

    The first 1x1 convolution reduces the channel count to ``planes``,
    the 3x3 convolution (possibly strided) does the spatial work, and the
    last 1x1 convolution expands back out to ``planes * expansion``.
    ``downsample`` (if given) projects the shortcut to the matching shape.
    """

    # Each block widens its output to planes * 4 channels.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        # Reduce -> transform -> expand.
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Shortcut path: identity, or a projection when shapes differ.
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))  # no ReLU before the addition

        y += shortcut
        return self.relu(y)
ResNet class
A few key points:
1. Before the residual stages, the original 224 x 224 image is first processed by a 7 x 7 large-kernel convolution, BN, ReLU, and max pooling, yielding a 56 x 56 x 64 feature map.
2. From the definitions of layer1, layer2, layer3, and layer4 it can be seen that the first stage does not shrink the feature map, while each remaining stage uses a stride-2 3 x 3 convolution in its first block to halve the feature map's height and width.
3. In the _make_layer function, downsample projects the input of the residual structure when shapes differ — simply a 1 x 1 convolution plus BN — and is then passed into the BasicBlock or Bottleneck class.
4. The last pooling layer uses adaptive average pooling instead of the fixed global average pooling described in the paper.
class ResNet(nn.Module):
    """ResNet backbone + classifier (He et al., "Deep Residual Learning").

    Args:
        block: residual block class (BasicBlock or Bottleneck); must expose
            an integer ``expansion`` class attribute.
        layers: list of four ints — number of blocks in each stage.
        num_classes: size of the final fully-connected classification layer.
        zero_init_residual: if True, zero-initialize the last BN in each
            residual branch so every block starts as an identity mapping.
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(ResNet, self).__init__()
        self.inplanes = 64
        # Stem: 3x224x224 -> 64x56x56 via 7x7/2 conv, BN, ReLU, 3x3/2 maxpool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Stage 1 keeps spatial size; stages 2-4 halve it with stride 2.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # Adaptive pooling -> 1x1 regardless of input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Bug fix: zero_init_residual was accepted but never used. Apply the
        # standard initialization from the reference implementation.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        if zero_init_residual:
            for m in self.modules():
                # Last BN of a Bottleneck is bn3; of a BasicBlock, bn2.
                # (Attribute check avoids referencing the block classes.)
                last_bn = getattr(m, 'bn3', None) or getattr(m, 'bn2', None)
                if last_bn is not None:
                    nn.init.constant_(last_bn.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        The first block may downsample (stride=2) and/or change channel
        count; a 1x1 conv + BN projection is built for its shortcut when
        the input shape would not match the output shape.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        # Subsequent blocks see the expanded channel count, stride 1.
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem, four stages, pooling, and classifier on (N, 3, H, W)."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten to (N, 512 * expansion)
        x = self.fc(x)
        return x