AlexNet是直筒结构。
(1) 参数num_classes<=0(默认为-1)时,用作backbone:依次为2组conv+relu+maxPooling、2组conv+relu、1组conv+relu+maxPooling,共5个卷积层。
(2) 参数num_classes>0时,用作分类网络:经过前面的5层卷积后,先用view展平,再接分类器。该分类器由2组dropout+linear+relu和1个linear输出层组成,共3个全连接层。
@BACKBONES.register_module()
class AlexNet(BaseBackbone):
    """`AlexNet <https://en.wikipedia.org/wiki/AlexNet>`_ backbone.

    A plain feed-forward stack: five convolutions (each followed by a ReLU)
    interleaved with three max-pooling layers. When ``num_classes`` is
    positive, a three-layer fully connected classifier is appended.

    The input for AlexNet is a 224x224 RGB image.

    Args:
        num_classes (int): number of classes for classification.
            The default value is -1, which uses the backbone as
            a feature extractor without the top classifier.
    """

    def __init__(self, num_classes=-1):
        super().__init__()
        self.num_classes = num_classes

        # Every max-pool in the network shares the same window and stride.
        pool_cfg = dict(kernel_size=3, stride=2)

        feature_layers = [
            # conv1 + pool
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(**pool_cfg),
            # conv2 + pool
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(**pool_cfg),
            # conv3 and conv4 have no pooling in between
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # conv5 + pool
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(**pool_cfg),
        ]
        self.features = nn.Sequential(*feature_layers)

        # The classifier head only exists in classification mode.
        if self.num_classes > 0:
            flat_dim = 256 * 6 * 6
            hidden_dim = 4096
            head_layers = [
                nn.Dropout(),
                nn.Linear(flat_dim, hidden_dim),
                nn.ReLU(inplace=True),
                nn.Dropout(),
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(inplace=True),
                nn.Linear(hidden_dim, num_classes),
            ]
            self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the five conv layers, then (optionally) flatten and classify.

        Args:
            x: input batch fed to the convolutional feature extractor.

        Returns:
            The output of ``self.features`` when ``num_classes`` is not
            positive; otherwise the output of ``self.classifier`` applied to
            the flattened features.
        """
        out = self.features(x)
        if self.num_classes > 0:
            # The flatten width is hard-coded to 256 * 6 * 6, i.e. the
            # feature size expected for 224x224 input images.
            batch_size = out.size(0)
            out = self.classifier(out.view(batch_size, 256 * 6 * 6))
        return out