Article directory
Preface
This article implements the SqueezeNet network structure in Chainer, porting the original PyTorch architecture to a Chainer version, and computes the parameter count of SqueezeNet.
Code
class Fire(chainer.Chain):
    """SqueezeNet Fire module.

    A 1x1 "squeeze" convolution reduces the channel count, then two parallel
    "expand" branches (1x1 and 3x3 convolutions) are applied to the squeezed
    output and concatenated along the channel axis.

    Layer bookkeeping convention (shared with ``SqueezeNet``): each sequence
    is a list of ``(name, op)`` pairs. Names starting with ``'_'`` are
    parameter-free function nodes invoked via ``.apply()``; all other entries
    are parameterized links registered as attributes of the chain.
    """

    def __init__(self, inplanes: int, squeeze_planes: int,
                 expand1x1_planes: int, expand3x3_planes: int) -> None:
        super().__init__()
        self.inplanes = inplanes
        # Fix: the original assigned ``self.layers = []`` twice; a single
        # initialization per sequence is kept here.
        self.layers = [
            ('squeeze', L.Convolution2D(in_channels=inplanes,
                                        out_channels=squeeze_planes,
                                        ksize=1)),
            ('_squeeze_activation', ReLU()),
        ]
        self.layers_shortcut1 = [
            ('expand1x1', L.Convolution2D(in_channels=squeeze_planes,
                                          out_channels=expand1x1_planes,
                                          ksize=1)),
            ('_expand1x1_activation', ReLU()),
        ]
        self.layers_shortcut2 = [
            ('expand3x3', L.Convolution2D(in_channels=squeeze_planes,
                                          out_channels=expand3x3_planes,
                                          ksize=3, pad=1)),
            ('_expand3x3_activation', ReLU()),
        ]
        with self.init_scope():
            # Register only the parameterized links; '_'-prefixed function
            # nodes carry no parameters and stay in the lists.
            for name, link in (self.layers + self.layers_shortcut1
                               + self.layers_shortcut2):
                if not name.startswith('_'):
                    setattr(self, name, link)

    def _apply_sequence(self, x, sequence):
        """Run ``x`` through one (name, op) sequence and return the result."""
        for name, f in sequence:
            if name.startswith('_'):
                x = f.apply((x,))[0]  # parameter-free function node
            else:
                x = getattr(self, name)(x)  # registered link
        return x

    def forward(self, x):
        """Squeeze, run both expand branches, concatenate on channels."""
        squeezed = self._apply_sequence(x, self.layers)
        branch1x1 = self._apply_sequence(squeezed, self.layers_shortcut1)
        branch3x3 = self._apply_sequence(squeezed, self.layers_shortcut2)
        return F.concat([branch1x1, branch3x3])
class SqueezeNet(chainer.Chain):
    """Chainer port of SqueezeNet (1.0 and 1.1 variants).

    The whole network is a list of ``(name, op)`` pairs in ``self.features``.
    Names starting with ``'_'`` are parameter-free function nodes (ReLU,
    pooling, dropout, reshape) invoked via ``.apply()``; other entries are
    parameterized links registered on the chain inside ``init_scope``.

    ``output_size`` tracks the spatial size through the feature extractor so
    the final average pooling can reduce each channel to a single value.
    """

    # Placeholder config table (no pretrained weights are bundled).
    cfgs = {
        'squeezenet1_0': None,
        'squeezenet1_1': None,
    }

    def __init__(self, model_name='squeezenet1_0', batch_size=4,
                 image_size=224, num_classes: int = 1000, channels=3,
                 dropout: float = 0.5, **kwargs) -> None:
        super().__init__()
        self.num_classes = num_classes
        self.features = []
        if "1_0" in model_name:
            # NOTE(review): the reference torch SqueezeNet 1.0 uses stride=2
            # for conv1; this port uses stride=1 and its size math matches
            # that choice — confirm which is intended.
            self.features += [('conv1', L.Convolution2D(
                in_channels=channels, out_channels=96, ksize=7, stride=1))]
            output_size = int((image_size - 7 + 2 * 0) / 1 + 1)
            self.features += [('_relu1', ReLU())]
            self.features += [('_pool1', MaxPooling2D(ksize=3, stride=2))]
            output_size = math.ceil((output_size - 3 + 2 * 0) / 2 + 1)
            self.features += [('fire1', Fire(96, 16, 64, 64))]
            self.features += [('fire2', Fire(128, 16, 64, 64))]
            self.features += [('fire3', Fire(128, 32, 128, 128))]
            self.features += [('_pool2', MaxPooling2D(ksize=3, stride=2))]
            output_size = math.ceil((output_size - 3 + 2 * 0) / 2 + 1)
            self.features += [('fire4', Fire(256, 32, 128, 128))]
            self.features += [('fire5', Fire(256, 48, 192, 192))]
            self.features += [('fire6', Fire(384, 48, 192, 192))]
            self.features += [('fire7', Fire(384, 64, 256, 256))]
            # Fix: this pool was also named '_pool2', duplicating the name
            # above (only affects the debug trace, since '_'-prefixed ops are
            # never registered as attributes); renamed to match the 1_1
            # branch's numbering.
            self.features += [('_pool3', MaxPooling2D(ksize=3, stride=2))]
            output_size = math.ceil((output_size - 3 + 2 * 0) / 2 + 1)
            self.features += [('fire8', Fire(512, 64, 256, 256))]
        elif "1_1" in model_name:
            self.features += [('conv1', L.Convolution2D(
                in_channels=channels, out_channels=64, ksize=3, stride=2))]
            output_size = int((image_size - 3 + 2 * 0) / 2 + 1)
            self.features += [('_relu1', ReLU())]
            self.features += [('_pool1', MaxPooling2D(ksize=3, stride=2))]
            output_size = math.ceil((output_size - 3 + 2 * 0) / 2 + 1)
            self.features += [('fire1', Fire(64, 16, 64, 64))]
            self.features += [('fire2', Fire(128, 16, 64, 64))]
            self.features += [('_pool2', MaxPooling2D(ksize=3, stride=2))]
            output_size = math.ceil((output_size - 3 + 2 * 0) / 2 + 1)
            self.features += [('fire3', Fire(128, 32, 128, 128))]
            self.features += [('fire4', Fire(256, 32, 128, 128))]
            self.features += [('_pool3', MaxPooling2D(ksize=3, stride=2))]
            output_size = math.ceil((output_size - 3 + 2 * 0) / 2 + 1)
            self.features += [('fire5', Fire(256, 48, 192, 192))]
            self.features += [('fire6', Fire(384, 48, 192, 192))]
            self.features += [('fire7', Fire(384, 64, 256, 256))]
            self.features += [('fire8', Fire(512, 64, 256, 256))]
        else:
            # Fix: original message was an f-string with no placeholder and
            # did not say which name was rejected.
            raise ValueError(
                f"Unsupported SqueezeNet model_name {model_name!r}: "
                f"a '1_0' or '1_1' variant is expected")
        # Classifier head: dropout, 1x1 conv to num_classes, global average
        # pooling over the remaining spatial extent, flatten to 2-D.
        self.features += [('_dropout', Dropout(dropout))]
        self.features += [('fc', L.Convolution2D(
            in_channels=512, out_channels=self.num_classes, ksize=1))]
        self.features += [('_relu', ReLU())]
        self.features += [('_avgpool', AveragePooling2D(
            ksize=output_size, stride=1, pad=0))]
        # Generalization: reshape with -1 so inference works for any batch
        # size, not only the ``batch_size`` given at construction time (the
        # parameter is kept for backward API compatibility).
        self.features += [('_reshape', Reshape((-1, self.num_classes)))]
        with self.init_scope():
            # Register only the parameterized links as chain attributes.
            for name, link in self.features:
                if not name.startswith('_'):
                    setattr(self, name, link)

    def forward(self, x):
        """Run the network.

        Returns raw logits when ``chainer.config.train`` is True, otherwise
        softmax class probabilities.
        """
        for name, f in self.features:
            origin_size = x.shape
            if name.startswith('_'):
                x = f.apply((x,))[0]
            else:
                x = getattr(self, name)(x)
            print(name, origin_size, x.shape)  # per-layer shape trace (debug)
        if chainer.config.train:
            return x
        return F.softmax(x)
Note that this class is the implementation of SqueezeNet. Also note that the forward pass of the network distinguishes between training and testing:
during training, the logits x are returned directly, while during testing the output is passed through softmax to obtain class probabilities.
Calling method
if __name__ == '__main__':
    # Smoke test: build the model, report its parameter count, and run one
    # training-mode forward pass with a softmax cross-entropy loss.
    batch_size, n_channels = 4, 3
    image_size, num_classes = 224, 123

    model = SqueezeNet(num_classes=num_classes, channels=n_channels,
                       image_size=image_size, batch_size=batch_size)
    print("参数量", model.count_params())

    # Random input batch and random integer labels.
    x = np.random.rand(batch_size, n_channels,
                       image_size, image_size).astype(np.float32)
    t = np.random.randint(0, num_classes, size=(batch_size,)).astype(np.int32)

    with chainer.using_config('train', True):
        y1 = model(x)
        loss1 = F.softmax_cross_entropy(y1, t)