chainer-骨干网络backbone-ShuffleNet_V2代码重构【附源码】


前言

本文基于 chainer 实现 ShuffleNet_V2 网络结构:参照 torch 版本的结构组织方式构建 chainer 版的模型,并计算 ShuffleNet_V2 的参数量。


代码实现


def channel_shuffle(x, groups=2):
    """Interleave the channels of ``x`` across ``groups`` channel groups.

    The channel axis is viewed as ``(groups, channels_per_group)``, those two
    axes are swapped, and the result is flattened back to the input shape, so
    consecutive output channels come from different groups.

    Args:
        x: 4-D object exposing ``shape``, ``reshape`` and ``transpose``,
            unpacked as ``(n, c, h, w)``.
        groups: Number of channel groups; the reshape only succeeds when
            ``c`` is a multiple of it.

    Returns:
        An object with the same shape as ``x`` and its channels reordered.
    """
    batch, channels, height, width = x.shape
    per_group = channels // groups
    grouped = x.reshape(batch, groups, per_group, height, width)
    # Swap the group axis with the per-group axis to interleave the groups.
    interleaved = grouped.transpose(0, 2, 1, 3, 4)
    return interleaved.reshape(batch, channels, height, width)

class InvertedResidual(chainer.Chain):
    """ShuffleNet V2 building block: two branches followed by a channel shuffle.

    With ``stride == 1`` the input is split in half along the channel axis;
    the first half is passed through untouched and the second half goes
    through ``branch2``.  With ``stride == 2`` the whole input is fed to both
    ``branch1`` and ``branch2``.  In both cases the two results are
    concatenated along the channel axis and passed to ``channel_shuffle``.

    ``branch1`` and ``branch2`` are lists of ``(name, layer)`` pairs.  Entries
    whose name starts with ``_`` are plain callables (activations) and are not
    registered on the chain; every other entry is set as an attribute of the
    chain under its name inside ``init_scope``.
    """

    def depthwise_conv(self, input_c: int, output_c: int, kernel_s: int, stride: int = 1, padding: int = 0, bias: bool = False):
        """Build a convolution that uses one group per input channel.

        Args:
            input_c: Number of input channels (also used as the group count).
            output_c: Number of output channels.
            kernel_s: Kernel size.
            stride: Convolution stride.
            padding: Padding passed as ``pad``.
            bias: Whether the convolution has a bias term.

        Returns:
            The configured ``L.Convolution2D`` link.
        """
        return L.Convolution2D(in_channels=input_c, out_channels=output_c, ksize=kernel_s, stride=stride, pad=padding, nobias=not bias, groups=input_c)

    def __init__(self, input_c: int, output_c: int, stride: int):
        """Create the block.

        Args:
            input_c: Number of input channels.
            output_c: Number of output channels; must be even because each
                branch contributes half of them.
            stride: 1 (keep resolution) or 2 (downsample).

        Raises:
            ValueError: If ``stride`` is not 1 or 2, if ``output_c`` is odd,
                or if ``stride == 1`` and ``input_c != output_c``.
        """
        super(InvertedResidual, self).__init__()
        if stride not in [1, 2]:
            raise ValueError("illegal stride value.")
        self.stride = stride

        # Explicit exceptions instead of ``assert`` so the checks survive -O.
        if output_c % 2 != 0:
            raise ValueError("output_c must be even, got {}.".format(output_c))
        branch_features = output_c // 2
        # With stride 1 the input is split in half and only one half is
        # transformed, so input_c must be exactly twice branch_features.
        if self.stride == 1 and input_c != branch_features * 2:
            raise ValueError(
                "with stride 1, input_c must equal output_c "
                "(got input_c={}, output_c={}).".format(input_c, output_c))

        # Activations are stored as the stateless function ``F.relu`` rather
        # than as a FunctionNode instance: a node keeps its graph edges and
        # retained arrays on itself, so one instance must not be applied again
        # on every forward pass.
        self.branch1 = []
        if self.stride == 2:
            self.branch1 += [
                ('depthwise_conv_branch1', self.depthwise_conv(input_c, input_c, kernel_s=3, stride=self.stride, padding=1)),
                ('bn1_branch1', BatchNormalization(input_c)),
                ('conv1_branch1', L.Convolution2D(in_channels=input_c, out_channels=branch_features, ksize=1, stride=1, pad=0, nobias=True)),
                ('bn2_branch1', BatchNormalization(branch_features)),
                ('_relu1_branch1', F.relu),
            ]

        # With stride 2 branch2 sees the full input; with stride 1 only half.
        branch2_in = input_c if self.stride > 1 else branch_features
        self.branch2 = [
            ('conv1_branch2', L.Convolution2D(branch2_in, branch_features, ksize=1, stride=1, pad=0, nobias=True)),
            ('bn1_branch2', BatchNormalization(branch_features)),
            ('_relu1_branch2', F.relu),
            ('depthwise_conv_branch2', self.depthwise_conv(branch_features, branch_features, kernel_s=3, stride=self.stride, padding=1)),
            ('bn2_branch2', BatchNormalization(branch_features)),
            ('conv2_branch2', L.Convolution2D(branch_features, branch_features, ksize=1, stride=1, pad=0, nobias=True)),
            ('bn3_branch2', BatchNormalization(branch_features)),
            ('_relu2_branch2', F.relu),
        ]

        with self.init_scope():
            for name, layer in self.branch1 + self.branch2:
                if not name.startswith('_'):
                    setattr(self, name, layer)

    def _run_branch(self, branch, x):
        """Apply every ``(name, layer)`` entry of ``branch`` to ``x`` in order."""
        for name, layer in branch:
            if name.startswith('_'):
                x = layer(x)
            else:
                # Resolve the layer through the chain attribute set in
                # ``__init__`` instead of the list entry.
                x = getattr(self, name)(x)
        return x

    def forward(self, x):
        """Run the block on ``x`` and return the channel-shuffled result."""
        if self.stride == 1:
            # Equivalent of torch's ``x.chunk(2, dim=1)``.
            x1, x2 = F.split_axis(x, 2, axis=1)
            out = F.concat((x1, self._run_branch(self.branch2, x2)), axis=1)
        else:
            left = self._run_branch(self.branch1, x)
            right = self._run_branch(self.branch2, x)
            out = F.concat((left, right), axis=1)

        return channel_shuffle(out, 2)

class ShuffleNet_V2(chainer.Chain):
    """ShuffleNet V2 classifier assembled from ``InvertedResidual`` blocks.

    The network is kept in ``self.layers`` as an ordered list of
    ``(name, layer)`` pairs: a stem (conv1, bn1, ReLU, max pooling), three
    stages of residual blocks, a 1x1 convolution with batch normalization and
    ReLU, a spatial mean over axes ``(2, 3)`` and a final linear layer.
    Entries whose name starts with ``_`` are plain callables; every other
    entry is set as an attribute of the chain inside ``init_scope``.
    """

    # Per-variant configuration.  ``alpha`` is stored for reference only; it is
    # not read anywhere in this class.
    cfgs = {
        'shufflenetv2_0.5': {'alpha': 0.5, 'stages_repeats': [4, 8, 4], 'stages_out_channels': [24, 48, 96, 192, 1024]},
        'shufflenetv2_1.0': {'alpha': 1.0, 'stages_repeats': [4, 8, 4], 'stages_out_channels': [24, 116, 232, 464, 1024]},
        'shufflenetv2_1.5': {'alpha': 1.5, 'stages_repeats': [4, 8, 4], 'stages_out_channels': [24, 176, 352, 704, 1024]},
        'shufflenetv2_2.0': {'alpha': 2.0, 'stages_repeats': [4, 8, 4], 'stages_out_channels': [24, 244, 488, 976, 2048]},
    }

    def __init__(self, model_name='shufflenetv2_1.0',
                 num_classes: int = 1000,
                 inverted_residual=InvertedResidual, *, verbose: bool = False, **kwargs):
        """Build the network described by ``cfgs[model_name]``.

        Args:
            model_name: Key into ``cfgs`` selecting the variant.
            num_classes: Output size of the final linear layer.
            inverted_residual: Callable used to create each block; called as
                ``inverted_residual(in_channels, out_channels, stride)``.
            verbose: When true, ``forward`` prints each layer's name with its
                input and output shapes.
            **kwargs: Accepted for call-site compatibility and ignored.

        Raises:
            KeyError: If ``model_name`` is not a key of ``cfgs``.
            ValueError: If the selected configuration does not have exactly 3
                stage repeat counts and 5 stage output channel counts.
        """
        super(ShuffleNet_V2, self).__init__()

        try:
            cfg = self.cfgs[model_name]
        except KeyError:
            raise KeyError("unknown model_name {!r}; expected one of {}".format(
                model_name, sorted(self.cfgs))) from None

        stages_repeats = cfg['stages_repeats']
        if len(stages_repeats) != 3:
            raise ValueError("expected stages_repeats as list of 3 positive ints")
        if len(cfg['stages_out_channels']) != 5:
            raise ValueError("expected stages_out_channels as list of 5 positive ints")
        self._stage_out_channels = cfg['stages_out_channels']
        self.verbose = verbose

        # input RGB image
        input_channels = 3
        output_channels = self._stage_out_channels[0]

        # Activations and pooling are stored as stateless functions rather
        # than FunctionNode instances: a node keeps its graph edges and
        # retained arrays on itself, so one instance must not be applied again
        # on every forward pass.
        layers = [
            ('conv1', L.Convolution2D(input_channels, output_channels, ksize=3, stride=2, pad=1, nobias=True)),
            ('bn1', BatchNormalization(output_channels)),
            ('_relu1', F.relu),
            # NOTE(review): ``cover_all`` is left at its default, as before;
            # confirm the resulting spatial size if parity with the torch
            # model matters.
            ('_maxpool', functools.partial(F.max_pooling_2d, ksize=3, stride=2, pad=1)),
        ]
        input_channels = output_channels

        stage_names = ["stage{}".format(i) for i in [2, 3, 4]]
        for name, repeats, output_channels in zip(stage_names, stages_repeats, self._stage_out_channels[1:]):
            # The first block of a stage downsamples (stride 2); the
            # remaining blocks keep resolution and channel count.
            layers.append(('{0}_1'.format(name), inverted_residual(input_channels, output_channels, 2)))
            for i in range(repeats - 1):
                layers.append(('{0}_{1}'.format(name, i + 2), inverted_residual(output_channels, output_channels, 1)))
            input_channels = output_channels

        output_channels = self._stage_out_channels[-1]
        layers += [
            ('conv2', L.Convolution2D(input_channels, output_channels, ksize=1, stride=1, pad=0, nobias=True)),
            ('bn2', BatchNormalization(output_channels)),
            ('_relu2', F.relu),
            ('global_pool', functools.partial(F.mean, axis=(2, 3))),
            ('fc', L.Linear(output_channels, num_classes)),
        ]
        self.layers = layers

        with self.init_scope():
            for name, layer in self.layers:
                if not name.startswith('_'):
                    setattr(self, name, layer)

    def forward(self, x):
        """Run the network on ``x``.

        Returns:
            The output of the final linear layer when
            ``chainer.config.train`` is true, otherwise ``F.softmax`` of it.
        """
        for name, layer in self.layers:
            in_shape = x.shape
            if name.startswith('_'):
                x = layer(x)
            else:
                # Resolve the layer through the chain attribute set in
                # ``__init__`` instead of the list entry.
                x = getattr(self, name)(x)
            if self.verbose:
                print(name, in_shape, x.shape)
        if chainer.config.train:
            return x
        return F.softmax(x)


上面的类就是 ShuffleNet_V2 的完整实现。注意网络的前向传播区分了训练和测试两种模式:
训练模式下直接返回全连接层的输出 x,测试模式下会再经过 softmax 得到各类别的概率。

调用方式

if __name__ == '__main__':
    # Smoke test: build the default model, report its parameter count and run
    # one forward pass plus a loss computation on random data.
    batch_size, n_channels, image_size = 4, 3, 224
    num_classes = 123

    # channels / image_size / batch_size are passed as extra keyword
    # arguments; ShuffleNet_V2 as defined in this file does not read them.
    model = ShuffleNet_V2(
        num_classes=num_classes,
        channels=n_channels,
        image_size=image_size,
        batch_size=batch_size,
    )
    print("参数量", model.count_params())

    input_shape = (batch_size, n_channels, image_size, image_size)
    x = np.random.rand(*input_shape).astype(np.float32)
    t = np.random.randint(0, num_classes, size=(batch_size,)).astype(np.int32)

    # In train mode the model returns the raw output of its last layer, which
    # is what softmax_cross_entropy is given here.
    with chainer.using_config('train', True):
        y1 = model(x)
    loss1 = F.softmax_cross_entropy(y1, t)

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/ctu_sue/article/details/128687453