ACNet: Strengthening the Kernel Skeletons for Powerful CNN via Asymmetric
Convolution Blocks
清华——ICCV2019
paper:https://arxiv.org/pdf/1908.03930.pdf
code:https://github.com/DingXiaoH/ACNet
摘要
通过使用一维非对称卷积来加强方形卷积核的特征表达,可以让训练精度更高。在模型推理时,等价地将ACNet转换为相同的原始架构,不再需要额外的计算。ACNet可以明显地提高CIFAR和ImageNet上各种模型的性能。ACNet的有效性归功于其增强模型对旋转畸变的鲁棒性和增强平方卷积核的核心骨架部分的能力。
论文主要思想
卷积操作是一种线性变换,那么具有结合律性质和分配律性质,即可以将多个卷积操作合并成一个操作。同理,Batch Normalization(BN)也是一种线性变换,那么可以将卷积和BN进行融合。引入1x3的水平卷积核可以提升模型对图像上下翻转的鲁棒性,但水平方向不对称;引入3x1的竖直卷积核可以提升模型对图像中左右翻转的鲁棒性,但竖直方向不对称。因此,当1x3,3x1和3x3的输入是一样时候,可以将三者的卷积核参数与其各自的BN参数融合在一起,这样既能够减少参数量,也能够增强特征表达能力。
Keras实现
以下是根据论文和Tensorflow2.x源码实现的keras版本(支持Tensorflow1.x)。特征通道必须channel last。代码链接
训练部分:
def dbb_asym(self, input, filters, kernel_size, name, dilation_rate=1,
             use_bias=False, use_bn=True, model='Add', padding='same'):
    """Asymmetric Convolution Block (ACNet) with a train/infer switch.

    Training-stage graph (``self.stage == 'train'``)::

                      |
        -----------------------------
        |             |             |
       1xk           kx1           kxk
        |             |             |
        BN            BN            BN
        |             |             |
        -----------combine-----------
                      |

    Any other stage builds a single plain kxk Conv2D named ``name``;
    ``fusion_asym`` later copies the fused train-time parameters into it.

    Args:
        input: input tensor; channels-last layout is assumed.
        filters: number of output channels of each branch.
        kernel_size: side length k of the square kernel; the asymmetric
            branches use (k, 1) and (1, k) kernels.
        name: layer-name prefix, so the fusion step can look the layers up.
        dilation_rate: dilation applied along each kernel's long axis.
        use_bias: whether every branch Conv2D carries a bias term.
        use_bn: whether each branch is followed by BatchNormalization.
        model: branch combiner, 'Add' (sum) or 'Concate' (channel concat).
        padding: padding mode passed to the branch Conv2D layers.

    Returns:
        The combined output tensor.

    Raises:
        ValueError: if ``model`` is neither 'Add' nor 'Concate'.
    """
    if model not in ('Add', 'Concate'):
        # The original code silently returned None here; fail loudly instead.
        raise ValueError("model must be 'Add' or 'Concate', got %r" % (model,))
    if self.stage == 'train':
        conv_kxk = Conv2D(filters, (kernel_size, kernel_size), padding=padding,
                          dilation_rate=(dilation_rate, dilation_rate),
                          use_bias=use_bias, name=name + '_conv_kxk')(input)
        conv_kx1 = Conv2D(filters, (kernel_size, 1), padding=padding,
                          dilation_rate=(dilation_rate, 1),
                          use_bias=use_bias, name=name + '_conv_kx1')(input)
        conv_1xk = Conv2D(filters, (1, kernel_size), padding=padding,
                          dilation_rate=(1, dilation_rate),
                          use_bias=use_bias, name=name + '_conv_1xk')(input)
        if use_bn:
            conv_kxk = BatchNormalization(axis=-1, name=name + '_bn_kxk')(conv_kxk)
            conv_kx1 = BatchNormalization(axis=-1, name=name + '_bn_kx1')(conv_kx1)
            conv_1xk = BatchNormalization(axis=-1, name=name + '_bn_1xk')(conv_1xk)
        if model == 'Add':
            x = Add(name=name + '_add')([conv_kxk, conv_kx1, conv_1xk])
        else:  # 'Concate'
            x = Concatenate(name=name + '_concate')([conv_kxk, conv_kx1, conv_1xk])
    else:
        # Inference: one plain conv; 'Concate' triples the channel count
        # because the three branches are stacked instead of summed.
        infer_filters = filters if model == 'Add' else filters * 3
        x = Conv2D(infer_filters, kernel_size, dilation_rate=dilation_rate,
                   padding='same', name=name)(input)
    # Record the block so fusion_asym can find and fuse it later.
    self.dbb_block_names['dbb_asym'].append([name, use_bias, use_bn, model, None])
    return x
融合部分:
def fusion_asym(AC_names, trained_model, infer_model):
    """Fuse trained ACNet blocks into the single kxk convs of ``infer_model``.

    For each recorded block, folds every branch's BatchNormalization into its
    convolution weights, embeds the kx1 / 1xk kernels into kxk-shaped zero
    tensors along the centre column / row, and writes the result into the
    inference model's layer via ``diff_model``::

                      |
        -----------------------------
        |             |             |
       1xk           kx1           kxk
        |             |             |
        BN            BN            BN
        |             |             |
        -----------combine-----------
                      |

    Args:
        AC_names: entries ``[name, use_bias, use_bn, model, epoch]`` as
            appended by ``dbb_asym`` (``epoch`` is unused here).
        trained_model: model containing the three-branch training layers.
        infer_model: model containing the single fused conv per block.
    """
    branch_suffixes = ('_conv_kxk', '_conv_kx1', '_conv_1xk')
    for layer_name, use_bias, use_bn, model, _epoch in AC_names:
        branch_weights = [
            trained_model.get_layer(layer_name + s).get_weights()
            for s in branch_suffixes
        ]
        kernels = [w[0] for w in branch_weights]
        out_channels = kernels[0].shape[-1]
        if use_bias:
            biases = [w[1] for w in branch_weights]
        else:
            biases = [np.zeros(out_channels)] * 3

        if use_bn:
            bn_layers = [trained_model.get_layer(layer_name + s)
                         for s in ('_bn_kxk', '_bn_kx1', '_bn_1xk')]
            bn_params = [layer.get_weights() for layer in bn_layers]
            gammas = [p[0] for p in bn_params]
            betas = [p[1] for p in bn_params]
            means = [p[2] for p in bn_params]
            variances = [p[3] for p in bn_params]
            # Use each BN layer's own epsilon (Keras default is 1e-3).
            # A hard-coded tiny epsilon (the original used 1e-10) makes the
            # fused conv differ from the trained BN's inference behaviour.
            epsilons = [layer.epsilon for layer in bn_layers]
        else:
            gammas = [np.ones(out_channels)] * 3
            betas = [np.zeros(out_channels)] * 3
            means = [np.zeros(out_channels)] * 3
            variances = [np.ones(out_channels)] * 3
            epsilons = [0.0] * 3

        # BN folding: scale = gamma / sqrt(var + eps) broadcasts over the
        # kernel's last (output-channel) axis.
        scales = [g / np.sqrt(v + eps)
                  for g, v, eps in zip(gammas, variances, epsilons)]
        w_kxk, w_kx1, w_1xk = [s * k for s, k in zip(scales, kernels)]
        fused_biases = [(b - m) * s + beta
                        for b, m, s, beta in zip(biases, means, scales, betas)]

        # Embed the asymmetric kernels along the centre column / row of a
        # kxk-shaped zero tensor. Assigning the full column/row (rather than
        # a fixed 3-wide slice) works for any odd kernel_size, not just 3.
        kernel_size = w_kxk.shape[0]
        center = kernel_size // 2
        w_k_1 = np.zeros_like(w_kxk)
        w_1_k = np.zeros_like(w_kxk)
        w_k_1[:, center, :, :] = w_kx1[:, 0, :, :]
        w_1_k[center, :, :, :] = w_1xk[0, :, :, :]

        infer_model.get_layer(layer_name).set_weights(
            diff_model(model, [w_kxk, w_k_1, w_1_k], fused_biases))
声明:本内容来源网络,版权属于原作者,图片来源原论文。如有侵权,联系删除。