PyTorch implementation of CBAM from the ECCV 2018 paper "CBAM: Convolutional Block Attention Module". CBAM refines an input feature map in two sequential steps: a channel attention map first re-weights the channels, then a spatial attention map re-weights the spatial locations.

import numpy as np
import torch
from torch import nn
from torch.nn import init


# CBAM Attention
# Method from the ECCV 2018 paper "CBAM: Convolutional Block Attention Module"
# Channel attention module
class ChannelAttention(nn.Module):
    def __init__(self, channel, reduction=16):
        # Every model that inherits from nn.Module must call the parent constructor
        super(ChannelAttention, self).__init__()
        # AdaptiveAvgPool2d pools the height and width of the input feature map
        # down to the output size given as its argument, for example:
        #   m = nn.AdaptiveAvgPool2d((5, 7))
        #   input = torch.randn(1, 64, 8, 9)
        #   output = m(input)
        #   print(output.size())  # torch.Size([1, 64, 5, 7])
        # If the target height and width are equal, a single number is enough:
        #   nn.AdaptiveAvgPool2d((1, 1)) == nn.AdaptiveAvgPool2d(1)
        self.maxpool = nn.AdaptiveMaxPool2d(1)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        self.se = nn.Sequential(
            # The paper describes this shared MLP as fully connected layers,
            # but a 1x1 convolution is equivalent to a fully connected layer here
            nn.Conv2d(channel, channel // reduction, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(channel // reduction, channel, 1, bias=False)
        )
        # Sigmoid maps the fused attention scores to (0, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Squeeze the spatial dimensions with both max pooling and average pooling
        max_result = self.maxpool(x)
        avg_result = self.avgpool(x)
        # Pass each descriptor through the shared MLP, then fuse by addition
        max_out = self.se(max_result)
        avg_out = self.se(avg_result)
        output = self.sigmoid(max_out + avg_out)
        return output
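
# A quick stand-alone check of the channel branch (the names below are just
# illustrative): ChannelAttention returns a [B, C, 1, 1] weight tensor that is
# broadcast-multiplied over the input feature map, e.g.
#   ca = ChannelAttention(channel=512, reduction=16)
#   feat = torch.randn(2, 512, 7, 7)           # [B, C, H, W]
#   ca_map = ca(feat)                           # torch.Size([2, 512, 1, 1])
#   refined = feat * ca_map                     # broadcast over H and W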


# Spatial attention module
class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        # The input has 2 channels (one from channel-wise max pooling, one from
        # channel-wise average pooling) and the output is a single spatial
        # attention map. The convolution uses the given kernel_size, with padding
        # chosen so the spatial size is preserved (kernel_size should be odd).
        self.conv = nn.Conv2d(2, 1, kernel_size=kernel_size, padding=kernel_size // 2)
        # Sigmoid activation
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Max pooling along the channel dimension, as described in the paper
        max_result, _ = torch.max(x, dim=1, keepdim=True)
        # Average pooling along the channel dimension
        avg_result = torch.mean(x, dim=1, keepdim=True)
        # Concatenate the two single-channel maps along the channel axis
        result = torch.cat([max_result, avg_result], 1)
        # Produce the spatial attention map and squash it to (0, 1)
        output = self.conv(result)
        output = self.sigmoid(output)
        return output
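
# Similarly, a stand-alone check of the spatial branch (names are illustrative):
# it yields a [B, 1, H, W] map that re-weights every spatial position, e.g.
#   sa = SpatialAttention(kernel_size=7)
#   feat = torch.randn(2, 512, 7, 7)           # [B, C, H, W]
#   sa_map = sa(feat)                           # torch.Size([2, 1, 7, 7])
#   refined = feat * sa_map                     # broadcast over the channel axis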


class CBAMBlock(nn.Module):

    def __init__(self, channel=512, reduction=16, kernel_size=7):
        super(CBAMBlock, self).__init__()
        # Combine channel attention and spatial attention; the paper uses a 7x7
        # convolution in the spatial branch, hence the default kernel_size=7
        self.ca = ChannelAttention(channel=channel, reduction=reduction)
        self.sa = SpatialAttention(kernel_size=kernel_size)

    # Initialize the layer weights (not called automatically; invoke it manually if needed)
    def init_weights(self):
        # Iterate over every sub-module of the model
        for m in self.modules():
            # Convolutional layers: Kaiming (He) initialization
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    # The forward pass builds the computation graph
    def forward(self, x):
        # Batch size and channel count of the input feature map (not used below)
        b, c, _, _ = x.size()
        # Keep the input for the residual connection at the end
        residual = x
        # Apply channel attention
        out = x * self.ca(x)
        # Apply spatial attention
        out = out * self.sa(out)
        # Residual connection
        return out + residual


if __name__ == '__main__':
    # Treat this tensor as a feature map produced by a convolutional layer,
    # with shape [50, 512, 7, 7]:
    # 50 is the batch size, 512 the number of channels, and 7x7 the spatial size
    input = torch.randn(50, 512, 7, 7)
    # Use the spatial size of the feature map (7), not the channel count,
    # as the kernel size of the spatial-attention convolution
    kernel_size = input.shape[2]
    # Apply channel attention followed by spatial attention
    cbam = CBAMBlock(channel=512, reduction=16, kernel_size=kernel_size)
    output = cbam(input)
    print(output.shape)
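
In practice CBAM is usually dropped in after a convolution block inside an existing backbone. The wrapper below is only a sketch of that usage, reusing the CBAMBlock defined above; ConvWithCBAM and its layer sizes are made-up names for illustration, not something from the paper.

# Hypothetical example: refining the output of a small conv block with CBAM
class ConvWithCBAM(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ConvWithCBAM, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )
        # CBAM re-weights the conv features along channels, then spatially
        self.cbam = CBAMBlock(channel=out_channels, reduction=16, kernel_size=7)

    def forward(self, x):
        return self.cbam(self.conv(x))


block = ConvWithCBAM(64, 128)
block.cbam.init_weights()                  # optional re-initialization
y = block(torch.randn(4, 64, 32, 32))
print(y.shape)                             # torch.Size([4, 128, 32, 32])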

Reposted from blog.csdn.net/Talantfuck/article/details/124556895