EPNet Code Walkthrough — LI-Fusion

1. Preface

Paper source code: https://github.com/happinesslz/EPNet
Annotated version: https://github.com/xueking1/EPNet-master/tree/main (only LI-Fusion is annotated so far; I will update the rest when I find time)
Paper: https://arxiv.org/pdf/2007.08856

2. Core Code Locations

1. Config file: EPNet-master\EPNet-master\tools\cfgs\LI_Fusion_with_attention_use_ce_loss.yaml

The LI-Fusion segment of the config:

LI_FUSION:
ENABLED: True

ADD_Image_Attention: True
IMG_FEATURES_CHANNEL: 128

IMG_CHANNELS: [3, 64, 128, 256, 512]
POINT_CHANNELS: [96, 256, 512, 1024]

DeConv_Reduce: [16, 16, 16, 16]
DeConv_Kernels: [2, 4, 8, 16]
DeConv_Strides: [2, 4, 8, 16]

2. LI-Fusion implementation: EPNet-master\EPNet-master\lib\net\pointnet2_msg.py

Complete annotated code:

import torch
import torch.nn as nn
import torch.nn.functional as F
from pointnet2_lib.pointnet2.pointnet2_modules import PointnetFPModule, PointnetSAModuleMSG
from lib.config import cfg
from torch.nn.functional import grid_sample


BatchNorm2d = nn.BatchNorm2d

def conv3x3(in_planes, out_planes, stride = 1):
    """3x3 convolution with padding"""
    # 3x3 conv module: in the paper, 3x3 convolutions are used to downscale the image features
    return nn.Conv2d(in_planes, out_planes, kernel_size = 3, stride = stride,
                     padding = 1, bias = False)


class BasicBlock(nn.Module):

    # Classic conv block: Conv + BN + ReLU, closed by a second 3x3 conv with stride 2*stride (downsampling)
    def __init__(self, inplanes, outplanes, stride = 1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, outplanes, stride)
        self.bn1 = BatchNorm2d(outplanes )
        self.relu = nn.ReLU(inplace = True)
        self.conv2 = conv3x3(outplanes, outplanes, 2*stride)

    def forward(self, x):

        out = self.conv1(x)
        # BN + ReLU between the two convs provides the non-linearity
        out = self.bn1(out)
        out = self.relu(out)
        # output: the second conv is strided, halving the spatial resolution
        out = self.conv2(out)

        return out

class Fusion_Conv(nn.Module):
    def __init__(self, inplanes, outplanes):

        super(Fusion_Conv, self).__init__()

        self.conv1 = torch.nn.Conv1d(inplanes, outplanes, 1)
        self.bn1 = torch.nn.BatchNorm1d(outplanes)

    def forward(self, point_features, img_features):
        #print(point_features.shape, img_features.shape)
        fusion_features = torch.cat([point_features, img_features], dim=1)
        fusion_features = F.relu(self.bn1(self.conv1(fusion_features)))

        return fusion_features


#================addition attention (add)=======================#
class IA_Layer(nn.Module):
    def __init__(self, channels):
        print('##############ADDITION ATTENTION(ADD)#########')
        super(IA_Layer, self).__init__()
        self.ic, self.pc = channels
        rc = self.pc // 4
        self.conv1 = nn.Sequential(nn.Conv1d(self.ic, self.pc, 1),
                                    nn.BatchNorm1d(self.pc),
                                    nn.ReLU())
        self.fc1 = nn.Linear(self.ic, rc)
        self.fc2 = nn.Linear(self.pc, rc)
        self.fc3 = nn.Linear(rc, 1)


    def forward(self, img_feas, point_feas):
        batch = img_feas.size(0)
        # flatten image and point features: (B, C, N) -> (BN, C)
        img_feas_f = img_feas.transpose(1,2).contiguous().view(-1, self.ic) #BCN->BNC->(BN)C
        point_feas_f = point_feas.transpose(1,2).contiguous().view(-1, self.pc) #BCN->BNC->(BN)C'
        # print(img_feas)

        # pass image and point features through separate FC layers to map them into the same dimension for fusion
        ri = self.fc1(img_feas_f)
        rp = self.fc2(point_feas_f)

        # 1. Fuse: image and point features are combined by element-wise addition: ri + rp
        # 2. tanh squashes the sum into [-1, 1]
        # 3. fc3 reduces it to a single channel so it can later be multiplied with the image features
        # 4. sigmoid maps the result into [0, 1], giving a weight map: every point has its own weight,
        #    which tells us how much the corresponding image pixel contributes
        att = F.sigmoid(self.fc3(F.tanh(ri + rp))) #BNx1

        att = att.squeeze(1)
        att = att.view(batch, 1, -1) #B1N
        # print(img_feas.size(), att.size())

        # remap the raw image features with a 1x1 conv (ic -> pc) so they can be multiplied with the weight map
        img_feas_new = self.conv1(img_feas)
        # weighted image features: image features * weight map = what the image contributes
        out = img_feas_new * att
        # return the re-weighted image features
        return out


class Atten_Fusion_Conv(nn.Module):
    # The LI-Fusion layer
    def __init__(self, inplanes_I, inplanes_P, outplanes):
        super(Atten_Fusion_Conv, self).__init__()
        # IA_Layer re-weights the image features using both modalities
        self.IA_Layer = IA_Layer(channels = [inplanes_I, inplanes_P])
        # self.conv1 = torch.nn.Conv1d(inplanes_P, outplanes, 1)
        self.conv1 = torch.nn.Conv1d(inplanes_P + inplanes_P, outplanes, 1)
        self.bn1 = torch.nn.BatchNorm1d(outplanes)


    def forward(self, point_features, img_features):
        # print(point_features.shape, img_features.shape)
        # IA_Layer produces the attention-weighted image features
        img_features =  self.IA_Layer(img_features, point_features)
        # print("img_features:", img_features.shape)

        # concatenate the raw point features with the re-weighted image features: this keeps the (attention-filtered) image information as well as the original point information
        # fusion_features = img_features + point_features
        fusion_features = torch.cat([point_features, img_features], dim=1)
        # Conv + BN + ReLU yields the final fused features
        fusion_features = F.relu(self.bn1(self.conv1(fusion_features)))

        return fusion_features


def Feature_Gather(feature_map, xy):
    """
    :param xy:(B,N,2)  normalize to [-1,1]
    :param feature_map:(B,C,H,W)
    :return:
    """
    # interpolation
    # use grid_sample for this.
    # xy: (B,N,2) -> (B,1,N,2)
    xy = xy.unsqueeze(1)
    # grid_sample is PyTorch's built-in sampler:
    #   torch.nn.functional.grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corners=None)
    #   mode selects the interpolation scheme: 'bilinear', 'nearest' or 'bicubic'.
    # see: https://blog.csdn.net/jameschen9051/article/details/124714759
    interpolate_feature = grid_sample(feature_map, xy)  # (B,C,1,N)
    # return the interpolated result
    return interpolate_feature.squeeze(2) # (B,C,N)


def get_model(input_channels = 6, use_xyz = True):
    return Pointnet2MSG(input_channels = input_channels, use_xyz = use_xyz)


class Pointnet2MSG(nn.Module):
    def __init__(self, input_channels = 6, use_xyz = True):
        super().__init__()
        # Set Abstraction (SA) modules from PointNet++
        self.SA_modules = nn.ModuleList()
        channel_in = input_channels

        skip_channel_list = [input_channels]
        for k in range(cfg.RPN.SA_CONFIG.NPOINTS.__len__()):
            mlps = cfg.RPN.SA_CONFIG.MLPS[k].copy()
            channel_out = 0
            for idx in range(mlps.__len__()):
                mlps[idx] = [channel_in] + mlps[idx]
                channel_out += mlps[idx][-1]
            # PointNet++ MSG SA module aggregates local point information
            self.SA_modules.append(
                    PointnetSAModuleMSG(
                            npoint = cfg.RPN.SA_CONFIG.NPOINTS[k],
                            radii = cfg.RPN.SA_CONFIG.RADIUS[k],
                            nsamples = cfg.RPN.SA_CONFIG.NSAMPLE[k],
                            mlps = mlps,
                            use_xyz = use_xyz,
                            bn = cfg.RPN.USE_BN
                    )
            )
            skip_channel_list.append(channel_out)
            channel_in = channel_out

        ##################
        # whether to use the LI_FUSION module is controlled by the config file
        # config: LI_FUSION.ENABLED: True
        if cfg.LI_FUSION.ENABLED:
            # image feature extraction (downsampling) blocks
            self.Img_Block = nn.ModuleList()
            # fusion convolution modules
            self.Fusion_Conv = nn.ModuleList()
            # transposed-convolution (upsampling) modules
            self.DeConv = nn.ModuleList()
            for i in range(len(cfg.LI_FUSION.IMG_CHANNELS) - 1):
                # Img_Block initialization:
                # 1. the image downsampling blocks are built from the config parameters

                # 2. stacked BasicBlock modules, wired as
                #    input: LI_FUSION.IMG_CHANNELS[i], output: cfg.LI_FUSION.IMG_CHANNELS[i+1]

                # 3. with IMG_CHANNELS: [3, 64, 128, 256, 512] in the config, the (input, output)
                #    pairs are (3,64), (64,128), (128,256), (256,512)
                self.Img_Block.append(BasicBlock(cfg.LI_FUSION.IMG_CHANNELS[i], cfg.LI_FUSION.IMG_CHANNELS[i+1], stride=1))
                # whether to add image attention is controlled by ADD_Image_Attention
                # config: ADD_Image_Attention: True
                if cfg.LI_FUSION.ADD_Image_Attention:
                    # attention-based LI-Fusion module
                    self.Fusion_Conv.append(
                        Atten_Fusion_Conv(cfg.LI_FUSION.IMG_CHANNELS[i + 1], cfg.LI_FUSION.POINT_CHANNELS[i],
                                          cfg.LI_FUSION.POINT_CHANNELS[i]))
                else:
                    self.Fusion_Conv.append(Fusion_Conv(cfg.LI_FUSION.IMG_CHANNELS[i + 1] + cfg.LI_FUSION.POINT_CHANNELS[i],
                                                        cfg.LI_FUSION.POINT_CHANNELS[i]))
                # DeConv: transposed-conv modules (ConvTranspose2d) built from the config
                # config: ① IMG_CHANNELS: [3, 64, 128, 256, 512]  ② DeConv_Reduce: [16, 16, 16, 16]
                # note: kernel_size and stride are both DeConv_Kernels[i], so layer i upsamples by exactly that factor
                self.DeConv.append(nn.ConvTranspose2d(cfg.LI_FUSION.IMG_CHANNELS[i + 1], cfg.LI_FUSION.DeConv_Reduce[i],
                                                  kernel_size=cfg.LI_FUSION.DeConv_Kernels[i],
                                                  stride=cfg.LI_FUSION.DeConv_Kernels[i]))
            # 1x1 conv fusing the concatenated multi-scale image features
            self.image_fusion_conv = nn.Conv2d(sum(cfg.LI_FUSION.DeConv_Reduce), cfg.LI_FUSION.IMG_FEATURES_CHANNEL//4, kernel_size = 1)
            # BN
            self.image_fusion_bn = torch.nn.BatchNorm2d(cfg.LI_FUSION.IMG_FEATURES_CHANNEL//4)
            # ADD_Image_Attention decides whether to use image attention for the final fusion
            if cfg.LI_FUSION.ADD_Image_Attention:
                self.final_fusion_img_point = Atten_Fusion_Conv(cfg.LI_FUSION.IMG_FEATURES_CHANNEL//4, cfg.LI_FUSION.IMG_FEATURES_CHANNEL, cfg.LI_FUSION.IMG_FEATURES_CHANNEL)
            else:
                self.final_fusion_img_point = Fusion_Conv(cfg.LI_FUSION.IMG_FEATURES_CHANNEL + cfg.LI_FUSION.IMG_FEATURES_CHANNEL//4, cfg.LI_FUSION.IMG_FEATURES_CHANNEL)

        # FP (Feature Propagation) modules: inverse-distance-weighted interpolation
        self.FP_modules = nn.ModuleList()
        # FP module initialization
        for k in range(cfg.RPN.FP_MLPS.__len__()):
            pre_channel = cfg.RPN.FP_MLPS[k + 1][-1] if k + 1 < len(cfg.RPN.FP_MLPS) else channel_out
            self.FP_modules.append(
                    PointnetFPModule(mlp = [pre_channel + skip_channel_list[k]] + cfg.RPN.FP_MLPS[k])
            )

    def _break_up_pc(self, pc):
        # _break_up_pc: split the raw point-cloud tensor into coordinates and features

        # the first three channels are the xyz coordinates
        xyz = pc[..., 0:3].contiguous()
        features = (
            # the remaining channels pc[..., 3:] (if any) are per-point features
            pc[..., 3:].transpose(1, 2).contiguous()
            if pc.size(-1) > 3 else None
        )

        return xyz, features


    def forward(self, pointcloud: torch.cuda.FloatTensor, image=None, xy=None):
        # point coordinates xyz and features
        xyz, features = self._break_up_pc(pointcloud)
        # wrap xyz and features in lists; each SA level appends its output
        l_xyz, l_features = [xyz], [features]
        # the config decides whether LI_FUSION is used
        # config: LI_FUSION.ENABLED: True
        if cfg.LI_FUSION.ENABLED:
            #### normalize xy to [-1,1]
            size_range = [1280.0, 384.0]  # hard-coded input image size (W, H)
            xy[:, :, 0] = xy[:, :, 0] / (size_range[0] - 1.0) * 2.0 - 1.0
            xy[:, :, 1] = xy[:, :, 1] / (size_range[1] - 1.0) * 2.0 - 1.0  # = xy / (size_range - 1.) * 2 - 1.
            # keep the normalized image coordinates of every level
            l_xy_cor = [xy]
            img = [image]
        # loop over the SA modules S1-S4 (Fig. 2 of the paper); strictly speaking this iterates over both the SA and the image conv blocks, and since their counts are equal, len(self.SA_modules) works for both
        for i in range(len(self.SA_modules)):
            # extract point features with the SA module
            li_xyz, li_features, li_index = self.SA_modules[i](l_xyz[i], l_features[i])
            # the config decides whether LI_FUSION is used
            if cfg.LI_FUSION.ENABLED:
                li_index = li_index.long().unsqueeze(-1).repeat(1,1,2)
                li_xy_cor = torch.gather(l_xy_cor[i],1,li_index)  # gather the image coordinates of the points this SA level kept
                # run the next image conv block
                image = self.Img_Block[i](img[i])
                #print(image.shape)
                # sample image features at the projected point locations
                img_gather_feature = Feature_Gather(image,li_xy_cor) #, scale= 2**(i+1))
                # fuse image and point features with the LI-Fusion module
                li_features = self.Fusion_Conv[i](li_features,img_gather_feature)
                # save per-level coordinates
                l_xy_cor.append(li_xy_cor)
                # keep the image feature map at every scale
                img.append(image)
            # save the point coordinates
            l_xyz.append(li_xyz)
            # save the LI-Fusion-fused features
            l_features.append(li_features)

        # FP modules: the three FP stages P1, P2, P3 in Fig. 2 of the paper
        for i in range(-1, -(len(self.FP_modules) + 1), -1):
            l_features[i - 1] = self.FP_modules[i](
                    l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i]
            )
        # image-stream deconvolution (upsampling)
        if cfg.LI_FUSION.ENABLED:
            #for i in range(1,len(img))
            DeConv = []
            for i in range(len(cfg.LI_FUSION.IMG_CHANNELS) - 1):
                # feed each saved image feature map through its deconv layer
                # note: the local DeConv list differs from self.DeConv[i]: the former stores the upsampled results, the latter is the transposed-conv layer (see its definition)
                DeConv.append(self.DeConv[i](img[i + 1]))
            # concatenate the upsampled maps along the channel dimension
            de_concat = torch.cat(DeConv,dim=1)

            # Conv + BN + ReLU over the concatenated maps: the FU layer in the paper
            img_fusion = F.relu(self.image_fusion_bn(self.image_fusion_conv(de_concat)))
            # final interpolation at the original point projections: P4 (Fig. 2 of the paper)
            img_fusion_gather_feature = Feature_Gather(img_fusion, xy)
            # final LI-Fusion (Fig. 2 of the paper)
            l_features[0] = self.final_fusion_img_point(l_features[0], img_fusion_gather_feature)

        return l_xyz[0], l_features[0]


class Pointnet2MSG_returnMiddleStages(Pointnet2MSG):
    def __init__(self, input_channels = 6, use_xyz = True):
        super().__init__(input_channels, use_xyz)

    def forward(self, pointcloud: torch.cuda.FloatTensor):
        xyz, features = self._break_up_pc(pointcloud)

        l_xyz, l_features = [xyz], [features]
        idxs = []
        for i in range(len(self.SA_modules)):
            li_xyz, li_features, idx = self.SA_modules[i](l_xyz[i], l_features[i])
            l_xyz.append(li_xyz)
            l_features.append(li_features)
            idxs.append(idx)

        for i in range(-1, -(len(self.FP_modules) + 1), -1):
            l_features[i - 1] = self.FP_modules[i](
                    l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i]
            )

        return l_xyz, l_features, idxs

3. Module-by-Module Breakdown

Two-stream RPN

The two-stream RPN consists of a Geometric Stream (the point-cloud stream) and an Image Stream.
[You could also call them a point-cloud branch and an image branch: each does its own job while also exchanging information with the other to some extent.]
(Figure: the two-stream RPN)

As shown in the figure above, the geometric stream and the image stream produce point features and semantic image features, respectively.

EPNet uses multiple LI-Fusion modules to enhance the point features with the corresponding image semantic features at several scales, producing a more discriminative feature representation.

3.1 Image Stream

1. Feature extraction (image encoder)

The image stream takes the image as input and extracts semantic image information through a set of convolutions.
(Figure: feature extraction)

Structure: each convolution block consists of two 3×3 convolution layers, a batch-normalization layer, and a ReLU activation (in the code, BN + ReLU sit between the two convolutions).

Location: EPNet-master\EPNet-master\lib\net\pointnet2_msg.py
The definition:

            for i in range(len(cfg.LI_FUSION.IMG_CHANNELS) - 1):
                # Img_Block initialization:
                # 1. the image downsampling blocks: BasicBlock conv layers are added to Img_Block according to the config

                # 2. stacked BasicBlock modules, wired as
                #    input: LI_FUSION.IMG_CHANNELS[i], output: cfg.LI_FUSION.IMG_CHANNELS[i+1],
                #    i.e. consecutive channel entries are paired up to build each conv block; see point 3 for a concrete example

                # 3. with IMG_CHANNELS: [3, 64, 128, 256, 512] in the config, the (input, output)
                #    pairs are (3,64), (64,128), (128,256), (256,512)
                self.Img_Block.append(BasicBlock(cfg.LI_FUSION.IMG_CHANNELS[i], cfg.LI_FUSION.IMG_CHANNELS[i+1], stride=1))

BasicBlock is the classic Conv + BN + ReLU pattern, closed by a second, strided 3×3 conv:

class BasicBlock(nn.Module):

    # Classic conv block: Conv + BN + ReLU, closed by a second 3x3 conv with stride 2*stride (downsampling)
    def __init__(self, inplanes, outplanes, stride = 1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, outplanes, stride)
        self.bn1 = BatchNorm2d(outplanes )
        self.relu = nn.ReLU(inplace = True)
        self.conv2 = conv3x3(outplanes, outplanes, 2*stride)

    def forward(self, x):

        out = self.conv1(x)
        # BN + ReLU between the two convs provides the non-linearity
        out = self.bn1(out)
        out = self.relu(out)
        # output: the second conv is strided, halving the spatial resolution
        out = self.conv2(out)

        return out
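
To make the downsampling concrete, here is a minimal sketch (my own, not from the repo; it assumes BasicBlock from pointnet2_msg.py is in scope) that pushes a dummy image through the four blocks built from IMG_CHANNELS:

import torch

# Minimal sketch (not repo code): chain the four BasicBlocks built from
# IMG_CHANNELS [3, 64, 128, 256, 512]. The second conv in each BasicBlock
# uses stride 2, so the spatial resolution halves at every block.
channels = [3, 64, 128, 256, 512]
blocks = [BasicBlock(channels[i], channels[i + 1], stride=1) for i in range(4)]

x = torch.randn(1, 3, 384, 1280)  # (B, C, H, W), a KITTI-sized dummy image
for i, block in enumerate(blocks):
    x = block(x)
    print(f"after block {i}: {tuple(x.shape)}")
# prints (1, 64, 192, 640), (1, 128, 96, 320), (1, 256, 48, 160), (1, 512, 24, 80)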

2. Transposed convolution (image decoder)

The paper then applies four transposed-convolution layers with different strides to recover the image resolution, yielding a feature map of the same size as the original image.
(1) Parameter setup
The layers are built with nn.ConvTranspose2d, using the DeConv_Reduce entries of the config as the output channels:

DeConv_Reduce: [16, 16, 16, 16]
DeConv_Kernels: [2, 4, 8, 16]
DeConv_Strides: [2, 4, 8, 16]

together with the IMG_CHANNELS used earlier for feature extraction as the input channels:

IMG_CHANNELS: [3, 64, 128, 256, 512]

Since feature maps at several different scales must all be scaled to the same size, the input channels follow IMG_CHANNELS: [3, 64, 128, 256, 512], while the output channels are uniformly DeConv_Reduce: [16, 16, 16, 16].
The process is illustrated below.
(Figure: deconvolution)

Location: EPNet-master\EPNet-master\lib\net\pointnet2_msg.py

The code:

                # DeConv: transposed-conv modules (ConvTranspose2d) built from the config
                # config: ① IMG_CHANNELS: [3, 64, 128, 256, 512]  ② DeConv_Reduce: [16, 16, 16, 16]
                self.DeConv.append(nn.ConvTranspose2d(cfg.LI_FUSION.IMG_CHANNELS[i + 1], cfg.LI_FUSION.DeConv_Reduce[i],
                                                  kernel_size=cfg.LI_FUSION.DeConv_Kernels[i],
                                                  stride=cfg.LI_FUSION.DeConv_Kernels[i]))
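
A small sketch (mine, not repo code) to verify the claim that all four scales come back to the same resolution:

import torch
import torch.nn as nn

# Sketch: with kernel_size == stride, ConvTranspose2d upsamples by exactly
# that factor, so feature maps at 1/2, 1/4, 1/8 and 1/16 resolution are all
# restored to the original image size.
img_channels = [3, 64, 128, 256, 512]
deconv_reduce = [16, 16, 16, 16]
deconv_kernels = [2, 4, 8, 16]

h, w = 384, 1280
for i in range(4):
    deconv = nn.ConvTranspose2d(img_channels[i + 1], deconv_reduce[i],
                                kernel_size=deconv_kernels[i], stride=deconv_kernels[i])
    scale = 2 ** (i + 1)
    feat = torch.randn(1, img_channels[i + 1], h // scale, w // scale)
    print(tuple(deconv(feat).shape))  # every iteration prints (1, 16, 384, 1280)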

3.2 Geometric Stream (point-cloud stream)

The geometric stream consists of four pairs of Set Abstraction (SA) layers and Feature Propagation (FP) layers for feature extraction.

1. SA modules (point-cloud encoder)

The SA modules from PointNet++ are used to aggregate point features; for details see the summary post 最全PointNet和PointNet++要点梳理总结.

Note: each SA module here takes in the LI-Fusion-fused features together with the coordinates output by the previous level, implemented as:

li_xyz, li_features, li_index = self.SA_modules[i](l_xyz[i], l_features[i])
# l_xyz[i] is the xyz output of the previous SA level; l_features[i] is the LI-Fusion-fused feature

See the figure from the paper:
(Figure: SA modules)

Location: EPNet-master\EPNet-master\lib\net\pointnet2_msg.py
The SA definition:

        # Set Abstraction (SA) modules from PointNet++
        self.SA_modules = nn.ModuleList()
        channel_in = input_channels

        skip_channel_list = [input_channels]
        for k in range(cfg.RPN.SA_CONFIG.NPOINTS.__len__()):
            mlps = cfg.RPN.SA_CONFIG.MLPS[k].copy()
            channel_out = 0
            for idx in range(mlps.__len__()):
                mlps[idx] = [channel_in] + mlps[idx]
                channel_out += mlps[idx][-1]
            # PointNet++ MSG SA module aggregates local point features
            self.SA_modules.append(
                    PointnetSAModuleMSG(
                            npoint = cfg.RPN.SA_CONFIG.NPOINTS[k],
                            radii = cfg.RPN.SA_CONFIG.RADIUS[k],
                            nsamples = cfg.RPN.SA_CONFIG.NSAMPLE[k],
                            mlps = mlps,
                            use_xyz = use_xyz,
                            bn = cfg.RPN.USE_BN
                    )
            )
            skip_channel_list.append(channel_out)
            channel_in = channel_out

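For intuition, here is a toy rerun of the channel bookkeeping above with a hypothetical MLPS entry (the real values live in the yaml config, which I am not quoting here):

# Toy rerun of the loop above with a hypothetical two-scale MLPS entry.
# The MSG module concatenates per-scale features, so the output channel
# count is the sum of the last MLP widths.
channel_in = 1                        # e.g. a single input feature per point
mlps = [[16, 16, 32], [32, 32, 64]]   # hypothetical, NOT the real config

channel_out = 0
for idx in range(len(mlps)):
    mlps[idx] = [channel_in] + mlps[idx]
    channel_out += mlps[idx][-1]

print(mlps)         # [[1, 16, 16, 32], [1, 32, 32, 64]]
print(channel_out)  # 96 = 32 + 64, the kind of value POINT_CHANNELS stores
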
Since the SA module is taken from PointNet++, all the code below is PointNet++ code, so only a brief look:

(1) The SA module is PointnetSAModuleMSG from PointNet++.
Location: \EPNet-master\EPNet-master\pointnet2_lib\pointnet2\pointnet2_modules.py

Not elaborated here (see the PointNet++ source for details); the PointnetSAModuleMSG code:

class PointnetSAModuleMSG(_PointnetSAModuleBase):
    """Pointnet set abstraction layer with multiscale grouping"""

    def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]], bn: bool = True,
                 use_xyz: bool = True, pool_method = 'max_pool', instance_norm = False):
        """
        :param npoint: int
        :param radii: list of float, list of radii to group with
        :param nsamples: list of int, number of samples in each ball query
        :param mlps: list of list of int, spec of the pointnet before the global pooling for each scale
        :param bn: whether to use batchnorm
        :param use_xyz:
        :param pool_method: max_pool / avg_pool
        :param instance_norm: whether to use instance_norm
        """
        super().__init__()

        assert len(radii) == len(nsamples) == len(mlps)
        # initialization
        self.npoint = npoint
        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        for i in range(len(radii)):
            # ball radius
            radius = radii[i]
            # max number of sampled points per ball
            nsample = nsamples[i]
            self.groupers.append(
                    # ball query + grouping
                    pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz = use_xyz)
                    if npoint is not None else pointnet2_utils.GroupAll(use_xyz)
            )
            mlp_spec = mlps[i]
            if use_xyz:
                mlp_spec[0] += 3
            # Shared MLP
            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn = bn, instance_norm = instance_norm))
        self.pool_method = pool_method

(2) The CUDA files for SA
Note that FPS, ball query, and grouping are all written in CUDA. I believe the reason is that these operations are computationally heavy, and CUDA parallelism makes the model more efficient.

I have annotated that code as well, but since the low-level code base is large I have not included it here; I can add it later if needed.

Location: EPNet-master\EPNet-master\pointnet2_lib\pointnet2\src
A screenshot of the files:
(Figure: CUDA files)

2. Feature Propagation (FP) layers (point-cloud decoder)

(1) Formula
FP uses the inverse-distance-weighted (IDW) interpolation from PointNet++; for details see the summary post 最全PointNet和PointNet++要点梳理总结.

The formula (IDW over the k = 3 nearest neighbors):

    f(x) = ( Σ_{i=1}^{3} w_i(x) · f_i ) / ( Σ_{i=1}^{3} w_i(x) ),   with w_i(x) = 1 / d(x, x_i)^p

(PointNet++ uses p = 2 by default; the code below simply takes the reciprocal of the distance returned by three_nn.)

Per the paper's figure, four FP modules are used in total: the first three (P1, P2, P3) interpolate against the features extracted by the earlier SA modules, while the last one (P4) interpolates against the raw point-cloud input. After interpolation, LI-Fusion is applied once more.
(Figure: interpolation)

Location: EPNet-master\EPNet-master\lib\net\pointnet2_msg.py
The code:

        # FP (Feature Propagation) modules: inverse-distance-weighted interpolation
        self.FP_modules = nn.ModuleList()
        # FP module initialization
        for k in range(cfg.RPN.FP_MLPS.__len__()):
            pre_channel = cfg.RPN.FP_MLPS[k + 1][-1] if k + 1 < len(cfg.RPN.FP_MLPS) else channel_out
            self.FP_modules.append(
                    PointnetFPModule(mlp = [pre_channel + skip_channel_list[k]] + cfg.RPN.FP_MLPS[k])
            )

(2) Code
The PointnetFPModule also lives in the PointNet++ codebase.
Location: \EPNet-master\EPNet-master\pointnet2_lib\pointnet2\pointnet2_modules.py

class PointnetFPModule(nn.Module):
    r"""Propigates the features of one set to another"""

    def __init__(self, *, mlp: List[int], bn: bool = True, activation = nn.ReLU(inplace = True)):
        """
        :param mlp: list of int
        :param bn: whether to use batchnorm
        """
        super().__init__()
        self.mlp = pt_utils.SharedMLP(mlp, bn = bn, activation = activation)

    def forward(
            self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor, known_feats: torch.Tensor
    ) -> torch.Tensor:
        """
        :param unknown: (B, n, 3) tensor of the xyz positions of the unknown features
        :param known: (B, m, 3) tensor of the xyz positions of the known features
        :param unknow_feats: (B, C1, n) tensor of the features to be propagated to
        :param known_feats: (B, C2, m) tensor of features to be propagated
        :return:
            new_features: (B, mlp[-1], n) tensor of the features of the unknown features
        """
        # known: positions of the known points, (B, m, 3)
        # known_feats: features of the known points, (B, C2, m)
        # unknown: positions of the points to interpolate, (B, n, 3); generally n > m
        # interpolated_feats: interpolated features for those points, (B, C2, n), the returned result
        if known is not None:
            # three_nn finds, for each unknown point, its three nearest neighbors among the known points (returns distances and indices)
            dist, idx = pointnet2_utils.three_nn(unknown, known)
            # the weights are the reciprocals of the distances, normalized to sum to 1
            dist_recip = 1.0 / (dist + 1e-8)
            norm = torch.sum(dist_recip, dim = 2, keepdim = True)
            weight = dist_recip / norm
            # interpolate the features from the three neighbors using those weights
            interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight)
        else:
            interpolated_feats = known_feats.expand(*known_feats.size()[0:2], unknown.size(1))

        if unknow_feats is not None:
            new_features = torch.cat([interpolated_feats, unknow_feats], dim = 1)  # (B, C2 + C1, n)
        else:
            new_features = interpolated_feats

        new_features = new_features.unsqueeze(-1)
        # shared MLP (1x1 convs) transforms the channel dimension
        new_features = self.mlp(new_features)

        return new_features.squeeze(-1)
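
As a quick sanity check on the weighting, a tiny hand-rolled sketch (mine) of the same IDW step for a single query point:

# Hand-rolled IDW for one query point with 3 known neighbors, mirroring the
# dist_recip / norm / weight lines above (1-channel features for brevity).
dists = [1.0, 4.0, 5.0]        # distances returned by three_nn
feats = [10.0, 20.0, 30.0]     # one feature value per neighbor

recip = [1.0 / (d + 1e-8) for d in dists]
norm = sum(recip)
weights = [r / norm for r in recip]              # weights sum to 1
interp = sum(w * f for w, f in zip(weights, feats))
print(weights)  # ~[0.69, 0.17, 0.14]: the closest neighbor dominates
print(interp)   # ~14.5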

(3) CUDA files
The core interpolation routines are written in CUDA so they can run in parallel, which keeps the model efficient.
Interpolation CUDA file: interpolate_gpu.cu (contains three_nn and three_interpolate)
Location: \EPNet-master\EPNet-master\pointnet2_lib\pointnet2\src\interpolate_gpu.cu

For a deeper dive, see: 小白科研笔记:理解PointNet++中的three_interpolate前向计算和反向求导

Code examples:
① three_nn

__global__ void three_nn_kernel_fast(int b, int n, int m, const float *__restrict__ unknown, 
    const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) {
    // unknown: (B, N, 3)
    // known: (B, M, 3)
    // output:
    //      dist2: (B, N, 3)  squared distances to the three nearest known points
    //      idx: (B, N, 3)    indices of those points
    
    int bs_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (bs_idx >= b || pt_idx >= n) return;

    unknown += bs_idx * n * 3 + pt_idx * 3;
    known += bs_idx * m * 3;
    dist2 += bs_idx * n * 3 + pt_idx * 3;
    idx += bs_idx * n * 3 + pt_idx * 3;

    float ux = unknown[0];
    float uy = unknown[1];
    float uz = unknown[2];

    double best1 = 1e40, best2 = 1e40, best3 = 1e40;
    int besti1 = 0, besti2 = 0, besti3 = 0;
    for (int k = 0; k < m; ++k) {
        float x = known[k * 3 + 0];
        float y = known[k * 3 + 1];
        float z = known[k * 3 + 2];
        float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); // squared Euclidean distance to point k
        // keep the three smallest distances seen so far
        if (d < best1) {
            best3 = best2; besti3 = besti2;
            best2 = best1; besti2 = besti1;
            best1 = d; besti1 = k;
        } 
        else if (d < best2) {
            best3 = best2; besti3 = besti2;
            best2 = d; besti2 = k;
        } 
        else if (d < best3) {
            best3 = d; besti3 = k;
        }
    }
    dist2[0] = best1; dist2[1] = best2; dist2[2] = best3;
    idx[0] = besti1; idx[1] = besti2; idx[2] = besti3;
}

② three_interpolate

__global__ void three_interpolate_kernel_fast(int b, int c, int m, int n, const float *__restrict__ points, 
    const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) {
    // points: (B, C, M)
    // idx: (B, N, 3)
    // weight: (B, N, 3)
    // output:
    //      out: (B, C, N)

    int bs_idx = blockIdx.z;
    int c_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;

    weight += bs_idx * n * 3 + pt_idx * 3;
    points += bs_idx * c * m + c_idx * m;
    idx += bs_idx * n * 3 + pt_idx * 3;
    out += bs_idx * c * n + c_idx * n;
    // weighted sum of the three neighbors' features
    out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]];
}

3.3 LI-Fusion

1. The diagram

LI-Fusion follows the diagram below, in particular its right half: the element-wise image/point-cloud fusion, i.e. the LI-Fusion Layer.
(Figure: LI-Fusion)
In the figure, the upper branch carries the LiDAR Features (the point-cloud features) and the lower branch the Point-wise Image Features (image features sampled at the projected point locations).

2. Steps

The two branches are fused through the following steps:

(1) IA_Layer: element-wise fusion of image and point-cloud features
In the code, IA_Layer fuses the image and point-cloud information:
(Figure: LI-Fusion Layer)

① Projection and fusion: the two branches first pass through FC layers that map them to the same dimension, and only then are they added element-wise (note: this is a direct element-wise sum of the two feature vectors, not a concatenation; in the code it is ri + rp, where ri is the image feature and rp is the point feature).
② tanh then maps the values into [-1, 1], normalizing the distribution.
③ An FC layer reduces the channel dimension to one, so that a one-dimensional weight map can be formed and later multiplied with the image features; it also further mixes the information.
④ A sigmoid activation constrains the values to [0, 1], yielding the weight map: every element gets its own weight, which tells us how much each image pixel contributes.
⑤ Finally, image features × weight map gives the image features scaled by their contribution.


In the paper's words:

We first feed the LiDAR feature F_P and the point-wise image feature F_I into a fully connected layer and map them to the same channel dimension. We then add them together to form a compact feature representation, which is compressed into a single-channel weight map w through another fully connected layer. A sigmoid activation function normalizes the weight map w into the range [0, 1].
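
Written compactly as a formula (my notation, matching the code: fc1 = W1, fc2 = W2, fc3 = W3, σ the sigmoid):

    w = σ( W3 · tanh( W1 · F_I + W2 · F_P ) ),   F_I' = Conv1d(F_I) ⊙ w

where ⊙ denotes the per-point (broadcast) product.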


The code is as follows.
Location: EPNet-master\EPNet-master\lib\net\pointnet2_msg.py

#================addition attention (add)=======================#
class IA_Layer(nn.Module):
    def __init__(self, channels):
        print('##############ADDITION ATTENTION(ADD)#########')
        super(IA_Layer, self).__init__()
        self.ic, self.pc = channels
        rc = self.pc // 4
        self.conv1 = nn.Sequential(nn.Conv1d(self.ic, self.pc, 1),
                                    nn.BatchNorm1d(self.pc),
                                    nn.ReLU())
        self.fc1 = nn.Linear(self.ic, rc)
        self.fc2 = nn.Linear(self.pc, rc)
        self.fc3 = nn.Linear(rc, 1)


    def forward(self, img_feas, point_feas):
        batch = img_feas.size(0)
        # flatten image and point features: (B, C, N) -> (BN, C)
        img_feas_f = img_feas.transpose(1,2).contiguous().view(-1, self.ic) #BCN->BNC->(BN)C
        point_feas_f = point_feas.transpose(1,2).contiguous().view(-1, self.pc) #BCN->BNC->(BN)C'
        # print(img_feas)

        # pass image and point features through separate FC layers to map them into the same dimension for fusion
        ri = self.fc1(img_feas_f)
        rp = self.fc2(point_feas_f)

        # 1. Fuse: image and point features are combined by element-wise addition: ri + rp
        # 2. tanh squashes the sum into [-1, 1]
        # 3. fc3 reduces it to a single channel so it can later be multiplied with the image features
        # 4. sigmoid maps the result into [0, 1], giving a weight map: every point has its own weight,
        #    which tells us how much the corresponding image pixel contributes
        att = F.sigmoid(self.fc3(F.tanh(ri + rp))) #BNx1

        att = att.squeeze(1)
        att = att.view(batch, 1, -1) #B1N
        # print(img_feas.size(), att.size())

        # remap the raw image features with a 1x1 conv (ic -> pc) so they can be multiplied with the weight map
        img_feas_new = self.conv1(img_feas)
        # weighted image features: image features * weight map = what the image contributes
        out = img_feas_new * att
        # return the re-weighted image features
        return out
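
A minimal shape check (my own sketch; the channel sizes are only illustrative):

import torch

# Illustrative sizes: ic = 64 image channels, pc = 96 point channels, N = 128 points.
layer = IA_Layer(channels=[64, 96])

img_feas = torch.randn(2, 64, 128)    # (B, ic, N)
point_feas = torch.randn(2, 96, 128)  # (B, pc, N)

out = layer(img_feas, point_feas)
print(out.shape)  # torch.Size([2, 96, 128]): image features mapped to pc channels
                  # and re-weighted per point by the attention map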

(2) Atten_Fusion_Conv: folding the raw point-cloud features back in

The IA_Layer has already produced the fused image/point feature; since it is computed as image × weight map, it mostly carries image information. The original point-cloud features therefore still need to be brought back in, as shown below:
(Figure: final fusion)
In the code this is the Atten_Fusion_Conv class, which works in three steps:
① obtain the re-weighted image features through the IA_Layer module (described in the previous subsection);
② concatenate the raw point features (LiDAR Features) with the re-weighted image features img_features (note: this is a concatenation, not an element-wise sum);

The code:

      # concatenate the raw point features with the re-weighted image features: this keeps the (attention-filtered) image information as well as the original point information
      fusion_features = torch.cat([point_features, img_features], dim=1)

③ pass the concatenated features through Conv1d + BN + ReLU to transform the channel dimension, giving the final fusion result.


The complete implementation:

class Atten_Fusion_Conv(nn.Module):
    # The LI-Fusion layer
    def __init__(self, inplanes_I, inplanes_P, outplanes):
        super(Atten_Fusion_Conv, self).__init__()
        # IA_Layer re-weights the image features using both modalities
        self.IA_Layer = IA_Layer(channels = [inplanes_I, inplanes_P])
        # self.conv1 = torch.nn.Conv1d(inplanes_P, outplanes, 1)
        # IA_Layer maps the image features to inplanes_P channels, so the concatenation below has 2 * inplanes_P channels
        self.conv1 = torch.nn.Conv1d(inplanes_P + inplanes_P, outplanes, 1)
        self.bn1 = torch.nn.BatchNorm1d(outplanes)


    def forward(self, point_features, img_features):
        # print(point_features.shape, img_features.shape)
        # IA_Layer produces the attention-weighted image features
        img_features =  self.IA_Layer(img_features, point_features)
        # print("img_features:", img_features.shape)

        # concatenate the raw point features with the re-weighted image features: this keeps the (attention-filtered) image information as well as the original point information
        # fusion_features = img_features + point_features
        fusion_features = torch.cat([point_features, img_features], dim=1)
        # Conv + BN + ReLU yields the final fused features
        fusion_features = F.relu(self.bn1(self.conv1(fusion_features)))

        return fusion_features
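
And a matching sketch for the full fusion layer (sizes again illustrative; the model itself pairs IMG_CHANNELS[i+1] with POINT_CHANNELS[i]):

import torch

# Illustrative sizes: 64 image channels, 96 point channels, 96 fused channels.
fuse = Atten_Fusion_Conv(inplanes_I=64, inplanes_P=96, outplanes=96)

point_features = torch.randn(2, 96, 128)  # (B, pc, N)
img_features = torch.randn(2, 64, 128)    # (B, ic, N)
print(fuse(point_features, img_features).shape)  # torch.Size([2, 96, 128])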

3.4 The forward function

With the individual modules covered, the overall flow is as follows:
(Figure: overall flow)

① loop over the SA modules and extract point features

li_xyz, li_features, li_index = self.SA_modules[i](l_xyz[i], l_features[i])

② extract image features with the image conv block

image = self.Img_Block[i](img[i])

③ sample (interpolate) image features at the projected point locations; a standalone sketch of Feature_Gather follows the code line below

img_gather_feature = Feature_Gather(image,li_xy_cor) #, scale= 2**(i+1))
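
Feature_Gather is a thin wrapper around grid_sample; a standalone sketch (mine) of what it does:

import torch
from torch.nn.functional import grid_sample

# Bilinearly sample a (B, C, H, W) feature map at N normalized (x, y)
# locations in [-1, 1]; this is exactly what Feature_Gather does.
feature_map = torch.randn(1, 8, 48, 160)  # e.g. a 1/8-scale image feature map
xy = torch.rand(1, 300, 2) * 2.0 - 1.0    # 300 projected points, already in [-1, 1]

sampled = grid_sample(feature_map, xy.unsqueeze(1))  # (B, C, 1, N)
print(sampled.squeeze(2).shape)  # torch.Size([1, 8, 300]): one feature vector per point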

④ fuse image and point features with the LI-Fusion module

li_features = self.Fusion_Conv[i](li_features,img_gather_feature)

⑤ FP modules: the three FP stages P1, P2, P3 in Fig. 2 of the paper

    for i in range(-1, -(len(self.FP_modules) + 1), -1):
        l_features[i - 1] = self.FP_modules[i](
                l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i]
        )

⑥ the image-stream deconvolution

        if cfg.LI_FUSION.ENABLED:
            #for i in range(1,len(img))
            DeConv = []
            for i in range(len(cfg.LI_FUSION.IMG_CHANNELS) - 1):
                # feed each saved image feature map through its deconv layer
                # note: the local DeConv list differs from self.DeConv[i]: the former stores the upsampled results, the latter is the transposed-conv layer
                DeConv.append(self.DeConv[i](img[i + 1]))
            # concatenate the upsampled maps along the channel dimension
            de_concat = torch.cat(DeConv,dim=1)

⑦ pass the concatenated result through Conv + BN + ReLU: the FU layer in the paper

img_fusion = F.relu(self.image_fusion_bn(self.image_fusion_conv(de_concat)))

⑧ the final interpolation at the original point projections: P4 (Fig. 2 of the paper)

img_fusion_gather_feature = Feature_Gather(img_fusion, xy)

⑨ the final LI-Fusion (Fig. 2 of the paper)

l_features[0] = self.final_fusion_img_point(l_features[0], img_fusion_gather_feature)

The full implementation:

 def forward(self, pointcloud: torch.cuda.FloatTensor, image=None, xy=None):
        # point coordinates xyz and features
        xyz, features = self._break_up_pc(pointcloud)
        # wrap xyz and features in lists; each SA level appends its output
        l_xyz, l_features = [xyz], [features]
        # the config decides whether LI_FUSION is used
        # config: LI_FUSION.ENABLED: True
        if cfg.LI_FUSION.ENABLED:
            #### normalize xy to [-1,1]
            size_range = [1280.0, 384.0]  # hard-coded input image size (W, H)
            xy[:, :, 0] = xy[:, :, 0] / (size_range[0] - 1.0) * 2.0 - 1.0
            xy[:, :, 1] = xy[:, :, 1] / (size_range[1] - 1.0) * 2.0 - 1.0  # = xy / (size_range - 1.) * 2 - 1.
            # keep the normalized image coordinates of every level
            l_xy_cor = [xy]
            img = [image]
        # loop over the SA modules S1-S4 (Fig. 2 of the paper); strictly speaking this iterates over both the SA and the image conv blocks, and since their counts are equal, len(self.SA_modules) works for both
        for i in range(len(self.SA_modules)):
            # extract point features with the SA module
            li_xyz, li_features, li_index = self.SA_modules[i](l_xyz[i], l_features[i])
            # the config decides whether LI_FUSION is used
            if cfg.LI_FUSION.ENABLED:
                li_index = li_index.long().unsqueeze(-1).repeat(1,1,2)
                li_xy_cor = torch.gather(l_xy_cor[i],1,li_index)  # gather the image coordinates of the points this SA level kept
                # run the next image conv block
                image = self.Img_Block[i](img[i])
                #print(image.shape)
                # sample image features at the projected point locations
                img_gather_feature = Feature_Gather(image,li_xy_cor) #, scale= 2**(i+1))
                # fuse image and point features with the LI-Fusion module
                li_features = self.Fusion_Conv[i](li_features,img_gather_feature)
                # save per-level coordinates
                l_xy_cor.append(li_xy_cor)
                # keep the image feature map at every scale
                img.append(image)
            # save the point coordinates
            l_xyz.append(li_xyz)
            # save the LI-Fusion-fused features
            l_features.append(li_features)

        # FP modules: the three FP stages P1, P2, P3 in Fig. 2 of the paper
        for i in range(-1, -(len(self.FP_modules) + 1), -1):
            l_features[i - 1] = self.FP_modules[i](
                    l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i]
            )
        # image-stream deconvolution (upsampling)
        if cfg.LI_FUSION.ENABLED:
            #for i in range(1,len(img))
            DeConv = []
            for i in range(len(cfg.LI_FUSION.IMG_CHANNELS) - 1):
                # feed each saved image feature map through its deconv layer
                # note: the local DeConv list differs from self.DeConv[i]: the former stores the upsampled results, the latter is the transposed-conv layer
                DeConv.append(self.DeConv[i](img[i + 1]))
            # concatenate the upsampled maps along the channel dimension
            de_concat = torch.cat(DeConv,dim=1)

            # Conv + BN + ReLU over the concatenated maps: the FU layer in the paper
            img_fusion = F.relu(self.image_fusion_bn(self.image_fusion_conv(de_concat)))
            # final interpolation at the original point projections: P4 (Fig. 2 of the paper)
            img_fusion_gather_feature = Feature_Gather(img_fusion, xy)
            # final LI-Fusion (Fig. 2 of the paper)
            l_features[0] = self.final_fusion_img_point(l_features[0], img_fusion_gather_feature)

        return l_xyz[0], l_features[0]

Reprinted from blog.csdn.net/weixin_44013732/article/details/130516138