pointnet和pointnet++

pointnet++是pointnet的改进版本，两者都可以做点云分类和点云分割
代码：pointnet系列

pointnet：

分类：
这里写图片描述
输入：B*N*（d+C） d为坐标xyz,C为点属性（在modelnet40数据集没有点属性）图中为B*N*3
transform:通过T-net得到B*3*3的变换矩阵，对坐标进行空间变换
mlp:相当于1*1的卷积
max pool:获取全局特征
输出：k个分类

def get_model(point_cloud, is_training, bn_decay=None):
    """Build the PointNet classification network.

    Args:
        point_cloud: BxNx3 tensor of point coordinates.
        is_training: training-mode flag forwarded to batch norm and dropout.
        bn_decay: batch-norm decay forwarded to every batch-normalised layer.

    Returns:
        (net, end_points): ``net`` is the Bx40 output of the final fully
        connected layer (no activation); ``end_points['transform']`` holds the
        feature transform produced under the ``transform_net2`` scope.
    """
    input_shape = point_cloud.get_shape()
    batch_size = input_shape[0].value
    num_point = input_shape[1].value
    end_points = {}

    def shared_mlp(inputs, channels, kernel_size, scope):
        # One per-point layer: VALID, stride-1 conv2d followed by batch norm.
        return tf_util.conv2d(inputs, channels, kernel_size,
                              padding='VALID', stride=[1, 1],
                              bn=True, is_training=is_training,
                              scope=scope, bn_decay=bn_decay)

    # Input transform: predict a matrix with K=3 and apply it to the coordinates.
    with tf.variable_scope('transform_net1'):
        input_transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)
    aligned_points = tf.matmul(point_cloud, input_transform)

    # Append a trailing channel axis so the points can go through conv2d;
    # the [1,3] kernel of conv1 consumes the whole coordinate axis at once.
    net = tf.expand_dims(aligned_points, -1)
    net = shared_mlp(net, 64, [1, 3], 'conv1')
    net = shared_mlp(net, 64, [1, 1], 'conv2')

    # Feature transform: predict a matrix with K=64 and apply it to the features.
    with tf.variable_scope('transform_net2'):
        feature_transform = feature_transform_net(net, is_training, bn_decay, K=64)
    end_points['transform'] = feature_transform
    aligned_features = tf.matmul(tf.squeeze(net, axis=[2]), feature_transform)
    net = tf.expand_dims(aligned_features, [2])

    for scope, channels in (('conv3', 64), ('conv4', 128), ('conv5', 1024)):
        net = shared_mlp(net, channels, [1, 1], scope)

    # Symmetric function: max pooling over all num_point points.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='maxpool')

    # Classification head: fc1/dp1, fc2/dp2, then the 40-way output layer.
    net = tf.reshape(net, [batch_size, -1])
    for layer_no, units in enumerate((512, 256), 1):
        net = tf_util.fully_connected(net, units, bn=True, is_training=is_training,
                                      scope='fc%d' % layer_no, bn_decay=bn_decay)
        net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                              scope='dp%d' % layer_no)
    net = tf_util.fully_connected(net, 40, activation_fn=None, scope='fc3')

    return net, end_points

分割：
这里写图片描述
global feature拼接：把1*1024的全局特征复制n遍，接在n*64的逐点特征后面，得到n*（64+1024）
mlp:512,256,128,m
输出：n个m分类的点

相当于，对每个点提取点坐标特征信息（64），再联接全局的特征信息（1024），一系列全连接后得到m分类

缺点：
pointnet只是简单地将所有点连接起来，只考虑了全局特征和单个点的特征，没有局部信息，对于有多个实例的多分类问题效果不好。

这里写图片描述

pointnet++

这里写图片描述
set abstraction:
代码中把坐标和属性分开存储：坐标xyz（d）,属性points（C）
聚类&划分：把点云按空间信息聚类，分别得到中心点new_xyz, 聚类（use_xyz决定是否添加坐标信息）new_points, 索引idx,聚类坐标 grouped_xyz
new_points: (batch_size, npoint, nsample, 3+channel)。PS：相当于把点云处理成类似图像的（B,H,W,C）格式，后面可以直接用conv2d处理

def sample_and_group(npoint, radius, nsample, xyz, points, knn=False, use_xyz=True):
    '''
    Pick npoint seed points and gather an nsample-sized local region around each.

    Input:
        npoint: int32
        radius: float32
        nsample: int32
        xyz: (batch_size, ndataset, 3) TF tensor
        points: (batch_size, ndataset, channel) TF tensor, if None will just use xyz as points
        knn: bool, if True use kNN instead of radius search
        use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features
    Output:
        new_xyz: (batch_size, npoint, 3) TF tensor
        new_points: (batch_size, npoint, nsample, 3+channel) TF tensor
        idx: (batch_size, npoint, nsample) TF tensor, indices of local points as in ndataset points
        grouped_xyz: (batch_size, npoint, nsample, 3) TF tensor, normalized point XYZs
            (subtracted by seed point XYZ) in local regions
    '''
    # Seed points of the local regions.
    seed_indices = farthest_point_sample(npoint, xyz)
    new_xyz = gather_point(xyz, seed_indices)  # (batch_size, npoint, 3)

    # Neighbour indices for every seed: kNN or ball query.
    if not knn:
        idx, _pts_cnt = query_ball_point(radius, nsample, xyz, new_xyz)
    else:
        _dists, idx = knn_point(nsample, xyz, new_xyz)

    # Gather neighbour coordinates and express them relative to their seed.
    neighbour_xyz = group_point(xyz, idx)  # (batch_size, npoint, nsample, 3)
    seed_xyz = tf.tile(tf.expand_dims(new_xyz, 2), [1, 1, nsample, 1])
    grouped_xyz = neighbour_xyz - seed_xyz  # translation normalization

    if points is None:
        # No extra features: the relative coordinates are the features.
        new_points = grouped_xyz
    else:
        grouped_points = group_point(points, idx)  # (batch_size, npoint, nsample, channel)
        new_points = grouped_points
        if use_xyz:
            # (batch_size, npoint, nsample, 3+channel)
            new_points = tf.concat([grouped_xyz, grouped_points], axis=-1)

    return new_xyz, new_points, idx, grouped_xyz

对每个聚类分别做pointnet:
没有使用T-net
把new_points反复进行1*1的卷积，然后池化，得到新的坐标new_xyz（上一层的中心点）, 新的特征属性new_points, 索引idx

def pointnet_sa_module(xyz, points, npoint, radius, nsample, mlp, mlp2, group_all, is_training, bn_decay, scope, bn=True, pooling='max', knn=False, use_xyz=True, use_nchw=False):
    ''' PointNet Set Abstraction (SA) Module

        Samples and groups the input points, runs a shared per-point MLP
        (1x1 conv2d layers) over every group, pools each group down to a
        single feature vector and optionally runs a second MLP on the result.

        Input:
            xyz: (batch_size, ndataset, 3) TF tensor
            points: (batch_size, ndataset, channel) TF tensor
            npoint: int32 -- #points sampled in farthest point sampling
            radius: float32 -- search radius in local region
            nsample: int32 -- how many points in each local region
            mlp: list of int32 -- output size for MLP on each point
            mlp2: list of int32 -- output size for MLP on each region, or None to skip it
            group_all: bool -- group all points into one PC if set true, OVERRIDE
                npoint, radius and nsample settings
            is_training: training-mode flag forwarded to the conv layers
            bn_decay: batch-norm decay forwarded to the conv layers
            scope: variable scope name wrapping the whole module
            bn: bool, forwarded to the conv layers to toggle batch norm
            pooling: one of 'max', 'avg', 'weighted_avg', 'max_and_avg'
            knn: bool, if True use kNN instead of radius search
            use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features
            use_nchw: bool, if True, use NCHW data format for conv2d, which is usually faster than NHWC format
        Return:
            new_xyz: (batch_size, npoint, 3) TF tensor
            new_points: (batch_size, npoint, mlp[-1] or mlp2[-1]) TF tensor
                (with pooling='max_and_avg' and no mlp2 the avg and max
                features are concatenated, doubling the channel count)
            idx: (batch_size, npoint, nsample) int32 -- indices for local regions
        Raises:
            ValueError: if pooling is not one of the supported modes
    '''
    # Reject unknown pooling modes up front. Otherwise no branch below would
    # match, the nsample axis would never be reduced, and the final squeeze
    # would fail with an unrelated shape error (or pass silently if nsample == 1).
    supported_pooling = ('max', 'avg', 'weighted_avg', 'max_and_avg')
    if pooling not in supported_pooling:
        raise ValueError('Unsupported pooling %r; expected one of %s'
                         % (pooling, ', '.join(supported_pooling)))

    data_format = 'NCHW' if use_nchw else 'NHWC'
    with tf.variable_scope(scope):
        # Sample and Grouping
        if group_all:
            # A single group containing every input point.
            nsample = xyz.get_shape()[1].value
            new_xyz, new_points, idx, grouped_xyz = sample_and_group_all(xyz, points, use_xyz)
        else:
            new_xyz, new_points, idx, grouped_xyz = sample_and_group(npoint, radius, nsample, xyz, points, knn, use_xyz)

        # Point Feature Embedding
        # Channels are moved to axis 1 for NCHW convs and moved back afterwards.
        if use_nchw: new_points = tf.transpose(new_points, [0,3,1,2])
        for i, num_out_channel in enumerate(mlp):
            new_points = tf_util.conv2d(new_points, num_out_channel, [1,1],
                                        padding='VALID', stride=[1,1],
                                        bn=bn, is_training=is_training,
                                        scope='conv%d'%(i), bn_decay=bn_decay,
                                        data_format=data_format)
        if use_nchw: new_points = tf.transpose(new_points, [0,2,3,1])

        # Pooling in Local Regions (axis 2 is the nsample axis)
        # NOTE: `keep_dims` is the spelling used throughout this code (TF 1.4 era).
        if pooling=='max':
            new_points = tf.reduce_max(new_points, axis=[2], keep_dims=True, name='maxpool')
        elif pooling=='avg':
            new_points = tf.reduce_mean(new_points, axis=[2], keep_dims=True, name='avgpool')
        elif pooling=='weighted_avg':
            with tf.variable_scope('weighted_avg'):
                # Weight each neighbour by exp(-5 * distance to the seed point),
                # normalised so the weights of one region sum to 1.
                dists = tf.norm(grouped_xyz,axis=-1,ord=2,keep_dims=True)
                exp_dists = tf.exp(-dists * 5)
                weights = exp_dists/tf.reduce_sum(exp_dists,axis=2,keep_dims=True) # (batch_size, npoint, nsample, 1)
                new_points *= weights # (batch_size, npoint, nsample, mlp[-1])
                new_points = tf.reduce_sum(new_points, axis=2, keep_dims=True)
        elif pooling=='max_and_avg':
            max_points = tf.reduce_max(new_points, axis=[2], keep_dims=True, name='maxpool')
            avg_points = tf.reduce_mean(new_points, axis=[2], keep_dims=True, name='avgpool')
            new_points = tf.concat([avg_points, max_points], axis=-1)

        # [Optional] Further Processing
        if mlp2 is not None:
            if use_nchw: new_points = tf.transpose(new_points, [0,3,1,2])
            for i, num_out_channel in enumerate(mlp2):
                new_points = tf_util.conv2d(new_points, num_out_channel, [1,1],
                                            padding='VALID', stride=[1,1],
                                            bn=bn, is_training=is_training,
                                            scope='conv_post_%d'%(i), bn_decay=bn_decay,
                                            data_format=data_format)
            if use_nchw: new_points = tf.transpose(new_points, [0,2,3,1])

        # Drop the pooled (size-1) nsample axis.
        new_points = tf.squeeze(new_points, [2]) # (batch_size, npoints, mlp2[-1])
        return new_xyz, new_points, idx

分类：

def get_model(point_cloud, is_training, bn_decay=None):
    """Build the PointNet++ classification network.

    Args:
        point_cloud: BxNx3 tensor of point coordinates.
        is_training: training-mode flag forwarded to every layer.
        bn_decay: batch-norm decay forwarded to every batch-normalised layer.

    Returns:
        (net, end_points): ``net`` is the Bx40 output of the final fully
        connected layer (no activation); ``end_points['l0_xyz']`` is the
        input point cloud.
    """
    cloud_shape = point_cloud.get_shape()
    batch_size = cloud_shape[0].value
    num_point = cloud_shape[1].value  # read as in the original; not used below
    end_points = {}

    # Level 0 has coordinates only, no extra per-point features.
    l0_xyz, l0_points = point_cloud, None
    end_points['l0_xyz'] = l0_xyz

    # Keyword arguments shared by all three set abstraction layers.
    sa_common = dict(mlp2=None, is_training=is_training, bn_decay=bn_decay)

    # Set abstraction layers
    # Note: When using NCHW for layer 2, we see increased GPU memory usage (in TF1.4).
    # So we only use NCHW for layer 1 until this issue can be resolved.
    l1_xyz, l1_points, l1_indices = pointnet_sa_module(
        l0_xyz, l0_points, npoint=512, radius=0.2, nsample=32,
        mlp=[64, 64, 128], group_all=False, scope='layer1', use_nchw=True,
        **sa_common)
    l2_xyz, l2_points, l2_indices = pointnet_sa_module(
        l1_xyz, l1_points, npoint=128, radius=0.4, nsample=64,
        mlp=[128, 128, 256], group_all=False, scope='layer2',
        **sa_common)
    # group_all=True: the remaining points form a single group.
    l3_xyz, l3_points, l3_indices = pointnet_sa_module(
        l2_xyz, l2_points, npoint=None, radius=None, nsample=None,
        mlp=[256, 512, 1024], group_all=True, scope='layer3',
        **sa_common)

    # Fully connected layers: fc1/dp1, fc2/dp2, then the 40-way output layer.
    net = tf.reshape(l3_points, [batch_size, -1])
    for layer_no, units in enumerate((512, 256), 1):
        net = tf_util.fully_connected(net, units, bn=True, is_training=is_training,
                                      scope='fc%d' % layer_no, bn_decay=bn_decay)
        net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training,
                              scope='dp%d' % layer_no)
    net = tf_util.fully_connected(net, 40, activation_fn=None, scope='fc3')

    return net, end_points

最后一层set abstraction使用group_all把剩余的点聚成一组，再经过若干全连接层得到40个分类（和pointnet类似）

分割：
参考一下DSSD（SSD目标检测的改进）的框架图：
这里写图片描述

def get_model(point_cloud, is_training, bn_decay=None):
    """Build the PointNet++ part segmentation network.

    Args:
        point_cloud: BxNx6 tensor (XYZ NormalX NormalY NormalZ).
        is_training: training-mode flag forwarded to every layer.
        bn_decay: batch-norm decay forwarded to every batch-normalised layer.

    Returns:
        (net, end_points): ``net`` is the output of the final 50-channel
        conv1d (no activation), i.e. 50 scores per point -- presumably
        BxNx50, TODO confirm against pointnet_fp_module's output shape;
        ``end_points['feats']`` is the 128-channel output of ``fc1``.
    """
    end_points = {}

    # Split the six input channels: first three are coordinates, last three
    # are used as the initial per-point features.
    l0_xyz = tf.slice(point_cloud, [0,0,0], [-1,-1,3])
    l0_points = tf.slice(point_cloud, [0,0,3], [-1,-1,3])

    # Set Abstraction layers (the returned group indices are not needed here)
    l1_xyz, l1_points, _ = pointnet_sa_module(l0_xyz, l0_points, npoint=512, radius=0.2, nsample=64, mlp=[64,64,128], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer1')
    l2_xyz, l2_points, _ = pointnet_sa_module(l1_xyz, l1_points, npoint=128, radius=0.4, nsample=64, mlp=[128,128,256], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer2')
    l3_xyz, l3_points, _ = pointnet_sa_module(l2_xyz, l2_points, npoint=None, radius=None, nsample=None, mlp=[256,512,1024], mlp2=None, group_all=True, is_training=is_training, bn_decay=bn_decay, scope='layer3')

    # Feature Propagation layers: walk back from the coarsest level to the
    # input points, updating each level's features from the level above it.
    l2_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points, [256,256], is_training, bn_decay, scope='fa_layer1')
    l1_points = pointnet_fp_module(l1_xyz, l2_xyz, l1_points, l2_points, [256,128], is_training, bn_decay, scope='fa_layer2')
    # At level 0 the coordinates are concatenated with the input features.
    l0_points = pointnet_fp_module(l0_xyz, l1_xyz, tf.concat([l0_xyz,l0_points],axis=-1), l1_points, [128,128,128], is_training, bn_decay, scope='fa_layer3')

    # FC layers (kernel-size-1 conv1d applied to the level-0 features)
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True, is_training=is_training, scope='fc1', bn_decay=bn_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.conv1d(net, 50, 1, padding='VALID', activation_fn=None, scope='fc2')

    # The original snippet stopped here and implicitly returned None; return
    # the same (net, end_points) pair as the other get_model functions.
    return net, end_points

Feature Propagation layers：
把高层的特征通过插值得到和低层特征一样的维度，然后连接起来

高层特征：（N2，d+C2）
低层特征：（N1，d+C1）
pointnet_fp_module后:（N1，d+C1+C2）→mlp→（N1，d+C3）

pointnet：

pointnet++

猜你喜欢