Graph Optimization in ncnn

ncnn recently published a new release, and one of its highlights is graph optimization: the forward graph is rewritten into a simpler structure so that inference runs faster. Let's go through the fusion passes one by one:

  Two consecutive operators can only be merged into one when specific conditions are met.

(1)XXX-batchnorm

int fuse_convolution_batchnorm(); // group1
int fuse_convolutiondepthwise_batchnorm();
int fuse_deconvolution_batchnorm();
int fuse_deconvolutiondepthwise_batchnorm();
int fuse_innerproduct_batchnorm();

(2)XXX-activation (see the sketch after this list)

int fuse_convolution_activation(); // group2
int fuse_convolutiondepthwise_activation();
int fuse_deconvolution_activation();
int fuse_deconvolutiondepthwise_activation();
int fuse_innerproduct_activation();

(3)batchnorm-scale

(4)innerproduct-dropout (a sketch of this pass follows the conv + batchnorm walkthrough below)
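The group (2) passes work differently from the parameter-folding ones: an activation has no weights to fold, so the fusion instead records the activation on the convolution layer itself and lets the convolution kernel apply it inline. Below is a minimal sketch of that fold step for a trailing ReLU, not the exact ncnnoptimize code; it assumes the <conv_id, activation_id> pair <i, j> has already been located by the same scan as in the batchnorm example that follows, and that ncnn::Convolution exposes activation_type / activation_params and ncnn::ReLU exposes slope, as in recent ncnn headers.

// sketch only: fold a trailing ReLU into the convolution itself
ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
ncnn::ReLU* relu = (ncnn::ReLU*)layers[j];

if (relu->slope == 0.f)
{
    convolution->activation_type = 1;                   // plain ReLU
}
else
{
    convolution->activation_type = 2;                   // leaky ReLU
    convolution->activation_params = ncnn::Mat(1);
    convolution->activation_params[0] = relu->slope;    // keep the negative slope
}

relu->type = "ncnnfused";    // retire the standalone activation layer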


  Take conv + batchnorm as an example. At inference time a BatchNorm layer is a per-channel affine transform y = b * x + a, with b = slope / sqrt(var + eps) and a = bias - slope * mean / sqrt(var + eps); applied after a convolution, it can therefore be absorbed by scaling each output channel's weights by b and folding the remaining shift into the convolution bias (new_bias = bias * b + a):

int NetOptimize::fuse_convolution_batchnorm()
{
    const int layer_count = layers.size();

    // iterate over all layers
    for (int i = 0; i < layer_count; i++)
    {
        // look for a Convolution layer
        if (layers[i]->type != "Convolution")
            continue;

        // Convolution - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        int j = i + 1;
        for (; j < layer_count; j++)
        {
            // with the convolution fixed, look for a BatchNorm
            if (layers[j]->type != "BatchNorm")
                continue;

            // a BatchNorm whose bottom blob is not unique does not qualify
            if (layers[j]->bottoms.size() != 1)
                continue;

            // the pair matches when the BatchNorm consumes the convolution's output blob
            if (layers[j]->bottoms[0] == top_blob_index)
                break;    // match found
        }

        // boundary condition: no matching BatchNorm found, move on to the next layer
        if (j == layer_count)
            continue;

        // fuse "Convolution - BatchNorm" into "Convolution"
        // after the scan above, <i, j> is a <conv_id, bn_id> pair that can be fused
        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_convolution_batchnorm %s %s\n", convolution->name.c_str(), batchnorm->name.c_str());

        // =======> code segment begin
        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a
            std::vector<float> a(channels);
            std::vector<float> b(channels);
            // a quick gripe about ncnn here: a and b are not exactly readable names
            for (int i = 0; i < channels; i++)
            {
                float sqrt_var = sqrt(batchnorm->var_data[i] + eps);
                a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
                b[i] = batchnorm->slope_data[i] / sqrt_var;
            }

            if (convolution->bias_term == 0)
            {
                // init bias as zero
                convolution->bias_term = 1;
                convolution->bias_data = ncnn::Mat(channels);
                convolution->bias_data.fill(0.f);
            }

            // number of weight values per output channel
            const int weight_per_outch = convolution->weight_data_size / channels;

            float* weight = convolution->weight_data;
            float* bias = convolution->bias_data;
            for (int i = 0; i < channels; i++)
            {
                // scale this output channel's weights by b, element by element
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j = 0; j < weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= b[i];
                }

                // fold the affine shift into the bias: new_bias = bias * b + a
                bias[i] = bias[i] * b[i] + a[i];
            }
        }
        // =======> code segment end

        // rewire the layer/blob relations
        int top_blob_index_final = batchnorm->tops[0];    // remember the BatchNorm's output blob
        convolution->tops[0] = top_blob_index_final;      // the convolution now produces that blob
        blobs[top_blob_index_final].producer = i;         // the blob's producer is the conv, no longer the bn
        batchnorm->type = "ncnnfused";                    // mark the original layer as fused
    }

    return 0;
}
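Group (4) follows the same scan-and-fold pattern. At inference time a Dropout layer is either an identity or a single per-blob scale, so the fold step only has to multiply the InnerProduct parameters by that scale. The fragment below is a sketch of just the folding and rewiring step, not the exact ncnnoptimize code; it assumes the <innerproduct_id, dropout_id> pair <i, j> has already been located as above, and uses the scale / weight_data / bias_data member names from ncnn's layer headers.

ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
ncnn::Dropout* dropout = (ncnn::Dropout*)layers[j];

float scale = dropout->scale;
if (scale != 1.f)
{
    // fold the dropout scale into the fully-connected weights and bias
    float* weight = innerproduct->weight_data;
    for (int k = 0; k < innerproduct->weight_data_size; k++)
        weight[k] *= scale;

    if (innerproduct->bias_term)
    {
        float* bias = innerproduct->bias_data;
        for (int k = 0; k < innerproduct->num_output; k++)
            bias[k] *= scale;
    }
}

// rewire the output blob and retire the Dropout layer, exactly as in the batchnorm example
int top_blob_index_final = dropout->tops[0];
innerproduct->tops[0] = top_blob_index_final;
blobs[top_blob_index_final].producer = i;
dropout->type = "ncnnfused";

In every pass the retired layer stays in the layer list; rewriting its type to "ncnnfused" is what lets the rest of the tool recognize it as a no-op when the optimized model is written out.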


Reposted from www.cnblogs.com/jianfeifeng/p/11097021.html