caffe Reshape层TensorRT IPlugin代码实现

目前TensorRT并不支持caffe的Reshape层,故在用TensorRT加速caffe网络时,Reshape需要通过TensorRT的plugin插件实现。

以下是Reshape层的Plugin源码:

#include <cassert>
#include <cstdlib>
#include <cstring>
#include <iostream>

#include <cuda_runtime.h>
#include "NvInferPlugin.h"
// Abort on CUDA runtime failure, printing the error string and source line.
// Wrapped in do { } while (0) so the macro expands to a single statement and
// is safe in unbraced if/else contexts; (status) is parenthesized so complex
// expressions evaluate correctly.
#define CHECK(status)                                                      \
    do                                                                     \
    {                                                                      \
        if ((status) != 0)                                                 \
        {                                                                  \
            std::cout << "Cuda failure: " << cudaGetErrorString(status)    \
                      << " at line " << __LINE__                           \
                      << std::endl;                                        \
            abort();                                                       \
        }                                                                  \
    } while (0)
// Reshape layer plugin.
// TensorRT has no built-in Caffe Reshape layer, so this IPlugin implements it.
// A reshape only reinterprets the blob's C/H/W dimensions; the element count
// and data are unchanged, so enqueue() is a device-to-device copy (plugins
// cannot execute in place).
class Reshape : public IPlugin
{
public:
    // Build-time constructor: takes the output blob dimensions declared in the
    // prototxt Reshape layer. Per Caffe semantics a dimension of 0 means "copy
    // from input" and -1 means "infer from the remaining dimensions"; both are
    // resolved later in getOutputDimensions().
    Reshape(DimsCHW outDim)
    {
      memcpy(m_outDim.d, outDim.d, 3 * sizeof(int));
    }

    // Deserialization constructor: restores the fully-resolved output
    // dimensions that serialize() wrote into the engine file, and recomputes
    // the per-image copy size from them.
    Reshape(const void* buffer, size_t size)
    {
      assert(size == 3 * sizeof(int));
      const int* d = reinterpret_cast<const int*>(buffer);
      memcpy(m_outDim.d, d, 3 * sizeof(int));
      mCopySize = m_outDim.d[0] * m_outDim.d[1] * m_outDim.d[2] * sizeof(float);
    }

    // A reshape always produces exactly one output blob.
    int getNbOutputs() const override
    {
        return 1;
    }

    // Resolve the concrete output dimensions from the input blob's dimensions
    // and the layer parameters: 0 entries are copied from the input, and at
    // most one -1 entry is inferred so the total element count is preserved.
    Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
    {
      assert(nbInputDims == 1);
      assert(index == 0);
      assert(inputs[index].nbDims == 3);
      int undefined_dim = 0;
      for(int i = 0; i < 3; i++) {
        if(0 == m_outDim.d[i])
          m_outDim.d[i] = inputs[0].d[i]; // 0 means "same as input"
        if(-1 == m_outDim.d[i])
          undefined_dim++;
      }
      assert(undefined_dim <= 1); // at most one dimension may be -1 (inferred)
      // Infer the single -1 dimension so the output volume equals the input
      // volume (the other two dimensions are already concrete at this point).
      if(-1 == m_outDim.d[0])
        m_outDim.d[0] = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2] / (m_outDim.d[1] * m_outDim.d[2]);
      if(-1 == m_outDim.d[1])
        m_outDim.d[1] = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2] / (m_outDim.d[0] * m_outDim.d[2]);
      if(-1 == m_outDim.d[2])
        m_outDim.d[2] = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2] / (m_outDim.d[0] * m_outDim.d[1]);

      return m_outDim;
    }

    // Cache the per-image byte count. Input and output volumes are equal for a
    // reshape, so computing it from the input dimensions is sufficient.
    void configure(const Dims*inputs, int nbInputs, const Dims* outputs, int nbOutputs, int) override
    {
      mCopySize = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2] * sizeof(float);
    }

    // The copy in enqueue() needs no scratch workspace.
    size_t getWorkspaceSize(int) const override
    {
      return 0;
    }

    // No resources to acquire.
    int initialize() override
    {
        return 0;
    }

    // Serialized state is exactly the three resolved output dimensions.
    size_t getSerializationSize() override
    {
      return 3 * sizeof(int);
    }

    // Write the resolved output C/H/W values into the engine blob; the
    // (buffer, size) constructor is the inverse of this.
    void serialize(void* buffer) override
    {
      int* d = reinterpret_cast<int*>(buffer);
      memcpy(d, m_outDim.d, 3 * sizeof(int));
    }

    // No resources to release.
    void terminate() override
    {
    }

    // Currently it is not possible for a plugin to execute "in place", so the
    // data is copied from the input to the output buffer on the given stream.
    // NOTE(review): assumes mCopySize was set by configure() or the
    // deserialization constructor before the first enqueue — TensorRT
    // guarantees this ordering during build/inference.
    int enqueue(int batchSize, const void*const *inputs, void** outputs, void*, cudaStream_t stream) override
    {
        CHECK(cudaMemcpyAsync(outputs[0], inputs[0], mCopySize * batchSize, cudaMemcpyDeviceToDevice, stream));
        return 0;
    }

protected:
    DimsCHW m_outDim;      // resolved output blob dimensions (c, h, w)
    size_t mCopySize = 0;  // bytes per image (c*h*w*sizeof(float)); 0 until
                           // configure() or deserialization sets it
};

猜你喜欢

转载自blog.csdn.net/maidabu/article/details/82289353