Creating input, convolution, pooling, activation, and fully connected layers with the TensorRT C++ API

This uses the LeNet-5 network as an example (2 convolution layers + 3 fully connected layers).

1. First create the builder, then the network; the network object is then used to assemble the layer graph.

static Logger gLogger;
IBuilder* builder = createInferBuilder(gLogger);

// Excerpts from the NetworkDefinitionCreationFlag documentation:
// kEXPLICIT_BATCH = 0, //!< Mark the network to be an explicit batch network.
// With dynamic shapes, any of the input dimensions can vary at run-time.

// kEXPLICIT_PRECISION = 1, //!< Mark the network to be an explicit precision network.
//! Setting the network to be an explicit precision network has the following implications:
//! 1) The precision of all input tensors to the network has to be specified with ITensor::setType().
//! 2) The precision of all layer output tensors in the network has to be specified using
//!    ILayer::setOutputType().
//! 3) The builder will not quantize the weights of any layer, including those running in lower
//!    precision (INT8); it will simply cast the weights into the required precision.
//! 4) Dynamic ranges must not be provided to run the network in INT8 mode. The dynamic range of
//!    each tensor in the explicit precision network is [-127, 127].
//! 5) Quantizing and dequantizing activation values between higher (FP32) and lower (INT8) precision
//!    will be performed using explicit Scale layers with input/output precision set appropriately.
// Note: passing 0U sets no creation flags, which selects implicit batch mode.
INetworkDefinition* network = builder->createNetworkV2(0U);
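If explicit batch mode is needed (it is required for the dynamic-shape input variant shown in step 2), the flag is passed to createNetworkV2 as a bitmask. A minimal sketch:

const uint32_t explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
INetworkDefinition* explicitNet = builder->createNetworkV2(explicitBatch);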

2. Input layer: addInput

// const char* INPUT_BLOB_NAME = "data";
// DataType dt = DataType::kFLOAT;

// Create input tensor of shape { 1, 32, 32 } with name INPUT_BLOB_NAME
//! \param name The name of the tensor.
//! \param type The type of the data held in the tensor.
//! \param dimensions The dimensions of the tensor.
//! \return The new tensor or nullptr if there is an error.
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{1, INPUT_H, INPUT_W});
assert(data);


// Alternatively, with explicit batch and a dynamic batch dimension
// (note this needs Dims4 rather than Dims3, plus an optimization profile, see below):
// auto data = network->addInput(INPUT_BLOB_NAME, dt, Dims4{-1, 1, INPUT_H, INPUT_W});
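With a dynamic batch dimension, the builder also needs an optimization profile that bounds the runtime shapes. A minimal sketch, assuming a builder config as in the full example below; the min/opt/max batch sizes here are illustrative:

IOptimizationProfile* profile = builder->createOptimizationProfile();
profile->setDimensions(INPUT_BLOB_NAME, OptProfileSelector::kMIN, Dims4{1, 1, INPUT_H, INPUT_W});
profile->setDimensions(INPUT_BLOB_NAME, OptProfileSelector::kOPT, Dims4{8, 1, INPUT_H, INPUT_W});
profile->setDimensions(INPUT_BLOB_NAME, OptProfileSelector::kMAX, Dims4{32, 1, INPUT_H, INPUT_W});
config->addOptimizationProfile(profile);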

3. Convolution layer: addConvolutionNd

// Add convolution layer with 6 outputs and a 5x5 filter.

// virtual IConvolutionLayer* addConvolutionNd(ITensor& input, int32_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) TRTNOEXCEPT = 0;
//! \param input The input tensor to the convolution.
//! \param nbOutputMaps The number of output feature maps for the convolution.
//! \param kernelSize The multi-dimensions of the convolution kernel.
//! \param kernelWeights The kernel weights for the convolution.
//! \param biasWeights The optional bias weights for the convolution.
//! \return The new convolution layer, or nullptr if it could not be created.

// The input tensor is *data; the output has 6 channels, the kernel is 5x5, followed by the weight and bias values.
IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 6, DimsHW{5, 5}, weightMap["conv1.weight"], weightMap["conv1.bias"]);
assert(conv1);
conv1->setStrideNd(DimsHW{1, 1});

// Alternatively, the deprecated 2D-only variant. Note that data is already an
// ITensor*, so it is dereferenced directly rather than via getOutput(0):
auto conv1 = network->addConvolution(*data, 6, DimsHW{5, 5}, weightMap["conv1.weight"], weightMap["conv1.bias"]);
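Weights is a plain struct holding a data type, a host pointer, and an element count; the pointed-to memory must stay valid until the engine has been built. A sketch of filling one by hand instead of via weightMap (the values here are placeholders, not trained weights):

const int64_t count = 6 * 1 * 5 * 5;   // out_channels * in_channels * kH * kW
float* convW = new float[count]();     // fill with trained values in practice
Weights kernel{DataType::kFLOAT, convW, count};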

4. Activation layer: addActivation

// Add activation layer using the ReLU algorithm.

//! \param input The input tensor to the layer.
//! \param type The type of activation function to apply.
IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
assert(relu1);
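Parameterized activations use the same call plus setAlpha()/setBeta() for their coefficients. For example, a leaky ReLU with negative slope 0.1 (a sketch; LeNet-5 itself only uses plain ReLU):

IActivationLayer* lrelu = network->addActivation(*conv1->getOutput(0), ActivationType::kLEAKY_RELU);
lrelu->setAlpha(0.1f);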

5. Pooling layer: addPoolingNd

// Add an average pooling layer with a 2x2 kernel and a 2x2 stride.

IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kAVERAGE, DimsHW{2, 2});
assert(pool1);
pool1->setStrideNd(DimsHW{2, 2});
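Note that the code above actually performs average pooling (PoolingType::kAVERAGE), despite what the original comment said. For max pooling, only the pooling type changes:

IPoolingLayer* maxPool = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{2, 2});
maxPool->setStrideNd(DimsHW{2, 2});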

6. Fully connected layer: addFullyConnected

IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool2->getOutput(0), 120, weightMap["fc1.weight"], weightMap["fc1.bias"]);
assert(fc1);
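addFullyConnected implicitly flattens its CHW input, so the weight count must equal nbOutputs times the input volume. Here pool2 produces 16x5x5 = 400 values, so fc1 needs 120 x 400 weights and 120 biases; a quick sanity check:

assert(weightMap["fc1.weight"].count == 120 * 16 * 5 * 5);
assert(weightMap["fc1.bias"].count == 120);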

7. Softmax layer: addSoftMax

// Add softmax layer to determine the probability.

ISoftMaxLayer* prob = network->addSoftMax(*fc3->getOutput(0));
assert(prob);
prob->getOutput(0)->setName(OUTPUT_BLOB_NAME);  // Set the tensor name.
network->markOutput(*prob->getOutput(0));  // Mark a tensor as a network output.
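addSoftMax chooses a default reduction axis; it can also be pinned explicitly as a bitmask via setAxes(). A sketch, redundant for this output but useful for higher-rank tensors:

prob->setAxes(1U << 0);  // reduce over axis 0 of the (non-batch) tensor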

Complete LeNet-5 network code

// Create the engine using only the API and not any parser.
ICudaEngine* createLenetEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt)
{
    // Create the network (0U = no creation flags, i.e. implicit batch mode)
    INetworkDefinition* network = builder->createNetworkV2(0U);

    // Create input tensor of shape { 1, 32, 32 } with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{1, INPUT_H, INPUT_W});
    assert(data);

    // Add convolution layer with 6 outputs and a 5x5 filter.
    std::map<std::string, Weights> weightMap = loadWeights("../lenet5.wts");
    IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 6, DimsHW{5, 5}, weightMap["conv1.weight"], weightMap["conv1.bias"]);
    assert(conv1);
    conv1->setStrideNd(DimsHW{1, 1});

    // Add activation layer using the ReLU algorithm.
    IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
    assert(relu1);

    // Add an average pooling layer with a 2x2 kernel and a 2x2 stride.
    IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kAVERAGE, DimsHW{2, 2});
    assert(pool1);
    pool1->setStrideNd(DimsHW{2, 2});

    // Add second convolution layer with 16 outputs and a 5x5 filter.
    IConvolutionLayer* conv2 = network->addConvolutionNd(*pool1->getOutput(0), 16, DimsHW{5, 5}, weightMap["conv2.weight"], weightMap["conv2.bias"]);
    assert(conv2);
    conv2->setStrideNd(DimsHW{1, 1});

    // Add activation layer using the ReLU algorithm.
    IActivationLayer* relu2 = network->addActivation(*conv2->getOutput(0), ActivationType::kRELU);
    assert(relu2);

    // Add the second average pooling layer with a 2x2 kernel and a 2x2 stride.
    IPoolingLayer* pool2 = network->addPoolingNd(*relu2->getOutput(0), PoolingType::kAVERAGE, DimsHW{2, 2});
    assert(pool2);
    pool2->setStrideNd(DimsHW{2, 2});

    // Add fully connected layer
    IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool2->getOutput(0), 120, weightMap["fc1.weight"], weightMap["fc1.bias"]);
    assert(fc1);

    // Add activation layer using the ReLU algorithm.
    IActivationLayer* relu3 = network->addActivation(*fc1->getOutput(0), ActivationType::kRELU);
    assert(relu3);

    // Add second fully connected layer
    IFullyConnectedLayer* fc2 = network->addFullyConnected(*relu3->getOutput(0), 84, weightMap["fc2.weight"], weightMap["fc2.bias"]);
    assert(fc2);

    // Add activation layer using the ReLU algorithm.
    IActivationLayer* relu4 = network->addActivation(*fc2->getOutput(0), ActivationType::kRELU);
    assert(relu4);

    // Add third fully connected layer
    IFullyConnectedLayer* fc3 = network->addFullyConnected(*relu4->getOutput(0), OUTPUT_SIZE, weightMap["fc3.weight"], weightMap["fc3.bias"]);
    assert(fc3);

    // Add softmax layer to determine the probability.
    ISoftMaxLayer* prob = network->addSoftMax(*fc3->getOutput(0));
    assert(prob);
    prob->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    network->markOutput(*prob->getOutput(0));

    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(1 << 20);
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap)
    {
        free((void*) (mem.second.values));
    }

    return engine;
}
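A minimal sketch of driving createLenetEngine and serializing the engine to host memory; gLogger and the constants are assumed to be defined as above, and error handling is elided:

IBuilder* builder = createInferBuilder(gLogger);
IBuilderConfig* config = builder->createBuilderConfig();
ICudaEngine* engine = createLenetEngine(1, builder, config, DataType::kFLOAT);
assert(engine != nullptr);

IHostMemory* serialized = engine->serialize();
// ... write serialized->data() / serialized->size() to a .engine file ...

serialized->destroy();
engine->destroy();
config->destroy();
builder->destroy();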

Reposted from blog.csdn.net/jizhidexiaoming/article/details/118997750