TensorRT API calls corresponding to ONNX node types

Contents

1. Input node

2. Conv node

3. BN layer

4. ReLU node

5. MaxPool node

6. BasicBlock

7. GlobalAvgPooling

8. Flatten

9. Gemm node (fully connected layer)

10. Resize node

11. Concat node

1. Input node

// const char* INPUT_BLOB_NAME = "data";
// static const int INPUT_H = 224;
// static const int INPUT_W = 224;
// DataType dt = DataType::kFLOAT

// Create input tensor of shape { 3, INPUT_H, INPUT_W } with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{3, INPUT_H, INPUT_W});
    assert(data);
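
For context, the network object used above (and in every later snippet) comes from a builder. A minimal sketch only, assuming a global gLogger (an ILogger implementation) and the implicit-batch mode that matches the 3 x H x W input shape above:

// Minimal setup sketch (assumptions: gLogger exists, implicit batch dimension)
IBuilder* builder = createInferBuilder(gLogger);
INetworkDefinition* network = builder->createNetworkV2(0U);   // 0U = implicit batch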

2. Conv node

The Conv here has no bias; the letter "B" in the figure refers to the BN layer, and 64 is the number of output channels.

// std::map<std::string, Weights> weightMap = loadWeights("./resnet18.wts");
// Weights emptywts{DataType::kFLOAT, nullptr, 0};
IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 64, DimsHW{7, 7}, weightMap["conv1.weight"], emptywts);
assert(conv1);
conv1->setStrideNd(DimsHW{2, 2});
conv1->setPaddingNd(DimsHW{3, 3});
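
The weightMap referenced in the comment above is produced by a loadWeights helper. A minimal sketch, assuming the plain-text .wts format used by the tensorrtx examples (first line: number of tensors; then one "name count hex-values..." entry per tensor); requires <fstream>, <map>, <string> and <cassert>:

std::map<std::string, Weights> loadWeights(const std::string& file) {
    std::map<std::string, Weights> weightMap;
    std::ifstream input(file);
    assert(input.is_open() && "Unable to load weight file.");

    int32_t count;
    input >> count;                        // number of weight tensors in the file
    while (count--) {
        Weights wt{DataType::kFLOAT, nullptr, 0};
        std::string name;
        uint32_t size;
        input >> name >> std::dec >> size;

        uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
        for (uint32_t i = 0; i < size; ++i) {
            input >> std::hex >> val[i];   // weights are stored as hex-encoded 32-bit floats
        }
        wt.values = val;
        wt.count = size;
        weightMap[name] = wt;
    }
    return weightMap;
}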

3. BN layer

The exported ONNX graph has no standalone BN node, and the TensorRT C++ API has no dedicated BatchNorm layer either, so BN is implemented here with an IScaleLayer: at inference BatchNorm reduces to a per-channel affine transform with scale = gamma / sqrt(var + eps) and shift = beta - mean * scale, which is exactly what IScaleLayer computes as output = (input * scale + shift)^power.

IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps) {
    float *gamma = (float*)weightMap[lname + ".weight"].values;
    float *beta = (float*)weightMap[lname + ".bias"].values;
    float *mean = (float*)weightMap[lname + ".running_mean"].values;
    float *var = (float*)weightMap[lname + ".running_var"].values;
    int len = weightMap[lname + ".running_var"].count;
    std::cout << "len " << len << std::endl;

    // per-channel scale = gamma / sqrt(var + eps)
    float *scval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        scval[i] = gamma[i] / sqrt(var[i] + eps);
    }
    Weights scale{DataType::kFLOAT, scval, len};

    // per-channel shift = beta - mean * gamma / sqrt(var + eps)
    float *shval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        shval[i] = beta[i] - mean[i] * gamma[i] / sqrt(var[i] + eps);
    }
    Weights shift{DataType::kFLOAT, shval, len};

    // power = 1, so the layer computes (input * scale + shift)^1
    float *pval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        pval[i] = 1.0;
    }
    Weights power{DataType::kFLOAT, pval, len};

    // keep the allocated buffers alive in weightMap so they are not freed before the engine is built
    weightMap[lname + ".scale"] = scale;
    weightMap[lname + ".shift"] = shift;
    weightMap[lname + ".power"] = power;
    IScaleLayer* scale_1 = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
    assert(scale_1);
    return scale_1;
}

IScaleLayer* bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "bn1", 1e-5);

4. ReLU node

IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
assert(relu1);
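
Other ONNX activation nodes map onto the same addActivation call with a different ActivationType. A hedged illustration only, not part of ResNet-18; prev is a placeholder for whichever layer feeds the activation:

IActivationLayer* lrelu = network->addActivation(*prev->getOutput(0), ActivationType::kLEAKY_RELU);
lrelu->setAlpha(0.1f);   // ONNX LeakyRelu "alpha" attribute (negative slope)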

5. MaxPool node

 

IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{ 3, 3 });
assert(pool1);
pool1->setStrideNd(DimsHW{ 2, 2 });
pool1->setPaddingNd(DimsHW{ 1, 1 });

6. BasicBlock

This is the ResNet-18 building block. Its input is the output of the previous ReLU activation (for the first block, the output of the max pool), and its output is again a ReLU activation.

                      

IActivationLayer* basicBlock(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int inch, int outch, int stride, std::string lname) {
    Weights emptywts{ DataType::kFLOAT, nullptr, 0 };

    IConvolutionLayer* conv1 = network->addConvolutionNd(input, outch, DimsHW{ 3, 3 }, weightMap[lname + "conv1.weight"], emptywts);
    assert(conv1);
    conv1->setStrideNd(DimsHW{ stride, stride });
    conv1->setPaddingNd(DimsHW{ 1, 1 });

    IScaleLayer* bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + "bn1", 1e-5);

    IActivationLayer* relu1 = network->addActivation(*bn1->getOutput(0), ActivationType::kRELU);
    assert(relu1);

    IConvolutionLayer* conv2 = network->addConvolutionNd(*relu1->getOutput(0), outch, DimsHW{ 3, 3 }, weightMap[lname + "conv2.weight"], emptywts);
    assert(conv2);
    conv2->setPaddingNd(DimsHW{ 1, 1 });

    IScaleLayer* bn2 = addBatchNorm2d(network, weightMap, *conv2->getOutput(0), lname + "bn2", 1e-5);

    IElementWiseLayer* ew1;
    if (inch != outch) {
        // projection shortcut: 1x1 conv + BN so the residual can be added when the channel count changes
        IConvolutionLayer* conv3 = network->addConvolutionNd(input, outch, DimsHW{ 1, 1 }, weightMap[lname + "downsample.0.weight"], emptywts);
        assert(conv3);
        conv3->setStrideNd(DimsHW{ stride, stride });
        IScaleLayer* bn3 = addBatchNorm2d(network, weightMap, *conv3->getOutput(0), lname + "downsample.1", 1e-5);
        ew1 = network->addElementWise(*bn3->getOutput(0), *bn2->getOutput(0), ElementWiseOperation::kSUM);
    }
    else {
        ew1 = network->addElementWise(input, *bn2->getOutput(0), ElementWiseOperation::kSUM);
    }
    IActivationLayer* relu2 = network->addActivation(*ew1->getOutput(0), ActivationType::kRELU);
    assert(relu2);
    return relu2;
}


IActivationLayer* relu2 = basicBlock(network, weightMap, *pool1->getOutput(0), 64, 64, 1, "layer1.0.");

// second residual block
IActivationLayer* relu3 = basicBlock(network, weightMap, *relu2->getOutput(0), 64, 64, 1, "layer1.1.");
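
The later sections use relu9, the output of the last block. A sketch of the remaining ResNet-18 stages, assuming the standard torchvision layer naming (each stage doubles the channels and the first block of a stage uses stride 2):

IActivationLayer* relu4 = basicBlock(network, weightMap, *relu3->getOutput(0), 64, 128, 2, "layer2.0.");
IActivationLayer* relu5 = basicBlock(network, weightMap, *relu4->getOutput(0), 128, 128, 1, "layer2.1.");
IActivationLayer* relu6 = basicBlock(network, weightMap, *relu5->getOutput(0), 128, 256, 2, "layer3.0.");
IActivationLayer* relu7 = basicBlock(network, weightMap, *relu6->getOutput(0), 256, 256, 1, "layer3.1.");
IActivationLayer* relu8 = basicBlock(network, weightMap, *relu7->getOutput(0), 256, 512, 2, "layer4.0.");
IActivationLayer* relu9 = basicBlock(network, weightMap, *relu8->getOutput(0), 512, 512, 1, "layer4.1.");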

7. GlobalAvgPooling

The TensorRT C++ API has no built-in global average pooling layer, so it is implemented here as an average pooling whose kernel covers the entire feature map (relu9 is the output of the last BasicBlock, see the sketch at the end of section 6).

Dims last_conv_dims = relu9->getOutput(0)->getDimensions();
int nb_dims = last_conv_dims.nbDims;
Dims k_size = DimsHW{ last_conv_dims.d[nb_dims - 2], last_conv_dims.d[nb_dims - 1] };
IPoolingLayer* pool2 = network->addPoolingNd(*relu9->getOutput(0), PoolingType::kAVERAGE, k_size );
assert(pool2);
pool2->setStrideNd(DimsHW{ 1, 1 });
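
An alternative sketch (not from the original post) uses IReduceLayer, which averages directly over the spatial axes and produces the same global average pooling result:

// Reduce axes form a bit mask over the tensor dimensions; with implicit batch the layout is CHW,
// so bit 1 (H) and bit 2 (W) are reduced while the channel axis is kept.
uint32_t hw_axes = (1U << 1) | (1U << 2);
IReduceLayer* gap = network->addReduce(*relu9->getOutput(0), ReduceOperation::kAVG, hw_axes, true /*keep dims*/);
assert(gap);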

8. Flatten

TensorRT needs no explicit flatten; the fully connected layer in the next section is attached directly to the pooled output.
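
If an explicit flatten were ever needed (for example, to feed a matrix-multiply layer instead of addFullyConnected), a hedged sketch with IShuffleLayer would be:

IShuffleLayer* flatten = network->addShuffle(*pool2->getOutput(0));
Dims flat;
flat.nbDims = 1;
flat.d[0] = 512;                       // assumption: pool2 output is 512 x 1 x 1 for ResNet-18
flatten->setReshapeDimensions(flat);   // 512x1x1 -> 512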

9. Gemm node (fully connected layer)

IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool2->getOutput(0), OUTPUT_SIZE, weightMap["backbone.backbone.fc.weight"], weightMap["backbone.backbone.fc.bias"]);
assert(fc1);
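
To round the example off, the classifier output is marked as the network output and the engine is built. A sketch only: OUTPUT_SIZE = 1000 (ImageNet), the output blob name, and the builder from the sketch in section 1 are assumptions here:

// static const int OUTPUT_SIZE = 1000;      // assumption: ImageNet classes
// const char* OUTPUT_BLOB_NAME = "prob";    // assumption: output tensor name
fc1->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*fc1->getOutput(0));

IBuilderConfig* config = builder->createBuilderConfig();
config->setMaxWorkspaceSize(1 << 28);        // 256 MB of build workspace
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
assert(engine);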

10. Resize node

The scales<4> attribute on the ONNX Resize node presumably means a 4x enlargement overall, i.e. 2x in each of height and width. The code below implements that upsampling; for downsampling the factors would need to be changed.

IResizeLayer* upsample(ITensor* input_tensor, INetworkDefinition* network) {
    IResizeLayer* resize_tensor = network->addResize(*input_tensor);
    assert(resize_tensor);
    Dims origin_shape = input_tensor->getDimensions();
    Dims upsample_dim;
    if (origin_shape.nbDims == 4)   // explicit batch: NCHW, double H and W
        upsample_dim = { origin_shape.nbDims, origin_shape.d[0], origin_shape.d[1], origin_shape.d[2] * 2, origin_shape.d[3] * 2 };
    else                            // implicit batch: CHW, double H and W
        upsample_dim = { origin_shape.nbDims, origin_shape.d[0], origin_shape.d[1] * 2, origin_shape.d[2] * 2 };
    resize_tensor->setOutputDimensions(upsample_dim);
    resize_tensor->setResizeMode(ResizeMode::kLINEAR);
    resize_tensor->setAlignCorners(true);
    return resize_tensor;
}
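
Inside the same function, an alternative to computing absolute output dimensions is to let TensorRT derive them from per-axis scale factors; a sketch assuming an implicit-batch CHW tensor:

IResizeLayer* resize_by_scale = network->addResize(*input_tensor);
assert(resize_by_scale);
const float scales[3] = {1.0f, 2.0f, 2.0f};   // keep channels, double H and W
resize_by_scale->setScales(scales, 3);
resize_by_scale->setResizeMode(ResizeMode::kLINEAR);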

11. Concat node

IConcatenationLayer* conCat(ITensor* tensor_x, ITensor* tensor_y, INetworkDefinition* network) {
    ITensor* inputTensors[] = { tensor_x,tensor_y};
    IConcatenationLayer* cat = network->addConcatenation(inputTensors, 2);
    assert(cat);
    return cat;
}
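
By default the concatenation runs along the channel axis. A hedged usage sketch (up1 and skip are placeholder layers); setAxis is only needed when the ONNX Concat node's axis attribute points elsewhere:

IConcatenationLayer* cat1 = conCat(up1->getOutput(0), skip->getOutput(0), network);
cat1->setAxis(0);   // 0 = channel axis in an implicit-batch network; match the ONNX "axis" attribute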

Reprinted from blog.csdn.net/jizhidexiaoming/article/details/119138283