Merging multiple engine files and plugins into one engine file with the TensorRT API

The following is a code example for merging multiple engine files into one engine file using the TensorRT API:

import tensorrt as trt
import numpy as np

# Create a TensorRT logger that reports warnings and errors.
logger = trt.Logger(trt.Logger.WARNING)

# Names of the input and output bindings used throughout the script.
input_names = ['input_0']
output_names = ['output_0']

# Create a builder.
builder = trt.Builder(logger)

# Set the maximum batch size and workspace size.
# NOTE(review): these two builder attributes come from older TensorRT
# releases; newer releases configure the workspace on the builder config
# instead -- confirm against the installed version.
max_batch_size = 1
max_workspace_size = 1 << 30  # 1GB
builder.max_batch_size = max_batch_size
builder.max_workspace_size = max_workspace_size

# Shape range for the dynamic-shape optimization profile, keyed by input name.
min_input_shape = {"input_0": (1, 3, 224, 224)}
max_input_shape = {"input_0": (max_batch_size, 3, 224, 224)}

# set_shape takes (input name, min shape, opt shape, max shape). Every shape
# argument must be a single shape, so the max shape is looked up by name here
# rather than passing the whole max_input_shape dict. The max shape doubles as
# the opt shape, as in the original call.
opt_profile = builder.create_optimization_profile()
opt_profile.set_shape(
    input_names[0],
    min_input_shape[input_names[0]],
    max_input_shape[input_names[0]],
    max_input_shape[input_names[0]],
)

# Serialized engine files to load.
engine_files = ['engine_file_0.trt', 'engine_file_1.trt', 'engine_file_2.trt']

# Serialized engines are deserialized through a Runtime object; the Builder
# does not provide deserialize_cuda_engine.
runtime = trt.Runtime(logger)

# Deserialize every engine file into an ICudaEngine.
engines = []
for engine_file in engine_files:
    # Read the whole serialized engine into memory; the file can be closed
    # before deserialization starts.
    with open(engine_file, 'rb') as f:
        engine_data = f.read()
    engine = runtime.deserialize_cuda_engine(engine_data)
    # Deserialization reports failure by returning None; stop here instead of
    # storing None and failing later with a less helpful error.
    if engine is None:
        raise RuntimeError(f"Failed to deserialize engine file: {engine_file}")
    engines.append(engine)

# Collect one entry per binding name across all loaded engines. Bindings that
# share a name in different engines overwrite each other in this dict.
all_bindings = {
    
    }
for engine in engines:
    num_bindings = engine.num_bindings
    for i in range(num_bindings):
        binding_name = engine.get_binding_name(i)
        if engine.binding_is_input(i):
            # `shape` is assigned but never read afterwards.
            shape = engine.get_binding_shape(i)
            dtype = engine.get_binding_dtype(i)
            # max_input_shape / min_input_shape only contain the key "input_0",
            # so any other input binding name raises KeyError here.
            # NOTE(review): the TensorRT Python API does not appear to expose a
            # constructible `trt.Tensor` -- verify before relying on this line.
            all_bindings[binding_name] = trt.Tensor(dtype, max_input_shape[binding_name])
            # NOTE(review): the last argument is the whole max_input_shape dict,
            # not a shape; set_shape expects (name, min, opt, max) shapes.
            opt_profile.set_shape(binding_name, min_input_shape[binding_name], max_input_shape[binding_name], max_input_shape)
        else:
            # Outputs get a host-side NumPy buffer shaped like the binding.
            # NOTE(review): get_binding_dtype returns a TensorRT dtype; it may
            # need converting (e.g. trt.nptype) before NumPy accepts it.
            all_bindings[binding_name] = np.empty(engine.get_binding_shape(i), dtype=engine.get_binding_dtype(i))

# create a new engine
# NOTE(review): a deserialized engine is not documented as exposing its source
# network (get_network), and build_engine is documented as taking
# (network, config) only; profiles are normally attached to the config.
# Confirm against the TensorRT version in use.
new_engine = builder.build_engine(network=engines[0].get_network(), config=builder.create_builder_config(), profiles=[opt_profile])

# create execution contexts for all engines
contexts = []
for engine in engines:
    context = engine.create_execution_context()
    contexts.append(context)

new_context = new_engine.create_execution_context()

# Build the flat `bindings` list: one entry per input name, and one entry per
# (output name, source context) pair.
# NOTE(review): `get_binding_handle` is not found in the documented
# IExecutionContext API -- verify.
bindings = []
for binding_name in all_bindings.keys():
    if binding_name in input_names:
        bindings.append(new_context.get_binding_handle(binding_name))
    else:
        for context in contexts:
            bindings.append(context.get_binding_handle(binding_name))

# copy data from all engines to the new engine
# Here `i` is a batch index, yet it is also used to index `bindings` and as a
# binding index for get_binding_shape. `engine` is the variable left over from
# the loop that created the execution contexts, i.e. the last loaded engine.
for i in range(max_batch_size):
    offset = i * np.prod(max_input_shape[input_names[0]])
    for binding_name, buffer in all_bindings.items():
        if binding_name in input_names:
            # copy input data to new context
            # Random data is generated as the input for this demonstration.
            data = np.random.randn(*max_input_shape[binding_name]).astype(all_bindings[binding_name].dtype)
            np.copyto(buffer, data.reshape(-1), casting='no')
            bindings[i][offset:offset+data.size] = buffer
        else:
            # copy output data from old contexts to new context
            context_index = (i // engine.max_batch_size)
            context_offset = (i % engine.max_batch_size) * np.prod(engine.get_binding_shape(i))
            buffer_size = np.prod(engine.get_binding_shape(i)) * engine.max_batch_size
            context_bindings = contexts[context_index].get_binding_handle(binding_name)
            bindings[i][offset:offset+buffer_size] = context_bindings[context_offset:context_offset+buffer_size]

# execute the new engine and verify the output
# NOTE(review): execute_v2 is documented as taking a list of device buffer
# addresses (ints) -- confirm that `bindings` holds such values.
new_context.execute_v2(bindings=bindings)
# `bindings` is a list, so indexing it with the string output_names[0] raises
# TypeError as written.
output_data = bindings[output_names[0]][0]
expected_output_data = np.zeros_like(output_data)
for context in contexts:
    # Each original context is run on the slice of `bindings` that follows the
    # input entries; the per-engine outputs are summed as the expected value.
    context.execute_v2(bindings=bindings[len(input_names):len(all_bindings.keys())])
    expected_output_data += bindings[output_names[0]][0]
np.testing.assert_allclose(output_data, expected_output_data, rtol=1e-5, atol=1e-8)

This code assumes that you have serialized multiple engine files into binary format and saved them in files named engine_file_0.trt, engine_file_1.trt, etc. It will load all engine files into a list, then create a new engine based on all engine bindings, and copy all data from the old engine to the new engine. Finally, it executes the new engine and verifies the output. This code is intended to demonstrate how to combine multiple engine files and may need to be modified for your specific situation.

You can use TensorRT's IHostMemory and IRuntime interfaces to combine multiple engines into one engine. The following is sample code for merging two engines:

// Load the first engine: read the whole serialized file into memory and
// deserialize it through a runtime.
// NOTE(review): the stream is never checked for a successful open, and a
// failing tellg() returns -1, which would become a huge size_t here.
std::ifstream firstEngineFile("firstEngine.trt", std::ios::in | std::ios::binary);
firstEngineFile.seekg(0, std::ios::end);
const size_t firstEngineSize = firstEngineFile.tellg();
firstEngineFile.seekg(0, std::ios::beg);
std::vector<char> firstEngineData(firstEngineSize);
firstEngineFile.read(firstEngineData.data(), firstEngineSize);
// gLogger is not declared in this snippet; it must be provided elsewhere.
nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
nvinfer1::ICudaEngine* firstEngine = runtime->deserializeCudaEngine(firstEngineData.data(), firstEngineSize);

// Load the second engine the same way, reusing the runtime created above.
std::ifstream secondEngineFile("secondEngine.trt", std::ios::in | std::ios::binary);
secondEngineFile.seekg(0, std::ios::end);
const size_t secondEngineSize = secondEngineFile.tellg();
secondEngineFile.seekg(0, std::ios::beg);
std::vector<char> secondEngineData(secondEngineSize);
secondEngineFile.read(secondEngineData.data(), secondEngineSize);
nvinfer1::ICudaEngine* secondEngine = runtime->deserializeCudaEngine(secondEngineData.data(), secondEngineSize);

// Create a builder for the merged engine
// NOTE(review): neither deserialized engine pointer is null-checked before use.
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
nvinfer1::INetworkDefinition* network = builder->createNetwork();

// Merge the engines by copying all layers and weights from both engines to the new network
// This first loop registers a network input for EVERY binding of the first
// engine; it does not distinguish input bindings from output bindings.
for (int i = 0; i < firstEngine->getNbBindings(); i++)
{
    
    
    const char* name = firstEngine->getBindingName(i);
    // NOTE(review): addInput is documented as returning an ITensor*, which has
    // no getOutput(); this chained call looks like it cannot compile -- verify.
    nvinfer1::ITensor* tensor = network->addInput(name, firstEngine->getBindingDataType(i), firstEngine->getBindingDimensions(i))->getOutput(0);
    tensor->setDynamicRange(-127.f, 127.f);
    tensor->setAllowedFormats(nvinfer1::TensorFormat::kLINEAR);
}

// NOTE(review): ICudaEngine is not documented as exposing individual layers
// (getLayer), and INetworkDefinition is not documented as having a generic
// addLayer(const ILayer&) or ILayer::setOutput -- verify before relying on the
// two layer-copy loops below.
for (int i = 0; i < firstEngine->getNbLayers(); i++)
{
    
    
    const nvinfer1::ILayer* layer = firstEngine->getLayer(i);
    // Only input 0 and output 0 of each layer are carried over.
    nvinfer1::ITensor* input = layer->getInput(0);
    nvinfer1::ITensor* output = layer->getOutput(0);
    nvinfer1::ILayer* newLayer = network->addLayer(*layer);
    newLayer->setInput(0, input);
    newLayer->setOutput(0, output);
}

// Same layer copy for the second engine.
for (int i = 0; i < secondEngine->getNbLayers(); i++)
{
    
    
    const nvinfer1::ILayer* layer = secondEngine->getLayer(i);
    nvinfer1::ITensor* input = layer->getInput(0);
    nvinfer1::ITensor* output = layer->getOutput(0);
    nvinfer1::ILayer* newLayer = network->addLayer(*layer);
    newLayer->setInput(0, input);
    newLayer->setOutput(0, output);
}

// Register every binding of the second engine as a network input as well
// (again without separating inputs from outputs; nothing is marked as output).
for (int i = 0; i < secondEngine->getNbBindings(); i++)
{
    
    
    const char* name = secondEngine->getBindingName(i);
    // Looks the index up again from the name it was just obtained for.
    const int bindingIndex = secondEngine->getBindingIndex(name);
    nvinfer1::ITensor* tensor = network->addInput(name, secondEngine->getBindingDataType(bindingIndex), secondEngine->getBindingDimensions(bindingIndex))->getOutput(0);
    tensor->setDynamicRange(-127.f, 127.f);
    tensor->setAllowedFormats(nvinfer1::TensorFormat::kLINEAR);
}

// Build the merged engine
// NOTE(review): mergedEngine is dereferenced without a null check; a failed
// build would crash on serialize().
nvinfer1::ICudaEngine* mergedEngine = builder->buildCudaEngine(*network);
nvinfer1::IHostMemory* mergedEngineMemory = mergedEngine->serialize();

// Save the merged engine to disk
std::ofstream mergedEngineFile("mergedEngine.trt", std::ios::out | std::ios::binary);
mergedEngineFile.write(reinterpret_cast<const char*>(mergedEngineMemory->data()), mergedEngineMemory->size());

// Cleanup
// mergedEngine itself is not destroyed in this list.
mergedEngineMemory->destroy();
builder->destroy();
network->destroy();
firstEngine->destroy();
secondEngine->destroy();
runtime->destroy();

This example assumes that the first engine has multiple input bindings and one output binding, and the second engine has one input binding and multiple output bindings. The code creates a new engine that starts with two input bindings, then iterates through all layers of both engines, copying them into the new engine, and ends with two output bindings. The final mergedEngine.trt file will contain all the contents of both engines.

To merge two engine files built by TensorRT and one generated by a plugin into one engine file, you need to follow the steps below:

  1. Load the two engine files built by TensorRT using the deserializeCudaEngine function of the nvinfer1::IRuntime interface, obtaining two nvinfer1::ICudaEngine instances.
// Load the first engine file
std::ifstream engineFile1(engineFilePath1, std::ios::binary);
if (!engineFile1) {
    
    
    std::cerr << "Error opening engine file: " << engineFilePath1 << std::endl;
    // handle error
    // (placeholder: as written, execution continues with the unopened stream)
}
// Determine the file size by seeking to the end, then rewind and read the
// whole file into a heap buffer.
engineFile1.seekg(0, std::ifstream::end);
const size_t engineSize1 = engineFile1.tellg();
engineFile1.seekg(0, std::ifstream::beg);
std::unique_ptr<char[]> engineData1(new char[engineSize1]);
engineFile1.read(engineData1.get(), engineSize1);
// One runtime is created here and reused for the following engine.
nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
nvinfer1::ICudaEngine* engine1 = runtime->deserializeCudaEngine(engineData1.get(), engineSize1, nullptr);

// Load the second engine file
std::ifstream engineFile2(engineFilePath2, std::ios::binary);
if (!engineFile2) {
    
    
    std::cerr << "Error opening engine file: " << engineFilePath2 << std::endl;
    // handle error
    // (placeholder: as written, execution continues with the unopened stream)
}
engineFile2.seekg(0, std::ifstream::end);
const size_t engineSize2 = engineFile2.tellg();
engineFile2.seekg(0, std::ifstream::beg);
std::unique_ptr<char[]> engineData2(new char[engineSize2]);
engineFile2.read(engineData2.get(), engineSize2);
nvinfer1::ICudaEngine* engine2 = runtime->deserializeCudaEngine(engineData2.get(), engineSize2, nullptr);
  1. Load the engine file generated by the plugin, also using the deserializeCudaEngine function, to obtain another nvinfer1::ICudaEngine instance.
// Load the engine file generated by the plugin
std::ifstream engineFile3(engineFilePath3, std::ios::binary);
if (!engineFile3) {
    
    
    std::cerr << "Error opening engine file: " << engineFilePath3 << std::endl;
    // handle error
    // (placeholder: as written, execution continues with the unopened stream)
}
// Size the buffer from the end-of-file position, then read the whole file.
engineFile3.seekg(0, std::ifstream::end);
const size_t engineSize3 = engineFile3.tellg();
engineFile3.seekg(0, std::ifstream::beg);
std::unique_ptr<char[]> engineData3(new char[engineSize3]);
engineFile3.read(engineData3.get(), engineSize3);
// `runtime` is not declared in this snippet; it must already exist.
nvinfer1::ICudaEngine* engine3 = runtime->deserializeCudaEngine(engineData3.get(), engineSize3, nullptr);
  1. Create a new nvinfer1::IBuilder instance for merging the three engine files.
// Create the builder and an empty network definition that the following steps
// populate.
// NOTE(review): createNetwork() belongs to older TensorRT releases; newer ones
// use createNetworkV2 -- confirm against the version in use.
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
nvinfer1::INetworkDefinition* network = builder->createNetwork();
  1. Add all layers of the first engine to the new network.
// Walk every binding of engine1 and try to mirror it in the new network.
// `bindings` and `engine1_input_indexes` are not declared in this snippet
// (the full example declares them); `inputType` is not declared anywhere in
// the visible code.
for (int i = 0; i < engine1->getNbBindings(); ++i) {
    
    
    auto bindingName = engine1->getBindingName(i);
    // Round-trips the name back to an index.
    auto bindingIndex = engine1->getBindingIndex(bindingName);
    auto bindingSize = engine1->getBindingDimensions(bindingIndex);
    auto bindingType = engine1->getBindingDataType(bindingIndex);
    // NOTE(review): getBindingInputOrOutput is not found in the documented
    // ICudaEngine API -- verify.
    auto bindingRole = engine1->getBindingInputOrOutput(i);

    // NOTE(review): the role is compared against TensorFormat::kLINEAR, which
    // names a tensor layout rather than an input/output role -- likely wrong.
    if (bindingRole == nvinfer1::TensorFormat::kLINEAR) {
    
    
        network->addInput(bindingName, bindingType, bindingSize);
    } else {
    
    
        // Dereferencing the name pointer yields a single character, not a
        // tensor; markOutput is documented as taking an ITensor&.
        network->markOutput(*engine1->getBindingName(i));
    }

    nvinfer1::ITensor* inputTensor = network->getInput(i);
    // outputTensor is non-null for every binding except the last one.
    // NOTE(review): getBindingTensor is not found in the documented
    // ICudaEngine API -- verify.
    nvinfer1::ITensor* outputTensor = engine1->getBindingIndex(bindingName) < engine1->getNbBindings() - 1 ?
        engine1->getBindingTensor(bindingName) : nullptr;

    if (outputTensor) {
    
    
        // Wrap the tensor in an identity layer named after the binding, then
        // rename the i-th network input and mark it as a network output.
        // inputTensor is not null-checked before use.
        nvinfer1::ILayer* layer = network->addIdentity(*(outputTensor));
        layer->setName(bindingName);
        layer->setOutputName(0, bindingName);
        inputTensor->setName(bindingName);
        inputTensor->setType(inputType);
        network->markOutput(*inputTensor);
    }

    bindings[i] = nullptr;
    if (engine1->bindingIsInput(i)) {
    
    
        // This registers the same input name with the network a second time
        // whenever the branch above already called addInput for it.
        bindings[i] = network->addInput(bindingName, bindingType, bindingSize);
    } else {
    
    
        // NOTE(review): getBindingData is not found in the documented
        // ICudaEngine API, and addConstant is documented as taking Weights and
        // returning a layer -- verify.
        bindings[i] = network->addConstant(bindingSize, engine1->getBindingData(bindingIndex));
    }
    // Records the index of every binding, inputs and outputs alike, despite
    // the "input_indexes" name.
    engine1_input_indexes.emplace_back(i);
}
  1. Add all layers of the second engine to the new network.
// Walk every binding of engine2 and try to mirror it in the new network.
// `bindings` and the `engineN_input_indexes` vectors are not declared in this
// snippet (the full example declares them); `input_type`, `input_dims`,
// `input_tensor` and `inputType` are not declared anywhere in the visible code.
for (int i = 0; i < engine2->getNbBindings(); ++i) {
    
    
    auto bindingName = engine2->getBindingName(i);
    auto bindingIndex = engine2->getBindingIndex(bindingName);
    auto bindingSize = engine2->getBindingDimensions(bindingIndex);
    auto bindingType = engine2->getBindingDataType(bindingIndex);
    // NOTE(review): getBindingInputOrOutput is not found in the documented
    // ICudaEngine API -- verify.
    auto bindingRole = engine2->getBindingInputOrOutput(i);

    // NOTE(review): the role is compared against a tensor-layout enumerator
    // (TensorFormat::kLINEAR) rather than an input/output role.
    if (bindingRole == nvinfer1::TensorFormat::kLINEAR) {
    
    
        // First branch is taken only while no binding index has been recorded
        // yet for engine1 or engine2.
        if (engine1_input_indexes.empty() && engine2_input_indexes.empty()) {
    
    
            network->addInput(bindingName, bindingType, bindingSize);
        } else {
    
    
            // Remember the type/dims of this input and register it on the
            // network, keeping the resulting tensor for later iterations.
            input_type = bindingType;
            input_dims = bindingSize;
            nvinfer1::ITensor* inputTensor = network->addInput(bindingName, bindingType, bindingSize);
            auto index = engine2->getBindingIndex(bindingName);
            // `tensor` is only used by the assert below.
            auto tensor = engine2->getBindingTensor(bindingName);
            assert(tensor != nullptr && "Failed to locate tensor in engine");
            inputTensor->setDynamicRange(-1.f, 1.f);
            bindings[index] = inputTensor;
            input_tensor = inputTensor;
        }
    } else {
    
    
        // Dereferencing the name pointer yields a single character, not a
        // tensor; markOutput is documented as taking an ITensor&.
        network->markOutput(*engine2->getBindingName(i));
    }

    // Uses whichever input tensor was stored last in `input_tensor`.
    nvinfer1::ITensor* inputTensor = input_tensor;
    // outputTensor is non-null for every binding except the last one.
    // NOTE(review): getBindingTensor is not found in the documented
    // ICudaEngine API -- verify.
    nvinfer1::ITensor* outputTensor = engine2->getBindingIndex(bindingName) < engine2->getNbBindings() - 1 ?
        engine2->getBindingTensor(bindingName) : nullptr;

    if (outputTensor) {
    
    
        // Wrap the tensor in an identity layer named after the binding, then
        // rename the remembered input tensor and mark it as a network output.
        nvinfer1::ILayer* layer = network->addIdentity(*(outputTensor));
        layer->setName(bindingName);
        layer->setOutputName(0, bindingName);
        inputTensor->setName(bindingName);
        inputTensor->setType(inputType);
        network->markOutput(*inputTensor);
    }

    // Overwrites any value stored in bindings[i] earlier in this iteration.
    bindings[i] = nullptr;
    if (engine2->bindingIsInput(i)) {
    
    
        bindings[i] = network->addInput(bindingName, bindingType, bindingSize);
    } else {
    
    
        // NOTE(review): getBindingData is not found in the documented
        // ICudaEngine API -- verify.
        bindings[i] = network->addConstant(bindingSize, engine2->getBindingData(bindingIndex));
    }
    // Records the index of every binding, inputs and outputs alike.
    engine2_input_indexes.emplace_back(i);
}
  1. Add all layers of the engine generated by the plugin to the new network.
// Walk every binding of engine3 (the plugin-generated engine) and try to
// mirror it in the new network.
// `bindings` and the `engineN_input_indexes` vectors are not declared in this
// snippet (the full example declares them); `input_type`, `input_dims`,
// `input_tensor` and `inputType` are not declared anywhere in the visible code.
for (int i = 0; i < engine3->getNbBindings(); ++i) {
    
    
    auto bindingName = engine3->getBindingName(i);
    auto bindingIndex = engine3->getBindingIndex(bindingName);
    auto bindingSize = engine3->getBindingDimensions(bindingIndex);
    auto bindingType = engine3->getBindingDataType(bindingIndex);
    // NOTE(review): getBindingInputOrOutput is not found in the documented
    // ICudaEngine API -- verify.
    auto bindingRole = engine3->getBindingInputOrOutput(i);

    // NOTE(review): the role is compared against a tensor-layout enumerator
    // (TensorFormat::kLINEAR) rather than an input/output role.
    if (bindingRole == nvinfer1::TensorFormat::kLINEAR) {
    
    
        // First branch is taken only while no binding index has been recorded
        // yet for any of the three engines.
        if (engine1_input_indexes.empty() && engine2_input_indexes.empty() && engine3_input_indexes.empty()) {
    
    
            network->addInput(bindingName, bindingType, bindingSize);
        } else {
    
    
            // Remember the type/dims of this input and register it on the
            // network, keeping the resulting tensor for later iterations.
            input_type = bindingType;
            input_dims = bindingSize;
            nvinfer1::ITensor* inputTensor = network->addInput(bindingName, bindingType, bindingSize);
            auto index = engine3->getBindingIndex(bindingName);
            // `tensor` is only used by the assert below.
            auto tensor = engine3->getBindingTensor(bindingName);
            assert(tensor != nullptr && "Failed to locate tensor in engine");
            inputTensor->setDynamicRange(-1.f, 1.f);
            bindings[index] = inputTensor;
            input_tensor = inputTensor;
        }
    } else {
    
    
        // Dereferencing the name pointer yields a single character, not a
        // tensor; markOutput is documented as taking an ITensor&.
        network->markOutput(*engine3->getBindingName(i));
    }

    // Uses whichever input tensor was stored last in `input_tensor`.
    nvinfer1::ITensor* inputTensor = input_tensor;
    // outputTensor is non-null for every binding except the last one.
    // NOTE(review): getBindingTensor is not found in the documented
    // ICudaEngine API -- verify.
    nvinfer1::ITensor* outputTensor = engine3->getBindingIndex(bindingName) < engine3->getNbBindings() - 1 ?
        engine3->getBindingTensor(bindingName) : nullptr;

    if (outputTensor) {
    
    
        // Wrap the tensor in an identity layer named after the binding, then
        // rename the remembered input tensor and mark it as a network output.
        nvinfer1::ILayer* layer = network->addIdentity(*(outputTensor));
        layer->setName(bindingName);
        layer->setOutputName(0, bindingName);
        inputTensor->setName(bindingName);
        inputTensor->setType(inputType);
        network->markOutput(*inputTensor);
    }

    // Overwrites any value stored in bindings[i] earlier in this iteration.
    bindings[i] = nullptr;
    if (engine3->bindingIsInput(i)) {
    
    
        bindings[i] = network->addInput(bindingName, bindingType, bindingSize);
    } else {
    
    
        // NOTE(review): getBindingData is not found in the documented
        // ICudaEngine API -- verify.
        bindings[i] = network->addConstant(bindingSize, engine3->getBindingData(bindingIndex));
    }
    // Records the index of every binding, inputs and outputs alike.
    engine3_input_indexes.emplace_back(i);
}
  1. Pass the new nvinfer1::INetworkDefinition object and device type to the nvinfer1::IBuilder object, and use the buildCudaEngine function to generate the merged engine file.
// Precision mode selector: "fp32" leaves both the FP16 and INT8 flags off.
std::string mode = "fp32";
// `batchSize` is not declared in this snippet (the full example declares it).
builder->setMaxBatchSize(batchSize);
builder->setMaxWorkspaceSize(1 << 30);
// NOTE(review): these builder-level precision/workspace setters and
// buildCudaEngine belong to older TensorRT releases; newer releases move them
// to IBuilderConfig -- confirm against the version in use.
builder->setFp16Mode(mode == "fp16");
builder->setInt8Mode(mode == "int8");

// The result may be null when the build fails; the next step checks for that.
nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
  1. Serialize the merged nvinfer1::ICudaEngine object into a file to get the final engine file.
// Serialize the merged engine and write it to engineFilePath.
// Nothing is written when the build failed (engine is null), when
// serialization fails, or when the output file cannot be opened.
if (engine) {
    // serialize() hands back a host buffer that must be released with
    // destroy() once it has been written out.
    nvinfer1::IHostMemory* serialized = engine->serialize();
    if (!serialized) {
        std::cerr << "Error serializing engine" << std::endl;
    } else {
        std::ofstream engineFile(engineFilePath, std::ios::binary);
        if (!engineFile) {
            std::cerr << "Error opening engine file: " << engineFilePath << std::endl;
        } else {
            // Only write when the stream opened successfully; the original
            // wrote unconditionally after reporting the error.
            engineFile.write(static_cast<const char*>(serialized->data()),
                             static_cast<std::streamsize>(serialized->size()));
        }
        // Release the buffer on both the success and the open-failure path.
        serialized->destroy();
    }
}

Full code example:

#include <iostream>
#include <fstream>
#include <memory>
#include <vector>

#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "NvInferRuntimeCommon.h"

/// Minimal TensorRT logger that prints warnings and errors to standard output.
///
/// A single global instance, gLogger, is defined alongside the class so it can
/// be passed to createInferBuilder / createInferRuntime.
class Logger : public nvinfer1::ILogger {
public:
    /// Callback invoked by TensorRT for each diagnostic message.
    ///
    /// @param severity Importance of the message.
    /// @param msg      Null-terminated message text.
    ///
    /// Declared noexcept because newer TensorRT headers declare ILogger::log
    /// as noexcept, and an override may not have a looser exception
    /// specification. Adding noexcept is also valid against older headers.
    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override {
        // Severity enumerators are ordered from most to least severe, so this
        // keeps internal errors, errors and warnings and drops INFO and
        // VERBOSE. The previous `!= kINFO` test let VERBOSE output through.
        if (severity <= Severity::kWARNING) {
            std::cout << msg << std::endl;
        }
    }
} gLogger;

int main() {
    
    
    const std::string engineFilePath1 = "/path/to/first/engine";
    const std::string engineFilePath2 = "/path/to/second/engine";
    const std::string engineFilePath3 = "/path/to/third/engine";
    const std::string engineFilePath = "/path/to/merged/engine";
    const int batchSize = 1;

    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
    nvinfer1::INetworkDefinition* network = builder->createNetwork();
    std::vector<void*> bindings(3 * batchSize);

    // 加载第一个engine文件
    std::ifstream engineFile1(engineFilePath1, std::ios::binary);
    if (!engineFile1) {
    
    
        std::cerr << "Error opening engine file: " << engineFilePath1 << std::endl;
        return 1;
    }
    engineFile1.seekg(0, std::ifstream::end);
    const size_t engineSize1 = engineFile1.tellg();
    engineFile1.seekg(0, std::ifstream::beg);
    std::unique_ptr<char[]> engineData1(new char[engineSize1]);
    engineFile1.read(engineData1.get(), engineSize1);
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
    nvinfer1::ICudaEngine* engine1 = runtime->deserializeCudaEngine(engineData1.get(), engineSize1, nullptr);
    std::vector<int> engine1_input_indexes;

    // 加载第二个engine文件
    std::ifstream engineFile2(engineFilePath2, std::ios::binary);
    if (!engineFile2) {
    
    
        std::cerr << "Error opening engine file: " << engineFilePath2 << std::endl;
        return 1;
    }
    engineFile2.seekg(0, std::ifstream::end);
    const size_t engineSize2 = engineFile2.tellg();
    engineFile2.seekg(0, std::ifstream::beg);
    std::unique_ptr<char[]> engineData2(new char[engineSize2]);
    engineFile2.read(engineData2.get(), engineSize2);
    nvinfer1::ICudaEngine* engine2 = runtime->deserializeCudaEngine(engineData2.get(), engineSize2, nullptr);
    std::vector<int> engine2_input_indexes;

    // 加载插件生成的engine文件
    std::ifstream engineFile3(engineFilePath3, std::ios::binary);
    if (!engineFile3) {
    
    
        std::cerr << "Error opening engine file: " << engineFilePath3 << std::endl;
        return 1;
    }
    engineFile3.seekg(0, std::ifstream::end);
    const size_t engineSize3 = engineFile3.tellg();
    engineFile3.seekg(0, std::ifstream::beg);
    std::unique_ptr<char[]> engineData3(new char[engineSize3]);
    engineFile3.read(engineData3.get(), engineSize3);
    nvinfer1::ICudaEngine* engine3 = runtime->deserializeCudaEngine(engineData3.get(), engineSize3, nullptr);
    std::vector<int> engine3_input_indexes;

    // 将第一个engine的所有层添加到新的network中
    for (int i = 0; i < engine1->getNbBindings(); ++i) {
    
    
        auto bindingName = engine1->getBindingName(i);
        auto bindingIndex = engine1->getBindingIndex(bindingName);
        auto bindingSize = engine1->getBindingDimensions(bindingIndex);
        auto bindingType = engine1->getBindingDataType(bindingIndex);
        auto bindingRole = engine1->getBindingInputOrOutput(i);

        if (bindingRole == nvinfer1::TensorFormat::kLINEAR) {
    
    
            network->addInput(bindingName, bindingType, bindingSize);
        } else {
    
    
            network->markOutput(*engine1->getBindingName(i));
        }

        nvinfer1::ITensor* inputTensor = network->getInput(i);
        nvinfer1::ITensor* outputTensor = engine1->getBindingIndex(bindingName) < engine1->getNbBindings() - 1 ?
            engine1->getBindingTensor(bindingName) : nullptr;

        if (outputTensor) {
    
    
            nvinfer1::ILayer* layer = network->addIdentity(*(outputTensor));
            layer->setName(bindingName);
            layer->setOutputName(0, bindingName);
            inputTensor->setName(bindingName);
            inputTensor->setType(inputType);
            network->markOutput(*inputTensor);
        }

        bindings[i] = nullptr;
        if (engine1->bindingIsInput(i)) {
    
    
            bindings[i] = network->addInput(bindingName, bindingType, bindingSize);
        } else {
    
    
            bindings[i] = network->addConstant(bindingSize, engine1->getBindingData(bindingIndex));
        }
        engine1_input_indexes.emplace_back(i);
    }

    // 将第二个engine的所有

You can combine two tensorrt-built engine files and one plugin-generated engine file into one engine file through the following steps:

  1. Use the TensorRT API to load two engine files and a plugin library with custom plugin code to create two Execution contexts.

  2. Use the TensorRT API to get the input and output tensor names of the two engine files and create a new engine file.

  3. Add the input and output tensors from the first engine file to the new engine file using the TensorRT API.

  4. Add the input and output tensors from the second engine file to the new engine file using the TensorRT API.

  5. Add custom plugin code to a new engine file using the TensorRT API.

  6. Compile and serialize a new engine file using the TensorRT API.

Here is a code example:

#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "NvPlugin.h"

using namespace nvinfer1;

// Loads two serialized engines, declares their bindings on a fresh network,
// adds a custom plugin layer, then builds and saves a "combined" engine.
// NOTE(review): gLogger, maxBatchSize, maxWorkspaceSize and PluginFactory are
// not declared in this listing, and it does not include <fstream>, <memory>
// or <string> directly for std::ifstream / std::unique_ptr / std::string --
// confirm they are provided elsewhere.
int main() {
    // Load the first engine file
    // The stream is not checked for a successful open. tellg() is narrowed to
    // int, so a failed tellg() (-1) or a file larger than INT_MAX would give a
    // bad allocation size.
    IRuntime* runtime1 = createInferRuntime(gLogger);
    std::ifstream file1("engine1.engine", std::ios::binary);
    file1.seekg(0, std::ios::end);
    const int modelSize1 = file1.tellg();
    file1.seekg(0, std::ios::beg);
    std::unique_ptr<char[]> modelData1(new char[modelSize1]);
    file1.read(modelData1.get(), modelSize1);
    file1.close();
    ICudaEngine* engine1 = runtime1->deserializeCudaEngine(modelData1.get(), modelSize1, nullptr);

    // Load the second engine file
    // A second runtime is created here rather than reusing runtime1.
    IRuntime* runtime2 = createInferRuntime(gLogger);
    std::ifstream file2("engine2.engine", std::ios::binary);
    file2.seekg(0, std::ios::end);
    const int modelSize2 = file2.tellg();
    file2.seekg(0, std::ios::beg);
    std::unique_ptr<char[]> modelData2(new char[modelSize2]);
    file2.read(modelData2.get(), modelSize2);
    file2.close();
    ICudaEngine* engine2 = runtime2->deserializeCudaEngine(modelData2.get(), modelSize2, nullptr);

    // Create a new engine with the inputs and outputs from both engines
    // Neither engine pointer is null-checked before the loops below use it.
    IBuilder* builder = createInferBuilder(gLogger);
    INetworkDefinition* network = builder->createNetwork();

    // Add the inputs and outputs from the first engine to the new engine
    for (int i = 0; i < engine1->getNbBindings(); i++) {
        std::string name = engine1->getBindingName(i);
        Dims dims = engine1->getBindingDimensions(i);
        DataType type = engine1->getBindingDataType(i);
        bool isInput = engine1->bindingIsInput(i);
        if (isInput) {
            network->addInput(name.c_str(), type, dims);
        } else {
            // NOTE(review): INetworkDefinition is not documented as having
            // addOutput; outputs are normally declared via markOutput on an
            // existing tensor -- verify.
            network->addOutput(name.c_str(), type, dims);
        }
    }

    // Add the inputs and outputs from the second engine to the new engine
    // Binding names shared with engine1 would be registered a second time.
    for (int i = 0; i < engine2->getNbBindings(); i++) {
        std::string name = engine2->getBindingName(i);
        Dims dims = engine2->getBindingDimensions(i);
        DataType type = engine2->getBindingDataType(i);
        bool isInput = engine2->bindingIsInput(i);
        if (isInput) {
            network->addInput(name.c_str(), type, dims);
        } else {
            network->addOutput(name.c_str(), type, dims);
        }
    }

    // Add the custom plugin to the new engine
    // The plugin is wired to the first network input and first network output.
    // NOTE(review): PluginFactory::createPlugin with this signature is not
    // part of the documented TensorRT API, and addPluginV2 is documented as
    // taking an IPluginV2&, not an IPluginV2Layer* -- verify.
    PluginFactory pluginFactory;
    ITensor* inputTensor = network->getInput(0);
    ITensor* outputTensor = network->getOutput(0);
    IPluginV2Layer* customLayer = pluginFactory.createPlugin("customPlugin", inputTensor, outputTensor, 1);
    network->addPluginV2(&inputTensor, 1, customLayer);

    // Build and serialize the new engine
    // newEngine is dereferenced without a null check, and the output stream is
    // not checked before writing.
    builder->setMaxBatchSize(maxBatchSize);
    builder->setMaxWorkspaceSize(maxWorkspaceSize);
    ICudaEngine* newEngine = builder->buildCudaEngine(*network);
    IHostMemory* serializedEngine = newEngine->serialize();
    std::ofstream file("combined.engine", std::ios::binary);
    file.write((char*) serializedEngine->data(), serializedEngine->size());
    file.close();

    // Cleanup
    // Every TensorRT object created above is released explicitly.
    builder->destroy();
    network->destroy();
    newEngine->destroy();
    serializedEngine->destroy();
    engine1->destroy();
    engine2->destroy();
    runtime1->destroy();
    runtime2->destroy();
    return 0;
}

In the code example, we use two IRuntime instances to load two engine files, get their input and output tensors, and create a new engine file. Then, we use IBuilder to create a new network and add the input and output tensors of the two engine files to the new network. Finally, we use the PluginFactory to create a custom plugin and add it to the new network. Compile the new network using IBuilder and serialize and save to file using ICudaEngine. Finally, we clean up the created resources.

Guess you like

Origin blog.csdn.net/qq_39506862/article/details/130894427