Notes on converting a pt network to ONNX and a TensorRT model

pt to onnx

When running this, replace the model with your own network structure.

"""
如果保存的是模型参数
"""
import torch
import torchvision.models as models

torch_model = torch.load("test.pth") # pytorch模型加载

model = models.resnet50()
model.fc = torch.nn.Linear(2048, 4)
model.load_state_dict(torch_model)

batch_size = 1  #批处理大小
input_shape = (3, 244, 384)   #输入数据,改成自己的输入shape

# #set the model to inference mode
model.eval()

x = torch.randn(batch_size, *input_shape)	# 生成张量
export_onnx_file = "test.onnx"			# 目的ONNX文件名
torch.onnx.export(model,
                    x,
                    export_onnx_file,
                    opset_version=10,
                    do_constant_folding=True,	# 是否执行常量折叠优化
                    input_names=["input"],	# 输入名
                    output_names=["output"],	# 输出名
                    dynamic_axes={
    
    "input":{
    
    0:"batch_size"},  # 批处理变量
                                    "output":{
    
    0:"batch_size"}})

In general, the conversion above is the one that is used.

"""
如果保存的是整个模型
"""
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = torch.load("test.pth") # pytorch模型加载
batch_size = 1  #批处理大小
input_shape = (3, 244, 384)   #输入数据,改成自己的输入shape

# #set the model to inference mode
model.eval()

x = torch.randn(batch_size, *input_shape)   # 生成张量
x = x.to(device)
export_onnx_file = "test.onnx"		# 目的ONNX文件名
torch.onnx.export(model,
                    x,
                    export_onnx_file,
                    opset_version=10,
                    do_constant_folding=True,	# 是否执行常量折叠优化
                    input_names=["input"],	# 输入名
                    output_names=["output"],	# 输出名
                    dynamic_axes={
    
    "input":{
    
    0:"batch_size"},  # 批处理变量
                                    "output":{
    
    0:"batch_size"}})

onnx to trt model

import onnx
import tensorrt as trt
# import sys
# sys.setrecursionlimit(500000)


def onnx_export_engine(model_name, workspace):
    path = 'test2/' + model_name + '.onnx'
    # path = r'D:\project\T2M-GPT2\weights\T2M-GPT-vqvae.onnx'  # hardcoded path used in the original run

    # create the builder
    logger = trt.Logger(trt.Logger.ERROR)
    builder = trt.Builder(logger)
    # create a builder configuration
    config = builder.create_builder_config()
    # config.max_workspace_size = workspace * 1 << 30  # older API, deprecated in recent TensorRT releases
    # create the network definition
    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    # parse the ONNX model
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(path)):
        raise RuntimeError(f'failed to load ONNX file: {path}')

    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]

    # define the dynamic input here: (min shape, optimal shape, max shape)
    profile = builder.create_optimization_profile()
    # profile.set_shape('idx', (1, 1), (1, 20), (1, 55))
    profile.set_shape('input', (1, 1), (1, 20), (1, 55))
    # profile.set_shape("index", (1,), (1,), (1,))
    config.add_optimization_profile(profile)

    # if builder.platform_has_fast_fp16:
    #     config.set_flag(trt.BuilderFlag.FP16)
    engine_path = model_name + '.engine'
    # with builder.build_engine(network, config) as engine:
    with builder.build_serialized_network(network, config) as engine:
        with open(engine_path, 'wb') as t:
            # t.write(engine.serialize())  # needed when using build_engine instead
            t.write(engine)
    print('Conversion complete')


if __name__ == '__main__':
    # model_names = ['modified_stable_diffusion']
    model_names = ['resnet']
    for modelname in model_names:
        onnx_export_engine(modelname, 4)
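
The commented-out max_workspace_size line follows the older TensorRT API; in recent releases (8.4 and later, as far as I know) the workspace limit is set through the memory-pool interface instead. A minimal sketch of what could replace that line inside onnx_export_engine:

# assuming TensorRT >= 8.4: cap the builder workspace via the memory-pool API
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30)  # workspace is in GiB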

onnx inference

import time

import onnxruntime as rt
import numpy as np
import torch

import onnxruntime

# # create the ONNX Runtime InferenceSession
# sess = onnxruntime.InferenceSession(r'D:\project\T2M-GPT\models\T2M-GPT-trans.onnx', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])

# prepare the input data (PyTorch tensors)
data = torch.randn(1, 512)
print("initial input", data.shape, 'numpy input:', data.numpy().shape)
print("initial input", torch.tensor([[1]]).shape, 'numpy input:', torch.tensor([[1]]).numpy().shape)
print('*' * 80)

sess = rt.InferenceSession(r'D:\project\T2M-GPT2\onnx\T2M-GPT-trans2.onnx', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
input_info = sess.get_inputs()
for inp in input_info:
    print("Input Name:", inp.name)
    print("Input Shape:", inp.shape)
output_info = sess.get_outputs()
for output in output_info:
    print("Output Name:", output.name)
    print("Output Shape:", output.shape)

print('*' * 80)
input_names = [input_name.name for input_name in input_info]
output_names = sess.get_outputs()[0].name
# print(len(sess.run([output_names], {input_name: data.astype(np.float32)})))
pred_onx = sess.run([output_names],
                    {'input': data.numpy(),
                     'idx': torch.tensor([[256, 256, 417]]).numpy()})[0]  # alternative idx: torch.tensor([[256, 417, 266, 211, 399]])
print("initial output", pred_onx, pred_onx.shape, 'output numpy type:', type(pred_onx))
print('*' * 80)
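
A quick latency check can reuse the session and inputs above (the run count of 100 is arbitrary):

# rough latency measurement over repeated runs with the same inputs
n_runs = 100
start = time.time()
for _ in range(n_runs):
    sess.run([output_names], {'input': data.numpy(),
                              'idx': torch.tensor([[256, 256, 417]]).numpy()})
print(f'average latency: {(time.time() - start) / n_runs * 1000:.2f} ms')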

trt inference

from typing import Union, Optional, Sequence, Dict, Any

import torch
import tensorrt as trt
class TRTWrapper(torch.nn.Module):
    def __init__(self, engine: Union[str, trt.ICudaEngine],
                 output_names: Optional[Sequence[str]] = None) -> None:
        super().__init__()
        self.engine = engine
        if isinstance(self.engine, str):
            with trt.Logger() as logger, trt.Runtime(logger) as runtime:
                with open(self.engine, mode='rb') as f:
                    engine_bytes = f.read()
                self.engine = runtime.deserialize_cuda_engine(engine_bytes)
        self.context = self.engine.create_execution_context()
        names = [_ for _ in self.engine]
        input_names = list(filter(self.engine.binding_is_input, names))
        self._input_names = input_names
        self._output_names = output_names

        if self._output_names is None:
            output_names = list(set(names) - set(input_names))
            self._output_names = output_names

    def forward(self, inputs: Dict[str, torch.Tensor]):
        assert self._input_names is not None
        assert self._output_names is not None
        bindings = [None] * (len(self._input_names) + len(self._output_names))
        profile_id = 0
        for input_name, input_tensor in inputs.items():
            # check if input shape is valid
            profile = self.engine.get_profile_shape(profile_id, input_name)
            assert input_tensor.dim() == len(
                profile[0]), 'Input dim is different from engine profile.'
            for s_min, s_input, s_max in zip(profile[0], input_tensor.shape,
                                             profile[2]):
                assert s_min <= s_input <= s_max, \
                    'Input shape should be between ' \
                    + f'{profile[0]} and {profile[2]}' \
                    + f' but got {tuple(input_tensor.shape)}.'
            idx = self.engine.get_binding_index(input_name)

            # All input tensors must be gpu variables
            assert 'cuda' in input_tensor.device.type
            input_tensor = input_tensor.contiguous()
            # if input_tensor.dtype == torch.long:
            #     input_tensor = input_tensor.int()
            self.context.set_binding_shape(idx, tuple(input_tensor.shape))
            bindings[idx] = input_tensor.contiguous().data_ptr()

        # create output tensors
        outputs = {}
        for output_name in self._output_names:
            idx = self.engine.get_binding_index(output_name)
            dtype = torch.float32  # output tensor dtype
            shape = tuple(self.context.get_binding_shape(idx))

            device = torch.device('cuda')
            output = torch.empty(size=shape, dtype=dtype, device=device)
            outputs[output_name] = output
            bindings[idx] = output.data_ptr()
        self.context.execute_async_v2(bindings,
                                      torch.cuda.current_stream().cuda_stream)
        return outputs

if __name__=="__main__":
    model = TRTWrapper('end2end.engine', ['simcc_x', 'simcc_y'])
    output = model(dict(input=torch.randn(1, 3, 256, 256).cuda()))
    simcc_x, simcc_y = output['simcc_x'], output['simcc_y']  # forward() returns a dict of named outputs
    # keypoints, scores = decode(simcc_x, simcc_y, simcc_split_ratio)
    #
    # # rescale keypoints
    # keypoints = keypoints / model_input_size * scale + center - scale / 2
    print(output)

Experience

  1. During conversion, the ONNX and TensorRT models are static, so the network's forward has to be stripped of data-dependent if/else branches, Python for loops, and the like. Also check whether operations such as reshape and the @ operator (which can be replaced with torch.matmul) are supported; in my tests torch.topk is supported. See the sketch after this list.
  2. During conversion I also found that, even with dynamic input axes configured, shapes become fixed again after indexing into a tensor. You can try exporting to ONNX with an empty tensor as the input, or make the tensor value at the first index empty inside the model; in my tests this worked.
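
As an illustration of point 1, here is a small sketch with two hypothetical modules (names made up for this example): one with a data-dependent Python branch and the @ operator, and a trace-friendly rewrite where the condition becomes a tensor operation and @ becomes torch.matmul:

import torch

class Branchy(torch.nn.Module):
    # hypothetical example: the if/else depends on tensor values, so only one path survives export
    def forward(self, x):
        gram = x @ x.transpose(-1, -2)      # '@' operator
        if x.sum() > 0:                     # data-dependent Python branch
            return gram
        return torch.zeros_like(gram)

class TraceFriendly(torch.nn.Module):
    # equivalent rewrite: the condition becomes a tensor op and '@' becomes torch.matmul
    def forward(self, x):
        gram = torch.matmul(x, x.transpose(-1, -2))
        cond = (x.sum() > 0).to(gram.dtype)  # 1.0 or 0.0 instead of an if/else
        return cond * gram

x = torch.randn(1, 4, 8)
assert torch.allclose(Branchy()(x), TraceFriendly()(x))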

Origin blog.csdn.net/qq_44224801/article/details/131851722