Notes on converting a PyTorch (.pt) network to ONNX and TensorRT

PyTorch (.pt) to ONNX

Replace model here with your own network structure.

"""
如果保存的是模型参数
"""
import torch
import torchvision.models as models

torch_model = torch.load("test.pth")  # load the saved state dict

model = models.resnet50()
model.fc = torch.nn.Linear(2048, 4)
model.load_state_dict(torch_model)

batch_size = 1  # batch size
input_shape = (3, 244, 384)   # input shape; change to match your own model

# set the model to inference mode
model.eval()

x = torch.randn(batch_size, *input_shape)   # dummy input tensor
export_onnx_file = "test.onnx"              # target ONNX filename
torch.onnx.export(model,
                  x,
                  export_onnx_file,
                  opset_version=10,
                  do_constant_folding=True,   # whether to apply constant folding
                  input_names=["input"],      # input name
                  output_names=["output"],    # output name
                  dynamic_axes={"input": {0: "batch_size"},     # dynamic batch dimension
                                "output": {0: "batch_size"}})

In general, the state-dict conversion above is the one to use.

"""
如果保存的是整个模型
"""
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = torch.load("test.pth")  # load the full saved model
batch_size = 1  # batch size
input_shape = (3, 244, 384)   # input shape; change to match your own model

# set the model to inference mode
model.eval()

x = torch.randn(batch_size, *input_shape)   # dummy input tensor
x = x.to(device)
export_onnx_file = "test.onnx"              # target ONNX filename
torch.onnx.export(model,
                  x,
                  export_onnx_file,
                  opset_version=10,
                  do_constant_folding=True,   # whether to apply constant folding
                  input_names=["input"],      # input name
                  output_names=["output"],    # output name
                  dynamic_axes={"input": {0: "batch_size"},     # dynamic batch dimension
                                "output": {0: "batch_size"}})

ONNX to TensorRT engine

import onnx
import tensorrt as trt
# import sys
# sys.setrecursionlimit(500000)


def onnx_export_engine(model_name, workspace):
    path = 'test2/' + model_name + '.onnx'
    path = r'D:\project\T2M-GPT2\weights\T2M-GPT-vqvae.onnx'  # hard-coded override; delete this line to use the path above
    # create the builder
    logger = trt.Logger(trt.Logger.ERROR)
    builder = trt.Builder(logger)
    # create a builder config
    config = builder.create_builder_config()
    # config.max_workspace_size = workspace * 1 << 30  # deprecated; see the note below
    # create the network definition (explicit batch)
    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    # parse the ONNX model
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(path)):
        raise RuntimeError(f'failed to load ONNX file: {path}')

    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]

    profile = builder.create_optimization_profile()
    profile.set_shape('input', (1, 1), (1, 20), (1, 55))  # min/opt/max shapes for the dynamic input
    config.add_optimization_profile(profile)
    # optionally enable FP16 when the platform supports it:
    # if builder.platform_has_fast_fp16:
    #     config.set_flag(trt.BuilderFlag.FP16)
    engine_path = model_name + '.engine'
    # build_serialized_network returns the serialized engine directly
    serialized_engine = builder.build_serialized_network(network, config)
    if serialized_engine is None:
        raise RuntimeError('failed to build the TensorRT engine')
    with open(engine_path, 'wb') as t:
        t.write(serialized_engine)
    print('conversion finished')

if __name__ == '__main__':
    model_names = ['resnet']
    for model_name in model_names:
        onnx_export_engine(model_name, 4)
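
The workspace argument above is currently unused because the max_workspace_size line is commented out (that attribute is deprecated in recent TensorRT releases). On TensorRT 8.4+ the memory-pool API can be used instead; a sketch, assuming such a version is installed:

import tensorrt as trt

logger = trt.Logger(trt.Logger.ERROR)
builder = trt.Builder(logger)
config = builder.create_builder_config()
workspace = 4  # GiB of scratch memory the builder may use
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30)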

ONNX inference

import time

import numpy as np
import onnxruntime as rt
import torch

# prepare the input data (PyTorch tensors)
data = torch.randn(1, 512)
print("initial input", data.shape, 'numpy input:', data.numpy().shape)
print("initial input", torch.tensor([[1]]).shape, 'numpy input:', torch.tensor([[1]]).numpy().shape)
print('*' * 80)
sess = rt.InferenceSession(r'D:\project\T2M-GPT2\onnx\T2M-GPT-trans2.onnx',
                           providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
input_info = sess.get_inputs()
for inp in input_info:
    print("Input Name:", inp.name)
    print("Input Shape:", inp.shape)
output_info = sess.get_outputs()
for out in output_info:
    print("Output Name:", out.name)
    print("Output Shape:", out.shape)

print('*' * 80)
input_names = [inp.name for inp in input_info]
output_name = sess.get_outputs()[0].name
pred_onx = sess.run([output_name],
                    {'input': data.numpy(),
                     'idx': torch.tensor([[256, 256, 417]]).numpy()})[0]
print("initial output", pred_onx, pred_onx.shape, 'output numpy type:', type(pred_onx))
print('*' * 80)
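
The time import above suggests latency measurement; a minimal timing sketch reusing sess, data, and output_name from the script (the iteration count is arbitrary):

import time

# warm-up run so provider initialization does not skew the measurement
feeds = {'input': data.numpy(), 'idx': torch.tensor([[256, 256, 417]]).numpy()}
sess.run([output_name], feeds)

n_iters = 100
start = time.perf_counter()
for _ in range(n_iters):
    sess.run([output_name], feeds)
elapsed = time.perf_counter() - start
print(f'average latency: {elapsed / n_iters * 1000:.2f} ms')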

TensorRT inference

from typing import Union, Optional, Sequence, Dict, Any

import torch
import tensorrt as trt
class TRTWrapper(torch.nn.Module):
    def __init__(self, engine: Union[str, trt.ICudaEngine],
                 output_names: Optional[Sequence[str]] = None) -> None:
        super().__init__()
        self.engine = engine
        if isinstance(self.engine, str):
            with trt.Logger() as logger, trt.Runtime(logger) as runtime:
                with open(self.engine, mode='rb') as f:
                    engine_bytes = f.read()
                self.engine = runtime.deserialize_cuda_engine(engine_bytes)
        self.context = self.engine.create_execution_context()
        names = [_ for _ in self.engine]  # iterating the engine yields binding names (TRT 8 API)
        input_names = list(filter(self.engine.binding_is_input, names))
        self._input_names = input_names
        self._output_names = output_names

        if self._output_names is None:
            output_names = list(set(names) - set(input_names))
            self._output_names = output_names

    def forward(self, inputs: Dict[str, torch.Tensor]):
        assert self._input_names is not None
        assert self._output_names is not None
        bindings = [None] * (len(self._input_names) + len(self._output_names))
        profile_id = 0
        for input_name, input_tensor in inputs.items():
            # check if input shape is valid
            profile = self.engine.get_profile_shape(profile_id, input_name)
            assert input_tensor.dim() == len(
                profile[0]), 'Input dim is different from engine profile.'
            for s_min, s_input, s_max in zip(profile[0], input_tensor.shape,
                                             profile[2]):
                assert s_min <= s_input <= s_max, \
                    'Input shape should be between ' \
                    + f'{profile[0]} and {profile[2]}' \
                    + f' but get {tuple(input_tensor.shape)}.'
            idx = self.engine.get_binding_index(input_name)

            # All input tensors must be gpu variables
            assert 'cuda' in input_tensor.device.type
            input_tensor = input_tensor.contiguous()
            # if input_tensor.dtype == torch.long:
            #     input_tensor = input_tensor.int()
            self.context.set_binding_shape(idx, tuple(input_tensor.shape))
            bindings[idx] = input_tensor.contiguous().data_ptr()

        # create output tensors
        outputs = {}
        for output_name in self._output_names:
            idx = self.engine.get_binding_index(output_name)
            dtype = torch.float32  # output tensor dtype (assumed float32 here)
            shape = tuple(self.context.get_binding_shape(idx))

            device = torch.device('cuda')
            output = torch.empty(size=shape, dtype=dtype, device=device)
            outputs[output_name] = output
            bindings[idx] = output.data_ptr()
        self.context.execute_async_v2(bindings,
                                      torch.cuda.current_stream().cuda_stream)
        return outputs

if __name__=="__main__":
    model = TRTWrapper('end2end.engine', ['simcc_x', 'simcc_y'])
    output = model(dict(input=torch.randn(1, 3, 256, 256).cuda()))
    simcc_x, simcc_y = output['simcc_x'], output['simcc_y']  # outputs come back as a dict keyed by binding name
    # keypoints, scores = decode(simcc_x, simcc_y, simcc_split_ratio)
    #
    # # rescale keypoints
    # keypoints = keypoints / model_input_size * scale + center - scale / 2
    print(output)
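
Note that execute_async_v2 launches asynchronously on the current CUDA stream. Later torch operations on the same stream are ordered after it, but when consuming the outputs outside torch it is safer to synchronize first; a usage sketch (engine name and output names follow the example above):

model = TRTWrapper('end2end.engine', ['simcc_x', 'simcc_y'])
output = model(dict(input=torch.randn(1, 3, 256, 256).cuda()))
torch.cuda.current_stream().synchronize()  # make sure the async launch has finished
simcc_x_np = output['simcc_x'].cpu().numpy()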

Takeaways

  1. ONNX and TensorRT graphs are static once exported, so remove data-dependent if/else branches and Python for loops from the network's forward; also check operator support, e.g. reshape and the @ matrix-multiply operator (replace @ with torch.matmul). In my tests torch.topk is supported. (A toy before/after sketch follows this list.)
  2. Even with dynamic input axes configured, I found that shapes can still become fixed after tensor indexing. Try exporting to ONNX with an empty tensor as the input, or have the model treat the first indexed tensor as empty internally; this worked in my tests.
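
A toy before/after illustration of point 1 (the module and shapes are invented for the example; only the pattern matters):

import torch

class Before(torch.nn.Module):
    def forward(self, x, w):
        # data-dependent branch and Python loop: both get frozen at export time
        if x.sum() > 0:
            for _ in range(3):
                x = x @ w
        return x

class After(torch.nn.Module):
    def forward(self, x, w):
        # loop unrolled, @ replaced by torch.matmul, branch turned into a tensor op
        y = torch.matmul(x, w)
        y = torch.matmul(y, w)
        y = torch.matmul(y, w)
        return torch.where(x.sum() > 0, y, x)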

Reposted from blog.csdn.net/qq_44224801/article/details/131851722