Run an ONNX model with the TensorFlow backend and onnxruntime-gpu


onnx.__version__
'1.8.0'
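
For context, the other packages used below expose their versions the same way (a quick check; exact versions will vary by environment):

import torch
import torchvision
import onnxruntime
print("torch:", torch.__version__)
print("torchvision:", torchvision.__version__)
print("onnxruntime:", onnxruntime.__version__)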

1. Prepare the model

import torchvision.models as models
resnet18 = models.resnet18(pretrained=True)
Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to C:\Users\KangningCAI/.cache\torch\hub\checkpoints\resnet18-5c106cde.pth
100.0%
import torch
import torchvision

dummy_input = torch.randn(10, 3, 224, 224, device='cuda')
model = torchvision.models.resnet18(pretrained=True).cuda()
print(model(dummy_input))
# Display names can be set for the input and output values of the module's graph.
# They do not change the semantics of the graph; they only make it more readable.
# The network's inputs consist of the flat list of inputs (i.e. the values passed to
# forward()), followed by the flat list of parameters. You can specify only some of
# the names, e.g. provide a list shorter than the number of module inputs; names are
# then assigned starting from the beginning.
input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ]
output_names = [ "output1" ]

torch.onnx.export(model, dummy_input, "resnet18.onnx", verbose=True,
                  input_names=input_names, output_names=output_names)
tensor([[-1.4664, -1.2065, -0.2031,  ..., -0.3920,  5.7189,  3.1503],
        [-1.5115, -0.8432,  0.7770,  ...,  1.0732, -1.6643,  0.2798],
        [ 0.3694, -1.5528, -1.0725,  ...,  0.9766,  4.3036,  0.3204],
        ...,
        [ 1.1199,  0.8067,  2.0376,  ...,  0.2713, -0.2034,  0.0839],
        [-0.0470,  0.7559, -1.8203,  ..., -0.0480,  1.7802,  1.0056],
        [-1.1093, -2.6424, -1.1345,  ..., -1.4737,  0.6720,  0.4368]],
       device='cuda:0', grad_fn=<AddmmBackward>)
graph(%actual_input_1 : Float(10:150528, 3:50176, 224:224, 224:1, requires_grad=0, device=cuda:0),
      %fc.weight : Float(1000:512, 512:1, requires_grad=1, device=cuda:0),
      %fc.bias : Float(1000:1, requires_grad=1, device=cuda:0),
      %193 : Float(64:147, 3:49, 7:7, 7:1, requires_grad=0, device=cuda:0),
      %194 : Float(64:1, requires_grad=0, device=cuda:0),
      %196 : Float(64:576, 64:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %197 : Float(64:1, requires_grad=0, device=cuda:0),
      %199 : Float(64:576, 64:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %200 : Float(64:1, requires_grad=0, device=cuda:0),
      %202 : Float(64:576, 64:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %203 : Float(64:1, requires_grad=0, device=cuda:0),
      %205 : Float(64:576, 64:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %206 : Float(64:1, requires_grad=0, device=cuda:0),
      %208 : Float(128:576, 64:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %209 : Float(128:1, requires_grad=0, device=cuda:0),
      %211 : Float(128:1152, 128:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %212 : Float(128:1, requires_grad=0, device=cuda:0),
      %214 : Float(128:64, 64:1, 1:1, 1:1, requires_grad=0, device=cuda:0),
      %215 : Float(128:1, requires_grad=0, device=cuda:0),
      %217 : Float(128:1152, 128:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %218 : Float(128:1, requires_grad=0, device=cuda:0),
      %220 : Float(128:1152, 128:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %221 : Float(128:1, requires_grad=0, device=cuda:0),
      %223 : Float(256:1152, 128:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %224 : Float(256:1, requires_grad=0, device=cuda:0),
      %226 : Float(256:2304, 256:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %227 : Float(256:1, requires_grad=0, device=cuda:0),
      %229 : Float(256:128, 128:1, 1:1, 1:1, requires_grad=0, device=cuda:0),
      %230 : Float(256:1, requires_grad=0, device=cuda:0),
      %232 : Float(256:2304, 256:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %233 : Float(256:1, requires_grad=0, device=cuda:0),
      %235 : Float(256:2304, 256:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %236 : Float(256:1, requires_grad=0, device=cuda:0),
      %238 : Float(512:2304, 256:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %239 : Float(512:1, requires_grad=0, device=cuda:0),
      %241 : Float(512:4608, 512:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %242 : Float(512:1, requires_grad=0, device=cuda:0),
      %244 : Float(512:256, 256:1, 1:1, 1:1, requires_grad=0, device=cuda:0),
      %245 : Float(512:1, requires_grad=0, device=cuda:0),
      %247 : Float(512:4608, 512:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %248 : Float(512:1, requires_grad=0, device=cuda:0),
      %250 : Float(512:4608, 512:9, 3:3, 3:1, requires_grad=0, device=cuda:0),
      %251 : Float(512:1, requires_grad=0, device=cuda:0)):
  %192 : Float(10:802816, 64:12544, 112:112, 112:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[7, 7], pads=[3, 3, 3, 3], strides=[2, 2]](%actual_input_1, %193, %194)
  %125 : Float(10:802816, 64:12544, 112:112, 112:1, requires_grad=1, device=cuda:0) = onnx::Relu(%192) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %126 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::MaxPool[kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2]](%125) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:586:0
  %195 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%126, %196, %197)
  %129 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Relu(%195) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %198 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%129, %199, %200)
  %132 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Add(%198, %126)
  %133 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Relu(%132) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %201 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%133, %202, %203)
  %136 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Relu(%201) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %204 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%136, %205, %206)
  %139 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Add(%204, %133)
  %140 : Float(10:200704, 64:3136, 56:56, 56:1, requires_grad=1, device=cuda:0) = onnx::Relu(%139) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %207 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2]](%140, %208, %209)
  %143 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Relu(%207) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %210 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%143, %211, %212)
  %213 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[2, 2]](%140, %214, %215)
  %148 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Add(%210, %213)
  %149 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Relu(%148) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %216 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%149, %217, %218)
  %152 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Relu(%216) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %219 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%152, %220, %221)
  %155 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Add(%219, %149)
  %156 : Float(10:100352, 128:784, 28:28, 28:1, requires_grad=1, device=cuda:0) = onnx::Relu(%155) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %222 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2]](%156, %223, %224)
  %159 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Relu(%222) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %225 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%159, %226, %227)
  %228 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[2, 2]](%156, %229, %230)
  %164 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Add(%225, %228)
  %165 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Relu(%164) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %231 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%165, %232, %233)
  %168 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Relu(%231) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %234 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%168, %235, %236)
  %171 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Add(%234, %165)
  %172 : Float(10:50176, 256:196, 14:14, 14:1, requires_grad=1, device=cuda:0) = onnx::Relu(%171) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %237 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[2, 2]](%172, %238, %239)
  %175 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Relu(%237) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %240 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%175, %241, %242)
  %243 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[2, 2]](%172, %244, %245)
  %180 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Add(%240, %243)
  %181 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Relu(%180) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %246 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%181, %247, %248)
  %184 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Relu(%246) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %249 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1]](%184, %250, %251)
  %187 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Add(%249, %181)
  %188 : Float(10:25088, 512:49, 7:7, 7:1, requires_grad=1, device=cuda:0) = onnx::Relu(%187) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1134:0
  %189 : Float(10:512, 512:1, 1:1, 1:1, requires_grad=1, device=cuda:0) = onnx::GlobalAveragePool(%188) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:936:0
  %190 : Float(10:512, 512:1, requires_grad=1, device=cuda:0) = onnx::Flatten[axis=1](%189) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torchvision\models\resnet.py:214:0
  %output1 : Float(10:1000, 1000:1, requires_grad=1, device=cuda:0) = onnx::Gemm[alpha=1., beta=1., transB=1](%190, %fc.weight, %fc.bias) # D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\torch\nn\functional.py:1690:0
  return (%output1)

2. Run ONNX with the TensorFlow backend

import onnx
from onnx_tf.backend import prepare

onnx_model = onnx.load("resnet18.onnx") # load onnx model

# Check that the IR is well formed
onnx.checker.check_model(onnx_model)


# Print a human readable representation of the graph
onnx.helper.printable_graph(onnx_model.graph)
'graph torch-jit-export (\n  %actual_input_1[FLOAT, 10x3x224x224]\n) initializers (\n  %193[FLOAT, 64x3x7x7]\n  %194[FLOAT, 64]\n  %196[FLOAT, 64x64x3x3]\n  %197[FLOAT, 64]\n  %199[FLOAT, 64x64x3x3]\n  %200[FLOAT, 64]\n  %202[FLOAT, 64x64x3x3]\n  %203[FLOAT, 64]\n  %205[FLOAT, 64x64x3x3]\n  %206[FLOAT, 64]\n  %208[FLOAT, 128x64x3x3]\n  %209[FLOAT, 128]\n  %211[FLOAT, 128x128x3x3]\n  %212[FLOAT, 128]\n  %214[FLOAT, 128x64x1x1]\n  %215[FLOAT, 128]\n  %217[FLOAT, 128x128x3x3]\n  %218[FLOAT, 128]\n  %220[FLOAT, 128x128x3x3]\n  %221[FLOAT, 128]\n  %223[FLOAT, 256x128x3x3]\n  %224[FLOAT, 256]\n  %226[FLOAT, 256x256x3x3]\n  %227[FLOAT, 256]\n  %229[FLOAT, 256x128x1x1]\n  %230[FLOAT, 256]\n  %232[FLOAT, 256x256x3x3]\n  %233[FLOAT, 256]\n  %235[FLOAT, 256x256x3x3]\n  %236[FLOAT, 256]\n  %238[FLOAT, 512x256x3x3]\n  %239[FLOAT, 512]\n  %241[FLOAT, 512x512x3x3]\n  %242[FLOAT, 512]\n  %244[FLOAT, 512x256x1x1]\n  %245[FLOAT, 512]\n  %247[FLOAT, 512x512x3x3]\n  %248[FLOAT, 512]\n  %250[FLOAT, 512x512x3x3]\n  %251[FLOAT, 512]\n  %fc.bias[FLOAT, 1000]\n  %fc.weight[FLOAT, 1000x512]\n) {\n  %192 = Conv[dilations = [1, 1], group = 1, kernel_shape = [7, 7], pads = [3, 3, 3, 3], strides = [2, 2]](%actual_input_1, %193, %194)\n  %125 = Relu(%192)\n  %126 = MaxPool[kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [2, 2]](%125)\n  %195 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%126, %196, %197)\n  %129 = Relu(%195)\n  %198 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%129, %199, %200)\n  %132 = Add(%198, %126)\n  %133 = Relu(%132)\n  %201 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%133, %202, %203)\n  %136 = Relu(%201)\n  %204 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%136, %205, %206)\n  %139 = Add(%204, %133)\n  %140 = Relu(%139)\n  %207 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [2, 2]](%140, %208, %209)\n  %143 = Relu(%207)\n  %210 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%143, %211, %212)\n  %213 = Conv[dilations = [1, 1], group = 1, kernel_shape = [1, 1], pads = [0, 0, 0, 0], strides = [2, 2]](%140, %214, %215)\n  %148 = Add(%210, %213)\n  %149 = Relu(%148)\n  %216 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%149, %217, %218)\n  %152 = Relu(%216)\n  %219 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%152, %220, %221)\n  %155 = Add(%219, %149)\n  %156 = Relu(%155)\n  %222 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [2, 2]](%156, %223, %224)\n  %159 = Relu(%222)\n  %225 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%159, %226, %227)\n  %228 = Conv[dilations = [1, 1], group = 1, kernel_shape = [1, 1], pads = [0, 0, 0, 0], strides = [2, 2]](%156, %229, %230)\n  %164 = Add(%225, %228)\n  %165 = Relu(%164)\n  %231 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%165, %232, %233)\n  %168 = Relu(%231)\n  %234 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%168, %235, %236)\n  %171 = Add(%234, %165)\n  %172 = Relu(%171)\n  %237 = Conv[dilations = [1, 1], group = 1, 
kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [2, 2]](%172, %238, %239)\n  %175 = Relu(%237)\n  %240 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%175, %241, %242)\n  %243 = Conv[dilations = [1, 1], group = 1, kernel_shape = [1, 1], pads = [0, 0, 0, 0], strides = [2, 2]](%172, %244, %245)\n  %180 = Add(%240, %243)\n  %181 = Relu(%180)\n  %246 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%181, %247, %248)\n  %184 = Relu(%246)\n  %249 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [1, 1, 1, 1], strides = [1, 1]](%184, %250, %251)\n  %187 = Add(%249, %181)\n  %188 = Relu(%187)\n  %189 = GlobalAveragePool(%188)\n  %190 = Flatten[axis = 1](%189)\n  %output1 = Gemm[alpha = 1, beta = 1, transB = 1](%190, %fc.weight, %fc.bias)\n  return %output1\n}'
output = prepare(onnx_model).run(dummy_input.cpu())  # run the loaded model
output
Outputs(output1=array([[0.5069145 , 2.8980782 , 2.8367603 , ..., 0.2859818 , 0.17711115,
        1.2873001 ],
       [0.46708533, 2.6108694 , 2.5460322 , ..., 0.3299198 , 0.44836068,
        1.4698317 ],
       [0.5995231 , 2.6909895 , 2.4556172 , ..., 0.3942047 , 0.28599155,
        1.350537  ],
       ...,
       [1.0403817 , 3.0089192 , 3.004794  , ..., 0.0915335 , 0.13177788,
        1.5754144 ],
       [0.2126701 , 2.686536  , 2.877485  , ..., 0.20710337, 0.32794827,
        1.4289923 ],
       [0.38755298, 2.7030327 , 2.4524589 , ..., 0.21225189, 0.4668123 ,
        1.3190124 ]], dtype=float32))
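
As a sanity check (not part of the original run), the TF-backend result can be compared against the native PyTorch output; a minimal sketch, assuming model, dummy_input, and output from the cells above are still in scope:

import numpy as np

with torch.no_grad():
    torch_out = model(dummy_input).cpu().numpy()

# Outputs is a namedtuple; output1 is the output name chosen at export time.
print("max abs diff vs PyTorch:", np.abs(torch_out - output.output1).max())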

Running on the GPU (unsupported operator)

output = prepare(onnx_model, device="CUDA").run(dummy_input.cpu())  # run the loaded model
print(output)
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-70-d727142059ee> in <module>
----> 1 output = prepare(onnx_model, device="CUDA").run(dummy_input.cpu())  # run the loaded model
      2 print(output)

c:\users\kangningcai\downloads\compressed\onnx-tensorflow-master\onnx_tf\backend_rep.py in run(self, inputs, **kwargs)
     91     input_dict = dict([(x[0], tf.constant(x[1])) for x in feed_dict.items()])
     92 
---> 93     output_values = self.tf_module(**input_dict)
     94     output_values = [
     95         val.numpy() if isinstance(val, tf.Tensor) else val

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self, *args, **kwds)
    778       else:
    779         compiler = "nonXla"
--> 780         result = self._call(*args, **kwds)
    781 
    782       new_tracing_count = self._get_tracing_count()

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self, *args, **kwds)
    844               *args, **kwds)
    845       # If we did not create any variables the trace we have is good enough.
--> 846       return self._concrete_stateful_fn._filtered_call(canon_args, canon_kwds)  # pylint: disable=protected-access
    847 
    848     def fn_with_cond(*inner_args, **inner_kwds):

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\function.py in _filtered_call(self, args, kwargs, cancellation_manager)
   1846                            resource_variable_ops.BaseResourceVariable))],
   1847         captured_inputs=self.captured_inputs,
-> 1848         cancellation_manager=cancellation_manager)
   1849 
   1850   def _call_flat(self, args, captured_inputs, cancellation_manager=None):

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1922       # No tape is watching; skip to running the function.
   1923       return self._build_call_outputs(self._inference_function.call(
-> 1924           ctx, args, cancellation_manager=cancellation_manager))
   1925     forward_backward = self._select_forward_and_backward_functions(
   1926         args,

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\function.py in call(self, ctx, args, cancellation_manager)
    548               inputs=args,
    549               attrs=attrs,
--> 550               ctx=ctx)
    551         else:
    552           outputs = execute.execute_with_cancellation(

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     58     ctx.ensure_initialized()
     59     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60                                         inputs, attrs, num_outputs)
     61   except core._NotOkStatusException as e:
     62     if name is not None:

InvalidArgumentError:  Default MaxPoolingOp only supports NHWC on device type CPU
	 [[node MaxPool (defined at c:\users\kangningcai\downloads\compressed\onnx-tensorflow-master\onnx_tf\handlers\backend\dilated_pooling.py:705) ]] [Op:__inference___call___7188]

Errors may have originated from an input operation.
Input Source operations connected to node MaxPool:
 PadV2 (defined at c:\users\kangningcai\downloads\compressed\onnx-tensorflow-master\onnx_tf\handlers\backend\dilated_pooling.py:517)

Function call stack:
__call__
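
The error appears to come from TensorFlow placing the converted NCHW MaxPool on a CPU kernel that only supports NHWC. A defensive pattern (a sketch, not from the original post) is to try the CUDA device and fall back to CPU when an operator is unsupported:

try:
    tf_rep = prepare(onnx_model, device="CUDA")
    output = tf_rep.run(dummy_input.cpu())
except Exception as e:  # e.g. InvalidArgumentError from an NHWC-only kernel
    print("GPU run failed, falling back to CPU:", e)
    tf_rep = prepare(onnx_model, device="CPU")
    output = tf_rep.run(dummy_input.cpu())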

3. Run ONNX with the onnxruntime-gpu backend

import onnxruntime as ort
sess = ort.InferenceSession("resnet18.onnx")
input_s = sess.get_inputs()
print(input_s)
[<onnxruntime.capi.onnxruntime_pybind11_state.NodeArg object at 0x00000233B2779378>]
help(ort.InferenceSession)
Help on class InferenceSession in module onnxruntime.capi.onnxruntime_inference_collection:

class InferenceSession(Session)
 |  InferenceSession(path_or_bytes, sess_options=None, providers=None, provider_options=None)
 |  
 |  This is the main class used to run a model.
 |  
 |  Method resolution order:
 |      InferenceSession
 |      Session
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, path_or_bytes, sess_options=None, providers=None, provider_options=None)
 |      :param path_or_bytes: filename or serialized ONNX or ORT format model in a byte string
 |      :param sess_options: session options
 |      :param providers: list of providers to use for session. If empty, will use all available providers.
 |      :param provider_options: list of provider options dict for each provider, in the same order as 'providers'
 |      
 |      The model type will be inferred unless explicitly set in the SessionOptions.
 |      To explicitly set:
 |        so = onnxruntime.SessionOptions()
 |        so.add_session_config_entry('session.load_model_format', 'ONNX') or
 |        so.add_session_config_entry('session.load_model_format', 'ORT') or
 |      
 |      A file extension of '.ort' will be inferred as an ORT format model.
 |      All other filenames are assumed to be ONNX format models.
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from Session:
 |  
 |  disable_fallback(self)
 |      Disable session.run() fallback mechanism.
 |  
 |  enable_fallback(self)
 |      Enable session.Run() fallback mechanism. If session.Run() fails due to an internal Execution Provider failure,
 |      reset the Execution Providers enabled for this session.
 |      If GPU is enabled, fall back to CUDAExecutionProvider.
 |      otherwise fall back to CPUExecutionProvider.
 |  
 |  end_profiling(self)
 |      End profiling and return results in a file.
 |      
 |      The results are stored in a filename if the option
 |      :meth:`onnxruntime.SessionOptions.enable_profiling`.
 |  
 |  get_inputs(self)
 |      Return the inputs metadata as a list of :class:`onnxruntime.NodeArg`.
 |  
 |  get_modelmeta(self)
 |      Return the metadata. See :class:`onnxruntime.ModelMetadata`.
 |  
 |  get_outputs(self)
 |      Return the outputs metadata as a list of :class:`onnxruntime.NodeArg`.
 |  
 |  get_overridable_initializers(self)
 |      Return the inputs (including initializers) metadata as a list of :class:`onnxruntime.NodeArg`.
 |  
 |  get_profiling_start_time_ns(self)
 |      Return the nanoseconds of profiling's start time
 |      Comparable to time.monotonic_ns() after Python 3.3
 |      On some platforms, this timer may not be as precise as nanoseconds
 |      For instance, on Windows and MacOS, the precision will be ~100ns
 |  
 |  get_provider_options(self)
 |      Return registered execution providers' configurations.
 |  
 |  get_providers(self)
 |      Return list of registered execution providers.
 |  
 |  get_session_options(self)
 |      Return the session options. See :class:`onnxruntime.SessionOptions`.
 |  
 |  io_binding(self)
 |      Return an onnxruntime.IOBinding object`.
 |  
 |  run(self, output_names, input_feed, run_options=None)
 |      Compute the predictions.
 |      
 |      :param output_names: name of the outputs
 |      :param input_feed: dictionary ``{ input_name: input_value }``
 |      :param run_options: See :class:`onnxruntime.RunOptions`.
 |      
 |      ::
 |      
 |          sess.run([output_name], {input_name: x})
 |  
 |  run_with_iobinding(self, iobinding, run_options=None)
 |      Compute the predictions.
 |      
 |      :param iobinding: the iobinding object that has graph inputs/outputs bind.
 |      :param run_options: See :class:`onnxruntime.RunOptions`.
 |  
 |  set_providers(self, providers, provider_options=None)
 |      Register the input list of execution providers. The underlying session is re-created.
 |      
 |      :param providers: list of execution providers
 |      :param provider_options: list of provider options dict for each provider, in the same order as 'providers'
 |      
 |      The list of providers is ordered by Priority. For example ['CUDAExecutionProvider', 'CPUExecutionProvider']
 |      means execute a node using CUDAExecutionProvider if capable, otherwise execute using CPUExecutionProvider.
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from Session:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
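
Per the __init__ signature above, the provider order can also be fixed at construction time instead of calling set_providers afterwards; a minimal sketch:

sess = ort.InferenceSession(
    "resnet18.onnx",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
print(sess.get_providers())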
print(sess.get_inputs()[0].shape)
sess.get_provider_options()
[10, 3, 224, 224]

{'CUDAExecutionProvider': {'device_id': '0',
  'cuda_mem_limit': '18446744073709551615',
  'arena_extend_strategy': 'kNextPowerOfTwo'},
 'CPUExecutionProvider': {}}
sess.get_providers()
['CUDAExecutionProvider', 'CPUExecutionProvider']
#help(ort.RunOptions)
sess.set_providers(["CUDAExecutionProvider"])

inputs = sess.get_inputs()
len_inputs = len(inputs)
print(len_inputs, inputs)
print(inputs[0].name, inputs[0].shape)

outputs = sess.get_outputs()
len_outputs = len(outputs)
print(len_outputs, outputs)
print(outputs[0].name, outputs[0].shape)
1 [<onnxruntime.capi.onnxruntime_pybind11_state.NodeArg object at 0x00000233B2BD96C0>]
actual_input_1 [10, 3, 224, 224]
1 [<onnxruntime.capi.onnxruntime_pybind11_state.NodeArg object at 0x00000233AD7EFF48>]
output1 [10, 1000]

dummy_input = torch.randn(10, 3, 224, 224, device='cuda')

output = sess.run(["output1"], {"actual_input_1": dummy_input.cpu().numpy()})
print(output[0].shape)
print(output[0])
(10, 1000)
[[ 1.4698393   2.0387304   1.7258139  ...  0.83965874  0.5422346
   1.2121105 ]
 [ 1.1619968   2.2490318   2.1027348  ...  0.37480694  0.4261159
   1.2232935 ]
 [ 1.0652378   2.1549475   1.186148   ...  1.0919206   0.6438071
   1.1574954 ]
 ...
 [ 1.1870115   2.4116068   1.9177108  ...  0.27283034 -0.06160793
   1.1494317 ]
 [ 1.6173385   1.9768811   1.3131188  ...  0.78727126  0.57735306
   1.0042048 ]
 [ 1.6731455   2.8796082   2.7225587  ...  0.38563025  0.8073966
   1.1718726 ]]
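
To confirm that onnxruntime agrees with PyTorch on this input (a sketch, assuming the model from step 1 is still loaded on the GPU):

import numpy as np

with torch.no_grad():
    torch_out = model(dummy_input).cpu().numpy()
# For float32 inference the difference is typically on the order of 1e-5.
print("max abs diff vs PyTorch:", np.abs(torch_out - output[0]).max())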

4. Run ONNX with the Caffe2 backend

The Caffe2 backend is used for PyTorch deployment by default.
In practice, however, Caffe2's support for ONNX model inference proved poor; the code and results follow.

import torch
import onnx
import caffe2.python.onnx.backend
from caffe2.python.onnx.backend import prepare


# Prepare the inputs, here we use numpy to generate some random inputs for demo purpose
import numpy as np
img = np.random.randn(1, 3, 224, 224).astype(np.float32)
"""
# Load the ONNX model
model = onnx.load('assets/squeezenet.onnx')
# Run the ONNX model with Caffe2
outputs = caffe2.python.onnx.backend.run_model(model, [img])
"""
dummy_input = torch.randn(10, 3, 224, 224, device='cuda')
onnx_model = onnx.load("resnet18.onnx") # load onnx model

prepared = prepare(onnx_model, device="CPU")

B = {onnx_model.graph.input[0].name: dummy_input.cpu().numpy()}
print(B)

outputs = caffe2.python.onnx.backend.run_model(onnx_model, B, device="CUDA")
help(caffe2.python.onnx.backend)
IndexError                                Traceback (most recent call last)
<ipython-input-61-45a155392781> in <module>
     17 onnx_model = onnx.load("resnet18.onnx") # load onnx model
     18 
---> 19 prepared = prepare(onnx_model, device="CPU")
     20 
     21 B = {onnx_model.graph.input[0].name: dummy_input.cpu().numpy()}

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\caffe2\python\onnx\backend.py in prepare(cls, model, device, raw_values_dict, **kwargs)
    711         device_option = get_device_option(Device(device))
    712 
--> 713         init_net, predict_net = cls._onnx_model_to_caffe2_net(model, device, opset_version, False)
    714 
    715         if raw_values_dict:

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\caffe2\python\onnx\backend.py in _onnx_model_to_caffe2_net(cls, onnx_model, device, opset_version, include_initializers)
    874         device_option = get_device_option(Device(device))
    875 
--> 876         onnx_model = onnx.utils.polish_model(onnx_model)
    877         init_model = cls.optimize_onnx(onnx_model, init=True)
    878         pred_model = cls.optimize_onnx(onnx_model, predict=True)

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\onnx\utils.py in polish_model(model)
     19     onnx.helper.strip_doc_string(model)
     20     model = onnx.shape_inference.infer_shapes(model)
---> 21     model = onnx.optimizer.optimize(model)
     22     onnx.checker.check_model(model)
     23     return model

D:\Anaconda3\Anaconda3_201910_64\envs\AI_gpu\lib\site-packages\onnx\optimizer.py in optimize(model, passes, fixed_point)
     53         optimized_model_str = C.optimize_fixedpoint(model_str, passes)
     54     else:
---> 55         optimized_model_str = C.optimize(model_str, passes)
     56 
     57     return onnx.load_from_string(optimized_model_str)

IndexError: Input 193 is undefined!
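
The "Input 193 is undefined!" error comes from onnx.optimizer inside Caffe2's polish_model: recent PyTorch exporters store weights only as graph initializers (the %193-style tensors) without also listing them as graph inputs, which the legacy optimizer expects. One possible workaround (an untested sketch, not from the original post) is to re-declare the initializers as graph inputs before handing the model to Caffe2:

from onnx import helper

fixed = onnx.load("resnet18.onnx")
declared = {i.name for i in fixed.graph.input}
for init in fixed.graph.initializer:
    if init.name not in declared:
        # Expose each initializer as a graph input so the legacy
        # optimizer passes can resolve names like %193.
        fixed.graph.input.append(
            helper.make_tensor_value_info(init.name, init.data_type, init.dims))
onnx.save(fixed, "resnet18_caffe2.onnx")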

help(onnx.backend.base.Backend.supports_device)
#onnx.backend.base.Backend.supports_device("NPU")
Help on method supports_device in module onnx.backend.base:

supports_device(device) method of builtins.type instance
    Checks whether the backend is compiled with particular device support.
    In particular it's used in the testing suite.
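
A concrete backend overrides supports_device; a short sketch querying the Caffe2 backend class directly (assuming Caffe2Backend is importable as below):

from caffe2.python.onnx.backend import Caffe2Backend
print(Caffe2Backend.supports_device("CPU"))   # expected True
print(Caffe2Backend.supports_device("CUDA"))  # True only for a GPU-enabled Caffe2 build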

Reposted from blog.csdn.net/weixin_41521681/article/details/113026479