YOLOv7 uses onnx to export tensorrt model to achieve high-performance deployment, including opencv multi-threading, image queue access, automatic saving of xml and jpg and other modules
0 Preface
This article is based on yolov7+tensorrt target detection, and integrates opencv multi-threading, image queue access, automatic saving of xml and jpg and other modules to achieve multi-directional high-performance deployment.
If you don't know how to train and export the model, see this article: YOLOv7 tutorial series: 1. Train your own object detection model on a custom dataset (step-by-step tutorial, including dataset preprocessing), with detailed descriptions of train.py/test.py/detect.py/export.py
This article has been open sourced on github: github link: yolov7_tensorrt_opencv_queue
1. Basic environment
Before starting, you need to configure cuda+cudnn+tensorrt on the local machine. Note that the versions of the three must be compatible with one another, otherwise errors will occur later (tensorrt 8.4.3.1 is known to work). The configuration used in this article is as follows:
- ubuntu20.04
- cuda11.2
- cudnn8.4.0
- tensorrt8.4.3.1
- python3.7
- pytorch1.10.0
- torchvision0.11.0
2. Model export
If tensorrt is installed with a tar package, you need to enter the tensorrt installation directory to export, of course, you can also use other installation methods, just use the trtexec tool directly. The specific usage of trtexec can be found in this article: TensorRT Tutorial 3: Using the trtexec tool to convert the engine
/你安装tensorrt路径/TensorRt-8.4.3.1/targets/x86_64-linux-gnu/bin/trtexec --onnx=/你的onnx路径/best.onnx --saveEngine=/你需要保存的路径/best.engine --fp16 --workspace=1000
3. Environment configuration
(1) Clone the repository
git clone https://github.com/ZhijunLStudio/yolov7_tensorrt_opencv_queue.git
(2) Install dependent packages
pip install -r requirements.txt
4. Modify the code configuration
Modify the configuration in detect.py, where:
(1) Modify the model name
Create a new "model" folder in the root directory of the code, and put best.engine in it
trt_name = "best.engine"
(2) Modify the rtsp stream address
If you are using a usb camera or onboard camera, you can change it to 0 (note that there are no double quotes)
RtspUrl = "rtsp://admin:[email protected]:554/Streaming/Channels/101"
(3) Automatically generate jpg and xml configurations
It needs to be configured according to {"folder name": {0: "label 1", 1: "label 2", 2: "label 3"...}}
label_dict = {
'person': {
0: 'person'}}
5. Run the code
python detect.py
6. Code Details
(1) Main code
The main function is to call each packaged module, including the tensorrt engine module, save the xml and jpg modules, the GPU module available for multi-card indexing, and the image storage queue module, and process the data in the image queue in a loop:
import os
import cv2
from cv2 import getTickCount, getTickFrequency
from queue import Queue
import queue
from algorithm.yolov7_trt import TRT_engine
from algorithm.yolov7_trt import visualize
from utils.myPrint import customPrint
from utils.generateXml import GenerateJpgAndXml
from utils.nvidia import indexGPU
from utils.frameThread import FrameThread
if __name__ == '__main__':
    # Adjust settings 1, 2 and 3 below to match your own model and camera.
    # 1. File name of the TensorRT engine stored in the "model" folder.
    trt_name = "best.engine"
    # 2. RTSP address. For a USB camera or another on-board camera, use the
    #    integer 0 (without quotes) instead.
    #    NOTE(review): the host/credential part below looks like a redacted
    #    placeholder - replace it with your camera's real address.
    RtspUrl = "rtsp://admin:[email protected]:554/Streaming/Channels/101"
    # 3. Label dictionary for automatic XML generation, in the form
    #    {"output folder name": {0: "label 1", 1: "label 2", 2: "label 3", ...}}.
    label_dict = {
        'person': {
            0: 'person'}}

    # Ask for the index of a lightly used GPU. The value is only reported
    # here; the engine below is still pinned to GPU 0.
    gpu_id = indexGPU()
    # The engine file is looked up relative to the current working directory.
    trt_path = os.path.join(os.getcwd(), "model", trt_name)
    # Arguments: inference image size, engine path, index of the GPU to use.
    trt_engine = TRT_engine(imgsz=640, weight=trt_path, GPUId=0)
    # To run on the GPU selected above instead, use:
    # trt_engine = TRT_engine(imgsz=640, weight=trt_path, GPUId=gpu_id)

    # Create the jpg/xml output folders for the first entry of label_dict.
    # NOTE(review): the writer is constructed but never invoked in this loop.
    label_dict_key = list(label_dict.keys())[0]
    label_dict_value = list(label_dict.values())[0]
    car_write_xml = GenerateJpgAndXml(label_dict_key, label_dict_value)

    # Queue that carries frames from the reader thread to this loop.
    q = Queue()
    # Reader thread; as a daemon it cannot keep the process alive on its own.
    thread = FrameThread(RtspUrl, q)
    thread.daemon = True
    thread.start()

    out_win = "yolov7_trt_output"
    # The window is created once, when the first frame is about to be shown,
    # instead of being re-created and re-configured on every iteration.
    window_ready = False

    try:
        while True:
            loop_start = getTickCount()
            try:
                # Wait up to 3 seconds for the next frame.
                frame = q.get(block=True, timeout=3)
            except queue.Empty:
                print('队列为空,get失败')
                # Once the reader has given up no frame will ever arrive, so
                # leave the loop instead of polling an empty queue forever.
                if not thread.is_alive():
                    break
                continue

            results = trt_engine.predict(frame, threshold=0.5)
            # Draw the detections onto the frame.
            frame = visualize(frame, results)

            # FPS covers the whole iteration, including the wait for a frame.
            total_time = (getTickCount() - loop_start) / getTickFrequency()
            FPS = 1 / total_time if total_time > 0 else 0.0

            # Text overlay in the top-left corner.
            cv2.putText(frame, f"FPS: {int(FPS)}", (0, 100), cv2.FONT_HERSHEY_COMPLEX, 2.0, (100, 200, 200), 2)
            cv2.putText(frame, "Press q to exit", (0, 200), cv2.FONT_HERSHEY_COMPLEX, 2.0, (100, 100, 200), 2)

            if not window_ready:
                cv2.namedWindow(out_win, cv2.WINDOW_NORMAL)
                cv2.setWindowProperty(out_win, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
                window_ready = True
            cv2.imshow(out_win, frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Ask the reader loop to stop, give it a moment to release the
        # capture, then close the display window.
        thread.thread_exit = True
        thread.join(timeout=3)
        cv2.destroyAllWindows()
    customPrint("----------------------所有程序已结束----------------------")
(2) tensorrt engine module
import cv2
import tensorrt as trt
import torch
import numpy as np
import os
from collections import OrderedDict,namedtuple
class TRT_engine():
    """Run a serialized TensorRT detection engine on single images.

    The engine is expected to expose an input binding named ``images`` and
    output bindings named ``num_dets``, ``det_boxes``, ``det_scores`` and
    ``det_classes``; ``predict`` looks them up by exactly those names.
    """

    def __init__(self, imgsz, weight, GPUId) -> None:
        """Load the engine file ``weight`` for ``imgsz`` x ``imgsz`` inputs.

        Args:
            imgsz: Side length of the square network input.
            weight: Path of the serialized TensorRT engine file.
            GPUId: Index of the CUDA device to run on.
        """
        self.imgsz = [imgsz, imgsz]
        self.weight = weight
        self.device = torch.device('cuda:' + str(GPUId))
        # A TensorRT engine is bound to the CUDA device that is current when
        # it is deserialized. Make that the device the torch buffers live on,
        # otherwise any GPUId other than the current device breaks inference.
        torch.cuda.set_device(self.device)
        self.init_engine()

    def init_engine(self):
        """Deserialize the engine and allocate one device buffer per binding.

        Raises:
            RuntimeError: If TensorRT cannot deserialize the engine file.
        """
        self.Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
        self.logger = trt.Logger(trt.Logger.INFO)
        trt.init_libnvinfer_plugins(self.logger, namespace="")
        # Keep the runtime referenced for as long as the engine is in use.
        self.runtime = trt.Runtime(self.logger)
        with open(self.weight, 'rb') as engine_file:
            self.model = self.runtime.deserialize_cuda_engine(engine_file.read())
        if self.model is None:
            raise RuntimeError(f"Failed to deserialize TensorRT engine: {self.weight}")

        self.bindings = OrderedDict()
        self.fp16 = False
        for index in range(self.model.num_bindings):
            name = self.model.get_binding_name(index)
            dtype = trt.nptype(self.model.get_binding_dtype(index))
            shape = tuple(self.model.get_binding_shape(index))
            # Device buffer whose address is handed to TensorRT at run time.
            data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(self.device)
            self.bindings[name] = self.Binding(name, dtype, shape, data, int(data.data_ptr()))
            if self.model.binding_is_input(index) and dtype == np.float16:
                self.fp16 = True
        # Binding name -> device pointer, in binding order.
        self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
        self.context = self.model.create_execution_context()

    def letterbox(self, im, color=(114, 114, 114), auto=False, scaleup=True, stride=32):
        """Resize ``im`` keeping its aspect ratio and pad it to ``self.imgsz``.

        Args:
            im: Image array whose first two axes are (height, width).
            color: Border colour used for the padding.
            auto: If true, pad only up to the next multiple of ``stride``.
            scaleup: If false, the image is only ever scaled down.
            stride: Stride used when ``auto`` is true.

        Returns:
            ``(image, ratio, dw, dh)``: the padded image, the scale ratio and
            the per-side horizontal / vertical padding. The same values are
            stored on ``self.img``, ``self.r``, ``self.dw`` and ``self.dh``.
        """
        shape = im.shape[:2]  # current shape [height, width]
        new_shape = self.imgsz
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        self.r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:  # only scale down, never up
            self.r = min(self.r, 1.0)

        # Size after resizing, as (width, height), and the remaining padding.
        new_unpad = int(round(shape[1] * self.r)), int(round(shape[0] * self.r))
        self.dw, self.dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
        if auto:  # minimum rectangle
            self.dw, self.dh = np.mod(self.dw, stride), np.mod(self.dh, stride)

        # Split the padding evenly between the two sides.
        self.dw /= 2
        self.dh /= 2

        if shape[::-1] != new_unpad:  # resize only when the size changes
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The +/-0.1 sends an odd padding's extra pixel to the bottom/right.
        top, bottom = int(round(self.dh - 0.1)), int(round(self.dh + 0.1))
        left, right = int(round(self.dw - 0.1)), int(round(self.dw + 0.1))
        self.img = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
        return self.img, self.r, self.dw, self.dh

    def preprocess(self, image):
        """Letterbox ``image`` and return it as a 1xCxHxW tensor on the device.

        The tensor is half precision when the engine's input binding is
        float16 and single precision otherwise.
        """
        # NOTE(review): no channel reordering or value scaling is done here;
        # confirm the exported engine expects the frame's raw channel order
        # and value range.
        self.img, self.r, self.dw, self.dh = self.letterbox(image)
        self.img = self.img.transpose((2, 0, 1))  # HWC -> CHW
        self.img = np.expand_dims(self.img, 0)  # add the batch axis
        self.img = np.ascontiguousarray(self.img)
        self.img = torch.from_numpy(self.img).to(self.device)
        # Match the input binding's dtype instead of always using float32.
        self.img = self.img.half() if self.fp16 else self.img.float()
        return self.img

    def predict(self, img, threshold):
        """Run inference on one image.

        Args:
            img: Image array accepted by ``letterbox``.
            threshold: Detections scoring below this value are dropped.

        Returns:
            A list of ``[class, score, xmin, ymin, xmax, ymax]`` entries with
            the box mapped back to the coordinates of the input image.
        """
        img = self.preprocess(img)
        # Point the input binding at the freshly prepared tensor.
        self.binding_addrs['images'] = int(img.data_ptr())
        self.context.execute_v2(list(self.binding_addrs.values()))

        nums = self.bindings['num_dets'].data[0].tolist()
        boxes = self.bindings['det_boxes'].data[0].tolist()
        scores = self.bindings['det_scores'].data[0].tolist()
        classes = self.bindings['det_classes'].data[0].tolist()

        num = int(nums[0])
        new_bboxes = []
        for i in range(num):
            if scores[i] < threshold:
                continue
            # Undo the letterbox: remove the padding, then the scaling.
            xmin = (boxes[i][0] - self.dw) / self.r
            ymin = (boxes[i][1] - self.dh) / self.r
            xmax = (boxes[i][2] - self.dw) / self.r
            ymax = (boxes[i][3] - self.dh) / self.r
            new_bboxes.append([classes[i], scores[i], xmin, ymin, xmax, ymax])
        return new_bboxes
def visualize(img, bbox_array):
    """Draw every detection in ``bbox_array`` onto ``img`` and return it.

    Each detection is indexed as ``[class, score, xmin, ymin, xmax, ymax]``.
    A rectangle is drawn for the box, and a ``class:<id> <score>`` caption
    (score rounded to two decimals) is placed 5 pixels above its top-left
    corner.
    """
    box_colour = (105, 237, 249)
    for det in bbox_array:
        left, top, right, bottom = (int(det[k]) for k in (2, 3, 4, 5))
        cls_id = int(det[0])
        caption = "class:" + str(cls_id) + " " + str(round(det[1], 2))
        cv2.rectangle(img, (left, top), (right, bottom), box_colour, 2)
        img = cv2.putText(img, caption, (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_colour, 1)
    return img
(3) Save the xml and jpg modules
import os
import cv2
import datetime
from utils.myPrint import customPrint
class GenerateJpgAndXml:
    """Save a frame as a jpg together with an annotation XML file.

    Files are written below ``<cwd>/JpgAndXml/<parentName>/`` into the
    ``JPEGImages`` and ``Annotations`` sub-folders.

    Args:
        parentName: Name of the folder that holds the jpg and xml
            sub-folders, e.g. ``person``.
        labelDict: Mapping from class id to the label name written to XML.
    """

    def __init__(self, parentName, labelDict):
        self.parentName = parentName
        # Root folder for all generated files.
        self.parentPath = os.path.join(os.getcwd(), "JpgAndXml")
        self.midPath = os.path.join(self.parentPath, self.parentName)
        # Name of the folder that stores the jpg files.
        self.jpgName = "JPEGImages"
        # Name of the folder that stores the xml files.
        self.xmlName = "Annotations"
        # Class id -> label name.
        self.labelDict = labelDict
        # Make sure the output folders exist before the first write.
        self.isExist()

    def isExist(self):
        """Create the jpg and xml folders if missing and report the outcome."""
        self.jpgPath = os.path.join(self.midPath, self.jpgName)
        self.xmlPath = os.path.join(self.midPath, self.xmlName)
        for perPath in (self.jpgPath, self.xmlPath):
            if os.path.exists(perPath):
                print(f"创建失败,已存在文件夹{perPath}")
            else:
                # exist_ok avoids a crash if the folder appears in between.
                os.makedirs(perPath, exist_ok=True)
                print(f"创建成功,已创建文件夹{perPath}")

    def _build_xml(self, jpgName, jpgPath, hwc, result):
        """Return the annotation XML text for one image.

        Args:
            jpgName: File name of the saved image.
            jpgPath: Full path of the saved image.
            hwc: ``frame.shape`` of the image, (height, width[, channels]).
            result: Detections indexed as
                ``[class, score, xmin, ymin, xmax, ymax]``.

        Raises:
            KeyError: If a detection's class id is not in ``labelDict``.
        """
        # Imported locally so the block does not depend on a file-level import.
        from xml.sax.saxutils import escape

        # A two-dimensional (single channel) frame has no third shape entry.
        depth = hwc[2] if len(hwc) > 2 else 1
        lines = [
            '<annotation>\n',
            '\t<folder>' + escape(str(self.parentName)) + '</folder>\n',
            '\t<filename>' + escape(str(jpgName)) + '</filename>\n',
            '\t<path>' + escape(str(jpgPath)) + '</path>\n',
            '\t<source>\n',
            '\t\t<database>' + 'Unknown' + '</database>\n',
            '\t</source>\n',
            '\t<size>\n',
            '\t\t<width>' + str(hwc[1]) + '</width>\n',
            '\t\t<height>' + str(hwc[0]) + '</height>\n',
            '\t\t<depth>' + str(depth) + '</depth>\n',
            '\t</size>\n',
            '\t<segmented>0</segmented>\n',
        ]
        for det in result:
            ObjName = self.labelDict[det[0]]
            # Box corners are truncated to whole pixels.
            xmin = int(det[2])
            ymin = int(det[3])
            xmax = int(det[4])
            ymax = int(det[5])
            lines.extend([
                '\t<object>\n',
                '\t\t<name>' + escape(str(ObjName)) + '</name>\n',
                '\t\t<pose>Unspecified</pose>\n',
                '\t\t<truncated>0</truncated>\n',
                '\t\t<difficult>0</difficult>\n',
                '\t\t<bndbox>\n',
                '\t\t\t<xmin>' + str(xmin) + '</xmin>\n',
                '\t\t\t<ymin>' + str(ymin) + '</ymin>\n',
                '\t\t\t<xmax>' + str(xmax) + '</xmax>\n',
                '\t\t\t<ymax>' + str(ymax) + '</ymax>\n',
                '\t\t</bndbox>\n',
                '\t</object>\n',
            ])
        lines.append('</annotation>')
        return ''.join(lines)

    def generatr_xml(self, frame, result):
        """Write ``frame`` as a jpg and ``result`` as its annotation XML.

        Both files share a timestamp-based name
        (``%Y%m%d%H%M%S%f`` of the current local time).

        Args:
            frame: Image array passed to ``cv2.imwrite``.
            result: Detections indexed as
                ``[class, score, xmin, ymin, xmax, ymax]``.
        """
        xmlPrefix = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
        jpgName = xmlPrefix + ".jpg"
        jpgPath = os.path.join(self.jpgPath, jpgName)
        # Without the image the annotation would point at a missing file.
        if not cv2.imwrite(jpgPath, frame):
            customPrint(f"failed to write {jpgPath}, xml skipped")
            return
        xmlPath = os.path.join(self.xmlPath, xmlPrefix + ".xml")
        xml_text = self._build_xml(jpgName, jpgPath, frame.shape, result)
        # XML without a declaration is read as UTF-8, so write it as UTF-8
        # regardless of the platform's default encoding.
        with open(xmlPath, 'w', encoding='utf-8') as xml_file:
            xml_file.write(xml_text)
        customPrint(f"{jpgPath}的jpg和xml已写入")

    # Correctly spelled alias; the original name is kept for existing callers.
    generate_xml = generatr_xml
(4) Selecting an available GPU on a multi-GPU machine
import pynvml
from utils.myPrint import customPrint
UNIT = 1024 * 1024
def indexGPU():
    """Return the index of the GPU with the largest share of free memory.

    Every GPU reported by NVML is queried, its free/total memory ratio is
    logged, and the index with the highest ratio is returned (the lowest
    index wins a tie).

    Returns:
        The integer index of the selected GPU.

    Raises:
        RuntimeError: If NVML reports no GPU at all.
    """
    pynvml.nvmlInit()  # initialise NVML
    try:
        gpuDeviceCount = pynvml.nvmlDeviceGetCount()  # number of NVIDIA GPUs
        if gpuDeviceCount == 0:
            raise RuntimeError("NVML reports no NVIDIA GPU")
        free_ratio = {}
        for i in range(gpuDeviceCount):
            # All further queries for GPU i go through its handle.
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            memoryInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
            ratio = memoryInfo.free / memoryInfo.total
            customPrint(f"显存空闲率:{ratio}")
            free_ratio[i] = ratio
        best_gpu = max(free_ratio, key=free_ratio.get)
    finally:
        # Always shut NVML down, even when one of the queries fails.
        pynvml.nvmlShutdown()
    customPrint(f"选择第{best_gpu}号GPU")
    return best_gpu
(5) opencv multithreading, store the image in the queue for the main code to read the image
import cv2
import threading
import redis
import queue
from utils.myPrint import customPrint
class FrameThread(threading.Thread):
    """Background thread that reads frames from a stream into a queue.

    Args:
        rtsp_url: Source passed to ``cv2.VideoCapture``.
        q: Queue that receives every successfully read frame.
    """

    def __init__(self, rtsp_url, q):
        super().__init__()
        self.rtsp_url = rtsp_url
        self.q = q
        # Set to True (from any thread) to make the read loop finish.
        self.thread_exit = False

    def stop(self):
        """Ask the read loop to finish after its current iteration."""
        self.thread_exit = True

    def run(self):
        """Read frames until stopped or until the stream is given up on.

        After 5 consecutive failed reads the capture is reopened on every
        further failure; after 5 reopen attempts without a successful read
        the loop ends. A successful read resets both counters.
        """
        customPrint('已进入取图循环')
        # Counters for consecutive read failures and reopen attempts.
        exit_frame_num = 0
        exit_cap_num = 0
        cap = cv2.VideoCapture(self.rtsp_url)
        try:
            while not self.thread_exit:
                ret, frame = cap.read()
                if ret:
                    exit_frame_num = 0
                    exit_cap_num = 0
                    try:
                        self.q.put(frame, block=True, timeout=3)
                    except queue.Full:
                        # The frame is dropped when the consumer is too slow.
                        customPrint('队列已满,写入失败')
                    continue

                exit_frame_num += 1
                # Reopen the stream once 5 reads in a row have failed.
                if exit_frame_num >= 5:
                    customPrint(f"读流异常,已经开始{exit_frame_num}:{exit_cap_num}重新读流")
                    # Release the broken capture before replacing it so its
                    # connection and decoder are not leaked.
                    cap.release()
                    cap = cv2.VideoCapture(self.rtsp_url)
                    exit_cap_num += 1
                    # Give up after 5 reopen attempts.
                    if exit_cap_num == 5:
                        self.thread_exit = True
                        customPrint('摄像头已经退出')
        finally:
            # Release the capture even if the loop ends with an exception.
            cap.release()
Reference:
YOLOv7 Tensorrt Python Deployment Tutorial