cv2调用YOLO实现目标检测

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/github_39611196/article/details/88087916

本文主要介绍基于cv2模块,调用YOLO目标检测算法,实现目标检测。

YOLO的全称是You Only Look Once,它是一种基于深度神经网络的对象识别和定位算法,其最大的特点是运行速度很快,可以用于实时系统。

下载相关文件:

类名文件:https://github.com/pjreddie/darknet/blob/master/data/coco.names?raw=true

权值文件:https://pjreddie.com/media/files/yolov3.weights

配置文件:https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg?raw=true

其中类名文件和权值文件打开后,复制到txt文件中,并更改文件名即可。

下面是实现代码:

import cv2
import sys
import numpy as np
import os.path

# 初始化参数
confThreshold = 0.5  # 置信度阈值
nmsThreshold = 0.4  # 非最大抑制阈值
inpWidth = 416 # 网络输入图像的宽度
inpHeight = 416 # 网络输入图像的高度

# 加载类名
classesFile = 'coco.names'
classes = None
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# 模型的配置文件和权值文件
modelConfiguration = 'yolov3.cfg'
modelWeights = 'yolov3.weights'

net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# 获取输出层的名称
def getOutputsNames(net):
    # 获取网络中所有层的名称
    layersNames = net.getLayerNames()
    # 获取输出层的名称
    return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]

# 绘制预测得到的边界框
def drawPred(classId, conf, left, top, right, bottom):
    # 绘制边界框
    cv2.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)
    
    label = '%.2f' % conf
    
    # 通过类名获取标签值和置信度
    if classes:
        assert(classId < len(classes))
        label = '%s: %s' % (classes[classId], label)
        
    # 在边界框上显示标签值
    labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, labelSize[1])
    cv2.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255, 255, 255), cv2.FILLED)
    cv2.putText(frame, label, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 1)

# 使用非最大值抑制移除低置信度的边界框
def postprocess(frame, outs):
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]
    classIds = []
    confidences = []
    boxes = []
    
    # 扫描从网络输出的所有边界框并仅保留
    # 置信度得分很高的边界框,将框的类标签指定为具有最高分数的类。
    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])
                
    # 执行非最大抑制以消除置信度较低的冗余重叠框
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    for i in indices:
        i = i[0]
        box = boxes[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)

# 处理输入
winName = 'Deep learning object detection in OpenCV'
cv2.namedWindow(winName, cv2.WINDOW_NORMAL)

outputFile = 'yolo_out_py.avi'
cap = cv2.VideoCapture('run.mp4')

vid_writer = cv2.VideoWriter(outputFile, cv2.VideoWriter_fourcc('M','J','P','G'), 30, (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))

while cv2.waitKey(1) < 0:
    # 从视频获取帧
    hasFrame, frame = cap.read()
    
    # 在视频结束时终止程序
    if not hasFrame:
        print('Done processing !!!')
        print('Output file is stored as ', outputFile)
        cv2.waitKey(3000)
        break
    
    # 从框架创建4D blob。
    blob = cv2.dnn.blobFromImage(frame, 1/ 255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)
    
    # 设置网络的输入
    net.setInput(blob)
    
    # 运行前向传递以获得输出层的输出
    outs = net.forward(getOutputsNames(net))
    
    # 移除低置信度的边界框
    postprocess(frame, outs)
    
    # 提出效率信息。 函数getPerfProfile返回推理的总时间(t)和每个层的时间(在layersTimes中)
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
    cv2.putText(frame, label, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
    
    vid_writer.write(frame.astype(np.uint8))
    cv2.imshow(winName, frame)

测试结果:

猜你喜欢

转载自blog.csdn.net/github_39611196/article/details/88087916