OpenCV Example (9): Moving Target Detection Based on Deep Learning (3): YOLOv3 Object Recognition


Object detection, roughly speaking, means feeding in an image or video and, after processing, obtaining the position of each target (for example, the coordinates of the top-left and bottom-right corners of its bounding box), the predicted class of the target, and the prediction confidence. We covered a lot of theory earlier; now it is time to put it into practice. For beginners, implementing the YOLO algorithm from scratch is not realistic. Fortunately, OpenCV's DNN (Deep Neural Network) module wraps the Darknet framework (and with it the YOLO algorithm), so it is convenient to run a trained deep learning model directly with OpenCV. This time we use YOLOv3, one of the strongest object detectors of its generation. The basic steps are to let OpenCV load the pre-trained YOLOv3 model and then perform various kinds of detection, such as recognizing objects in an image or opening the computer's built-in camera for live object detection.
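For concreteness, a single detection result can be pictured as a record like the following (a purely illustrative structure, not an OpenCV API):

    detection_result = {
        "box": (100, 150, 420, 380),   # x1, y1 (top-left), x2, y2 (bottom-right)
        "class": "dog",                # predicted category
        "confidence": 0.92,            # prediction confidence
    }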

To load the pre-trained YOLOv3 model, you need to prepare three files (in the project directory): yolov3.cfg, yolov3.weights and coco.names. Among them, yolov3.cfg is the YOLOv3 network configuration file, yolov3.weights is the weight file, and coco.names is the label (class name) file.
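As a quick sanity check before loading the model, a minimal sketch like the following verifies that all three files are present (the download locations in the comments are the usual Darknet sources):

from pathlib import Path

# yolov3.cfg     - network definition (from the Darknet GitHub repository)
# yolov3.weights - pre-trained COCO weights (https://pjreddie.com/media/files/yolov3.weights)
# coco.names     - the 80 COCO class names, one per line
required = ["yolov3.cfg", "yolov3.weights", "coco.names"]
missing = [name for name in required if not Path(name).exists()]
if missing:
    raise FileNotFoundError("Missing model files: " + ", ".join(missing))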

1. Recognizing objects with YOLOv3

Use the OpenCV dnn module to load the YOLO model; the code is as follows:

     net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

Read the class names from coco.names and store them in a list; the code is as follows:

classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
print(classes)

Full code:

import cv2
import numpy as np

net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []
with open("coco.names", "r") as f:   #这里使用的是coco所训练的模型yolov3.cfg所以这里对应为coco.names
    classes = [line.strip() for line in f.readlines()]

print(classes)

layer_names = net.getLayerNames()
print(layer_names)
      
# getUnconnectedOutLayers() returns an Nx1 array in older OpenCV versions and a
# flat array in newer ones; flatten() handles both
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]
print(output_layers)

img = cv2.imread("demo1.jpg")

# Get the image height, width and number of channels
height, width, channels = img.shape
print('The image height is:',height)
print('The image width is:',width)
print('The image channels is:',channels)

blob = cv2.dnn.blobFromImage(img, 1.0 / 255.0, (416, 416), (0, 0, 0), True, crop=False)


from matplotlib import pyplot as plt

fig = plt.gcf()
fig.set_size_inches(20, 10)

# blob has shape (1, 3, 416, 416): batch, channels, height, width.
# Iterating over a batch item yields the 3 individual channels, each a
# single-channel 416x416 image, so display them one per subplot.
num = 0
for b in blob:
    for img_blob in b:
        num += 1
        ax = plt.subplot(1, 3, num)
        ax.imshow(img_blob)
        title = 'blob_image:{}'.format(num)
        ax.set_title(title, fontsize=20)


net.setInput(blob)
outs = net.forward(output_layers)

for i in range(len(outs)):
    print('The {} layer out shape is:'.format(i), outs[i].shape)

class_ids = []
confidences = []
boxes = []

# Print the class-score vector (the last 80 values) of the first two
# detections that have any non-zero class score
i = 0
for out in outs:
    for detection in out:
        if sum(detection[5:]) > 0:
            print(detection[5:])
            i += 1
        if i == 2:
            break
    if i == 2:
        break
 

# Inspect the first detection of the first output layer
i = 0
for out in outs:
    for detection in out:
        print('Center X as a fraction of image width:', detection[0])
        print('Center Y as a fraction of image height:', detection[1])
        print('Box width W as a fraction of image width:', detection[2])
        print('Box height H as a fraction of image height:', detection[3])
        print('Confidence of this bounding box:', detection[4])
        break
    break
 



plt_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

fig = plt.gcf()
fig.set_size_inches(20, 10)

plt.imshow(plt_img)

# Jupyter keeps variables between runs, so re-create the lists each time
class_ids = []
confidences = []
boxes = []

i = 0

for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            
            w = int(detection[2] * width)        
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)
            label = classes[class_id]
            plt.gca().add_patch(
            plt.Rectangle((x, y), w,
                          h, fill=False,
                          edgecolor=(0, 1, 1), linewidth=2)
            )
            plt.text(x, y - 10, label, color = (1, 0, 0), fontsize=20)
            
            print('object {} :'.format(i), label)
            i += 1

plt.show()
  

plt_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

fig = plt.gcf()
fig.set_size_inches(30, 20)

ax_img = plt.subplot(1, 2, 1)

ax_img.imshow(plt_img)

# Jupyter keeps variables between runs, so re-create the lists once more
class_ids = []
confidences = []
boxes = []

i = 0

for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            
            w = int(detection[2] * width)        
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)
            label = classes[class_id]
            plt.gca().add_patch(
            plt.Rectangle((x, y), w,
                          h, fill=False,
                          edgecolor=(0, 1, 1), linewidth=2)
            )
            plt.text(x, y - 10, label, color = (1, 0, 0), fontsize=20)
            
            print('object {} :'.format(i), label + ' '*(10 - len(label)), 'confidence :{}'.format(confidence))
            i += 1

print(confidences)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
print(indexes, end='')

ax_img = plt.subplot(1, 2, 2)
ax_img.imshow(plt_img)
for j in range(len(boxes)):
    if j in indexes:
        x, y, w, h = boxes[j]
        label = classes[class_ids[j]]
        plt.gca().add_patch(
            plt.Rectangle((x, y), w,
                          h, fill=False,
                          edgecolor=(0, 1, 1), linewidth=2)
            )
        plt.text(x, y - 10, label, color = (1, 0, 0), fontsize=20)
        

plt.show()
 

The code to get the output layers:

     layer_names = net.getLayerNames()
     print(layer_names)

     output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]
     print(output_layers)

Among them, the getLayerNames function returns the names of all layers in the network, and the getUnconnectedOutLayers function returns the indices of the layers whose outputs are not connected to any further layer, i.e. the output layers. These indices are 1-based, hence the - 1 when indexing layer_names.
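Recent OpenCV 4.x builds also provide getUnconnectedOutLayersNames, which returns the output layer names directly, so the two steps above collapse into one line. A minimal sketch, assuming such a build:

     # Returns the output layer names directly,
     # e.g. ['yolo_82', 'yolo_94', 'yolo_106'] for YOLOv3
     output_layers = net.getUnconnectedOutLayersNames()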

Add the code to read the image and build the input blob:

    img = cv2.imread("demo1.jpg")
    # 获取图像尺寸与通道值
    height, width, channels = img.shape
    print('The image height is:',height)
    print('The image width is:',width)
    print('The image channels is:',channels)
    
    blob = cv2.dnn.blobFromImage(img, 1.0 / 255.0, (416, 416), (0, 0, 0), True,
crop=False)

Running the program at this point prints the image height, width and number of channels:

     The image height is: 2250
     The image width is: 4000
     The image channels is: 3

To visualize the blob with Matplotlib, add the import:

     from matplotlib import pyplot as plt

OpenCV stores images in BGR order, while Matplotlib expects RGB, so cv2.COLOR_BGR2RGB is used to convert BGR to RGB before displaying.

Use the setInput function to feed the blob into the network, then call forward with the names of the output layers to run a forward pass and compute the network outputs. Since output_layers lists 3 output layers, outs is a list containing 3 arrays.
The loop over outs prints the following:

     The 0 layer out shape is: (507, 85)
     The 1 layer out shape is: (2028, 85)
     The 2 layer out shape is: (8112, 85)

Detection and labeling are then performed, with three lists recording the results.
Among them, class_ids records the class index of each detection; confidences records the confidence the algorithm assigns to each detected object; boxes records the box coordinates. For a 416×416 input image, YOLOv3 places 3 prior boxes in each grid cell of the feature map at each of its 3 scales, for a total of 13×13×3 + 26×26×3 + 52×52×3 = 10647 predictions, which matches the 507 + 2028 + 8112 rows printed above. Each prediction is an 85-dimensional (4 + 1 + 80) vector: box coordinates (4 values), box confidence (1 value), and class probabilities (80 values, one per COCO class). So detection[5:] extracts the 80 class scores (similar to a one-hot code), and the index of the maximum score gives the corresponding class name in coco.names.
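As a quick sanity check, the grid arithmetic can be verified against the shapes printed above (a small sketch reusing the outs list from the code):

    # 3 prior boxes per grid cell at each of the 3 feature-map scales of a 416x416 input
    grids = [13, 26, 52]
    per_scale = [g * g * 3 for g in grids]
    print(per_scale, 'total:', sum(per_scale))   # [507, 2028, 8112] total: 10647

    # Matches the first dimension of the three output arrays
    print([out.shape[0] for out in outs])        # [507, 2028, 8112]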

During detection, a double-box (or multi-box) effect can appear: the same object is framed more than once. The OpenCV dnn module ships with the NMSBoxes() function, which uses the NMS (non-maximum suppression) algorithm to solve this multi-box problem. The purpose of NMS is to keep, among overlapping boxes detecting the same target, only the box with the highest confidence. In the output below, only the index of the highest-confidence box for each target is retained: for example, object 0: tvmonitor and object 3: tvmonitor have confidences 0.9334805607795715 and 0.9716598987579346 respectively, so object 3: tvmonitor is kept and element [0] does not appear in indexes. The remaining cases follow the same reasoning.
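For intuition, here is a minimal pure-Python sketch of the greedy NMS idea (an illustration of the principle, not OpenCV's actual implementation); boxes use the same [x, y, w, h] format as the code above:

def iou(a, b):
    # Intersection-over-union of two [x, y, w, h] boxes
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[0] + a[2], b[0] + b[2]), min(a[1] + a[3], b[1] + b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    union = a[2] * a[3] + b[2] * b[3] - inter
    return inter / union if union > 0 else 0

def nms(boxes, confidences, score_threshold=0.5, nms_threshold=0.4):
    # Visit boxes in order of decreasing confidence; keep a box only if it
    # does not overlap an already-kept box by more than nms_threshold
    order = sorted(range(len(boxes)), key=lambda k: confidences[k], reverse=True)
    keep = []
    for k in order:
        if confidences[k] < score_threshold:
            continue
        if all(iou(boxes[k], boxes[j]) <= nms_threshold for j in keep):
            keep.append(k)
    return keep

# analogous to: indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
indexes = nms(boxes, confidences)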


2. Drawing detection boxes in different colors for different object classes

Code:

import cv2
import numpy as np
from matplotlib import pyplot as plt

# Load Yolo
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]

layer_names = net.getLayerNames()
# flatten() copes with both old (Nx1) and new (flat) return shapes of getUnconnectedOutLayers()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

# One random color per class, scaled to the 0-1 range Matplotlib expects
colors = np.random.uniform(0, 255, size=(len(classes), 3)) / 255

# Loading image
img = cv2.imread("demo1.jpg")
# img = cv2.resize(img, None, fx=0.4, fy=0.4)
height, width, channels = img.shape

# Detecting objects
blob = cv2.dnn.blobFromImage(img, 1.0 / 255.0, (416, 416), (0, 0, 0), True, crop=False)

net.setInput(blob)
outs = net.forward(output_layers)

# Showing informations on the screen
class_ids = []
confidences = []
boxes = []

fig = plt.gcf()
fig.set_size_inches(20, 10)
plt_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(plt_img)

for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            # Object detected
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Rectangle coordinates
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)

indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

for i in range(len(boxes)):
    if i in indexes:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        # Index colors by class id so that boxes of the same class share a color
        color = colors[class_ids[i]]
        plt.gca().add_patch(
            plt.Rectangle((x, y), w,
                          h, fill=False,
                          edgecolor=color, linewidth=2)
            )
        plt.text(x, y - 10, label, color = color, fontsize=20)
 

plt.show()
 

 

Output result:


3. Object detection without Matplotlib

Code:

import cv2
import numpy as np

# Load Yolo
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
# flatten() copes with both old (Nx1) and new (flat) return shapes of getUnconnectedOutLayers()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]
# One random BGR color (0-255) per class for OpenCV drawing
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Loading image
img = cv2.imread("demo1.jpg")
height, width, channels = img.shape

# Detecting objects
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)  # 0.00392 ≈ 1/255
        
net.setInput(blob)
outs = net.forward(output_layers)

# Showing informations on the screen
class_ids = []
confidences = []
boxes = []
for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            # Object detected
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Rectangle coordinates
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)

indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

font = cv2.FONT_HERSHEY_SIMPLEX
for i in range(len(boxes)):
    if i in indexes:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        # Index colors by class id so that boxes of the same class share a color
        color = colors[class_ids[i]]
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)
        cv2.putText(img, label, (x, y - 20), font, 2, color, 3)

cv2.namedWindow("Image",0)
cv2.resizeWindow("Image", 1600, 900)
cv2.imshow("Image", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Output result:

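The introduction mentioned detecting objects with the computer's built-in camera; the same per-image pipeline simply runs once per frame. A minimal sketch, reusing the net, classes, colors and output_layers objects from the code above and assuming camera index 0:

cap = cv2.VideoCapture(0)  # built-in camera
while True:
    ret, frame = cap.read()
    if not ret:
        break
    height, width = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0 / 255.0, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
    # ...collect boxes/confidences/class_ids and run NMSBoxes exactly as above,
    # then draw the rectangles and labels on frame...
    cv2.imshow("Camera", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
        break
cap.release()
cv2.destroyAllWindows()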
