mask rcnn onnx test

Mask R-CNN is an instance segmentation algorithm that can be used for target detection, target instance segmentation, and target key point detection.
The onnx version of it is tested below.

The model and related information of onnx can be downloaded here .

Test picture:

In the preprocessing part of the picture, Image is used to read the picture in the reference , and it is changed to the opencv version here.

import numpy as np
import cv2
import onnxruntime
from utils import _COLORS  #自己定义一些mask颜色

#这里把Image版本改成了opencv版本
def preprocess(image):
    # Resize
    ratio = 800.0 / min(image.shape[0], image.shape[1])
    image = cv2.resize(image, (int(ratio * image.shape[1]), int(ratio * image.shape[0])))

    # Convert to BGR
    image = np.array(image)[:, :, [2, 1, 0]].astype('float32')

    # HWC -> CHW
    image = np.transpose(image, [2, 0, 1])

    # Normalize
    mean_vec = np.array([102.9801, 115.9465, 122.7717])
    for i in range(image.shape[0]):
        image[i, :, :] = image[i, :, :] - mean_vec[i]

    # Pad to be divisible of 32
    import math
    padded_h = int(math.ceil(image.shape[1] / 32) * 32)
    padded_w = int(math.ceil(image.shape[2] / 32) * 32)

    padded_image = np.zeros((3, padded_h, padded_w), dtype=np.float32)
    padded_image[:, :image.shape[1], :image.shape[2]] = image
    image = padded_image

    return image

post-processing part.
The original version is to draw the outline, but here it is changed to draw the mask area.

def display_objdetect_image(image, boxes, labels, scores, masks, score_threshold=0.7):
    # Resize boxes
    ratio = 800.0 / min(image.shape[0], image.shape[1])
    boxes /= ratio

    image = np.array(image)

    for mask, box, label, score in zip(masks, boxes, labels, scores):
        # Showing boxes with score > 0.7
        if score <= score_threshold:
            continue

        # Finding contour based on mask
        #mask:(1,28,28)
        mask = mask[0, :, :, None]  #(28,28,1)
        int_box = [int(i) for i in box]
        mask = cv2.resize(mask, (int_box[2]-int_box[0]+1,int_box[3]-int_box[1]+1)) #box区域大小
        mask = mask > 0.5
        im_mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8) #image大小的全0mask
        x_0 = max(int_box[0], 0)
        x_1 = min(int_box[2] + 1, image.shape[1])
        y_0 = max(int_box[1], 0)
        y_1 = min(int_box[3] + 1, image.shape[0])
        mask_y_0 = max(y_0 - box[1], 0)
        mask_y_1 = max(0, mask_y_0 + y_1 - y_0)
        mask_x_0 = max(x_0 - box[0], 0)
        mask_x_1 = max(0, mask_x_0 + x_1 - x_0)
     
        im_mask[y_0:y_1, x_0:x_1] = mask[  #box区域填上mask,(480,640,1)
            mask_y_0 : mask_y_1, mask_x_0 : mask_x_1
        ]

        im_mask = im_mask[:, :, None] #(480,640,1)
        mask3 = image.copy()  #填上颜色的mask

        color = (_COLORS[label]*255).astype(np.uint8).tolist()
        mask3[im_mask[:,:,0]>0] = color
        image = cv2.addWeighted(image, 0.5, mask3, 0.5, 0)
		
        text = "{}:{:.1f}%".format(classes[label], score * 100)
        txt_color = (0, 0, 0) if np.mean(_COLORS[label]) > 0.5 else (255, 255, 255)
        font = cv2.FONT_HERSHEY_SIMPLEX
        txt_size = cv2.getTextSize(text, font, 0.5, 2)[0]
        #画box框
        cv2.rectangle(image, (x_0, y_0), (x_1, y_1), color, 1)
        #标签区域框
        cv2.rectangle(
            image,
            (x_0, y_0 - txt_size[1] - 1),
            (x_0 + txt_size[0] + txt_size[1], y_0 - 1),
            color,
            -1,
        )
        #填上标签
        cv2.putText(image, text, (x_0, y_0 - 1), font, 0.5, txt_color, thickness=1)
    cv2.imshow("image", image)
    cv2.waitKey(0)

ONNX reasoning part (main function)

if __name__ == '__main__':
    img = cv2.imread('./demo.jpg')
    #前处理
    img_data = preprocess(img)

    #onnx process
    session = onnxruntime.InferenceSession("./MaskRCNN-12.onnx",
                                           providers=['CUDAExecutionProvider',
                                                      'CPUExecutionProvider'])
    model_inputs = session.get_inputs()
    input_names = [model_inputs[i].name for i in range(len(model_inputs))]

    input_shape = model_inputs[0].shape
    input_height = input_shape[1]
    input_width = input_shape[2]
    model_outputs = session.get_outputs()
    output_names = [model_outputs[i].name for i in range(len(model_outputs))]
    # time1 = time.time()
    outputs = session.run(output_names, {
    
    input_names[0]: img_data})
    # time2 = time.time()
    boxes = outputs[0]
    labels = outputs[1]
    scores = outputs[2]
    masks = outputs[3]
  	#后处理
    display_objdetect_image(img, boxes, labels, scores, masks)

Please add a picture description

Guess you like

Origin blog.csdn.net/level_code/article/details/130596715
Recommended