opencv3.4.2 cv2.dnn.blobFromImage

While working on an object detection project, I ran into a problem: detecting objects with a model trained in TensorFlow gave different results from exporting that same trained model to pb and pbtxt files and running detection with OpenCV (version 3.4.2). After comparing the two, I found that loading the model directly in OpenCV gave much worse detection results.

Here is the code for detection using the pb model trained in TensorFlow (only the core part is shown):

# -*- coding: utf-8 -*-
#Imports
import time
start = time.time()
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import cv2
from scipy import misc
 
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
 
# if tf.__version__ < '1.4.0':
#     raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')
  
os.chdir('/root/workspace/models-master/research/object_detection')
  
  
#Env setup 
# This is needed to display the images.
#%matplotlib inline
 
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..") 
#Object detection imports
from utils import label_map_util
 
from utils import visualization_utils as vis_util
 
 
 
 
#Model preparation
# What model to download.
 
# This is the model we just trained
MODEL_NAME = '/root/workspace/models-master/research/object_detection/shangpinshibie_inference_graph10'
 
 
 
# Location of the corresponding frozen model
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

print(PATH_TO_CKPT)
 
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('train_dierpi', 'labelmap.pbtxt')

print(PATH_TO_LABELS)
 
# Change this to the number of classes in your own example (11 here)
NUM_CLASSES = 11
 
 
 
'''
#Download Model
We are using our own model, so there is no need to download one.
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
  file_name = os.path.basename(file.name)
  if 'frozen_inference_graph.pb' in file_name:
    tar_file.extract(file, os.getcwd())
'''   
    
    
#Load a (frozen) Tensorflow model into memory.    
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')    
    
    
#Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
#print('label_map:',label_map)
#print('category_index:',category_index)
 
 
#Helper code
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)
 
 
#Detection
 
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
# Test image directory
PATH_TO_TEST_IMAGES_DIR = '/root/workspace/test_diyipi/'
os.chdir(PATH_TO_TEST_IMAGES_DIR)
TEST_IMAGE_PATHS = os.listdir(PATH_TO_TEST_IMAGES_DIR)
 
# Size, in inches, of the output images.
IMAGE_SIZE = (50, 30)
 
output_path = ('/root/workspace/notebook_code/image_out/')
predict_right_num = 0
all_num = 0

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        for image_path in TEST_IMAGE_PATHS:
            _,label_ = image_path.split('_')
            label_true,_ = label_.split('.') 
#             print(label_true)
            image = Image.open(image_path)
            # The array-based representation of the image will be used later to
            # prepare the result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images of shape [1, None, None, 3].
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
              image_np,
              np.squeeze(boxes),
              np.squeeze(classes).astype(np.int32),
              np.squeeze(scores),
              category_index,
              use_normalized_coordinates=True,
              line_thickness=8)
#             print(classes)
            
            misc.imsave(output_path+image_path, image_np)
#             plt.imshow(image_np)
#             plt.show()

        end = time.time()
    print("Execution Time: ", end - start)

Here is the object detection code run in OpenCV (core part):

import numpy as np
import argparse
import cv2 
import matplotlib.pyplot as plt
from scipy import misc
import time
import os
%matplotlib inline
start = time.time()

output_path = ('/root/workspace/notebook_code/image_out/')
# image = '/root/workspace/test_dierpi/t12.jpg'
prototxt ='/root/workspace/models-master/research/object_detection/shangpinshibie_inference_graph10/frozen_inference_graph.pbtxt'
weights = '/root/workspace/models-master/research/object_detection/shangpinshibie_inference_graph10/frozen_inference_graph.pb'
thr =0.01
input_path= '/root/workspace/test_diyipi/'
image_list = os.listdir(input_path)
print(input_path)

classNames = {1: 'xpp', 2: 'nfsq', 3: 'kl', 4: 'jxb', 5: 'ylzcn', 6: 'nnbg', 7: 'lqy', 8: 'jdb', 9: 'xlyb', 10: 'yxrsf', 11: 'hand'}
net = cv2.dnn.readNetFromTensorflow(weights,prototxt)
# Load images from the input directory and run detection on each one

count = 1
right_count = 1

for image_name in image_list:
    true_label,_ = image_name.split('.')
    _,true_label = true_label.split('_')
    frame = cv2.imread(input_path+image_name)   
#     print(frame)
    frame_resized = cv2.resize(frame,(300,300)) # resize frame for prediction
    heightFactor = frame.shape[0]/300.0
    widthFactor = frame.shape[1]/300.0  
    blob = cv2.dnn.blobFromImage(frame_resized, 1.0/127.5, (300, 300), (127.5,127.5,127.5),True)    
    #Set to network the input blob 
    net.setInput(blob)
    #Prediction of network
    detections = net.forward()

    frame_copy = frame.copy()
    frame_copy2 = frame.copy()
    #Size of frame resize (300x300)
    cols = frame_resized.shape[1] 
    rows = frame_resized.shape[0]
    
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2] #Confidence of prediction 
        if confidence > thr: # Filter prediction 
            class_id = int(detections[0, 0, i, 1]) # Class label

            # Object location 
            xLeftBottom = int(detections[0, 0, i, 3] * cols) 
            yLeftBottom = int(detections[0, 0, i, 4] * rows)
            xRightTop   = int(detections[0, 0, i, 5] * cols)
            yRightTop   = int(detections[0, 0, i, 6] * rows)

            xLeftBottom_ = int(widthFactor * xLeftBottom) 
            yLeftBottom_ = int(heightFactor* yLeftBottom)
            xRightTop_   = int(widthFactor * xRightTop)
            yRightTop_   = int(heightFactor * yRightTop)
            cv2.rectangle(frame, (xLeftBottom_, yLeftBottom_), (xRightTop_, yRightTop_),(0, 0, 0),2)
            # Draw label and confidence of prediction in frame resized
            if class_id in classNames:
                label = classNames[class_id] + ": " + str(confidence)
                print(label)
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_TRIPLEX, 0.8, 1)
                yLeftBottom_ = max(yLeftBottom_, labelSize[1])
                cv2.rectangle(frame, (xLeftBottom_, yLeftBottom_ - labelSize[1]),
                                     (xLeftBottom_ + labelSize[0], yLeftBottom_ + baseLine),
                                     (255, 255, 255), cv2.FILLED)
                cv2.putText(frame, label, (xLeftBottom_, yLeftBottom_),
                            cv2.FONT_HERSHEY_TRIPLEX, 0.8, (0, 0, 0))
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Save the annotated result (frame is now RGB, which is what misc.imsave expects)
    misc.imsave(output_path + image_name, frame)

blob = cv2.dnn.blobFromImage(frame_resized, 1.0/127.5, (300, 300), (127.5,127.5,127.5), True) ------ this function is the interface that turns an image into the network's input, and its parameters matter a great deal: they directly affect the model's detection quality. The first few parameters (scale factor, size, mean) must match the preprocessing applied to images when the model was trained. The full call is blob = cv2.dnn.blobFromImage(image, scalefactor, size, mean, swapRB); the last parameter, swapRB, selects whether to swap the R and B color channels. When loading a Caffe model with OpenCV this generally needs to be set to False, while for a TensorFlow model it should be True. Only then will object detection results stop differing between the OpenCV and TensorFlow frameworks.
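To make the parameter behavior concrete, here is a minimal standalone sketch (the image path is a placeholder; any test image works). With scalefactor=1.0/127.5 and mean=(127.5, 127.5, 127.5), blobFromImage computes approximately (pixel - 127.5) / 127.5, mapping pixel values into [-1, 1] and returning a 4D NCHW array:

import cv2
import numpy as np

# Placeholder path; substitute any test image.
img = cv2.imread('/root/workspace/test_diyipi/sample.jpg')  # OpenCV loads BGR

# Same preprocessing as above: output = (pixel - mean) * scalefactor,
# so pixel values land in roughly [-1, 1].
blob = cv2.dnn.blobFromImage(img, 1.0/127.5, (300, 300),
                             (127.5, 127.5, 127.5), swapRB=True)
print(blob.shape)               # (1, 3, 300, 300): NCHW layout
print(blob.min(), blob.max())   # approximately -1.0 ... 1.0

# swapRB only reorders the color channels. Channel 0 of the swapped blob
# equals channel 2 (the R channel) of the unswapped one:
rgb = cv2.dnn.blobFromImage(img, 1.0, (300, 300), (0, 0, 0), swapRB=True)
bgr = cv2.dnn.blobFromImage(img, 1.0, (300, 300), (0, 0, 0), swapRB=False)
print(np.allclose(rgb[0, 0], bgr[0, 2]))  # True

The 1/127.5 scale and 127.5 mean correspond to the [-1, 1] normalization typically used when training SSD/MobileNet-style TensorFlow detectors, which is why changing them (or swapRB) makes OpenCV's detections diverge from TensorFlow's.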


Reprinted from blog.csdn.net/weixin_42280271/article/details/81740603