前提
- 安装tensorflow的object detection库
- clone Google的TensorRT仓库并且安装
- 安装好TensorRT
本机环境:
- python3.5.5
- tensorflow1.9.0源码编译版
- cuda-9.0
- cudnn7.3.1
- GTX1080x2
- Intel E5
注意:使用object detection的时候需要先执行 export PYTHONPATH=~/models/research:~/models/research/slim:${PYTHONPATH}
示例代码
from tftrt.examples.object_detection import optimize_model
import tensorflow as tf
import os
from PIL import Image
import numpy as np
import glob
import cv2
import json
import time
from object_detection.utils.label_map_util import get_label_map_dict,create_categories_from_labelmap
class TensorRT_OP(object):
    """Run an object-detection graph produced by ``optimize_model``.

    The constructor optimizes the model once; ``predict``, ``vis_image`` and
    ``benchmark`` each import the resulting graph into a fresh TensorFlow
    graph/session and run it.
    """

    def __init__(self, config_path, checkpoint_path):
        """Optimize the model described by a pipeline config and a checkpoint.

        Args:
            config_path: Path to the detection ``pipeline.config`` file.
            checkpoint_path: Checkpoint prefix of the trained model.
        """
        self.config_path = config_path
        self.checkpoint_path = checkpoint_path
        # The result is later handed to tf.import_graph_def, and a copy is
        # written to 'opt.pb' in the current working directory.
        # NOTE(review): INT8 precision usually needs calibration data --
        # confirm that this optimize_model version handles that by itself.
        self.network = optimize_model(config_path=self.config_path,
                                      checkpoint_path=self.checkpoint_path,
                                      use_trt=True,
                                      output_path='opt.pb',
                                      precision_mode='int8')
        # Node names looked up in the imported graph.
        self.INPUT_NAME = 'image_tensor'
        self.BOXES_NAME = 'detection_boxes'
        self.CLASSES_NAME = 'detection_classes'
        self.SCORES_NAME = 'detection_scores'
        self.MASKS_NAME = 'detection_masks'
        self.NUM_DETECTIONS_NAME = 'num_detections'
        # File-name constants; not read by any method of this class.
        self.FROZEN_GRAPH_NAME = 'frozen_inference_graph.pb'
        self.PIPELINE_CONFIG_NAME = 'pipeline.config'
        self.CHECKPOINT_PREFIX = 'model.ckpt'
        self.tf_config = self.set_gpu()

    def read_image(self, image_path, image_shape):
        """Load an image as an RGB numpy array, optionally resized.

        Args:
            image_path: Path of the image file.
            image_shape: ``(height, width)`` to resize to, or ``None`` to keep
                the original size. It is reversed before being handed to
                PIL's ``resize``, which expects ``(width, height)``.

        Returns:
            The image as a numpy array.
        """
        image = Image.open(image_path).convert('RGB')
        if image_shape is not None:
            image = image.resize(image_shape[::-1])
        return np.array(image)

    def set_gpu(self):
        """Return a session config with on-demand GPU memory growth enabled."""
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        return tf_config

    def _import_network(self, tf_graph):
        """Import the optimized graph and look up its input/output tensors.

        Must be called while ``tf_graph`` is the default graph.

        Returns:
            ``(input_tensor, [boxes, classes, scores, num_detections])``.
        """
        tf.import_graph_def(self.network, name='')
        tf_input = tf_graph.get_tensor_by_name(self.INPUT_NAME + ':0')
        output_names = (self.BOXES_NAME, self.CLASSES_NAME,
                        self.SCORES_NAME, self.NUM_DETECTIONS_NAME)
        fetches = [tf_graph.get_tensor_by_name(name + ':0')
                   for name in output_names]
        return tf_input, fetches

    @staticmethod
    def _timed_run(tf_sess, fetches, tf_input, image):
        """Run one single-image inference; return ``(outputs, seconds)``."""
        start = time.time()
        outputs = tf_sess.run(fetches, feed_dict={tf_input: [image]})
        return outputs, float(time.time() - start)

    @staticmethod
    def _boxes_to_pixels(boxes, count, image_height, image_width):
        """Convert normalized boxes to integer pixel ``[x, y, width, height]``.

        Args:
            boxes: Sequence of ``[ymin, xmin, ymax, xmax]`` rows in 0..1.
            count: Number of leading rows to convert.
            image_height: Height of the image the boxes refer to.
            image_width: Width of the image the boxes refer to.

        Returns:
            Integer numpy array of shape ``(n, 4)`` (``(0, 4)`` when empty).
        """
        pixel_boxes = [
            [
                int(xmin * image_width),             # x of the top-left corner
                int(ymin * image_height),            # y of the top-left corner
                int((xmax - xmin) * image_width),    # width
                int((ymax - ymin) * image_height),   # height
            ]
            for ymin, xmin, ymax, xmax in boxes[:count]
        ]
        return np.array(pixel_boxes, dtype=int).reshape(-1, 4)

    @staticmethod
    def _timing_stats(runtimes):
        """Summarize per-run durations given in seconds.

        The first run is treated as warm-up and skipped whenever more than
        one measurement exists.

        Returns:
            ``(mean_latency_ms, runs_per_second, runtimes_ms)``.

        Raises:
            ValueError: If ``runtimes`` is empty.
        """
        if not runtimes:
            raise ValueError("no runtimes were recorded")
        steady = runtimes[1:] if len(runtimes) > 1 else list(runtimes)
        latency_ms = 1000.0 * float(np.mean(steady))
        # Divide by the number of runs that were actually summed.
        fps = float(len(steady) / np.sum(steady))
        return latency_ms, fps, [1000.0 * r for r in steady]

    def vis_image(self, image_path, image_shape, label_list, thr=0.3, save=None):
        """Draw detections scoring above ``thr`` and save or display the image.

        Args:
            image_path: Path of the image to run detection on.
            image_shape: ``(height, width)`` used both for inference and for
                drawing, or ``None`` to keep the original size.
            label_list: List of category dicts; entry ``class_id - 1`` must
                have a ``"name"`` key.
                NOTE(review): assumes class ids are 1-based and contiguous.
            thr: Minimum score for a detection to be drawn.
            save: Output image path; when ``None`` the image is shown in a
                window instead.

        Raises:
            IOError: If OpenCV cannot read ``image_path``.
        """
        image = cv2.imread(image_path)
        if image is None:
            raise IOError("Cannot read image: {}".format(image_path))
        if image_shape is not None:
            # cv2.resize expects (width, height).
            image = cv2.resize(image, tuple(image_shape[::-1]))
        # Predict at the same shape so the pixel boxes match the drawn image.
        output_dict = self.predict(image_path, shape=image_shape)
        boxes = output_dict['boxes']
        classes = output_dict['classes']
        scores = output_dict['scores']
        detect_num = int(np.ravel(output_dict['nums'])[0])
        for i in range(detect_num):
            if scores[0][i] > thr:
                # Plain Python ints: OpenCV may reject numpy integer scalars.
                x, y, width, height = (int(v) for v in boxes[i])
                index = int(classes[0][i])
                score_info = "{:.2f}:{}".format(scores[0][i], label_list[index-1]["name"])
                cv2.rectangle(image, (x, y), (x + width, y + height), (0, 255, 0), 1)
                cv2.putText(image, score_info, (x - 5, y - 4),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.4, (0, 255, 0), 1)
        if save is not None:
            cv2.imwrite(save, image)
        else:
            cv2.imshow("TRT", image)
            cv2.waitKey()

    def predict(self, image_path, shape=(300, 300)):
        """Run detection on a single image.

        Args:
            image_path: Path of the image file.
            shape: ``(height, width)`` the image is resized to before
                inference, or ``None`` to keep the original size.

        Returns:
            Dict with ``"boxes"`` (integer pixel ``[x, y, width, height]``
            rows relative to the resized image), plus ``"classes"``,
            ``"scores"`` and ``"nums"`` exactly as returned by the session.
        """
        output_dict = {}
        with tf.Graph().as_default() as tf_graph:
            with tf.Session(config=self.tf_config) as tf_sess:
                tf_input, fetches = self._import_network(tf_graph)
                image = self.read_image(image_path, shape)
                boxes, classes, scores, num_detections = tf_sess.run(
                    fetches, feed_dict={tf_input: [image]})
                image_height, image_width = image.shape[:2]
                detect_num = int(np.ravel(num_detections)[0])
                pixel_boxes = self._boxes_to_pixels(
                    boxes[0], detect_num, image_height, image_width)
                output_dict.update({"boxes": pixel_boxes, "classes": classes,
                                    "scores": scores, "nums": num_detections})
        return output_dict

    def benchmark(self, images_dir, shape=(300, 300), save='result.json',
                  single=False, num_runs=25, pattern='*.png'):
        """Measure inference latency and throughput.

        Args:
            images_dir: Directory of images, or the path of one image file
                when ``single`` is true.
            shape: ``(height, width)`` every image is resized to.
            save: Path of the JSON file the statistics are written to, or
                ``None`` to skip writing.
            single: When true, run the same image ``num_runs`` times;
                otherwise run every matching image in ``images_dir`` once.
            num_runs: Number of repetitions in single-image mode.
            pattern: Glob pattern selecting the files in directory mode.

        Returns:
            Statistics dict. Single mode: ``inference`` (ms), ``fps``,
            ``runtimes_ms``, ``scores`` (last run's scores above 0.3,
            formatted as strings). Directory mode: ``avg_latency_ms``,
            ``avg_throughput_fps``, ``runtimes_ms``. The first run is
            excluded as warm-up when more than one run was made.

        Raises:
            ValueError: If ``num_runs`` is below 1 in single mode, or no
                file matches ``pattern`` in directory mode.
        """
        if single:
            if num_runs < 1:
                raise ValueError("num_runs must be at least 1")
            image_paths = []
        else:
            image_paths = sorted(glob.glob(os.path.join(images_dir, pattern)))
            if not image_paths:
                raise ValueError("No images matching {!r} found in {}".format(
                    pattern, images_dir))

        runtimes = []
        scores_list = []
        with tf.Graph().as_default() as tf_graph:
            with tf.Session(config=self.tf_config) as tf_sess:
                tf_input, fetches = self._import_network(tf_graph)
                if single:
                    image = self.read_image(images_dir, shape)
                    for _ in range(num_runs):
                        outputs, elapsed = self._timed_run(
                            tf_sess, fetches, tf_input, image)
                        runtimes.append(elapsed)
                    # Every run sees the same image, so report the scores of
                    # the last run only.
                    scores = outputs[2]
                    for score in scores[0, :]:
                        if score > 0.3:
                            scores_list.append("{:.4f}".format(score))
                else:
                    for image_path in image_paths:
                        image = self.read_image(image_path, shape)
                        _, elapsed = self._timed_run(
                            tf_sess, fetches, tf_input, image)
                        runtimes.append(elapsed)

        latency_ms, fps, runtimes_ms = self._timing_stats(runtimes)
        if single:
            statistics = {
                'inference': latency_ms,
                'fps': fps,
                'runtimes_ms': runtimes_ms,
                'scores': scores_list,
            }
        else:
            statistics = {
                'avg_latency_ms': latency_ms,
                'avg_throughput_fps': fps,
                'runtimes_ms': runtimes_ms,
            }
        if save is not None:
            with open(save, 'w') as f:
                json.dump(statistics, f)
        return statistics
def main():
    """Optimize the exported model, save one annotated image and benchmark it.

    All paths are hard-coded for the author's machine. The annotated image
    and the benchmark JSON are written into the model export directory.
    """
    MODEL_PATH = '/mnt/train_chess/chess_mult/ssd_inception_v2_coco_2018_01_28_trainoutput/export_models'
    pbtxt_file = '/mnt/chess_all/Origin_data/tf/mult_label_map.pbtxt'
    image_path = '/mnt/train_chess/test_image/example.png'
    config_path = os.path.join(MODEL_PATH, 'pipeline.config')
    checkpoint_path = os.path.join(MODEL_PATH, 'model.ckpt')

    label_list = create_categories_from_labelmap(pbtxt_file)
    t = TensorRT_OP(config_path, checkpoint_path)

    # vis_image runs the prediction itself, so no separate predict() call
    # is needed before drawing.
    save_image_path = os.path.join(MODEL_PATH, "TensorRT_Predict_int8.png")
    t.vis_image(image_path, (300, 300), label_list, save=save_image_path)

    # Single-image mode: the same image is run repeatedly and the timing
    # statistics are written to the JSON file.
    t.benchmark(images_dir=image_path, single=True,
                save=os.path.join(MODEL_PATH, "Tensorrt_result_int8.json"))
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()