Training your own object detection model with the TensorFlow Object Detection API

Part 1: A detailed setup guide for the TensorFlow Object Detection API

A brief introduction to the TensorFlow Object Detection API:

This API is an open-source framework built on top of TensorFlow that makes it easy to construct, train, and deploy object detection models.

For installing TensorFlow itself: there are plenty of tutorials online, covering both the CPU and GPU versions.

Environment:

       win10

       pycharm

       anaconda

       python 3.6

       tensorflow gpu

The required Python libraries are pillow, lxml, matplotlib, and jupyter; please install them yourself.
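
They can all be installed in one go with pip, for example:

pip install pillow lxml matplotlib jupyter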

Step 1: Download the TensorFlow Object Detection API models repository.

Step 2: Download Protoc.

protoc is used to compile the .proto files that ship with the TensorFlow Object Detection API into Python files. The Windows version to download can be:

After downloading, unzip it; the directory looks like this:

Add the path of the bin folder to the PATH environment variable:

Open cmd and type protoc: if output like the following appears, the environment variable was added successfully:
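
A quick way to verify is to ask for the version (the number below is only an example; it depends on which release you downloaded):

C:\>protoc --version
libprotoc 3.4.0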

Step 3: Unzip the downloaded tensorflow object detection archive; the folder can be renamed to models. Open models\research\object_detection\protos and you will see many .proto files. Use Protoc to compile these .proto files into .py files. Here is how:

In cmd, cd all the way into the \models\research folder. The .proto files under protos need to be compiled one by one, but there is a faster shortcut: simply run protoc ./object_detection/protos/*.proto --python_out=. to compile everything at once, since *.proto matches every .proto file.

In my case this reported that the input file could not be found; apparently protoc 3.5 has this problem with wildcards, so generating the files one at a time works instead. The commands are listed below and can be copied directly.

protoc ./object_detection/protos/anchor_generator.proto --python_out=.

protoc ./object_detection/protos/argmax_matcher.proto --python_out=.
protoc ./object_detection/protos/bipartite_matcher.proto --python_out=.
protoc ./object_detection/protos/box_coder.proto --python_out=.
protoc ./object_detection/protos/box_predictor.proto --python_out=.
protoc ./object_detection/protos/eval.proto --python_out=.
protoc ./object_detection/protos/faster_rcnn.proto --python_out=.
protoc ./object_detection/protos/faster_rcnn_box_coder.proto --python_out=.
protoc ./object_detection/protos/grid_anchor_generator.proto --python_out=.
protoc ./object_detection/protos/hyperparams.proto --python_out=.
protoc ./object_detection/protos/image_resizer.proto --python_out=.
protoc ./object_detection/protos/input_reader.proto --python_out=.
protoc ./object_detection/protos/keypoint_box_coder.proto --python_out=.
protoc ./object_detection/protos/losses.proto --python_out=.
protoc ./object_detection/protos/matcher.proto --python_out=.
protoc ./object_detection/protos/mean_stddev_box_coder.proto --python_out=.
protoc ./object_detection/protos/model.proto --python_out=.
protoc ./object_detection/protos/optimizer.proto --python_out=.
protoc ./object_detection/protos/pipeline.proto --python_out=.
protoc ./object_detection/protos/post_processing.proto --python_out=.
protoc ./object_detection/protos/preprocessor.proto --python_out=.
protoc ./object_detection/protos/region_similarity_calculator.proto --python_out=.
protoc ./object_detection/protos/square_box_coder.proto --python_out=.
protoc ./object_detection/protos/ssd.proto --python_out=.
protoc ./object_detection/protos/ssd_anchor_generator.proto --python_out=.
protoc ./object_detection/protos/string_int_label_map.proto --python_out=.
protoc ./object_detection/protos/train.proto --python_out=.
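
Alternatively, a cmd for-loop can expand the wildcard itself and compile every file in one line (a sketch, run from \models\research; inside a .bat file the %i would need to be doubled to %%i):

for %i in (object_detection\protos\*.proto) do protoc %i --python_out=.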

After compiling, it looks like this: each .proto file has a corresponding Python file.

Step 4: Two environment variables need to be added:

-> \models\research

-> \models\research\slim
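
That is, the full paths of both folders must be importable by Python. A temporary way to do this for the current cmd session (the E:\ prefix below is only an illustration; substitute the actual location of your models folder):

set PYTHONPATH=%PYTHONPATH%;E:\models\research;E:\models\research\slim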

At this point the Object Detection API is fully configured.
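
As an optional sanity check, the API also bundles a test module that many setup guides run at this point (assuming the file is present in your checkout of the repository), from inside \models\research:

python object_detection/builders/model_builder_test.py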

Next, it needs a test:

Create a new Python file under the research/object_detection folder and name it object_detection_tutorial.py:

# -*- coding: utf-8 -*-
"""
Created on Sat Jan  5 09:58:08 2019

@author: chr
"""

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
 
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
 
 
# # This is needed to display the images.
# %matplotlib inline
 
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
 
# from utils import label_map_util
# from utils import visualization_utils as vis_util
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
 
# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
 
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
 
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
 
NUM_CLASSES = 90
 
# download model
opener = urllib.request.URLopener()
# If the model has already been downloaded, the following line can be commented out
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
    file_name = os.path.basename(file.name)
    if 'frozen_inference_graph.pb' in file_name:
        tar_file.extract(file, os.getcwd())
 
# Load a (frozen) Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                            use_display_name=True)
category_index = label_map_util.create_category_index(categories)
 
 
# Helper code
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
 
 
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3)]
 
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
 
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Definite input and output Tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represent how level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        for image_path in TEST_IMAGE_PATHS:
            image = Image.open(image_path)
            # the array based representation of the image will be used later in order to prepare the
            # result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection. The tensor handles were already fetched once
            # above, so they are reused here instead of being looked up again
            # for every image (as the original notebook did).
            (boxes, scores, classes, _) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            plt.figure(figsize=IMAGE_SIZE)
            plt.imshow(image_np)
            plt.show()
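
To run it, launch the script from the research/object_detection folder so that the relative data/ and test_images/ paths resolve (note the first run downloads the pretrained model over the network):

python object_detection_tutorial.py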

The final result:

The image below is one of the test images that ship with the model.

Part 2: Training your own object detection model with the TensorFlow Object Detection API

Step 1: Prepare your own dataset.

Say I want to detect license plates. The first tool to use is the labelImg program:

First, a brief outline of the labelImg installation steps. Download the labelImg-master archive and unzip it, as shown below:

Next, some Python packages need to be installed. My environment is:

win10

anaconda

python 3.6

The libraries to install are lxml and pyqt5. Anaconda usually already ships the lxml and pyqt packages; below is a note on installing pyqt5:

Installing with pip install pyqt5 from cmd can be extremely slow. It is easier to download a prebuilt wheel directly, e.g. the PyQt4-4.11.4-cp36-cp36m-win_amd64.whl file; pick the version matching your own situation, so that both the Python version and the system architecture line up. Taking my development environment as the example, the steps are:

1. Download PyQt4-4.11.4-cp36-cp36m-win_amd64.whl and put it under the E:\python\TensorFlow\Objection Detection path;
2. Run cmd as administrator, then:
cd E:\python\TensorFlow\Objection Detection
pip install PyQt4-4.11.4-cp36-cp36m-win_amd64.whl
3. After the installation finishes, create a small test program to verify it:

from PyQt4.QtGui import *
import sys

a = QApplication(sys.argv)       # every PyQt program needs one QApplication
w = QWidget()                    # an empty top-level window
w.resize(320, 240)
w.setWindowTitle("Hello World")
w.show()
sys.exit(a.exec_())              # enter the Qt event loop until the window closes

Run it; if the window below appears, the installation succeeded. (You can equally well just import the library to check.)

Now labelImg can be opened. Open cmd and cd into the labelImg directory:

Run:

            pyrcc5 -o resources.py resources.qrc

            python labelImg.py

and labelImg will open.

 

Before annotating the images, first edit the predefined_classes.txt file under the data folder; open it with Notepad:

Enter your own classes, for example the vehicles and license plates I want to detect.

Save the modified predefined_classes.txt file.

Open labelImg again and press Ctrl+R to change the default output directory to a path of your own. Then click Open Dir and add the path where the images live, as the figure shows:

How to use LabelImg

  • Change the default XML save location with the shortcut Ctrl+R to a custom location. This path must not contain Chinese characters, otherwise the file cannot be saved.
  • In the source folder, open data/predefined_classes.txt with notepad++ and change the default classes, for example to the three classes person, car, and motorcycle.
  • Use "Open Dir" to open the image folder and select the first image to start annotating. Use "Create RectBox" or Ctrl+N to start drawing a box, click to finish it, then double-click to choose the class. When an image is done, click "Save" and the XML file is written to disk. Click "Next Image" to move to the next one.
  • You can go back and revise at any point during annotation; the file saved later overwrites the earlier one.
  • After finishing the annotation, open one of the XML files: it really is the same format PASCAL VOC uses (a sample sketch follows below).
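
For reference, here is a hand-written sketch of such a PASCAL VOC-style annotation (the filename, sizes, and coordinates are made up purely for illustration). Note that the bndbox children are exactly the xmin/ymin/xmax/ymax fields the conversion script below reads out:

<annotation>
    <folder>images</folder>
    <filename>image1.jpg</filename>
    <size>
        <width>1024</width>
        <height>768</height>
        <depth>3</depth>
    </size>
    <object>
        <name>plate</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>120</xmin>
            <ymin>200</ymin>
            <xmax>360</xmax>
            <ymax>280</ymax>
        </bndbox>
    </object>
</annotation>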

The generated files look like this:

However, this still does not satisfy the training-data format the TensorFlow Object Detection API requires (the API wants tfrecord-format data), so next the .xml files are converted into tfrecord files.

Converting the data format

Create a new folder named dataset under the models project; the directory structure is shown below. The data folder is used to hold the converted data.
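
Since the original figure is not reproduced here, the layout the two scripts below assume is roughly the following (a sketch; the data folder must exist before the scripts are run, since they write into it):

models
└─ dataset
   ├─ data                  <- train/eval csv and record files end up here
   ├─ xml_to_csv.py
   └─ generate_tfrecord.py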

The conversion takes two steps:

Step 1: Convert the xml files produced by labelling into csv files. Create a new file xml_to_csv.py under dataset with the code below.

Note: the path variable needs to be changed to your own. After editing path, run the code to do the conversion, which generates two csv files.

import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
 
 
def xml_to_csv(path):
    xml_list = []
    # read in the annotation files
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
 
    # split the data into a training set and a validation set
    # (0.67 here gives roughly a 2:1 split)
    train_list = xml_list[0: int(len(xml_list) * 0.67)]
    eval_list = xml_list[int(len(xml_list) * 0.67):]
 
    # save as CSV
    train_df = pd.DataFrame(train_list, columns=column_name)
    eval_df = pd.DataFrame(eval_list, columns=column_name)
    train_df.to_csv('data/train.csv', index=None)
    eval_df.to_csv('data/eval.csv', index=None)
 
 
def main():
   
    path = r'E:\python\TensorFlow\Objection Detection\images'  # change path to the folder containing your xml files
    xml_to_csv(path)
    print('Successfully converted xml to csv.')
 
if __name__ == '__main__':
    main()
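
A quick way to run it (assuming the layout sketched earlier, with the data subfolder already created):

cd dataset
python xml_to_csv.py

On success it prints "Successfully converted xml to csv." and data/train.csv plus data/eval.csv appear.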



Step 2: Convert the generated csv files into tfrecord format.

Create a new file generate_tfrecord.py under dataset with the following code:

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
 
import os
import io
import pandas as pd
import tensorflow as tf
 
from PIL import Image
# from object_detection.utils import dataset_util
from research.object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
 
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS
 
 
# map class names to ID numbers
def class_text_to_int(row_label):
    if row_label == 'plate':
        return 1
    # elif row_label == 'car':
    #     return 2
    # elif row_label == 'person':
    #     return 3
    # elif row_label == 'kite':
    #     return 4
    else:
        print('NONE: ' + row_label)
        # None
 
 
def split(df, group):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]
 
 
def create_tf_example(group, path):
    print(os.path.join(path, '{}'.format(group.filename)))
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size
 
    filename = (group.filename + '.jpg').encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
 
    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))
 
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
 
 
def main(csv_input, output_path, imgPath):
    writer = tf.python_io.TFRecordWriter(output_path)
    path = imgPath
    examples = pd.read_csv(csv_input)
    grouped = split(examples, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())
 
    writer.close()
    print('Successfully created the TFRecords: {}'.format(output_path))
 
 
if __name__ == '__main__':
 
    # imgPath = 'E:\data\Images'
    imgPath = r'E:\smart city\data_distribution\picyure_new'
 
    # generate the train.record file
    output_path = 'data/train.record'
    csv_input = 'data/train.csv'
    main(csv_input, output_path, imgPath)
 
    # generate the validation file eval.record
    output_path = 'data/eval.record'
    csv_input = 'data/eval.csv'
    main(csv_input, output_path, imgPath)
 

Two places need to be modified:

First: the mapping from class names to IDs:

Modify it according to the labels you applied in LabelImg. For example, if I only have the plate and car labels:

# map class names to ID numbers
def class_text_to_int(row_label):
    if row_label == 'plate':
        return 1
    elif row_label == 'car':
        return 2
    else:
        print('NONE: ' + row_label)

Second: change the path according to your own setup:

imgPath = r'E:\smart city\data_distribution\picyure_new'

Change the image path; the images should correspond one-to-one with the xml files.

Run generate_tfrecord.py and you can see that the two tfrecord files have been generated:
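
To double-check the output, the records in each file can be counted with TF 1.x's record iterator (a small sketch, run from the dataset folder; expect one record per annotated image):

import tensorflow as tf

for rec in ['data/train.record', 'data/eval.record']:
    n = sum(1 for _ in tf.python_io.tf_record_iterator(rec))
    print(rec, n)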

To be continued!!

Reposted from blog.csdn.net/qq_40716944/article/details/85850795