前言

已完成TensorFlow Object Detection API环境搭建，具体搭建过程请参照：

安装运行谷歌开源的TensorFlow Object Detection API视频物体识别系统

或

Ubuntu系统安装配置tensorflow开发环境

准备工作

下载训练数据和验证数据

香港中文大学(Chinese University of Hong Kong)有大量的标注图像数据集。WIDER FACE数据集是一个人脸检测基准数据集。我用labelImg（https://github.com/tzutalin/labelImg）来显示边框。所选的文本是人脸检测注释。

数据集下载地址：http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/

labelImg安装命令

pip install labelImg

其他安装方式请参照上面提供的地址自行尝试

labelImg使用命令

labelImg

效果

数据集中的图片与标注文件XML一一对应

下载模型

模型下载地址：https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md

我选择的模型是faster_rcnn_inception_v2_coco，下载地址是：

http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz

编写代码进行相关操作001_down_data.py

# -*- coding: utf-8 -*-
'''
结果集下载与模型下载
'''
import requests
import os
import shutil
# unzip the files
import zipfile
import tarfile


def download_file_from_google_drive(id, destination):
    """Download a shared Google Drive file and save it to ``destination``.

    The first request may come back with a cookie whose name starts with
    ``download_warning``; when it does, the request is repeated with that
    cookie's value passed as the ``confirm`` parameter (presumably Google
    Drive's confirmation step for large files -- verify against current
    Drive behaviour).

    Args:
        id: Google Drive file id. The name shadows the builtin but is kept
            so existing keyword callers keep working.
        destination: Local path the downloaded bytes are written to.

    Raises:
        requests.HTTPError: If the final response carries an HTTP error
            status. Previously the error body was silently written to
            ``destination`` and only failed later during unzip.
    """
    URL = "https://docs.google.com/uc?export=download"
    CHUNK_SIZE = 32768

    def get_confirm_token(response):
        # Return the value of the first "download_warning*" cookie, if any.
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value

        return None

    def save_response_content(response, destination):
        # Stream to disk in chunks so the archive never sits fully in memory.
        with open(destination, "wb") as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)

    session = requests.Session()
    try:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)

        if token:
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        # Fail loudly before creating the destination file.
        response.raise_for_status()
        save_response_content(response, destination)
    finally:
        # Release pooled connections even when the download fails.
        session.close()


# The script
# Downloads the WIDER FACE train/val archives, the annotation split and the
# pre-trained Faster R-CNN checkpoint into ./data and unpacks all of them.
curr_path = os.getcwd()
models_path = os.path.join(curr_path, "data")

# Create ./data when missing. Only the "already exists" case is tolerated;
# other failures (e.g. permissions) now surface instead of being swallowed.
if not os.path.isdir(models_path):
    os.makedirs(models_path)

print("files download start")

train_zip = os.path.join(models_path, "train.zip")
if not os.path.exists(train_zip):
    print("downloading.. train.zip -- 1.47GB")
    download_file_from_google_drive("0B6eKvaijfFUDQUUwd21EckhUbWs", train_zip)

val_zip = os.path.join(models_path, "val.zip")
if not os.path.exists(val_zip):
    print("downloading.. val.zip -- 362.8MB")
    download_file_from_google_drive("0B6eKvaijfFUDd3dIRmpvSk8tLUk", val_zip)

print("files download end")

print("files unzip start")

if not os.path.exists(os.path.join(models_path, "WIDER_train")):
    with zipfile.ZipFile(train_zip, "r") as zip_ref:
        zip_ref.extractall(models_path)

if not os.path.exists(os.path.join(models_path, "WIDER_val")):
    with zipfile.ZipFile(val_zip, "r") as zip_ref:
        zip_ref.extractall(models_path)

print("files unzip end")

print("annotation download start")

url = 'http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/bbx_annotation/wider_face_split.zip'
split_zip = os.path.join(models_path, "wider_face_split.zip")

# Fetch and unpack the annotations only when they are not unpacked yet; the
# archive used to be re-downloaded on every run, even when already extracted.
if not os.path.exists(os.path.join(models_path, "wider_face_split")):
    r = requests.get(url)
    # Do not write an HTTP error page to disk as if it were the zip file.
    r.raise_for_status()
    with open(split_zip, "wb") as code:
        code.write(r.content)
    with zipfile.ZipFile(split_zip, "r") as zip_ref:
        zip_ref.extractall(models_path)
print("annotation download end")

# downloading from: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
url = 'http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz'
filePath = os.path.join(models_path, "faster_rcnn_inception_v2_coco_2018_01_28.tar.gz")

if not os.path.exists(filePath):
    response = requests.get(url, stream=True)
    try:
        response.raise_for_status()
        with open(filePath, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
    finally:
        response.close()

# The archive is unpacked relative to the working directory, so switch to
# ./data first (the process stays in that directory afterwards, as before).
os.chdir(models_path)

# filePath always ends in ".tar.gz", so the former plain-".tar" branch could
# never run; the context manager closes the archive even on failure.
with tarfile.open(filePath, "r:gz") as tar:
    tar.extractall()

print("done")

数据预处理

将WIDERFace转换为Pascal XML

首先，我们需要将人脸检测数据集转换为Pascal XML。Tensorflow和labelImg使用不同的格式。这些人脸检测图像将下载到WIDER_train文件夹中。我们将使用002_data-to-pascal-xml.py转换WIDERFace数据并且将数据复制到一个不同的子文件夹中。我的电脑需要5分钟处理9263张图片。

002_data-to-pascal-xml.py

#!/usr/bin/env python3


"""
This script crawls over 9263 training images and 1873 items
On my Macbook pro this takes: 4 minutes

"""
import cv2
import os
import numpy as np
from glob import iglob # python 3.5 or newer
from shutil import copyfile


# The script
curr_path = os.getcwd()

# xml.etree.cElementTree was deprecated in Python 3.3 and removed in 3.9;
# xml.etree.ElementTree uses the C accelerator automatically when available.
import xml.etree.ElementTree as ET

# settings
cnt = 0
# HOG descriptor arguments, in OpenCV's order: window size 80x80,
# block size 16x16, block stride 8x8, cell size 8x8, 9 orientation bins.
hog = cv2.HOGDescriptor((80, 80), (16, 16), (8,8), (8,8), 9)

def newXMLPASCALfile(imageheight, imagewidth, path, basename):
    """Build the object-free skeleton of a Pascal VOC annotation.

    Args:
        imageheight: Image height, stored as text in <size>/<height>.
        imagewidth: Image width, stored as text in <size>/<width>.
        path: Value for the <path> element.
        basename: Value for the <filename> element.

    Returns:
        An ``ET.ElementTree`` whose root is <annotation verified="yes">
        containing folder, filename, path, source, size and segmented.
        Nothing is written to disk here.
    """
    root = ET.Element("annotation", verified="yes")

    # Plain text children of <annotation>, in document order.
    for tag, text in (("folder", "images"), ("filename", basename), ("path", path)):
        ET.SubElement(root, tag).text = text

    ET.SubElement(ET.SubElement(root, "source"), "database").text = "test"

    # <size> lists width before height; depth is fixed at 3.
    size_node = ET.SubElement(root, "size")
    dimensions = (("width", str(imagewidth)), ("height", str(imageheight)), ("depth", "3"))
    for tag, text in dimensions:
        ET.SubElement(size_node, tag).text = text

    ET.SubElement(root, "segmented").text = "0"

    return ET.ElementTree(root)

def appendXMLPASCAL(curr_et_object,x1, y1, w, h, filename):
    """Append one "face" <object> to an annotation tree and save the tree.

    Args:
        curr_et_object: ``ET.ElementTree`` of the current image's annotation.
        x1: Left edge of the box (written as xmin).
        y1: Top edge of the box (written as ymin).
        w: Box width; xmax is written as ``x1 + w``.
        h: Box height; ymax is written as ``y1 + h``.
        filename: Path of the image. The XML is written next to it, with
            the image extension replaced by ".xml".

    Returns:
        The same tree object, now holding the extra <object>. The whole
        tree is rewritten to disk on every call.
    """
    et_object = ET.SubElement(curr_et_object.getroot(), "object")
    # Child order (name, pose, truncated, difficult, bndbox) is preserved
    # because readers that index children by position may depend on it.
    ET.SubElement(et_object, "name").text = "face"
    ET.SubElement(et_object, "pose").text = "Unspecified"
    ET.SubElement(et_object, "truncated").text = "0"
    ET.SubElement(et_object, "difficult").text = "0"
    bndbox = ET.SubElement(et_object, "bndbox")
    ET.SubElement(bndbox, "xmin").text = str(x1)
    ET.SubElement(bndbox, "ymin").text = str(y1)
    ET.SubElement(bndbox, "xmax").text = str(x1+w)
    ET.SubElement(bndbox, "ymax").text = str(y1+h)

    # Swap only the final extension. The former str.replace(".jpg", ".xml")
    # also rewrote ".jpg" inside directory names, and left the path unchanged
    # for e.g. ".JPG", so the XML was written over the image itself.
    xml_path = os.path.splitext(filename.strip())[0] + ".xml"
    curr_et_object.write(xml_path)
    return curr_et_object




def readAndWrite(bbx_gttxtPath):
    """Convert a WIDER FACE ground-truth text file into Pascal XML files.

    The file is read line by line. A line without any space is either an
    all-digit line (skipped) or an image path relative to the module-level
    ``Train_path``; every other line is parsed as ten integers describing
    one face box of the most recently seen image.

    For each box that passes the filter below, an <object> is appended to
    the current image's annotation and the XML is (re)written next to the
    image via ``appendXMLPASCAL``.

    Relies on the module-level names ``Train_path`` and ``hog``.

    Args:
        bbx_gttxtPath: Path of the ground-truth text file to read.

    Returns:
        None. The results are the XML files written to disk and one
        printed line per accepted box.
    """
    # Number of accepted boxes; incremented below but never read or returned.
    cnt = 0
    with open(bbx_gttxtPath, 'r') as f:
        curr_img = ''

        curr_filename = ""
        curr_path = ""

        # Placeholders until the first image line is seen.
        curr_et_object = ET.ElementTree()


        img = np.zeros((80, 80))
        for line in f:
            inp = line.split(' ')

            # A single token means the line held no space: image path or count.
            if len(inp)==1:
                img_path = inp[0]
                # Drops the last character, assumed to be the trailing newline.
                # NOTE(review): a final line without "\n" would lose a real character.
                img_path = img_path[:-1]
                curr_img = img_path
                # All-digit lines (presumably the per-image face count) carry
                # nothing needed here.
                if curr_img.isdigit():
                    continue
                # NOTE(review): flag 2 looks like cv2.IMREAD_ANYDEPTH, and imread
                # is documented to return None for unreadable files, which would
                # make img.shape below raise -- confirm against OpenCV docs.
                img = cv2.imread(Train_path + '/' + curr_img, 2) # POSIX only
                # Second path component, i.e. the file name inside its folder.
                curr_filename = curr_img.split("/")[1].strip()
                curr_path = os.path.join(Train_path, os.path.dirname(curr_img))
                # Start a fresh annotation for this image (height, width from img.shape).
                curr_et_object = newXMLPASCALfile(img.shape[0],img.shape[1],curr_path, curr_filename )

            else:
                # The last token is dropped before int conversion (it is the
                # remainder after the final space, e.g. the newline).
                inp = [int(i) for i in inp[:-1]]
                x1, y1, w, h, blur, expression, illumination, invalid, occlusion, pose = inp
                n = max(w,h)
                # Keep only valid, non-blurred faces whose longer side is >= 50.
                if invalid == 1 or blur > 0 or n < 50:
                    continue
                # Square n-by-n crop anchored at the box's top-left corner,
                # resized to the 80x80 window the HOG descriptor was built for.
                img2 = img[y1:y1+n, x1:x1+n]
                img3 = cv2.resize(img2, (80, 80))
                # The descriptor itself is only used for its length in the
                # log line below.
                vec = hog.compute(img3)
                cnt += 1

                fileNow = os.path.join(curr_path,curr_filename)
                print("{}: {} {} {} {}".format(len(vec),x1, y1, w, h) + " " + fileNow)

                # Adds the box and rewrites the XML file beside the image.
                curr_et_object = appendXMLPASCAL(curr_et_object,x1, y1, w, h, fileNow )


# ################################ TRAINING DATA 9263 ITEMS ##################################
# Write a Pascal XML next to every training image that has an accepted face.
# readAndWrite reads the module-level Train_path, so it must be set first.
Train_path = os.path.join(curr_path, "data", "WIDER_train", "images" )
bbx_gttxtPath = os.path.join(curr_path, "data", "wider_face_split", "wider_face_train_bbx_gt.txt" )
readAndWrite(bbx_gttxtPath)


# To folders:
to_xml_folder = os.path.join(curr_path, "data", "tf_wider_train", "annotations", "xmls" )
to_image_folder = os.path.join(curr_path, "data", "tf_wider_train", "images" )

# Create each folder independently. With both calls inside one swallow-all
# try block, an existing xml folder made the first call raise and the image
# folder was then never created.
os.makedirs(to_xml_folder, exist_ok=True)
os.makedirs(to_image_folder, exist_ok=True)

# Every file below Train_path, at any depth.
rootdir_glob = os.path.join(Train_path, '**', '*')
file_list = [f for f in iglob(rootdir_glob, recursive=True) if os.path.isfile(f)]

train_annotations_index = os.path.join(curr_path, "data", "tf_wider_train", "annotations", "train.txt" )

# "w" rebuilds the index on every run; append mode duplicated every entry
# whenever the script was executed again.
with open(train_annotations_index, "w") as indexFile:
    for f in file_list:
        # Match the extension only, not ".xml" anywhere in the path.
        if f.endswith(".xml"):
            print(f)
            stem = os.path.splitext(f)[0]
            copyfile(f, os.path.join(to_xml_folder, os.path.basename(f)))
            img = stem + ".jpg"
            copyfile(img, os.path.join(to_image_folder, os.path.basename(img)))
            indexFile.write(os.path.basename(stem) + "\n")


################################ VALIDATION DATA 1873 ITEMS ##################################

# Run Script for Validation data
# readAndWrite reads the module-level Train_path, so the name is reused here
# even though it now points at the validation images.
Train_path = os.path.join(curr_path, "data", "WIDER_val", "images" )
bbx_gttxtPath = os.path.join(curr_path, "data", "wider_face_split", "wider_face_val_bbx_gt.txt" )
readAndWrite(bbx_gttxtPath)


# To folders:
to_xml_folder = os.path.join(curr_path, "data", "tf_wider_val", "annotations", "xmls" )
to_image_folder = os.path.join(curr_path, "data", "tf_wider_val", "images" )

# Create each folder independently. With both calls inside one swallow-all
# try block, an existing xml folder made the first call raise and the image
# folder was then never created.
os.makedirs(to_xml_folder, exist_ok=True)
os.makedirs(to_image_folder, exist_ok=True)


# Every file below Train_path, at any depth.
rootdir_glob = os.path.join(Train_path, '**', '*')
file_list = [f for f in iglob(rootdir_glob, recursive=True) if os.path.isfile(f)]

train_annotations_index = os.path.join(curr_path, "data", "tf_wider_val", "annotations", "val.txt" )

# "w" rebuilds the index on every run; append mode duplicated every entry
# whenever the script was executed again.
with open(train_annotations_index, "w") as indexFile:
    for f in file_list:
        # Match the extension only, not ".xml" anywhere in the path.
        if f.endswith(".xml"):
            print(f)
            stem = os.path.splitext(f)[0]
            copyfile(f, os.path.join(to_xml_folder, os.path.basename(f)))
            img = stem + ".jpg"
            copyfile(img, os.path.join(to_image_folder, os.path.basename(img)))
            indexFile.write(os.path.basename(stem) + "\n")

运行

python 002_data-to-pascal-xml.py

效果

创建Pascal XML到Tensorflow CSV的索引

当数据转换为Pascal XML时，索引已经被创建。通过训练和验证数据集，我们将这些文件作为输入来制作TFRecords。也可以用labelImg这样的工具来手动标记图像，并使用这个步骤在这里创建一个索引。

003_xml-to-csv.py

import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET

# source and credits:
# https://raw.githubusercontent.com/datitran/raccoon_dataset/master/xml_to_csv.py

def xml_to_csv(path):
    """Collect every bounding box from the Pascal XML files in a folder.

    Elements are looked up by tag name rather than by child position, so
    files whose children are ordered differently still yield the right
    values. Files are processed in sorted name order, which makes the row
    order reproducible between runs.

    Args:
        path: Directory containing ``*.xml`` annotation files
            (not searched recursively).

    Returns:
        A ``pandas.DataFrame`` with one row per <object> and the columns
        filename, width, height, class, xmin, ymin, xmax, ymax. It is
        empty (with those columns) when no object is found.
    """
    xml_list = []
    for xml_file in sorted(glob.glob(os.path.join(path, '*.xml'))):
        root = ET.parse(xml_file).getroot()
        members = root.findall('object')
        if not members:
            # No boxes: contributes no rows, so skip the header lookups.
            continue

        filename = root.findtext('filename')
        size = root.find('size')
        width = int(size.findtext('width'))
        height = int(size.findtext('height'))

        for member in members:
            bndbox = member.find('bndbox')
            xml_list.append((
                filename,
                width,
                height,
                member.findtext('name'),
                int(bndbox.findtext('xmin')),
                int(bndbox.findtext('ymin')),
                int(bndbox.findtext('xmax')),
                int(bndbox.findtext('ymax')),
            ))
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df


def train():
    """Write data/tf_wider_train/train.csv from the training XML annotations.

    Paths are resolved against the current working directory; the CSV is
    written without an index column.
    """
    dataset_dir = os.path.join(os.getcwd(), 'data', 'tf_wider_train')
    annotations = xml_to_csv(os.path.join(dataset_dir, 'annotations', 'xmls'))
    annotations.to_csv(os.path.join(dataset_dir, 'train.csv'), index=None)
    print('> tf_wider_train - Successfully converted xml to csv.')

def val():
    """Write data/tf_wider_val/val.csv from the validation XML annotations.

    Paths are resolved against the current working directory; the CSV is
    written without an index column.
    """
    dataset_dir = os.path.join(os.getcwd(), 'data', 'tf_wider_val')
    annotations = xml_to_csv(os.path.join(dataset_dir, 'annotations', 'xmls'))
    annotations.to_csv(os.path.join(dataset_dir, 'val.csv'), index=None)
    print('> tf_wider_val -  Successfully converted xml to csv.')

# Convert both splits. There is no __main__ guard, so this also runs when
# the module is imported.
train()
val()

运行

python 003_xml-to-csv.py

效果

创建TFRecord文件

TFRecords文件是一个大型的二进制文件，该文件被读取以训练机器学习模型。在下一步中，该文件将被Tensorflow按顺序读取。训练和验证数据将被转换成二进制文件。

004_generate_tfrecord.py

"""
Usage:
  # From tensorflow/models/
  # Create train data:
  python3 004_generate_tfrecord.py --images_path=data/tf_wider_train/images --csv_input=data/tf_wider_train/train.csv  --output_path=data/train.record
  # creates 847.6MB train.record

  # Create test/validation data:
  python3 004_generate_tfrecord.py --images_path=data/tf_wider_val/images --csv_input=data/tf_wider_val/val.csv  --output_path=data/val.record
  # creates 213.1MB  val.record

  source without adjustments: https://raw.githubusercontent.com/datitran/raccoon_dataset/master/generate_tfrecord.py
"""

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import os
import io
import pandas as pd
import tensorflow as tf

from PIL import Image
from object_detection.utils import dataset_util # from path
from collections import namedtuple, OrderedDict # tf slim

# Command-line flags, declared through tf.app.flags; each is a string that
# defaults to '' and is read through FLAGS by the functions in this script.
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('images_path', '', 'Path to images_folder')

FLAGS = flags.FLAGS


# TO-DO replace this with label map
def class_text_to_int(row_label):
    """Translate a class name into its integer label id.

    Args:
        row_label: Class name taken from the CSV ``class`` column.

    Returns:
        1 for 'face'; None for any other label.
    """
    return 1 if row_label == 'face' else None


def split(df, group):
    """Group the rows of ``df`` by a column and return one record per group.

    Args:
        df: DataFrame holding one row per bounding box.
        group: Column name to group by.

    Returns:
        A list of ``data(filename, object)`` namedtuples, where ``filename``
        is the group key and ``object`` is the sub-frame of that group's rows.
    """
    data = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)

    records = []
    # Iterating the groups mapping yields each group key exactly once.
    for key in grouped.groups:
        records.append(data(key, grouped.get_group(key)))
    return records


def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Args:
        group: Record with ``filename`` (the image file name) and ``object``
            (DataFrame rows with xmin, ymin, xmax, ymax and class columns).
        path: Directory that contains the image file.

    Returns:
        A ``tf.train.Example`` holding the raw image bytes, the image size
        read from those bytes, and the box coordinates divided by the image
        width/height, together with the class text and integer label lists.
    """
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()

    # Decode only to learn the dimensions; the stored bytes are the originals.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    rows = [row for _, row in group.object.iterrows()]

    # Coordinates are stored as fractions of the image size.
    xmins = [row['xmin'] / width for row in rows]
    xmaxs = [row['xmax'] / width for row in rows]
    ymins = [row['ymin'] / height for row in rows]
    ymaxs = [row['ymax'] / height for row in rows]
    classes_text = [row['class'].encode('utf8') for row in rows]
    classes = [class_text_to_int(row['class']) for row in rows]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))


def main(_):
    """Convert the CSV named by --csv_input into a TFRecord file.

    One serialized example is written per distinct ``filename`` in the CSV,
    using the images found under --images_path (resolved against the
    current working directory).

    Args:
        _: Unused positional argument supplied by ``tf.app.run``.
    """
    path = os.path.join(os.getcwd(), FLAGS.images_path)

    # Read and group the input before opening the writer, so a missing or
    # malformed CSV no longer leaves an empty record file behind.
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    try:
        for group in grouped:
            tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the writer even when an example fails to build.
        writer.close()

    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


# Script entry point: hand control to tf.app.run() when executed directly.
if __name__ == '__main__':
    tf.app.run()

TFRecord的训练数据(847.6 MB)

python 004_generate_tfrecord.py --images_path=data/tf_wider_train/images --csv_input=data/tf_wider_train/train.csv  --output_path=data/train.record

TFRecord 的验证数据(213.1MB)

python 004_generate_tfrecord.py --images_path=data/tf_wider_val/images --csv_input=data/tf_wider_val/val.csv  --output_path=data/val.record

至此数据预处理已全部完成，效果如下：

基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型（一）安装运行谷歌开源的TensorFlow Object Detection API视频物体识别系统 Ubuntu系统安装配置tensorflow开发环境

前言

安装运行谷歌开源的TensorFlow Object Detection API视频物体识别系统

Ubuntu系统安装配置tensorflow开发环境

准备工作

下载训练数据和验证数据

下载模型

数据预处理

将WIDERFace转换为Pascal XML

创建Pascal XML到Tensorflow CSV的索引

创建TFRecord文件

猜你喜欢

基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型（一） 安装运行谷歌开源的TensorFlow Object Detection API视频物体识别系统 Ubuntu系统安装配置tensorflow开发环境

前言

安装运行谷歌开源的TensorFlow Object Detection API视频物体识别系统

Ubuntu系统安装配置tensorflow开发环境

准备工作

下载训练数据和验证数据

下载模型

数据预处理

将WIDERFace转换为Pascal XML

创建Pascal XML到Tensorflow CSV的索引

创建TFRecord文件

猜你喜欢

基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型（一）安装运行谷歌开源的TensorFlow Object Detection API视频物体识别系统 Ubuntu系统安装配置tensorflow开发环境