table of Contents
Log
Test environment:
GPU: TITAN Xp
CPU: Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz
OS: Ubuntu 16.04
Anaconda: conda 4.6.11
Python: 3.5.6
Tensorflow: 1.10.0
data preparation
Update:
An alternative way to generate the TFRecord files, found online: run the following two scripts on the training data set and on the test (validation) data set to obtain train.record and val.record.
# xml2csv.py
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
# Work from the annotation directory: relative paths used later in this script
# (such as the CSV written by main()) are resolved against it.
os.chdir('/root/proj_emotor/data/VOCdevkit/VOC2012/Annotations')
# Directory that is searched for the *.xml annotation files.
path ='/root/proj_emotor/data/VOCdevkit/VOC2012/Annotations'
def xml_to_csv(path):
    """Collect every labelled object from the Pascal VOC XML files in *path*.

    Elements are looked up by tag name rather than by position, so the result
    does not depend on the order in which the labelling tool wrote the child
    elements of ``<size>``, ``<object>`` and ``<bndbox>``.

    Args:
        path: Directory containing the ``*.xml`` annotation files.

    Returns:
        A ``pandas.DataFrame`` with one row per ``<object>`` element and the
        columns ``filename, width, height, class, xmin, ymin, xmax, ymax``.
        The frame is empty (but still has these columns) when no objects are
        found.
    """
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_list = []
    # Sorted so the row order does not depend on the directory listing order.
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()
        filename = root.find('filename').text
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)
        for member in root.findall('object'):
            bndbox = member.find('bndbox')
            xml_list.append((
                filename,
                width,
                height,
                member.find('name').text,
                int(bndbox.find('xmin').text),
                int(bndbox.find('ymin').text),
                int(bndbox.find('xmax').text),
                int(bndbox.find('ymax').text),
            ))
    return pd.DataFrame(xml_list, columns=column_name)
def main():
    """Convert the annotations under ``path`` into ``emotor_train.csv``.

    The CSV is written relative to the current working directory and without
    an index column.
    """
    annotations = xml_to_csv(path)
    annotations.to_csv('emotor_train.csv', index=None)
    print('Successfully converted xml to csv.')


main()
# generate_tfrecord.py
# -*- coding: utf-8 -*-
"""
Usage:
# From tensorflow/models/
# Create train data:
python generate_tfrecord.py --csv_input=data/tv_vehicle_labels.csv --output_path=train.record
# Create test data:
python generate_tfrecord.py --csv_input=data/test_labels.csv --output_path=test.record
"""
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
# Relative paths used below (including relative --csv_input / --output_path
# values) are resolved against this directory.
os.chdir('/root/proj_emotor/data')
# Command-line flags; main() reads them through FLAGS.
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS
# TODO: replace this with a lookup in the label map.
def class_text_to_int(row_label):
    """Map a class name to its integer class id.

    Args:
        row_label: Class name, e.g. a value of the ``class`` CSV column.

    Returns:
        ``1`` for ``'emotor'``; ``None`` for any other label.
    """
    if row_label == 'emotor':  # change this to match your own label(s)
        return 1
    return None
def split(df, group):
    """Group the rows of *df* by the column *group*.

    Args:
        df: DataFrame to split.
        group: Name of the column to group by.

    Returns:
        A list of ``data(filename, object)`` named tuples, one per distinct
        value of the grouping column: ``filename`` is that value and
        ``object`` is the sub-DataFrame holding its rows.
    """
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    # Iterating the groups mapping yields each group key exactly once.
    return [data(filename, gb.get_group(filename)) for filename in gb.groups]
def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Args:
        group: Object with a ``filename`` attribute (image file name) and an
            ``object`` attribute (DataFrame with the columns ``xmin``,
            ``ymin``, ``xmax``, ``ymax`` and ``class``, one row per box).
        path: Directory that contains the image file.

    Returns:
        A ``tf.train.Example`` holding the encoded image bytes, its size and
        file name, and the box coordinates divided by the image width/height.
    """
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    # The size is read from the image itself, not from the CSV.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size
    filename = group.filename.encode('utf8')

    rows = [row for _, row in group.object.iterrows()]
    xmins = [row['xmin'] / width for row in rows]
    xmaxs = [row['xmax'] / width for row in rows]
    ymins = [row['ymin'] / height for row in rows]
    ymaxs = [row['ymax'] / height for row in rows]
    classes_text = [row['class'].encode('utf8') for row in rows]
    classes = [class_text_to_int(row['class']) for row in rows]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def main(_):
    """Convert the CSV given by --csv_input into a TFRecord at --output_path.

    The CSV rows are grouped per image file name and one example is written
    per image.

    Args:
        _: Unused positional argument supplied by ``tf.app.run()``.
    """
    # Directory holding the images referenced by the CSV; change this to match
    # your own data. It is already absolute, so joining it onto the current
    # working directory (as before) had no effect.
    path = '/root/proj_emotor/data/VOCdevkit/VOC2012/JPEGImages'
    # Read the CSV before opening the writer so that a bad --csv_input does
    # not leave an empty record file behind.
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    try:
        for group in grouped:
            tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the writer even when an image fails to convert.
        writer.close()
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


if __name__ == '__main__':
    tf.app.run()
original
There are 1700 unlabeled images in total; 800 of them were randomly selected for a preliminary test.
The images were labeled manually with LabelImg, using the object label name emotor.
Images 1-400 were labeled while they were in a folder named result;
images 401-800 were labeled while they were in a folder named ImageSets.
As a result, the generated .xml files
contain attributes such as:
<folder>result</folder>
<filename>1.jpg</filename>
<path>F:\result\1.jpg</path>
<folder>ImageSets</folder>
<filename>405.jpg</filename>
<path>/home/hzq0/VOC2012/ImageSets/405.jpg</path>
Of these, the
<folder>ImageSets</folder>
attribute affects how the TFRecord is generated, as explained later.
Put all of the resulting .xml files in the Annotations folder, images 1-400 in the result folder, and images 401-800 in the ImageSets folder.
Execute scripts
import os
import random
# Fraction of all annotation files that goes into trainval (the rest goes to
# test), and fraction of trainval that goes into train (the rest goes to val).
trainval_percent = 1
train_percent = 0.5
# Built with os.path.join so the paths no longer rely on unescaped backslashes
# inside string literals and also work outside Windows.
xmlfilepath = os.path.join('VOC2012', 'VOC2012', 'Annotations')
txtsavepath = os.path.join('VOC2012', 'VOC2012', 'ImageSets', 'Main')

total_xml = os.listdir(xmlfilepath)
num = len(total_xml)
indices = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)
# Sets make the membership tests in the loop below O(1).
trainval_set = set(trainval)
train_set = set(train)

with open(os.path.join(txtsavepath, 'trainval.txt'), 'w') as ftrainval, \
        open(os.path.join(txtsavepath, 'test.txt'), 'w') as ftest, \
        open(os.path.join(txtsavepath, 'train.txt'), 'w') as ftrain, \
        open(os.path.join(txtsavepath, 'val.txt'), 'w') as fval:
    for i in indices:
        # Drop the 4-character extension (".xml") and append " 1".
        # NOTE(review): the trailing " 1" is presumably the presence flag of
        # the VOC per-class image-set lists -- confirm against the consumer.
        name = total_xml[i][:-4] + ' ' + '1' + '\n'
        if i in trainval_set:
            ftrainval.write(name)
            if i in train_set:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            ftest.write(name)
This creates the four .txt files in ImageSets/Main. If you run the script somewhere else, adjust the file paths in it to match your own layout.
When creating the TFRecord, it turned out that only two of the four .txt files are used. How to deal with this is explained later.
Once the folders described above are ready, copy the script create_pascal_tf_record.py and the file pascal_label_map.pbtxt into your working directory, and rename pascal_label_map.pbtxt to emotor_label_map.pbtxt.
Folder structure
I named the working directory proj_emotor. The data-preparation part of the project is structured as follows:
+ proj_emotor:
+ data:
- create_pascal_tf_record.py
- emotor_label_map.pbtxt
+ VOCdevkit:
+ ImageSets:
+ JPEGImages:
- 401~800.jpg
+ result:
+ JPEGImages:
- 1~400.jpg
+ VOC2012:
+ Annotations:
- .xml
+ ImageSets:
+ Main:
- emotor_train.txt
- emotor_val.txt
+ JPEGImages:
- all pics
So far the contents of the configuration file and the script have not been touched; they now need a few changes.
First, delete the contents of emotor_label_map.pbtxt and replace them with the following:
item {
id: 1
name: 'emotor'
}
Then, on line 165 of create_pascal_tf_record.py, change aeroplane_ to emotor_.
Looking closely at the tree above: of the original four .txt files in ImageSets/Main, only train.txt and val.txt are kept, and both have been given the prefix emotor_ to match the change made on line 165 of create_pascal_tf_record.py.
After these edits, the script can be executed.
Before running it, here is why VOCdevkit contains the extra result and ImageSets folders.
When the images were labeled with LabelImg, the first 400 were in the result folder and the last 400 were in the ImageSets folder (this was a mistake; the folder was meant to be named VOC2012). As mentioned earlier, the generated .xml files therefore contain lines such as
<folder>ImageSets</folder>
Because of this extra information, the TFRecord creation script looks for JPEGImages inside the corresponding folder. So when labeling data in the future, first put all the images into the VOC2012 folder and only then label them.
Creating TFRecord
#From proj_emotor/data
python create_pascal_tf_record.py \
--label_map_path=emotor_label_map.pbtxt \
--data_dir=VOCdevkit --year=VOC2012 --set=train \
--output_path=emotor_train.record
python create_pascal_tf_record.py \
--label_map_path=emotor_label_map.pbtxt \
--data_dir=VOCdevkit --year=VOC2012 --set=val \
--output_path=emotor_val.record
Note the directory from which the commands are executed.
On success, two files appear in the data directory:
emotor_train.record
emotor_val.record
Training
Ready to work
First, download the official pre-trained model from the TensorFlow Model Zoo.
I use ssd_mobilenet_v2_coco, unpacked in proj_emotor/models/model.
Now proj_emotor
under the folder structure:
+ proj_emotor
+ data:
...
+ models:
- model_main.py
- pipeline.config
+ model:
+ train
+ eval
+ ssd_mobilenet_v2_coco_2018_03_29
Here model_main.py is copied from its original location.
pipeline.config is a copy of ssd_mobilenet_v2_coco_2018_03_29/pipeline.config, which is obtained after extracting the downloaded archive.
Modify its contents as follows:
model {
ssd {
num_classes: 1
image_resizer {
fixed_shape_resizer {
height: 300
width: 300
}
}
feature_extractor {
type: "ssd_mobilenet_v2"
depth_multiplier: 1.0
min_depth: 16
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 3.99999989895e-05
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.0299999993294
}
}
activation: RELU_6
batch_norm {
decay: 0.999700009823
center: true
scale: true
epsilon: 0.0010000000475
train: true
}
}
batch_norm_trainable: true
use_depthwise: true
}
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 3.99999989895e-05
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.0299999993294
}
}
activation: RELU_6
batch_norm {
decay: 0.999700009823
center: true
scale: true
epsilon: 0.0010000000475
train: true
}
}
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.800000011921
kernel_size: 3
box_code_size: 4
apply_sigmoid_to_scores: false
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.20000000298
max_scale: 0.949999988079
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.333299994469
}
}
post_processing {
batch_non_max_suppression {
score_threshold: 0.300000011921
iou_threshold: 0.600000023842
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
normalize_loss_by_num_matches: true
loss {
localization_loss {
weighted_smooth_l1 {
}
}
classification_loss {
weighted_sigmoid {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.990000009537
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 3
}
classification_weight: 1.0
localization_weight: 1.0
}
}
}
train_config {
batch_size: 24
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
optimizer {
rms_prop_optimizer {
learning_rate {
exponential_decay_learning_rate {
initial_learning_rate: 0.00400000018999
decay_steps: 800720
decay_factor: 0.949999988079
}
}
momentum_optimizer_value: 0.899999976158
decay: 0.899999976158
epsilon: 1.0
}
}
fine_tune_checkpoint: "/root/proj_emotor/model/ssd_mobilenet_v2_coco_2018_03_29/model.ckpt"
num_steps: 200000
fine_tune_checkpoint_type: "detection"
}
train_input_reader {
label_map_path: "/root/proj_emotor/data/emotor_label_map.pbtxt"
tf_record_input_reader {
input_path: "/root/proj_emotor/data/emotor_train.record"
}
}
eval_config {
num_examples: 8000
max_evals: 10
use_moving_averages: false
}
eval_input_reader {
label_map_path: "/root/proj_emotor/data/emotor_label_map.pbtxt"
shuffle: false
num_readers: 1
tf_record_input_reader {
input_path: "/root/proj_emotor/data/emotor_val.record"
}
}
Start training
# From the proj_emotor/models directory
python model_main.py \
--pipeline_config_path=pipeline.config \
--model_dir=model \
--num_train_steps=80000 \
--sample_1_of_n_eval_examples=1 \
--alsologtostderr
You can use TensorBoard to monitor the training progress in real time.
tensorboard --logdir={PATH TO LOG}
Here {PATH TO LOG} is the directory to which the logs are written during training.
After training, the model folder contains the following:
model
├── checkpoint
├── eval
│ └── events.out.tfevents.1563524306.hzq
├── events.out.tfevents.1563523648.hzq
├── export
│ └── Servo_0
│ └── 1563551359
│ ├── saved_model.pb
│ └── variables
│ ├── variables.data-00000-of-00001
│ └── variables.index
├── graph.pbtxt
├── model.ckpt-74690.data-00000-of-00001
├── model.ckpt-74690.index
├── model.ckpt-74690.meta
├── model.ckpt-76420.data-00000-of-00001
├── model.ckpt-76420.index
├── model.ckpt-76420.meta
├── model.ckpt-78144.data-00000-of-00001
├── model.ckpt-78144.index
├── model.ckpt-78144.meta
├── model.ckpt-79957.data-00000-of-00001
├── model.ckpt-79957.index
├── model.ckpt-79957.meta
├── model.ckpt-80000.data-00000-of-00001
├── model.ckpt-80000.index
├── model.ckpt-80000.meta
├── pipeline.config
└── train
Export the model for inference
python export_inference_graph.py \
--input_type=image_tensor \
--pipeline_config_path=./model/pipeline.config \
--trained_checkpoint_prefix=/root/proj_emotor/models/model/model.ckpt-80000 \
--output_directory=./model/train/
Here --pipeline_config_path is the pipeline.config produced by training (the one in the model folder), not the pipeline.config that was used to start training.
train
├── checkpoint
├── frozen_inference_graph.pb
├── model.ckpt.data-00000-of-00001
├── model.ckpt.index
├── model.ckpt.meta
├── pipeline.config
└── saved_model
├── saved_model.pb
└── variables
The train folder now contains the file frozen_inference_graph.pb, which is used for inference.
Inference
The code for inference:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops
if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')
# This is needed to display the images.
#%matplotlib inline
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
# Directory holding the exported model. Nothing is downloaded in this script:
# the download settings from the original tutorial are left commented out.
# The value must end with '/' because the file name below is appended by plain
# string concatenation.
MODEL_NAME = '/root/proj_emotor/models/model/train/'
#MODEL_FILE = MODEL_NAME + '.tar.gz'
#DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + 'frozen_inference_graph.pb'
# Label map file used to build the category index that names each detected class.
PATH_TO_LABELS = os.path.join('/root/proj_emotor/data', 'emotor_label_map.pbtxt')
#opener = urllib.request.URLopener()
#opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
#tar_file = tarfile.open(MODEL_FILE)
#for file in tar_file.getmembers():
# file_name = os.path.basename(file.name)
# if 'frozen_inference_graph.pb' in file_name:
# tar_file.extract(file, os.getcwd())
# Load the frozen inference graph into a dedicated tf.Graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as graph_file:
        od_graph_def.ParseFromString(graph_file.read())
    # name='' imports the nodes without a name prefix.
    tf.import_graph_def(od_graph_def, name='')

# Category index built from the label map; used when drawing detection labels.
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS, use_display_name=True)
def load_image_into_numpy_array(image):
    """Convert a PIL-style image into a ``(height, width, 3)`` uint8 array.

    Images whose mode is not ``'RGB'`` (for example grayscale or RGBA) are
    converted to RGB first; without this the reshape to three channels fails.

    Args:
        image: Image object exposing ``mode``, ``size`` as ``(width, height)``,
            ``getdata()`` and ``convert(mode)``.

    Returns:
        ``numpy.ndarray`` of shape ``(height, width, 3)`` and dtype ``uint8``.
    """
    if image.mode != 'RGB':
        image = image.convert('RGB')
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
# Test images are expected in PATH_TO_TEST_IMAGES_DIR, named 1.jpg through
# 99.jpg (range(1, 100) stops before 100).
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'TEST_IMGs'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, '{}.jpg'.format(i)) for i in range(1, 100) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
def run_inference_for_single_image(image, graph):
    """Run the detection graph on one image and return its detections.

    Args:
        image: Image array; a leading batch axis is added before it is fed to
            ``image_tensor:0``. ``image.shape[0]`` and ``image.shape[1]`` are
            used as the target size when masks are reframed.
        graph: Graph containing ``image_tensor:0`` and the detection outputs.

    Returns:
        Dict with ``num_detections`` (int), ``detection_classes`` (uint8
        array), ``detection_boxes`` and ``detection_scores`` for the single
        image, plus ``detection_masks`` when the graph provides that output.
    """
    candidate_keys = (
        'num_detections', 'detection_boxes', 'detection_scores',
        'detection_classes', 'detection_masks',
    )
    with graph.as_default():
        with tf.Session() as sess:
            default_graph = tf.get_default_graph()
            # Collect handles to whichever output tensors the graph exposes.
            all_tensor_names = {
                output.name
                for op in default_graph.get_operations()
                for output in op.outputs
            }
            tensor_dict = {
                key: default_graph.get_tensor_by_name(key + ':0')
                for key in candidate_keys
                if key + ':0' in all_tensor_names
            }
            if 'detection_masks' in tensor_dict:
                # The following processing is only for a single image.
                boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframing translates the masks from box coordinates to image
                # coordinates and fits them to the image size.
                real_num_detection = tf.cast(
                    tensor_dict['num_detections'][0], tf.int32)
                boxes = tf.slice(boxes, [0, 0], [real_num_detection, -1])
                masks = tf.slice(masks, [0, 0, 0], [real_num_detection, -1, -1])
                masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    masks, boxes, image.shape[0], image.shape[1])
                masks_reframed = tf.cast(
                    tf.greater(masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension.
                tensor_dict['detection_masks'] = tf.expand_dims(
                    masks_reframed, 0)
            image_tensor = default_graph.get_tensor_by_name('image_tensor:0')

            # Run inference on a batch that contains just this image.
            output_dict = sess.run(
                tensor_dict,
                feed_dict={image_tensor: np.expand_dims(image, 0)})

            # Strip the batch axis and convert types as appropriate.
            output_dict['num_detections'] = int(
                output_dict['num_detections'][0])
            output_dict['detection_classes'] = (
                output_dict['detection_classes'][0].astype(np.uint8))
            for key in ('detection_boxes', 'detection_scores'):
                output_dict[key] = output_dict[key][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = (
                    output_dict['detection_masks'][0])
    return output_dict
# Run detection on every test image and display the annotated result.
for image_path in TEST_IMAGE_PATHS:
    # Read the pixels inside a context manager so the image file is closed as
    # soon as the array has been built. The array is reused below to draw the
    # boxes and labels on.
    with Image.open(image_path) as image:
        image_np = load_image_into_numpy_array(image)
    # Actual detection; the batch axis is added inside the inference function.
    output_dict = run_inference_for_single_image(image_np, detection_graph)
    # Visualization of the results of a detection (drawn onto image_np).
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
    # NOTE(review): the original indentation was lost; showing each figure
    # inside the loop (one image at a time) is assumed -- confirm.
    plt.show()
problem
1
google.protobuf.text_format.ParseError: 35:7 : Message type "object_detection.protos.SsdFeatureExtractor" has no field named "batch_norm_trainable"
Deleting the line
batch_norm_trainable: true
from pipeline.config resolved the error. I do not know what effect this has; I had not come across this problem before.
Explanation
In the first few trials, pipeline.config
was copied from samples/configs/ssd_mobilenet_v2_coco.config, which does not contain this option. What exactly this option does is not known.
2
After training starts, some parameters cannot be initialized from the checkpoint:
Use `tf.data.Dataset.batch(..., drop_remainder=True)`.
W0718 14:56:12.331554 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/beta] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.
W0718 14:56:12.331802 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/gamma] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.
W0718 14:56:12.331923 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/moving_mean] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.
W0718 14:56:12.332051 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/moving_variance] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.
W0718 14:56:12.332157 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/weights] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1, 1, 320, 1280]], model variable shape: [[1, 1, 320, 256]]. This variable will not be initialized from the checkpoint.
W0718 14:56:12.336651 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/layer_19_1_Conv2d_2_1x1_256/weights] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1, 1, 1280, 256]], model variable shape: [[1, 1, 256, 256]]. This variable will not be initialized from the checkpoint.