DeepLabv3+ in the tensorflow/models library, source-code walkthrough (Part 3): re-splitting the VOC2012 training and validation sets

1. Introduction to the PASCAL VOC2012 dataset

The previous posts only ran the code mechanically without looking at what it means. Here we go back to the very beginning: the dataset. There are many introductions to the VOC2012 dataset online; this Chinese write-up is quite detailed: “PASCAL-VOC2012数据集(vocdevkit、Vocbenchmark_release)详细介绍”.

VOCdevkit
    +VOC2012
        +Annotations (17125)          per-image info stored as {id}.xml
        +ImageSets
            +Action (33)              person action annotations
            +Layout (3)               train.txt/val.txt/trainval.txt, person layout (body parts)
            +Main (63)                files named like {class}_val.txt (classification/detection)
            +Segmentation (3)         train.txt etc. listing the semantic segmentation images
        +JPEGImages (17125)           original images in jpg format
        +SegmentationClass (2913)     semantic segmentation masks in png format
        +SegmentationClassRaw (2913)
        +SegmentationObject (2913)    instance segmentation masks

The above is the structure of the dataset directory; the number in parentheses after each folder is the count of files it contains. The full PASCAL VOC2012 dataset has 17125 images, but only 2913 of them are used for semantic segmentation. In each image's XML file under Annotations, a segmented field of 1 marks the image as used for semantic segmentation (the sketch below checks this flag).
For semantic segmentation, what matters is the three txt files under ImageSets/Segmentation, which hold the train, val and trainval image lists. The official deeplab split uses 1464 images for train, 1449 for val and 2913 for trainval. To re-split the training and validation sets, just rewrite the contents of these files. The next section shows how.
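As a quick check of that flag, the segmentation image ids can be collected straight from the XML files. A minimal sketch, assuming the standard VOC layout and the <segmented> tag name, with the paths used elsewhere in this post:

import os
import xml.etree.ElementTree as ET

ann_dir = '/home/hy/document/dataset/VOCdevkit/VOC2012/Annotations'
# keep the ids whose annotation carries <segmented>1</segmented>
seg_ids = [f[:-4] for f in os.listdir(ann_dir)
           if ET.parse(os.path.join(ann_dir, f)).findtext('segmented') == '1']
print(len(seg_ids))   # expected: 2913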

2. Randomly splitting the training and validation sets

Only the random-sampling approach is given here; k-fold cross-validation is left for later (a sketch follows right after the script below).

from __future__ import absolute_import, print_function
import os
import pandas as pd

path = '/home/hy/document/dataset/VOCdevkit/VOC2012/SegmentationClass'

lis = [i.split('.')[0] for i in os.listdir(path)]   # basenames of the segmentation masks
df = pd.DataFrame(lis, columns=['name'])
temp1 = df.sample(n=1464)
train = temp1['name'].values.tolist()
print(len(train))
with open('/home/hy/document/dataset/VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt', 'w') as f:
    for i in train[:-1]:
        f.write(i + '\n')
    f.write(train[-1])

temp2 = df.drop(temp1.index).sample(n=583)   # sample val from the remaining images so the two sets don't overlap
val = temp2['name'].values.tolist()
print(len(val))
with open('/home/hy/document/dataset/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt', 'w') as f:  # save as val.txt
    for i in val[:-1]:
        f.write(i + '\n')
    f.write(val[-1])

print(len(set(train) & set(val)))   # sanity check: should print 0
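For the k-fold variant mentioned above, here is a minimal sketch, assuming scikit-learn is available; it reuses lis from the script above, and the per-fold file names are hypothetical, not part of the original workflow:

from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=0)   # 5 folds over the 2913 names
for fold, (train_idx, val_idx) in enumerate(kf.split(lis)):
    # write one train/val list pair per fold, same one-name-per-line format as above
    with open('train_fold%d.txt' % fold, 'w') as f:
        f.write('\n'.join(lis[i] for i in train_idx))
    with open('val_fold%d.txt' % fold, 'w') as f:
        f.write('\n'.join(lis[i] for i in val_idx))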

Because I ran out of memory during eval earlier, I made the val set smaller. The next step is generating the tfrecord files. I modified the official build_data.py and build_voc2012_data.py; only a few paths need changing to run the conversion:

import math
import os.path
import sys
import tensorflow as tf
import collections
import six

root_path = '/home/hy/document/dataset/VOCdevkit/VOC2012/ImageSets/Segmentation'
output_dir = '/home/hy/document/dataset/tfrecord'
image_folder = '/home/hy/document/dataset/VOCdevkit/VOC2012/JPEGImages'
semantic_segmentation_folder = '/home/hy/document/dataset/VOCdevkit/VOC2012/SegmentationClassRaw'
_NUM_SHARDS = 4

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_enum('image_format', 'png', ['jpg', 'jpeg', 'png'],
                         'Image format.')

tf.app.flags.DEFINE_enum('label_format', 'png', ['png'],
                         'Segmentation label format.')

# A map from image format to expected data format.
_IMAGE_FORMAT_MAP = {
    'jpg': 'jpeg',
    'jpeg': 'jpeg',
    'png': 'png',
}


class ImageReader(object):
  """Helper class that provides TensorFlow image coding utilities."""

  def __init__(self, image_format='jpeg', channels=3):
    with tf.Graph().as_default():
      self._decode_data = tf.placeholder(dtype=tf.string)
      self._image_format = image_format
      self._session = tf.Session()
      if self._image_format in ('jpeg', 'jpg'):
        self._decode = tf.image.decode_jpeg(self._decode_data,
                                            channels=channels)
      elif self._image_format == 'png':
        self._decode = tf.image.decode_png(self._decode_data,
                                           channels=channels)

  def read_image_dims(self, image_data):
    image = self.decode_image(image_data)
    return image.shape[:2]

  def decode_image(self, image_data):
    image = self._session.run(self._decode,
                              feed_dict={self._decode_data: image_data})
    if len(image.shape) != 3 or image.shape[2] not in (1, 3):
      raise ValueError('The image channels not supported.')

    return image


def _int64_list_feature(values):
  if not isinstance(values, collections.Iterable):
    values = [values]

  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))


def _bytes_list_feature(values):
  def norm2bytes(value):
    return value.encode() if isinstance(value, str) and six.PY3 else value

  return tf.train.Feature(
      bytes_list=tf.train.BytesList(value=[norm2bytes(values)]))


def image_seg_to_tfexample(image_data, filename, height, width, seg_data):
  return tf.train.Example(features=tf.train.Features(feature={
      'image/encoded': _bytes_list_feature(image_data),
      'image/filename': _bytes_list_feature(filename),
      'image/format': _bytes_list_feature(
          _IMAGE_FORMAT_MAP[FLAGS.image_format]),
      'image/height': _int64_list_feature(height),
      'image/width': _int64_list_feature(width),
      'image/channels': _int64_list_feature(3),
      'image/segmentation/class/encoded': (
          _bytes_list_feature(seg_data)),
      'image/segmentation/class/format': _bytes_list_feature(
          FLAGS.label_format),
  }))


def _convert_dataset(dataset_split):
  """Converts the specified dataset split to TFRecord format.

  Args:
    dataset_split: The dataset split (e.g., train, test).

  Raises:
    RuntimeError: If loaded image and label have different shape.
  """
  dataset = os.path.basename(dataset_split)[:-4]
  sys.stdout.write('Processing ' + dataset)
  filenames = [x.strip('\n') for x in open(dataset_split, 'r')]
  num_images = len(filenames)
  num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

  image_reader = ImageReader('jpeg', channels=3)
  label_reader = ImageReader('png', channels=1)

  if not tf.gfile.Exists(output_dir):
      tf.gfile.MakeDirs(output_dir)

  for shard_id in range(_NUM_SHARDS):
    output_filename = os.path.join(output_dir,
                                   '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
      start_idx = shard_id * num_per_shard
      end_idx = min((shard_id + 1) * num_per_shard, num_images)
      for i in range(start_idx, end_idx):
        sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
            i + 1, len(filenames), shard_id))
        sys.stdout.flush()
        # Read the image.
        image_filename = os.path.join(image_folder, filenames[i] + '.' + 'jpg')
        image_data = tf.gfile.FastGFile(image_filename, 'rb').read()
        height, width = image_reader.read_image_dims(image_data)
        # Read the semantic segmentation annotation.
        seg_filename = os.path.join(semantic_segmentation_folder,
                                    filenames[i] + '.' + FLAGS.label_format)
        seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read()
        seg_height, seg_width = label_reader.read_image_dims(seg_data)
        if height != seg_height or width != seg_width:
          raise RuntimeError('Shape mismatched between image and label.')
        # Convert to tf example.
        example = image_seg_to_tfexample(
            image_data, filenames[i], height, width, seg_data)
        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()


if __name__ == '__main__':
    dataset_splits = tf.gfile.Glob(os.path.join(root_path, '*.txt'))
    for dataset_split in dataset_splits:
        _convert_dataset(dataset_split)
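Note that the __main__ block globs every *.txt under root_path, so trainval.txt (still 2913 entries) is converted alongside the new train and val lists; move it elsewhere first if you only want shards for the re-split.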

Conversion result:
[screenshot: the generated tfrecord shard files]
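Rather than relying on the screenshot, the shards can be sanity-checked by counting the serialized examples. A sketch using the TF 1.x tf.python_io.tf_record_iterator, matching the API level of the script above:

import glob
import tensorflow as tf

for shard in sorted(glob.glob('/home/hy/document/dataset/tfrecord/*.tfrecord')):
    n = sum(1 for _ in tf.python_io.tf_record_iterator(shard))
    print(shard, n)
# with this post's split, the train-* shards should sum to 1464 records and the val-* shards to 583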
Then replace the corresponding tfrecord files and Segmentation split lists in the official deeplab directories with the ones generated above, and re-run train.py and eval.py to get the evaluation results:
[screenshot: eval output]
A pitfall: for a long time eval gave me no mIOU result. It turned out the deeplab version in the tensorflow/models checkout I had downloaded differed from the latest code and apparently did not print the mIOU value. After copying eval.py from the latest deeplab, the value was printed.

------------------------- Update 2020.4.24 -------------------------
When converting the tfrecord files there is no need for all the trouble above: just run build_voc2012_data.py directly and pass the paths in as flags.

# Convert to TFRecord
python build_voc2012_data.py --image_folder='/home/hy/document/dataset/VOCdevkit/VOC2012/JPEGImages' \
                             --semantic_segmentation_folder='/home/hy/document/dataset/VOCdevkit/VOC2012/SegmentationClassRaw' \
                             --list_folder='/home/hy/document/dataset/VOCdevkit/VOC2012/ImageSets/Segmentation' \
                             --output_dir=/home/hy/document/dataset/tfrecord
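One more note: the SegmentationClassRaw folder used above holds grayscale label maps with the VOC color map removed; the official deeplab datasets directory provides remove_gt_colormap.py to generate it. If you have not produced it yet, something along these lines should work (flag names as in the official script, to the best of my knowledge):

python remove_gt_colormap.py --original_gt_folder='/home/hy/document/dataset/VOCdevkit/VOC2012/SegmentationClass' \
                             --output_dir='/home/hy/document/dataset/VOCdevkit/VOC2012/SegmentationClassRaw'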

Reposted from blog.csdn.net/qq_43265072/article/details/105707963