深度学习数据集准备——目标检测、分割

深度学习中常用数据集的制作与转换

一. 数据集的制作。以常用的LabelImg和Labelme为例。

1. PASCAL VOC格式数据集(常用于目标检测)。

a. 安装LabelImg。LabelImg是一款开源的图像标注工具,标签可用于分类和目标检测,它是用 Python 编写的,并使用Qt作为其图形界面,简单好用。注释以 PASCAL VOC 格式保存为 XML 文件。
    # Python 3 + Qt5 (Recommended)
    pip install labelImg
    labelImg

本文LabelImg版本:1.8.3

b. 操作界面。详细教程
c. PASCAL VOC数据操作
    import sys
    import os
    from xml.etree import ElementTree
    from xml.etree.ElementTree import Element, SubElement
    from lxml import etree
    import codecs
    import cv2

    img_path = 'img/timg.png'  # sample image annotated with LabelImg
    xml_path = 'img/timg.xml'  # its PASCAL VOC annotation file
01. 数据读取

class PascalVocReader:
    """Read a PASCAL VOC XML annotation file and expose its bounding boxes.

    Each parsed shape is the tuple:
        (label, [xmin, xmax, ymin, ymax], color, color, difficult)
    """

    def __init__(self, filepath):
        # shapes type:
        # [label, [Xmin, Xmax, Ymin, Ymax], color, color, difficult]
        self.shapes = []
        self.filepath = filepath
        self.verified = False
        self.XML_EXT = '.xml'
        self.ENCODE_METHOD = 'utf-8'
        try:
            self.parseXML()
        except Exception:
            # Best-effort parse: a missing or malformed file simply yields
            # no shapes.  (Was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.)
            pass

    def getShapes(self):
        """Return the list of parsed shape tuples."""
        return self.shapes

    def addShape(self, label, bndbox, difficult):
        """Append one box.  `bndbox` is the <bndbox> XML element."""
        xmin = int(float(bndbox.find('xmin').text))
        ymin = int(float(bndbox.find('ymin').text))
        xmax = int(float(bndbox.find('xmax').text))
        ymax = int(float(bndbox.find('ymax').text))
        points = [xmin, xmax, ymin, ymax]
        self.shapes.append((label, points, None, None, difficult))

    def parseXML(self):
        """Parse self.filepath, populating self.shapes and self.verified.

        Returns True on success; any error propagates to the caller
        (__init__ intentionally swallows it).
        """
        assert self.filepath.endswith(self.XML_EXT), "Unsupport file format"
        parser = etree.XMLParser(encoding=self.ENCODE_METHOD)
        xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()
        # Kept: also validates that a <filename> element is present.
        filename = xmltree.find('filename').text
        # A root <annotation verified="yes"> attribute marks human-verified files.
        self.verified = xmltree.attrib.get('verified') == 'yes'

        for object_iter in xmltree.findall('object'):
            bndbox = object_iter.find("bndbox")
            label = object_iter.find('name').text
            # <difficult> is optional; default to False when absent.
            difficult = False
            if object_iter.find('difficult') is not None:
                difficult = bool(int(object_iter.find('difficult').text))
            self.addShape(label, bndbox, difficult)
        return True

    # Parse the annotation file and print the recovered shapes.
    reader = PascalVocReader(xml_path)
    shapes = reader.getShapes()
    print(shapes)
    '''
	# [labbel, [Xmin, Xmax, Ymin, Ymax], color, color, difficult]
	[('dog', [135, 454, 117, 556], None, None, False), ('cat', [405, 918, 21, 546], None, None, False)]
    '''
02. 数据可视化

class PascalVocVisualizer:
    """Overlay PASCAL VOC boxes and labels on an image using OpenCV."""

    def __init__(self, imgpath, shapes):
        self.BOX_COLOR = (0, 0, 255)       # box / banner colour (BGR)
        self.TEXT_COLOR = (255, 255, 255)  # label text colour (BGR)
        self.shapes = shapes
        self.imgpath = imgpath

    def visualize_bbox(self, img, bbox, class_name, thickness=2):
        """Draw one box ([xmin, xmax, ymin, ymax]) plus a filled label banner."""
        left, right, top, bottom = bbox
        cv2.rectangle(img, (left, top), (right, bottom), color=self.BOX_COLOR, thickness=thickness)
        (text_w, text_h), _ = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
        # Push the banner down when the box starts too close to the image top.
        if top < int(1.4 * text_h):
            top += int(1.4 * text_h)
        cv2.rectangle(img, (left, top - int(1.3 * text_h)), (left + text_w, top), self.BOX_COLOR, -1)
        cv2.putText(img, class_name, (left, top - int(0.3 * text_h)), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    self.TEXT_COLOR, lineType=cv2.LINE_AA)
        return img

    def visualize(self):
        """Render every shape onto the image and show it in a blocking window."""
        img = cv2.imread(self.imgpath)
        for shape in self.shapes:
            img = self.visualize_bbox(img, shape[1], shape[0])
        cv2.imshow('vis', img)
        cv2.waitKey(0)

    # Display the parsed boxes on the source image (opens an OpenCV window).
    visualizer = PascalVocVisualizer(img_path, shapes)
    vis = visualizer.visualize()  # NOTE(review): visualize() returns None; `vis` is unused
03. 数据写入

class PascalVocWriter:
    """Assemble and save a PASCAL VOC XML annotation file.

    Boxes are added via addBndBox() using the same shape tuples produced by
    PascalVocReader: (label, [xmin, xmax, ymin, ymax], color, color, difficult).
    """

    def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None):
        self.foldername = foldername
        self.filename = filename
        self.databaseSrc = databaseSrc
        self.imgSize = imgSize          # (height, width[, depth]) as from ndarray.shape
        self.boxlist = []               # dicts queued by addBndBox()
        self.localImgPath = localImgPath
        self.verified = False
        self.XML_EXT = '.xml'
        self.ENCODE_METHOD = 'utf-8'

    def prettify(self, elem):
        """Return a pretty-printed, tab-indented XML byte string for `elem`."""
        rough_string = ElementTree.tostring(elem, 'utf8')
        root = etree.fromstring(rough_string)
        return etree.tostring(root, pretty_print=True, encoding=self.ENCODE_METHOD).replace("  ".encode(), "\t".encode())

    def ustr(self, x):
        """Normalise `x` to a unicode string (Python 2 compatibility shim)."""
        if sys.version_info < (3, 0, 0):
            from PyQt4.QtCore import QString
            if type(x) == str:
                # Fix: self.DEFAULT_ENCODING was never defined; use ENCODE_METHOD.
                return x.decode(self.ENCODE_METHOD)
            if type(x) == QString:
                return unicode(x.toUtf8(), self.ENCODE_METHOD, 'ignore')
            return x
        else:
            return x

    def genXML(self):
        """Build and return the <annotation> XML root (None if misconfigured)."""
        # Check conditions
        if self.filename is None or \
                self.foldername is None or \
                self.imgSize is None:
            return None

        top = Element('annotation')
        if self.verified:
            top.set('verified', 'yes')

        folder = SubElement(top, 'folder')
        folder.text = self.foldername

        filename = SubElement(top, 'filename')
        filename.text = self.filename

        if self.localImgPath is not None:
            localImgPath = SubElement(top, 'path')
            localImgPath.text = self.localImgPath

        source = SubElement(top, 'source')
        database = SubElement(source, 'database')
        database.text = self.databaseSrc

        # imgSize follows numpy's (height, width[, depth]) convention.
        size_part = SubElement(top, 'size')
        width = SubElement(size_part, 'width')
        height = SubElement(size_part, 'height')
        depth = SubElement(size_part, 'depth')
        width.text = str(self.imgSize[1])
        height.text = str(self.imgSize[0])
        depth.text = str(self.imgSize[2]) if len(self.imgSize) == 3 else '1'

        segmented = SubElement(top, 'segmented')
        segmented.text = '0'
        return top

    def addBndBox(self, shape):
        """Queue one reader-style shape tuple for writing."""
        name = shape[0]
        # Fix: shape[1] is [Xmin, Xmax, Ymin, Ymax] (see PascalVocReader.addShape
        # and PascalVocVisualizer.visualize_bbox); the original unpacked it as
        # (xmin, ymin, xmax, ymax), silently corrupting every written box.
        xmin, xmax, ymin, ymax = shape[1]
        difficult = shape[4]
        bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
        bndbox['name'] = name
        bndbox['difficult'] = difficult
        self.boxlist.append(bndbox)

    def appendObjects(self, top):
        """Append one <object> element per queued box to the XML root `top`."""
        for each_object in self.boxlist:
            object_item = SubElement(top, 'object')
            name = SubElement(object_item, 'name')
            name.text = self.ustr(each_object['name'])
            pose = SubElement(object_item, 'pose')
            pose.text = "Unspecified"
            # A box touching the image border is marked truncated.
            truncated = SubElement(object_item, 'truncated')
            if int(float(each_object['ymax'])) == int(float(self.imgSize[0])) or (int(float(each_object['ymin'])) == 1):
                truncated.text = "1"  # max == height or min
            elif (int(float(each_object['xmax'])) == int(float(self.imgSize[1]))) or (int(float(each_object['xmin'])) == 1):
                truncated.text = "1"  # max == width or min
            else:
                truncated.text = "0"
            difficult = SubElement(object_item, 'difficult')
            difficult.text = str(bool(each_object['difficult']) & 1)  # "1" / "0"
            bndbox = SubElement(object_item, 'bndbox')
            xmin = SubElement(bndbox, 'xmin')
            xmin.text = str(each_object['xmin'])
            ymin = SubElement(bndbox, 'ymin')
            ymin.text = str(each_object['ymin'])
            xmax = SubElement(bndbox, 'xmax')
            xmax.text = str(each_object['xmax'])
            ymax = SubElement(bndbox, 'ymax')
            ymax.text = str(each_object['ymax'])

    def save(self, targetFile=None):
        """Write the XML to `targetFile` (inside foldername) or next to filename."""
        root = self.genXML()
        self.appendObjects(root)
        if targetFile is None:
            # splitext is safer than split('.')[0] for dotted paths.
            out_path = os.path.splitext(self.filename)[0] + self.XML_EXT
        else:
            out_path = os.path.join(self.foldername, targetFile)

        with codecs.open(out_path, 'w', encoding=self.ENCODE_METHOD) as out_file:
            out_file.write(self.prettify(root).decode('utf8'))

    # Round-trip: re-write the parsed shapes to a fresh VOC XML next to the image.
    img = cv2.imread(img_path)
    writer = PascalVocWriter(os.path.dirname(img_path), os.path.basename(img_path), img.shape, localImgPath=os.path.abspath(img_path))
    for shape in shapes:
        writer.addBndBox(shape)
    writer.save('new.xml')

2. 分割数据集。

a. 安装labelme。 labelme是一款开源的图像/视频标注工具,标签可用于目标检测、分割和分类,支持图像的标注的组件有:矩形框,多边形,圆,线,点等,保存为labelme json文件。
    # Python 3 + Qt5 (Recommended)
    pip install labelme
    labelme

本文Labelme版本:4.2.9

b. 操作界面。详细教程
c. labelme json数据操作

可使用labelme工具转换json文件为数据集

    labelme_json_to_dataset *.json

二. 数据集的转换。

1. Labelme json 转 COCO json

Labelme json文件一般只存储单个图片的标记信息,不同于COCO json.

    import os
    import json
    import glob
    import base64
    import io
    import cv2
    import time
    import sys
	
    import numpy as np
    import PIL.Image
labelme to coco

class Lableme2CoCo:
    """Convert a set of labelme JSON files into one COCO instance-segmentation JSON."""

    def __init__(self, img_format):
        self.images = []        # COCO "images" entries
        self.annotations = []   # COCO "annotations" entries
        self.categories = []    # COCO "categories" entries
        self.category_id = 0    # next category id to assign
        self.img_id = 0         # id of the image currently being converted
        self.ann_id = 0         # next annotation id
        self.ann_num = 0        # number of json files read (progress counter)
        self.img_format = img_format  # image extension, e.g. '.png'

    def save_coco_json(self, instance, save_path):
        """Dump the assembled COCO dict to `save_path` (UTF-8, pretty-printed)."""
        # Fix: the original passed a bare open() to json.dump, leaking the handle.
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(instance, f, ensure_ascii=False, indent=4)
        print("\nsave instance json file to {}".format(save_path))

    def to_coco(self, json_path_list):
        """Build and return the COCO dict from a list of labelme json paths."""
        for json_path in json_path_list:
            obj = self.read_jsonfile(json_path)
            self.images.append(self._image(obj, json_path))
            for shape in obj['shapes']:
                self.annotations.append(self._annotation(shape))
                self.ann_id += 1
            self.img_id += 1
        instance = dict()
        instance['info'] = 'instance segmentation'
        instance['license'] = ['license']
        instance['images'] = self.images
        instance['annotations'] = self.annotations
        instance['categories'] = self.categories
        return instance

    def _init_categories(self, label):
        """Register `label` as a new category the first time it is seen."""
        if label not in (c['name'] for c in self.categories):
            self.categories.append({'id': self.category_id, 'name': label})
            self.category_id += 1

    def _image(self, obj, path):
        """Build the COCO "image" entry from a labelme dict (needs 'imageData')."""
        from labelme import utils  # third-party; imported lazily as in the original
        img_x = utils.img_b64_to_arr(obj['imageData'])
        h, w = img_x.shape[:2]  # first two dims for both gray (H, W) and colour (H, W, C)
        image = dict()
        image['height'] = h
        image['width'] = w
        image['id'] = self.img_id
        image['file_name'] = os.path.basename(path).replace(".json", self.img_format)
        return image

    def _annotation(self, shape):
        """Build one COCO annotation from a labelme polygon shape."""
        label = shape['label']
        self._init_categories(label)
        points = shape['points']
        category = next(c for c in self.categories if c['name'] == label)
        annotation = dict()
        annotation['id'] = self.ann_id
        annotation['image_id'] = self.img_id
        annotation['category_id'] = category['id']
        annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
        annotation['bbox'] = self._get_box(points)
        annotation['iscrowd'] = 0
        annotation['area'] = 1.0  # NOTE(review): placeholder; real polygon area is not computed
        return annotation

    def read_jsonfile(self, path):
        """Load a labelme json file, trying UTF-8 first and then GBK."""
        self.ann_num += 1
        sys.stdout.write("\rload json file: {}, number: {}".format(path, self.ann_num))
        sys.stdout.flush()
        try:
            with open(path, "r", encoding='utf-8') as f:
                return json.load(f)
        except (UnicodeDecodeError, json.JSONDecodeError):
            # Legacy files created on Chinese Windows are often GBK-encoded.
            # (Was a bare `except:`, which also hid real I/O errors.)
            with open(path, "r", encoding='gbk') as f:
                return json.load(f)

    def _get_box(self, points):
        """Return the COCO-style [x, y, width, height] bounds of a polygon.

        Fix: the original initialised the maxima to 0, which silently broke
        for polygons with negative coordinates.
        """
        xs = [p[0] for p in points]
        ys = [p[1] for p in points]
        min_x, min_y = min(xs), min(ys)
        return [min_x, min_y, max(xs) - min_x, max(ys) - min_y]

a. 数据集转换
    labelme_json_path = "./labelme_json" # 输入的labelme json文件夹(需要包含imageData字段的内容)
    save_coco_path = "./coco_dataset" # 输出文件夹
    annotations_path = os.path.join(save_coco_path, 'annotations')
    image_path = os.path.join(save_coco_path, 'images')

    # exist_ok avoids the race between the exists() check and makedirs().
    os.makedirs(annotations_path, exist_ok=True)
    os.makedirs(image_path, exist_ok=True)

    # Fix: the original globbed an undefined `input_path`.
    json_list_path = glob.glob(os.path.join(labelme_json_path, '*.json'))

    # Convert all labelme files into a single COCO annotation file.
    l2c_train = Lableme2CoCo(img_format='.png')
    train_instance = l2c_train.to_coco(json_list_path)
    l2c_train.save_coco_json(train_instance, os.path.join(annotations_path, 'trainval.json'))

    print("Start creating images..")
    from labelme.utils import img_b64_to_arr  # fix: was called but never imported
    for json_path in json_list_path:
        with open(json_path) as f:  # fix: bare open() leaked the handle
            data_dict = json.load(f)
        img = img_b64_to_arr(data_dict['imageData'])
        # Fixes: os.path.basepath does not exist (-> basename); `img_format`
        # was an undefined global (-> l2c_train.img_format); a numpy array has
        # no .save() (-> wrap in a PIL image first).
        img_save_path = os.path.join(image_path, os.path.basename(json_path).split('.')[0] + l2c_train.img_format)
        PIL.Image.fromarray(img).save(img_save_path)

    print('\nSave dataset to {}. end!'.format(save_coco_path))
b. 数据集测试
# Smoke-test the converted dataset: load it with pycocotools and report sizes.
from pycocotools.coco import COCO

ann_file = "./coco_dataset/annotations/trainval.json"  # COCO json produced by the conversion above
coco = COCO(annotation_file=ann_file)

n_imgs, n_anns, n_cats = len(coco.imgs), len(coco.anns), len(coco.cats)
print("coco\nimages.size [%05d]\t annotations.size [%05d]\t category.size [%05d]"
      % (n_imgs, n_anns, n_cats))
# An empty image list means the conversion produced nothing usable.
print('error!' if n_imgs < 1 else 'success!')

'''
    loading annotations into memory...
    Done (t=0.00s)
    creating index...
    index created!
    coco
    images.size [00002]	 annotations.size [00002]	 category.size [00001]
    success!
'''

猜你喜欢

转载自www.cnblogs.com/themyth-bky/p/12635054.html