【DOTA】制作Efficientdet训练数据

记录一下用Efficientdet训练DOTA数据集前的数据准备工作

文章目录

- 【DOTA】制作Efficientdet训练数据

DOTA数据集(v1.0)：
15类：‘plane’, ‘baseball-diamond’, ‘bridge’, ‘ground-track-field’,
‘small-vehicle’, ‘large-vehicle’, ‘ship’, ‘tennis-court’,
‘basketball-court’, ‘storage-tank’, ‘soccer-ball-field’,
‘roundabout’, ‘harbor’, ‘swimming-pool’, ‘helicopter’

2806张png格式的航空影像，188282个实例对象，图像尺寸为800×800~4000×4000

在这里插入图片描述

图片：train、val、test
标签：PXXXX.txt，标注格式为

imagesource:影像来源
gsd:地面采样距离(若缺少，则注释为null)
x1 y1 x2 y2 x3 y3 x4 y4 category difficult
#(四个角点坐标，类别，difficult=0不困难/1困难)

Efficientdet要求的训练数据格式：
在这里插入图片描述

1.图片整理

将train文件夹重命名为train2017，val重命名为val2017，二者的标签统一放于./DOTA/labeltxt/下

2.生成train.txt,val.txt

train.txt,val.txt按行存放影像名称

'''
数据集所有影像名称，写入train/val.txt  0-2806
'''
import os

# Output list file and the image folder it describes.
# For the validation split use val.txt / val2017 instead.
txt_path = "/home/DOTA/train.txt"
file_path = "/home/DOTA/train2017/"

# Sorted so the list order is reproducible regardless of filesystem order.
path_list = sorted(os.listdir(file_path))

# Drop only the final extension; splitting on the first "." would truncate
# names that themselves contain dots.
path_name = [os.path.splitext(entry)[0] for entry in path_list]

# Open the file once and overwrite it: appending per name would duplicate
# every entry each time the script is re-run.
with open(txt_path, "w") as file:
    file.writelines(name + "\n" for name in path_name)

得到：
在这里插入图片描述

3.标签格式转换

因为我后面要以xml格式进行数据增强，所以没有把DOTA的txt直接转换为json而是txt-xml,xml-json。

(1) txt-xml

生成的xml位于./DOTA/Annotations_xml/下

'''
DOTA数据集中标签为txt，将其转换为xml

类别数目：15
类别名称：plane, ship, storage-tank, baseball-diamond, tennis-court, basketball-court,
ground-track-field, harbor, bridge, large-vehicle, small-vehicle, helicopter, roundabout,
soccer-ball-field, swimming-pool

'''
import os
import cv2
from xml.dom.minidom import Document
# import importlib,sys



# Category labels accepted when reading DOTA label files; the spelling
# (hyphenated, lower case) must match the label column of the txt files.
category_set = [
    'plane',
    'baseball-diamond',
    'bridge',
    'ground-track-field',
    'small-vehicle',
    'large-vehicle',
    'ship',
    'tennis-court',
    'basketball-court',
    'storage-tank',
    'soccer-ball-field',
    'roundabout',
    'harbor',
    'swimming-pool',
    'helicopter',
]


def custombasename(fullname):
    """Return the last path component of *fullname* without its final extension."""
    stem, _ext = os.path.splitext(fullname)
    return os.path.basename(stem)


def limit_value(a, b):
    """Clamp coordinate *a* so that it is at least 1 and strictly below *b*.

    The lower bound is applied first; a value that is then still ``>= b``
    is replaced by ``b - 1``.
    """
    lower = 1
    raised = lower if a < lower else a
    return b - 1 if raised >= b else raised


def readlabeltxt(txtpath, height, width, hbb=True):
    """Read one DOTA label file and return its boxes clamped to the image.

    An annotation line has the form
    ``x1 y1 x2 y2 x3 y3 x4 y4 category difficult``. Lines with fewer than
    ten whitespace-separated fields are skipped instead of raising
    ``IndexError``; this covers the ``imagesource:``/``gsd:`` header lines
    as well as blank lines, and means files without a header no longer
    lose their first two annotations.

    Args:
        txtpath: Path of the label ``.txt`` file.
        height: Image height, upper bound used when clamping y values.
        width: Image width, upper bound used when clamping x values.
        hbb: If true, reduce the four corners to an axis-aligned box;
            otherwise keep all four corner points.

    Returns:
        A list of boxes. With ``hbb`` each box is
        ``[xmin, ymin, xmax, ymax, label, difficult]``; otherwise it is
        ``[x1, y1, x2, y2, x3, y3, x4, y4, label, difficult]``. Coordinates
        are ints, ``label`` and ``difficult`` are the strings from the file.
    """
    print(txtpath)
    boxes = []
    with open(txtpath, 'r') as f_in:
        for line_no, line in enumerate(f_in):
            # split() tolerates tabs, repeated spaces and trailing newlines.
            fields = line.split()
            if len(fields) < 10:
                # The first two lines are normally the header; anything
                # else that is short but not blank is worth reporting.
                if fields and line_no > 1:
                    print('skip malformed line %d in %s: %s'
                          % (line_no + 1, txtpath, line.strip()))
                continue

            label = fields[8]
            difficult = fields[9]
            if label not in category_set:  # keep only the configured categories
                print(label)
                continue

            # Coordinates may be written as floats; truncate to int pixels.
            xs = [int(float(value)) for value in fields[0:8:2]]
            ys = [int(float(value)) for value in fields[1:8:2]]

            if hbb:
                box = [
                    limit_value(min(xs), width),
                    limit_value(min(ys), height),
                    limit_value(max(xs), width),
                    limit_value(max(ys), height),
                    label,
                    difficult,
                ]
            else:
                box = []
                for x, y in zip(xs, ys):
                    box.append(limit_value(x, width))
                    box.append(limit_value(y, height))
                box.extend([label, difficult])
            boxes.append(box)
    return boxes


def _append_element(doc, parent, tag, text=None):
    """Create ``<tag>`` under *parent*, optionally holding *text*, and return it."""
    node = doc.createElement(tag)
    parent.appendChild(node)
    if text is not None:
        # minidom only accepts str for text nodes, so coerce numbers here.
        node.appendChild(doc.createTextNode(str(text)))
    return node


def writeXml(tmp, imgname, w, h, d, bboxes, hbb=True):
    """Write a PASCAL VOC style annotation XML file for one image.

    The file is saved as ``<tmp>/<imgname without extension>.xml``, encoded
    as UTF-8 and indented with tabs.

    Args:
        tmp: Directory the XML file is written to.
        imgname: Image file name, stored in ``<filename>``.
        w: Image width, stored in ``<size>/<width>``.
        h: Image height, stored in ``<size>/<height>``.
        d: Image depth (channel count), stored in ``<size>/<depth>``.
        bboxes: Boxes whose last two items are the label and the difficult
            flag. The leading items are ``xmin, ymin, xmax, ymax`` when
            ``hbb`` is true, otherwise the eight corner coordinates.
        hbb: Selects the ``<bndbox>`` layout: ``xmin/ymin/xmax/ymax`` when
            true, ``x0/y0 ... x3/y3`` when false.

    Returns:
        None.
    """
    doc = Document()
    annotation = _append_element(doc, doc, 'annotation')

    _append_element(doc, annotation, 'folder', "VOC2007")
    _append_element(doc, annotation, 'filename', imgname)

    source = _append_element(doc, annotation, 'source')
    _append_element(doc, source, 'database', "My Database")
    _append_element(doc, source, 'annotation', "VOC2007")
    _append_element(doc, source, 'image', "flickr")

    owner = _append_element(doc, annotation, 'owner')
    _append_element(doc, owner, 'flickrid', "NULL")
    _append_element(doc, owner, 'name', "idannel")

    size = _append_element(doc, annotation, 'size')
    _append_element(doc, size, 'width', w)
    _append_element(doc, size, 'height', h)
    _append_element(doc, size, 'depth', d)

    _append_element(doc, annotation, 'segmented', "0")

    if hbb:
        coord_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    else:
        coord_tags = ('x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3')

    for bbox in bboxes:
        obj = _append_element(doc, annotation, 'object')
        _append_element(doc, obj, 'name', bbox[-2])
        _append_element(doc, obj, 'pose', "Unspecified")
        _append_element(doc, obj, 'truncated', "0")
        # The flag may arrive as int or str; the helper coerces it, where a
        # bare createTextNode(bbox[-1]) raised TypeError for an int.
        _append_element(doc, obj, 'difficult', bbox[-1])

        bndbox = _append_element(doc, obj, 'bndbox')
        for position, tag in enumerate(coord_tags):
            _append_element(doc, bndbox, tag, bbox[position])

    xml_path = os.path.join(tmp, os.path.splitext(imgname)[0] + '.xml')
    with open(xml_path, 'wb') as f:
        f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))
    return


if __name__ == '__main__':
    # Convert every DOTA txt label that has an image in this split into a
    # VOC style XML file. Switch data_path to val2017 for the other split.
    data_path = '/home/DOTA/train2017'
    images_path = os.path.join(data_path)  # folder with the split's images
    labeltxt_path = os.path.join('/home/DOTA/labeltxt/')  # DOTA txt labels
    anno_new_path = os.path.join('/home/DOTA/Annotations_xml/')  # XML output (hbb form)
    ext = '.png'  # image file extension

    # Create the output folder up front so the first write cannot fail.
    os.makedirs(anno_new_path, exist_ok=True)

    filenames = os.listdir(labeltxt_path)  # one txt per image
    for filename in filenames:
        filepath = os.path.join(labeltxt_path, filename)
        picname = os.path.splitext(filename)[0] + ext
        pic_path = os.path.join(images_path, picname)

        # The image is read only to obtain H, W, D for the XML <size>.
        im = cv2.imread(pic_path)
        if im is None:
            # imread returns None instead of raising; this happens for every
            # label whose image belongs to the other split, because train and
            # val labels share one folder.
            print('skip %s: cannot read image %s' % (filename, pic_path))
            continue
        (H, W, D) = im.shape

        boxes = readlabeltxt(filepath, H, W, hbb=True)  # horizontal boxes
        if len(boxes) == 0:
            print('文件为空', filepath)

        writeXml(anno_new_path, picname, W, H, D, boxes, hbb=True)
        print('正在处理%s' % filename)

(2) xml-json

生成的json位于./annotations/

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os.path as osp
from collections import OrderedDict  #获取一个有序的字典对象
import json
import xmltodict
import mmcv  #在visual stuio里面添加vc++模块再安装
logger = logging.getLogger(__name__)
class PASCALVOC2COCO(object):
    """Converter from PASCAL VOC style XML annotations to one MSCOCO JSON file."""

    def __init__(self):
        # Category name -> COCO category id (1-based) for the 15 DOTA classes.
        self.cat2id = {
            'plane': 1, 'baseball-diamond': 2, 'bridge': 3, 'ground-track-field': 4,
            'small-vehicle': 5, 'large-vehicle': 6, 'ship': 7, 'tennis-court': 8,
            'basketball-court': 9, 'storage-tank': 10, 'soccer-ball-field': 11,
            'roundabout': 12, 'harbor': 13, 'swimming-pool': 14, 'helicopter': 15,
        }

    def get_img_item(self, file_name, image_id, size):
        """Build the COCO ``images`` entry for one image.

        Args:
            file_name: Image file name stored in the entry.
            image_id: Integer id of the image.
            size: Mapping with ``'height'`` and ``'width'`` values that are
                convertible with ``int`` (the parsed XML ``<size>`` node).

        Returns:
            An ``OrderedDict`` with ``file_name``, ``height``, ``width``, ``id``.
        """
        image = OrderedDict()
        image['file_name'] = file_name
        image['height'] = int(size['height'])
        image['width'] = int(size['width'])
        image['id'] = image_id
        return image

    def get_ann_item(self, obj, image_id, ann_id):
        """Build the COCO ``annotations`` entry for one parsed ``<object>``.

        Args:
            obj: Mapping with ``'name'`` and a ``'bndbox'`` mapping holding
                ``xmin``, ``ymin``, ``xmax``, ``ymax``.
            image_id: Id of the image the object belongs to.
            ann_id: Id assigned to this annotation.

        Returns:
            An ``OrderedDict`` describing the annotation.

        Raises:
            KeyError: If ``obj['name']`` is not a key of ``self.cat2id``.
        """
        # xmin/ymin are shifted by one, xmax/ymax are not, so the width and
        # height come out as max - min + 1.
        x1 = int(obj['bndbox']['xmin']) - 1
        y1 = int(obj['bndbox']['ymin']) - 1
        w = int(obj['bndbox']['xmax']) - x1
        h = int(obj['bndbox']['ymax']) - y1

        annotation = OrderedDict()
        # The segmentation is simply the rectangle's four corners.
        annotation['segmentation'] = [[x1, y1, x1, (y1 + h), (x1 + w), (y1 + h), (x1 + w), y1]]
        annotation['area'] = w * h
        annotation['iscrowd'] = 0
        annotation['image_id'] = image_id
        annotation['bbox'] = [x1, y1, w, h]
        annotation['category_id'] = self.cat2id[obj['name']]
        annotation['id'] = ann_id
        # NOTE(review): 'ignore' is hard-coded to 1 for every annotation
        # rather than taken from obj['difficult'] -- confirm this is intended.
        annotation['ignore'] = int(1)
        return annotation

    def get_cat_item(self, name, id):
        """Build the COCO ``categories`` entry for one category.

        Args:
            name: Category name.
            id: Category id.

        Returns:
            An ``OrderedDict`` with ``supercategory``, ``id`` and ``name``.
        """
        category = OrderedDict()
        category['supercategory'] = 'none'
        category['id'] = id
        category['name'] = name
        return category

    @staticmethod
    def _parse_image_id(name):
        """Derive an integer id from an image name, or None if it has no digits.

        Purely numeric names, and numeric names containing underscores such
        as ``2007_000027``, are converted as a whole. For any other name
        (e.g. DOTA's ``P0001``) only the digits are used.
        """
        try:
            return int(name.replace('_', ''))
        except ValueError:
            digits = ''.join(ch for ch in name if ch.isdigit())
            return int(digits) if digits else None

    def convert(self, devkit_path, year, split, save_file,
                img_ext='.jpg', ann_subdir='Annotations'):
        """Convert the XML annotations of one split into a COCO JSON file.

        Args:
            devkit_path: Root folder holding ``<split>.txt`` and the
                annotation sub-folder.
            year: Unused; kept so existing callers keep working.
            split: Name of the split; ``<split>.txt`` lists one image name
                (without extension) per line.
            save_file: Path of the JSON file to write.
            img_ext: Extension appended to each name to form ``file_name``.
            ann_subdir: Sub-folder of ``devkit_path`` containing the XML files.

        Returns:
            None. The result is written to ``save_file``.
        """
        import os  # local import: the file only imports ``os.path`` (as ``osp``)

        split_file = osp.join(devkit_path, '{}.txt'.format(split))
        ann_dir = osp.join(devkit_path, ann_subdir)
        name_list = mmcv.list_from_file(split_file)

        images, annotations = [], []
        used_ids = set()
        ann_id = 1
        for name in name_list:
            if not name:
                # Blank line in the split file.
                continue

            # Image ids must be unique within one JSON file; fall back to the
            # next free integer when the name gives no usable or unique id.
            image_id = self._parse_image_id(name)
            if image_id is None or image_id in used_ids:
                image_id = max(used_ids, default=0) + 1
            used_ids.add(image_id)

            xml_file = osp.join(ann_dir, name + '.xml')
            # Decode explicitly as UTF-8 instead of the platform default.
            with open(xml_file, 'r', encoding='utf-8') as f:
                # force_list keeps 'object' a list even for a single object.
                ann_dict = xmltodict.parse(f.read(), force_list=('object',))
            root = ann_dict['annotation']

            images.append(self.get_img_item(name + img_ext, image_id, root['size']))

            if 'object' in root:
                for obj in root['object']:
                    annotations.append(self.get_ann_item(obj, image_id, ann_id))
                    ann_id += 1
            else:
                logger.warning('{} does not have any object'.format(name))

        categories = [self.get_cat_item(cat_name, cat_id)
                      for cat_name, cat_id in self.cat2id.items()]

        ann = OrderedDict()
        ann['images'] = images
        ann['type'] = 'instances'
        ann['annotations'] = annotations
        ann['categories'] = categories

        # Create the target folder so the final write cannot fail after all
        # the conversion work is done.
        save_dir = osp.dirname(save_file)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        logger.info('Saving annotations to {}'.format(save_file))
        with open(save_file, 'w') as f:
            json.dump(ann, f)


if __name__ == '__main__':
    # Convert the 'train' split; for validation change split to 'val' and
    # point save_file at the matching val json.
    split = 'train'
    year = '2017'
    devkit_path = r'/home/DOTA'
    save_file = '/home/DOTA/annotations/instances_train2017.json'

    converter = PASCALVOC2COCO()
    converter.convert(devkit_path, year, split, save_file)