pytorch 目标检测数据增强cutmix和mixup混合

摘要

这个版本不同于常见的在线增强写法：我采用离线（线下）的方式先合成图片，再修改对应的 json 标注文件。训练和测试效果都非常好，能够大幅度提升分数，只是实现起来比较繁琐。

效果展示

在这里插入图片描述

这种做法是把提取出来的前景物体与背景图片按 0.5 的权重混合。不同于 mixup，贴入的前景不带自身的背景，效果会更好。混合后的图片人眼看起来并不是很舒服，但是能给训练带来明显的涨分。

具体流程

第一步，需要挑出训练时 AP 最低的几个类别。这里提取的是单独的物体，不包含背景，也就是说需要根据 json 文件或 xml 文件中的 bbox 位置把物体裁剪出来，裁剪结果就如上图所示，这些小块作为前景。之后还需要再找一部分图片作为背景，我选择的仍然是 AP 比较低的类别的图片。这些背景图片的标注也需要从全部 xml 信息中提取出来，因为要把新贴入的前景的 bbox 位置加入其中。这里我选择修改 json 文件，因为 xml 比较难操作。

步骤一，提取前景

在提取前景之前，需要先把想要的低 AP 类别的全部图片提取出来。下面的代码既可以提取 jpg 文件，也可以提取 xml 文件，只需要简单修改一下，就能提取我们想要的低 AP 类别。

import os
import numpy as np
from pycocotools.coco import COCO
import random
import cv2
# Collect the file names of every image that carries at least one annotation
# of a selected category, so those images can be singled out afterwards.
coco = COCO('./coco/all.json')
ids1 = coco.getAnnIds()
ids2 = coco.getImgIds()

# Category ids to select (the low-AP classes chosen for augmentation).
low_ap_category_ids = {2, 6, 20, 28, 36, 37}

# loadAnns/loadImgs accept a list of ids and return the records in order,
# so one call replaces the per-id lookups.
items = [
    ann['image_id']
    for ann in coco.loadAnns(ids1)
    if ann['category_id'] in low_ap_category_ids
]
item = list(np.unique(items))
print('++++++++++++++++++')

# A set gives O(1) membership tests instead of scanning the id list for
# every image.
wanted_image_ids = set(item)
name = [
    img['file_name']
    for img in coco.loadImgs(ids2)
    if img['id'] in wanted_image_ids
]
print(name)

import os
import shutil
# Move the XML annotation of every selected image (listed in `name`) from the
# training annotation folder into a separate folder.
xml_train = './coco/xml'
xml_val = './coco/yuanxml/'

# List the destination once; the set is kept up to date as files are moved.
already_moved = set(os.listdir(xml_val))

for i, image_name in enumerate(name):
    print(i)
    random_file = image_name.split('.')[0] + '.xml'
    if random_file in already_moved:
        # Already present in the destination: skip it. (A loop that only
        # advances after a successful move would spin forever here.)
        continue
    source_file = "%s/%s" % (xml_train, random_file)
    shutil.move(source_file, xml_val)
    already_moved.add(random_file)

接下来就是把低 AP 类别的物体单独裁剪下来。这个过程需要利用 xml 文件获取物体位置，然后裁剪并保存。这里我用 label.txt 文件来存储每个前景的类别信息；后面把前景贴入背景时，需要据此修改背景的 json 文件，添加对应的标注信息。

import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import cv2
import os

# Crop the objects of the selected classes out of the source images and
# record each crop's class in ./data/label.txt.
#
# NOTE(review): `cate` (the collection of class names to crop) is not defined
# in this snippet and must be defined before running it.
file = os.listdir('./data/aaa/')
xml = os.listdir('./data/nnnxml/')

# Write a 150x150 copy of every crop already present in ./data/aaa/.
# NOTE(review): this runs before the cropping loop below, so crops produced by
# the current run are only resized on the next run - confirm this is intended.
for i, crop_name in enumerate(file):
    print(i)
    a = cv2.imread('./data/aaa/' + crop_name)
    if a is None:
        print('skip unreadable image: ' + crop_name)
        continue
    b = cv2.resize(a, (150, 150))
    # cv2.imread and cv2.imwrite both use BGR order, so no channel swap is
    # needed between them.
    cv2.imwrite('./data/resize/' + crop_name, b)

for i, xml_name in enumerate(xml):
    root = ET.parse('./data/nnnxml/' + xml_name).getroot()
    # Pair the annotation with its image through the shared base name rather
    # than through the position in an unrelated directory listing.
    # Assumes the image is a .jpg with the same base name - TODO confirm.
    imgfile = './data/nnn/' + xml_name.split('.')[0] + '.jpg'
    imgdata = None
    for member in root.findall('object'):
        name = member.findtext('name')
        if name not in cate:
            continue
        box = member.find('bndbox')
        if box is None:
            continue
        # Read the coordinates by tag so the order of the children inside
        # <bndbox> does not matter.
        value = (name,
                 int(box.findtext('xmin')),
                 int(box.findtext('ymin')),
                 int(box.findtext('xmax')),
                 int(box.findtext('ymax')))
        print(value)

        if imgdata is None:
            print(imgfile)
            imgdata = cv2.imread(imgfile)
            if imgdata is None:
                print('skip unreadable image: ' + imgfile)
                break

        # Image arrays are indexed [row, column], i.e. [y, x].
        newimg = imgdata[value[2]:value[4], value[1]:value[3], :]
        if newimg.shape[0] != 0 and newimg.shape[1] != 0:
            # Crops are keyed by the XML index, so a later object from the
            # same XML replaces an earlier one (the fusion step also uses the
            # last label line for a key).
            pa = './data/aaa/' + str(i) + '.jpg'
            cv2.imwrite(pa, newimg)
            # Record the label only when the crop was actually saved, so
            # label.txt never describes a crop that does not exist.
            with open('./data/label.txt', 'a+') as filen:
                filen.write(str(i) + ' ' + value[0] + '\n')

融合

这一步是把之前得到的前景和背景融合。由于图片大小不一，我设置了两种粘贴尺寸。这只是最初的基础版本，但效果已经十分出色。中间我又创建了一个文件来保存 label。为什么需要两个文件？第一个保存前景的类别，第二个保存每张背景图片贴入前景后对应的类别；因为 os.listdir 返回的顺序并不固定，这样才能保证图片与 label 一一对应。

# Blend one foreground crop into every background image with 0.5/0.5 weights
# and record, per background, the class of the crop that was pasted in.
filea = os.listdir('./data/aaa/')
fileb = os.listdir('./data/ji/')

if fileb and not filea:
    raise RuntimeError('no foreground crops found in ./data/aaa/')

# Map each crop key (first token of a label.txt line) to its class. Later
# lines override earlier ones for the same key.
labels = {}
with open('./data/label.txt', 'r') as filen:
    for line in filen:
        parts = line.rstrip('\n').split(' ')
        if len(parts) > 1:
            labels[parts[0]] = parts[1]

with open('./data/nnn.txt', 'a+') as filen:
    for i, bg_name in enumerate(fileb):
        # Reuse foregrounds cyclically when there are fewer crops than
        # backgrounds instead of failing with an IndexError.
        fg_name = filea[i % len(filea)]
        b = cv2.imread('./data/ji/' + bg_name)
        a = cv2.imread('./data/aaa/' + fg_name)
        if a is None or b is None:
            print('skip unreadable pair: ' + bg_name + ' / ' + fg_name)
            continue

        dd = fg_name.split('.')[0]
        cls = labels.get(dd, '')
        print(cls)
        yiyi = bg_name.split('.')[0]
        filen.write(yiyi + ' ' + cls + '\n')

        rows, cols = b.shape[:2]
        # The whole background is scaled by 0.5; the crop is then added at
        # 0.5 inside the paste region only.
        c = b * 0.5
        if rows > 350 and cols > 350:
            # The paste region 100:350 is 250 pixels on each side.
            a = cv2.resize(a, (250, 250))
            c[100:350, 100:350, :] += a * 0.5
        else:
            a = cv2.resize(a, (50, 50))
            c[:50, :50, :] += a * 0.5

        # Both inputs were read by cv2 (BGR), so no channel reversal is
        # needed; convert to 8-bit before encoding.
        pa = './data/new2/' + bg_name
        cv2.imwrite(pa, c.astype('uint8'))

json修改

前一步只是完成了图片融合，json 文件并没有改变，所以还需要为每张图片添加新贴入前景的位置和类别。

import os
import cv2
import json
import xml.dom.minidom
import xml.etree.ElementTree as ET

# Build a COCO-style annotation file for the blended images: the original
# boxes come from the per-image XML files, plus one extra box per image for
# the pasted foreground patch.

# Root data directory containing the image folder and the annotation folder
# (adjust this path to your own setup).
data_dir = './data/'

# NOTE(review): the fusion script writes its output to './data/new2/' while
# this script reads './data/new' - confirm which folder is intended.
image_file_dir = os.path.join(data_dir, 'new')
xml_file_dir = os.path.join(data_dir, 'jixml')

annotations_info = {'images': [], 'annotations': [], 'categories': []}

categories_map = {'一次性快餐盒': 1, '书籍纸张': 2, '充电宝': 3, '剩饭剩菜': 4, '包': 5, '垃圾桶': 6,
                  '塑料器皿': 7, '塑料玩具': 8, '塑料衣架': 9, '大骨头': 10, '干电池': 11,
                  '快递纸袋': 12, '插头电线': 13, '旧衣服': 14, '易拉罐': 15}

for key in categories_map:
    categoriy_info = {"id": categories_map[key], "name": key}
    annotations_info['categories'].append(categoriy_info)

file_names = [image_file_name.split('.')[0]
              for image_file_name in os.listdir(image_file_dir)]

# Map each image base name to the class of the patch pasted into it. The file
# is read once; later lines override earlier ones for the same name.
pasted_labels = {}
with open('./data/nnn.txt', 'r') as filen:
    for line in filen:
        parts = line.split(' ')
        if len(parts) > 1:
            pasted_labels[parts[0]] = parts[1].split('\n')[0]


def _tag_text(node, tag):
    """Return the text of the first `tag` element found under `node`."""
    return node.getElementsByTagName(tag)[0].firstChild.data


ann_id = 1
for i, file_name in enumerate(file_names):
    print(i)
    image_file_name = file_name + '.jpg'
    print(file_name)
    xml_file_name = file_name + '.xml'
    image_file_path = os.path.join(image_file_dir, image_file_name)
    xml_file_path = os.path.join(xml_file_dir, xml_file_name)

    # Set the image id up front so that the pasted-patch annotation below is
    # always attached to this image, even when no XML object is kept.
    image_id = i + 1

    # Only the shape is needed, so no colour conversion is performed.
    image = cv2.imread(image_file_path)
    if image is None:
        raise FileNotFoundError('cannot read image: ' + image_file_path)
    height, width = image.shape[:2]
    image_info = {'file_name': image_file_name, 'id': image_id,
                  'height': height, 'width': width}
    annotations_info['images'].append(image_info)

    collection = xml.dom.minidom.parse(xml_file_path).documentElement

    # Read name and box per <object>, so a <name> tag elsewhere in the
    # document cannot shift names against boxes.
    for obj in collection.getElementsByTagName('object'):
        obj_name = _tag_text(obj, 'name')
        if obj_name not in categories_map:
            continue
        x1 = int(_tag_text(obj, 'xmin'))
        y1 = int(_tag_text(obj, 'ymin'))
        x2 = int(_tag_text(obj, 'xmax'))
        y2 = int(_tag_text(obj, 'ymax'))

        if x2 == width:
            x2 -= 1
        if y2 == height:
            y2 -= 1

        x, y = x1, y1
        w, h = x2 - x1 + 1, y2 - y1 + 1
        category_id = categories_map[obj_name]
        area = w * h
        annotation_info = {"id": ann_id, "image_id": image_id, "bbox": [x, y, w, h],
                           "category_id": category_id, "area": area, "iscrowd": 0}
        annotations_info['annotations'].append(annotation_info)
        ann_id += 1

    # Class of the pasted patch; falls back to '书籍纸张' when the image has
    # no entry in nnn.txt. A label missing from categories_map raises
    # KeyError.
    d = pasted_labels.get(file_name, '书籍纸张')
    print(d)
    category_id = categories_map[d]

    # The fusion step pastes into [100:350, 100:350] for images larger than
    # 350x350 and into [:50, :50] otherwise; slice ends are exclusive, so the
    # patch is exactly 250 (or 50) pixels on each side.
    if width > 350 and height > 350:
        x, y, side = 100, 100, 250
    else:
        x, y, side = 0, 0, 50
    w = min(side, width - x)
    h = min(side, height - y)
    area = w * h
    annotation_info = {"id": ann_id, "image_id": image_id, "bbox": [x, y, w, h],
                       "category_id": category_id, "area": area, "iscrowd": 0}
    annotations_info['annotations'].append(annotation_info)
    ann_id += 1

with open('./data/new.json', 'w') as f:
    json.dump(annotations_info, f, indent=4)

print('---整理后的标注文件---')
print('所有图片的数量：', len(annotations_info['images']))
print('所有标注的数量：', len(annotations_info['annotations']))
print('所有类别的数量：', len(annotations_info['categories']))

总结

效果图展示，未加之前的ap如下，
在这里插入图片描述
加入训练之后

分数提升了一大截。这也只是最基础的版本，还可以设计得更合理一些。