pytorch 目标检测数据增强详细讲解

摘要

目标检测中的数据增强是比较复杂，每一次改变图像同时也要考虑boxes的信息，比起目标分类更加局限性，比如翻转，左右翻转一般影响不大，但上下翻转造成的影响就截然不同。下面操作坐标点全是xyxy形式

resize操作

先来看下对比，在图片大小改变的同时也要改变boxes的位置信息，第一张是原图

在这里插入图片描述
我截图是一样大小，可以看出框的位置依旧准确，原图是（480，364）变化后的是（300,300），对比之前的照片清晰度明显下降，所以训练照片差距太多再好的模型也难以起到很好的效果。下面是代码实现

import math
import random
import torch
from PIL import Image, ImageDraw


def resize(img, boxes, size, max_size=1000):
    w, h = img.size    #(480, 364)
    print('wwww',w)
    print('hhhh',h)
    if isinstance(size, int):   #这里是按照高和宽等比例缩放
        print('is============')
        size_min = min(w,h)     #这里我以输入300为例
        size_max = max(w,h)       #首先找到最小额一边，缩放的边是364，
        sw = sh = float(size) / size_min   #计算出高缩放的比例，将宽缩放到同等比例
        print('sw',sw)
        print('sh',sh)
        if sw * size_max > max_size: #放置缩放过大
            sw = sh = float(max_size) / size_max
            print('ifl====',sw)
        ow = int(w * sw + 0.5)   #向上取整
        print('ow',ow)
        oh = int(h * sh + 0.5)
        print('oh',oh)
    else:
        print('else============')    #这里输入的就是（300,300）
        ow, oh = size
        print('ow',ow)
        print('oh',oh)
        sw = float(ow) / w
        print('sw',sw)
        sh = float(oh) / h   #计算高和框的缩放比例，boxes乘以这个比例即可
        print('sh',sh)
        print('boxex',boxes*torch.Tensor([sw,sh,sw,sh]))
    return img.resize((ow,oh), Image.BILINEAR), \
           boxes*torch.Tensor([sw,sh,sw,sh])
def draw(img, boxes):
    draw = ImageDraw.Draw(img)
    for box in boxes:
        draw.rectangle(list(box), outline='red')
    img.show()


def test():
    img = Image.open('G:\detection\image/1.jpg')
    boxes = torch.Tensor([[167, 63, 280, 202]])
    img, boxes = resize(img, boxes,(300,300))
    print(img.size)
    draw(img, boxes)

test()

数据增强的写法有二种，一种是利用库中自带的图像操作，自己只需要写boxes的变化规则就可以，另一种全部自己实现，在这里我肯定利用前者，比较方便。
resize的变化规则这里我写了二种，一种是输入300，另一个边按照长宽比进行缩放，另一种输入（300,300）高和宽固定缩放到（300,300）的大小。

center_crop操作

也先来看下效果
在这里插入图片描述
这个操作只是将你输入的大小部分截取出来，其他的没有任何变化，使用这一个操作对数据集不同也要做相应的改变，我这张照片只标注了一个人，在中间的位置，如果有其他边缘位置的信息，你就需要利用平移在取中间部分效果才好，不能漏掉原有的信息。具体代码实现

def center_crop(img, boxes, size):
    w, h = img.size
    ow, oh = size
    i = int(round((h - oh) / 2.))   #同样我们只需要将照片二边需要减掉的高和宽计算出来
    print('i',i)
    j = int(round((w - ow) / 2.))
    print('j',j)
    img = img.crop((j, i, j+ow, i+oh))  #利用自带的图像处理，选取图像固定位置
    print(img.size)
    print('bo',boxes)
    boxes -= torch.Tensor([j,i,j,i])   #将boxes减去就可以需要的boxes位置信息
    print('boxes',boxes)
    boxes[:,0::2].clamp_(min=0, max=ow-1)  #clamp函数是用来防止超出边界
    print('bo1',boxes)
    boxes[:,1::2].clamp_(min=0, max=oh-1)
    print('bo2',boxes)
    return img, boxes

random_flip操作

随机水平翻转，这个算是最常用的操作了，boxes的位置计算也简单
在这里插入图片描述

def random_flip(img, boxes):

    if random.random() < 0.5:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)  #0.5的概率随机翻转
        w = img.width
        xmin = w - boxes[:,2]   #计算左顶角的位置   高的位置都不变
        xmax = w - boxes[:,0]   #计算右低角的位置
        boxes[:,0] = xmin
        boxes[:,2] = xmax
    return img, boxes

random_crop操作

随机截取是比较复杂的，需要截取的规范，不能超出边界，还要保证框的位置在截取范围内，所以就需要尝试截取位置，如果尝试10次都失败的话就截取中间部分
在这里插入图片描述
这个操作也只是把周围的背景随机的剪掉，

def random_crop(img, boxes):
    success = False
    for attempt in range(10):
        area = img.size[0] * img.size[1]
        print('000',img.size[0])
        print('111',img.size[1])
        print('area',area)
        target_area = random.uniform(0.56, 1.0) * area
        print('target_area',target_area)    #生成随机大小区域
        aspect_ratio = random.uniform(3. / 4, 4. / 3)   #再次随机大小
        print('aspect_ratio',aspect_ratio)

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        print('w',w)  #计算高和宽
        h = int(round(math.sqrt(target_area / aspect_ratio)))
        print('h',h)
        if random.random() < 0.5:
            print('=============0.5')   
            w, h = h, w

        if w <= img.size[0] and h <= img.size[1]:  #宝成截取规范，不能超出边界
            print('+++++++++++wh')
            x = random.randint(0, img.size[0] - w)
            print(x)
            y = random.randint(0, img.size[1] - h)
            print(y)
            success = True
            break

    # Fallback
    if not success:
        print('NONO',success)   #尝试10次都不满足的话就直接中间截取
        w = h = min(img.size[0], img.size[1])
        print('w',w)
        x = (img.size[0] - w) // 2
        print('x',x)
        y = (img.size[1] - h) // 2
        print('y',y)
    print('box',boxes)
    print('img',img.size)
    img = img.crop((x, y, x+w, y+h))
    print('bianhua img',img.size)
    boxes -= torch.Tensor([x,y,x,y])
    print('boxes',boxes)
    boxes[:,0::2].clamp_(min=0, max=w-1)
    boxes[:,1::2].clamp_(min=0, max=h-1)
    print('final',boxes)
    return img, boxes

(480, 364)
000 480
111 364
area 174720
target_area 99011.19318377912
aspect_ratio 1.1786427101246018
w 342
h 290
+++++++++++wh
30
41
box tensor([[167.,  63., 280., 203.]])
img (480, 364)
bianhua img (342, 290)
boxes tensor([[137.,  22., 250., 162.]])
final tensor([[137.,  22., 250., 162.]])
(342, 290)

全部代码


import math
import random
import torch
from PIL import Image, ImageDraw


def resize(img, boxes, size, max_size=1000):
    w, h = img.size
    print('wwww',w)
    print('hhhh',h)
    if isinstance(size, int):
        print('is============')
        size_min = min(w,h)
        size_max = max(w,h)
        sw = sh = float(size) / size_min
        print('sw',sw)
        print('sh',sh)
        if sw * size_max > max_size:
            sw = sh = float(max_size) / size_max
            print('ifl====',sw)
        ow = int(w * sw + 0.5)
        print('ow',ow)
        oh = int(h * sh + 0.5)
        print('oh',oh)
    else:
        print('else============')
        ow, oh = size
        print('ow',ow)
        print('oh',oh)
        sw = float(ow) / w
        print('sw',sw)
        sh = float(oh) / h
        print('sh',sh)
        print('boxex',boxes*torch.Tensor([sw,sh,sw,sh]))
    return img.resize((ow,oh), Image.BILINEAR), \
           boxes*torch.Tensor([sw,sh,sw,sh])
def center_crop(img, boxes, size):
    w, h = img.size
    ow, oh = size
    i = int(round((h - oh) / 2.))
    print('iiii==',i)
    j = int(round((w - ow) / 2.))
    print('jjjjjjj',j)
    img = img.crop((j, i, j+ow, i+oh))
    print(img.size)
    print('bo',boxes)
    boxes -= torch.Tensor([j,i,j,i])
    print('boxes',boxes)
    boxes[:,0::2].clamp_(min=0, max=ow-1)
    print('bo1',boxes)
    boxes[:,1::2].clamp_(min=0, max=oh-1)
    print('bo2',boxes)
    return img, boxes
def random_flip(img, boxes):
    if random.random() < 0.5:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)
        w = img.width
        print('boex',boxes)
        xmin = w - boxes[:,2]
        print('xmin',xmin)
        xmax = w - boxes[:,0]
        print('xmax',xmax)
        boxes[:,0] = xmin
        boxes[:,2] = xmax
        print('final',boxes)
    return img, boxes

def draw(img, boxes):
    draw = ImageDraw.Draw(img)
    for box in boxes:
        draw.rectangle(list(box), outline='red')
    img.show()

def random_crop(img, boxes):
    success = False
    for attempt in range(1):
        area = img.size[0] * img.size[1]
        print('000',img.size[0])
        print('111',img.size[1])
        print('area',area)
        target_area = random.uniform(0.56, 1.0) * area
        print('target_area',target_area)
        aspect_ratio = random.uniform(3. / 4, 4. / 3)
        print('aspect_ratio',aspect_ratio)

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        print('w',w)
        h = int(round(math.sqrt(target_area / aspect_ratio)))
        print('h',h)
        if random.random() < 0.5:
            print('=============0.5')
            w, h = h, w

        if w <= img.size[0] and h <= img.size[1]:
            print('+++++++++++wh')
            x = random.randint(0, img.size[0] - w)
            print(x)
            y = random.randint(0, img.size[1] - h)
            print(y)
            success = True
            break

    # Fallback
    if not success:
        print('NONO',success)
        w = h = min(img.size[0], img.size[1])
        print('w',w)
        x = (img.size[0] - w) // 2
        print('x',x)
        y = (img.size[1] - h) // 2
        print('y',y)
    print('box',boxes)
    print('img',img.size)
    img = img.crop((x, y, x+w, y+h))
    print('bianhua img',img.size)
    boxes -= torch.Tensor([x,y,x,y])
    print('boxes',boxes)
    boxes[:,0::2].clamp_(min=0, max=w-1)
    boxes[:,1::2].clamp_(min=0, max=h-1)
    print('final',boxes)
    return img, boxes



img = Image.open('G:\detection\image/1.jpg')
print(img.size)
boxes = torch.Tensor([[167, 63, 280, 203]])
draw(img,boxes)
img, boxes = random_crop(img, boxes)
print(img.size)
draw(img, boxes)

自己可以去查看img自带的更多操作，配合自己写boxes部分可以写出更多的数据增强操作，这里的写法没有封装成compose的操作，直接使用函数就可以。

具体使用

 def __getitem__(self, idx):
 		boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size,size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size,size))

        img = self.transform(img)
        return img, boxes, labels

在getitem中返回目标检测基本大部分都是返回img，boxes，labels的形式，当然我用的yolo3就不是，但是原理是一样的，你只需要将img，和boxes经过之前写好的函数就行，在封装成你自己的形式就可以

视觉盛宴

原创文章 25 获赞 35 访问量 5188

关注私信