PyTorch目标检测（九）

今天根据实际数据读取和锚框产生的情况对之前的代码进行了修改

数据读取

import os
import torch
from torchvision import transforms
import torch.utils.data as data
from PIL import Image
import numpy as np
import xml.etree.ElementTree as ET

# Class names of the author's own dataset; not referenced by the loader code
# visible in this file.
classname = ['redbox','matrix','bluebox','beer','redbull','ball','AD','milk']
# The 20 Pascal VOC category names. get_example() turns a name into a class id
# with VOC_CLASSES.index(name) + 1, so the list order defines the ids.
VOC_CLASSES = [  # no explicit background entry; ids produced by get_example start at 1
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor']
# Read the XML annotation and the JPEG image of the i-th sample.
def get_example(self, i):
    """Load one sample: the transformed image plus its normalized box labels.

    Args:
        self: dataset-like object providing ``ids`` (list of sample ids),
            ``root_dir`` (dataset root) and ``transform`` (callable applied
            to the PIL image).
        i: index into ``self.ids``.

    Returns:
        ``(img, target)`` where ``img`` is ``self.transform(image)`` and
        ``target`` is a float tensor of shape (num_objects, 5). Each row is
        ``[class, xmin, ymin, xmax, ymax]``; the class is
        ``VOC_CLASSES.index(name) + 1`` and the coordinates are
        ``(pixel - 1)`` divided by the image width / height.

    Raises:
        ValueError: if an object name is not in ``VOC_CLASSES``.
    """
    id_ = self.ids[i]
    anno = ET.parse(os.path.join(self.root_dir, 'Annotations', id_ + '.xml'))

    # Close the image file as soon as the pixels have been consumed.
    with Image.open(os.path.join(self.root_dir, 'JPEGImages', id_ + '.jpg')) as raw:
        w, h = raw.size
        # The transform normalizes three channels, so force RGB
        # (a no-op copy for images that already are RGB).
        img = self.transform(raw.convert('RGB'))

    boxes = []
    labels = []
    for obj in anno.findall('object'):
        bnd = obj.find('bndbox')
        # float() accepts both "12" and "12.0" style coordinates.
        boxes.append([
            (float(bnd.find('xmin').text) - 1) / w,
            (float(bnd.find('ymin').text) - 1) / h,
            (float(bnd.find('xmax').text) - 1) / w,
            (float(bnd.find('ymax').text) - 1) / h,
        ])
        # NOTE(review): ids are 1-based here; confirm the target encoder
        # downstream does not add a second background offset.
        labels.append([VOC_CLASSES.index(obj.find('name').text) + 1])

    # reshape keeps the tensors 2-D for zero or one object as well; the
    # former squeeze(0) collapsed a single box to 1-D and broke torch.cat.
    bbox = torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4)
    label = torch.tensor(labels, dtype=torch.float32).reshape(-1, 1)
    return img, torch.cat((label, bbox), dim=1)

# Dataset wrapper
class my_date(data.Dataset):
    """VOC-style detection dataset rooted at ``root_dir``.

    Expects ``Annotations/<id>.xml``, ``JPEGImages/<id>.jpg`` and an id list
    at ``ImageSets/<name>.txt`` (one sample id per line).
    """

    def __init__(self, root_dir, name):
        """Record the dataset paths, build the transform and read the id list.

        Args:
            root_dir: dataset root directory.
            name: stem of the id-list file under ``ImageSets``.
        """
        self.root_dir = root_dir
        self.annopath = os.path.join(root_dir, 'Annotations')
        self.imgpath = os.path.join(root_dir, 'JPEGImages')
        self.idpath = os.path.join(root_dir, 'ImageSets', name + '.txt')
        # ToTensor followed by per-channel (x - 0.5) / 0.5.
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])
        # Read the ids with a context manager so the file is closed; strip
        # all surrounding whitespace (handles "\r\n") and skip blank lines,
        # which would otherwise become bogus empty ids.
        with open(self.idpath) as id_file:
            self.ids = [line.strip() for line in id_file if line.strip()]

    def __getitem__(self, idx):
        """Return ``get_example(self, idx)`` for the idx-th sample."""
        return get_example(self, idx)

    def __len__(self):
        """Return the number of sample ids."""
        return len(self.ids)

if __name__ == "__main__":
    # Smoke test: only run when this file is executed directly, so importing
    # the module does not touch the disk. The variable is named ``dataset``
    # because ``data`` is the alias of ``torch.utils.data`` that the class
    # definition above relies on.
    dataset = my_date('./czkdata', 'train')
    print(dataset[0])

这里拿VOC训练集做了实验。读取后每个样本的返回格式为 (tensor(图片), tensor([[类别, xmin, ymin, xmax, ymax], ...]))，其中坐标已按图片的宽高归一化，以便与产生的锚框格式对应。

锚框产生部分

import torch
import math
import numpy as np 
from PIL import Image

# NOTE(review): this opens an image at import time and the handle is not used
# by any function visible in this file -- confirm whether it is still needed.
img = Image.open('./czkdata/JPEGImages/1.jpg')
# Class names of the author's own dataset; not referenced by the visible code.
classname = ['redbox','matrix','bluebox','beer','redbull','ball','AD','milk']

# Anchor box generation
def MultiBoxPrior(feature_map, sizes=(0.75, 0.5, 0.25), ratios=(1, 2, 0.5)):
    """Generate anchor boxes for every position of a feature map.

    Args:
        feature_map: torch tensor of shape [N, C, H, W]; only H, W and the
            device are used.
        sizes: sequence of anchor sizes (0~1).
        ratios: sequence of positive aspect ratios.

    Returns:
        Float32 tensor of shape (1, H * W * (len(sizes) + len(ratios) - 1), 4)
        holding (xmin, ymin, xmax, ymax) in normalized coordinates. Every
        batch element shares the same anchors, hence the leading 1. The
        anchors are placed on the same device as ``feature_map`` so they can
        be combined with tensors computed from it.
    """
    sizes = list(sizes)
    ratios = list(ratios)

    # (size, sqrt(ratio)) pairs: the first size with every ratio, then the
    # remaining sizes with the first ratio.
    pairs = [(sizes[0], math.sqrt(r)) for r in ratios]
    pairs += [(s, math.sqrt(ratios[0])) for s in sizes[1:]]
    pairs = np.array(pairs)

    ss1 = pairs[:, 0] * pairs[:, 1]  # size * sqrt(ratio)
    ss2 = pairs[:, 0] / pairs[:, 1]  # size / sqrt(ratio)

    # Boxes centred on the origin; divide by 2 to get half extents.
    base_anchors = np.stack([-ss1, -ss2, ss1, ss2], axis=1) / 2

    h, w = feature_map.shape[-2:]
    shifts_x = np.arange(0, w) / w
    shifts_y = np.arange(0, h) / h
    shift_x, shift_y = np.meshgrid(shifts_x, shifts_y)
    shift_x = shift_x.reshape(-1)
    shift_y = shift_y.reshape(-1)
    shifts = np.stack((shift_x, shift_y, shift_x, shift_y), axis=1)

    # Broadcast: (H*W, 1, 4) + (1, anchors_per_position, 4).
    anchors = shifts.reshape((-1, 1, 4)) + base_anchors.reshape((1, -1, 4))

    # Follow the feature map's device (CPU fallback for shape-only inputs).
    device = getattr(feature_map, 'device', None)
    return torch.tensor(anchors, dtype=torch.float32, device=device).view(1, -1, 4)

# Intersection area
def compute_intersection(set_1, set_2):
    """Pairwise intersection area between two sets of boxes.

    Args:
        set_1: tensor of shape (n1, 4), boxes as (xmin, ymin, xmax, ymax).
        set_2: tensor of shape (n2, 4), boxes as (xmin, ymin, xmax, ymax).

    Returns:
        Tensor of shape (n1, n2); entry (i, j) is the overlap area of box i
        of ``set_1`` with box j of ``set_2`` (0 when they do not overlap).
    """
    boxes_a = set_1.unsqueeze(1)  # (n1, 1, 4)
    boxes_b = set_2.unsqueeze(0)  # (1, n2, 4)
    # Broadcasting the singleton dimensions yields every (i, j) pairing.
    top_left = torch.max(boxes_a[..., :2], boxes_b[..., :2])      # (n1, n2, 2)
    bottom_right = torch.min(boxes_a[..., 2:], boxes_b[..., 2:])  # (n1, n2, 2)
    # Negative extents mean "no overlap" and are clamped to zero.
    overlap_w, overlap_h = (bottom_right - top_left).clamp(min=0).unbind(dim=-1)
    return overlap_w * overlap_h  # (n1, n2)


def compute_jaccard(set_1, set_2):
    """Pairwise Jaccard overlap (IoU) between two sets of boxes.

    Args:
        set_1: tensor of shape (n1, 4), boxes as (xmin, ymin, xmax, ymax).
        set_2: tensor of shape (n2, 4), boxes as (xmin, ymin, xmax, ymax).

    Returns:
        Tensor of shape (n1, n2) with the IoU of each box in ``set_1`` with
        respect to each box in ``set_2``.
    """
    def _areas(boxes):
        # Area of every box: width * height.
        widths = boxes[:, 2] - boxes[:, 0]
        heights = boxes[:, 3] - boxes[:, 1]
        return widths * heights

    inter = compute_intersection(set_1, set_2)  # (n1, n2)
    # Broadcast (n1, 1) + (1, n2) to the summed areas of every pair.
    summed = _areas(set_1)[:, None] + _areas(set_2)[None, :]
    return inter / (summed - inter)  # (n1, n2)

def assign_anchor(bb, anchor, jaccard_shreshold=0.5):
    """Assign a ground-truth box index to every anchor.

    Args:
        bb: ground-truth bounding boxes, shape (nb, 4).
        anchor: anchors to assign, shape (na, 4).
        jaccard_shreshold: IoU threshold for the second matching pass (the
            misspelt name is kept so keyword callers keep working).

    Returns:
        Long tensor of shape (na,): index of the ground-truth box assigned
        to each anchor, or -1 if the anchor is left unassigned.
    """
    na = anchor.shape[0]
    nb = bb.shape[0]
    assigned_idx = np.full(na, -1, dtype=np.int64)
    # Nothing to match: argmax over an empty axis would raise.
    if na == 0 or nb == 0:
        return torch.tensor(assigned_idx, dtype=torch.long)

    jaccard = compute_jaccard(anchor, bb).detach().cpu().numpy()

    # Pass 1: each ground-truth box claims its best still-free anchor.
    jaccard_cp = jaccard.copy()
    for j in range(nb):
        i = np.argmax(jaccard_cp[:, j])
        if jaccard_cp[i, j] == float("-inf"):
            # Every anchor is taken (more boxes than anchors); stop rather
            # than overwrite an earlier assignment.
            break
        assigned_idx[i] = j
        jaccard_cp[i, :] = float("-inf")  # this anchor is no longer available

    # Pass 2: remaining anchors take their best box if the IoU is high enough.
    best_bb = jaccard.argmax(axis=1)
    best_iou = jaccard[np.arange(na), best_bb]
    take = (assigned_idx == -1) & (best_iou >= jaccard_shreshold)
    assigned_idx[take] = best_bb[take]

    return torch.tensor(assigned_idx, dtype=torch.long)

def xy_to_cxcy(xy):
    """Convert boundary boxes to centre-size form.

    Args:
        xy: tensor of shape (n_boxes, 4) holding (xmin, ymin, xmax, ymax).

    Returns:
        Tensor of shape (n_boxes, 4) holding (cx, cy, w, h).
    """
    mins, maxs = xy[:, :2], xy[:, 2:]
    centers = (maxs + mins) / 2
    extents = maxs - mins
    return torch.cat([centers, extents], 1)

def MultiBoxTarget_one(anc, lab, eps=1e-6):
    """Helper of MultiBoxTarget: build the targets for one image.

    Args:
        anc: anchors, shape (num_anchors, 4).
        lab: labels, shape (num_boxes, 5); each row is
            [class label, xmin, ymin, xmax, ymax]. Rows whose class label is
            negative are treated as padding and ignored.
        eps: small constant that keeps the logarithm away from log(0).

    Returns:
        offset: shape (num_anchors * 4,), regression targets.
        bbox_mask: shape (num_anchors * 4,), 0 for background anchors and 1
            for anchors matched to a box.
        cls_labels: shape (num_anchors,), 0 for background, otherwise the
            matched box's class label + 1.
        All three live on the device of ``anc``.
    """
    device = anc.device
    an = anc.shape[0]
    lab = lab.to(device)
    # Padding rows (class -1) must not claim anchors.
    lab = lab[lab[:, 0] >= 0]

    if lab.shape[0] == 0:
        # No real boxes: every anchor is background.
        assigned_idx = torch.full((an,), -1, dtype=torch.long, device=device)
    else:
        assigned_idx = assign_anchor(lab[:, 1:], anc).to(device)
    positive = assigned_idx >= 0
    bbox_mask = positive.float().unsqueeze(-1).repeat(1, 4)

    cls_lables = torch.zeros(an, dtype=torch.long, device=device)
    assigned_bb = torch.zeros((an, 4), dtype=torch.float32, device=device)
    # Vectorized gather of the matched boxes (replaces a per-anchor loop).
    matched = assigned_idx[positive]
    cls_lables[positive] = lab[matched, 0].long() + 1
    assigned_bb[positive] = lab[matched, 1:].to(assigned_bb.dtype)

    center_anc = xy_to_cxcy(anc)
    center_assigned_bb = xy_to_cxcy(assigned_bb)
    # Centre offsets scaled by 10 and log size ratios scaled by 5. Unmatched
    # anchors keep an all-zero box here; bbox_mask zeroes them out later.
    offset_xy = 10.0 * (center_assigned_bb[:, :2] - center_anc[:, :2]) / center_anc[:, 2:]
    offset_wh = 5.0 * torch.log(eps + center_assigned_bb[:, 2:] / center_anc[:, 2:])
    offset = torch.cat([offset_xy, offset_wh], dim=1)

    return offset.view(-1), bbox_mask.view(-1), cls_lables

def MultiBoxTarget(anchor, label):
    """Build classification and regression targets for a batch.

    Args:
        anchor: torch tensor of anchors, usually produced by MultiBoxPrior,
            shape (1, num_anchors, 4).
        label: ground-truth labels, either a tensor of shape
            (bn, max boxes per image, 5) -- images with fewer boxes may be
            padded with -1 rows -- or a sequence of bn tensors of shape
            (num_boxes_i, 5). The last dimension is
            [class label, xmin, ymin, xmax, ymax].

    Returns:
        List ``[bbox_offset, bbox_mask, cls_labels]``:
        bbox_offset: offsets for every anchor, shape (bn, num_anchors * 4).
        bbox_mask: same shape as bbox_offset; 0 for background anchors and
            1 for matched anchors, aligned element-wise with the offsets.
        cls_labels: class of every anchor, 0 meaning background,
            shape (bn, num_anchors).

    Raises:
        ValueError: if ``anchor`` is not 3-D, or ``label`` is a tensor that
            is not 3-D.
    """
    if len(anchor.shape) != 3:
        raise ValueError("anchor must have shape (1, num_anchors, 4)")
    if torch.is_tensor(label):
        if len(label.shape) != 3:
            raise ValueError("label tensor must have shape (bn, num_boxes, 5)")
        per_image = list(label.unbind(0))
    else:
        per_image = list(label)

    num_anchors = anchor.shape[1]
    if not per_image:
        # torch.stack refuses an empty list; return correctly shaped empties.
        return [
            torch.zeros((0, num_anchors * 4), device=anchor.device),
            torch.zeros((0, num_anchors * 4), device=anchor.device),
            torch.zeros((0, num_anchors), dtype=torch.long, device=anchor.device),
        ]

    batch_offset = []
    batch_mask = []
    batch_cls_labels = []
    for image_label in per_image:
        # The same anchors are shared by every image in the batch.
        offset, bbox_mask, cls_lables = MultiBoxTarget_one(anchor[0, :, :], image_label)
        batch_offset.append(offset)
        batch_mask.append(bbox_mask)
        batch_cls_labels.append(cls_lables)

    bbox_offset = torch.stack(batch_offset)
    bbox_mask = torch.stack(batch_mask)
    cls_lables = torch.stack(batch_cls_labels)

    return [bbox_offset, bbox_mask, cls_lables]
    

# Test section: guarded so that importing this module (e.g. for
# MultiBoxPrior / MultiBoxTarget) does not run and print the demo.
if __name__ == "__main__":
    x = torch.Tensor(1, 3, 561, 728)  # only the shape is used
    y = MultiBoxPrior(x)
    print(y)
    # 5 anchors per position: len(sizes) + len(ratios) - 1 with the defaults.
    boxes = y.reshape(561, 728, 5, 4)
    print(boxes[250, 250, 0, :])
    bbox_scale = torch.tensor((728, 561, 728, 561), dtype=torch.float32)
    ground_truth = torch.tensor([[0, 0.1, 0.08, 0.52, 0.92],[1, 0.55, 0.2, 0.9, 0.88]])
    anchors = torch.tensor([[0, 0.1, 0.2, 0.3], [0.15, 0.2, 0.4, 0.4],
    [0.63, 0.05, 0.88, 0.98], [0.66, 0.45, 0.8, 0.8],[0.57, 0.3, 0.92, 0.9]])
    labels = MultiBoxTarget(anchors.unsqueeze(dim=0),ground_truth.unsqueeze(dim=0))
    print(labels)
    # Offsets with background anchors masked out.
    print(labels[0]*labels[1])

产生锚框时调用 MultiBoxPrior。
计算每个锚框的类别和偏移量时调用 MultiBoxTarget（即上面代码中定义的最后一个函数），使用时直接调用它即可。
各函数的输入和输出在源码的文档字符串中都有标注。

模型部分

import torch
import torchvision
from torch import nn
import numpy as np 
from anchors import MultiBoxPrior

# Anchor sizes per prediction stage: one [s1, s2] pair for each of the five
# stages, passed to MultiBoxPrior in TinySSD.forward.
sizes = [[0.2, 0.272], [0.37, 0.447], [0.54, 0.619], [0.71, 0.79],
         [0.88, 0.961]]
# The same three aspect ratios are used at every stage.
ratios = [[1, 2, 0.5]] * 5
# Class names of the author's own dataset; not referenced by the visible code.
classname = ['redbox','matrix','bluebox','beer','redbull','ball','AD','milk']
# Anchors per feature-map position: 2 sizes + 3 ratios - 1 = 4.
num_anchors = len(sizes[0]) + len(ratios[0]) - 1

# Prediction heads
def cls_predictor(num_anchors, num_classes, input_size):
    """Build the 3x3 conv layer that predicts class scores.

    Args:
        num_anchors: anchors per feature-map position.
        num_classes: number of object classes (one extra output is added
            per anchor for background).
        input_size: number of input channels.

    Returns:
        ``nn.Conv2d`` with ``num_anchors * (num_classes + 1)`` output
        channels, kernel size 3 and padding 1.
    """
    out_channels = num_anchors * (num_classes + 1)
    return nn.Conv2d(
        in_channels=input_size,
        out_channels=out_channels,
        kernel_size=3,
        padding=1,
    )

def bbox_predictor(num_anchors, input_size):
    """Build the 3x3 conv layer that predicts box offsets.

    Args:
        num_anchors: anchors per feature-map position.
        input_size: number of input channels.

    Returns:
        ``nn.Conv2d`` with ``num_anchors * 4`` output channels (four offsets
        per anchor), kernel size 3 and padding 1.
    """
    out_channels = num_anchors * 4
    return nn.Conv2d(
        in_channels=input_size,
        out_channels=out_channels,
        kernel_size=3,
        padding=1,
    )

def flatten_pred(pred):
    """Move channels last and flatten everything but the batch dimension.

    Args:
        pred: tensor of shape (N, C, H, W).

    Returns:
        Tensor of shape (N, H * W * C).
    """
    channels_last = pred.permute(0, 2, 3, 1)
    return torch.flatten(channels_last, start_dim=1)

def concat_preds(preds):
    """Flatten every prediction map and join them along dimension 1.

    Args:
        preds: iterable of (N, C, H, W) tensors, one per prediction stage.

    Returns:
        Tensor of shape (N, total), the per-stage ``flatten_pred`` outputs
        concatenated in order.
    """
    flattened = []
    for stage_pred in preds:
        flattened.append(flatten_pred(stage_pred))
    return torch.cat(tuple(flattened), dim=1)


# Model
class TinySSD(nn.Module):
    """Small SSD-style detector with five prediction stages.

    Each stage consists of a feature block (``blk1`` .. ``blk5``), a class
    head (``cls1`` .. ``cls5``) and a box head (``bbox1`` .. ``bbox5``).
    """

    def __init__(self, num_classes, **kwargs):
        """Create the five stages.

        Args:
            num_classes: number of object classes; every class head emits
                ``num_classes + 1`` scores per anchor.
            **kwargs: forwarded to ``nn.Module.__init__``.
        """
        super().__init__(**kwargs)
        self.num_classes = num_classes

        # Stage 1: three conv-conv-pool groups, 3 -> 16 -> 32 -> 64 channels.
        stem_layers = []
        for c_in, c_out in ((3, 16), (16, 32), (32, 64)):
            stem_layers.extend(self._double_conv_pool(c_in, c_out))
        self.blk1 = nn.Sequential(*stem_layers)
        self.cls1 = cls_predictor(num_anchors, num_classes, 64)
        self.bbox1 = bbox_predictor(num_anchors, 64)

        # Stage 2: 64 -> 128 channels, spatial size halved.
        self.blk2 = nn.Sequential(*self._double_conv_pool(64, 128))
        self.cls2 = cls_predictor(num_anchors, num_classes, 128)
        self.bbox2 = bbox_predictor(num_anchors, 128)

        # Stages 3 and 4: 128 channels, spatial size halved each time.
        self.blk3 = nn.Sequential(*self._double_conv_pool(128, 128))
        self.cls3 = cls_predictor(num_anchors, num_classes, 128)
        self.bbox3 = bbox_predictor(num_anchors, 128)

        self.blk4 = nn.Sequential(*self._double_conv_pool(128, 128))
        self.cls4 = cls_predictor(num_anchors, num_classes, 128)
        self.bbox4 = bbox_predictor(num_anchors, 128)

        # Stage 5: a 4x4 max pool only.
        self.blk5 = nn.MaxPool2d(4)
        self.cls5 = cls_predictor(num_anchors, num_classes, 128)
        self.bbox5 = bbox_predictor(num_anchors, 128)

    @staticmethod
    def _double_conv_pool(c_in, c_out):
        """Return [conv, bn, relu, conv, bn, relu, maxpool(2)] as a list."""
        return [
            nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
            nn.BatchNorm2d(c_out),
            nn.ReLU(),
            nn.Conv2d(c_out, c_out, kernel_size=3, padding=1),
            nn.BatchNorm2d(c_out),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]

    def forward(self, x):
        """Run all stages and collect anchors and predictions.

        Args:
            x: image batch; the first conv expects 3 input channels.

        Returns:
            Tuple ``(anchors, cls_preds, bbox_preds)``: the per-stage anchors
            concatenated along dim 1, the class predictions reshaped to
            (batch, -1, num_classes + 1), and the flattened box predictions
            concatenated along dim 1.
        """
        stage_anchors = []
        stage_cls = []
        stage_bbox = []
        feats = x
        for stage in range(5):
            suffix = str(stage + 1)
            feats = getattr(self, 'blk' + suffix)(feats)
            stage_cls.append(getattr(self, 'cls' + suffix)(feats))
            stage_bbox.append(getattr(self, 'bbox' + suffix)(feats))
            stage_anchors.append(MultiBoxPrior(feats, sizes[stage], ratios[stage]))

        flat_cls = concat_preds(stage_cls)
        batch = flat_cls.shape[0]
        return (
            torch.cat(tuple(stage_anchors), dim=1),
            flat_cls.reshape(batch, -1, self.num_classes + 1),
            concat_preds(stage_bbox),
        )

训练部分

import torch
from torch import nn, autograd
from torch.utils.data import DataLoader
from tinySSD import TinySSD
from load_data import my_date, detection_collate
from anchors import MultiBoxTarget
import visdom

is_visdom = False

# Loss functions: summed cross-entropy for classes, summed L1 for offsets.
cls_loss = nn.CrossEntropyLoss(weight=None, reduction='sum')
bbox_loss = nn.L1Loss(reduction='sum')

def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
    """Return classification loss plus masked box-regression loss.

    Args:
        cls_preds: class scores in the layout ``nn.CrossEntropyLoss`` expects.
        cls_labels: target class indices.
        bbox_preds: predicted box offsets.
        bbox_labels: target box offsets.
        bbox_masks: multiplied into both predictions and targets, so entries
            with mask 0 contribute nothing to the L1 term.

    Returns:
        Scalar tensor: summed cross-entropy + summed masked L1.
    """
    classification = cls_loss(cls_preds, cls_labels)
    masked_preds = bbox_preds * bbox_masks
    masked_targets = bbox_labels * bbox_masks
    regression = bbox_loss(masked_preds, masked_targets)
    return classification + regression

# Evaluation helpers
def cls_eval(cls_preds, cls_labels):
    """Count the anchors whose highest-scoring class equals the label.

    Args:
        cls_preds: class scores with the classes on the last dimension.
        cls_labels: target class indices, broadcastable against the argmax
            of ``cls_preds``.

    Returns:
        0-d numpy array holding the number of correct predictions.
    """
    _, indices = cls_preds.max(-1)
    # Move to host memory first: Tensor.numpy() only works on CPU tensors,
    # and training runs on the GPU.
    return torch.sum(indices == cls_labels).detach().cpu().numpy()

def bbox_eval(bbox_preds, bbox_labels, bbox_masks):
    """Sum the masked absolute error between box targets and predictions.

    Args:
        bbox_preds: predicted box offsets.
        bbox_labels: target box offsets.
        bbox_masks: multiplied into the error, so entries with mask 0 are
            ignored.

    Returns:
        0-d numpy array holding the summed absolute error.
    """
    abs_err = ((bbox_labels - bbox_preds) * bbox_masks).abs()
    # detach + cpu: Tensor.numpy() rejects tensors that require grad or
    # live on the GPU, and predictions are typically both.
    return torch.sum(abs_err).detach().cpu().numpy()

# Visualization
def create_vis_plot(viz, _xlabel, _ylabel, _title, _legend):
    """Create a line plot seeded with a single point at the origin.

    Args:
        viz: object exposing a ``line(X=..., Y=..., opts=...)`` method
            (a ``visdom.Visdom`` instance in ``train``).
        _xlabel: x-axis label.
        _ylabel: y-axis label.
        _title: plot title.
        _legend: legend entries.

    Returns:
        Whatever ``viz.line`` returns (used by the caller as the window
        handle for later updates).
    """
    plot_opts = {
        'xlabel': _xlabel,
        'ylabel': _ylabel,
        'title': _title,
        'legend': _legend,
    }
    origin_x = torch.zeros((1,)).cpu()
    origin_y = torch.zeros((1,)).cpu()
    return viz.line(X=origin_x, Y=origin_y, opts=plot_opts)

def update_vis_plot(viz, iteration, loc, window1, update_type,
                    epoch_size=1):
    """Add one point per plotted series to an existing line plot.

    Args:
        viz: object exposing ``line(X=..., Y=..., win=..., update=...)``.
        iteration: x coordinate of the new point(s).
        loc: value(s) to plot: a number, a tensor or a flat sequence, one
            entry per series.
        window1: window handle returned when the plot was created.
        update_type: update mode forwarded to ``viz.line``.
        epoch_size: divisor applied to ``loc`` before plotting.
    """
    y = torch.as_tensor(loc, dtype=torch.float32).detach().cpu().reshape(1, -1) / epoch_size
    # X must have the same shape as Y; the former fixed (1, 3) X did not
    # match the single loss value that is plotted.
    x = torch.full_like(y, float(iteration))
    viz.line(X=x,
             Y=y,
             win=window1,
             update=update_type
             )
# Training
def train(model, dataloader, epoch_size, max_iter=100000, lr=0.001):
    """Train ``model`` with SGD for ``max_iter`` iterations.

    Args:
        model: detector returning ``(anchors, cls_preds, bbox_preds)``.
        dataloader: yields ``(images, targets)``; ``targets`` is either a
            sequence of per-image (num_boxes, 5) tensors or one 3-D tensor.
        epoch_size: number of iterations that make up one epoch (used only
            for the epoch counter printed to stdout).
        max_iter: total number of iterations to run.
        lr: SGD learning rate.

    Side effects:
        Prints the mean per-sample loss every 10 iterations, optionally plots
        it with visdom, and saves ``TinySSD_VOC_<iteration>.pth`` every 5000
        iterations.
    """
    from torch.nn.utils.rnn import pad_sequence

    # Train on whatever device the model already lives on.
    device = next(model.parameters()).device
    # Optimize the model that was passed in, not a module-level global.
    optimizer = torch.optim.SGD(params=model.parameters(), lr=lr, momentum=0.9)
    data_iter = iter(dataloader)
    if is_visdom:
        viz = visdom.Visdom()
        vis_title = 'TinySSD'
        vis_legend = ['Loss']
        iter_plot = create_vis_plot(viz, 'Iteration', 'Loss', vis_title, vis_legend)
    epoch = 0
    running_loss = 0.0   # plain floats: no autograd graph is kept alive
    running_samples = 0
    for iteration in range(max_iter):
        if iteration != 0 and iteration % epoch_size == 0:
            epoch += 1
            print('epoch:', epoch)
        try:
            images, targets = next(data_iter)
        except StopIteration:
            # Restart the loader once it is exhausted.
            data_iter = iter(dataloader)
            images, targets = next(data_iter)

        images = images.to(device)
        if not torch.is_tensor(targets):
            # MultiBoxTarget needs one (bn, max_boxes, 5) tensor; shorter
            # label lists are padded with -1 rows.
            targets = pad_sequence(
                [ann.detach().cpu().float() for ann in targets],
                batch_first=True, padding_value=-1)
        targets = targets.detach().cpu()

        # forward
        anchors, cls_preds, bbox_preds = model(images)
        # Target assignment is done on the CPU (it goes through numpy
        # anyway); the results are then moved next to the predictions.
        with torch.no_grad():
            bbox_offset, bbox_mask, cls_labels = MultiBoxTarget(anchors.detach().cpu(), targets)
        bbox_offset = bbox_offset.to(device)
        bbox_mask = bbox_mask.to(device)
        cls_labels = cls_labels.to(device)

        # backprop
        optimizer.zero_grad()
        # CrossEntropyLoss wants the class dimension second: (bn, C, anchors).
        loss = calc_loss(cls_preds.permute(0, 2, 1), cls_labels, bbox_preds, bbox_offset, bbox_mask)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        running_samples += images.shape[0]

        # out
        if iteration % 10 == 0:
            # Mean loss per sample since the last report (equals the former
            # fixed /160 for ten batches of 16).
            mean_loss = running_loss / max(running_samples, 1)
            print("iter:%d, loss:%f" % (iteration, mean_loss))
            if is_visdom:
                # Plot before resetting the accumulator.
                update_vis_plot(viz, iteration, mean_loss, iter_plot, 'append', 1)
            running_loss = 0.0
            running_samples = 0
        if iteration % 5000 == 0 and iteration != 0:
            torch.save(model.state_dict(), 'TinySSD_VOC_' + repr(iteration) + '.pth')

# Learning-rate schedule
def adjust_learning_rate(optimizer, gamma, step, lr):
    """Set every parameter group's learning rate to ``lr * gamma ** step``.

    Args:
        optimizer: object whose ``param_groups`` entries carry an ``'lr'`` key.
        gamma: multiplicative decay factor.
        step: number of decay steps taken so far.
        lr: initial learning rate.
    """
    decayed = lr * (gamma ** (step))
    for group in optimizer.param_groups:
        group['lr'] = decayed

 

if __name__ == "__main__":
    # Prefer the GPU, but stay importable / runnable on CPU-only machines.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    SSD = TinySSD(21).to(device)
    # Data loading
    data = my_date('./czkdata', 'train')
    dataloader1 = DataLoader(data, batch_size=16, shuffle=True, collate_fn=detection_collate)
    # Training: train() compares epoch_size with its iteration counter, so
    # pass the number of batches per epoch, not the number of samples.
    train(SSD, dataloader1, len(dataloader1))
训练采用GPU进行。

czkjmohzy

发布了25 篇原创文章 · 获赞 2 · 访问量 2098

私信关注

PyTorch目标检测（九）

今天根据实际数据读取和锚框产生的情况对之前的代码进行了修改

数据读取

锚框产生部分

模型部分

训练部分

猜你喜欢