哈哈哈哈哈，终于跑起来了，GitHub上找的代码，改了很多bug，超级开心，分享给大家！

实测可以在Colab上运行，需要GPU

注意

配置信息是我百度的，就是解决数据集路径问题。
虽然可以运行，但需要相应的数据集文件，有需要可以私聊，我是搬运工。
不了解YoLo的可以点下面这个链接

一点心得

这次的任务说实话自己造轮子的话很难完成，所以这次借鉴了Github，
之前也没用过，白嫖还是很爽的。
感谢布置任务的学长，最起码让我懂了深度学习训练模型究竟是个什么事情。
目前仅仅是跑起来就废了好大劲，给自己一天时间读读代码，
最终到能够自己复现出来的地步。
新年事情很多，要走很多亲戚，但实际上给的时间我觉得足够了…
自己也需要加速成长了！加油！

文章目录

一点心得
一、配置
二、模型
三、输出处理
四、损失函数
五、数据设定
六、训练
七、测试

来自： github

一、配置

!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
!google-drive-ocamlfuse -headless -id={
    
    creds.client_id} -secret={
    
    creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {
    
    vcode} | google-drive-ocamlfuse -headless -id={
    
    creds.client_id} -secret={
    
    creds.client_secret}

from google.colab import drive
drive.mount('/content/drive/')

二、模型

import torch.nn as nn

class Convention(nn.Module):
    def __init__(self,in_channels,out_channels,conv_size,conv_stride,padding):
        super(Convention,self).__init__()
        self.Conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, conv_size, conv_stride, padding),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU()
        )

    def forward(self, x):
        return self.Conv(x)

class YOLO_V1(nn.Module):

    def __init__(self,B=2,Classes_Num=20):
        super(YOLO_V1,self).__init__()
        self.B = B
        self.Classes_Num = Classes_Num

        self.Conv_448 = nn.Sequential(
            Convention(3, 64, 7, 2, 3),
            nn.MaxPool2d(2,2),
        )

        self.Conv_112 = nn.Sequential(
            Convention(64, 192, 3, 1, 1),
            nn.MaxPool2d(2, 2),
        )

        self.Conv_56 = nn.Sequential(
            Convention(192, 128, 1, 1, 0),
            Convention(128, 256, 3, 1, 1),
            Convention(256, 256, 1, 1, 0),
            Convention(256, 512, 3, 1, 1),
            nn.MaxPool2d(2, 2),
        )

        self.Conv_28 = nn.Sequential(
            Convention(512, 256, 1, 1, 0),
            Convention(256, 512, 3, 1, 1),
            Convention(512, 256, 1, 1, 0),
            Convention(256, 512, 3, 1, 1),
            Convention(512, 256, 1, 1, 0),
            Convention(256, 512, 3, 1, 1),
            Convention(512, 256, 1, 1, 0),
            Convention(256, 512, 3, 1, 1),
            Convention(512,512,1,1,0),
            Convention(512,1024,3,1,1),
            nn.MaxPool2d(2, 2),
        )

        self.Conv_14 = nn.Sequential(
            Convention(1024,512,1,1,0),
            Convention(512,1024,3,1,1),
            Convention(1024, 512, 1, 1, 0),
            Convention(512, 1024, 3, 1, 1),
            Convention(1024, 1024, 3, 1, 1),
            Convention(1024, 1024, 3, 2, 1),
        )

        self.Conv_7 = nn.Sequential(
            Convention(1024,1024,3,1,1),
            Convention(1024, 1024, 3, 1, 1),
        )

        self.Fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(7*7*1024,4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096,7 * 7 * (B*5 + Classes_Num)),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.Conv_448(x)
        x = self.Conv_112(x)
        x = self.Conv_56(x)
        x = self.Conv_28(x)
        x = self.Conv_14(x)
        x = self.Conv_7(x)
        # batch_size * channel * height * weight -> batch_size * height * weight * channel
        x = x.permute(0,2,3,1).contiguous()
        x = x.view(-1,7*7*1024)
        x = self.Fc(x)
        x = x.view((-1,7,7,(self.B*5 + self.Classes_Num)))
        return x

        # 定义权值初始化
    def initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.xavier_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                torch.nn.init.normal_(m.weight.data, 0, 0.01)
                m.bias.data.zero_()

三、输出处理

def iou(self, box1, box2):  # 计算两个box的IoU值
    # box: lx-左上x ly-左上y rx-右下x ry-右下y 图像向右为y 向下为x
    # 1. 获取交集的矩形左上和右下坐标
    interLX = max(box1[0],box2[0])
    interLY = max(box1[1],box2[1])
    interRX = min(box1[2],box2[2])
    interRY = min(box1[3],box2[3])

    # 2. 计算两个矩形各自的面积
    Area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    Area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # 3. 不存在交集
    if interRX < interLX or interRY < interLY:
        return 0

    # 4. 计算IOU
    interSection = (interRX - interLX) * (interRY - interLY)
    return interSection / (Area1 + Area2 - interSection)

import numpy as np
# 这边要求的bounding_boxes为处理后的实际的样子
def NMS(bounding_boxes,confidence_threshold,iou_threshold):
    # boxRow : x y dx dy c
    # 1. 初步筛选,先把grid cell预测的两个bounding box取出置信度较高的那个
    boxes = []
    for boxRow in bounding_boxes:
        # grid cell预测出的两个box,含有物体的置信度没有达到阈值
        if boxRow[4] < confidence_threshold or boxRow[9] < confidence_threshold:
            continue
        # 获取物体的预测概率
        classes = boxRow[10:-1]
        class_probality_index = np.argmax(classes,axis=1)
        class_probality = classes[class_probality_index]
        # 选择拥有更大置信度的box
        if boxRow[4] > boxRow[9]:
            box = boxRow[0:4]
        else:
            box = boxRow[5:9]
        # box : x y dx dy class_probality_index class_probality
        box.append(class_probality_index)
        box.append(class_probality)
        boxes.append(box)

    # 2. 循环直到待筛选的box集合为空
    predicted_boxes = []
    while len(boxes) != 0:
        # 对box集合按照置信度从大到小排序
        boxes = sorted(boxes, key=(lambda x : [x[4]]), reverse=True)
        # 确定含有最大值信度的box会被选中
        choiced_box = boxes[0]
        predicted_boxes.append(choiced_box)
        for index in len(boxes):
            # 如果冲突的box的iou值已经大于阈值 需要丢弃
            if iou(boxes[index],choiced_box) > iou_threshold:
                boxes.pop(index)

    return predicted_boxes

四、损失函数

import torch.nn as nn
import math
import torch

class Yolov1_Loss(nn.Module):

    def __init__(self, S=7, B=2, Classes=20, l_coord=5, l_noobj=0.5):
        # 有物体的box损失权重设为l_coord,没有物体的box损失权重设置为l_noobj
        super(Yolov1_Loss, self).__init__()
        self.S = S
        self.B = B
        self.Classes = Classes
        self.l_coord = l_coord
        self.l_noobj = l_noobj

    def iou(self, bounding_box, ground_box, gridX, gridY, img_size=448, grid_size=64):  # 计算两个box的IoU值
        # predict_box: [centerX, centerY, width, height]
        # ground_box : [centerX / self.grid_cell_size - indexJ,centerY / self.grid_cell_size - indexI,(xmax-xmin)/self.img_size,(ymax-ymin)/self.img_size,1,xmin,ymin,xmax,ymax,(xmax-xmin)*(ymax-ymin)
        # 1.  预处理 predict_box  变为  左上X,Y  右下X,Y  两个边界点的坐标 避免浮点误差 先还原成整数
        # 不要共用引用
        predict_box = list([0,0,0,0])
        predict_box[0] = (int)(gridX + bounding_box[0] * grid_size)
        predict_box[1] = (int)(gridY + bounding_box[1] * grid_size)
        predict_box[2] = (int)(bounding_box[2] * img_size)
        predict_box[3] = (int)(bounding_box[3] * img_size)

        # [xmin,ymin,xmax,ymax]
        predict_coord = list([max(0, predict_box[0] - predict_box[2] / 2), max(0, predict_box[1] - predict_box[3] / 2),min(img_size - 1, predict_box[0] + predict_box[2] / 2), min(img_size - 1, predict_box[1] + predict_box[3] / 2)])
        predict_Area = (predict_coord[2] - predict_coord[0]) * (predict_coord[3] - predict_coord[1])

        ground_coord = list([ground_box[5],ground_box[6],ground_box[7],ground_box[8]])
        ground_Area = (ground_coord[2] - ground_coord[0]) * (ground_coord[3] - ground_coord[1])

        # 存储格式 xmin ymin xmax ymax

        # 2.计算交集的面积 左边的大者 右边的小者 上边的大者 下边的小者
        CrossLX = max(predict_coord[0], ground_coord[0])
        CrossRX = min(predict_coord[2], ground_coord[2])
        CrossUY = max(predict_coord[1], ground_coord[1])
        CrossDY = min(predict_coord[3], ground_coord[3])

        if CrossRX < CrossLX or CrossDY < CrossUY: # 没有交集
            return 0

        interSection = (CrossRX - CrossLX + 1) * (CrossDY - CrossUY + 1)
        return interSection / (predict_Area + ground_Area - interSection)

    def forward(self, bounding_boxes, ground_truth, batch_size=32,grid_size=64, img_size=448):  # 输入是 S * S * ( 2 * B + Classes)
        # 定义三个计算损失的变量 正样本定位损失 样本置信度损失 样本类别损失
        loss = 0
        loss_coord = 0
        loss_confidence = 0
        loss_classes = 0
        iou_sum = 0
        object_num = 0
        mseLoss = nn.MSELoss()
        for batch in range(len(bounding_boxes)):
            for i in range(self.S):  # 先行 - Y
                for j in range(self.S):  # 后列 - X
                    # 取bounding box中置信度更大的框
                    if bounding_boxes[batch][i][j][4] < bounding_boxes[batch][i][j][9]:
                        predict_box = bounding_boxes[batch][i][j][5:]
                        # 另一个框是负样本
                        loss = loss + self.l_noobj * torch.pow(bounding_boxes[batch][i][j][4], 2)
                        loss_confidence += self.l_noobj * math.pow(bounding_boxes[batch][i][j][4].item(), 2)
                    else:
                        predict_box = bounding_boxes[batch][i][j][0:5]
                        predict_box = torch.cat((predict_box, bounding_boxes[batch][i][j][10:]), dim=0)
                        # 另一个框是负样本
                        loss = loss + self.l_noobj * torch.pow(bounding_boxes[batch][i][j][9], 2)
                        loss_confidence += self.l_noobj * math.pow(bounding_boxes[batch][i][j][9].item(), 2)
                    # 为拥有最大置信度的bounding_box找到最大iou的groundtruth_box
                    if ground_truth[batch][i][j][0][9] == 0:  # 面积为0的grount_truth 为了形状相同强行拼接的无用的0-box negative-sample
                        loss = loss + self.l_noobj * torch.pow(predict_box[4], 2)
                        loss_confidence += self.l_noobj * math.pow(predict_box[4].item(), 2)
                    else:
                        object_num = object_num + 1
                        iou = self.iou(predict_box, ground_truth[batch][i][j][0], j * 64, i * 64)
                        iou_sum = iou_sum + iou
                        ground_box = ground_truth[batch][i][j][0]
                        loss = loss + self.l_coord * (torch.pow((ground_box[0] - predict_box[0]), 2) + torch.pow((ground_box[1] - predict_box[1]), 2) + torch.pow(torch.sqrt(ground_box[2] + 1e-8) - torch.sqrt(predict_box[2] + 1e-8), 2) + torch.pow(torch.sqrt(ground_box[3] + 1e-8) - torch.sqrt(predict_box[3] + 1e-8), 2))
                        loss_coord += self.l_coord * (math.pow((ground_box[0] - predict_box[0]), 2) + math.pow((ground_box[1] - predict_box[1]), 2) + math.pow(math.sqrt(ground_box[2] + 1e-8) - math.sqrt(predict_box[2] + 1e-8), 2) + math.pow(math.sqrt(ground_box[3] + 1e-8) - math.sqrt(predict_box[3] + 1e-8), 2))
                        loss = loss + torch.pow(ground_box[4] - predict_box[4], 2)
                        loss_confidence += math.pow(ground_box[4] - predict_box[4], 2)
                        ground_class = ground_box[10:]
                        predict_class = bounding_boxes[batch][i][j][self.B * 5:]
                        loss = loss + mseLoss(ground_class,predict_class) * self.Classes
                        loss_classes += mseLoss(ground_class,predict_class).item() * self.Classes
        print("坐标误差:{} 置信度误差:{} 类别损失:{} iou_sum:{} object_num:{} iou:{}".format(loss_coord, loss_confidence, loss_classes, iou_sum, object_num, "nan" if object_num == 0 else (iou_sum / object_num)))
        return loss, loss_coord, loss_confidence, loss_classes, iou_sum, object_num

五、数据设定

from torch.utils.data import Dataset
import os
import cv2
import xml.etree.ElementTree as ET
import torch
import torchvision.transforms as transforms

class YoloV1DataSet(Dataset):

    def __init__(self, imgs_dir="/content/drive/My Drive/VOC2007/Train/JPEGImages", annotations_dir="/content/drive/My Drive/VOC2007/Train/Annotations", img_size=448, S=7, B=2, ClassesFile="/content/drive/My Drive/VOC2007/Train/class.data"): # 图片路径、注解文件路径、图片尺寸、每个grid cell预测的box数量、类别文件
        img_names = os.listdir(imgs_dir)
        img_names.sort()
        self.transfrom = transforms.Compose([
            transforms.ToTensor(), # height * width * channel -> channel * height * width
            transforms.Normalize(mean=(0.5,0.5,0.5),std=(0.5,0.5,0.5))
        ])
        self.img_path = []
        for img_name in img_names:
            self.img_path.append(os.path.join(imgs_dir,img_name))
        annotation_names = os.listdir(annotations_dir)
        annotation_names.sort() #图片和文件排序后可以按照相同索引对应
        self.annotation_path = []
        for annotation_name in annotation_names:
            self.annotation_path.append(os.path.join(annotations_dir,annotation_name))
        self.img_size = img_size
        self.S = S
        self.B = B
        self.grid_cell_size = self.img_size / self.S
        self.ClassNameToInt = {
    
    }
        classIndex = 0
        with open(ClassesFile, 'r') as f:
            for line in f:
                line = line.replace('\n','')
                self.ClassNameToInt[line] = classIndex #根据类别名制作索引
                classIndex = classIndex + 1
        print(self.ClassNameToInt)
        self.Classes = classIndex # 一共的类别个数
        self.getGroundTruth()

    # PyTorch 无法将长短不一的list合并为一个Tensor
    def getGroundTruth(self):
        self.ground_truth = [[[list() for i in range(self.S)] for j in range(self.S)] for k in
                             range(len(self.img_path))]  # 根据标注文件生成ground_truth
        ground_truth_index = 0
        for annotation_file in self.annotation_path:
            ground_truth = [[list() for i in range(self.S)] for j in range(self.S)]
            # 解析xml文件--标注文件
            tree = ET.parse(annotation_file)
            annotation_xml = tree.getroot()
            # 计算 目标尺寸 -> 原图尺寸 self.img_size * self.img_size , x的变化比例
            width = (int)(annotation_xml.find("size").find("width").text)
            scaleX = self.img_size / width
            # 计算 目标尺寸 -> 原图尺寸 self.img_size * self.img_size , y的变化比例
            height = (int)(annotation_xml.find("size").find("height").text)
            scaleY = self.img_size / height
            # 因为两次除法的误差可能比较大 这边采用除一次乘一次的方式
            # 一个注解文件可能有多个object标签，一个object标签内部包含一个bnd标签
            objects_xml = annotation_xml.findall("object")
            for object_xml in objects_xml:
                # 获取目标的名字
                class_name = object_xml.find("name").text
                if class_name not in self.ClassNameToInt: # 不属于我们规定的类
                    continue
                bnd_xml = object_xml.find("bndbox")
                # 目标尺度放缩
                xmin = (int)((int)(bnd_xml.find("xmin").text) * scaleX)
                ymin = (int)((int)(bnd_xml.find("ymin").text) * scaleY)
                xmax = (int)((int)(bnd_xml.find("xmax").text) * scaleX)
                ymax = (int)((int)(bnd_xml.find("ymax").text) * scaleY)
                # 目标中心点
                centerX = (xmin + xmax) / 2
                centerY = (ymin + ymax) / 2
                # 当前物体的中心点落于 第indexI行 第indexJ列的 grid cell内
                indexI = (int)(centerY / self.grid_cell_size)
                indexJ = (int)(centerX / self.grid_cell_size)
                # 真实物体的list
                ClassIndex = self.ClassNameToInt[class_name]
                ClassList = [0 for i in range(self.Classes)]
                ClassList[ClassIndex] = 1
                ground_box = list([centerX / self.grid_cell_size - indexJ,centerY / self.grid_cell_size - indexI,(xmax-xmin)/self.img_size,(ymax-ymin)/self.img_size,1,xmin,ymin,xmax,ymax,(xmax-xmin)*(ymax-ymin)])
                #增加上类别
                ground_box.extend(ClassList)
                ground_truth[indexI][indexJ].append(ground_box)

            #同一个grid cell内的多个groudn_truth，选取面积最大的两个
            for i in range(self.S):
                for j in range(self.S):
                    if len(ground_truth[i][j]) == 0:
                        self.ground_truth[ground_truth_index][i][j].append([0 for i in range(10 + self.Classes)])
                    else:
                        ground_truth[i][j].sort(key = lambda box: box[9], reverse=True)
                        self.ground_truth[ground_truth_index][i][j].append(ground_truth[i][j][0])

            ground_truth_index = ground_truth_index + 1
        self.ground_truth = torch.Tensor(self.ground_truth).float()

    def __getitem__(self, item):
        # height * width * channel
        img_data = cv2.imread(self.img_path[item])
        img_data = cv2.resize(img_data, (448, 448), interpolation=cv2.INTER_AREA)
        img_data = self.transfrom(img_data)
        return img_data,self.ground_truth[item]


    def __len__(self):
        return len(self.img_path)

六、训练

#---------------step1:Dataset-------------------
import torch
dataSet = YoloV1DataSet()
from torch.utils.data import DataLoader
dataLoader = DataLoader(dataSet,batch_size=32,shuffle=True,num_workers=4)

#---------------step2:Model-------------------
Yolo = YOLO_V1().cuda(device=0)
Yolo.initialize_weights()

#---------------step3:LossFunction-------------------
loss_function = Yolov1_Loss().cuda(device=1)

#---------------step4:Optimizer-------------------
import torch.optim as optim
optimizer = optim.SGD(Yolo.parameters(),lr=5e-3,momentum=0.9,weight_decay=0.0005)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,milestones=[200,400,600,20000,30000],gamma=0.9)

#--------------step5:Tensorboard Feature Map------------
from tensorboardX import SummaryWriter
import torchvision.utils as vutils
import torch.nn as nn
writer = SummaryWriter('log')

def feature_map_visualize(img_data, writer):
    img_data = img_data.unsqueeze(0)
    img_grid = vutils.make_grid(img_data, normalize=True, scale_each=True)
    for i,m in enumerate(Yolo.modules()):
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.BatchNorm2d) or \
                isinstance(m, nn.ReLU) or isinstance(m, nn.MaxPool2d) or isinstance(m, nn.AdaptiveAvgPool2d):
            img_data = m(img_data)
            img_data = img_data.permute(1,0,2,3)
            img_grid = vutils.make_grid(img_data, normalize=True, scale_each=True)
            img_data = img_data.permute(1, 0, 2, 3)
            writer.add_image('feature_map', img_grid)

#---------------step6:Train-------------------
epoch = 1
while epoch <= 2000*dataSet.Classes:

    loss_sum = 0
    loss_coord = 0
    loss_confidence = 0
    loss_classes = 0
    epoch_iou = 0
    epoch_object_num = 0
    scheduler.step()

    for batch_index, batch_train in enumerate(dataLoader):
        optimizer.zero_grad()
        train_data = batch_train[0].float().cuda(device=0)
        train_data.requires_grad = True
        label_data = batch_train[1].float().cuda(device=0)
        loss = loss_function(bounding_boxes=Yolo(train_data),ground_truth=label_data)
        loss_coord += loss[0]
        loss_confidence += loss[1]
        loss_classes += loss[2]
        batch_loss = loss[0] + loss[1] + loss[2]
        epoch_iou += loss[3]
        epoch_object_num += loss[4]
        batch_loss.backward()
        optimizer.step()
        loss_sum += batch_loss.item()
        #writer.add_graph(loss_function, (Yolo(train_data),label_data))
        for name, param in Yolo.named_parameters():
            print("name:{} param:{}".format(name, param.grad))
        print("batch_index : {} ; batch_loss : {}".format(batch_index,batch_loss.item()))
    epoch += 1
    if (epoch < 1000 and epoch % 100 == 0) or epoch % 1000 == 0:
        torch.save(Yolo.state_dict(), './YOLO_V1_' + str(epoch) + '.pth')
    print("epoch : {} ; loss : {}".format(epoch,{
    
    loss_sum}))
    for name, layer in Yolo.named_parameters():
        writer.add_histogram(name + '_grad', layer.grad.cpu().data.numpy(), epoch)
        writer.add_histogram(name + '_data', layer.cpu().data.numpy(), epoch)
    #feature_map_visualize(batch_train[0], writer)
    writer.add_scalar('Train/Loss_sum', loss_sum, epoch + 1)
    writer.add_scalar('Train/Loss_coord', loss_coord, epoch + 1)
    writer.add_scalar('Train/Loss_confidenct', loss_confidence, epoch + 1)
    writer.add_scalar('Train/Loss_classes', loss_classes, epoch + 1)
    writer.add_scalar('Train/Epoch_iou', epoch_iou / epoch_object_num, epoch + 1)

writer.close()

七、测试

# 网络加载
Yolo_V1 = YOLO_V1()
import torch
Yolo_V1.load_state_dict(torch.load('/content/drive/My Drive/YOLO_V1_41.pth'))
Yolo_V1 = Yolo_V1.cuda()
# 类别与索引转换
IndexToClassName = {
    
    }
with open("/content/drive/My Drive/VOC2007/Train/class.data","r") as f:
    index = 0
    for line in f:
        IndexToClassName[index] = line
        index = index + 1

def iou(box_one, box_two):
    LX = max(box_one[0], box_two[0])
    LY = max(box_one[1], box_two[1])
    RX = min(box_one[2], box_two[2])
    RY = min(box_one[3], box_two[3])
    if LX >= RX or LY >= RY:
        return 0
    return (RX - LX) * (RY - LY) / ((box_one[2]-box_one[0]) * (box_one[3] - box_one[1]) + (box_two[2]-box_two[0]) * (box_two[3] - box_two[1]))

import numpy as np
def NMS(bounding_boxes,S=7,B=2,img_size=448,confidence_threshold=0.5,iou_threshold=0.7):
    bounding_boxes = bounding_boxes.cpu().detach().numpy().tolist()
    predict_boxes = []
    nms_boxes = []
    grid_size = img_size / S
    for batch in range(len(bounding_boxes)):
        for i in range(S):
            for j in range(S):
                gridX = grid_size * i
                gridY = grid_size * j
                if bounding_boxes[batch][i][j][4] < bounding_boxes[batch][i][j][9]:
                    bounding_box = bounding_boxes[batch][i][j][5:10]
                else:
                    bounding_box = bounding_boxes[batch][i][j][0:5]
                bounding_box.extend(bounding_boxes[batch][i][j][10:])
                if bounding_box[4] >= confidence_threshold:
                    predict_boxes.append(bounding_box)
                centerX = (int)(gridX + bounding_box[0] * grid_size)
                centerY = (int)(gridY + bounding_box[1] * grid_size)
                width = (int)(bounding_box[2] * grid_size)
                height = (int)(bounding_box[3] * grid_size)
                bounding_box[0] = max(0, (int)(centerX - width / 2))
                bounding_box[1] = max(0, (int)(centerY - height / 2))
                bounding_box[2] = min(img_size - 1, (int)(centerX + width / 2))
                bounding_box[3] = min(img_size - 1, (int)(centerY + height / 2))

        while len(predict_boxes) != 0:
            predict_boxes.sort(key=lambda box:box[4])
            assured_box = predict_boxes[0]
            temp = []
            classIndex = np.argmax(assured_box[5:])
            #print("类别索引:{}".format(classIndex))
            assured_box[4] = assured_box[4] * assured_box[5 + classIndex] #修正置信度为 物体分类准确度 × 含有物体的置信度
            assured_box[5] = classIndex
            nms_boxes.append(assured_box)
            i = 1
            while i < len(predict_boxes):
                if iou(assured_box,predict_boxes[i]) <= iou_threshold:
                    temp.append(predict_boxes[i])
                i = i + 1
            predict_boxes = temp

        return nms_boxes


# 读取测试数据
import cv2
import torch
test_dir = "/content/drive/My Drive/VOC2007/Train/JPEGImages/000005.jpg"
img_data = cv2.imread(test_dir)
img_data = cv2.resize(img_data,(448,448),interpolation=cv2.INTER_AREA)
train_data = torch.Tensor(img_data).float().cuda()
train_data = train_data.resize(1,448,448,3)
bounding_boxes = Yolo_V1(train_data)
NMS_boxes = NMS(bounding_boxes)

for box in NMS_boxes:
    print(box)
    img_data = cv2.rectangle(img_data, (box[0],box[1]),(box[2],box[3]),(0,255,0),1)
    img_data = cv2.putText(img_data, "class:{} confidence:{}".format(IndexToClassName[box[5]],box[4]),(box[0],box[1]),cv2.FONT_HERSHEY_PLAIN,0.5,(0,0,255),1)

cv2.imshow("local",img_data)
cv2.waitKey()

Pytorch复现YoLo v-1

一点心得

文章目录

一、配置

二、模型

三、输出处理

四、损失函数

五、数据设定

六、训练

七、测试

猜你喜欢