YOLOv7 code walkthrough, reading data in JSON format

1. Overall structure

To be honest, going through the YAML configuration file is really not a convenient way to read this code, so I rewrote the model as explicit layers, following the network diagram at https://blog.csdn.net/u012863603/article/details/126118799 .

# coding=utf-8
import math

import torch
import torch.nn as nn

'''
Author:Don
date:2022/10/17 15:51
desc:
'''
from baseblock import *
from utils import *
class Model(nn.Module):
	def __init__(self,classes=2,anchors=None,ch=3):
		super(Model, self).__init__()
		# self.train=False
		na=3*(classes+5)

		self.layer0=Conv(ch,32,3,1)  # 0
		self.layer1=Conv(32,64,3,2)  # 1-p1/2
		self.layer2=Conv(64,64,3,1)
		self.layer3=Conv(64,128,3,2) #3-p2/4

		self.layer11=ELAN(128)
		self.layer16=MP_1(256)
		self.layer24=ELAN(256)
		self.layer29=MP_1(512)
		self.layer37=ELAN(512)
		self.layer42=MP_1(1024)
		self.layer50=ELAN_E(1024)

		self.layer51=SPPCSPC(1024,512)

		self.layer52=Conv(512,256,1,1)
		self.upsample=nn.Upsample(None,2,'nearest')
		self.concat = Concat(1)
		self.layer54=Conv(1024,256,1,1)


		self.layer63=ELAN_W(512)

		self.layer64=Conv(256,128,1,1)
		self.layer66=Conv(512,128,1,1)

		self.layer75=ELAN_W(256)
		self.layer80=MP_2(128)

		self.layer88=ELAN_W(512)
		self.layer93=MP_2(256)
		self.layer101=ELAN_W(1024)

		self.layer102=RepConv(128,256,3,1)
		self.layer103=RepConv(256,512,3,1)
		self.layer104=RepConv(512,1024,3,1)

		self.idetect=IDetect(classes,anchors=anchors,ch=[256,512,1024])
		s = 256  # 2x min stride; feed a 256x256 dummy input through the net to work out each head's downsampling factor
		self.idetect.stride=torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])
		self.idetect.anchors /= self.idetect.stride.view(-1, 1, 1)
		# check_anchor_order(self.idetect)
		self.stride = self.idetect.stride
		self._initialize_biases()  # only run once

		# Init weights, biases
		initialize_weights(self)



	def forward(self,x):
		x3=self.layer3(self.layer2(self.layer1(self.layer0(x))))
		x24=self.layer24(self.layer16(self.layer11(x3)))
		x37=self.layer37(self.layer29(x24))
		x51=self.layer51(self.layer50(self.layer42(x37)))
		x63=self.layer63(self.concat([self.layer54(x37),self.upsample(self.layer52(x51))]))
		x75=self.layer75(self.concat([self.layer66(x24),self.upsample(self.layer64(x63))]))
		x88=self.layer88(self.concat([self.layer80(x75),x63]))
		x101=self.layer101(self.concat([self.layer93(x88),x51]))

		x102=self.layer102(x75)
		x103=self.layer103(x88)
		x104=self.layer104(x101)

		out=self.idetect([x102,x103,x104])


		return out

	def _initialize_biases(self):
		for mi,s in zip(self.idetect.m,self.idetect.stride):
			b=mi.bias.view(self.idetect.na,-1) # conv.bias(255) to (3,85)
			b.data[:,4]+= math.log(8/(640/s)**2)
			b.data[:, 5:] += math.log(0.6 / (self.idetect.nc - 0.99))   # cls
			mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

# device=torch.device('cuda:0')
# anchors=[[12,16, 19,36, 40,28],[36,75, 76,55, 72,146],[142,110, 192,243, 459,401]]
#
# yolov7 = Model(classes=1,anchors=anchors).to(device)
# img = torch.rand( 1, 3, 640, 640).to(device)
# y = yolov7(img)
# print(y[0].shape)
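
A quick sanity check of the stride bookkeeping in __init__: the three RepConv heads sit at 1/8, 1/16 and 1/32 of the input resolution, so running the dummy 256x256 tensor through forward() gives strides of 8, 16 and 32, and the anchors are then divided by their stride so they are stored in grid units. A minimal sketch, assuming the class above is saved as model.py next to baseblock.py and utils.py (as its imports and the training script below suggest):

# Sketch: check the computed strides and grid-relative anchors (random weights, CPU).
import torch
from model import Model

anchors = [[12, 16, 19, 36, 40, 28], [36, 75, 76, 55, 72, 146], [142, 110, 192, 243, 459, 401]]
yolov7 = Model(classes=1, anchors=anchors)

print(yolov7.stride)              # expected: tensor([ 8., 16., 32.])
print(yolov7.idetect.anchors[0])  # first-level anchors divided by stride 8, e.g. [12, 16] -> [1.5, 2.0]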

2. Each module

Compare each block against the network diagram linked above.

# coding=utf-8

import torch
import torch.nn as nn
'''
Author:Don
date:2022/10/17 16:24
desc:
'''

def autopad(k,p=None):
	if p is None:
		p=k//2 if isinstance(k,int) else [x//2 for x in k]
	return p

class Conv(nn.Module):
	# ch_in, ch_out, kernel, stride, padding, groups
	def __init__(self,c1,c2,k=1,s=1,p=None,g=1,act=True):
		super(Conv, self).__init__()
		self.conv=nn.Conv2d(c1,c2,k,s,autopad(k,p),groups=g,bias=False)
		self.bn=nn.BatchNorm2d(c2)
		self.act=nn.SiLU() if act is True else (act if isinstance(act,nn.Module) else nn.Identity())

	def forward(self,x):
		return  self.act(self.bn(self.conv(x)))

	def fuseforward(self,x):
		return self.act(self.conv(x))



class Concat(nn.Module):
	def __init__(self,dimension=1):
		super(Concat, self).__init__()
		self.d=dimension

	def forward(self,x):
		return torch.cat(x,self.d)


class MP(nn.Module):
	def __init__(self,k=2):
		super(MP, self).__init__()
		self.m=nn.MaxPool2d(kernel_size=k,stride=k)

	def forward(self,x):
		return self.m(x)


class MP_2(nn.Module):
	def __init__(self,c1):
		super(MP_2, self).__init__()
		self.conv1=Conv(c1,c1,1,1)
		self.conv2=Conv(c1,c1,3,2)
		self.m=MP()
		self.concat=Concat(1)

	def forward(self,x):
		y1=self.conv1(self.m(x))
		y2=self.conv2(self.conv1(x))
		return self.concat([y2,y1])


class MP_1(nn.Module):
	def __init__(self,c1):
		super(MP_1, self).__init__()
		c2=c1//2
		self.m=MP()
		self.conv1=Conv(c1,c2,1,1)
		self.conv2=Conv(c2,c2,3,2)
		self.concat=Concat(1)

	def forward(self,x):
		y1=self.conv1(self.m(x))
		y2=self.conv2(self.conv1(x))
		return self.concat([y1,y2])

class ELAN(nn.Module):
	def __init__(self, c1):
		super(ELAN, self).__init__()
		c2=c1//2
		c3=c1*2
		self.conv1 = Conv(c1, c2, 1, 1)
		self.conv2 = Conv(c2, c2, 3, 1)
		self.conv3 = Conv(c3, c3, 1, 1)
		self.concat=Concat(1)

	def forward(self,x):
		y1=self.conv1(x)
		y2=self.conv1(x)
		y3=self.conv2(self.conv2(y2))
		y4=self.conv2(self.conv2(y3))
		return self.conv3(self.concat([y4,y3,y2,y1]))


class ELAN_E(nn.Module):
	def __init__(self, c1):
		super(ELAN_E, self).__init__()
		c2=c1//4
		c3=c1
		self.conv1 = Conv(c1, c2, 1, 1)
		self.conv2 = Conv(c2, c2, 3, 1)
		self.conv3 = Conv(c3, c3, 1, 1)
		self.concat=Concat(1)

	def forward(self,x):
		y1=self.conv1(x)
		y2=self.conv1(x)
		y3=self.conv2(self.conv2(y2))
		y4=self.conv2(self.conv2(y3))
		return self.conv3(self.concat([y4,y3,y2,y1]))


class ELAN_W(nn.Module):
	def __init__(self, c1):
		super(ELAN_W, self).__init__()
		c2=c1//2
		c3=c2//2
		c4=2*c2+4*c3
		self.conv1 = Conv(c1, c2, 1, 1)
		self.conv2 = Conv(c2, c3, 3, 1)
		self.conv3 = Conv(c3, c3, 3, 1)
		self.conv4 = Conv(c4, c4//4, 1, 1)
		self.concat=Concat(1)

	def forward(self,x):
		y1=self.conv1(x)
		y2=self.conv1(x)
		y3=self.conv2(y2)
		y4=self.conv3(y3)
		y5=self.conv3(y4)
		y6=self.conv3(y5)
		return self.conv4(self.concat([y6,y5,y4,y3,y2,y1]))

class SPPCSPC(nn.Module):
	def __init__(self,c1,c2,e=0.5,k=(5,9,13)):
		super(SPPCSPC, self).__init__()
		c_=int(2*c2*e) #hidden channels
		self.cv1=Conv(c1,c_,1,1)
		self.cv2=Conv(c1,c_,1,1)
		self.cv3=Conv(c_,c_,3,1)
		self.cv4=Conv(c_,c_,1,1)
		self.m=nn.ModuleList([nn.MaxPool2d(kernel_size=x,stride=1,padding=x//2) for x in k])
		self.cv5=Conv(4*c_,c_,1,1)
		self.cv6=Conv(c_,c_,3,1)
		self.cv7=Conv(2*c_,c2,1,1)

	def forward(self,x):
		x1=self.cv4(self.cv3(self.cv1(x)))
		y1=self.cv6(self.cv5(torch.cat([x1]+[m(x1) for m in self.m],1)))
		y2=self.cv2(x)
		return self.cv7(torch.cat((y1,y2),dim=1))

class RepConv(nn.Module):
	def __init__(self,c1,c2,k=3,s=1,p=None,g=1,act=True,deploy=False):
		super(RepConv, self).__init__()
		self.deploy=deploy
		self.groups=g
		self.in_channels=c1
		self.out_channels=c2
		padding_11=autopad(k,p)-k//2

		self.act=nn.SiLU() if act is True else (act if isinstance(act,nn.Module) else nn.Identity())

		if deploy:
			self.rbr_reparam=nn.Conv2d(c1,c2,k,s,autopad(k,p),groups=g,bias=True)

		else:
			self.rbr_dense = nn.Sequential(
				nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False),
				nn.BatchNorm2d(num_features=c2),
			)
			self.rbr_1x1 = nn.Sequential(
				nn.Conv2d(c1, c2, 1, s, padding_11, groups=g, bias=False),
				nn.BatchNorm2d(num_features=c2),
			)
	def forward(self,x):
		if hasattr(self, "rbr_reparam"):
			return self.act(self.rbr_reparam(x))
		return self.act(self.rbr_dense(x) + self.rbr_1x1(x))

class ImplicitA(nn.Module):
	def __init__(self,channel,mean=0.,std=.02):
		super(ImplicitA, self).__init__()
		self.channel=channel
		self.mean=mean
		self.std=std
		self.implicit=nn.Parameter(torch.zeros(1,channel,1,1))
		nn.init.normal_(self.implicit,mean=self.mean,std=std)

	def forward(self,x):
		return self.implicit+x


class ImplicitM(nn.Module):
	def __init__(self,channel,mean=0,std=.02):
		super(ImplicitM, self).__init__()
		self.channel=channel
		self.mean=mean
		self.std=std
		self.implicit=nn.Parameter(torch.ones(1,channel,1,1))
		nn.init.normal_(self.implicit,mean=self.mean,std=self.std)

	def forward(self,x):
		return self.implicit*x



class IDetect(nn.Module):
	stride = None  # strides computed during build
	export = False  # onnx export
	def __init__(self,nc=80,anchors=(),ch=()):
		super(IDetect, self).__init__()
		self.nc=nc
		self.no=nc+6  # outputs per anchor (note: standard YOLOv7 uses nc + 5; the extra channel here is simply unused by the loss)
		self.nl=len(anchors)  #3
		self.na = len(anchors[0]) // 2  # number of anchors 3
		self.grid=[torch.zeros(1)]*self.nl
		a = torch.tensor(anchors).float().view(self.nl, -1, 2) # 3,3,2
		self.register_buffer('anchors', a)  # shape(nl,na,2)
		self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
		self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)
		self.ia=nn.ModuleList(ImplicitA(x) for x in ch)
		self.im=nn.ModuleList(ImplicitM(self.no*self.na) for _ in ch)
		# self.training=True

	def forward(self,x):
		z=[]
		self.training|=self.export
		for i in range(self.nl):
			x[i]=self.m[i](self.ia[i](x[i]))
			x[i]=self.im[i](x[i])
			bs,_,ny,nx=x[i].shape
			x[i]=x[i].view(bs,self.na,self.no,ny,nx).permute(0,1,3,4,2).contiguous()
			if not self.training:  # inference
				if self.grid[i].shape[2:4] != x[i].shape[2:4]:
					self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

				y = x[i].sigmoid()
				y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
				y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
				z.append(y.view(bs, -1, self.no))

		return x if self.training else (torch.cat(z, 1), x)


	@staticmethod
	def _make_grid(nx=20, ny=20):
		yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
		return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()




# anchors=[[12,16, 19,36, 40,28],[36,75, 76,55, 72,146],[142,110, 192,243, 459,401]]
# ch=[256,512,1024]
# d=IDetect(anchors=anchors)
# implicit=nn.Parameter(torch.zeros(1,3,1,1))
# nn.init.normal_(implicit,mean=0.,std=.02)
# print(implicit)
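
The channel bookkeeping of these blocks can be checked in isolation: ELAN(c1) concatenates four c1//2 branches and doubles the channel count, MP_1(c1) halves the spatial size while keeping c1 channels, MP_2(c1) halves the spatial size and doubles the channels, ELAN_W(c1) halves the channels, and SPPCSPC(c1, c2) maps c1 channels to c2. A small shape-check sketch, assuming this file is saved as baseblock.py (the name model.py imports it under):

# Sketch: shape checks for the individual blocks (random weights, CPU).
import torch
from baseblock import ELAN, ELAN_W, MP_1, MP_2, SPPCSPC

print(ELAN(256)(torch.randn(1, 256, 80, 80)).shape)            # [1, 512, 80, 80]
print(MP_1(512)(torch.randn(1, 512, 80, 80)).shape)            # [1, 512, 40, 40]
print(MP_2(128)(torch.randn(1, 128, 80, 80)).shape)            # [1, 256, 40, 40]
print(ELAN_W(512)(torch.randn(1, 512, 40, 40)).shape)          # [1, 256, 40, 40]
print(SPPCSPC(1024, 512)(torch.randn(1, 1024, 20, 20)).shape)  # [1, 512, 20, 20]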

3. Data processing

Because the original pipeline is too complicated and my dataset is small, I removed mosaic and mixup and kept only rotation and flipping, and the labels are read directly from the annotation tool's JSON output (labelme-style), so there is no need to convert them to the YOLOv5 label format.
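
For reference, the cache_labels method below expects one JSON file per image, found by replacing /images/ with /labels/ in the path and the image extension with .json, and containing labelme-style fields: imageHeight, imageWidth and a shapes list whose entries carry a label and a list of points. A minimal example of that layout (the file name, sizes and coordinates are made up):

# Sketch: the per-image JSON layout that cache_labels() below reads.
import json

label = {
	"imageHeight": 1080,
	"imageWidth": 1920,
	"shapes": [
		{
			"label": "sack",  # only shapes labelled "sack" become class 0 in cache_labels
			"points": [[640.0, 300.0], [820.0, 450.0]],  # vertices in pixels; their min/max become the box
		}
	],
}
with open("data/kongdong/labels/example.json", "w") as f:
	json.dump(label, f)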

# coding=utf-8
import os
import random
import sys

path = os.path.dirname(__file__)
sys.path.append(path)

'''
Author:Don
date:2022/10/19 15:00
desc:
'''
from utils import  *
from torch.utils.data import Dataset
from pathlib import Path
import glob
from tqdm import tqdm
from PIL import  Image
import json
import numpy as np
import cv2
def img2label_paths(img_paths):
	# Define label paths as a function of image paths
	sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep  # /images/, /labels/ substrings
	return ['json'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths]

def xyxy2xywh(x):
	# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
	y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
	y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
	y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
	y[:, 2] = (x[:, 2] - x[:, 0])  # width
	y[:, 3] = (x[:, 3] - x[:, 1])  # height
	return y

def segments2boxes(segments,img_h,img_w):
	# Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
	boxes = []
	for s in segments:
		x, y = s.T  # segment xy
		boxes.append([x.min(), y.min(), x.max(), y.max()])  # cls, xyxy
	y=xyxy2xywh(np.array(boxes))  # cls, xywh
	y[:,[1, 3]] /= img_h  # normalized height 0-1
	y[:,[0, 2]] /= img_w  # normalized width 0-1
	return y



def load_image(self, index):
	img = self.imgs[index]
	if img is None:
		path=self.img_files[index]
		img=cv2.imread(path)
		h0,w0=img.shape[:2]
		r=self.img_size/max(h0,w0)
		if r!=1:
			interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
			img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
		return img,(h0,w0),img.shape[:2]


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), scaleup=True, stride=32):
	shape = img.shape[:2]  # current shape [height, width]
	if isinstance(new_shape, int):
		new_shape = (new_shape, new_shape)

	# Scale ratio (new / old)
	r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

	ratio=r,r
	new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
	dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding  640-640 640-512

	dw /= 2  # divide padding into 2 sides
	dh /= 2

	top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
	left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
	img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
	return img, ratio, (dw, dh)


def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
	y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
	y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw  # top left x
	y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh  # top left y
	y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw  # bottom right x
	y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh  # bottom right y
	return y


def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
	# Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
	w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
	w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
	ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
	return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates



def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
					   border=(0, 0)):
	# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
	# targets = [cls, xyxy]

	height = img.shape[0] + border[0] * 2  # shape(h,w,c)
	width = img.shape[1] + border[1] * 2

	# Center
	C = np.eye(3)
	C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
	C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

	# Perspective
	P = np.eye(3)
	P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
	P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

	# Rotation and Scale
	R = np.eye(3)
	a = random.uniform(-degrees, degrees)
	# a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
	s = random.uniform(1 - scale, 1.1 + scale)
	# s = 2 ** random.uniform(-scale, scale)
	R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

	# Shear
	S = np.eye(3)
	S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
	S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

	# Translation
	T = np.eye(3)
	T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
	T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

	# Combined rotation matrix
	M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
	if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
		if perspective:
			img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
		else:  # affine
			img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

	# Visualize
	# import matplotlib.pyplot as plt
	# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
	# ax[0].imshow(img[:, :, ::-1])  # base
	# ax[1].imshow(img2[:, :, ::-1])  # warped

	# Transform label coordinates
	n = len(targets)
	xy = np.ones((n * 4, 3))
	xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
	xy = xy @ M.T  # transform
	xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine

	# create new boxes
	x = xy[:, [0, 2, 4, 6]]
	y = xy[:, [1, 3, 5, 7]]
	new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

	# clip
	new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
	new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

	# filter candidates
	i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.10)
	targets = targets[i]
	targets[:, 1:5] = new[i]

	return img, targets


def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
	r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
	hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
	dtype = img.dtype  # uint8

	x = np.arange(0, 256, dtype=np.int16)
	lut_hue = ((x * r[0]) % 180).astype(dtype)
	lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
	lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

	img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
	cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed





class LoadImagesAndLabels(Dataset):


	def __init__(self,path,img_size=640,batch_size=16,augment=False,image_weights=False, single_cls=False, stride=32, pad=0.0):
		self.img_size=img_size
		self.augment=augment
		self.image_weights=image_weights
		self.stride=stride
		self.path=path

		try:
			f=[]
			for p in path if isinstance(path,list) else[path]:
				p = Path(p)  # os-agnostic
				f += glob.glob(str(p / '**' / '*.*'), recursive=True)
			self.img_files = sorted([x.replace('/', os.sep) for x in f])
		except Exception as e:
			pass
		self.label_files = img2label_paths(self.img_files)  # labels
		cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')

		if cache_path.is_file():
			cache, exists = torch.load(cache_path), True  # load
		else:
			cache, exists = self.cache_labels(cache_path), False  # cache

		labels, shapes, self.segments = zip(*cache.values())
		self.labels = list(labels)
		self.shapes = np.array(shapes, dtype=np.float64)
		self.img_files = list(cache.keys())  # update
		self.label_files = img2label_paths(cache.keys())  # update
		if single_cls:
			for x in self.labels:
				x[:, 0] = 0
		n=len(shapes)
		bi=np.floor(np.arange(n)/batch_size).astype(np.int32)
		nb=bi[-1]+1
		self.batch=bi
		self.n=n
		self.indices=range(n)

		self.imgs=[None]*n

	def __len__(self):
		return len(self.img_files)

	def __getitem__(self, item):
		index=self.indices[item]

		img,(h0,w0),(h,w)=load_image(self,index)
		shape=self.img_size
		img, ratio, pad = letterbox(img, shape, scaleup=self.augment)  # the image was already resized in load_image, so letterbox only adds padding here

		shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

		labels = self.labels[index].copy()

		if labels.size:
			labels[:,1:]=xywhn2xyxy(labels[:,1:],ratio[0]*w,ratio[1]*h,padw=pad[0],padh=pad[1])

		if self.augment:
			# Augment imagespace

			img, labels = random_perspective(img, labels,
											 degrees=0.0,
											 translate=0.2,
											 scale=0.9,
											 shear=0.0,
											 perspective=0.0)

			# img, labels = self.albumentations(img, labels)

			# Augment colorspace
			augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4)


		nL = len(labels)  # number of labels
		if nL:
			labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
			labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
			labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

		if self.augment:

			# flip left-right
			if random.random() < 0.5:
				img = np.fliplr(img)
				if nL:
					labels[:, 1] = 1 - labels[:, 1]

		labels_out = torch.zeros((nL, 6))
		if nL:
			labels_out[:, 1:] = torch.from_numpy(labels)

		# Convert
		img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
		img = np.ascontiguousarray(img)

		return torch.from_numpy(img), labels_out, self.img_files[index], shapes

	@staticmethod
	def collate_fn(batch):
		img, label, path, shapes = zip(*batch)  # transposed
		for i, l in enumerate(label):
			l[:, 0] = i  # add target image index for build_targets()
		return torch.stack(img, 0), torch.cat(label, 0), path, shapes






	def cache_labels(self, cache_path):
		x = {}
		pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
		for i ,(im_file,lb_file) in enumerate(pbar):
			try:
				im=Image.open(im_file)
				im.verify()
				shape = im.size  # image size
				segments = []  # instance segments
				if os.path.isfile(lb_file):
					with open(lb_file,'r')as f:
						j = json.load(f)
						img_h = j["imageHeight"]
						img_w = j["imageWidth"]
						classes = np.array([0 for x in j["shapes"] if x["label"]=="sack"], dtype=np.float32)
						segments = [np.array(x["points"], dtype=np.float32).reshape(-1, 2) for x in j["shapes"] ]
						l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments,img_h,img_w)), 1)
					l = np.array(l, dtype=np.float32)
				else:
					l = np.zeros((0, 5), dtype=np.float32)
				x[im_file] = [l, shape, segments]
			except Exception as e:
				pass
		pbar.close()
		path = str(cache_path).replace('/', os.sep)
		torch.save(x, path)  # save for next time
		return x


def create_dataloader(path, imgsz, batch_size, stride,single_cls, augment=True, pad=0.0,world_size=1, workers=8, image_weights=False):
	dataset = LoadImagesAndLabels(path, imgsz, batch_size,
								  augment=augment,  # augment images
								  single_cls=single_cls,
								  stride=int(stride),
								  pad=pad,
								  image_weights=image_weights)

	batch_size = min(batch_size, len(dataset))
	nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers])
	loader = torch.utils.data.DataLoader
	dataloader = loader(dataset,
						batch_size=batch_size,
						num_workers=nw,
						sampler=None,
						pin_memory=True,
						collate_fn=LoadImagesAndLabels.collate_fn)

	return dataloader, dataset

img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']

# path='./data/kongdong/images'
# create_dataloader(path,640,1,32,True)
class LoadImages:
	def __init__(self,path,img_size=640):
		p = str(Path(path).absolute())  # os-agnostic absolute path
		if '*' in p:
			files = sorted(glob.glob(p, recursive=True))  # glob
		elif os.path.isdir(p):
			files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
		elif os.path.isfile(p):
			files = [p]  # files
		else:
			raise Exception(f'ERROR: {p} does not exist')

		images = [x for x in files if x.split('.')[-1].lower() in img_formats]
		ni = len(images)
		self.img_size=img_size
		self.files=images
		self.nf=ni

	def __iter__(self):
		self.count=0
		return self
	def __next__(self):
		if self.count==self.nf:
			raise StopIteration
		path = self.files[self.count]
		self.count+=1
		img0=cv2.imread(path)
		img=letterbox(img0,self.img_size)[0]
		img=img[:,:,::-1].transpose(2,0,1)
		img=np.ascontiguousarray(img)

		return path,img,img0
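
Putting it together, each batch from this loader is a tuple (imgs, labels_out, paths, shapes): imgs is a uint8 tensor of shape (batch, 3, 640, 640), already converted to RGB (it is only divided by 255 later, in the training loop), and labels_out has one row per object with columns (image index in batch, class, x, y, w, h), where xywh are normalized to [0, 1]. A hedged usage sketch (the path is the author's example folder from the commented-out test; point it at your own data):

# Sketch: pull one batch and inspect the target format.
dataloader, dataset = create_dataloader('./data/kongdong/images', 640, 4, 32,
										single_cls=True, augment=True)
imgs, targets, paths, shapes = next(iter(dataloader))
print(imgs.shape, imgs.dtype)  # torch.Size([4, 3, 640, 640]) torch.uint8
print(targets.shape)           # (num_objects_in_batch, 6): image idx, cls, x, y, w, h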

4. The loss (so complicated I would rather nobody had to read it)

I added the comments myself and removed the cls loss, because I only have one class.

# coding=utf-8
import os
import sys

import math
import numpy as np
import torch
import torch.nn.functional as F
path = os.path.dirname(__file__)
sys.path.append(path)
import torch.nn as nn

'''
Author:Don
date:2022/10/21 14:28
desc:
'''


def xywh2xyxy(x):
	y=x.clone() if isinstance(x,torch.Tensor) else np.copy(x)
	y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
	y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
	y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
	y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
	return y


def box_iou(box1, box2):
	def box_area(box):
		# box = 4xn
		return (box[2] - box[0]) * (box[3] - box[1])
	area1=box_area(box1.T)
	area2=box_area(box2.T)

	# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
	inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
	return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)


def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
	box2=box2.T
	if x1y1x2y2:  # x1, y1, x2, y2 = box1
		b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
		b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
	else: # transform from xywh to xyxy
		b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
		b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
		b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
		b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

	# Intersection area
	inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
			(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

	# Union Area
	w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
	w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
	union = w1 * h1 + w2 * h2 - inter + eps

	iou = inter / union
	if GIoU or DIoU or CIoU:
		cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
		ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
		if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
			c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
			rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
					(b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center distance squared
			if DIoU:
				return iou - rho2 / c2  # DIoU
			elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
				v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2)
				with torch.no_grad():
					alpha = v / (v - iou + (1 + eps))
				return iou - (rho2 / c2 + v * alpha)  # CIoU
		else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
			c_area = cw * ch + eps  # convex area
			return iou - (c_area - union) / c_area  # GIoU
	else:
		return iou  # IoU


class ComputeLossOTA:
	def __init__(self,model,autobalance=False):
		super(ComputeLossOTA, self).__init__()
		device=next(model.parameters()).device

		BCEcls=nn.BCEWithLogitsLoss(pos_weight=torch.tensor(1.0,device=device))
		BCEobj=nn.BCEWithLogitsLoss(pos_weight=torch.tensor(1.0,device=device))

		det=model.idetect
		self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02])  # P3-P7
		self.BCEcls,self.BCEobj=BCEcls,BCEobj
		for k in 'na','nc','nl','anchors','stride':
			setattr(self,k,getattr(det,k))

	def __call__(self, p,targets,imgs):
		device=targets.device
		lcls,lbox,lobj=torch.zeros(1,device=device),torch.zeros(1,device=device),torch.zeros(1,device=device)
		bs, as_, gjs, gis, targets, anchors = self.build_targets(p, targets, imgs)
		pre_gen_gains=[torch.tensor(pp.shape,device=device)[[3,2,3,2]] for pp in p] # [80,80,80,80,][40,40,40,40,][20,20,20,20]

		#loss
		for i,pi in enumerate(p):
			b, a, gj, gi = bs[i], as_[i], gjs[i], gis[i]  # image, anchor, gridy, gridx
			tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj
			n = b.shape[0]  # number of targets
			if n:
				ps=pi[b,a,gj,gi]

				# regression (box) loss
				grid=torch.stack([gi,gj],dim=1)
				pxy=ps[:,:2].sigmoid()*2-0.5
				pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
				pbox = torch.cat((pxy, pwh), 1)  # predicted box
				selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i]  # xywh * 80,40,20
				selected_tbox[:, :2] -= grid   # the prediction is the offset of the centre from its grid cell, so subtract the cell coordinates from the target
				iou = bbox_iou(pbox.T, selected_tbox, x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
				lbox += (1.0 - iou).mean()  # iou loss

				# Objectness
				tobj[b, a, gj, gi]=iou.detach().clamp(0).type(tobj.dtype)  # use the IoU (in [0, 1]) as the objectness target

			obji=self.BCEobj(pi[...,4],tobj)  # objectness loss
			lobj += obji * self.balance[i]  # obj loss: each level gets a different weight; the higher-resolution levels get a larger one to help detect small objects

		lbox *= 0.05
		lobj *= 0.7
		bs=tobj.shape[0]

		loss=lbox+lobj
		return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()




	def build_targets(self, p, targets, imgs):

		indices, anch = self.find_3_positive(p, targets)  # expand each ground truth to several candidate positive cells

		matching_bs = [[] for pp in p]
		matching_as = [[] for pp in p]
		matching_gjs = [[] for pp in p]
		matching_gis = [[] for pp in p]
		matching_targets = [[] for pp in p]
		matching_anchs = [[] for pp in p]

		nl = len(p)

		for batch_idx in range(p[0].shape[0]):
			b_idx=targets[:,0]==batch_idx
			this_target=targets[b_idx]
			if this_target.shape[0] == 0:
				continue
			txywh=this_target[:,2:6]*imgs[batch_idx].shape[1]
			txyxy=xywh2xyxy(txywh)

			pxyxys = []
			p_cls = []
			p_obj = []
			from_which_layer = []
			all_b = []
			all_a = []
			all_gj = []
			all_gi = []
			all_anch = []
			for i,pi in enumerate(p):   # predictions of each detection layer
				b, a, gj, gi = indices[i]    # candidate positive cells of this layer for the ground truths
				idx = (b == batch_idx)
				b, a, gj, gi = b[idx], a[idx], gj[idx], gi[idx]
				all_b.append(b)
				all_a.append(a)
				all_gj.append(gj)
				all_gi.append(gi)
				all_anch.append(anch[i][idx])
				from_which_layer.append(torch.ones(size=(len(b),)) * i)
				fg_pred = pi[b, a, gj, gi] # b: image index, a: anchor index, gj/gi: grid cell; picks this layer's candidate predictions out of (1,3,ny,nx,5+cls)
				p_obj.append(fg_pred[:, 4:5])
				p_cls.append(fg_pred[:, 5:6])

				grid = torch.stack([gi, gj], dim=1)   # grid cell containing the object centre
				pxy=(fg_pred[:,:2].sigmoid()*2-0.5+grid)*self.stride[i]  # the network predicts the offset of the centre from its cell
				pwh = (fg_pred[:, 2:4].sigmoid() * 2) ** 2 * anch[i][idx] * self.stride[i]  # the network predicts a ratio relative to the anchor
				pxywh=torch.cat([pxy,pwh],dim=-1)
				pxyxy=xywh2xyxy(pxywh)
				pxyxys.append(pxyxy)

			pxyxys=torch.cat(pxyxys,dim=0)
			if pxyxys.shape[0] == 0:
				continue

			p_obj = torch.cat(p_obj, dim=0)
			p_cls = torch.cat(p_cls, dim=0)
			from_which_layer = torch.cat(from_which_layer, dim=0)
			all_b = torch.cat(all_b, dim=0)
			all_a = torch.cat(all_a, dim=0)
			all_gj = torch.cat(all_gj, dim=0)
			all_gi = torch.cat(all_gi, dim=0)
			all_anch = torch.cat(all_anch, dim=0)

			pair_wise_iou=box_iou(txyxy,pxyxys)
			pair_wise_iou_loss=-torch.log(pair_wise_iou+1e-8)

			top_k,_=torch.topk(pair_wise_iou,min(10,pair_wise_iou.shape[1]),dim=1) # take at most the 10 largest IoUs per gt
			dynamic_ks=torch.clamp(top_k.sum(1).int(),min=1)  # dynamic k per gt; e.g. with 5 gts it might come out as [4, 3, 3, 4, 4]

			gt_cls_per_image=(F.one_hot(this_target[:,1].to(torch.int64),self.nc).float().unsqueeze(1).repeat(1,pxyxys.shape[0],1))  #5,60,1

			num_gt=this_target.shape[0]  # 5

			cls_preds_=(p_cls.float().unsqueeze(0).repeat(num_gt,1,1).sigmoid_()*p_obj.unsqueeze(0).repeat(num_gt,1,1).sigmoid_())  #5,60,1

			y=cls_preds_.sqrt_()

			pair_wise_cls_loss=F.binary_cross_entropy_with_logits(torch.log(y/(1-y)),gt_cls_per_image,reduction="none").sum(-1)

			del cls_preds_

			cost=(pair_wise_cls_loss+3.0*pair_wise_iou_loss)  # (num_gt, num_candidates): rows are gt boxes, columns are the candidate anchor/cell positions

			matching_matrix=torch.zeros_like(cost)  # 5,60

			for gt_idx in range(num_gt):
				_,pos_idx=torch.topk(cost[gt_idx],k=dynamic_ks[gt_idx].item(),largest=False)  # take the k candidates with the smallest cost
				matching_matrix[gt_idx][pos_idx]=1.0

			del top_k,dynamic_ks
			anchor_matching_gt=matching_matrix.sum(0)
			#  a value > 1 means one candidate cell was assigned to more than one gt
			if (anchor_matching_gt>1).sum()>0:
				_,cost_argmin=torch.min(cost[:,anchor_matching_gt>1],dim=0)  # find which gt has the smallest cost for that candidate
				matching_matrix[:, anchor_matching_gt > 1] *= 0.0   # clear all assignments for that candidate
				matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1.0 # keep only the assignment with the smallest cost
			fg_mask_inboxes=matching_matrix.sum(0)>0.0  # final foreground mask; since shared candidates were resolved, the count can be less than the sum of dynamic_ks

			matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0)  # which gt each kept candidate is matched to

			from_which_layer = from_which_layer[fg_mask_inboxes]
			all_b = all_b[fg_mask_inboxes]
			all_a = all_a[fg_mask_inboxes]
			all_gj = all_gj[fg_mask_inboxes]
			all_gi = all_gi[fg_mask_inboxes]
			all_anch = all_anch[fg_mask_inboxes]

			this_target=this_target[matched_gt_inds]

			for i in range(nl):
				layer_idx=from_which_layer==i
				matching_bs[i].append(all_b[layer_idx])
				matching_as[i].append(all_a[layer_idx])
				matching_gis[i].append(all_gi[layer_idx])
				matching_gjs[i].append(all_gj[layer_idx])
				matching_targets[i].append(this_target[layer_idx])
				matching_anchs[i].append(all_anch[layer_idx])


		for i in range(nl):
			if matching_targets[i] != []:
				matching_bs[i] = torch.cat(matching_bs[i], dim=0)
				matching_as[i] = torch.cat(matching_as[i], dim=0)
				matching_gjs[i] = torch.cat(matching_gjs[i], dim=0)
				matching_gis[i] = torch.cat(matching_gis[i], dim=0)
				matching_targets[i] = torch.cat(matching_targets[i], dim=0)
				matching_anchs[i] = torch.cat(matching_anchs[i], dim=0)

			else:
				matching_bs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
				matching_as[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
				matching_gjs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
				matching_gis[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
				matching_targets[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)
				matching_anchs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64)

		return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs


	def find_3_positive(self, p, targets):
		na,nt=self.na,targets.shape[0]   # nt: number of targets (objects) in the batch, e.g. 5
		indices,anch=[],[]
		gain=torch.ones(7,device=targets.device).long()
		ai=torch.arange(na,device=targets.device).float().view(na,1).repeat(1,nt)  # ai=[[0,0,0,0,0],[1,1,1,1,1],[2,2,2,2,2]]
		targets=torch.cat((targets.repeat(na,1,1),ai[:,:,None]),2)  # shape (na, nt, 7): 7 = image, cls, x, y, w, h, anchor index

		g=0.5
		off = torch.tensor([[0, 0],[1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device).float() * g  # offsets
        # p  [[1,3,80,80,6],[1,3,40,40,6],[1,3,20,20,6] ]
		for i in range(self.nl): # the 3 detection layers
			anchors=self.anchors[i]   # 3 anchors per layer
			gain[2:6]=torch.tensor(p[i].shape)[[3,2,3,2]] # xyxy gain  # 1,1,80,80,80,80,1

			t=targets*gain   #target *80

			if nt:
				r=t[:,:,4:6]/anchors[:,None]  # wh ratio between the gt boxes and the anchors
				j = torch.max(r, 1. / r).max(2)[0] < 4.0  # keep gt boxes whose ratio is below 4
				t = t[j]  # filter

				gxy=t[:,2:4]
				gxi = gain[[2, 3]] - gxy  # the same centres measured from the opposite corner of the grid
				j, k = ((gxy % 1. < g) & (gxy > 1.)).T   # centre in the left/top half of its cell (and not in the first row/column): also take the cell to the left / above
				l, m = ((gxi % 1. < g) & (gxi > 1.)).T  # centre in the right/bottom half (and not in the last row/column): also take the cell to the right / below
				j = torch.stack((torch.ones_like(j), j, k, l, m)) # selection mask for the 5 candidates
				t = t.repeat((5, 1, 1))[j]  # replicate the targets 5x and keep the centre cell plus the selected neighbours
				offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]   # the matching offsets for each kept candidate
			else:
				t=targets[0]
				offsets=0

			b,c=t[:,:2].long().T  # image index, class
			gxy=t[:,2:4]
			gwh=t[:,4:6]
			gij=(gxy-offsets).long()
			gi,gj=gij.T

			a=t[:,6].long()
			indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))  # image, anchor, grid indices
			anch.append(anchors[a])  # anchors

		return indices,anch
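
The g = 0.5 offsets in find_3_positive implement the YOLOv5/YOLOv7 rule that, besides the cell containing the box centre, the nearest horizontal neighbour and the nearest vertical neighbour also become positive cells, which roughly triples the number of candidate positives per target. A toy illustration of that selection for a single centre (the same arithmetic as above, reduced to one box and no anchors):

# Sketch: which grid cells become positive for a gt centre at (13.3, 7.8) on an 80x80 grid.
import torch

g = 0.5
off = torch.tensor([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]]).float() * g  # own cell, left, above, right, below

gxy = torch.tensor([[13.3, 7.8]])               # centre in grid units
gxi = 80 - gxy                                  # the same centre measured from the opposite corner
j, k = ((gxy % 1. < g) & (gxy > 1.)).T          # close to the left / top border of its cell?
l, m = ((gxi % 1. < g) & (gxi > 1.)).T          # close to the right / bottom border?
mask = torch.stack((torch.ones_like(j), j, k, l, m))   # which of the 5 candidates to keep

centres = gxy.repeat(5, 1, 1)[mask]
offsets = (torch.zeros_like(gxy)[None] + off[:, None])[mask]
print((centres - offsets).long())  # tensor([[13, 7], [12, 7], [13, 8]]): own cell, left neighbour, cell below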


5. Training

There is no multi-GPU setup here, so the distributed-training code, which is also complicated and carries a pile of parameters, was removed as well.

# coding=utf-8
import os
import sys
from tqdm import tqdm
path = os.path.dirname(__file__)
sys.path.append(path)
from model import Model
import torch
import torch.optim as optim
import  torch.nn as nn
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np
from datasets import create_dataloader
from torch.cuda import amp
from loss import ComputeLossOTA
'''
Author:Don
date:2022/10/19 12:01
desc:
'''

def train():
	epochs=300
	imgsz=640
	batch_size=1
	train_path='./data/kongdong/images'
	classes=1
	# Optimizer
	nbs = 64  # nominal batch size
	accumulate = max(round(nbs / batch_size), 1)  # accumulate gradients over several batches before stepping the optimizer
	device = torch.device('cuda:0')
	anchors = [[12, 16, 19, 36, 40, 28], [36, 75, 76, 55, 72, 146], [142, 110, 192, 243, 459, 401]]

	yolov7 = Model(classes=classes, anchors=anchors).to(device)

	pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
	for k, v in yolov7.named_modules():
		if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
			pg2.append(v.bias)  # biases
		if isinstance(v, nn.BatchNorm2d):
			pg0.append(v.weight)  # no decay
		elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
			pg1.append(v.weight)  # apply decay
		if hasattr(v, 'im'):
			if hasattr(v.im, 'implicit'):
				pg0.append(v.im.implicit)
			else:
				for iv in v.im:
					pg0.append(iv.implicit)
	optimizer = optim.SGD(pg0, lr=0.01, momentum=0.937, nesterov=True)
	optimizer.add_param_group({'params': pg1, 'weight_decay': 0.0005})  # add pg1 with weight_decay
	optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
	del pg0, pg1, pg2

	lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - 0.1) + 0.1  # linear
	scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

	gs=max(int(yolov7.stride.max()),32)  #32
	nl=yolov7.idetect.nl

	mloss = torch.zeros(4, device=device)  # mean losses
	single_cls=False
	# Trainloader
	if classes ==1 :
		single_cls=True

	dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs,single_cls, augment=True,image_weights=True)
	mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
	nb = len(dataloader)

	compute_loss_ota = ComputeLossOTA(yolov7)  # init loss class
	scaler = amp.GradScaler(enabled=True)
	for epoch in range(0, epochs):  # epoch ------------------------------------------------------------------
		yolov7.train()

		pbar = enumerate(dataloader)

		pbar = tqdm(pbar, total=nb)  # progress bar
		optimizer.zero_grad()
		for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
			ni = i + nb * epoch  # number integrated batches (since train start)
			imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

			# Forward
			with amp.autocast(enabled=True):
				pred = yolov7(imgs)  # forward
				# print(pred[0].shape)
				loss, loss_items = compute_loss_ota(pred, targets.to(device), imgs)  # loss scaled by batch_size
				# if rank != -1:
				# 	loss *= opt.world_size  # gradient averaged between devices in DDP mode
				# if opt.quad:
				# 	loss *= 4.

			# Backward
			scaler.scale(loss).backward()
			# Optimize
			if ni % accumulate == 0:
				scaler.step(optimizer)  # optimizer.step
				scaler.update()
				optimizer.zero_grad()

			mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
			mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
			s = ('%10s' * 2 + '%10.4g' * 6) % (
				'%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
			pbar.set_description(s)

		# Scheduler
		lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
		scheduler.step()


	torch.save(yolov7,'last.pt')


train()
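
The LambdaLR scheduler above multiplies the base learning rate (0.01) by lf(epoch), which decays linearly from 1.0 down to 0.1, so the learning rate goes from 0.01 at epoch 0 to 0.001 at the last epoch. A quick check of a few values under the settings used in train():

# Sketch: the linear LR schedule from train(), evaluated at a few epochs.
epochs, lr0 = 300, 0.01
lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - 0.1) + 0.1
for e in (0, 150, 299):
	print(e, round(lr0 * lf(e), 5))
# 0 0.01
# 150 0.00548
# 299 0.001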

6. Inference

# coding=utf-8
import os
import sys

path = os.path.dirname(__file__)
sys.path.append(path)
import torch

from datasets import LoadImages,non_max_suppression
'''
Author:Don
date:2022/10/26 14:25
desc:
'''
from pathlib import Path
source='data/kongdong/images'
imgsz=640
dataset = LoadImages(source, img_size=imgsz)
device=torch.device('cuda:0')


def clip_coords(boxes, img_shape):
    # Clip bounding xyxy bounding boxes to image shape (height, width)
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords

import random
import cv2
def plot_one_box(x, img, color=None, label=None, line_thickness=3):
    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)

    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)


def detect():
	model = torch.load("last.pt", map_location=device)  # load FP32 model
	model.eval()
	for path, img, im0s in dataset:
		img = torch.from_numpy(img).to(device)
		img=img.unsqueeze(0)
		img = img.float()  # uint8 to fp16/32
		img /= 255.0  # 0 - 255 to 0.0 - 1.0
		pred = model(img)[0]
		pred = non_max_suppression(pred, 0.85, 0.4, classes=1)

		for i,det in enumerate(pred):
			p, s, im0= path, '', im0s
			p = Path(p)  # to Path
			save_path = str(p.name)  # img.jpg
			gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
			if len(det):

				det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
				# Print results
				for c in det[:, -1].unique():
					n = (det[:, -1] == c).sum()  # detections per class
					s += f"{
      
      n} {
      
      'box'}{
      
      's' * (n > 1)}, "  # add to string

				# Write results
				for *xyxy, conf, cls in reversed(det):
					label = f'{conf:.2f}'
					plot_one_box(xyxy, im0, label=label)

				cv2.imshow(str(p), im0)
				cv2.waitKey()  # 1 millisecond


detect()
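
Note that detect() imports non_max_suppression from datasets, but that helper is not reproduced in this post. Purely as a point of reference, a minimal single-class stand-in with the same call shape could be built on torchvision.ops.nms; this is a sketch under that assumption, not the author's implementation (it uses the objectness score alone as the confidence and ignores the classes argument, which is harmless with a single class):

# Sketch of a minimal single-class non_max_suppression; NOT the version in the author's datasets.py.
import torch
import torchvision

def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None):
	# prediction: (bs, num_boxes, 5 + nc) from IDetect in inference mode, boxes as xywh in pixels
	output = []
	for x in prediction:                       # one image at a time
		x = x[x[:, 4] > conf_thres]            # filter by objectness
		if not x.shape[0]:
			output.append(torch.zeros((0, 6), device=prediction.device))
			continue
		boxes = x[:, :4].clone()               # convert xywh -> xyxy
		boxes[:, 0] = x[:, 0] - x[:, 2] / 2
		boxes[:, 1] = x[:, 1] - x[:, 3] / 2
		boxes[:, 2] = x[:, 0] + x[:, 2] / 2
		boxes[:, 3] = x[:, 1] + x[:, 3] / 2
		conf = x[:, 4]
		keep = torchvision.ops.nms(boxes, conf, iou_thres)
		cls = torch.zeros((len(keep), 1), device=prediction.device)  # single class -> class 0
		output.append(torch.cat((boxes[keep], conf[keep].unsqueeze(1), cls), 1))
	return output

Each row of the returned tensors is (x1, y1, x2, y2, conf, cls), which is the layout detect() expects when it slices det[:, :4] and unpacks *xyxy, conf, cls.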

7. Summary

This is only meant for understanding YOLOv7; it cannot be used for real training on its own, and the results are extremely poor!!
This is only meant for understanding YOLOv7; it cannot be used for real training on its own, and the results are extremely poor!!
This is only meant for understanding YOLOv7; it cannot be used for real training on its own, and the results are extremely poor!!

Origin: blog.csdn.net/qq_33228039/article/details/127536621