YOLOv7 code walkthrough: reading labels in JSON format

1. Overall structure

Honestly, the config-file style of the official repo is ill-suited to reading the code, so I rewrote the network by hand, following the diagram at
https://blog.csdn.net/u012863603/article/details/126118799 .

# coding=utf-8
import math

import torch
import torch.nn as nn

'''
Author:Don
date:2022/10/17 15:51
desc:
'''
from baseblock import *
from utils import *
class Model(nn.Module):
	def __init__(self,classes=2,anchors=None,ch=3):
		super(Model, self).__init__()
		# self.train=False
		na=3*(classes+5)  # output channels per head (note: unused below; IDetect builds its own convs)

		self.layer0=Conv(ch,32,3,1)  # 0
		self.layer1=Conv(32,64,3,2)  # 1-p1/2
		self.layer2=Conv(64,64,3,1)
		self.layer3=Conv(64,128,3,2) #3-p2/4

		self.layer11=ELAN(128)
		self.layer16=MP_1(256)
		self.layer24=ELAN(256)
		self.layer29=MP_1(512)
		self.layer37=ELAN(512)
		self.layer42=MP_1(1024)
		self.layer50=ELAN_E(1024)

		self.layer51=SPPCSPC(1024,512)

		self.layer52=Conv(512,256,1,1)
		self.upsample=nn.Upsample(None,2,'nearest')
		self.concat = Concat(1)
		self.layer54=Conv(1024,256,1,1)


		self.layer63=ELAN_W(512)

		self.layer64=Conv(256,128,1,1)
		self.layer66=Conv(512,128,1,1)

		self.layer75=ELAN_W(256)
		self.layer80=MP_2(128)

		self.layer88=ELAN_W(512)
		self.layer93=MP_2(256)
		self.layer101=ELAN_W(1024)

		self.layer102=RepConv(128,256,3,1)
		self.layer103=RepConv(256,512,3,1)
		self.layer104=RepConv(512,1024,3,1)

		self.idetect=IDetect(classes,anchors=anchors,ch=[256,512,1024])
		s = 256  # 2x min stride: run a dummy 256x256 input through the model to derive each head's stride
		self.idetect.stride=torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])
		self.idetect.anchors /= self.idetect.stride.view(-1, 1, 1)
		# check_anchor_order(self.idetect)
		self.stride = self.idetect.stride
		self._initialize_biases()  # only run once

		# Init weights, biases
		initialize_weights(self)



	def forward(self,x):
		x3=self.layer3(self.layer2(self.layer1(self.layer0(x))))
		x24=self.layer24(self.layer16(self.layer11(x3)))
		x37=self.layer37(self.layer29(x24))
		x51=self.layer51(self.layer50(self.layer42(x37)))
		x63=self.layer63(self.concat([self.layer54(x37),self.upsample(self.layer52(x51))]))
		x75=self.layer75(self.concat([self.layer66(x24),self.upsample(self.layer64(x63))]))
		x88=self.layer88(self.concat([self.layer80(x75),x63]))
		x101=self.layer101(self.concat([self.layer93(x88),x51]))

		x102=self.layer102(x75)
		x103=self.layer103(x88)
		x104=self.layer104(x101)

		out=self.idetect([x102,x103,x104])


		return out

	def _initialize_biases(self):
		for mi,s in zip(self.idetect.m,self.idetect.stride):
			b=mi.bias.view(self.idetect.na,-1) # conv.bias(255) to (3,85)
			b.data[:,4]+= math.log(8/(640/s)**2)
			b.data[:, 5:] += math.log(0.6 / (self.idetect.nc - 0.99))   # cls
			mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

# device=torch.device('cuda:0')
# anchors=[[12,16, 19,36, 40,28],[36,75, 76,55, 72,146],[142,110, 192,243, 459,401]]
#
# yolov7 = Model(classes=1,anchors=anchors).to(device)
# img = torch.rand( 1, 3, 640, 640).to(device)
# y = yolov7(img)
# print(y[0].shape)
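
For orientation, a quick shape check of the three detection heads (my sketch, not part of the original; it assumes baseblock.py and utils.py from section 2 are importable):

# In training mode the model returns the three raw head maps. With a 640x640
# input they sit at strides 8/16/32, i.e. 80x80, 40x40 and 20x20 grids, each
# with na=3 anchors and no=classes+5 channels.
anchors = [[12,16, 19,36, 40,28], [36,75, 76,55, 72,146], [142,110, 192,243, 459,401]]
model = Model(classes=1, anchors=anchors)
model.train()
for o in model(torch.rand(1, 3, 640, 640)):
	print(o.shape)  # (1, 3, 80, 80, 6), (1, 3, 40, 40, 6), (1, 3, 20, 20, 6)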

2. Individual modules

Still following the same network diagram linked above.

# coding=utf-8

import torch
import torch.nn as nn
'''
Author:Don
date:2022/10/17 16:24
desc:
'''

def autopad(k,p=None):
	if p is None:
		p=k//2 if isinstance(k,int) else [x//2 for x in k]
	return p

class Conv(nn.Module):
	# ch_in, ch_out, kernel, stride, padding, groups
	def __init__(self,c1,c2,k=1,s=1,p=None,g=1,act=True):
		super(Conv, self).__init__()
		self.conv=nn.Conv2d(c1,c2,k,s,autopad(k,p),groups=g,bias=False)
		self.bn=nn.BatchNorm2d(c2)
		self.act=nn.SiLU() if act is True else (act if isinstance(act,nn.Module) else nn.Identity())

	def forward(self,x):
		return  self.act(self.bn(self.conv(x)))

	def fuseforward(self,x):
		return self.act(self.conv(x))



class Concat(nn.Module):
	def __init__(self,dimension=1):
		super(Concat, self).__init__()
		self.d=dimension

	def forward(self,x):
		return torch.cat(x,self.d)


class MP(nn.Module):
	def __init__(self,k=2):
		super(MP, self).__init__()
		self.m=nn.MaxPool2d(kernel_size=k,stride=k)

	def forward(self,x):
		return self.m(x)


class MP_2(nn.Module):
	def __init__(self,c1):
		super(MP_2, self).__init__()
		self.conv1=Conv(c1,c1,1,1)
		self.conv2=Conv(c1,c1,3,2)
		self.m=MP()
		self.concat=Concat(1)

	def forward(self,x):
		y1=self.conv1(self.m(x))
		y2=self.conv2(self.conv1(x))
		return self.concat([y2,y1])


class MP_1(nn.Module):
	def __init__(self,c1):
		super(MP_1, self).__init__()
		c2=c1//2
		self.m=MP()
		self.conv1=Conv(c1,c2,1,1)
		self.conv2=Conv(c2,c2,3,2)
		self.concat=Concat(1)

	def forward(self,x):
		y1=self.conv1(self.m(x))
		y2=self.conv2(self.conv1(x))
		return self.concat([y1,y2])

class ELAN(nn.Module):
	def __init__(self, c1):
		super(ELAN, self).__init__()
		c2=c1//2
		c3=c1*2
		self.conv1 = Conv(c1, c2, 1, 1)
		self.conv2 = Conv(c2, c2, 3, 1)
		self.conv3 = Conv(c3, c3, 1, 1)
		self.concat=Concat(1)

	def forward(self,x):
		# Note: reusing self.conv1/self.conv2 shares weights across branches;
		# the official yolov7 uses separate conv layers here (the same caveat
		# applies to ELAN_E and ELAN_W below).
		y1=self.conv1(x)
		y2=self.conv1(x)
		y3=self.conv2(self.conv2(y2))
		y4=self.conv2(self.conv2(y3))
		return self.conv3(self.concat([y4,y3,y2,y1]))


class ELAN_E(nn.Module):
	def __init__(self, c1):
		super(ELAN_E, self).__init__()
		c2=c1//4
		c3=c1
		self.conv1 = Conv(c1, c2, 1, 1)
		self.conv2 = Conv(c2, c2, 3, 1)
		self.conv3 = Conv(c3, c3, 1, 1)
		self.concat=Concat(1)

	def forward(self,x):
		y1=self.conv1(x)
		y2=self.conv1(x)
		y3=self.conv2(self.conv2(y2))
		y4=self.conv2(self.conv2(y3))
		return self.conv3(self.concat([y4,y3,y2,y1]))


class ELAN_W(nn.Module):
	def __init__(self, c1):
		super(ELAN_W, self).__init__()
		c2=c1//2
		c3=c2//2
		c4=2*c2+4*c3
		self.conv1 = Conv(c1, c2, 1, 1)
		self.conv2 = Conv(c2, c3, 3, 1)
		self.conv3 = Conv(c3, c3, 3, 1)
		self.conv4 = Conv(c4, c4//4, 1, 1)
		self.concat=Concat(1)

	def forward(self,x):
		y1=self.conv1(x)
		y2=self.conv1(x)
		y3=self.conv2(y2)
		y4=self.conv3(y3)
		y5=self.conv3(y4)
		y6=self.conv3(y5)
		return self.conv4(self.concat([y6,y5,y4,y3,y2,y1]))

class SPPCSPC(nn.Module):
	def __init__(self,c1,c2,e=0.5,k=(5,9,13)):
		super(SPPCSPC, self).__init__()
		c_=int(2*c2*e) #hidden channels
		self.cv1=Conv(c1,c_,1,1)
		self.cv2=Conv(c1,c_,1,1)
		self.cv3=Conv(c_,c_,3,1)
		self.cv4=Conv(c_,c_,1,1)
		self.m=nn.ModuleList([nn.MaxPool2d(kernel_size=x,stride=1,padding=x//2) for x in k])
		self.cv5=Conv(4*c_,c_,1,1)
		self.cv6=Conv(c_,c_,3,1)
		self.cv7=Conv(2*c_,c2,1,1)

	def forward(self,x):
		x1=self.cv4(self.cv3(self.cv1(x)))
		y1=self.cv6(self.cv5(torch.cat([x1]+[m(x1) for m in self.m],1)))
		y2=self.cv2(x)
		return self.cv7(torch.cat((y1,y2),dim=1))

class RepConv(nn.Module):
	def __init__(self,c1,c2,k=3,s=1,p=None,g=1,act=True,deploy=False):
		super(RepConv, self).__init__()
		self.deploy=deploy
		self.groups=g
		self.in_channels=c1
		self.out_channels=c2
		padding_11=autopad(k,p)-k//2

		self.act=nn.SiLU() if act is True else (act if isinstance(act,nn.Module) else nn.Identity())

		if deploy:
			self.rbr_reparam=nn.Conv2d(c1,c2,k,s,autopad(k,p),groups=g,bias=True)

		else:
			self.rbr_dense = nn.Sequential(
				nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False),
				nn.BatchNorm2d(num_features=c2),
			)
			self.rbr_1x1 = nn.Sequential(
				nn.Conv2d(c1, c2, 1, s, padding_11, groups=g, bias=False),
				nn.BatchNorm2d(num_features=c2),
			)
	def forward(self,x):
		if hasattr(self, "rbr_reparam"):
			return self.act(self.rbr_reparam(x))
		return self.act(self.rbr_dense(x) + self.rbr_1x1(x))

class ImplicitA(nn.Module):
	def __init__(self,channel,mean=0.,std=.02):
		super(ImplicitA, self).__init__()
		self.channel=channel
		self.mean=mean
		self.std=std
		self.implicit=nn.Parameter(torch.zeros(1,channel,1,1))
		nn.init.normal_(self.implicit,mean=self.mean,std=std)

	def forward(self,x):
		return self.implicit+x


class ImplicitM(nn.Module):
	def __init__(self,channel,mean=0,std=.02):
		super(ImplicitM, self).__init__()
		self.channel=channel
		self.mean=mean
		self.std=std
		self.implicit=nn.Parameter(torch.ones(1,channel,1,1))
		nn.init.normal_(self.implicit,mean=self.mean,std=self.std)

	def forward(self,x):
		return self.implicit*x



class IDetect(nn.Module):
	stride = None  # strides computed during build
	export = False  # onnx export
	def __init__(self,nc=80,anchors=(),ch=()):
		super(IDetect, self).__init__()
		self.nc=nc
		self.no=nc+5  # outputs per anchor: box (4) + objectness (1) + classes
		self.nl=len(anchors)  #3
		self.na = len(anchors[0]) // 2  # number of anchors 3
		self.grid=[torch.zeros(1)]*self.nl
		a = torch.tensor(anchors).float().view(self.nl, -1, 2) # 3,3,2
		self.register_buffer('anchors', a)  # shape(nl,na,2)
		self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
		self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)
		self.ia=nn.ModuleList(ImplicitA(x) for x in ch)
		self.im=nn.ModuleList(ImplicitM(self.no*self.na) for _ in ch)
		# self.training=True

	def forward(self,x):
		z=[]
		self.training|=self.export
		for i in range(self.nl):
			x[i]=self.m[i](self.ia[i](x[i]))
			x[i]=self.im[i](x[i])
			bs,_,ny,nx=x[i].shape
			x[i]=x[i].view(bs,self.na,self.no,ny,nx).permute(0,1,3,4,2).contiguous()
			if not self.training:  # inference
				if self.grid[i].shape[2:4] != x[i].shape[2:4]:
					self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

				y = x[i].sigmoid()
				y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
				y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
				z.append(y.view(bs, -1, self.no))

		return x if self.training else (torch.cat(z, 1), x)


	@staticmethod
	def _make_grid(nx=20, ny=20):
		yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
		return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()




# anchors=[[12,16, 19,36, 40,28],[36,75, 76,55, 72,146],[142,110, 192,243, 459,401]]
# ch=[256,512,1024]
# d=IDetect(anchors=anchors)
# implicit=nn.Parameter(torch.zeros(1,3,1,1))
# nn.init.normal_(implicit,mean=0.,std=.02)
# print(implicit)
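
A rough shape check for the blocks above (my addition): ELAN doubles the channel count, while MP_1 halves the spatial size and keeps the channels.

x = torch.rand(1, 128, 160, 160)
y = ELAN(128)(x)           # 4 branches of 64 channels -> concat -> 1x1 conv
print(y.shape)             # (1, 256, 160, 160)
print(MP_1(256)(y).shape)  # (1, 256, 80, 80): pooled branch + strided-conv branch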

3. Data processing

The original data pipeline is too complex, and my dataset is small, so I removed mosaic and mixup and kept only simple rotation and flipping. The labels are read directly as the JSON files produced by the annotation tool (labelme-style JSON), with no conversion to the YOLOv5 txt label format.
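
For reference, cache_labels() further down parses labelme-style JSON. A minimal label file it can read would look like the sketch below (my addition; the field names come from the parsing code, while the file path and polygon values are made up):

# Hypothetical minimal label file for cache_labels(); values are illustrative.
import json

label = {
	"imageHeight": 480,
	"imageWidth": 640,
	"shapes": [
		{
			"label": "sack",
			"points": [[100.0, 120.0], [220.0, 120.0], [220.0, 260.0], [100.0, 260.0]]
		}
	]
}
with open("data/kongdong/labels/example.json", "w") as f:
	json.dump(label, f)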

# coding=utf-8
import os
import random
import sys

path = os.path.dirname(__file__)
sys.path.append(path)

'''
Author:Don
date:2022/10/19 15:00
desc:
'''
from utils import  *
from torch.utils.data import Dataset
from pathlib import Path
import glob
from tqdm import tqdm
from PIL import  Image
import json
import numpy as np
import cv2
def img2label_paths(img_paths):
	# Define label paths as a function of image paths
	sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep  # /images/, /labels/ substrings
	return ['json'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths]

def xyxy2xywh(x):
	# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
	y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
	y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
	y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
	y[:, 2] = (x[:, 2] - x[:, 0])  # width
	y[:, 3] = (x[:, 3] - x[:, 1])  # height
	return y

def segments2boxes(segments,img_h,img_w):
	# Convert polygon segments to normalized xywh boxes
	boxes = []
	for s in segments:
		x, y = s.T  # segment xy
		boxes.append([x.min(), y.min(), x.max(), y.max()])  # cls, xyxy
	y=xyxy2xywh(np.array(boxes))  # cls, xywh
	y[:,[1, 3]] /= img_h  # normalized height 0-1
	y[:,[0, 2]] /= img_w  # normalized width 0-1
	return y



def load_image(self, index):
	# Load an image from disk and resize its longer side to self.img_size.
	# (self.imgs is never populated in this simplified version, so the cache
	# check below always falls through to cv2.imread.)
	img = self.imgs[index]
	if img is None:
		path=self.img_files[index]
		img=cv2.imread(path)
		h0,w0=img.shape[:2]
		r=self.img_size/max(h0,w0)
		if r!=1:
			interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
			img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
		return img,(h0,w0),img.shape[:2]  # img, original hw, resized hw


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), scaleup=True, stride=32):
	shape = img.shape[:2]  # current shape [height, width]
	if isinstance(new_shape, int):
		new_shape = (new_shape, new_shape)

	# Scale ratio (new / old)
	r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
	if not scaleup:  # only scale down, never up (better test-time behavior)
		r = min(r, 1.0)

	ratio=r,r
	new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
	dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding, e.g. 640-640 and 640-512

	dw /= 2  # divide padding into 2 sides
	dh /= 2

	if shape[::-1] != new_unpad:  # resize if load_image left a rounding mismatch
		img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
	top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
	left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
	img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
	return img, ratio, (dw, dh)


def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
	y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
	y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw  # top left x
	y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh  # top left y
	y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw  # bottom right x
	y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh  # bottom right y
	return y


def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
	# Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
	w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
	w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
	ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
	return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates



def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
					   border=(0, 0)):
	# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
	# targets = [cls, xyxy]

	height = img.shape[0] + border[0] * 2  # shape(h,w,c)
	width = img.shape[1] + border[1] * 2

	# Center
	C = np.eye(3)
	C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
	C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

	# Perspective
	P = np.eye(3)
	P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
	P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

	# Rotation and Scale
	R = np.eye(3)
	a = random.uniform(-degrees, degrees)
	# a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
	s = random.uniform(1 - scale, 1.1 + scale)
	# s = 2 ** random.uniform(-scale, scale)
	R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

	# Shear
	S = np.eye(3)
	S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
	S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

	# Translation
	T = np.eye(3)
	T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
	T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

	# Combined rotation matrix
	M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
	if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
		if perspective:
			img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
		else:  # affine
			img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

	# Visualize
	# import matplotlib.pyplot as plt
	# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
	# ax[0].imshow(img[:, :, ::-1])  # base
	# ax[1].imshow(img2[:, :, ::-1])  # warped

	# Transform label coordinates
	n = len(targets)
	xy = np.ones((n * 4, 3))
	xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
	xy = xy @ M.T  # transform
	xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine

	# create new boxes
	x = xy[:, [0, 2, 4, 6]]
	y = xy[:, [1, 3, 5, 7]]
	new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

	# clip
	new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
	new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

	# filter candidates
	i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.10)
	targets = targets[i]
	targets[:, 1:5] = new[i]

	return img, targets


def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
	r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
	hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
	dtype = img.dtype  # uint8

	x = np.arange(0, 256, dtype=np.int16)
	lut_hue = ((x * r[0]) % 180).astype(dtype)
	lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
	lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

	img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
	cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed





class LoadImagesAndLabels(Dataset):


	def __init__(self,path,img_size=640,batch_size=16,augment=False,image_weights=False, single_cls=False, stride=32, pad=0.0):
		self.img_size=img_size
		self.augment=augment
		self.image_weights=image_weights
		self.stride=stride
		self.path=path

		try:
			f=[]
			for p in path if isinstance(path,list) else[path]:
				p = Path(p)  # os-agnostic
				f += glob.glob(str(p / '**' / '*.*'), recursive=True)
			self.img_files = sorted([x.replace('/', os.sep) for x in f])
		except Exception as e:
			raise Exception(f'Error loading data from {path}: {e}')
		self.label_files = img2label_paths(self.img_files)  # labels
		cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')

		if cache_path.is_file():
			cache, exists = torch.load(cache_path), True  # load
		else:
			cache, exists = self.cache_labels(cache_path), False  # cache

		labels, shapes, self.segments = zip(*cache.values())
		self.labels = list(labels)
		self.shapes = np.array(shapes, dtype=np.float64)
		self.img_files = list(cache.keys())  # update
		self.label_files = img2label_paths(cache.keys())  # update
		if single_cls:
			for x in self.labels:
				x[:, 0] = 0
		n=len(shapes)
		bi=np.floor(np.arange(n)/batch_size).astype(np.int32)
		nb=bi[-1]+1
		self.batch=bi
		self.n=n
		self.indices=range(n)

		self.imgs=[None]*n

	def __len__(self):
		return len(self.img_files)

	def __getitem__(self, item):
		index=self.indices[item]

		img,(h0,w0),(h,w)=load_image(self,index)
		shape=self.img_size
		img, ratio, pad = letterbox(img, shape, scaleup=self.augment)  # no real rescaling here (load_image already resized); this mainly adds padding

		shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

		labels = self.labels[index].copy()

		if labels.size:
			labels[:,1:]=xywhn2xyxy(labels[:,1:],ratio[0]*w,ratio[1]*h,padw=pad[0],padh=pad[1])

		if self.augment:
			# Augment imagespace

			img, labels = random_perspective(img, labels,
											 degrees=0.0,
											 translate=0.2,
											 scale=0.9,
											 shear=0.0,
											 perspective=0.0)

			# img, labels = self.albumentations(img, labels)

			# Augment colorspace
			augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4)


		nL = len(labels)  # number of labels
		if nL:
			labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
			labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
			labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

		if self.augment:

			# flip left-right
			if random.random() < 0.5:
				img = np.fliplr(img)
				if nL:
					labels[:, 1] = 1 - labels[:, 1]

		labels_out = torch.zeros((nL, 6))
		if nL:
			labels_out[:, 1:] = torch.from_numpy(labels)

		# Convert
		img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
		img = np.ascontiguousarray(img)

		return torch.from_numpy(img), labels_out, self.img_files[index], shapes

	@staticmethod
	def collate_fn(batch):
		img, label, path, shapes = zip(*batch)  # transposed
		for i, l in enumerate(label):
			l[:, 0] = i  # add target image index for build_targets()
		return torch.stack(img, 0), torch.cat(label, 0), path, shapes






	def cache_labels(self, cache_path):
		# Scan images and labelme-style JSON labels, then cache the parsed results.
		x = {}  # image file -> [labels, shape, segments]
		pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
		for i ,(im_file,lb_file) in enumerate(pbar):
			try:
				im=Image.open(im_file)
				im.verify()
				shape = im.size  # image size
				segments = []  # instance segments
				if os.path.isfile(lb_file):
					with open(lb_file, 'r') as f:
						j = json.load(f)
						img_h = j["imageHeight"]
						img_w = j["imageWidth"]
						classes = np.array([0 for x in j["shapes"] if x["label"] == "sack"], dtype=np.float32)  # single class 0 (assumes every shape is labeled "sack")
						segments = [np.array(x["points"], dtype=np.float32).reshape(-1, 2) for x in j["shapes"]]
						l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments,img_h,img_w)), 1)
					l = np.array(l, dtype=np.float32)
				else:
					l = np.zeros((0, 5), dtype=np.float32)
				x[im_file] = [l, shape, segments]
			except Exception as e:
				print(f'WARNING: ignoring corrupted image/label {im_file}: {e}')
		pbar.close()
		path = str(cache_path).replace('/', os.sep)
		torch.save(x, path)  # save for next time
		return x


def create_dataloader(path, imgsz, batch_size, stride,single_cls, augment=True, pad=0.0,world_size=1, workers=8, image_weights=False):
	dataset = LoadImagesAndLabels(path, imgsz, batch_size,
								  augment=augment,  # augment images
								  single_cls=single_cls,
								  stride=int(stride),
								  pad=pad,
								  image_weights=image_weights)

	batch_size = min(batch_size, len(dataset))
	nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers])
	loader = torch.utils.data.DataLoader
	dataloader = loader(dataset,
						batch_size=batch_size,
						num_workers=nw,
						sampler=None,
						pin_memory=True,
						collate_fn=LoadImagesAndLabels.collate_fn)

	return dataloader, dataset

img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']

# path='./data/kongdong/images'
# create_dataloader(path,640,1,32,True)
class LoadImages:
	def __init__(self,path,img_size=640):
		p = str(Path(path).absolute())  # os-agnostic absolute path
		if '*' in p:
			files = sorted(glob.glob(p, recursive=True))  # glob
		elif os.path.isdir(p):
			files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
		elif os.path.isfile(p):
			files = [p]  # files
		else:
			raise Exception(f'ERROR: {p} does not exist')

		images = [x for x in files if x.split('.')[-1].lower() in img_formats]
		ni = len(images)
		self.img_size=img_size
		self.files=images
		self.nf=ni

	def __iter__(self):
		self.count=0
		return self
	def __next__(self):
		if self.count==self.nf:
			raise StopIteration
		path = self.files[self.count]
		self.count+=1
		img0=cv2.imread(path)
		img=letterbox(img0,self.img_size)[0]
		img=img[:,:,::-1].transpose(2,0,1)
		img=np.ascontiguousarray(img)

		return path,img,img0
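
A minimal usage sketch for the loader above (my addition; the path is the one from the commented example, and it assumes images live under .../images with labelme JSON under .../labels):

# Hypothetical smoke test for create_dataloader().
dataloader, dataset = create_dataloader('./data/kongdong/images', 640, 2, 32, True, augment=False)
imgs, targets, paths, shapes = next(iter(dataloader))
print(imgs.shape)     # (2, 3, 640, 640), uint8 CHW batch
print(targets.shape)  # (n, 6): image index, class, normalized x, y, w, h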

4. Loss: so convoluted it feels like nobody is meant to read it

I added my own comments and removed the cls loss, since I only have one class.

# coding=utf-8
import os
import sys

import math
import numpy as np
import torch
import torch.nn.functional as F
path = os.path.dirname(__file__)
sys.path.append(path)
import torch.nn as nn

'''
Author:Don
date:2022/10/21 14:28
desc:
'''


def xywh2xyxy(x):
	y=x.clone() if isinstance(x,torch.Tensor) else np.copy(x)
	y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
	y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
	y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
	y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
	return y


def box_iou(box1, box2):
	def box_area(box):
		# box = 4xn
		return (box[2] - box[0]) * (box[3] - box[1])
	area1=box_area(box1.T)
	area2=box_area(box2.T)

	# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
	inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
	return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)


def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
	box2=box2.T
	if x1y1x2y2:  # x1, y1, x2, y2 = box1
		b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
		b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
	else: # transform from xywh to xyxy
		b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
		b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
		b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
		b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

	# Intersection area
	inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
			(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

	# Union Area
	w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
	w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
	union = w1 * h1 + w2 * h2 - inter + eps

	iou = inter / union
	if GIoU or DIoU or CIoU:
		cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
		ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
		if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
			c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
			rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
					(b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center distance squared
			if DIoU:
				return iou - rho2 / c2  # DIoU
			elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
				v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2)
				with torch.no_grad():
					alpha = v / (v - iou + (1 + eps))
				return iou - (rho2 / c2 + v * alpha)  # CIoU
		else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
			c_area = cw * ch + eps  # convex area
			return iou - (c_area - union) / c_area  # GIoU
	else:
		return iou  # IoU


class ComputeLossOTA:
	def __init__(self,model,autobalance=False):
		super(ComputeLossOTA, self).__init__()
		device=next(model.parameters()).device

		BCEcls=nn.BCEWithLogitsLoss(pos_weight=torch.tensor(1.0,device=device))
		BCEobj=nn.BCEWithLogitsLoss(pos_weight=torch.tensor(1.0,device=device))

		det=model.idetect
		self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02])  # P3-P7
		self.BCEcls,self.BCEobj=BCEcls,BCEobj
		for k in 'na','nc','nl','anchors','stride':
			setattr(self,k,getattr(det,k))

	def __call__(self, p,targets,imgs):
		device=targets.device
		lcls,lbox,lobj=torch.zeros(1,device=device),torch.zeros(1,device=device),torch.zeros(1,device=device)
		bs, as_, gjs, gis, targets, anchors = self.build_targets(p, targets, imgs)
		pre_gen_gains=[torch.tensor(pp.shape,device=device)[[3,2,3,2]] for pp in p] # [80,80,80,80,][40,40,40,40,][20,20,20,20]

		#loss
		for i,pi in enumerate(p):
			b, a, gj, gi = bs[i], as_[i], gjs[i], gis[i]  # image, anchor, gridy, gridx
			tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj
			n = b.shape[0]  # number of targets
			if n:
				ps=pi[b,a,gj,gi]

				# Regression (box loss)
				grid=torch.stack([gi,gj],dim=1)
				pxy=ps[:,:2].sigmoid()*2-0.5
				pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
				pbox = torch.cat((pxy, pwh), 1)  # predicted box
				selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i]  # xywh * 80/40/20
				selected_tbox[:, :2] -= grid   # the net predicts the center offset from the grid cell, so subtract the cell xy from the target
				iou = bbox_iou(pbox.T, selected_tbox, x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
				lbox += (1.0 - iou).mean()  # iou loss

				# Objectness
				tobj[b, a, gj, gi]=iou.detach().clamp(0).type(tobj.dtype)  # use the IoU (clamped to [0,1]) as the objectness target

			obji=self.BCEobj(pi[...,4],tobj)  # obj loss for this level
			lobj += obji * self.balance[i]  # levels are weighted differently; shallow levels get more weight to favor small objects

		lbox *= 0.05
		lobj *= 0.7
		bs=tobj.shape[0]

		loss=lbox+lobj
		return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()




	def build_targets(self, p, targets, imgs):

		indices, anch = self.find_3_positive(p, targets)  # expand the set of positive (true-label) matches

		matching_bs = [[] for pp in p]
		matching_as = [[] for pp in p]
		matching_gjs = [[] for pp in p]
		matching_gis = [[] for pp in p]
		matching_targets = [[] for pp in p]
		matching_anchs = [[] for pp in p]

		nl = len(p)

		for batch_idx in range(p[0].shape[0]):
			b_idx=targets[:,0]==batch_idx
			this_target=targets[b_idx]
			if this_target.shape[0] == 0:
				continue
			txywh=this_target[:,2:6]*imgs[batch_idx].shape[1]
			txyxy=xywh2xyxy(txywh)

			pxyxys = []
			p_cls = []
			p_obj = []
			from_which_layer = []
			all_b = []
			all_a = []
			all_gj = []
			all_gi = []
			all_anch = []
			for i,pi in enumerate(p):   # iterate over the prediction levels
				b, a, gj, gi = indices[i]    # grid cells that the ground-truth centers fall into at this level
				idx = (b == batch_idx)
				b, a, gj, gi = b[idx], a[idx], gj[idx], gi[idx]
				all_b.append(b)
				all_a.append(a)
				all_gj.append(gj)
				all_gi.append(gi)
				all_anch.append(anch[i][idx])
				from_which_layer.append(torch.ones(size=(len(b),)) * i)
				fg_pred = pi[b, a, gj, gi] # b: batch, a: anchor, gj/gi: cell xy; picks the matched cells out of e.g. a (1,3,80,80,6) map
				p_obj.append(fg_pred[:, 4:5])
				p_cls.append(fg_pred[:, 5:6])

				grid = torch.stack([gi, gj], dim=1)   # grid cell containing the object center
				pxy=(fg_pred[:,:2].sigmoid()*2-0.5+grid)*self.stride[i]  # the net predicts the center offset from the cell
				pwh = (fg_pred[:, 2:4].sigmoid() * 2) ** 2 * anch[i][idx] * self.stride[i]  # the net predicts a ratio of the anchor wh
				pxywh=torch.cat([pxy,pwh],dim=-1)
				pxyxy=xywh2xyxy(pxywh)
				pxyxys.append(pxyxy)

			pxyxys=torch.cat(pxyxys,dim=0)
			if pxyxys.shape[0] == 0:
				continue

			p_obj = torch.cat(p_obj, dim=0)
			p_cls = torch.cat(p_cls, dim=0)
			from_which_layer = torch.cat(from_which_layer, dim=0)
			all_b = torch.cat(all_b, dim=0)
			all_a = torch.cat(all_a, dim=0)
			all_gj = torch.cat(all_gj, dim=0)
			all_gi = torch.cat(all_gi, dim=0)
			all_anch = torch.cat(all_anch, dim=0)

			pair_wise_iou=box_iou(txyxy,pxyxys)
			pair_wise_iou_loss=-torch.log(pair_wise_iou+1e-8)

			top_k,_=torch.topk(pair_wise_iou,min(10,pair_wise_iou.shape[1]),dim=1) # take at most the 10 largest IoUs per gt
			dynamic_ks=torch.clamp(top_k.sum(1).int(),min=1)  # per-gt k; e.g. with 5 gts this might give k=[4,3,3,4,4]

			gt_cls_per_image=(F.one_hot(this_target[:,1].to(torch.int64),self.nc).float().unsqueeze(1).repeat(1,pxyxys.shape[0],1))  #5,60,1

			num_gt=this_target.shape[0]  # 5

			cls_preds_=(p_cls.float().unsqueeze(0).repeat(num_gt,1,1).sigmoid_()*p_obj.unsqueeze(0).repeat(num_gt,1,1).sigmoid_())  #5,60,1

			y=cls_preds_.sqrt_()

			pair_wise_cls_loss=F.binary_cross_entropy_with_logits(torch.log(y/(1-y)),gt_cls_per_image,reduction="none").sum(-1)

			del cls_preds_

			cost=(pair_wise_cls_loss+3.0*pair_wise_iou_loss)  # (num_gt, num_candidates), e.g. (5, 60): the candidates come from the gts across the 9 anchors and 5 neighboring cells

			matching_matrix=torch.zeros_like(cost)  # 5,60

			for gt_idx in range(num_gt):
				_,pos_idx=torch.topk(cost[gt_idx],k=dynamic_ks[gt_idx].item(),largest=False)  # take the k candidates with the smallest cost
				matching_matrix[gt_idx][pos_idx]=1.0

			del top_k,dynamic_ks
			anchor_matching_gt=matching_matrix.sum(0)
			#  a column sum > 1 means one candidate cell was matched to 2 gts
			if (anchor_matching_gt>1).sum()>0:
				_,cost_argmin=torch.min(cost[:,anchor_matching_gt>1],dim=0)  # find the gt with the smallest cost for that cell
				matching_matrix[:, anchor_matching_gt > 1] *= 0.0   # clear the conflicting columns
				matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1.0 # keep only the lowest-cost gt
			fg_mask_inboxes=matching_matrix.sum(0)>0.0  # the dynamically selected candidates; after conflict resolution the count per gt can be <= dynamic_ks

			matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0)  # which gt each kept candidate belongs to

			from_which_layer = from_which_layer[fg_mask_inboxes]
			all_b = all_b[fg_mask_inboxes]
			all_a = all_a[fg_mask_inboxes]
			all_gj = all_gj[fg_mask_inboxes]
			all_gi = all_gi[fg_mask_inboxes]
			all_anch = all_anch[fg_mask_inboxes]

			this_target=this_target[matched_gt_inds]

			for i in range(nl):
				layer_idx=from_which_layer==i
				matching_bs[i].append(all_b[layer_idx])
				matching_as[i].append(all_a[layer_idx])
				matching_gis[i].append(all_gi[layer_idx])
				matching_gjs[i].append(all_gj[layer_idx])
				matching_targets[i].append(this_target[layer_idx])
				matching_anchs[i].append(all_anch[layer_idx])


		for i in range(nl):
			if matching_targets[i] != []:
				matching_bs[i] = torch.cat(matching_bs[i], dim=0)
				matching_as[i] = torch.cat(matching_as[i], dim=0)
				matching_gjs[i] = torch.cat(matching_gjs[i], dim=0)
				matching_gis[i] = torch.cat(matching_gis[i], dim=0)
				matching_targets[i] = torch.cat(matching_targets[i], dim=0)
				matching_anchs[i] = torch.cat(matching_anchs[i], dim=0)

			else:
				matching_bs[i] = torch.tensor([], device=targets.device, dtype=torch.int64)
				matching_as[i] = torch.tensor([], device=targets.device, dtype=torch.int64)
				matching_gjs[i] = torch.tensor([], device=targets.device, dtype=torch.int64)
				matching_gis[i] = torch.tensor([], device=targets.device, dtype=torch.int64)
				matching_targets[i] = torch.tensor([], device=targets.device, dtype=torch.int64)
				matching_anchs[i] = torch.tensor([], device=targets.device, dtype=torch.int64)

		return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs


	def find_3_positive(self, p, targets):
		na,nt=self.na,targets.shape[0]   # na: anchors per level; nt: number of targets in the batch (e.g. 5)
		indices,anch=[],[]
		gain=torch.ones(7,device=targets.device).long()
		ai=torch.arange(na,device=targets.device).float().view(na,1).repeat(1,nt)  # anchor indices, e.g. [[0,0,0,0,0],[1,1,1,1,1],[2,2,2,2,2]]
		targets=torch.cat((targets.repeat(na,1,1),ai[:,:,None]),2)  # shape (na, nt, 7): image idx, cls, x, y, w, h, anchor idx

		g=0.5
		off = torch.tensor([[0, 0],[1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device).float() * g  # offsets
        # p  [[1,3,80,80,6],[1,3,40,40,6],[1,3,20,20,6] ]
		for i in range(self.nl): # for each of the 3 levels
			anchors=self.anchors[i]   # the 3 anchors of this level
			gain[2:6]=torch.tensor(p[i].shape)[[3,2,3,2]] # xyxy gain, e.g. [1,1,80,80,80,80,1]

			t=targets*gain   # scale normalized targets to grid units (e.g. x80)

			if nt:
				r=t[:,:,4:6]/anchors[:,None]  # wh ratio between gt boxes and anchors
				j = torch.max(r, 1. / r).max(2)[0] < 4.0  # keep gts whose wh ratio to the anchor stays below 4
				t = t[j]  # filter

				gxy=t[:,2:4]
				gxi = gain[[2, 3]] - gxy  # inverted coordinates: grid size minus the gt center xy
				j, k = ((gxy % 1. < g) & (gxy > 1.)).T   # center in the top/left half of its cell (and not in the first row/column): also take the cell above/to the left
				l, m = ((gxi % 1. < g) & (gxi > 1.)).T  # center in the bottom/right half (and not in the last row/column): also take the cell below/to the right
				j = torch.stack((torch.ones_like(j), j, k, l, m)) # selection mask over the 5 candidate cells
				t = t.repeat((5, 1, 1))[j]  # replicate each gt 5 times (center + 4 neighbors) and keep the selected copies
				offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]   # the matching cell offset for each kept copy
			else:
				t=targets[0]
				offsets=0

			b,c=t[:,:2].long().T  # image index, class
			gxy=t[:,2:4]
			gwh=t[:,4:6]
			gij=(gxy-offsets).long()
			gi,gj=gij.T

			a=t[:,6].long()
			indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))  # image, anchor, grid indices
			anch.append(anchors[a])  # anchors

		return indices,anch
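
To make the dynamic-k assignment above concrete, here is a toy run of the two lines that derive dynamic_ks (my sketch; the IoU values are invented):

# Toy illustration of the dynamic-k rule used in build_targets(): sum the
# top-10 IoUs per ground truth and truncate to an integer k (at least 1).
import torch
pair_wise_iou = torch.tensor([[0.9, 0.8, 0.7, 0.1],
                              [0.2, 0.1, 0.0, 0.0]])
top_k, _ = torch.topk(pair_wise_iou, min(10, pair_wise_iou.shape[1]), dim=1)
dynamic_ks = torch.clamp(top_k.sum(1).int(), min=1)
print(dynamic_ks)  # tensor([2, 1], dtype=torch.int32)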


5. Training

I have no multi-GPU setup, so I stripped out the distributed training; the original is likewise absurdly complicated, with a mountain of parameters.

# coding=utf-8
import os
import sys
from tqdm import tqdm
path = os.path.dirname(__file__)
sys.path.append(path)
from model import Model
import torch
import torch.optim as optim
import  torch.nn as nn
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np
from datasets import create_dataloader
from torch.cuda import amp
from loss import ComputeLossOTA
'''
Author:Don
date:2022/10/19 12:01
desc:
'''

def train():
	epochs=300
	imgsz=640
	batch_size=1
	train_path='./data/kongdong/images'
	classes=1
	# Optimizer
	nbs = 64  # nominal batch size
	accumulate = max(round(nbs / batch_size), 1)  # accumulate gradients over nbs/batch_size batches before each optimizer step
	device = torch.device('cuda:0')
	anchors = [[12, 16, 19, 36, 40, 28], [36, 75, 76, 55, 72, 146], [142, 110, 192, 243, 459, 401]]

	yolov7 = Model(classes=classes, anchors=anchors).to(device)

	pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
	for k, v in yolov7.named_modules():
		if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
			pg2.append(v.bias)  # biases
		if isinstance(v, nn.BatchNorm2d):
			pg0.append(v.weight)  # no decay
		elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
			pg1.append(v.weight)  # apply decay
		if hasattr(v, 'im'):
			if hasattr(v.im, 'implicit'):
				pg0.append(v.im.implicit)
			else:
				for iv in v.im:
					pg0.append(iv.implicit)
	optimizer = optim.SGD(pg0, lr=0.01, momentum=0.937, nesterov=True)
	optimizer.add_param_group({'params': pg1, 'weight_decay': 0.0005})  # add pg1 with weight_decay
	optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
	del pg0, pg1, pg2

	lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - 0.1) + 0.1  # linear
	scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

	gs=max(int(yolov7.stride.max()),32)  #32
	nl=yolov7.idetect.nl

	mloss = torch.zeros(4, device=device)  # mean losses
	single_cls=False
	# Trainloader
	if classes ==1 :
		single_cls=True

	dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs,single_cls, augment=True,image_weights=True)
	mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
	nb = len(dataloader)

	compute_loss_ota = ComputeLossOTA(yolov7)  # init loss class
	scaler = amp.GradScaler(enabled=device.type == 'cuda')
	for epoch in range(0, epochs):  # epoch ------------------------------------------------------------------
		yolov7.train()

		pbar = enumerate(dataloader)

		pbar = tqdm(pbar, total=nb)  # progress bar
		optimizer.zero_grad()
		for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
			ni = i + nb * epoch  # number integrated batches (since train start)
			imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

			# Forward
			with amp.autocast(enabled=True):
				pred = yolov7(imgs)  # forward
				# print(pred[0].shape)
				loss, loss_items = compute_loss_ota(pred, targets.to(device), imgs)  # loss scaled by batch_size
				# if rank != -1:
				# 	loss *= opt.world_size  # gradient averaged between devices in DDP mode
				# if opt.quad:
				# 	loss *= 4.

			# Backward
			scaler.scale(loss).backward()
			# Optimize
			if ni % accumulate == 0:
				scaler.step(optimizer)  # optimizer.step
				scaler.update()
				optimizer.zero_grad()

			mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
			mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
			s = ('%10s' * 2 + '%10.4g' * 6) % (
				'%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
			pbar.set_description(s)

		# Scheduler
		lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
		scheduler.step()


	torch.save(yolov7,'last.pt')


train()
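
A quick check on the linear schedule defined above (my sketch): the LR factor starts at 1.0 and decays to 0.1 by the final epoch.

epochs = 300
lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - 0.1) + 0.1
print(lf(0), lf(299))  # 1.0 0.1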

6. Inference

# coding=utf-8
import os
import sys

path = os.path.dirname(__file__)
sys.path.append(path)
import torch

from datasets import LoadImages,non_max_suppression
'''
Author:Don
date:2022/10/26 14:25
desc:
'''
from pathlib import Path
source='data/kongdong/images'
imgsz=640
dataset = LoadImages(source, img_size=imgsz)
device=torch.device('cuda:0')


def clip_coords(boxes, img_shape):
    # Clip bounding xyxy bounding boxes to image shape (height, width)
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords

import random
import cv2
def plot_one_box(x, img, color=None, label=None, line_thickness=3):
    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)

    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)


def detect():
	model = torch.load("last.pt", map_location=device)  # load FP32 model
	model.eval()
	for path, img, im0s in dataset:
		img = torch.from_numpy(img).to(device)
		img=img.unsqueeze(0)
		img = img.float()  # uint8 to fp16/32
		img /= 255.0  # 0 - 255 to 0.0 - 1.0
		pred = model(img)[0]
		pred = non_max_suppression(pred, 0.85, 0.4)  # note: with a single class (id 0), passing classes=1 would filter everything out

		for i,det in enumerate(pred):
			p, s, im0= path, '', im0s
			p = Path(p)  # to Path
			save_path = str(p.name)  # img.jpg
			gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
			if len(det):

				det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
				# Print results
				for c in det[:, -1].unique():
					n = (det[:, -1] == c).sum()  # detections per class
					s += f"{n} box{'s' * (n > 1)}, "  # add to string

				# Write results
				for *xyxy, conf, cls in reversed(det):
					label = f'{conf:.2f}'
					plot_one_box(xyxy, im0, label=label)

				cv2.imshow(str(p), im0)
				cv2.waitKey()  # wait for a key press


detect()
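
Note that non_max_suppression is imported from datasets above but never shown in this post. A minimal single-class sketch of what it needs to do (my addition, not the original implementation; it assumes the head output is (bs, n, 5+nc) with xywh boxes and leans on torchvision's NMS):

# Hedged sketch of a single-class NMS compatible with detect() above.
import torch
import torchvision

def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None):
	# prediction: (bs, n, 5+nc) from IDetect at inference time.
	# `classes` is accepted for call compatibility but ignored here.
	output = []
	for x in prediction:
		x = x[x[:, 4] > conf_thres]  # objectness threshold
		if not x.shape[0]:
			output.append(torch.zeros((0, 6), device=prediction.device))
			continue
		boxes = x[:, :4].clone()
		boxes[:, :2] -= x[:, 2:4] / 2             # xywh -> xyxy top-left
		boxes[:, 2:4] = boxes[:, :2] + x[:, 2:4]  # xyxy bottom-right
		scores = x[:, 4] * x[:, 5:].max(1)[0]     # obj * best class prob
		cls = x[:, 5:].argmax(1, keepdim=True).float()
		keep = torchvision.ops.nms(boxes, scores, iou_thres)
		output.append(torch.cat((boxes, scores[:, None], cls), 1)[keep])
	return output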

7. Summary

To be clear, and worth saying three times: this rewrite exists only to help understand YOLOv7. It is not fit for actual training, and its results are terrible!!

Reproduced from blog.csdn.net/qq_33228039/article/details/127536621