Yolov5s/Yolov8s网络结构图

一、网络模型配置

Yolov5s

# Parameters
nc: 1  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.50  # layer channel multiple
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],          # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],       # 1-P2/4
   [-1, 3, C3, [128]],               # 2
   [-1, 1, Conv, [256, 3, 2]],       # 3-P3/8
   [-1, 9, C3, [256]],               # 4
   [-1, 1, Conv, [512, 3, 2]],       # 5-P4/16
   [-1, 9, C3, [512]],               # 6
   [-1, 1, Conv, [1024, 3, 2]],      # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]], # 8
   [-1, 3, C3, [1024, False]],       # 9
  ]

# YOLOv5 head
head:
  [[-1, 1, Conv, [512, 1, 1]],                 # 10
   [-1, 1, nn.Upsample, [None, 2, 'nearest']], # 11
   [[-1, 6], 1, Concat, [1]],                  # 12 cat backbone P4
   [-1, 3, C3, [512, False]],                  # 13

   [-1, 1, Conv, [256, 1, 1]],                 # 14
   [-1, 1, nn.Upsample, [None, 2, 'nearest']], # 15
   [[-1, 4], 1, Concat, [1]],                  # 16 cat backbone P3
   [-1, 3, C3, [256, False]],                  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],                 # 18
   [[-1, 14], 1, Concat, [1]],                 # 19 cat head P4
   [-1, 3, C3, [512, False]],                  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],                 # 21
   [[-1, 10], 1, Concat, [1]],                 # 22 cat head P5
   [-1, 3, C3, [1024, False]],                 # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],   # 24 Detect(P3, P4, P5)
  ]

Yolov8s

# Parameters
nc: 80  # number of classes
depth_multiple: 0.33  # scales module repeats
width_multiple: 0.50  # scales convolution channels

# YOLOv8.0s backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]]  # 9

# YOLOv8.0s head
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
  - [-1, 3, C2f, [512]]  # 12

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]]  # cat head P4
  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]]  # cat head P5
  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)

  - [[15, 18, 21], 1, Detect, [nc]]  # Detect(P3, P4, P5)

二、简化版本

Yolov5s

Yolov8s

三、详细版本

四、Detect代码

Yolov5

import torch
import torch.nn as nn

class Detect(nn.Module):
    """YOLOv5 detection head (decode-only variant).

    One 1x1 convolution per input feature map produces ``na * (nc + 5)``
    channels, which are reshaped to ``(bs, na, ny, nx, nc + 5)``, passed
    through a sigmoid and decoded into boxes using a per-cell grid offset,
    the layer stride and the layer's anchor sizes.

    NOTE: this variant has no training branch -- ``forward`` always decodes.

    Args:
        nc: number of classes.
        anchors: one flat ``[w0, h0, w1, h1, ...]`` sequence per detection
            layer. Must be non-empty, because ``anchors[0]`` is read to
            derive the number of anchors per layer.
        ch: input channel count of each feature map, one entry per layer.
    """

    # Per-layer strides. Not set here: it must be assigned (indexable by
    # layer) before ``forward`` is called -- per the original note it is
    # computed during model build.
    stride = None

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # outputs per anchor: 4 box values + 1 objectness + nc classes
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # anchors per layer (each anchor is a w,h pair)
        # Placeholder grids; replaced lazily in forward() once the feature-map size is known.
        self.grid = [torch.zeros(1)] * self.nl
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', a)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv

    def forward(self, x):
        """Decode predictions for every detection layer.

        Args:
            x: list of ``nl`` feature maps shaped ``(bs, ch[i], ny, nx)``.
                The list is modified in place: each entry is replaced by the
                raw head output reshaped to ``(bs, na, ny, nx, no)``.

        Returns:
            A tuple ``(pred, x)`` where ``pred`` is the decoded predictions of
            all layers concatenated along dim 1, shape ``(bs, sum(na*ny*nx), no)``,
            and ``x`` is the mutated input list.
        """
        z = []
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # e.g. x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            # Rebuild the cell-offset grid only when the feature-map size or
            # the device changed, instead of on every call.
            grid = self.grid[i]
            if grid.shape[2:4] != x[i].shape[2:4] or grid.device != x[i].device:
                grid = self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

            y = x[i].sigmoid()  # e.g. (bs,3,20,20,85)
            y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + grid) * self.stride[i]  # xy
            y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
            z.append(y.view(bs, -1, self.no))  # e.g. (bs,400*3,85)
        return torch.cat(z, 1), x

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a ``(1, 1, ny, nx, 2)`` float tensor with ``grid[..., y, x, :] == (x, y)``.

        Built with broadcasting rather than ``torch.meshgrid`` so that the
        result does not depend on that function's ``indexing`` default (and
        no warning about the missing argument is emitted).
        """
        xv = torch.arange(nx).view(1, nx).expand(ny, nx)  # column index of every cell
        yv = torch.arange(ny).view(ny, 1).expand(ny, nx)  # row index of every cell
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()

Yolov8

class Detect(nn.Module):
    """YOLOv8 detection head.

    Every input feature map is fed to two parallel branches: ``cv2`` emits
    ``4 * reg_max`` box channels and ``cv3`` emits ``nc`` class channels; the
    two outputs are concatenated along the channel dimension. In training
    mode that per-layer list is returned as is. Otherwise all layers are
    flattened, split back into box/class parts, the box part is decoded via
    ``self.dfl`` and ``dist2bbox`` and scaled by the per-anchor strides.

    ``Conv``, ``DFL``, ``make_anchors`` and ``dist2bbox`` are defined outside
    this block.

    Args:
        nc: number of classes.
        ch: input channel count of each feature map; must be non-empty
            because ``ch[0]`` sizes the hidden widths of both branches.
    """

    dynamic = False  # when True, anchors/strides are rebuilt on every non-training call
    shape = None  # shape of x[0] at the time anchors/strides were last built
    anchors = torch.empty(0)  # placeholder until the first non-training call
    strides = torch.empty(0)  # placeholder until the first non-training call

    def __init__(self, nc=80, ch=()):  # detection layer
        super().__init__()
        self.nc = nc  # number of classes
        self.nl = len(ch)  # number of detection layers
        self.reg_max = 16  # DFL channels
        self.no = nc + self.reg_max * 4  # outputs per anchor: box channels + classes
        self.stride = torch.zeros(self.nl)  # filled in during model build, per the original note

        # Hidden widths of the two branches, e.g. max(16, 128/4, 16*4) and max(128, 80).
        box_width = max(16, ch[0] // 4, self.reg_max * 4)
        cls_width = max(ch[0], self.nc)

        # Box-regression branch: one stack per input feature map.
        box_branches = []
        for c_in in ch:
            box_branches.append(
                nn.Sequential(
                    Conv(c_in, box_width, 3),
                    Conv(box_width, box_width, 3),
                    nn.Conv2d(box_width, 4 * self.reg_max, 1),
                )
            )
        self.cv2 = nn.ModuleList(box_branches)

        # Classification branch: one stack per input feature map.
        cls_branches = []
        for c_in in ch:
            cls_branches.append(
                nn.Sequential(
                    Conv(c_in, cls_width, 3),
                    Conv(cls_width, cls_width, 3),
                    nn.Conv2d(cls_width, self.nc, 1),
                )
            )
        self.cv3 = nn.ModuleList(cls_branches)

        if self.reg_max > 1:
            self.dfl = DFL(self.reg_max)
        else:
            self.dfl = nn.Identity()

    def forward(self, x):
        """Run the head on a list of feature maps.

        Args:
            x: list of ``nl`` feature maps; modified in place so that each
                entry becomes the channel-wise concatenation of its box and
                class branch outputs.

        Returns:
            In training mode, the mutated list ``x``. Otherwise a tuple
            ``(y, x)`` where ``y`` concatenates decoded boxes and sigmoid
            class scores along dim 1.
        """
        shape = x[0].shape  # BCHW, captured before x is overwritten
        for i in range(self.nl):
            feat = x[i]
            box_out = self.cv2[i](feat)
            cls_out = self.cv3[i](feat)
            x[i] = torch.cat((box_out, cls_out), 1)

        if self.training:
            return x

        # Anchors depend on the input resolution: rebuild when forced or
        # when the input shape differs from the one they were built for.
        if self.dynamic or self.shape != shape:
            anchor_points, stride_tensor = make_anchors(x, self.stride, 0.5)
            self.anchors = anchor_points.transpose(0, 1)
            self.strides = stride_tensor.transpose(0, 1)
            self.shape = shape

        # Example shapes from the original notes (shape 1*128*80*80):
        #   anchors 2*8400, strides 1*8400
        #   per-layer 1*144*(6400+1600+400) --split--> 1*64*8400 (box), 1*80*8400 (cls)
        flattened = [level.view(shape[0], self.no, -1) for level in x]
        merged = torch.cat(flattened, 2)
        box, cls_scores = merged.split((self.reg_max * 4, self.nc), 1)

        # dist2bbox(1*4*8400, 1*2*8400) * 1*8400 -> 1*4*8400
        decoded = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1)
        dbox = decoded * self.strides
        y = torch.cat((dbox, cls_scores.sigmoid()), 1)  # e.g. 1*84*8400
        return y, x

猜你喜欢

转载自blog.csdn.net/Goodness2020/article/details/128826307