前言
作为当前先进的深度学习目标检测算法YOLOv8,已经集成了大量的trick,但是还是有提高和改进的空间,针对具体应用场景下的检测难点,可以采用不同的改进方法。此后的系列文章,将重点对如何改进YOLOv8进行详细的介绍,目的是为了给那些搞科研的同学需要创新点或者搞工程项目的朋友需要达到更好的效果提供自己的微薄帮助和参考。由于出到YOLOv8,YOLOv7、YOLOv5算法2020年至今已经涌现出大量改进论文,这个不论对于搞科研的同学或者已经工作的朋友来说,研究的价值和新颖度都不太够了,为与时俱进,以后改进算法以YOLOv7为基础,此前YOLOv5改进方法在YOLOv7同样适用,所以继续YOLOv5系列改进的序号。另外,这些改进方法在YOLOv5等其他算法上同样适用。希望能够对大家有帮助。
链接:
提取码:关注私信后获取
一、解决问题
尝试将原YOLO中的conv改为drconv,提升精度和效果。
二、基本原理
原文摘要:我们提出了一种新的卷积称为区域动态感知卷积(DRConv),它可以自动分配多个滤波器到具有相似特征表示的相应空间区域。在这方面,DRConv在建模语义变化方面优于标准卷积。标准卷积层可以增加filters的数量来提取更多的视觉元素,但计算成本较高。我们的DRConv通过可学习的instructor,将增加channel filters的方式转换到空间维度上,这不仅提高了卷积的表示能力,而且保持了与标准卷积相似的计算成本和平移不变性。DRConv是一种处理复杂多变空间信息分布的有效而优雅的方法。它的即插即用特性可以替代现有网络中的标准卷积,特别是在高效网络中的power卷积层。我们在很大范围内的模型(MobileNet系列,ShuffleNetV2等)和任务(分类,人脸识别,检测和分割)上评估DRConv。在ImageNet分类上,基于DRConv的ShuffleNetV2-0.5在46M multiply-adds级别上实现了最先进的67.1%的性能,相对提高了6.3%。
三、添加方法
在common.py中添加drconv模块(部分代码如下)
import torch.nn.functional as F
import torch.nn as nn
import torch
from torch.autograd import Variable, Function
class asign_index(torch.autograd.Function):
    """Hard region assignment with a soft backward pass.

    forward: kernel (B, r, C, H, W) and guide_feature (B, r, H, W) ->
    (B, C, H, W), picking at each spatial position the kernel slice of the
    region whose guide score is maximal.  backward: the non-differentiable
    argmax is relaxed with a softmax over the guide logits so the guide
    branch still receives a gradient.
    """

    @staticmethod
    def forward(ctx, kernel, guide_feature):
        ctx.save_for_backward(kernel, guide_feature)
        # One-hot mask over the region axis (dim 1): 1 at the argmax region.
        winner = guide_feature.argmax(dim=1, keepdim=True)
        guide_mask = torch.zeros_like(guide_feature).scatter_(1, winner, 1)
        # Broadcast the (B, r, 1, H, W) mask against kernel and collapse regions.
        return (kernel * guide_mask.unsqueeze(2)).sum(dim=1)

    @staticmethod
    def backward(ctx, grad_output):
        kernel, guide_feature = ctx.saved_tensors
        winner = guide_feature.argmax(dim=1, keepdim=True)
        guide_mask = torch.zeros_like(guide_feature).scatter_(1, winner, 1).unsqueeze(2)
        # Kernel gradient: route grad_output only into the selected region.
        grad_kernel = grad_output.clone().unsqueeze(1) * guide_mask  # B x r x C x H x W
        # Guide gradient: contribution of each region, summed over channels ...
        grad_guide = (grad_output.clone().unsqueeze(1) * kernel).sum(dim=2)  # B x r x H x W
        # ... pushed through the softmax Jacobian of the guide logits.
        soft = F.softmax(guide_feature, 1)
        grad_guide = soft * (grad_guide - (soft * grad_guide).sum(dim=1, keepdim=True))
        return grad_kernel, grad_guide
def xcorr_slow(x, kernel, kwargs):
    """Cross correlation via an explicit per-sample loop.

    Runs one F.conv2d per batch element, using that element's own filters,
    then concatenates the results along the batch axis.  `kwargs` is a dict
    forwarded verbatim to F.conv2d (stride, padding, ...).
    """
    outputs = []
    for i in range(x.size(0)):
        sample = x[i].unsqueeze(0)  # (1, C, H, W)
        filt = kernel[i]
        # Reshape the flat per-sample kernel into conv2d weight layout.
        filt = filt.view(-1, sample.size(1), filt.size(1), filt.size(2))
        outputs.append(F.conv2d(sample, filt, **kwargs))
    return torch.cat(outputs, 0)
def xcorr_fast(x, kernel, kwargs):
    """Cross correlation as one grouped conv2d over the whole batch.

    The batch is folded into the channel axis and `groups=batch` makes each
    sample see only its own filters — equivalent to xcorr_slow but in a
    single kernel launch.  `kwargs` is forwarded verbatim to F.conv2d.
    """
    batch = kernel.size(0)
    filt = kernel.view(-1, x.size(1), kernel.size(2), kernel.size(3))
    stacked = x.view(1, -1, x.size(2), x.size(3))
    out = F.conv2d(stacked, filt, **kwargs, groups=batch)
    return out.view(batch, -1, out.size(2), out.size(3))
class Corr(Function):
    """ONNX-exportable grouped cross correlation (used on the eval path)."""

    @staticmethod
    def symbolic(g, x, kernel, groups):
        # Export as a single custom "Corr" node instead of the unrolled graph.
        return g.op("Corr", x, kernel, groups_i=groups)

    @staticmethod
    def forward(self, x, kernel, groups, kwargs):
        """Batch-folded grouped conv2d; `kwargs` is forwarded to F.conv2d."""
        batch, channel = x.size(0), x.size(1)
        stacked = x.view(1, -1, x.size(2), x.size(3))
        filt = kernel.view(-1, channel // groups, kernel.size(2), kernel.size(3))
        out = F.conv2d(stacked, filt, **kwargs, groups=groups * batch)
        return out.view(batch, -1, out.size(2), out.size(3))
class Correlation(nn.Module):
    """Dispatching wrapper around the cross-correlation implementations.

    Training uses the pure-python paths (per-sample loop or grouped conv);
    eval uses Corr, which carries an ONNX symbolic for export.
    """

    # Class-level default, overridable per instance via the constructor.
    use_slow = True

    def __init__(self, use_slow=None):
        super(Correlation, self).__init__()
        self.use_slow = Correlation.use_slow if use_slow is None else use_slow

    def extra_repr(self):
        return "xcorr_slow" if self.use_slow else "xcorr_fast"

    def forward(self, x, kernel, **kwargs):
        if not self.training:
            return Corr.apply(x, kernel, 1, kwargs)
        impl = xcorr_slow if self.use_slow else xcorr_fast
        return impl(x, kernel, kwargs)
class DRConv2d(nn.Module):
    """Dynamic Region-aware Convolution (DRConv).

    Generates `region_num` candidate kernels from a pooled view of the
    input and a guide map that scores every spatial position against each
    region; the output at each position is the correlation response of the
    region selected by `asign_index` (hard argmax forward, soft backward).
    """

    def __init__(self, in_channels, out_channels, kernel_size, region_num=8, **kwargs):
        super(DRConv2d, self).__init__()
        self.region_num = region_num
        # Kernel-generation branch: pool to kernel_size, then expand to one
        # (in_channels x out_channels) filter bank per region.
        self.conv_kernel = nn.Sequential(
            nn.AdaptiveAvgPool2d((kernel_size, kernel_size)),
            nn.Conv2d(in_channels, region_num * region_num, kernel_size=1),
            nn.Sigmoid(),
            nn.Conv2d(region_num * region_num,
                      region_num * in_channels * out_channels,
                      kernel_size=1, groups=region_num),
        )
        # Guide branch: per-position score for each of the region_num regions.
        self.conv_guide = nn.Conv2d(in_channels, region_num, kernel_size=kernel_size, **kwargs)
        self.corr = Correlation(use_slow=False)
        self.kwargs = kwargs
        self.asign_index = asign_index.apply

    def forward(self, input):
        generated = self.conv_kernel(input)
        # Flatten to B x (r*in*out) x k x k for the correlation op.
        generated = generated.view(generated.size(0), -1, generated.size(2), generated.size(3))
        response = self.corr(input, generated, **self.kwargs)  # B x (r*out) x W x H
        # Unfold the region axis: B x r x out x W x H.
        response = response.view(response.size(0), self.region_num, -1,
                                 response.size(2), response.size(3))
        guide_feature = self.conv_guide(input)
        # Keep, per position, only the response of the winning region.
        return self.asign_index(response, guide_feature)
四、总结
预告一下:下一篇内容将继续分享深度学习算法相关改进方法。有兴趣的朋友可以关注一下我,有问题可以留言或者私聊我哦
PS:该方法不仅仅是适用改进YOLOv5,也可以改进其他的YOLO网络以及目标检测网络,比如YOLOv7、v6、v4、v3,Faster rcnn ,ssd等。
最后,有需要的请关注私信我吧。关注免费领取深度学习算法学习资料!