Deformable ConvNets v2
Introduction: Because the modules used to build convolutional neural networks have a fixed geometric structure, their ability to model geometric transformations is inherently limited. DCN v1 introduced two new modules to improve the transformation-modeling capability of convolutional neural networks: deformable convolution and deformable ROI pooling. Both are based on the idea of adjusting the spatial sampling locations in the module with additional offsets. The offsets are learned from the target task and require no additional supervision signal. The new modules can readily replace their regular counterparts in existing convolutional neural networks and can be trained end-to-end with standard backpropagation, resulting in a deformable convolutional network. However, the additional offsets may cause irrelevant information to be sampled, which affects the final result. Therefore, in DCN v2, the authors improved on DCN v1 to reduce the interference of irrelevant information.
Original paper: Deformable ConvNets v2: More Deformable, Better Results
PyTorch code implementation
class DCNv2(nn.Module):
    """Modulated deformable convolution (DCN v2) followed by BatchNorm and an activation.

    A regular convolution (``conv_offset_mask``) predicts the sampling offsets
    and a modulation mask from the input; both are passed, together with the
    learned ``weight`` and ``bias``, to ``torch.ops.torchvision.deform_conv2d``.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Side length of the (square) kernel.
        stride: Stride applied in both spatial directions.
        padding: Zero padding applied in both spatial directions.
        dilation: Dilation applied in both spatial directions.
        groups: Number of convolution groups; must divide both
            ``in_channels`` and ``out_channels``.
        deformable_groups: Number of offset/mask groups.

    Raises:
        ValueError: If ``in_channels`` or ``out_channels`` is not divisible
            by ``groups``.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1,
                 padding: int = 1, dilation: int = 1, groups: int = 1, deformable_groups: int = 1):
        super().__init__()

        if in_channels % groups != 0:
            raise ValueError("in_channels must be divisible by groups")
        if out_channels % groups != 0:
            raise ValueError("out_channels must be divisible by groups")

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = (kernel_size, kernel_size)
        self.stride = (stride, stride)
        self.padding = (padding, padding)
        self.dilation = (dilation, dilation)
        self.groups = groups
        self.deformable_groups = deformable_groups

        # Grouped convolution: each output channel only sees
        # in_channels // groups input channels, which is the weight layout the
        # deformable-conv op requires (identical to the old shape when groups == 1).
        self.weight = nn.Parameter(
            torch.empty(out_channels, in_channels // groups, *self.kernel_size)
        )
        self.bias = nn.Parameter(torch.empty(out_channels))

        # Per deformable group and kernel tap: 2 offset channels + 1 mask channel.
        out_channels_offset_mask = (self.deformable_groups * 3 *
                                    self.kernel_size[0] * self.kernel_size[1])
        # Must share stride/padding/dilation with the deformable conv so that
        # the predicted offsets/mask have the same spatial size as its output.
        self.conv_offset_mask = nn.Conv2d(
            self.in_channels,
            out_channels_offset_mask,
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            bias=True,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = Conv.default_act
        self.reset_parameters()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the deformable convolution, then BatchNorm and the activation."""
        offset_mask = self.conv_offset_mask(x)
        # Split the prediction into three equal channel chunks:
        # the first two form the offsets, the last one is the mask.
        o1, o2, mask = torch.chunk(offset_mask, 3, dim=1)
        offset = torch.cat((o1, o2), dim=1)
        mask = torch.sigmoid(mask)  # squash modulation scalars into (0, 1)
        x = torch.ops.torchvision.deform_conv2d(
            x,
            self.weight,
            offset,
            mask,
            self.bias,
            self.stride[0], self.stride[1],
            self.padding[0], self.padding[1],
            self.dilation[0], self.dilation[1],
            self.groups,
            self.deformable_groups,
            True,  # use_mask flag of the torchvision op (modulated / v2 behaviour)
        )
        x = self.bn(x)
        x = self.act(x)
        return x

    def reset_parameters(self) -> None:
        """Initialise weight uniformly, and zero the bias and the offset/mask conv."""
        n = self.in_channels
        for k in self.kernel_size:
            n *= k
        std = 1. / math.sqrt(n)
        self.weight.data.uniform_(-std, std)
        self.bias.data.zero_()
        # Zero offset/mask predictor: initial offsets are 0 and the mask is
        # sigmoid(0) = 0.5 everywhere, i.e. sampling starts on the regular grid.
        self.conv_offset_mask.weight.data.zero_()
        self.conv_offset_mask.bias.data.zero_()
class Bottleneck_DCN(nn.Module):
    """Two-layer bottleneck whose 3x3 layers are DCNv2 instead of plain Conv.

    Args:
        c1: Input channels.
        c2: Output channels.
        shortcut: Request a residual connection (only used when c1 == c2).
        g: Groups for the second layer.
        k: Kernel sizes of the first and second layer.
        e: Expansion ratio used to derive the hidden channel count from c2.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
        super().__init__()
        hidden = int(c2 * e)
        first_k = k[0]
        second_k = k[1]

        # Only kernel size 3 is routed to the deformable layer.
        self.cv1 = DCNv2(c1, hidden, first_k, 1) if first_k == 3 else Conv(c1, hidden, first_k, 1)
        self.cv2 = (
            DCNv2(hidden, c2, second_k, 1, groups=g)
            if second_k == 3
            else Conv(hidden, c2, second_k, 1, g=g)
        )

        # The residual add needs matching input/output channel counts.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Run both layers; add the input back when the shortcut is active."""
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out
class C2f_DCN(nn.Module):
    """CSP bottleneck with 2 convolutions, built from Bottleneck_DCN blocks.

    Args:
        c1: Input channels.
        c2: Output channels.
        n: Number of Bottleneck_DCN blocks.
        shortcut: Passed through to each Bottleneck_DCN.
        g: Groups, passed through to each Bottleneck_DCN.
        e: Expansion ratio used to derive the hidden channel count from c2.
    """

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
        super().__init__()
        self.c = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
        # cv2 consumes the two halves of cv1 plus one output per bottleneck.
        self.cv2 = Conv((2 + n) * self.c, c2, 1)  # optional act=FReLU(c2)
        blocks = [
            Bottleneck_DCN(self.c, self.c, shortcut, g, k=(3, 3), e=1.0)
            for _ in range(n)
        ]
        self.m = nn.ModuleList(blocks)

    def forward(self, x):
        """Split cv1's output in two, chain the bottlenecks, then fuse everything."""
        features = list(self.cv1(x).split((self.c, self.c), 1))
        for block in self.m:
            # Each bottleneck takes the most recently produced feature map.
            features.append(block(features[-1]))
        return self.cv2(torch.cat(features, 1))
Specific modifications
module.py file modification
Append the class definitions from the PyTorch code implementation above to the end of the module.py file
task.py file modification
Import the C2f_DCN module
In the parse_model function, add C2f_DCN to the module handling (the same places where C2f appears)
Yolov8.yaml configuration file modification
Replace the original C2f modules with C2f_DCN in the configuration file, then train the model.