every blog every motto: You can do more than you think.
0. 前言
金字塔池化模块简单小结。
废话: 还是菜的抠脚,哎,
1. 正文
1.1 基础概念
金字塔池化模块,即用不同尺度进行池化操作。
如池化后特征图大小为1×1,(不考虑通道,因通道无变化),假若我们的输入特征图为40×40,那么,即对所有数据(1600)取平均(如下图,平均值为60)。
如池化后的特征图为2×2,那么即将特征图分成4个区域(2×2),每个区域内进行平均池化,如下图:
说明:
1. Tensorflow中,用AveragePooling2D进行池化,需要计算池化核和步幅
Pytorch中,用AdaptiveAvgPool2d,指定输出特征图大小即可
具体见后面代码
2. 上述图片中的数值为随便取的,仅作展示。
金字塔池化模块,生成不同尺度的池化结果。具体结果为1×1,2×2,3×3,6×6,四种,然后将生成的特征图上采样恢复到初始特征图大小,最后在通道方向上合并(包含最初的特征图),图如下:
通俗的理解:
说明: 仅从特征图大小的角度进行展示,未展示通道情况。
1.2 代码
1.2.1 pytorch简洁版
from torch import nn
import torch
from torch.nn import functional as F
class PPM(nn.Module):
    """Pyramid Pooling Module.

    For every entry in ``bins`` the input is adaptively average-pooled to
    that output size, projected to ``reduction_dim`` channels by a bias-free
    1x1 convolution followed by BatchNorm and ReLU, and bilinearly upsampled
    back to the input's spatial size. The input and all upsampled branches
    are concatenated along the channel axis.

    Args:
        in_dim: Number of channels of the input feature map.
        reduction_dim: Number of channels produced by each pooling branch.
        bins: Output sizes handed to ``nn.AdaptiveAvgPool2d``, one per branch.
    """

    def __init__(self, in_dim, reduction_dim, bins):
        super().__init__()
        # Build the ModuleList in one step so the branches are registered as
        # sub-modules immediately; ``bin_size`` avoids shadowing builtin bin().
        self.features = nn.ModuleList([
            nn.Sequential(
                # Pool down to a bin_size x bin_size map.
                nn.AdaptiveAvgPool2d(bin_size),
                # 1x1 convolution to reduce the channel count.
                nn.Conv2d(in_dim, reduction_dim, kernel_size=1, bias=False),
                nn.BatchNorm2d(reduction_dim),
                nn.ReLU(inplace=True),
            )
            for bin_size in bins
        ])

    def forward(self, x):
        """Return ``x`` concatenated with every upsampled pyramid branch.

        The spatial size is taken from ``x.size()[2:]``, so ``x`` is expected
        to be laid out as (N, C, H, W). The result has
        ``in_dim + len(bins) * reduction_dim`` channels.
        """
        target_size = x.size()[2:]
        out = [x]
        for branch in self.features:
            pooled = branch(x)  # pooling + 1x1 conv + BN + ReLU
            # Upsample back to the spatial size of the input.
            restored = F.interpolate(pooled, target_size, mode='bilinear', align_corners=True)
            out.append(restored)
        return torch.cat(out, 1)  # merge along the channel axis
# Demo: run a random (7, 4, 30, 30) batch through a PPM with four bins.
ten = torch.rand(7, 4, 30, 30)
ppm = PPM(in_dim=4, reduction_dim=2, bins=[1, 2, 3, 6])
ppm(ten)
1.2.2 pytorch 老版
import torch
import torch.nn as nn
from torch.nn import functional as F
import numpy as np
class PSPModule(nn.Module):
    """Pyramid pooling followed by a 1x1 bottleneck convolution and ReLU.

    Each stage adaptively average-pools the input to ``size x size`` and
    applies a bias-free 1x1 convolution that keeps ``in_channel`` channels.
    The stage outputs are bilinearly upsampled to the input's height and
    width, concatenated with the input along the channel axis, and reduced
    to ``out_channel`` channels by the bottleneck convolution.

    Args:
        in_channel: Number of channels of the input feature map.
        out_channel: Number of channels produced by the bottleneck.
        sizes: Pooled output sizes, one stage per entry.
    """

    def __init__(self, in_channel, out_channel=1024, sizes=(1, 2, 3, 6)):
        super().__init__()
        self.stages = nn.ModuleList([self._make_stage(in_channel, size) for size in sizes])
        # The concatenation holds the input plus one in_channel-wide map per stage.
        self.bottleneck = nn.Conv2d(in_channel * (len(sizes) + 1), out_channel, kernel_size=(1, 1))
        self.relu = nn.ReLU()

    def _make_stage(self, in_channel, out_size):
        """Build one stage: adaptive average pooling, then a 1x1 convolution."""
        prior = nn.AdaptiveAvgPool2d(output_size=(out_size, out_size))
        conv = nn.Conv2d(in_channel, in_channel, kernel_size=(1, 1), bias=False)
        return nn.Sequential(prior, conv)

    def forward(self, x):
        """Return ``relu(bottleneck(cat(upsampled stages + [x])))``.

        Height and width are read from ``x.size(2)`` and ``x.size(3)``.
        """
        h, w = x.size(2), x.size(3)
        box = []  # upsampled output of every stage
        for stage in self.stages:
            # Every stage must see the ORIGINAL input, so ``x`` is never rebound here.
            pooled = stage(x)
            print('自适应平均池化 shape:', pooled.shape)
            # F.upsample is deprecated; align_corners=False is what it used for
            # bilinear mode when the argument was omitted.
            box.append(F.interpolate(pooled, size=(h, w), mode='bilinear', align_corners=False))
        box.append(x)  # the untouched input goes last
        merged = torch.cat(box, 1)  # merge along the channel axis
        print('通道方向合并的 shape:', merged.shape)
        bottle = self.bottleneck(merged)
        return self.relu(bottle)
# Demo: feed an all-zero (1, 3, 30, 30) float32 batch through PSPModule.
arr = np.zeros(shape=(1, 3, 30, 30), dtype=np.float32)
ten = torch.from_numpy(arr)
a = PSPModule(in_channel=3)
a(ten)
说明: 含有通道合并后的部分代码(卷积+relu)
1.2.3 tensorflow 2.x版
在Tensorflow中,由于没有像pytorch那样的自适应池化(AdaptiveAvgPool2d),所以使用平均池化(AveragePooling2D),即通过指定池化核大小和步幅,以得到同样大小的输出。
eg1: 输入特征图为(30,30),希望得到(2,2)的特征图(不考虑通道,因通道无变化)
那么,这个过程就是1.1中所示,pool_factor=2
池化核大小:pool_size = 30/pool_factor = 30/2 =15
步幅:strides = pool_size = 15
eg2: 希望得到(3,3)的特征图,那么pool_factor = 3
池化核大小:pool_size = 30/pool_factor = 30/3 =10
步幅:strides = pool_size = 10
eg3: 希望得到(6,6)的特征图,那么pool_factor = 6
池化核大小:pool_size = 30/pool_factor = 30/6 =5
步幅:strides = pool_size = 5
注意: tensorflow中的代码都需计算这两个参数!
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
from tensorflow.keras.layers import AveragePooling2D, Conv2D, BatchNormalization, Activation, Lambda
import tensorflow.keras.backend as K
import numpy as np
def pool_block(feats, pool_factor, out_channel):
    """Run one pyramid-pooling branch on ``feats``.

    The input is average-pooled with a window (and equal stride) of
    ``round(size / pool_factor)`` per spatial axis, passed through a 1x1
    convolution, batch normalization and ReLU, and finally resized back to
    the input's height and width.

    :param feats: input feature map; axes 1 and 2 are read as height and width
    :param pool_factor: divisor applied to height and width to get the pooling window
    :param out_channel: number of filters of the 1x1 convolution
    :return: the branch output after resizing to the input's (height, width)
    """
    print('pool factor为:', pool_factor)

    in_shape = K.int_shape(feats)
    h, w = in_shape[1], in_shape[2]

    # Window and stride are the same, so the pooling windows do not overlap.
    window = [int(np.round(float(dim) / pool_factor)) for dim in (h, w)]

    # Average pooling at this pyramid scale.
    pooled = AveragePooling2D(window, strides=window, padding='same')(feats)
    print('平均池化以后的shape:', pooled.shape)

    projected = Conv2D(out_channel, (1, 1), padding='same')(pooled)
    normalized = BatchNormalization()(projected)
    activated = Activation('relu')(normalized)

    # Resize back to the spatial size of the input.
    resize_back = Lambda(
        lambda t: tf.compat.v1.image.resize_images(t, (h, w), align_corners=True)
    )
    restored = resize_back(activated)
    print('final x shape:', restored.shape)
    return restored
# Demo: run an all-zero (1, 30, 30, 3) float32 batch through one branch per pool factor.
arr = np.zeros(shape=(1, 30, 30, 3), dtype=np.float32)
ten = tf.convert_to_tensor(arr)
pool_factorts = [1, 2, 3, 6]
for p in pool_factorts:
    pool_block(feats=ten, pool_factor=p, out_channel=60)
1.2.4 tensorflow 1.x版
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
from tensorflow.contrib import slim
import numpy as np
def resize_image(input_images, s):
    """Resize ``input_images`` by per-axis scale factors.

    :param input_images: input tensor; axes 1 and 2 of its static shape are
        read as height and width
    :param s: scale factors, ``s[0]`` for height and ``s[1]`` for width, e.g. (2, 2)
    :return: result of ``tf.image.resize_images`` at the scaled size
    """
    dims = input_images.get_shape().as_list()
    src_h, src_w = dims[1], dims[2]
    scale_h, scale_w = s[0], s[1]
    # Target size is the source size times the scale, truncated to int.
    target = (int(src_h * scale_h), int(src_w * scale_w))
    return tf.image.resize_images(input_images, size=target)
def pool_block(x, pool_factor, IMAGE_ORDER='NHWC', out_channel=512):
    """Run one pyramid-pooling branch on ``x`` (TensorFlow 1.x / slim).

    The input is average-pooled with a window (and equal stride) of
    ``round(size / pool_factor)`` per spatial axis, passed through a 1x1
    convolution, batch normalization and ReLU, and resized back to the
    input's height and width.

    :param x: input feature map, laid out according to ``IMAGE_ORDER``
    :param pool_factor: divisor applied to height and width to get the pooling window
    :param IMAGE_ORDER: ``'NHWC'`` for channels-last; any other value is
        treated as channels-first (height and width on axes 2 and 3)
    :param out_channel: number of output channels of the 1x1 convolution
    :return: the branch output in the same layout as the input, with the
        input's height and width
    """
    channels_first = IMAGE_ORDER != 'NHWC'
    if channels_first:
        # The slim ops below are called without data_format and
        # tf.image.resize_images works on channels-last images, so convert to
        # NHWC here and convert back before returning.
        x = tf.transpose(x, [0, 2, 3, 1])

    h, w = x.get_shape().as_list()[1], x.get_shape().as_list()[2]

    # e.g. for an 18x18 map and factors 1, 2, 3, 6:
    # strides = [18,18],[9,9],[6,6],[3,3]
    pool_size = [int(np.round(float(h) / pool_factor)), int(np.round(float(w) / pool_factor))]
    strides = pool_size

    # Average pooling at this pyramid scale.
    x = slim.avg_pool2d(x, kernel_size=pool_size, stride=strides, padding='SAME')
    print('池化后的特征图大小:', x.shape)

    # 1x1 convolution.
    # NOTE(review): slim.conv2d applies its own default activation unless
    # activation_fn=None is passed, so this may effectively be
    # conv -> ReLU -> BN -> ReLU; confirm whether that is intended.
    x = slim.conv2d(x, out_channel, kernel_size=(1, 1), stride=1, padding='SAME')
    x = tf.cast(x, tf.float32)
    x = slim.batch_norm(x)
    x = tf.nn.relu(x)
    print('relu 后:', x.shape)
    print('-' * 100)

    # Resize straight to the input size. Scaling the pooled map by the stride
    # only lands on (h, w) when the stride divides the size exactly.
    x = tf.image.resize_images(x, size=(h, w))

    if channels_first:
        x = tf.transpose(x, [0, 3, 1, 2])
    return x
# Demo: run an all-zero (1, 30, 30, 3) float32 batch through one branch per pool factor.
arr = np.zeros(shape=(1, 30, 30, 3), dtype=np.float32)
xx = tf.convert_to_tensor(arr)
pool_factors = [1, 2, 3, 6]
for pf in pool_factors:
    p = pool_block(xx, pool_factor=pf)
参考文献
[1] https://blog.csdn.net/qq_24975309/article/details/108677378
[2] https://www.cnblogs.com/learningcaiji/p/14187039.html
[3] https://blog.csdn.net/qq_43258953/article/details/103300945
[4] https://zhuanlan.zhihu.com/p/75206669
[5] https://pytorch.org/docs/stable/nn.functional.html
[6] https://blog.csdn.net/weixin_44791964/article/details/108469870
[7] https://github.com/pudae/tensorflow-pspnet
[8] https://github.com/bubbliiiing/pspnet-tf2
[9] https://github.com/hszhao/semseg
[10] https://github.com/bubbliiiing/pspnet-pytorch/blob/master/nets/pspnet.py
[11] https://blog.csdn.net/qq_41375318/article/details/110071599
[12] https://www.cnblogs.com/wzyuan/p/10224793.html
[13] https://www.cnblogs.com/wzyuan/p/10224793.html
[14] https://github.com/Lextal/pspnet-pytorch/blob/master/pspnet.py