The Residual Unit: Bottleneck
In ResNet, the residual unit shown in Figure 1(a) is called a Bottleneck. ResNet comes in versions of different depths, such as 18, 34, 50, 101, and 152 layers; the common 50-layer version is used as the example here. The ResNet-50 architecture is shown in Figure 1(b). Its main body is four large convolution stages, which contain [3, 4, 6, 3] Bottleneck modules respectively. A final global average pooling reduces the feature map to 1*1, followed by a 1000-dimensional fully connected layer and a Softmax output.
Figure 1. ResNet-50 network architecture
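The [3, 4, 6, 3] layout is easy to verify against torchvision's reference implementation (a minimal sketch, assuming torchvision is installed):

from torchvision import models

model = models.resnet50()
# the four convolution stages hold [3, 4, 6, 3] Bottleneck blocks each
print([len(stage) for stage in
       (model.layer1, model.layer2, model.layer3, model.layer4)])
# [3, 4, 6, 3]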
The Rising Star: PyTorch
The following code implements a Bottleneck block with a downsampling branch in PyTorch:
import torch.nn as nn

class Bottleneck(nn.Module):
    def __init__(self, in_dim, out_dim, stride=1):
        super(Bottleneck, self).__init__()
        # The Bottleneck consists of three convolutions (1*1, 3*3, 1*1),
        # each followed by a BN layer
        self.bottleneck = nn.Sequential(
            # 1*1 convolution
            nn.Conv2d(in_channels=in_dim, out_channels=in_dim,
                      kernel_size=1, bias=False),
            nn.BatchNorm2d(in_dim),
            nn.ReLU(inplace=True),
            # 3*3 convolution
            nn.Conv2d(in_dim, in_dim, 3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(in_dim),
            nn.ReLU(inplace=True),
            # 1*1 convolution
            nn.Conv2d(in_dim, out_dim, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_dim),
        )
        self.relu = nn.ReLU(inplace=True)
        # The downsample branch is a 1*1 convolution followed by BN;
        # its stride must match the residual branch so the shapes agree
        self.downsample = nn.Sequential(
            nn.Conv2d(in_dim, out_dim, kernel_size=1, stride=stride),
            nn.BatchNorm2d(out_dim),
        )

    def forward(self, x):
        identity = x
        out = self.bottleneck(x)
        identity = self.downsample(identity)
        # Add the identity mapping to the output of the stacked layers,
        # then apply ReLU
        out += identity
        out = self.relu(out)
        return out
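A quick smoke test of the block above (a hypothetical example; the sizes are illustrative):

import torch

# 2x downsampling from 64 to 256 channels on a 56*56 feature map
block = Bottleneck(in_dim=64, out_dim=256, stride=2)
x = torch.randn(1, 64, 56, 56)
print(block(x).shape)  # torch.Size([1, 256, 28, 28])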
The Big Brother: TensorFlow
The TF-Slim version follows the pre-activation (ResNet v2) form of the bottleneck:

import tensorflow as tf

slim = tf.contrib.slim

def subsample(inputs, factor, scope=None):
    '''
    @factor: sampling factor. If the factor is 1, return inputs directly;
    otherwise downsample with a 1*1 max pooling whose stride equals factor.
    '''
    if factor == 1:
        return inputs
    else:
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)

def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
    '''
    If stride is 1, convolve directly with 'SAME' padding;
    otherwise zero-pad explicitly first, then convolve with 'VALID' padding.
    '''
    if stride == 1:
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=1,
                           padding='SAME', scope=scope)
    else:
        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                                 [pad_beg, pad_end], [0, 0]])
        return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
                           padding='VALID', scope=scope)

@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride,
               outputs_collections=None, scope=None):
    with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
        # Pre-activation: BN + ReLU before the convolutions
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu,
                                 scope='preact')
        if depth == depth_in:
            shortcut = subsample(inputs, stride, 'shortcut')
        else:
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')
        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                               scope='conv1')
        residual = conv2d_same(residual, depth_bottleneck, 3, stride,
                               scope='conv2')
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')
        output = shortcut + residual
        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.name, output)
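A hypothetical smoke test (TensorFlow 1.x, where tf.contrib.slim is still available):

inputs = tf.placeholder(tf.float32, [None, 56, 56, 64])
net = bottleneck(inputs, depth=256, depth_bottleneck=64, stride=2)
print(net.get_shape())  # (?, 28, 28, 256)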
The Second Brother: Keras
The same three-convolution block, written with the Keras functional API:

import keras.layers as kls

def bottleneck_block(input_tensor, filters):
    '''
    filters: number of convolution kernels
    '''
    residual = kls.Conv2D(filters, (1, 1), strides=1)(input_tensor)
    residual = kls.BatchNormalization(axis=-1)(residual)
    residual = kls.Activation('relu')(residual)
    residual = kls.Conv2D(filters, (3, 3), strides=1,
                          padding='same')(residual)
    residual = kls.BatchNormalization(axis=-1)(residual)
    residual = kls.Activation('relu')(residual)
    residual = kls.Conv2D(filters, (1, 1), strides=1)(residual)
    residual = kls.BatchNormalization(axis=-1)(residual)
    residual = kls.Activation('relu')(residual)
    # downsample branch
    identity = kls.Conv2D(filters, (1, 1), strides=1)(input_tensor)
    identity = kls.BatchNormalization(axis=-1)(identity)
    identity = kls.Activation('relu')(identity)
    # element-wise sum of the two branches
    out = kls.add([residual, identity])
    out = kls.Activation('relu')(out)
    return out
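A hypothetical usage sketch, wrapping the block into a Model to inspect shapes:

from keras.layers import Input
from keras.models import Model

inputs = Input(shape=(56, 56, 64))
outputs = bottleneck_block(inputs, filters=256)
model = Model(inputs, outputs)
model.summary()  # output shape should be (None, 56, 56, 256)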
The Third Brother: Caffe
In Caffe, the block is assembled with the NetSpec layer functions:

from caffe import layers, params

def conv_bn(bottom, kernel_size, num_output, name, stride=1, pad=0, group=1):
    conv = layers.Convolution(bottom, kernel_size=kernel_size, stride=stride,
                              num_output=num_output, pad=pad, group=group,
                              weight_filler=dict(type="xavier"),
                              bias_term=False,
                              param=[dict(lr_mult=1, decay_mult=1)],
                              name="{0}_conv".format(name))
    batch_norm = layers.BatchNorm(conv, in_place=True,
                                  batch_norm_param=dict(use_global_stats=True),
                                  name="{0}_batch_norm".format(name))
    return batch_norm

def proj_residual_block(bottom, conv_nums):
    # 1x1
    residual_1 = conv_bn(bottom, kernel_size=1, stride=1, pad=0,
                         num_output=conv_nums, name="residual_1")
    residual_1_relu = layers.ReLU(residual_1, in_place=True,
                                  name="residual_1_relu")
    # 3x3
    residual_2 = conv_bn(residual_1_relu, kernel_size=3, stride=1, pad=1,
                         num_output=conv_nums, name="residual_2")
    residual_2_relu = layers.ReLU(residual_2, in_place=True,
                                  name="residual_2_relu")
    # 1x1
    residual_3 = conv_bn(residual_2_relu, kernel_size=1, stride=1, pad=0,
                         num_output=conv_nums, name="residual_3")
    residual_3_relu = layers.ReLU(residual_3, in_place=True,
                                  name="residual_3_relu")
    # downsample branch (1x1 projection)
    identity = bottom
    identity = conv_bn(identity, kernel_size=1, stride=1, pad=0,
                       num_output=conv_nums, name="identity")
    out_add = layers.Eltwise(residual_3_relu, identity,
                             eltwise_param=dict(operation=params.Eltwise.SUM),
                             name="out_add")
    out_add_relu = layers.ReLU(out_add, in_place=True, name="out_add_relu")
    return out_add_relu
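A hypothetical usage sketch with caffe.NetSpec, which generates the prototxt definition rather than running a forward pass (the Input shape here is illustrative):

import caffe

n = caffe.NetSpec()
n.data = layers.Input(shape=dict(dim=[1, 64, 56, 56]))
n.block = proj_residual_block(n.data, conv_nums=256)
print(n.to_proto())  # prints the layer-by-layer prototxt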
The Fourth Sister: MXNet
Finally, the same block in MXNet Gluon:

from mxnet.gluon import nn
from mxnet import nd

class Bottleneck(nn.Block):
    def __init__(self, channels):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2D(channels, kernel_size=1, strides=1)
        self.bn1 = nn.BatchNorm()
        self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1, strides=1)
        self.bn2 = nn.BatchNorm()
        self.conv3 = nn.Conv2D(channels, kernel_size=1, strides=1)
        self.bn3 = nn.BatchNorm()
        # downsample branch
        self.identity = nn.Conv2D(channels, kernel_size=1, strides=1)
        self.identity_bn = nn.BatchNorm()

    def forward(self, x):
        conv1_out = nd.relu(self.bn1(self.conv1(x)))
        conv2_out = nd.relu(self.bn2(self.conv2(conv1_out)))
        conv3_out = nd.relu(self.bn3(self.conv3(conv2_out)))
        identity = self.identity_bn(self.identity(x))
        out = conv3_out + identity
        out = nd.relu(out)
        return out
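A hypothetical smoke test (Gluon infers the input channel count on the first forward pass):

block = Bottleneck(channels=256)
block.initialize()
x = nd.random.uniform(shape=(1, 64, 56, 56))
print(block(x).shape)  # (1, 256, 56, 56)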
Practical Tips: ResNet
1. Deep learning frameworks all provide pretrained models such as resnet-18, resnet-34, resnet-50, and resnet-101 for transfer learning, so in most cases there is no need to implement the network by hand. Taking PyTorch as an example, using resnet-18 for transfer learning only requires setting the parameter pretrained=True:
from torchvision import models

model = models.resnet18(pretrained=True)
2. When training on your own dataset, resnet-18 is usually the first model to try. Note that before training starts, the output dimension of the resnet network must be adjusted to your task. Taking a 10-class dataset as an example:
import torch.nn as nn

# Get the input dimension of resnet's fully connected layer
in_puts = model.fc.in_features
# Replace the fully connected layer so its output dimension matches
# the number of classes, here 10
model.fc = nn.Linear(in_puts, 10)
3. If the network structure needs modifying, prefer small convolution kernels (1*1, 3*3, 1*3 + 3*1). A stack of three 3*3 convolutions has the same receptive field as one large 7*7 kernel, and a stack of two 3*3 convolutions is equivalent to one 5*5. So for the same receptive field, small kernels make the network deeper. They also reduce the parameter count and speed up computation, as the comparison below shows:
# Parameters of one 7*7 convolution (per input/output channel pair): 7*7 = 49
# Parameters of three 3*3 convolutions: 3 * 3*3 = 27
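The same comparison checked numerically, a minimal PyTorch sketch assuming 64 input and output channels:

import torch.nn as nn

count = lambda m: sum(p.numel() for p in m.parameters())

conv7 = nn.Conv2d(64, 64, kernel_size=7, padding=3, bias=False)
conv3_stack = nn.Sequential(*[nn.Conv2d(64, 64, 3, padding=1, bias=False)
                              for _ in range(3)])

print(count(conv7))        # 64*64*7*7   = 200704
print(count(conv3_stack))  # 3*64*64*3*3 = 110592, roughly 45% fewer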