用pytorch搭建自己的网络ResNet笔记
- ResNet结构种类
- 残差块
- 代码实现
- 注意
- 实现不同结构的ResNet
- 定义resnet网络
- 测试
ResNet结构种类
ResNet一共有5个变种,其网络层数分别是18、34、50、101、152。主要区别在于使用的是两层残差块还是三层残差块,以及残差块的数量。ResNet-18和ResNet-34使用的都是两层残差块,而其余三个模型使用的是三层残差块,并且三层残差块中第三层的输出通道数为前两层输出通道数的4倍。
残差块
公式为y=F(x)+x,在原来输出F(x)的基础上加上输入x
代码实现
# Two-layer residual block
class Residual_2(nn.Module):
    """Residual block made of two 3x3 convolutions: ``relu(F(X) + X)``.

    Args:
        in_channels: Number of channels of the input ``X``.
        out_channels: Number of channels produced by both 3x3 convolutions.
        use_1x1conv: If True, ``X`` goes through a 1x1 convolution (using the
            same ``stride``) before the addition, so the shortcut has
            ``out_channels`` channels and the same stride as the main branch.
        stride: Stride of the first 3x3 convolution and of the optional 1x1
            shortcut convolution.
    """

    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
        super(Residual_2, self).__init__()
        # Two 3x3 convolutions; only the first one may change the spatial size.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        # Optional 1x1 convolution that projects X to the shape of the main branch.
        if use_1x1conv:
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
        else:
            self.conv3 = None
        # One batch-norm layer after each 3x3 convolution.
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, X):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(X))))) + shortcut(X))``."""
        Y = torch.nn.functional.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Compare against None explicitly instead of relying on module truthiness.
        if self.conv3 is not None:
            X = self.conv3(X)
        return torch.nn.functional.relu(Y + X)
# Three-layer residual block
class Residual_3(nn.Module):
    """Residual block made of 1x1 -> 3x3 -> 1x1 convolutions.

    The last 1x1 convolution outputs ``out_channels * 4`` channels, so the
    block as a whole maps ``in_channels`` to ``out_channels * 4`` channels.

    Args:
        in_channels: Number of channels of the input ``X``.
        out_channels: Number of channels of the first two convolutions; the
            block output has ``out_channels * 4`` channels.
        use_1x1conv: If True, ``X`` goes through a 1x1 convolution (using the
            same ``stride``) that outputs ``out_channels * 4`` channels before
            the addition.
        stride: Stride of the first 1x1 convolution and of the optional 1x1
            shortcut convolution.
    """

    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
        super(Residual_3, self).__init__()
        # Three convolutions; the stride is applied by the first one.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(out_channels, out_channels * 4, kernel_size=1)
        # Optional 1x1 convolution that projects X to the shape of the main branch.
        if use_1x1conv:
            self.conv4 = nn.Conv2d(in_channels, out_channels * 4, kernel_size=1, stride=stride)
        else:
            self.conv4 = None
        # One batch-norm layer after each convolution of the main branch.
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.bn3 = nn.BatchNorm2d(out_channels * 4)

    def forward(self, X):
        """Run the three conv/BN stages and add the (optionally projected) input."""
        Y = torch.nn.functional.relu(self.bn1(self.conv1(X)))
        Y = torch.nn.functional.relu(self.bn2(self.conv2(Y)))
        Y = self.bn3(self.conv3(Y))
        # Compare against None explicitly instead of relying on module truthiness.
        if self.conv4 is not None:
            X = self.conv4(X)
        return torch.nn.functional.relu(Y + X)
注意
当X与Y的通道数或尺寸不同时,这里使用1x1的卷积层对X进行变换,使X与Y的形状一致,从而可以相加
残差块的第一层会有一个参数stride,通过设置步长为2可以改变输出图片的尺寸
第一层的输入通道数是in_channels,输出通道数是out_channels,因此经过这一层之后特征图的通道数会发生改变。其余层的输入和输出通道数都是out_channels。特殊地,对于三层的残差块,最后一层的输出通道数是out_channels*4
实现不同结构的ResNet
# Number of output classes
classes = 40


# Flatten layer
class FlattenLayer(nn.Module):
    """Reshape the input to ``(input.size(0), -1)``, keeping the first dimension."""

    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, input):
        return input.view(input.size(0), -1)


# Global average pooling layer
class GlobalAvgPool2d(nn.Module):
    """Average-pool with a kernel equal to every dimension after the first two.

    For a 4-D input this averages each channel over its whole spatial extent.
    """

    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, x):
        return nn.functional.avg_pool2d(x, kernel_size=x.size()[2:])


def resnet_block(in_channels, out_channels, num_residuals, basicblock=2, first_block=False):
    """Build one ResNet stage as an ``nn.Sequential`` of residual blocks.

    Args:
        in_channels: Channel count passed by the caller for the stage input.
            For three-layer blocks in a non-first stage it is multiplied by 4,
            because the previous stage ended with a 4x channel expansion.
        out_channels: Base channel count of the residual blocks in this stage.
        num_residuals: Number of residual blocks in the stage.
        basicblock: 2 selects ``Residual_2``; any other value selects
            ``Residual_3``.
        first_block: True for the first stage, which keeps stride 1.

    Returns:
        ``nn.Sequential`` holding ``num_residuals`` residual blocks.
    """
    blk = []
    for i in range(num_residuals):
        if basicblock == 2:
            if i == 0 and not first_block:
                # First block of a later stage: halve the size, change channels.
                blk.append(Residual_2(in_channels, out_channels, use_1x1conv=True, stride=2))
            else:
                # NOTE: in_channels is not used here; the first stage relies on
                # in_channels == out_channels.
                blk.append(Residual_2(out_channels, out_channels))
        else:
            if i == 0:
                if first_block:
                    blk.append(Residual_3(in_channels, out_channels, use_1x1conv=True))
                else:
                    blk.append(Residual_3(in_channels * 4, out_channels, use_1x1conv=True, stride=2))
            else:
                # Every earlier block in the stage output out_channels * 4 channels.
                blk.append(Residual_3(out_channels * 4, out_channels, use_1x1conv=True))
    return nn.Sequential(*blk)
定义resnet网络
def ResNet_model(layers, num_classes=None):
    """Build a ResNet of the requested depth as an ``nn.Sequential``.

    Args:
        layers: Network depth; one of 18, 34, 50, 101 or 152.
        num_classes: Output size of the final linear layer. When None, the
            module-level ``classes`` value is used.

    Returns:
        ``nn.Sequential`` with the stem, four stages named ``resnet_block1``
        to ``resnet_block4``, ``global_avg_pool`` and ``fc``.

    Raises:
        ValueError: If ``layers`` is not a supported depth.
    """
    # depth -> (residual block type, number of residual blocks per stage)
    configs = {
        18: (2, [2, 2, 2, 2]),
        34: (2, [3, 4, 6, 3]),
        50: (3, [3, 4, 6, 3]),
        101: (3, [3, 4, 23, 3]),
        152: (3, [3, 8, 36, 3]),
    }
    # Fail with an exception instead of terminating the interpreter, and do it
    # before any layer is allocated.
    if layers not in configs:
        raise ValueError("ResNet结构不对!")
    basicblock, num_residual = configs[layers]
    if num_classes is None:
        num_classes = classes

    # Stem
    net = nn.Sequential(
        # 7x7 convolution
        nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        # 3x3 max pooling
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # Four stages of residual blocks
    net.add_module("resnet_block1", resnet_block(64, 64, num_residual[0], basicblock, first_block=True))
    net.add_module("resnet_block2", resnet_block(64, 128, num_residual[1], basicblock))
    net.add_module("resnet_block3", resnet_block(128, 256, num_residual[2], basicblock))
    net.add_module("resnet_block4", resnet_block(256, 512, num_residual[3], basicblock))
    # Global average pooling followed by the fully connected classifier
    net.add_module("global_avg_pool", GlobalAvgPool2d())
    # Three-layer residual blocks end with 512 * 4 = 2048 channels.
    fc_inputs = 512 if basicblock == 2 else 2048
    net.add_module("fc", nn.Sequential(FlattenLayer(), nn.Linear(fc_inputs, num_classes)))
    return net
网络的最开始是一个7X7的卷积层接上一个3X3的最大池化层。然后是四个block块,最后加上平均池化层和全连接层
五种ResNet模型均使用了四个block块,第一个block块不改变图片的尺寸,后面三个block块的第一个残差块的第一层均使用步长为2的卷积层来使尺寸减半。
对于三层的残差块,由于每个残差块最后一层的输出通道数是out_channels的4倍,所以除了第一个block的第一个残差块,其余残差块的输入通道数都要乘以4。
测试
if __name__ == '__main__':
    # Push a random batch through the network one top-level child at a time
    # and print the output shape after each of them.
    net = ResNet_model(152)
    X = torch.rand((16, 3, 224, 224))
    # Only shapes are inspected, so skip building the autograd graph.
    with torch.no_grad():
        for name, layer in net.named_children():
            X = layer(X)
            print(name, ' output shape:\t', X.shape)
对ResNet_model()函数中的参数进行修改,即可调用不同结构的ResNet模型