VGG代码解读
目录
- VGG代码解读
- 概述
- 网络结构图
- VGG代码细节分析
概述
VGG跟AlexNet在结构上没有本质上的区别,只是在AlexNet的基础上变得更深了,依然是“直通”式的结构。论文中还尝试了AlexNet提出的局部响应归一化(LRN,Local Response Normalization),但效果改善很小。整体架构仍然是用卷积、激活、池化提取特征,然后用全连接网络做分类器。
网络结构图
vgg_A、vgg_B、vgg_C、vgg_D、vgg_E分别对应不同的vgg结构变种,其中LRN(Local Response Normalization)是局部响应归一化,实验结果表明加上LRN的效果没有明显改善,因此pytorch官方的代码里面没有给出LRN的实现。
VGG代码细节分析
from typing import Union, List, Dict, Any, cast

import numpy as np
import torch
import torch.nn as nn

from .utils import load_state_dict_from_url
# Public API of this module: the VGG class plus one constructor per VGG variant.
__all__ = [
    'VGG',
    'vgg11',
    'vgg11_bn',
    'vgg13',
    'vgg13_bn',
    'vgg16',
    'vgg16_bn',
    'vgg19_bn',
    'vgg19',
]
# Download URLs of the pretrained VGG checkpoints, keyed by architecture name.
# The keys are the `arch` strings that the constructor functions pass to `_vgg`.
model_urls = {
    # Plain variants (no batch normalization).
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    # Variants with batch normalization after every convolution.
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}
class VGG(nn.Module):
    """VGG classifier: a convolutional feature extractor followed by an MLP head.

    The forward pipeline is ``features -> adaptive average pool (7x7) ->
    flatten -> classifier``.

    Args:
        features: Module that extracts convolutional features from the input.
            The first classifier layer is ``nn.Linear(512 * 7 * 7, 4096)``, so
            ``features`` must produce 512 output channels.
        num_classes: Number of outputs of the final linear layer.
        init_weights: If True, re-initialize every Conv2d / BatchNorm2d /
            Linear submodule with ``_initialize_weights``.
        dropout: Drop probability of the two ``nn.Dropout`` layers in the
            classifier. The default 0.5 equals ``nn.Dropout()``'s own default.
    """

    def __init__(
        self,
        features: nn.Module,
        num_classes: int = 1000,
        init_weights: bool = True,
        dropout: float = 0.5,
    ) -> None:
        super().__init__()
        self.features = features
        # Pool to a fixed 7x7 grid so the classifier input size does not
        # depend on the spatial size of the input image.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # Classifier head; the convolutional layers in `features` have already
        # extracted the features at this point.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run feature extraction, pooling, flattening and classification on ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        # Keep dim 0 (the batch) and collapse everything after it into one
        # feature vector per sample; equivalent to x.view(x.size(0), -1).
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self) -> None:
        """Initialize the parameters of every Conv2d, BatchNorm2d and Linear submodule.

        The original text contained two contradictory ``nn.Linear`` branches
        (bias 1 vs. bias 0, the second one unreachable); this keeps the
        ``normal_(0, 0.01)`` / zero-bias variant.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Convolution weights: Kaiming-normal initialization for ReLU.
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                # Convolution biases, when present, start at zero.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                # Batch norm starts with scale 1 and shift 0.
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # nn.init.normal_(tensor, mean, std): mean 0 and standard
                # deviation (not variance) 0.01.
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
def make_layers(cfg: List[Union[str, int]], batch_norm: bool = False) -> nn.Sequential:
    """Build a convolutional feature extractor from a layer specification.

    Args:
        cfg: Layer specification read left to right. The string ``'M'`` adds a
            2x2 max-pooling layer with stride 2; any other entry is treated as
            an int giving the output channels of a 3x3 convolution
            (padding 1) followed by ReLU.
        batch_norm: If True, insert ``nn.BatchNorm2d`` between each
            convolution and its ReLU.

    Returns:
        An ``nn.Sequential`` holding the layers in ``cfg`` order. The first
        convolution expects 3 input channels.
    """
    # `layers` accumulates the modules in network order.
    layers: List[nn.Module] = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        v = cast(int, v)
        conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
        layers.append(conv2d)
        if batch_norm:
            layers.append(nn.BatchNorm2d(v))
        # inplace=True makes ReLU overwrite its input tensor instead of
        # allocating a new one, which saves memory (the default is
        # inplace=False, which leaves the input untouched).
        layers.append(nn.ReLU(inplace=True))
        # This layer's output channels are the next convolution's input channels.
        in_channels = v
    return nn.Sequential(*layers)
# Layer layouts of the VGG variants, keyed by configuration letter
# ('A', 'B', 'D', 'E'). 'M' stands for a max-pooling layer; each integer is
# the output-channel count of one convolution. The layouts are written one
# pooling stage per line. For the architecture table see the VGG paper:
# https://arxiv.org/pdf/1409.1556.pdf
cfgs: Dict[str, List[Union[str, int]]] = {
    'A': [
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'B': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'D': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    'E': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}
# Signature of the download helper, for reference:
# torch.hub.load_state_dict_from_url(url, model_dir=None, map_location=None,
#                                    progress=True, check_hash=False, file_name=None)
# progress (bool, optional) - whether or not to display a progress bar to
# stderr. Default: True
def _vgg(arch: str, cfg: str, batch_norm: bool, pretrained: bool, progress: bool, **kwargs: Any) -> VGG:
    """Build a VGG model and optionally load its pretrained weights.

    Args:
        arch: Key into ``model_urls`` selecting the checkpoint to download.
        cfg: Key into ``cfgs`` selecting the layer layout.
        batch_norm: Forwarded to ``make_layers``.
        pretrained: If True, download the checkpoint for ``arch`` and load it
            into the model.
        progress: Forwarded to ``load_state_dict_from_url`` as ``progress``.
        **kwargs: Extra keyword arguments forwarded to the ``VGG`` constructor.

    Returns:
        The constructed (and, if requested, weight-loaded) ``VGG`` model.
    """
    if pretrained:
        # The checkpoint overwrites every parameter, so skip the random
        # initialization in the constructor.
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
    if pretrained:
        # Download the pretrained weights, then load them into the model.
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict)
    return model
# Each constructor below selects one VGG layout (i.e. one network depth).
def vgg11(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    r"""VGG 11-layer model (configuration "A")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _vgg('vgg11', 'A', False, pretrained, progress, **kwargs)
def vgg11_bn(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    r"""Build the 11-layer VGG (configuration "A") with batch normalization.

    Paper: `"Very Deep Convolutional Networks For Large-Scale Image Recognition"
    <https://arxiv.org/pdf/1409.1556.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch, layout, use_bn = 'vgg11_bn', 'A', True
    return _vgg(arch, layout, use_bn, pretrained, progress, **kwargs)
def vgg13(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    r"""Build the 13-layer VGG (configuration "B").

    Paper: `"Very Deep Convolutional Networks For Large-Scale Image Recognition"
    <https://arxiv.org/pdf/1409.1556.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch, layout, use_bn = 'vgg13', 'B', False
    return _vgg(arch, layout, use_bn, pretrained, progress, **kwargs)
def vgg13_bn(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    r"""Build the 13-layer VGG (configuration "B") with batch normalization.

    Paper: `"Very Deep Convolutional Networks For Large-Scale Image Recognition"
    <https://arxiv.org/pdf/1409.1556.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch, layout, use_bn = 'vgg13_bn', 'B', True
    return _vgg(arch, layout, use_bn, pretrained, progress, **kwargs)
def vgg16(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    r"""Build the 16-layer VGG (configuration "D").

    Paper: `"Very Deep Convolutional Networks For Large-Scale Image Recognition"
    <https://arxiv.org/pdf/1409.1556.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch, layout, use_bn = 'vgg16', 'D', False
    return _vgg(arch, layout, use_bn, pretrained, progress, **kwargs)
def vgg16_bn(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    r"""Build the 16-layer VGG (configuration "D") with batch normalization.

    Paper: `"Very Deep Convolutional Networks For Large-Scale Image Recognition"
    <https://arxiv.org/pdf/1409.1556.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch, layout, use_bn = 'vgg16_bn', 'D', True
    return _vgg(arch, layout, use_bn, pretrained, progress, **kwargs)
def vgg19(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    r"""Build the 19-layer VGG (configuration "E").

    Paper: `"Very Deep Convolutional Networks For Large-Scale Image Recognition"
    <https://arxiv.org/pdf/1409.1556.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch, layout, use_bn = 'vgg19', 'E', False
    return _vgg(arch, layout, use_bn, pretrained, progress, **kwargs)
# VGG-19 with batch normalization added.
def vgg19_bn(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    r"""Build the 19-layer VGG (configuration 'E') with batch normalization.

    Paper: `"Very Deep Convolutional Networks For Large-Scale Image Recognition"
    <https://arxiv.org/pdf/1409.1556.pdf>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch, layout, use_bn = 'vgg19_bn', 'E', True
    return _vgg(arch, layout, use_bn, pretrained, progress, **kwargs)