VGG代码解读


目录

  • VGG代码解读
  • 概述
  • 网络结构图
  • VGG代码细节分析


概述

VGG跟AlexNet在结构上没有本质上的区别,只是在AlexNet的基础上变得更深了,依然是“直通”式的结构。VGG论文还对局部响应归一化(LRN,Local Response Normalization)做了对比实验,结果表明其效果改善很小。整体上的架构仍然是用卷积、激活、池化提取特征,然后用全连接的前向神经网络做分类器。

网络结构图

vgg_A、vgg_B、vgg_C、vgg_D、vgg_E分别对应不同的vgg结构变种,其中LRN(Local Response Normalization)是局部响应归一化,实验结果表明加上LRN的效果没有明显改善,因此pytorch官方的VGG代码里面没有给出带LRN的结构。

vgg pytorch_初始化

VGG代码细节分析

from typing import Union,List,Dict,Any,cast

import numpy as np
import torch
import torch.nn as nn

from .utils import load_state_dict_from_url
# Public names of this module: the VGG class plus one factory function per
# VGG variant (with and without batch normalization).
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]
# Download URLs of the pretrained VGG weights, keyed by architecture name.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}

class VGG(nn.Module):
    """VGG network: a feature extractor followed by a three-layer classifier.

    Args:
        features: Module producing the convolutional feature maps. Its output
            must have 512 channels, because the classifier's first layer
            expects ``512 * 7 * 7`` inputs after pooling to 7x7.
        num_classes: Number of outputs of the last linear layer.
        init_weights: If True, run ``_initialize_weights`` on construction.
    """

    def __init__(
        self,
        features: nn.Module,
        num_classes: int = 1000,
        init_weights: bool = True,
    ) -> None:
        super().__init__()
        self.features = features
        # Pool to a fixed 7x7 grid so the classifier input size is constant.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # Classifier head; the convolutional layers live in `features`.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run feature extraction, pooling, flattening and classification."""
        x = self.features(x)
        x = self.avgpool(x)
        # Keep dim 0 and collapse the rest; equivalent to
        # x.view(x.size(0), -1).
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self) -> None:
        """Initialize the parameters of every conv, batch-norm and linear layer."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Convolutions: Kaiming-normal weights, zero bias.
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                # Batch norm: scale 1, shift 0.
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # Linear: weights ~ N(mean=0, std=0.01), zero bias.
                # normal_'s third argument is the standard deviation.
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
# `layers` is a list that collects the modules of the network in order.
# nn.ReLU(inplace=True) overwrites its input tensor (like passing by address
# in C), which saves memory; the default nn.ReLU(inplace=False) leaves the
# input untouched (like passing by value).
def make_layers(cfg: List[Union[str, int]], batch_norm: bool = False) -> nn.Sequential:
    """Build the convolutional part of a VGG network from a layer spec.

    Args:
        cfg: Sequence of layer descriptors. ``'M'`` adds a 2x2 max-pooling
            layer with stride 2; any other entry is treated as the number of
            output channels of a 3x3 convolution with padding 1.
        batch_norm: If True, insert ``nn.BatchNorm2d`` between each
            convolution and its ReLU.

    Returns:
        An ``nn.Sequential`` holding the layers in ``cfg`` order. The first
        convolution takes 3 input channels.
    """
    layers: List[nn.Module] = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            v = cast(int, v)
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            # This layer's output channel count is the next conv's input.
            in_channels = v
    return nn.Sequential(*layers)

# Layer specifications consumed by make_layers: 'M' marks a max-pooling
# layer and each integer is the output channel count of a convolution.
# The keys 'A', 'B', 'D' and 'E' select the different network layouts; the
# factory functions below use them for vgg11, vgg13, vgg16 and vgg19.
cfgs: Dict[str, List[Union[str, int]]] = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

# torch.hub.load_state_dict_from_url(url, model_dir=None, map_location=None,
# progress=True, check_hash=False, file_name=None)
# The `progress` argument of load_state_dict_from_url defaults to True and
# controls whether a download progress bar is shown:
# progress (bool, optional) – whether or not to display a progress bar to stderr Default: True

def _vgg(arch: str, cfg: str, batch_norm: bool, pretrained: bool, progress: bool, **kwargs: Any) -> VGG:
    """Build a VGG model and optionally load pretrained weights into it.

    Args:
        arch: Key into ``model_urls`` naming the checkpoint to download.
        cfg: Key into ``cfgs`` selecting the layer specification.
        batch_norm: Passed to ``make_layers``.
        pretrained: If True, download the state dict for ``arch`` and load it.
        progress: Passed to ``load_state_dict_from_url`` as ``progress``.
        **kwargs: Extra keyword arguments for the ``VGG`` constructor.

    Returns:
        The constructed ``VGG`` model.
    """
    if pretrained:
        # The downloaded parameters overwrite everything, so skip the
        # random initialization.
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict)
    return model
# Each factory below selects a different layer configuration (network depth).
def vgg11(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    r"""VGG 11-layer model (configuration "A")
    `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _vgg('vgg11', 'A', False, pretrained, progress, **kwargs)

def vgg11_bn(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    """Build the 11-layer VGG (configuration "A") with batch normalization.

    Paper: "Very Deep Convolutional Networks For Large-Scale Image
    Recognition", https://arxiv.org/pdf/1409.1556.pdf

    Args:
        pretrained: If True, returns a model pre-trained on ImageNet.
        progress: If True, displays a progress bar of the download to stderr.
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch_name, cfg_key, use_bn = 'vgg11_bn', 'A', True
    return _vgg(arch_name, cfg_key, use_bn, pretrained, progress, **kwargs)


def vgg13(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    """Build the 13-layer VGG (configuration "B") without batch normalization.

    Paper: "Very Deep Convolutional Networks For Large-Scale Image
    Recognition", https://arxiv.org/pdf/1409.1556.pdf

    Args:
        pretrained: If True, returns a model pre-trained on ImageNet.
        progress: If True, displays a progress bar of the download to stderr.
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch_name, cfg_key, use_bn = 'vgg13', 'B', False
    return _vgg(arch_name, cfg_key, use_bn, pretrained, progress, **kwargs)


def vgg13_bn(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    """Build the 13-layer VGG (configuration "B") with batch normalization.

    Paper: "Very Deep Convolutional Networks For Large-Scale Image
    Recognition", https://arxiv.org/pdf/1409.1556.pdf

    Args:
        pretrained: If True, returns a model pre-trained on ImageNet.
        progress: If True, displays a progress bar of the download to stderr.
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch_name, cfg_key, use_bn = 'vgg13_bn', 'B', True
    return _vgg(arch_name, cfg_key, use_bn, pretrained, progress, **kwargs)


def vgg16(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    """Build the 16-layer VGG (configuration "D") without batch normalization.

    Paper: "Very Deep Convolutional Networks For Large-Scale Image
    Recognition", https://arxiv.org/pdf/1409.1556.pdf

    Args:
        pretrained: If True, returns a model pre-trained on ImageNet.
        progress: If True, displays a progress bar of the download to stderr.
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch_name, cfg_key, use_bn = 'vgg16', 'D', False
    return _vgg(arch_name, cfg_key, use_bn, pretrained, progress, **kwargs)


def vgg16_bn(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    """Build the 16-layer VGG (configuration "D") with batch normalization.

    Paper: "Very Deep Convolutional Networks For Large-Scale Image
    Recognition", https://arxiv.org/pdf/1409.1556.pdf

    Args:
        pretrained: If True, returns a model pre-trained on ImageNet.
        progress: If True, displays a progress bar of the download to stderr.
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch_name, cfg_key, use_bn = 'vgg16_bn', 'D', True
    return _vgg(arch_name, cfg_key, use_bn, pretrained, progress, **kwargs)


def vgg19(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    """Build the 19-layer VGG (configuration "E") without batch normalization.

    Paper: "Very Deep Convolutional Networks For Large-Scale Image
    Recognition", https://arxiv.org/pdf/1409.1556.pdf

    Args:
        pretrained: If True, returns a model pre-trained on ImageNet.
        progress: If True, displays a progress bar of the download to stderr.
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch_name, cfg_key, use_bn = 'vgg19', 'E', False
    return _vgg(arch_name, cfg_key, use_bn, pretrained, progress, **kwargs)

# VGG-19 variant with batch normalization added.
def vgg19_bn(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VGG:
    """Build the 19-layer VGG (configuration 'E') with batch normalization.

    Paper: "Very Deep Convolutional Networks For Large-Scale Image
    Recognition", https://arxiv.org/pdf/1409.1556.pdf

    Args:
        pretrained: If True, returns a model pre-trained on ImageNet.
        progress: If True, displays a progress bar of the download to stderr.
        **kwargs: Forwarded unchanged to ``_vgg``.
    """
    arch_name, cfg_key, use_bn = 'vgg19_bn', 'E', True
    return _vgg(arch_name, cfg_key, use_bn, pretrained, progress, **kwargs)