AlexNet

This section covers the network architecture of AlexNet. Compared with LeNet it is deeper and more complex, and it replaces the original average pooling with max pooling, which gives a noticeable improvement.
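As a quick toy illustration of that difference (my own sketch, not part of the original course code): max pooling keeps the strongest activation in each window, while average pooling smooths it out.

import torch
from torch import nn

x = torch.tensor([[[[1., 2.], [3., 4.]]]])   # shape (1, 1, 2, 2)
print(nn.MaxPool2d(kernel_size=2)(x))        # tensor([[[[4.]]]]): keeps the strongest response
print(nn.AvgPool2d(kernel_size=2)(x))        # tensor([[[[2.5000]]]]): averages the window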

import torch
from torch import nn
from d2l import torch as d2l

# AlexNet network architecture
net = nn.Sequential(
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2), nn.Flatten(),
    nn.Linear(6400, 4096), nn.ReLU(), nn.Dropout(p=0.5),
    nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(p=0.5),
    nn.Linear(4096, 10))

# Quick shape check of the network
X = torch.randn(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'Output shape:\t', X.shape)

# Load the dataset
batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

# Train the model
lr, num_epochs = 0.01, 10
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())


VGG

This section covers the VGG architecture. VGG organizes the network into repeated blocks, introducing the VGG block, which laid the groundwork for the block-based structure of later networks.

import torch
from torch import nn
from d2l import torch as d2l

# Define a VGG block
def vgg_block(num_convs, in_channels, out_channels):
    layers = []
    for _ in range(num_convs):
        layers.append(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        in_channels = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)
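
# Quick sanity check (added illustration, not from the original notes): a VGG block
# keeps the spatial size through its 3x3 convs (padding=1) and then halves it with
# the 2x2 max pooling.
blk = vgg_block(2, 1, 64)
print(blk(torch.randn(1, 1, 32, 32)).shape)  # expected: torch.Size([1, 64, 16, 16])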

# VGG network architecture
conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))

def vgg(conv_arch):
    conv_blks = []
    in_channels = 1
    for (num_convs, out_channels) in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        in_channels = out_channels
    return nn.Sequential(*conv_blks, nn.Flatten(),
                         nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(),
                         nn.Dropout(0.5), nn.Linear(4096, 4096), nn.ReLU(),
                         nn.Dropout(0.5), nn.Linear(4096, 10))

net = vgg(conv_arch)

X = torch.randn(size=(1, 1, 224, 224))
for blk in net:
    X = blk(X)
    print(blk.__class__.__name__, 'output shape:\t', X.shape)

# Build a smaller variant by dividing the number of channels in each block by 4
ratio = 4
small_conv_arch = [(pair[0], pair[1] // ratio) for pair in conv_arch]
net = vgg(small_conv_arch)

lr, num_epochs, batch_size = 0.05, 10, 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())


NiN

This section covers NiN. Its main contribution is to use NiN blocks together with a global average pooling layer in place of fully connected layers, which significantly reduces the number of model parameters; this idea was adopted by many later networks.
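As a minimal illustration (my own sketch, not part of the notes, with arbitrary sizes), the snippet below shows how a global average pooling layer turns a feature map with one channel per class directly into class scores, with no fully connected layer:

import torch
from torch import nn

feat = torch.randn(2, 10, 5, 5)                     # 10 channels, one per class
head = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),  # average each channel over all spatial positions
                     nn.Flatten())                  # -> shape (2, 10): one score per class
print(head(feat).shape)                             # torch.Size([2, 10])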

import torch
from torch import nn
from d2l import torch as d2l

# NiN block
def nin_block(in_channels, out_channels, kernel_size, strides, padding):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, strides, padding),
        nn.ReLU(), nn.Conv2d(out_channels, out_channels, kernel_size=1),
        nn.ReLU(), nn.Conv2d(out_channels, out_channels, kernel_size=1),
        nn.ReLU())

# Network architecture
net = nn.Sequential(
    nin_block(1, 96, kernel_size=11, strides=4, padding=0),
    nn.MaxPool2d(3, stride=2),
    nin_block(96, 256, kernel_size=5, strides=1, padding=2),
    nn.MaxPool2d(3, stride=2),
    nin_block(256, 384, kernel_size=3, strides=1, padding=1),
    nn.MaxPool2d(3, stride=2), nn.Dropout(0.5),
    nin_block(384, 10, kernel_size=3, strides=1, padding=1),
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten())

X = torch.rand(size=(1, 1, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

lr, num_epochs, batch_size = 0.1, 10, 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())


GoogLeNet

GoogLeNet is a network proposed by Google. It combines convolution kernels of different sizes to build the Inception block, which allows the network to reach a very large depth.
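The trick that makes the Inception block work is that, with matching padding, convolutions of different kernel sizes keep the same spatial size, so their outputs can be concatenated along the channel dimension. A small sketch of this (my own illustration, arbitrary shapes):

import torch
from torch import nn

x = torch.randn(1, 8, 28, 28)
# 1x1, 3x3 and 5x5 convs with padding 0, 1 and 2 all keep the 28x28 spatial size
for k, p in [(1, 0), (3, 1), (5, 2)]:
    print(nn.Conv2d(8, 4, kernel_size=k, padding=p)(x).shape)  # each: torch.Size([1, 4, 28, 28])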

import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

# Inception block
class Inception(nn.Module):
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)
        # Branch 1: 1x1 conv
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # Branch 2: 1x1 conv followed by 3x3 conv
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # Branch 3: 1x1 conv followed by 5x5 conv
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # Branch 4: 3x3 max pooling followed by 1x1 conv
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # Concatenate the four branches along the channel dimension
        return torch.cat((p1, p2, p3, p4), dim=1)
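
# Quick sanity check (added illustration, not from the original notes): all four
# branches keep the spatial size and are concatenated on the channel dimension,
# so the output has c1 + c2[1] + c3[1] + c4 channels.
blk = Inception(192, 64, (96, 128), (16, 32), 32)
print(blk(torch.randn(1, 192, 28, 28)).shape)  # expected: torch.Size([1, 256, 28, 28])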

# GoogLeNet network architecture
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1), nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1), nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b3 = nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),
                   Inception(256, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),
                   Inception(512, 160, (112, 224), (24, 64), 64),
                   Inception(512, 128, (128, 256), (24, 64), 64),
                   Inception(512, 112, (144, 288), (32, 64), 64),
                   Inception(528, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),
                   Inception(832, 384, (192, 384), (48, 128), 128),
                   nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten())

net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(1024, 10))

X = torch.rand(size=(1, 1, 96, 96))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

lr, num_epochs, batch_size = 0.1, 10, 64
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())


Batch Normalization

This section covers batch normalization, an operation that consistently speeds up the convergence of deep networks.

import torch
from torch import nn
from d2l import torch as d2l

# Define the batch normalization operation
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    if not torch.is_grad_enabled():
        # In prediction mode, use the moving averages of mean and variance
        X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
    else:
        # In training mode, compute the mean and variance from the current batch
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Fully connected layer: statistics over the feature dimension
            mean = X.mean(dim=0)
            var = ((X - mean) ** 2).mean(dim=0)
        else:
            # Convolutional layer: statistics per channel, keeping X's shape
            # so the broadcasting below works
            mean = X.mean(dim=(0, 2, 3), keepdim=True)
            var = ((X - mean) ** 2).mean(dim=(0, 2, 3), keepdim=True)
        # In training mode, standardize with the current batch statistics
        X_hat = (X - mean) / torch.sqrt(var + eps)
        # Update the moving averages of mean and variance
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean.data, moving_var.data
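
# Quick sanity check (added illustration, not from the original notes): in training
# mode the standardized output should have roughly zero mean and unit variance per
# feature before gamma and beta are learned.
X_test = torch.randn(8, 3) * 5 + 2
Y_test, _, _ = batch_norm(X_test, gamma=torch.ones(3), beta=torch.zeros(3),
                          moving_mean=torch.zeros(3), moving_var=torch.ones(3),
                          eps=1e-5, momentum=0.9)
print(Y_test.mean(dim=0), Y_test.var(dim=0, unbiased=False))  # approximately 0 and 1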

# Define a BatchNorm layer that wraps the operation above
class BatchNorm(nn.Module):
    def __init__(self, num_features, num_dims):
        super().__init__()
        if num_dims == 2:
            shape = (1, num_features)
        else:
            shape = (1, num_features, 1, 1)
        # Learnable scale and shift parameters
        self.gamma = nn.Parameter(torch.ones(shape))
        self.beta = nn.Parameter(torch.zeros(shape))
        # Moving statistics are not model parameters
        self.moving_mean = torch.zeros(shape)
        self.moving_var = torch.ones(shape)

    def forward(self, X):
        # Keep the moving statistics on the same device as the input
        if self.moving_mean.device != X.device:
            self.moving_mean = self.moving_mean.to(X.device)
            self.moving_var = self.moving_var.to(X.device)
        Y, self.moving_mean, self.moving_var = batch_norm(
            X, self.gamma, self.beta, self.moving_mean, self.moving_var,
            eps=1e-5, momentum=0.9)
        return Y

# Build a network using the custom BatchNorm layer
net = nn.Sequential(nn.Conv2d(1, 6, kernel_size=5), BatchNorm(6, num_dims=4),
                    nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2),
                    nn.Conv2d(6, 16, kernel_size=5), BatchNorm(16, num_dims=4),
                    nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2),
                    nn.Flatten(), nn.Linear(16 * 4 * 4, 120),
                    BatchNorm(120, num_dims=2), nn.Sigmoid(),
                    nn.Linear(120, 84), BatchNorm(84, num_dims=2),
                    nn.Sigmoid(), nn.Linear(84, 10))

lr, num_epochs, batch_size = 1.0, 10, 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

# Build the same network using PyTorch's built-in batch normalization layers
net = nn.Sequential(nn.Conv2d(1, 6, kernel_size=5), nn.BatchNorm2d(6),
                    nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2),
                    nn.Conv2d(6, 16, kernel_size=5), nn.BatchNorm2d(16),
                    nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2),
                    nn.Flatten(), nn.Linear(256, 120), nn.BatchNorm1d(120),
                    nn.Sigmoid(), nn.Linear(120, 84), nn.BatchNorm1d(84),
                    nn.Sigmoid(), nn.Linear(84, 10))

d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())


ResNet

The introduction of ResNet, with its residual (skip) connections, pushed network depth much further; most networks today adopt ResNet's approach.

import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

# Residual block
class Residual(nn.Module):
    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels, kernel_size=3,
                               padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            # 1x1 conv on the shortcut path: matches the input's shape to that of
            # the convolutional output so the two can be added
            self.conv3 = nn.Conv2d(input_channels, num_channels, kernel_size=1,
                                   stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        # Add the shortcut (identity or 1x1-projected input) before the final ReLU
        Y += X
        return F.relu(Y)

blk = Residual(3,3)
X = torch.rand(4, 3, 6, 6)
Y = blk(X)
Y.shape

blk = Residual(3,6, use_1x1conv=True, strides=2)
blk(X).shape

# ResNet stem (initial block)
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                   nn.BatchNorm2d(64), nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

# A stage made of several residual blocks
def resnet_block(input_channels, num_channels, num_residuals,
                 first_block=False):
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            # The first residual block of each stage (except the first stage)
            # halves the height/width and changes the channel count
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=True, strides=2))
        else:
            # The remaining blocks keep the shape and channel count unchanged
            blk.append(Residual(num_channels, num_channels))
    return blk
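
# Quick sanity check (added illustration, not from the original notes): the first
# residual block of a later stage halves the height/width and changes the channel
# count (here 64 -> 128).
blk = nn.Sequential(*resnet_block(64, 128, 2))
print(blk(torch.randn(1, 64, 56, 56)).shape)  # expected: torch.Size([1, 128, 28, 28])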

# Build the residual stages
b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
b3 = nn.Sequential(*resnet_block(64, 128, 2))
b4 = nn.Sequential(*resnet_block(128, 256, 2))
b5 = nn.Sequential(*resnet_block(256, 512, 2))

# ResNet network architecture
net = nn.Sequential(b1, b2, b3, b4, b5,
                    nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(), nn.Linear(512, 10))

X = torch.rand(size=(1, 1, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

lr, num_epochs, batch_size = 0.05, 10, 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())


Cats vs. Dogs

Transfer learning is applied here: a pretrained ResNet-50 is fine-tuned for the task, with very good results.

!unzip '/content/drive/MyDrive/cat_dog.zip' # Unzip the data; be sure to unzip it to the root directory, otherwise it is very slow

import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
from torch.nn import functional as F
import torchvision as tv
from torchvision import models,transforms,datasets
import time
import json
import csv
from d2l import torch as d2l

# Check whether a GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Using gpu: %s ' % torch.cuda.is_available())

# Load the dataset; here we only crop images to the 224x224 input size ResNet expects and apply the standard ImageNet normalization
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

resnet_format = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

data_dir = '/content/cat_dog'
data_test_dir = '/content/cat_dog/test'
dsets = {x: datasets.ImageFolder(os.path.join(data_dir, x), resnet_format)
         for x in ['train', 'val']}

dsets_test = {'test': datasets.ImageFolder(data_test_dir, resnet_format)}
dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}
dset_sizes['test'] = len(dsets_test['test'])
dset_classes = dsets['train'].classes
print(dsets['train'].classes)
print(dsets['train'].class_to_idx)
print('dset_sizes: ', dset_sizes)
batch_size = 25
loader_train = torch.utils.data.DataLoader(dsets['train'], batch_size=batch_size, shuffle=True, num_workers=2)
loader_val = torch.utils.data.DataLoader(dsets['val'], batch_size=batch_size, shuffle=False, num_workers=2)
loader_test = torch.utils.data.DataLoader(dsets_test['test'], batch_size=batch_size, shuffle=False, num_workers=2)

# Define the accuracy evaluation function
def evaluate_accuracy_gpu(net, data_iter, device=None):
    if isinstance(net, torch.nn.Module):
        net.eval()
        if not device:
            device = next(iter(net.parameters())).device

    metric = d2l.Accumulator(2)
    for X, y in data_iter:
        if isinstance(X, list):
            X = [x.to(device) for x in X]
        else:
            X = X.to(device)
        y = y.to(device)
        metric.add(d2l.accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

# Define the training function
def train(net, train_iter, test_iter, num_epochs, lr, device, name):
    print('train on', device)
    net.to(device)
    # Only the parameters of the new fully connected head are optimized
    optimizer = torch.optim.SGD(net.fc.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        metric = d2l.Accumulator(3)
        net.train()
        print(epoch)
        for i, (X, y) in enumerate(train_iter):
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
        train_l = metric[0] / metric[2]
        train_acc = metric[1] / metric[2]
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        # Save the weights every five epochs
        if (epoch + 1) % 5 == 0:
            torch.save(net.state_dict(), f'/content/drive/MyDrive/params/{name}{epoch+1}.params')
        print(f'epoch_{epoch}: loss {train_l:.3f}, train acc {train_acc:.3f}, '
              f'test acc {test_acc:.3f}')

# Load a pretrained model
model = tv.models.resnet50(pretrained=True)

# Freeze the pretrained parameters
for param in model.parameters():
    param.requires_grad = False

# Replace the final layer so the output matches the number of classes
model.fc = nn.Sequential(
    nn.Linear(2048, 512),
    nn.Linear(512, 2))
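
# Quick check (added illustration, not from the original notes): after freezing the
# backbone and replacing model.fc, only the new head should still require gradients.
print([name for name, p in model.named_parameters() if p.requires_grad])
# expected: only the fc.* parameters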

# Train for ten epochs in total
lr, num_epochs = 0.045, 10
net = model
train(net, loader_train, loader_val, num_epochs, lr, d2l.try_gpu(), 'ResNet')

# Load the saved weights for prediction
path = '/content/drive/MyDrive/params/ResNet10.params'
net = model
net.load_state_dict(torch.load(path))

# Define the prediction function
def predict(net, data_iter, size, device=None):
    predictions = np.zeros(size)
    i = 0

    if isinstance(net, torch.nn.Module):
        net.eval()
        if not device:
            device = next(iter(net.parameters())).device
    net.to(device)
    for X, y in data_iter:
        if isinstance(X, list):
            X = [x.to(device) for x in X]
        else:
            X = X.to(device)
        predictions[i: i + len(X)] = d2l.argmax(net(X), dim=1).cpu().numpy()
        i += len(X)
    return predictions

# Run prediction on the test set
predictions = predict(net, loader_test, dset_sizes['test'], d2l.try_gpu())

# Save the predictions to a CSV file
def save_file(inputs, set_test, file):
    with open(file, 'w') as f:
        writer = csv.writer(f)
        for index, input in enumerate(inputs):
            # Recover the image id from the file name, e.g. '123.jpg' -> 123
            img_name = set_test['test'].imgs[index][0].split('/')[-1]
            order = int(img_name.split('.')[0])
            writer.writerow([order, int(input)])

file = '/content/drive/MyDrive/result/catVSdog_ResNet_pretrained.csv'
save_file(predictions, dsets_test, file)


Results
