根据《深度学习入门》第八章推荐的网络,用PyTorch编写实现对MNIST数据集的识别,识别精度超过99.4%。以下是自己编写的代码:
网络:
import torch.nn as nn
import torch
import numpy as np
# 定义一个卷积网络
class My_nn_s(nn.Module):
    """Convolutional network for MNIST classification.

    Architecture follows the chapter-8 network of "Deep Learning from
    Scratch": three conv/conv/pool stages (16 -> 32 -> 64 channels)
    followed by a 50-unit hidden layer with dropout and a 10-way output.

    Input:  (N, 1, 28, 28) float tensor.
    Output: (N, 10) raw logits (feed them to ``nn.CrossEntropyLoss``).
    """

    def __init__(self):
        super(My_nn_s, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1, stride=1),
            # -> (N, 16, 28, 28)
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, padding=1, stride=1),
            # -> (N, 16, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),
            # -> (N, 16, 14, 14)
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1, stride=1),
            # -> (N, 32, 14, 14)
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=2, stride=1),
            # padding=2 grows the map: -> (N, 32, 16, 16)
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),
            # -> (N, 32, 8, 8)
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1, stride=1),
            # -> (N, 64, 8, 8)
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1, stride=1),
            # -> (N, 64, 8, 8)
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),
            # -> (N, 64, 4, 4)  (PyTorch floors non-exact divisions)
            nn.Flatten(),
            # -> (N, 1024)
            nn.Linear(1024, 50),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(50, 10),
            # BUG FIX: the original had a second nn.Dropout() *after* the output
            # layer.  Randomly zeroing logits before CrossEntropyLoss corrupts
            # the training signal and the reported train accuracy; dropout
            # belongs on hidden activations only.
        )
        # Weight init: He (Kaiming) normal for W, zeros for b.
        # BUG FIX: the hand-rolled version inserted 1*3*3 at the front of the
        # fan-in list, which shifted every layer after the first onto the
        # *previous* layer's fan-in.  kaiming_normal_ computes the correct
        # fan-in per layer itself.  For a conv layer the fan-in is
        # in_channels * kh * kw (e.g. 1*3*3 for the first layer, NOT 1*28*28,
        # because each output unit only sees a 3x3 receptive field).
        for layer in self.model:
            if isinstance(layer, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Run the network; returns (N, 10) logits."""
        return self.model(x)
if __name__ == '__main__':
    # Smoke test: build the network (uncomment the print to inspect layers).
    net = My_nn_s()
    # print(net)
这里采用“He初始值”对权重参数W进行初始化。这里有个问题:第一层(卷积层)的 n 为什么是 1*3*3 而不是图片的节点数 1*28*28?原因是卷积层的每个输出单元只与一个 3×3 的局部感受野相连,因此该层的 fan-in 是 in_channels×kh×kw = 1*3*3。
另外,“He初始值”规定用前一层的节点数(即本层的输入节点数)来确定本层的初始权重尺度,而有些文章(例如知乎上的《PyTorch参数初始化和Finetune》)按本层的 fan-in 计算——对卷积层而言这两种说法实际上是同一个量。
训练代码:
# Training script. Overall pipeline:
#   1. prepare the datasets and record their sizes
#   2. wrap them in DataLoaders
#   3. build the model
#   4. define the loss function
#   5. define the optimizer (learning rate etc.)
#   6. set up bookkeeping counters (total_train_step, total_test_step, epoch)
#   7. create the TensorBoard writer
#   8. train (model.train()) and evaluate (model.eval()) each epoch,
#      saving a checkpoint after every epoch; evaluation runs under
#      torch.no_grad() so no gradients are tracked.
import os

import torch
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from net_work import *  # My_nn_s lives in net_work.py in this folder

# 1. Data: MNIST, ToTensor() scales pixels into [0, 1].
data_train = torchvision.datasets.MNIST('./dataset', train=True,
                                        transform=torchvision.transforms.ToTensor(),
                                        download=True)
data_test = torchvision.datasets.MNIST('./dataset', train=False,
                                       transform=torchvision.transforms.ToTensor(),
                                       download=True)

# Use the GPU when available; the model, loss and each batch get .to(device).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# 1.1 Dataset sizes, used for the accuracy denominators below.
# NOTE: MNIST has 60000 train / 10000 test images (the original comment's
# 50000 is the CIFAR-10 size).
len_train_data = len(data_train)
print('训练集的长度为: {}'.format(len_train_data))
len_test_data = len(data_test)
print('测试集的长度为: {}'.format(len_test_data))

# 2. DataLoaders.  The test loader uses a real batch size: the original's
# implicit batch_size=1 gave identical accuracy but ~100x slower evaluation
# (total_test_loss is now a sum of per-batch mean losses).
data_train_loader = DataLoader(data_train, batch_size=100, drop_last=True, shuffle=True)
data_test_loader = DataLoader(data_test, batch_size=100)

# 3. Model.
my_nn = My_nn_s()
my_nn = my_nn.to(device)

# 4. Loss: cross-entropy over the 10 digit classes.
my_loss = nn.CrossEntropyLoss()
my_loss = my_loss.to(device)

# 5. Optimizer.  NOTE: Adam diverges with a large lr such as 0.1; 1e-3 works.
learn_rate = 0.001
# optimer = torch.optim.SGD(my_nn.parameters(), lr=learn_rate)
optimer = torch.optim.Adam(my_nn.parameters(), lr=learn_rate)

# 6. Bookkeeping counters.
epoch = 20
total_train_step = 0
total_test_step = 0

# 7. TensorBoard writer.
writer = SummaryWriter('train_test')

# Make sure the checkpoint directory exists before the first torch.save
# (the original crashed when ./net_work_save was missing).
os.makedirs('./net_work_save', exist_ok=True)

# 8. Train / evaluate loop.
for i_epoch in range(epoch):
    print('----------第{}轮训练开始----------'.format(i_epoch + 1))
    # 8.1 Training phase: enables layers such as Dropout / BatchNorm.
    my_nn.train()
    for imgs, targets in data_train_loader:
        imgs = imgs.to(device)
        targets = targets.to(device)
        optimer.zero_grad()                  # reset gradients
        predict = my_nn(imgs)                # forward pass
        loss = my_loss(predict, targets)
        loss.backward()                      # backward pass
        optimer.step()                       # parameter update
        total_train_step += 1
        # argmax(1): per-row (per-sample) index of the largest logit.
        accuracy_train = (predict.argmax(1) == targets).sum().item() / len(targets)
        if (total_train_step + 1) % 100 == 0:
            print('训练次数:{},Loss: {}, Accuracy: {}%'.format(total_train_step + 1, loss.item(), accuracy_train * 100))
            writer.add_scalar('train_loss', loss.item(), total_train_step)
            writer.add_scalar('train_accuracy', accuracy_train * 100, total_train_step)
    # 8.2 Evaluation phase: disables Dropout; no gradients are tracked.
    my_nn.eval()
    total_test_loss = 0.0      # accumulated loss over the whole test set
    total_test_accuracy = 0.0  # accumulated number of correct predictions
    with torch.no_grad():
        for imgs_1, targets_1 in data_test_loader:
            imgs_1 = imgs_1.to(device)
            targets_1 = targets_1.to(device)
            predict = my_nn(imgs_1)
            total_test_loss += my_loss(predict, targets_1).item()
            total_test_accuracy += (predict.argmax(1) == targets_1).sum().item()
    total_test_step += 1  # one evaluation pass per epoch
    print('整体测试集上的Loss: {}'.format(total_test_loss))
    print('整体测试集上的正确率: {}'.format(total_test_accuracy / len_test_data * 100))
    # BUG FIX: the original logged only the *last test batch's* loss here,
    # keyed by a per-batch counter; log the accumulated epoch totals instead.
    writer.add_scalar('test_loss', total_test_loss, total_test_step)
    writer.add_scalar('test_accuracy', total_test_accuracy / len_test_data * 100, total_test_step)
    # 8.3 Save a checkpoint after every epoch.
    torch.save(my_nn,
               './net_work_save/My_nn_{}_ac={}%.pth'.format(i_epoch + 1, total_test_accuracy / len_test_data * 100))
    # torch.save(my_nn.state_dict(), ...) is the officially recommended form.
    print('模型已保存')
writer.close()
这里学习率为0.001,采用基于Adam的最优化。注意,当采用Adam时学习率太大(0.1)无法正常学习。
另外,由于Dropout层的存在,在训练集上显示的精度会偏低(明显低于在测试集上的精度)。