dive into pytorch dive into pytorch电子

转载

mob64ca140530fb 2024-06-21 06:28:15

文章标签 dive into pytorch 深度学习多层感知机简洁代码神经网络 文章分类 PyTorch 人工智能

文章目录

前言
softmax的原始代码
softmax的简洁代码
多层感知机

多层感知机原始代码
多层感知机简洁代码

前言

softmax回归其实和线性回归很像，二者都是单层神经网络；区别在于线性回归输出的是连续值，用于回归问题，而softmax回归输出的是各个类别的概率，用于多分类问题。

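$\boldsymbol{O} = \boldsymbol{X}\boldsymbol{W} + \boldsymbol{b}, \qquad \hat{y}_{ij} = \operatorname{softmax}(\boldsymbol{O})_{ij} = \dfrac{\exp(O_{ij})}{\sum_{k}\exp(O_{ik})}$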

dive into pytorch dive into pytorch电子_神经网络_02

softmax的原始代码

相比于原代码，根据自己的情况有所改动

import torch
import torchvision
import numpy as np
import d2l	# 原文的包，被我拉到本地，有所改动


# Training hyperparameters.
num_epochs = 5
lr = 0.1
batch_size = 256

# Training / test iterators produced by the (locally modified) d2l helper.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Each sample is flattened to num_inputs features (see net); one output per class.
num_inputs, num_outputs = 784, 10

# Parameters: weights sampled from a normal distribution (mean 0, std 0.01),
# bias initialised to zero; both are marked as requiring gradients.
W = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_outputs)),
    dtype=torch.float32,
).requires_grad_()
b = torch.zeros(num_outputs, dtype=torch.float32).requires_grad_()

# 模型
def softmax(X):
    """Return the row-wise softmax of a 2-D tensor.

    Args:
        X: tensor of scores; the normalisation runs over ``dim=1``.

    Returns:
        A tensor with the same shape as ``X`` whose rows sum to 1.
    """
    # Softmax is invariant to subtracting a per-row constant. Shifting by the
    # row maximum keeps every exponent <= 0, so exp() cannot overflow to inf
    # (which would otherwise produce inf / inf = nan for large scores).
    row_max = X.max(dim=1, keepdim=True)[0]
    X_exp = (X - row_max).exp()
    partition = X_exp.sum(dim=1, keepdim=True)  # keepdim keeps shape (rows, 1)
    return X_exp / partition  # broadcasting divides each row by its own sum

def net(X):
    """Softmax-regression forward pass: flatten, affine transform, softmax."""
    # Collapse every sample into a row of num_inputs features.
    flat = X.view((-1, num_inputs))
    scores = torch.mm(flat, W) + b
    return softmax(scores)

# 损失函数
def cross_entropy(y_hat, y):
    """Return the per-sample cross-entropy loss.

    Args:
        y_hat: 2-D tensor of predicted probabilities, one row per sample.
        y: tensor of integer class labels, one per sample.

    Returns:
        A tensor of shape ``(batch, 1)`` holding ``-log`` of the probability
        predicted for each sample's true class.
    """
    # Labels are stored as class indices rather than one-hot vectors, so
    # gather picks, for every row, the predicted probability at the label's
    # position; that is the only term of the cross-entropy sum that is non-zero.
    p = y_hat.gather(1, y.view(-1, 1))
    return -torch.log(p)


# 本函数已保存在d2lzh_pytorch包中方便以后使用。该函数将被逐步改进：它的完整实现将在“图像增广”一节中描述
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches.
        net: callable mapping ``X`` to per-class scores; the prediction is the
            argmax over ``dim=1``.

    Returns:
        Number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no samples.
    """
    acc_sum, n = 0.0, 0
    # Evaluation never calls backward(), so skip building autograd graphs.
    with torch.no_grad():
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n


# 本函数已保存在d2lzh包中方便以后使用
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    """Train ``net`` for ``num_epochs`` epochs, printing metrics after each one.

    Args:
        net: callable mapping a batch ``X`` to per-class outputs.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches passed to ``evaluate_accuracy``.
        loss: callable ``loss(y_hat, y)``; its result is summed before backward.
        num_epochs: number of passes over ``train_iter``.
        batch_size: forwarded to ``d2l.sgd`` when no optimizer is given.
        params: parameter tensors updated by ``d2l.sgd`` (manual path).
        lr: learning rate forwarded to ``d2l.sgd`` (manual path).
        optimizer: optional optimizer object; when given, its ``zero_grad`` /
            ``step`` replace the manual path.
    """
    use_optimizer = optimizer is not None
    for epoch_idx in range(num_epochs):
        loss_total, correct_total, sample_count = 0.0, 0.0, 0
        for features, labels in train_iter:
            predictions = net(features)  # one row of outputs per sample
            batch_loss = loss(predictions, labels).sum()

            # Clear gradients left over from the previous step.
            if use_optimizer:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for tensor in params:
                    tensor.grad.data.zero_()

            batch_loss.backward()

            # Parameter update: optimizer if supplied, otherwise d2l.sgd
            # (presumably a mini-batch SGD step; defined in the d2l package).
            if use_optimizer:
                optimizer.step()
            else:
                d2l.sgd(params, lr, batch_size)

            loss_total += batch_loss.item()
            hits = predictions.argmax(dim=1) == labels  # training accuracy
            correct_total += hits.sum().item()
            sample_count += labels.shape[0]

        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch_idx + 1, loss_total / sample_count,
                 correct_total / sample_count, test_acc))

# Manual-SGD path: no optimizer is given, so params and lr are supplied instead.
train_ch3(
    net, train_iter, test_iter, cross_entropy, num_epochs, batch_size,
    params=[W, b], lr=lr,
)

y.gather(dim, index)函数
举个例子：

import torch
a = torch.randint(0, 30, (2, 3, 5))
print(a)

# index has the same shape as a; each entry selects a position along dim 1.
index = torch.tensor(
    [
        [[0, 1, 2, 0, 2], [0, 0, 0, 0, 0], [1, 1, 1, 1, 1]],
        [[1, 2, 2, 2, 2], [0, 0, 0, 0, 0], [2, 2, 2, 2, 2]],
    ],
    dtype=torch.long,
)
b = a.gather(1, index)
print(b)

# With dim=1 only the second coordinate is replaced by the value stored in
# index at the same position: b[i][j][k] = a[i][index[i][j][k]][k].
# Example: index[0][0][1] is 1, so b[0][0][1] takes the value of a[0][1][1].

参考：https://www.jianshu.com/p/5d1f8cd5fe31

softmax的简洁代码

import torch
from torch import nn
from torch.nn import init
import numpy as np
import d2l

# Model dimensions: 28 * 28 = 784 flattened input features, 10 output classes.
num_inputs, num_outputs = 28 * 28, 10

# Training / test iterators produced by the (locally modified) d2l helper.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)


# 模型
class LinearNet(nn.Module):
    """A single fully connected layer applied to flattened inputs."""

    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)

    def forward(self, x):
        # x arrives as e.g. (batch, 1, 28, 28): keep the batch axis and
        # collapse every remaining axis into one feature axis.
        batch = x.shape[0]
        flat = x.view(batch, -1)
        return self.linear(flat)

net = LinearNet(num_inputs, num_outputs)

# Initialisation: weights from a normal distribution (mean 0, std 0.01), bias zero.
init.normal_(net.linear.weight, 0, 0.01)
init.zeros_(net.linear.bias)

# Loss function
loss = nn.CrossEntropyLoss()

# Optimisation method
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.1)

# Number of training epochs
num_epochs = 5


def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches.
        net: callable mapping ``X`` to per-class scores; the prediction is the
            argmax over ``dim=1``.

    Returns:
        Number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no samples.
    """
    acc_sum, n = 0.0, 0
    # Evaluation never calls backward(), so skip building autograd graphs.
    with torch.no_grad():
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n


def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    """Train ``net`` for ``num_epochs`` epochs, printing metrics after each one.

    Args:
        net: callable mapping a batch ``X`` to per-class outputs.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches passed to ``evaluate_accuracy``.
        loss: callable ``loss(y_hat, y)``; its result is summed before backward.
        num_epochs: number of passes over ``train_iter``.
        batch_size: forwarded to ``d2l.sgd`` when no optimizer is given.
        params: parameter tensors updated by ``d2l.sgd`` (manual path).
        lr: learning rate forwarded to ``d2l.sgd`` (manual path).
        optimizer: optional optimizer object; when given, its ``zero_grad`` /
            ``step`` replace the manual path.
    """
    use_optimizer = optimizer is not None
    for epoch_idx in range(num_epochs):
        loss_total, correct_total, sample_count = 0.0, 0.0, 0
        for features, labels in train_iter:
            predictions = net(features)
            batch_loss = loss(predictions, labels).sum()

            # Clear gradients left over from the previous step.
            if use_optimizer:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for tensor in params:
                    tensor.grad.data.zero_()

            batch_loss.backward()

            # Parameter update: optimizer if supplied, otherwise d2l.sgd
            # (presumably a mini-batch SGD step; defined in the d2l package).
            if use_optimizer:
                optimizer.step()
            else:
                d2l.sgd(params, lr, batch_size)

            loss_total += batch_loss.item()
            hits = predictions.argmax(dim=1) == labels  # training accuracy
            correct_total += hits.sum().item()
            sample_count += labels.shape[0]

        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch_idx + 1, loss_total / sample_count,
                 correct_total / sample_count, test_acc))

# train_ch3 comes from the previous section (also packaged in d2l); here the
# optimizer drives the updates, so params and lr keep their None defaults.
train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size,
    optimizer=optimizer,
)

多层感知机

前面的两种都是单层神经网络，多层神经网络可以说是在单层神经网络的基础上加上隐藏层。

dive into pytorch dive into pytorch电子_简洁代码_03

从上面可以看出，层数越多，也只是在计算式中多嵌套几层同样形式的变换而已。

dive into pytorch dive into pytorch电子_dive into pytorch_04

多层感知机原始代码

由于是从单层神经网络扩展而来，其实很多地方都很像，区别在于：多了其他层的 W、b；加上了激活函数；网络结构稍作改变。数据集还是用之前的。

# 代码不完整
# w，b
num_inputs = 784
num_outputs = 10
num_hiddens = 256

# Hidden layer: weights from a normal distribution (mean 0, std 0.01), zero bias.
W1 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hiddens)),
    dtype=torch.float32,
)
b1 = torch.zeros(num_hiddens, dtype=torch.float32)

# Output layer, initialised the same way.
W2 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hiddens, num_outputs)),
    dtype=torch.float32,
)
b2 = torch.zeros(num_outputs, dtype=torch.float32)

# Every parameter must track gradients so backward() can populate .grad.
params = [W1, b1, W2, b2]
for param in params:
    param.requires_grad_()

# 激活函数
def relu(X):
    """Element-wise ReLU: the maximum of each entry of ``X`` and zero."""
    zero = torch.tensor(0.0)
    # The 0-dim tensor is broadcast against X.
    return torch.max(X, zero)
    
# 网络模型
def net(X):
    """MLP forward pass: flatten, hidden affine layer with ReLU, output affine layer."""
    # Collapse every sample into a row of num_inputs features.
    flat = X.view((-1, num_inputs))
    hidden = relu(flat @ W1 + b1)
    # No softmax is applied here; the raw output scores are returned.
    return hidden @ W2 + b2

多层感知机简洁代码

# 代码不完整，只有区别部分
num_inputs = 784
num_outputs = 10
num_hiddens = 256

# The network; each nn.Linear layer carries its own parameters.
net = nn.Sequential(
    d2l.FlattenLayer(),  # presumably flattens each sample to 1-D; defined in d2l
    nn.Linear(in_features=num_inputs, out_features=num_hiddens),
    nn.ReLU(),
    nn.Linear(in_features=num_hiddens, out_features=num_outputs),
)

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。