数字识别的神经网络

一、不含迭代次数的代码

import  numpy as np
import torch
from torch.utils.data import DataLoader
import torchvision.datasets as dsets
import torchvision.transforms as transforms

#交叉熵损失函数(对数损失)
def cross_entropy_error(p, y):
    """Return the mean cross-entropy between predictions and one-hot targets.

    Args:
        p: Predicted probabilities, either a single sample of shape
            (classes,) or a batch of shape (batch, classes).
        y: One-hot targets with the same shape as ``p``.

    Returns:
        The cross-entropy summed over classes and averaged over the batch.
    """
    p = np.asarray(p)
    y = np.asarray(y)
    # A 1-D input is one sample, i.e. a batch of one. Without this reshape
    # shape[0] would be the class count and the loss would be divided by it.
    if p.ndim == 1:
        p = p.reshape(1, -1)
        y = y.reshape(1, -1)
    delta = 1e-7  # keeps log() finite when a probability is exactly 0
    batch_size = p.shape[0]
    return -np.sum(y * np.log(p + delta)) / batch_size
#激活函数
def _softmax(x):
if x.ndim == 2:
c = np.max(x, axis=1)
x = x.T - c # 溢出对策
y = np.exp(x) / np.sum(np.exp(x), axis=0)
return y.T
c = np.max(x)
exp_x = np.exp(x - c)
return exp_x / np.sum(exp_x)
#数值微分法求梯度(中心差分)
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Args:
        f: Callable that takes the array ``x`` and returns a scalar.
        x: Floating-point NumPy array. Each element is perturbed in place
            while ``f`` is evaluated and is restored afterwards.

    Returns:
        Array shaped like ``x`` holding (f(x + h) - f(x - h)) / (2 * h) for
        every element.

    Raises:
        TypeError: If ``x`` does not have a floating-point dtype; the +/- h
            perturbation would be truncated when written back into it.
    """
    if not np.issubdtype(x.dtype, np.floating):
        raise TypeError("numerical_gradient requires a floating-point array")

    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    for idx in np.ndindex(x.shape):
        original = x[idx]
        try:
            x[idx] = float(original) + h
            fxh1 = f(x)  # f(x+h)

            x[idx] = float(original) - h
            fxh2 = f(x)  # f(x-h)
        finally:
            # Restore the element even if f raises, so x is never left modified
            x[idx] = original
        grad[idx] = (fxh1 - fxh2) / (2 * h)

    return grad
def relu(in_data):
    """Apply the rectified linear unit element-wise: max(0, value)."""
    return np.maximum(0, in_data)
class TwoLayerNet:
    """Two-layer fully connected classifier: affine -> ReLU -> affine -> softmax."""

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameter arrays.

        Weights are drawn from a standard normal distribution scaled by
        ``weight_init_std``; biases start at zero.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            weight_init_std: Scale applied to the random initial weights.
        """
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        hidden = relu(np.dot(x, W1) + b1)
        scores = np.dot(hidden, W2) + b2
        return _softmax(scores)

    def loss(self, x, y):
        """Return the cross-entropy between the prediction for ``x`` and targets ``y``."""
        return cross_entropy_error(self.predict(x), y)

    def numerical_gradient2(self, x, y):
        """Return numerical gradients of the loss with respect to each parameter.

        Returns:
            Dict with keys 'W1', 'b1', 'W2', 'b2' mapping to gradient arrays.
        """
        def loss_of_params(_):
            # The argument is ignored on purpose: numerical_gradient() perturbs
            # the parameter array in place, and self.loss() reads that same
            # array through self.params.
            return self.loss(x, y)

        return {
            key: numerical_gradient(loss_of_params, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def accuracy(self, x, t):
        """Return the fraction of rows in ``x`` classified as one-hot ``t`` says.

        Args:
            x: Input batch, one sample per row.
            t: One-hot target rows matching ``x``.
        """
        # Take the arg-max of the network output; the earlier code took it of
        # the raw input x, so the score never reflected the model.
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(t, axis=1)
        return np.sum(predicted == expected) / float(x.shape[0])
if __name__ == '__main__':
    # Load the MNIST dataset
    train_dataset = dsets.MNIST(root='/ml/pymnist',  # root directory of the data
                                train=True,  # select the training split
                                transform=None,  # no preprocessing
                                download=True)  # download the images from the network
    test_dataset = dsets.MNIST(root='/ml/pymnist',  # root directory of the data
                               train=False,  # select the test split
                               transform=None,  # no preprocessing
                               download=True)  # download the images from the network

    # Flatten every image to a 28*28-element row and one-hot encode the labels
    x_train = train_dataset.data.numpy().reshape(-1, 28 * 28)
    y_train_temp = train_dataset.targets.reshape(train_dataset.targets.shape[0], 1)
    y_train = torch.zeros(y_train_temp.shape[0], 10).scatter_(1, y_train_temp, 1).numpy()
    x_test = test_dataset.data.numpy().reshape(-1, 28 * 28)
    # Test labels must come from the test split (they were previously taken
    # from train_dataset, so they did not correspond to x_test)
    y_test_temp = test_dataset.targets.reshape(test_dataset.targets.shape[0], 1)
    y_test = torch.zeros(y_test_temp.shape[0], 10).scatter_(1, y_test_temp, 1).numpy()

    # Hyper-parameters
    iters_num = 1000  # number of training iterations
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.001

    # Initialise the network
    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
    for i in range(iters_num):
        # Draw a random mini-batch
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        y_batch = y_train[batch_mask]

        grad = network.numerical_gradient2(x_batch, y_batch)

        # Step each parameter against its gradient
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        # Report progress; the loss is only evaluated when it is printed
        if i % 100 == 0:
            print(network.loss(x_batch, y_batch))

数字识别的神经网络(个人笔记)_深度学习

二、含迭代次数(epoch)的代码

import  numpy as np
import torch
from torch.utils.data import DataLoader
import torchvision.datasets as dsets
import torchvision.transforms as transforms


def cross_entropy_error(p, y):
    """Return the mean cross-entropy between predictions and one-hot targets.

    Args:
        p: Predicted probabilities, either a single sample of shape
            (classes,) or a batch of shape (batch, classes).
        y: One-hot targets with the same shape as ``p``.

    Returns:
        The cross-entropy summed over classes and averaged over the batch.
    """
    p = np.asarray(p)
    y = np.asarray(y)
    # A 1-D input is one sample, i.e. a batch of one. Without this reshape
    # shape[0] would be the class count and the loss would be divided by it.
    if p.ndim == 1:
        p = p.reshape(1, -1)
        y = y.reshape(1, -1)
    delta = 1e-7  # keeps log() finite when a probability is exactly 0
    batch_size = p.shape[0]
    return -np.sum(y * np.log(p + delta)) / batch_size
def _softmax(x):
if x.ndim == 2:
c = np.max(x, axis=1)
x = x.T - c # 溢出对策
y = np.exp(x) / np.sum(np.exp(x), axis=0)
return y.T
c = np.max(x)
exp_x = np.exp(x - c)
return exp_x / np.sum(exp_x)

def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Args:
        f: Callable that takes the array ``x`` and returns a scalar.
        x: Floating-point NumPy array. Each element is perturbed in place
            while ``f`` is evaluated and is restored afterwards.

    Returns:
        Array shaped like ``x`` holding (f(x + h) - f(x - h)) / (2 * h) for
        every element.

    Raises:
        TypeError: If ``x`` does not have a floating-point dtype; the +/- h
            perturbation would be truncated when written back into it.
    """
    if not np.issubdtype(x.dtype, np.floating):
        raise TypeError("numerical_gradient requires a floating-point array")

    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    for idx in np.ndindex(x.shape):
        original = x[idx]
        try:
            x[idx] = float(original) + h
            fxh1 = f(x)  # f(x+h)

            x[idx] = float(original) - h
            fxh2 = f(x)  # f(x-h)
        finally:
            # Restore the element even if f raises, so x is never left modified
            x[idx] = original
        grad[idx] = (fxh1 - fxh2) / (2 * h)

    return grad
def relu(in_data):
    """Apply the rectified linear unit element-wise: max(0, value)."""
    return np.maximum(0, in_data)
class TwoLayerNet:
    """Two-layer fully connected classifier: affine -> ReLU -> affine -> softmax."""

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameter arrays.

        Weights are drawn from a standard normal distribution scaled by
        ``weight_init_std``; biases start at zero.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            weight_init_std: Scale applied to the random initial weights.
        """
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        hidden = relu(np.dot(x, W1) + b1)
        scores = np.dot(hidden, W2) + b2
        return _softmax(scores)

    def loss(self, x, y):
        """Return the cross-entropy between the prediction for ``x`` and targets ``y``."""
        return cross_entropy_error(self.predict(x), y)

    def numerical_gradient(self, x, y):
        """Return numerical gradients of the loss with respect to each parameter.

        Returns:
            Dict with keys 'W1', 'b1', 'W2', 'b2' mapping to gradient arrays.
        """
        def loss_of_params(_):
            # The argument is ignored on purpose: the module-level
            # numerical_gradient() perturbs the parameter array in place, and
            # self.loss() reads that same array through self.params.
            return self.loss(x, y)

        # Inside a method body this name resolves to the module-level
        # function, not to this method.
        return {
            key: numerical_gradient(loss_of_params, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def accuracy(self, x, t):
        """Return the fraction of rows in ``x`` classified as one-hot ``t`` says.

        Args:
            x: Input batch, one sample per row.
            t: One-hot target rows matching ``x``.
        """
        # Take the arg-max of the network output; the earlier code took it of
        # the raw input x, so the score never reflected the model.
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(t, axis=1)
        return np.sum(predicted == expected) / float(x.shape[0])
if __name__ == '__main__':
    # Load the MNIST dataset
    train_dataset = dsets.MNIST(root='/ml/pymnist',  # root directory of the data
                                train=True,  # select the training split
                                transform=None,  # no preprocessing
                                download=True)  # download the images from the network
    test_dataset = dsets.MNIST(root='/ml/pymnist',  # root directory of the data
                               train=False,  # select the test split
                               transform=None,  # no preprocessing
                               download=True)  # download the images from the network

    # Flatten every image to a 28*28-element row and one-hot encode the labels
    x_train = train_dataset.data.numpy().reshape(-1, 28 * 28)
    y_train_temp = train_dataset.targets.reshape(train_dataset.targets.shape[0], 1)
    y_train = torch.zeros(y_train_temp.shape[0], 10).scatter_(1, y_train_temp, 1).numpy()
    x_test = test_dataset.data.numpy().reshape(-1, 28 * 28)
    # Test labels must come from the test split (they were previously taken
    # from train_dataset, so accuracy() compared x_test against labels of
    # different samples and of a different length)
    y_test_temp = test_dataset.targets.reshape(test_dataset.targets.shape[0], 1)
    y_test = torch.zeros(y_test_temp.shape[0], 10).scatter_(1, y_test_temp, 1).numpy()

    # Hyper-parameters
    iters_num = 600  # mini-batch updates per epoch
    train_size = x_train.shape[0]

    batch_size = 100
    learning_rate = 0.001
    epoch = 5  # number of passes of iters_num updates
    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

    for i in range(epoch):
        print('current epoch is :', i)
        for num in range(iters_num):
            # Draw a random mini-batch
            batch_mask = np.random.choice(train_size, batch_size)
            x_batch = x_train[batch_mask]
            y_batch = y_train[batch_mask]

            grad = network.numerical_gradient(x_batch, y_batch)

            # Step each parameter against its gradient
            for key in ('W1', 'b1', 'W2', 'b2'):
                network.params[key] -= learning_rate * grad[key]

            # Report progress; the loss is only evaluated when it is printed
            if num % 100 == 0:
                print(network.loss(x_batch, y_batch))
        # NOTE(review): the published listing lost its indentation; reporting
        # test accuracy once per epoch is assumed here - confirm.
        print(network.accuracy(x_test, y_test))

对于损失函数,激活函数,以及梯度下降的理解


损失函数: 损失函数是能够很好地反映模型预测与实际数据之间差距的工具。损失函数的值越小,说明模型对数据的拟合越好。
激活函数: 神经网络中的每个神经元节点接受上一层神经元的输出值作为本神经元的输入值,并将输入值传递给下一层;输入层神经元节点会将输入属性值直接传递给下一层(隐层或输出层)。在多层神经网络中,上层节点的输出和下层节点的输入之间具有一个函数关系,这个函数就称为激活函数(例如上面代码中的 relu 和 softmax),它为网络引入非线性。
梯度下降: 其目的是找到使损失函数最小的未知参数;做法是沿损失函数梯度的反方向,按学习率逐步更新参数(即上面代码中的 params -= learning_rate * grad)。