数据集简介:MNIST数据集,60000个样本用于训练,10000个样本用于测试,每一个样本都是28*28点阵图,共784个像素点(pixel),每个像素点的值从0-255。 每个样本的784个像素点的值就是特征值(feature),每个样本的数字就是标签(label)。

第一种方法,一步步构建神经网络,不使用Tensorflow和Keras。

使用csv格式的数据文件,每一行代表一个样本,train用的共60000行,每一行由一个label+784个features组成,也就是共785列。

代码如下:

#DumpLocalMnistData.py
#将数据进行预处理存为一个序列化的文件
import numpy as np
import pickle    

# --- Load the raw CSV data -------------------------------------------------
# Each row of the CSV is one sample: column 0 holds the digit label,
# columns 1..784 hold the pixel values (0-255).
image_size = 28
image_pixels = image_size * image_size
data_path = 'mnistdata/'
train_data = np.loadtxt(data_path + 'mnist_train.csv',
                        delimiter= ',')
test_data = np.loadtxt(data_path + 'mnist_test.csv',
                        delimiter= ',')

# Rescale pixel intensities from [0, 255] into (0.01, 1.0) so the sigmoid
# never receives exact 0 inputs.
# NOTE: np.asfarray was removed in NumPy 2.0; np.asarray(..., dtype=float)
# is the documented drop-in replacement.
frac = 0.99 / 255
train_imgs = np.asarray(train_data[:, 1:], dtype=float) * frac + 0.01
test_imgs = np.asarray(test_data[:, 1:], dtype=float) * frac + 0.01

train_labels = np.asarray(train_data[:, :1], dtype=float)  # first column only (labels)
test_labels = np.asarray(test_data[:, :1], dtype=float)

lr = np.arange(10)  # the ten possible digits 0..9

# One-hot encode the labels by broadcasting, e.g. 0 => [1 0 0 0 0 0 0 0 0 0].
# NOTE: np.float was removed in NumPy 1.24; the builtin float is equivalent.
train_labels_one_hot = (lr == train_labels).astype(float)
test_labels_one_hot = (lr == test_labels).astype(float)

# Squash the hard 0/1 targets to 0.01/0.99 so the sigmoid output never has
# to reach its asymptotes (which would require infinite weights).
train_labels_one_hot[train_labels_one_hot == 0] = 0.01
train_labels_one_hot[train_labels_one_hot == 1] = 0.99

test_labels_one_hot[test_labels_one_hot == 0] = 0.01
test_labels_one_hot[test_labels_one_hot == 1] = 0.99

# Serialize all six preprocessed arrays into a single binary pickle file so
# later runs can load the data quickly instead of re-parsing the CSVs.
data = (train_imgs,
        test_imgs,
        train_labels,
        test_labels,
        train_labels_one_hot,
        test_labels_one_hot)
with open('mnistdata/pickled_mnist.pkl','bw') as fh:
    pickle.dump(data, fh)
#Ann_HandWritten.py
import numpy as np
import pickle

#numpy.vectorize takes a function f:a->b and turns it into g:a[]->b[]
#让输入值和返回值向量化 + 激活函数sigmoid

def sigmoid(x):
    """Logistic activation 1 / (1 + e^-x).

    np.exp already broadcasts over scalars and arrays, so the original
    @np.vectorize wrapper only added a slow per-element Python call for
    every entry; dropping it computes the same values in one C-level pass.
    """
    return 1 / (1 + np.exp(-x))

activation_function = sigmoid

#引入scipy的统计包,目的是引入truncnorm函数
#truncnorm无法指定上下界,稍微改造一下,让它能体现上下界
from scipy.stats import truncnorm

def truncated_normal(mean = 0,sd = 1, low = 0, upp = 10):
    """Return a frozen scipy truncnorm distribution clipped to [low, upp].

    scipy's truncnorm takes its clip points in standard-deviation units
    relative to loc/scale, so the absolute bounds are converted first.
    """
    a = (low - mean) / sd
    b = (upp - mean) / sd
    return truncnorm(a, b, loc = mean, scale = sd)

#定义神经网络ANN
class NeuralNetwork:
    """A minimal fully-connected network with one hidden layer, trained one
    sample at a time with plain gradient-descent backpropagation.

    Weight shapes: wih is (hidden, in), who is (out, hidden); inputs are
    treated as column vectors.
    """

    def __init__(self,
                 no_of_in_nodes,
                 no_of_out_nodes,
                 no_of_hidden_nodes,
                 learning_rate):
        self.no_of_in_nodes = no_of_in_nodes
        self.no_of_out_nodes = no_of_out_nodes
        self.no_of_hidden_nodes = no_of_hidden_nodes
        self.learning_rate = learning_rate
        # Weight matrices are created once at construction time.
        self.create_weight_matrices()

    def create_weight_matrices(self):
        """Initialize both weight matrices from a truncated normal bounded by
        +-1/sqrt(fan_in), a common heuristic that keeps early activations
        out of the sigmoid's saturated regions.
        """
        rad = 1 / np.sqrt(self.no_of_in_nodes)
        X = truncated_normal(mean = 0,
                            sd = 1,
                            low = -rad,
                            upp = rad)
        # input -> hidden weights
        self.wih = X.rvs((self.no_of_hidden_nodes, self.no_of_in_nodes))
        rad = 1 / np.sqrt(self.no_of_hidden_nodes)
        X = truncated_normal(mean = 0,
                            sd = 1,
                            low = -rad,
                            upp = rad)
        # hidden -> output weights
        self.who = X.rvs((self.no_of_out_nodes, self.no_of_hidden_nodes))

    def train(self, input_vector, target_vector):
        """One forward + backward pass on a single (sample, target) pair."""
        # Reshape both vectors into columns for the matrix products below.
        input_vector = np.array(input_vector, ndmin = 2).T
        target_vector = np.array(target_vector, ndmin = 2).T

        # Forward pass: input -> hidden -> output, sigmoid at each layer.
        output_hidden = activation_function(np.dot(self.wih, input_vector))
        output_network = activation_function(np.dot(self.who, output_hidden))

        # Output-layer error (target - prediction).
        output_errors = target_vector - output_network

        # BUG FIX: propagate the error back through the *pre-update* who.
        # The original code updated self.who first and then used the already
        # updated matrix to compute hidden_errors, mixing new and old weights
        # inside a single backpropagation step; standard backprop derives all
        # gradients from the weights as they were at the forward pass.
        hidden_errors = np.dot(self.who.T, output_errors)

        # Gradient step for the hidden->output weights; the sigmoid's
        # derivative is out * (1 - out).
        tmp = output_errors * output_network * (1.0 - output_network)
        self.who += self.learning_rate * np.dot(tmp, output_hidden.T)

        # Gradient step for the input->hidden weights.
        tmp = hidden_errors * output_hidden * (1.0 - output_hidden)
        self.wih += self.learning_rate * np.dot(tmp, input_vector.T)

    def run(self, input_vector):
        """Forward pass only; returns the (out_nodes, 1) activation column."""
        input_vector = np.array(input_vector, ndmin=2).T
        output_vector = activation_function(np.dot(self.wih, input_vector))
        return activation_function(np.dot(self.who, output_vector))

    def confusion_matrix(self, data_array, labels):
        """Build a 10x10 confusion matrix: rows index the predicted digit,
        columns index the true digit (labels is an (n, 1) array).
        """
        cm = np.zeros((10, 10), int)
        for i in range(len(data_array)):
            res = self.run(data_array[i])
            cm[res.argmax(), int(labels[i][0])] += 1
        return cm

    def precision(self, label, confusion_matrix):
        """Precision = TP / all-predicted-as-label.

        BUG FIX: cm is indexed [predicted, true], so the predicted-as-label
        total is the ROW sum; the original divided by the column sum, which
        is actually the recall denominator.
        """
        row = confusion_matrix[label, :]
        return confusion_matrix[label, label] / row.sum()

    def recall(self, label, confusion_matrix):
        """Recall = TP / all-truly-label (the COLUMN sum; see precision)."""
        col = confusion_matrix[:, label]
        return confusion_matrix[label, label] / col.sum()

    def evaluate(self, data, labels):
        """Return (#correct, #wrong) argmax predictions over a dataset."""
        corrects, wrongs = 0, 0
        for i in range(len(data)):
            if self.run(data[i]).argmax() == labels[i]:
                corrects += 1
            else:
                wrongs += 1
        return corrects, wrongs

# Restore the six preprocessed MNIST arrays from the pickle file written by
# DumpLocalMnistData.py.
with open('mnistdata/pickled_mnist.pkl','br') as fh:
    data = pickle.load(fh)

(train_imgs,
 test_imgs,
 train_labels,
 test_labels,
 train_labels_one_hot,
 test_labels_one_hot) = data

image_size = 28
no_of_different_labels = 10
image_pixels = image_size * image_size

# Build the network: 784 inputs, 100 hidden units, 10 outputs.
ANN = NeuralNetwork(no_of_in_nodes = image_pixels,
                    no_of_out_nodes = 10,
                    no_of_hidden_nodes = 100,
                    learning_rate = 0.1)

# Single pass (one epoch) over the training set.
# BUG FIX: the message said 'train_imags' (typo for 'train_imgs').
print('len(train_imgs) before learning', len(train_imgs))

for i in range(len(train_imgs)):
    ANN.train(train_imgs[i], train_labels_one_hot[i])

# Spot-check the first 20 test samples.
for i in range(20):
    res = ANN.run(test_imgs[i])
    print('test_labels[i], argmax, max, i: ',test_labels[i], np.argmax(res), np.max(res), i)

# Overall accuracy on both splits.
corrects, wrongs = ANN.evaluate(train_imgs, train_labels)
print('accuracy train:', corrects / (corrects + wrongs))
corrects, wrongs = ANN.evaluate(test_imgs, test_labels)
print('accuracy test:', corrects / (corrects + wrongs))

# Per-digit precision/recall from the training-set confusion matrix.
cm = ANN.confusion_matrix(train_imgs, train_labels)
print(cm)

for i in range(10):
    print('digit:', i, 'precision', ANN.precision(i, cm), 'recall:', ANN.recall(i, cm))

运行结果:

人工神经网络数据预测 人工神经网络数据集_人工神经网络数据预测

第二种方法,使用Tensorflow和Keras的深度学习框架,使代码简单明了。

代码如下:

#Ann_KerasTf.py
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import utils
import matplotlib.pyplot as plt

# Fetch MNIST through the Keras datasets helper (downloads on first use).
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten each 28x28 image into a 784-element float32 vector and scale the
# pixel intensities from [0, 255] down to [0, 1] in one step.
num_pixels = X_train.shape[1] * X_train.shape[2]
X_train = X_train.reshape((X_train.shape[0], num_pixels)).astype('float32') / 255.0
X_test = X_test.reshape((X_test.shape[0], num_pixels)).astype('float32') / 255.0

# One-hot encode the labels for categorical cross-entropy.
y_train = utils.to_categorical(y_train)
y_test = utils.to_categorical(y_test)

num_classes = y_test.shape[1]

def baseline_model():
    """Build and compile a one-hidden-layer softmax classifier for MNIST."""
    # Passing the layer list straight to Sequential is equivalent to
    # calling model.add() once per layer.
    model = Sequential([
        Dense(100, input_dim = num_pixels, kernel_initializer = 'normal', activation = 'relu'),
        Dense(num_classes, kernel_initializer = 'normal', activation = 'softmax'),
    ])
    # Other available optimizers: SGD, RMSprop, Adam, Adadelta, Adagrad,
    # Adamax, Nadam, Ftrl.
    model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
    return model

# Build the model, then train for 10 epochs while reporting validation
# metrics on the held-out test split after every epoch.
model = baseline_model()
model.fit(X_train, y_train,
          validation_data = (X_test, y_test),
          epochs = 10, batch_size = 100, verbose = 2)

# Final evaluation: scores = [loss, accuracy] on the test set.
scores = model.evaluate(X_test, y_test, verbose = 0)
print('scores', scores)
print('Baseline Error: %.3f%%' % (100-scores[1]*100))

运行结果:

人工神经网络数据预测 人工神经网络数据集_lua_02