The previous installment introduced the matrix class used to store images. Before writing a convolutional neural network, let's first implement a three-layer neural network in C++, using handwritten digit recognition as the example. Yes, that classic example, worn to the point of cliché.
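
For reference, everything below assumes the matrix class from that installment looks roughly like the following. This is a minimal sketch reconstructed from how the later code uses the class, not the original implementation, so details may differ:

#include <algorithm>
#include <random>

// minimal sketch of the assumed matrix class: flat float storage plus the
// members and element-wise operators that the code below actually uses
class matrix
{
public:
	float *x = nullptr;              // contiguous storage: cols*rows*chans floats
	int cols = 0, rows = 0, chans = 0;

	matrix() {}
	matrix(int c, int r, int ch) : x(new float[c * r * ch]()), cols(c), rows(r), chans(ch) {}
	matrix(const matrix &m) : matrix(m.cols, m.rows, m.chans) { std::copy(m.x, m.x + size(), x); }
	matrix &operator=(matrix m) { swap(m); return *this; }   // copy-and-swap
	~matrix() { delete[] x; }

	void swap(matrix &m)
	{
		std::swap(x, m.x); std::swap(cols, m.cols);
		std::swap(rows, m.rows); std::swap(chans, m.chans);
	}
	int size() const { return cols * rows * chans; }
	void fill(float v) { std::fill(x, x + size(), v); }
	void fill_random_normal(float stddev)            // zero-mean Gaussian init
	{
		static std::mt19937 gen(0);
		std::normal_distribution<float> d(0.f, stddev);
		for (int i = 0; i < size(); i++) x[i] = d(gen);
	}
	// element-wise arithmetic used by the training loop
	matrix operator+(const matrix &m) const { matrix o(*this); for (int i = 0; i < size(); i++) o.x[i] += m.x[i]; return o; }
	matrix operator-(const matrix &m) const { matrix o(*this); for (int i = 0; i < size(); i++) o.x[i] -= m.x[i]; return o; }
	matrix operator*(const matrix &m) const { matrix o(*this); for (int i = 0; i < size(); i++) o.x[i] *= m.x[i]; return o; }
	matrix operator*(float s) const { matrix o(*this); for (int i = 0; i < size(); i++) o.x[i] *= s; return o; }
	matrix &operator+=(const matrix &m) { for (int i = 0; i < size(); i++) x[i] += m.x[i]; return *this; }
};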

First, define a few basic helper functions.

Functions for inspecting a matrix:

// print every element of a matrix, followed by a separator
void show(matrix &m)
{
	for (int i = 0; i < m.size(); i++)  cout << m.x[i] << endl;
	cout << "=============" << endl;
}

// print a matrix's dimensions
void show_info(matrix &m)
{
	cout << "cols is " << m.cols << endl;
	cout << "rows is " << m.rows << endl;
	cout << "chans is " << m.chans << endl;
}

For shuffling the dataset:

vector<int> get_a_sequence(int length)
{
	vector<int> sequence;
	for (int i = 0; i < length; i++) { sequence.push_back(i); }
	return sequence;
}

// reorder images and labels in tandem, following a freshly shuffled index sequence
void dataset_shuffle(vector<vector<float>> &train_images, vector<vector<int>> &Labels, vector<int> &sequence)
{
	random_shuffle(sequence.begin(), sequence.end());
	vector<vector<float>> new_train_images = train_images;
	vector<vector<int>> new_train_labels = Labels;
	for (int i = 0; i < train_images.size(); i++)
	{
		int index = sequence[i];
		train_images[i] = new_train_images[index];
		Labels[i] = new_train_labels[index];
	}
}
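
One caveat: std::random_shuffle, used above, was deprecated in C++14 and removed in C++17. If your compiler rejects it, a drop-in replacement built on std::shuffle looks like this (a sketch; how you seed the engine is up to you):

#include <algorithm>
#include <random>
#include <vector>

// C++17-friendly replacement for the random_shuffle call above
void shuffle_sequence(std::vector<int> &sequence)
{
	static std::mt19937 rng(std::random_device{}());
	std::shuffle(sequence.begin(), sequence.end(), rng);
}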

Getting the index of a vector's maximum element, used by the test function:

inline int Argmax(float *x, int size)
{
	float max = x[0];   // start from the first element so negative inputs also work
	int argmax = 0;
	for (int i = 1; i < size; i++)
	{
		if (max <= x[i])
		{
			max = x[i];
			argmax = i;
		}
	}
	return argmax;
}

The test function (since nothing is encapsulated yet, it spells out the three fully connected layers):

void test(vector<vector<float>> &test_images, vector<int> &test_labels, vector<matrix> &parameter)
{
	cout << "Testing..." << endl;

	vector<float> Img;

	matrix W1, W2, B1, B2;
	W1 = parameter[0];
	W2 = parameter[1];
	B1 = parameter[2];
	B2 = parameter[3];

	matrix Hide, Hide_s;
	matrix Output, Output_s;

	int correct_num = 0;
	float accuracy_rate;

	for (int k = 0; k < test_images.size(); k++)
	{
		Img = test_images[k];
		matrix img(28, 28, 1); vector_to_matrix(img.x, Img);
		//forward pass
		Hide = M_dot_V(W1, img) + B1;
		Hide_s = sigmoid_f(Hide);

		Output = M_dot_V(W2, Hide_s) + B2;
		Output_s = sigmoid_f(Output);

		int L = Argmax(Output_s.x, 10);
		if (L == test_labels[k])  correct_num++;
	}
	accuracy_rate = 1.0f * correct_num / test_images.size();
	cout << "Test accuracy: " << accuracy_rate << endl;
}

Reading the dataset, written as a .h file. MNIST's IDX files store their header fields big-endian, so every 4-byte field is byte-swapped after reading; the loaders also try both common filename variants (e.g. t10k-images.idx3-ubyte and t10k-images-idx3-ubyte), since different mirrors name the files differently:

#pragma once


#include <iostream>  // cout
#include <sstream>
#include <fstream>
#include <iomanip>   // setw
#include <random>
#include <stdio.h>
#include <vector>    // std::vector
#include <string>    // std::string
#include <algorithm> // std::reverse


namespace mnist
{
std::string data_name() {return std::string("MNIST");}

// from tiny_cnn
template<typename T>
T* reverse_endian(T* p) {
	std::reverse(reinterpret_cast<char*>(p), reinterpret_cast<char*>(p) + sizeof(T));
	return p;
}

// from tiny_cnn (kinda)
bool parse_mnist_labels(const std::string& label_file, std::vector<int> *labels) {
	std::ifstream ifs(label_file.c_str(), std::ios::in | std::ios::binary);

	if (ifs.bad() || ifs.fail()) 
	{
		return false;
	}
	int magic_number, num_items;

	ifs.read((char*) &magic_number, 4);
	ifs.read((char*) &num_items, 4);

	reverse_endian(&magic_number);
	reverse_endian(&num_items);

	for (size_t i = 0; i < num_items; i++) {
		unsigned char label;
		ifs.read((char*) &label, 1);
		labels->push_back((int) label);
	}
	return true;
}

// from tiny_cnn
struct mnist_header {
	int magic_number;
	int num_items;
	int num_rows;
	int num_cols;
};

// from tiny_cnn (kinda)
bool parse_mnist_images(const std::string& image_file, 
	std::vector<std::vector<float>> *images,
	float scale_min = -1.0, float scale_max = 1.0,
	int x_padding = 0, int y_padding = 0) 
{
	std::ifstream ifs(image_file.c_str(), std::ios::in | std::ios::binary);

	if (ifs.bad() || ifs.fail())
	{
			return false;
	}
	mnist_header header;

	// read header
	ifs.read((char*) &header.magic_number, 4);
	ifs.read((char*) &header.num_items, 4);
	ifs.read((char*) &header.num_rows, 4);
	ifs.read((char*) &header.num_cols, 4);

	reverse_endian(&header.magic_number);
	reverse_endian(&header.num_items);
	reverse_endian(&header.num_rows);
	reverse_endian(&header.num_cols);

		
	const int width = header.num_cols + 2 * x_padding;
	const int height = header.num_rows + 2 * y_padding;

	// read each image
	for (size_t i = 0; i < header.num_items; i++) 
	{
		std::vector<float> image;
		std::vector<unsigned char> image_vec(header.num_rows * header.num_cols);

		ifs.read((char*) &image_vec[0], header.num_rows * header.num_cols);
		image.resize(width * height, scale_min);
	
		for (size_t y = 0; y < header.num_rows; y++)
		{
			for (size_t x = 0; x < header.num_cols; x++)
				image[width * (y + y_padding) + x + x_padding] = 
					(image_vec[y * header.num_cols + x] / 255.0f) * (scale_max - scale_min) + scale_min;
		}
		
		images->push_back(image);
	}
	return true;
}

// == load data (MNIST-28x28x1 size, no padding, pixel range -1 to 1)
bool parse_test_data(std::string &data_path, std::vector<std::vector<float>> &test_images, std::vector<int> &test_labels, 
	float min_val=-1.f, float max_val=1.f, int padx=0, int pady=0)
{
	if(!parse_mnist_images(data_path+"/t10k-images.idx3-ubyte", &test_images, min_val, max_val, padx, pady)) 
		if (!parse_mnist_images(data_path + "/t10k-images-idx3-ubyte", &test_images, min_val, max_val, padx, pady))
			return false;
	if(!parse_mnist_labels(data_path+"/t10k-labels.idx1-ubyte", &test_labels)) 
		if (!parse_mnist_labels(data_path + "/t10k-labels-idx1-ubyte", &test_labels)) return false;
	return true;
}
bool parse_train_data(std::string &data_path, std::vector<std::vector<float>> &train_images, std::vector<int> &train_labels, 
	float min_val=-1.f, float max_val=1.f, int padx=0, int pady=0)
{
	if(!parse_mnist_images(data_path+"/train-images.idx3-ubyte", &train_images, min_val, max_val, padx, pady))
		if (!parse_mnist_images(data_path + "/train-images-idx3-ubyte", &train_images, min_val, max_val, padx, pady))
			return false;
	if(!parse_mnist_labels(data_path+"/train-labels.idx1-ubyte", &train_labels))
		if (!parse_mnist_labels(data_path + "/train-labels-idx1-ubyte", &train_labels)) return false;
	return true;
}
}

Dataset processing functions:

//======================== dataset processing ==============================================

//one-hot encode the label data
vector<vector<int>> Onehot_encoding(vector<int> &labels, int type_nums)
{
	vector<vector<int>> labels_encoding;
	for (int i = 0; i < labels.size(); i++)
	{
		vector<int> temp(type_nums, 0);
		temp[labels[i]] = 1;
		labels_encoding.push_back(temp);
	}
	return labels_encoding;
}
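
As a quick example of what this produces (values written out by hand):

// e.g. labels {3, 0} one-hot encoded into 10 classes:
vector<int> labels = { 3, 0 };
vector<vector<int>> enc = Onehot_encoding(labels, 10);
// enc[0] == { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }
// enc[1] == { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }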

//split the dataset into batches of size batch_size; used for the sample images
vector<vector<vector<float>>> split_batch_x(vector<vector<float>> &imgs, int batch_size)
{
	vector<vector<vector<float>>> x;
	int _size = imgs.size() / batch_size;
	for (int i = 0; i < _size; i++)
	{
		vector<vector<float>> batch_x;
		for (int j = 0; j < batch_size; j++)
		{
			batch_x.push_back(imgs[j + i * batch_size]);
		}
		x.push_back(batch_x);
	}
	return x;
}

//split the dataset into batches of size batch_size; used for the sample labels
vector<vector<vector<int>>> split_batch_y(vector<vector<int>> &labels, int batch_size)
{
	vector<vector<vector<int>>> y;
	int _size = labels.size() / batch_size;
	for (int i = 0; i < _size; i++)
	{
		vector<vector<int>> batch_y;
		for (int j = 0; j < batch_size; j++)
		{
			batch_y.push_back(labels[j + i * batch_size]);
		}
		y.push_back(batch_y);
	}
	return y;
}
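
Note that the integer division drops any trailing samples that do not fill a complete batch. With the full MNIST training set and batch_size = 10 this is moot, since 60000 divides evenly (a hypothetical usage sketch):

// 60000 images with batch_size = 10 split cleanly:
vector<vector<vector<float>>> x = split_batch_x(train_images, 10);
// x.size() == 6000, and each x[i].size() == 10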

The activation function and its derivative; sigmoid is used here. Note that sigmoid_df takes the already-activated value a = σ(z) and returns a·(1 − a), which equals σ′(z); this is why the backpropagation code later passes Output_s and Hide_s rather than the pre-activation values:

inline matrix sigmoid_f(matrix &m)
{
	matrix out(m.cols, m.rows, m.chans);
	for (int i = 0; i < out.size(); i++) out.x[i] = 1.0f / (1.0f + exp(-(m.x[i])));
	return out;
}


inline matrix sigmoid_df(matrix &m)
{
	matrix out(m.cols, m.rows, m.chans);
	for (int i = 0; i < out.size(); i++) out.x[i] = m.x[i] * (1.f - m.x[i]);
	return out;
}

Next, a few supplementary operations for the matrix class:

//======================= supplementary matrix operations ====================================
//copy a vector into a matrix's raw storage
inline void vector_to_matrix(float *m, vector<int> &v)
{
	for (int i = 0; i < v.size(); i++)  m[i] = v[i];
}
inline void vector_to_matrix(float *m, vector<float> &v)
{
	for (int i = 0; i < v.size(); i++)  m[i] = v[i];
}

//matrix transpose
inline matrix Transposition(matrix &m)
{
	int _w = m.cols;
	int _h = m.rows;
	matrix out(_h, _w, 1);
	for (int i = 0; i < _h; i++)
	{
		for (int j = 0; j < _w; j++)
		{
			out.x[j + i * _w] = m.x[i + j * _h];
		}
	}
	return out;
}

//matrix * vector, yielding a vector; both are stored flat as 1-D arrays in the matrix class, you know the drill
inline matrix M_dot_V(matrix &A, matrix &B)
{
	int _w = A.cols;
	int _h = A.rows;
	matrix out(_w, 1, 1);
	out.fill(0);
	for (int i = 0; i < _w; i++)
	{
		for (int j = 0; j < _h; j++)
		{
			out.x[i] += A.x[j + i * _h] * B.x[j];
		}
	}
	return out;
}

//vector * vector (an outer product), yielding a matrix; again, everything is stored flat as 1-D arrays in the matrix class
inline matrix V_dot_V(matrix &A, matrix &B)
{
	int _w = A.size();
	int _h = B.size();
	matrix out(_w, _h, 1);
	for (int i = 0; i < _w; i++)
	{
		for (int j = 0; j < _h; j++)
		{
			out.x[j + i * _h] = A.x[i] * B.x[j];
		}
	}
	return out;
}
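
To make the storage convention concrete, here is a hypothetical shape check: a weight matrix constructed as matrix(out_dim, in_dim, 1) times an in_dim vector yields an out_dim vector, and the outer product of those two vectors lands back in the weight matrix's own layout, which is exactly what the gradient computation below relies on:

// hypothetical shape sanity check for M_dot_V and V_dot_V
void shape_check()
{
	matrix A(30, 784, 1);        // same layout as W1: 30 outputs, 784 inputs
	matrix v(784, 1, 1);         // an input vector
	matrix h = M_dot_V(A, v);    // h.size() == 30
	matrix g = V_dot_V(h, v);    // matrix(30, 784, 1): same shape as A,
	                             // so it can serve directly as A's gradient
}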

Don't forget to pull in the relevant headers as well:

#include <vector>
#include <iostream>
#include <algorithm>  // random_shuffle
#include <cmath>      // exp
using namespace std;
using namespace mnist;

Finally, the main function (remember to download the handwritten digit dataset and put it in the corresponding directory):

int main()
{
	string data_path = "data/mnist/";

	int batch_size = 10;
	int num_input = 28 * 28 * 1;
	int num_hide = 30;
	int	num_classes = 10;
	float lr = 3; // learning rate
	int epochs = 1;
	//====================================================================
	//read the dataset
	vector<vector<float>> train_images, train_images_copy;
	vector<int> train_labels;
	vector<vector<float>> test_images;
	vector<int> test_labels;

	cout << "读取数据中" << endl;
	if (!parse_test_data(data_path, test_images, test_labels)) { std::cerr << "error: could not parse data.\n"; return 1; }
	if (!parse_train_data(data_path, train_images, train_labels)) { std::cerr << "error: could not parse data.\n"; return 1; }
	cout << "数据读取完成" << endl;

	train_images_copy = train_images;
	//====================================================================
	vector<vector<int>> Labels;
	Labels = Onehot_encoding(train_labels, 10);

	vector<vector<vector<float>>> x;
	vector<vector<float>> batch_x;
	vector<float> Img;
	vector<vector<vector<int>>> y;
	vector<vector<int>> batch_y;
	vector<int> Label;

	vector<int> sequence;
	sequence = get_a_sequence(train_images.size());  // generate an index sequence; shuffling it drives the dataset shuffle
	//========================================================================
	//fully connected layers
	matrix W1(num_hide, num_input, 1);
	matrix B1(num_hide, 1, 1);
	matrix W2(num_classes, num_hide, 1);
	matrix B2(num_classes, 1, 1);

	W1.fill_random_normal(1.f);
	B1.fill_random_normal(1.f);
	W2.fill_random_normal(1.f);
	B2.fill_random_normal(1.f);

	matrix Hide, Hide_s;       //hidden layer
	matrix Output, Output_s;   //output layer
	matrix delta_1, delta_2;   //output-layer and hidden-layer errors
	matrix W2_T;               //transposed weights, for backpropagation
	matrix W1_t, W2_t, B1_t, B2_t;   //parameter gradients
	//========================================================================
	//start training
	for (int epoch = 0; epoch < epochs; epoch++)
	{
		dataset_shuffle(train_images, Labels, sequence);  //randomly shuffle the dataset

		x = split_batch_x(train_images, batch_size);
		y = split_batch_y(Labels, batch_size);
		//=====================================================================
		for (int m = 0; m < x.size(); m++)
		{
			batch_x = x[m];
			batch_y = y[m];

			matrix W1_t_sum(W1.cols, W1.rows, W1.chans); W1_t_sum.fill(0);
			matrix W2_t_sum(W2.cols, W2.rows, W2.chans); W2_t_sum.fill(0);
			matrix B1_t_sum(B1.cols, B1.rows, B1.chans); B1_t_sum.fill(0);
			matrix B2_t_sum(B2.cols, B2.rows, B2.chans); B2_t_sum.fill(0);

			for (int n = 0; n < batch_x.size(); n++)
			{
				Img = batch_x[n];
				Label = batch_y[n];
				matrix img(28, 28, 1); vector_to_matrix(img.x, Img);
				matrix label(10, 1, 1); vector_to_matrix(label.x, Label);

				Hide = M_dot_V(W1, img) + B1;
				Hide_s = sigmoid_f(Hide);

				Output = M_dot_V(W2, Hide_s) + B2;
				Output_s = sigmoid_f(Output);

				//=======================================================================
				//backpropagate the error
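				// For the quadratic cost C = 1/2 * ||Output_s - label||^2 with sigmoid
				// activations, the output-layer error is
				//   delta_1 = (Output_s - label) ⊙ Output_s ⊙ (1 - Output_s)
				// and the hidden-layer error propagates back through W2:
				//   delta_2 = (W2^T · delta_1) ⊙ Hide_s ⊙ (1 - Hide_s)
				// where ⊙ is element-wise multiplication (sigmoid_df plus operator*)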
				delta_1 = (Output_s - label)*sigmoid_df(Output_s);   //output-layer error
				W2_T = Transposition(W2);
				delta_2 = M_dot_V(W2_T, delta_1)*sigmoid_df(Hide_s);   //hidden-layer error

				//=======================================================================
				//compute the parameter gradients
				B2_t = delta_1;
				B1_t = delta_2;
				W2_t = V_dot_V(delta_1, Hide_s);
				W1_t = V_dot_V(delta_2, img);

				B2_t_sum += B2_t;
				B1_t_sum += B1_t;
				W2_t_sum += W2_t;
				W1_t_sum += W1_t;
			}
			//update the parameters ===============================================================
			W1 = W1 - W1_t_sum * (lr / batch_size);
			W2 = W2 - W2_t_sum * (lr / batch_size);
			B1 = B1 - B1_t_sum * (lr / batch_size);
			B2 = B2 - B2_t_sum * (lr / batch_size);

			if (m % 100 == 0)  cout << "===================== training epoch " << epoch << ", batch " << m << " =====================" << endl;
		}
	}
	cout << "训练完成" << endl;

	vector<matrix> parameter;
	parameter.push_back(W1);
	parameter.push_back(W2);
	parameter.push_back(B1);
	parameter.push_back(B2);

	test(train_images_copy, train_labels, parameter);  //accuracy on the training set
	test(test_images, test_labels, parameter);         //accuracy on the test set

	system("pause");
	return 0;
}
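
If you are not building inside Visual Studio, a typical single-file build looks like this (assuming all of the snippets above are pasted into one main.cpp with the dataset-reading header saved alongside it; the file names here are just an example):

g++ -std=c++11 -O2 main.cpp -o mnist_nn
./mnist_nn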

Finally, the experimental results:

[screenshot: test accuracy output after 50 epochs]


Fifty epochs reach over 93% accuracy. Next, the result after 100 epochs:

[screenshot: test accuracy output after 100 epochs]


As you can see, accuracy exceeds 94%. A plain fully connected network more or less tops out around this point, though; for better results we need a convolutional neural network, which I'll fill in when I find the time.