第 81 天: 数据集读取与存储

数据 train.format:下载地址 之前使用arff文件存储数据,现在用图片数据方式存储,按结构化的方式来存取 (m*n 点阵和类别)

  1. 这里使用了 java.util.List 类,在前面实现的数据结构,很多可以直接在 java.util 包中找到。与自己读取并使用 double[][] 来管理数据相比,List 类允许添加数据, 更加灵活。当然效率上会有点影响,可能是常数倍。
  2. tempLine.split 是以前没有使用过的功能。其实 String 类的方法是比较丰富的,为了防止开发者乱改,该类是 final 的,不允许继承。
  3. List<Instance> 是指列表里面只能存储 Instance 类型的变量。当然,Instance 的子类也行。
  4. Instance 类也是 public 的。Instance 的 label 是 Double 类型的,换成 double 也可以,如果已经确定是分类问题,换成 int 更好。
  5. 最长的就是构造函数,从文件中读入数据。
package xjx.cnn;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * An in-memory dataset: a list of {@link Instance} records, each holding an
 * array of condition attributes and an optional label. Records can be read
 * from a delimited text file or appended one by one.
 */
public class Dataset {

	/**
	 * All instances, kept in the order they were read or appended.
	 */
	private List<Instance> instances;

	/**
	 * Column index of the label within a raw data row. -1 means that raw rows
	 * carry no label column.
	 */
	private int labelIndex;

	/**
	 * The largest label seen so far by {@link Instance#Instance(double[])}.
	 */
	private double maxLabel = -1;

	/**
	 * The first constructor. Creates an empty dataset whose raw rows have no
	 * label column.
	 */
	public Dataset() {
		labelIndex = -1;
		instances = new ArrayList<Instance>();
	}// Of the first constructor

	/**
	 * The second constructor. Reads every non-blank line of the file, splits it
	 * with the given separator, parses each field as a double and stores the
	 * row as one instance. If the file cannot be read, a message is printed and
	 * the dataset keeps whatever was loaded before the failure.
	 *
	 * @param paraFilename
	 *            The filename.
	 * @param paraSplitSign
	 *            The field separator (a regular expression, as accepted by
	 *            String.split). Often comma.
	 * @param paraLabelIndex
	 *            The column index of the label. Often the last column.
	 * @throws NumberFormatException
	 *             If a field cannot be parsed as a double.
	 */
	public Dataset(String paraFilename, String paraSplitSign, int paraLabelIndex) {
		instances = new ArrayList<Instance>();
		labelIndex = paraLabelIndex;

		File tempFile = new File(paraFilename);
		// try-with-resources closes the reader on both normal and abnormal exit.
		try (BufferedReader tempReader = new BufferedReader(new FileReader(tempFile))) {
			String tempLine;
			while ((tempLine = tempReader.readLine()) != null) {
				// A blank line splits into a single empty field, which cannot
				// be parsed; skip it instead of failing.
				if (tempLine.trim().isEmpty()) {
					continue;
				} // Of if

				String[] tempDatum = tempLine.split(paraSplitSign);
				// A line made only of separators yields no fields at all.
				if (tempDatum.length == 0) {
					continue;
				} // Of if

				double[] tempData = new double[tempDatum.length];
				for (int i = 0; i < tempDatum.length; i++) {
					tempData[i] = Double.parseDouble(tempDatum[i]);
				} // Of for i
				instances.add(new Instance(tempData));
			} // Of while
		} catch (IOException e) {
			System.out.println("Unable to load " + paraFilename);
		} // Of try
	}// Of the second constructor

	/**
	 * Append an instance.
	 *
	 * @param paraInstance
	 *            The given record.
	 */
	public void append(Instance paraInstance) {
		instances.add(paraInstance);
	}// Of append

	/**
	 * Append an instance built from separate attributes and label.
	 *
	 * @param paraAttributes
	 *            The condition attributes. The array is stored as is, not
	 *            copied.
	 * @param paraLabel
	 *            The label. May be null.
	 */
	public void append(double[] paraAttributes, Double paraLabel) {
		instances.add(new Instance(paraAttributes, paraLabel));
	}// Of append

	/**
	 * Getter.
	 *
	 * @param paraIndex
	 *            The position of the instance.
	 * @return The instance at the given position.
	 */
	public Instance getInstance(int paraIndex) {
		return instances.get(paraIndex);
	}// Of getInstance

	/**
	 * Getter.
	 *
	 * @return The number of instances.
	 */
	public int size() {
		return instances.size();
	}// Of size

	/**
	 * Getter.
	 *
	 * @param paraIndex
	 *            The position of the instance.
	 * @return The attributes of the instance at the given position.
	 */
	public double[] getAttributes(int paraIndex) {
		return instances.get(paraIndex).getAttributes();
	}// Of getAttributes

	/**
	 * Getter.
	 *
	 * @param paraIndex
	 *            The position of the instance.
	 * @return The label of the instance at the given position, or null if it
	 *         has none.
	 */
	public Double getLabel(int paraIndex) {
		return instances.get(paraIndex).getLabel();
	}// Of getLabel

	/**
	 * Unit test.
	 *
	 * @param args
	 *            Not used.
	 */
	public static void main(String[] args) {
		Dataset tempData = new Dataset("d:/data/train.format", ",", 784);
		// Loading may have failed; do not index into an empty dataset.
		if (tempData.size() == 0) {
			System.out.println("No instance was loaded.");
			return;
		} // Of if
		Instance tempInstance = tempData.getInstance(0);
		System.out.println("The first instance is: " + tempInstance);
	}// Of main

	/**
	 * An instance. It is an inner class because building one from a raw row
	 * needs the label index of the enclosing dataset.
	 */
	public class Instance {
		/**
		 * Condition attributes.
		 */
		private double[] attributes;

		/**
		 * Label. Null if the instance has none.
		 */
		private Double label;

		/**
		 * The first constructor.
		 *
		 * @param paraAttrs
		 *            The condition attributes, stored without copying.
		 * @param paraLabel
		 *            The label.
		 */
		private Instance(double[] paraAttrs, Double paraLabel) {
			attributes = paraAttrs;
			label = paraLabel;
		}// Of the first constructor

		/**
		 * The second constructor. Splits a raw row into label and attributes
		 * according to the label index of the enclosing dataset. With label
		 * index -1 the whole row is taken as attributes (stored without
		 * copying); otherwise the label column is removed from the row,
		 * wherever it is located.
		 *
		 * @param paraData
		 *            The raw row.
		 */
		public Instance(double[] paraData) {
			if (labelIndex == -1) {
				attributes = paraData;
				return;
			} // Of if

			label = paraData[labelIndex];
			if (label > maxLabel) {
				maxLabel = label;
			} // Of if

			// Keep everything before and after the label column. Dropping the
			// last column only works when the label is the first or last one.
			attributes = new double[paraData.length - 1];
			System.arraycopy(paraData, 0, attributes, 0, labelIndex);
			System.arraycopy(paraData, labelIndex + 1, attributes, labelIndex,
					paraData.length - 1 - labelIndex);
		}// Of the second constructor

		/**
		 * Getter.
		 *
		 * @return The condition attributes (the internal array, not a copy).
		 */
		public double[] getAttributes() {
			return attributes;
		}// Of getAttributes

		/**
		 * Getter.
		 *
		 * @return The label, or null if this instance has none.
		 */
		public Double getLabel() {
			// The label stays null unless a constructor set it, so there is no
			// need to consult the label index of the dataset here.
			return label;
		}// Of getLabel

		/**
		 * toString.
		 *
		 * @return The attributes followed by the label.
		 */
		@Override
		public String toString() {
			return Arrays.toString(attributes) + ", " + label;
		}// Of toString
	}// Of class Instance
}// Of class Dataset

一个管理卷积核尺寸的类. 基础代码, 在网络运行时才能理解它们的作用.

  1. 支持 Size 的相除. 但并不是所有的 Size 都可以除.
  2. 支持 Size 的相减.
package xjx.cnn;

/**
 * An immutable (width, height) pair used to manage the sizes of convolution
 * kernels and feature maps.
 */
public class Size {
	/**
	 * Cannot be changed after initialization.
	 */
	public final int width;

	/**
	 * Cannot be changed after initialization.
	 */
	public final int height;

	/**
	 * The first constructor.
	 *
	 * @param paraWidth
	 *            The given width.
	 * @param paraHeight
	 *            The given height.
	 */
	public Size(int paraWidth, int paraHeight) {
		width = paraWidth;
		height = paraHeight;
	}// Of the first constructor

	/**
	 * Divide a scale with another one. For example (4, 12) / (2, 3) = (2, 4).
	 * Both dimensions must be divisible without remainder.
	 *
	 * @param paraScaleSize
	 *            The given scale size.
	 * @return The new size.
	 * @throws ArithmeticException
	 *             If a dimension of the given scale size is zero.
	 * @throws RuntimeException
	 *             If a dimension is not an exact multiple of the scale.
	 */
	public Size divide(Size paraScaleSize) {
		// Same exception type as the bare integer division would raise, but
		// with a message that names the operands.
		if (paraScaleSize.width == 0 || paraScaleSize.height == 0) {
			throw new ArithmeticException(
					"Unable to divide " + this + " with zero-sized " + paraScaleSize);
		} // Of if

		int resultWidth = width / paraScaleSize.width;
		int resultHeight = height / paraScaleSize.height;
		// Multiplying back detects a remainder in either dimension.
		if (resultWidth * paraScaleSize.width != width
				|| resultHeight * paraScaleSize.height != height) {
			throw new RuntimeException("Unable to divide " + this + " with " + paraScaleSize);
		} // Of if
		return new Size(resultWidth, resultHeight);
	}// Of divide

	/**
	 * Subtract a scale with another one, and add a value. For example (4, 12) -
	 * (2, 3) + 1 = (3, 10).
	 *
	 * @param paraScaleSize
	 *            The given scale size.
	 * @param paraAppend
	 *            The appended size to both dimensions.
	 * @return The new size.
	 */
	public Size subtract(Size paraScaleSize, int paraAppend) {
		int resultWidth = width - paraScaleSize.width + paraAppend;
		int resultHeight = height - paraScaleSize.height + paraAppend;
		return new Size(resultWidth, resultHeight);
	}// Of subtract

	/**
	 * toString.
	 *
	 * @return The string showing itself, in the form "(width, height)".
	 */
	@Override
	public String toString() {
		String resultString = "(" + width + ", " + height + ")";
		return resultString;
	}// Of toString

	/**
	 * Unit test.
	 *
	 * @param args
	 *            Not used.
	 */
	public static void main(String[] args) {
		Size tempSize1 = new Size(4, 6);
		Size tempSize2 = new Size(2, 2);
		System.out.println(
				"" + tempSize1 + " divide " + tempSize2 + " = " + tempSize1.divide(tempSize2));

		// (2, 2) cannot be divided by (4, 6); show the failure instead of
		// hiding it.
		try {
			System.out.println(
					"" + tempSize2 + " divide " + tempSize1 + " = " + tempSize2.divide(tempSize1));
		} catch (Exception ee) {
			System.out.println(ee);
		} // Of try

		System.out.println(
				"" + tempSize1 + " - " + tempSize2 + " + 1 = " + tempSize1.subtract(tempSize2, 1));
	}// Of main
}// Of class Size

以前我们使用整数型常量 (第 51 天) 和字符型常量 (第 74 天), 其实还可以有枚举类型. 后面的程序我们才能看到其用法.

package xjx.cnn;

/**
 * Enumeration type used in place of integer or character constants.
 * Presumably it names the kinds of layers in the network (the surrounding
 * text discusses convolution, pooling and full-connection layers) - TODO
 * confirm.
 */
public enum LayerTypeEnum {
	// NOTE(review): no constants are visible in this listing; they may have
	// been lost when the listing was extracted. Verify against the original
	// source.
}//Of enum LayerTypeEnum

第 82 天: 数学操作



  • 卷积层(Convolutions)
  • 池化层(Subsampling)
  • 全连接层(Full connection)
  • 激活函数


  • 目的
  • 参数:
假设是一张5 X 5 的单通道图片,通过使用3 X 3 大小的卷积核运算得到一个 3 X 3大小的运算结果(图片像素数值仅供参考)

如果换一个卷积核大小或者加入很多层卷积之后,图像可能最后就变成了1 X 1 大小,这不是我们希望看到的结果。并且对于原始图片当中的边缘像素来说,只计算了一遍,而对于中间的像素会有很多次过滤器与之计算,这样导致对边缘信息的丢失。

  • 缺点

Valid and Same卷积

  • Valid: 不填充,也就是最终大小为(设原图大小为 $N \times N$,卷积核大小为 $F \times F$)
    $(N - F + 1) \times (N - F + 1)$
  • Same: 输出大小与原图大小一致,设填充为 $P$,那么 $N - F + 1$ 变成了 $N + 2P - F + 1$,令其等于 $N$ 可得
    $P = \frac{F - 1}{2}$

java数据采集分析平台 java 数据集_ide_26


  • 最大池化:Max Pooling,取窗口内的最大值作为输出
  • 平均池化:Avg Pooling,取窗口内的所有值的均值作为输出


提高了Feature Map 的鲁棒性,防止过拟合


卷积层+激活层+池化层可以看成是CNN的特征学习/特征提取层,而学习到的特征(Feature Map)最终应用于模型任务(分类、回归):

  • 先对所有 Feature Map 进行扁平化(flatten, 即 reshape 成 1 x N 向量)
  • 再接一个或多个全连接层,进行模型学习


  1. interface Operator 定义了一个算式, 其主要目的是为了矩阵操作时对每个元素都做一遍计算, 所以要看 matrixOp 方法, 以及相应的调用才能明白其作用. 这种算式的写法比较绕, 其优点是灵活, 可以增加代码的复用性. 以 Operator 类型的变量 one_value 为例, 其最终目的是对每个元素 $x$ 获得 $1 - x$.
  2. interface OperatorOnTwo 与上一个类似, 不过它支持两个操作数, 进一步支持两个矩阵, 这样, 矩阵加法、减法就不需要单独写代码了.
  3. matrixOp 被重载了以支持不同的参数列表.
  4. rot180 将矩阵旋转 180 度. 通过两次翻转 (先左右翻转, 再上下翻转) 实现.
  5. convnValid 是卷积操作. convnFull 为其逆向操作.
  6. scaleMatrix 是均值池化.
  7. kronecker 是池化的逆向操作.
package xjx.cnn;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;

public class MathUtils {

	 * 不同按需操作员的界面
	public interface Operator extends Serializable {
		public double process(double value);

	 * 一减去值运算符
	public static final Operator one_value = new Operator() {
		private static final long serialVersionUID = 3752139491940330714L;

		public double process(double value) {
			return 1 - value;

	 * sigmoid 激活函数
	public static final Operator sigmoid = new Operator() {
		private static final long serialVersionUID = -1952718905019847589L;

		public double process(double value) {
			return 1 / (1 + Math.pow(Math.E, -value));

	 * 具有两个运算符的操作接口
	interface OperatorOnTwo extends Serializable {
		public double process(double a, double b);

	 * 加法.
	public static final OperatorOnTwo plus = new OperatorOnTwo() {
		private static final long serialVersionUID = -6298144029766839945L;

		public double process(double a, double b) {
			return a + b;

	 * 乘法.
	public static OperatorOnTwo multiply = new OperatorOnTwo() {

		private static final long serialVersionUID = -7053767821858820698L;

		public double process(double a, double b) {
			return a * b;

	 * 减法
	public static OperatorOnTwo minus = new OperatorOnTwo() {

		private static final long serialVersionUID = 7346065545555093912L;

		public double process(double a, double b) {
			return a - b;

	 * 输出矩阵
	public static void printMatrix(double[][] matrix) {
		for (int i = 0; i < matrix.length; i++) {
			String line = Arrays.toString(matrix[i]);
			line = line.replaceAll(", ", "\t");

	 * 将矩阵旋转180度
	public static double[][] rot180(double[][] matrix) {
		matrix = cloneMatrix(matrix);
		int m = matrix.length;
		int n = matrix[0].length;
		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n / 2; j++) {
				double tmp = matrix[i][j];
				matrix[i][j] = matrix[i][n - 1 - j];
				matrix[i][n - 1 - j] = tmp;
		for (int j = 0; j < n; j++) {
			for (int i = 0; i < m / 2; i++) {
				double tmp = matrix[i][j];
				matrix[i][j] = matrix[m - 1 - i][j];
				matrix[m - 1 - i][j] = tmp;
		return matrix;

	private static Random myRandom = new Random(2);

	 * 生成给定大小的随机矩阵。每个值取[-0.005, 0.095]中的值
	public static double[][] randomMatrix(int x, int y, boolean b) {
		double[][] matrix = new double[x][y];
		// int tag = 1;
		for (int i = 0; i < x; i++) {
			for (int j = 0; j < y; j++) {
				matrix[i][j] = (myRandom.nextDouble() - 0.05) / 10;
		return matrix;

	 * 生成具有给定长度的随机数组。每个值取[-0.005, 0.095]中的值
	public static double[] randomArray(int len) {
		double[] data = new double[len];
		for (int i = 0; i < len; i++) {
			//data[i] = myRandom.nextDouble() / 10 - 0.05;
			data[i] = 0;
		return data;

	 * 生成具有批量大小的随机卷积。
	public static int[] randomPerm(int size, int batchSize) {
		Set<Integer> set = new HashSet<Integer>();
		while (set.size() < batchSize) {
		int[] randPerm = new int[batchSize];
		int i = 0;
		for (Integer value : set)
			randPerm[i++] = value;
		return randPerm;

	 * 克隆矩阵。不要直接引用它
	public static double[][] cloneMatrix(final double[][] matrix) {
		final int m = matrix.length;
		int n = matrix[0].length;
		final double[][] outMatrix = new double[m][n];

		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				outMatrix[i][j] = matrix[i][j];
		return outMatrix;

	 * 在单个操作数上使用给定运算符的矩阵运算
	public static double[][] matrixOp(final double[][] ma, Operator operator) {
		final int m = ma.length;
		int n = ma[0].length;
		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				ma[i][j] = operator.process(ma[i][j]);
		return ma;

	 * 在两个操作数上使用给定运算符的矩阵运算
	public static double[][] matrixOp(final double[][] ma, final double[][] mb,
			final Operator operatorA, final Operator operatorB, OperatorOnTwo operator) {
		final int m = ma.length;
		int n = ma[0].length;
		if (m != mb.length || n != mb[0].length)
			throw new RuntimeException("ma.length:" + ma.length + "  mb.length:" + mb.length);

		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				double a = ma[i][j];
				if (operatorA != null)
					a = operatorA.process(a);
				double b = mb[i][j];
				if (operatorB != null)
					b = operatorB.process(b);
				mb[i][j] = operator.process(a, b);
		return mb;

	 * 将矩阵扩展到更大的矩阵(多次)
	public static double[][] kronecker(final double[][] matrix, final Size scale) {
		final int m = matrix.length;
		int n = matrix[0].length;
		final double[][] outMatrix = new double[m * scale.width][n * scale.height];

		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				for (int ki = i * scale.width; ki < (i + 1) * scale.width; ki++) {
					for (int kj = j * scale.height; kj < (j + 1) * scale.height; kj++) {
						outMatrix[ki][kj] = matrix[i][j];
		return outMatrix;

	 * 缩放矩阵
	public static double[][] scaleMatrix(final double[][] matrix, final Size scale) {
		int m = matrix.length;
		int n = matrix[0].length;
		final int sm = m / scale.width;
		final int sn = n / scale.height;
		final double[][] outMatrix = new double[sm][sn];
		if (sm * scale.width != m || sn * scale.height != n)
			throw new RuntimeException("scale matrix");
		final int size = scale.width * scale.height;
		for (int i = 0; i < sm; i++) {
			for (int j = 0; j < sn; j++) {
				double sum = 0.0;
				for (int si = i * scale.width; si < (i + 1) * scale.width; si++) {
					for (int sj = j * scale.height; sj < (j + 1) * scale.height; sj++) {
						sum += matrix[si][sj];
				outMatrix[i][j] = sum / size;
		return outMatrix;

	 * 完全卷积以获得更大的尺寸。它用于反向传播
	public static double[][] convnFull(double[][] matrix, final double[][] kernel) {
		int m = matrix.length;
		int n = matrix[0].length;
		final int km = kernel.length;
		final int kn = kernel[0].length;
		final double[][] extendMatrix = new double[m + 2 * (km - 1)][n + 2 * (kn - 1)];
		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				extendMatrix[i + km - 1][j + kn - 1] = matrix[i][j];
		return convnValid(extendMatrix, kernel);

	 * 卷积运算,从给定的矩阵和核,滑动和求和,以获得结果矩阵。它用于向前预测
	public static double[][] convnValid(final double[][] matrix, double[][] kernel) {
		// kernel = rot180(kernel);
		int m = matrix.length;
		int n = matrix[0].length;
		final int km = kernel.length;
		final int kn = kernel[0].length;
		int kns = n - kn + 1;
		final int kms = m - km + 1;
		final double[][] outMatrix = new double[kms][kns];

		for (int i = 0; i < kms; i++) {
			for (int j = 0; j < kns; j++) {
				double sum = 0.0;
				for (int ki = 0; ki < km; ki++) {
					for (int kj = 0; kj < kn; kj++)
						sum += matrix[i + ki][j + kj] * kernel[ki][kj];
				outMatrix[i][j] = sum;

		return outMatrix;

	 * 张量上的卷积
	public static double[][] convnValid(final double[][][][] matrix, int mapNoX,
			double[][][][] kernel, int mapNoY) {
		int m = matrix.length;
		int n = matrix[0][mapNoX].length;
		int h = matrix[0][mapNoX][0].length;
		int km = kernel.length;
		int kn = kernel[0][mapNoY].length;
		int kh = kernel[0][mapNoY][0].length;
		int kms = m - km + 1;
		int kns = n - kn + 1;
		int khs = h - kh + 1;
		if (matrix.length != kernel.length)
			throw new RuntimeException("length");
		final double[][][] outMatrix = new double[kms][kns][khs];
		for (int i = 0; i < kms; i++) {
			for (int j = 0; j < kns; j++)
				for (int k = 0; k < khs; k++) {
					double sum = 0.0;
					for (int ki = 0; ki < km; ki++) {
						for (int kj = 0; kj < kn; kj++)
							for (int kk = 0; kk < kh; kk++) {
								sum += matrix[i + ki][mapNoX][j + kj][k + kk]
										* kernel[ki][mapNoY][kj][kk];
					outMatrix[i][j][k] = sum;
		return outMatrix[0];

	 * sigmod 操作
	public static double sigmod(double x) {
		return 1 / (1 + Math.pow(Math.E, -x));

	 * 求矩阵的所有值之和
	public static double sum(double[][] error) {
		int m = error.length;
		int n = error[0].length;
		double sum = 0.0;
		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				sum += error[i][j];
		return sum;

	 * Ad hoc sum.
	public static double[][] sum(double[][][][] errors, int j) {
		int m = errors[0][j].length;
		int n = errors[0][j][0].length;
		double[][] result = new double[m][n];
		for (int mi = 0; mi < m; mi++) {
			for (int nj = 0; nj < n; nj++) {
				double sum = 0;
				for (int i = 0; i < errors.length; i++)
					sum += errors[i][j][mi][nj];
				result[mi][nj] = sum;
		return result;

	 * 获取最终分类的最大值索引
	public static int getMaxIndex(double[] out) {
		double max = out[0];
		int index = 0;
		for (int i = 1; i < out.length; i++)
			if (out[i] > max) {
				max = out[i];
				index = i;
		return index;

第 83 天: 数学操作 (续)

昨天的操作比较多, 今天可以自己写些代码来测试下. 例如, 测试用例最好是 8*8 之内的矩阵, 这样比较容易验证正确性.

public static void main(String args[]) {
	MathUtils tempmathUtils = new MathUtils();
	double[][] matrix = new double[8][8];
	matrix = tempmathUtils.randomMatrix(8, 8, true);
	//matrix = rot180(matrix);
	double[] Array = new double[8];
	Array = tempmathUtils.randomArray(8);
	int[] Perm = new int[10];
	Perm = tempmathUtils.randomPerm(20,10);
	Operator aa;
		double[][] tempmatrixOp = new double[8][8];
	tempmatrixOp = tempmathUtils.matrixOp(tempmatrixOp,aa);
	Operator a1,b1;
	OperatorOnTwo c1;
	double[][] tempmatrixOpa = new double[8][8];
	double[][] tempmatrixOpb = new double[8][8];
	tempmatrixOpa = matrixOp(tempmatrixOpa, tempmatrixOpb, a1, b1, c1);