CNN的网络结构:
conv+relu -> conv+relu+pool -> conv+relu -> conv+relu+pool -> fc+relu -> fc。
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 27 19:38:44 2019
@author: macheng
"""
from __future__ import print_function, division
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import numpy as np
import load
# Data set handles exposed by the project-local `load` module.
# train_samples is a 4-D array shaped [num_images, image_size, image_size, num_channels];
# train_labels is a 2-D array shaped [num_images, 10].
train_samples = load._train_samples
train_labels = load._train_labels
test_samples = load._test_samples
test_labels = load._test_labels

print('Training set', train_samples.shape, train_labels.shape)
print(' Test set', test_samples.shape, test_labels.shape)

# Image geometry shared by every placeholder and layer defined below.
image_size = load.image_size  # 32
num_labels = load.num_labels  # 10
num_channels = load.num_channels  # 1 channel: grayscale images
def get_chunk(samples, labels, chunkSize):
    """Yield consecutive, equally sized mini-batches of samples and labels.

    Only full chunks are produced: a trailing remainder shorter than
    ``chunkSize`` is dropped. A final chunk that ends exactly at the end of
    the data is yielded; the previous ``stepEnd < len(samples)`` test
    silently discarded it.

    Args:
        samples: Sliceable sequence of samples.
        labels: Sliceable sequence of labels with the same length as
            ``samples``.
        chunkSize: Number of items per chunk; must be a positive integer.

    Yields:
        Tuples ``(i, samples_chunk, labels_chunk)`` where ``i`` is the
        zero-based chunk index.

    Raises:
        ValueError: If the two sequences differ in length or ``chunkSize``
            is not positive. Raised when iteration starts, because this is a
            generator.
    """
    if len(samples) != len(labels):
        raise ValueError('Length of samples and labels must equal')
    if chunkSize <= 0:
        # A non-positive step would never advance through the data.
        raise ValueError('chunkSize must be a positive integer')
    # The last admissible start index is len(samples) - chunkSize, hence the
    # "+ 1" on the exclusive upper bound of the range.
    last_start_exclusive = len(samples) - chunkSize + 1
    for i, start in enumerate(range(0, last_start_exclusive, chunkSize)):
        end = start + chunkSize
        yield i, samples[start:end], labels[start:end]
class Network():
    """Small CNN classifier built with the TensorFlow 1.x graph API.

    Layer layout:
    conv+relu -> conv+relu+pool -> conv+relu -> conv+relu+pool -> fc+relu -> fc.

    The graph is built in ``define_graph`` and executed in ``run``. Training
    and evaluation data come from the module-level ``train_samples``,
    ``train_labels``, ``test_samples`` and ``test_labels``.
    """

    def __init__(self, num_hidden, batch_size, conv_depth, kernel_size, pooling_scale):
        """Store the hyper-parameters, build the graph and open a session.

        Args:
            num_hidden: Number of units in the first fully connected layer.
            batch_size: Number of samples per training step (fixes the shape
                of the training placeholders).
            conv_depth: Number of filters used by each of the four
                convolution layers.
            kernel_size: Side length of the square convolution kernels.
            pooling_scale: Window size of the max-pooling layers; also used
                as their stride.
        """
        self.batch_size = batch_size
        self.test_batch_size = 500

        # Hyper parameters
        self.num_hidden = num_hidden
        self.kernel_size = kernel_size  # side length of the convolution kernel
        self.conv1_depth = conv_depth  # depth of a convolution = number of kernels
        self.conv2_depth = conv_depth
        self.conv3_depth = conv_depth
        self.conv4_depth = conv_depth
        self.last_conv_depth = self.conv4_depth
        self.pooling_scale = pooling_scale
        self.pooling_stride = self.pooling_scale  # max-pooling stride

        # Graph related
        self.graph = tf.Graph()
        self.tf_train_samples = None
        self.tf_train_labels = None
        self.tf_test_samples = None
        self.tf_test_labels = None
        self.tf_test_prediction = None

        # Summaries / statistics
        self.merged = None
        self.train_summaries = []
        self.test_summaries = []

        self.define_graph()
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        self.session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options), graph=self.graph)
        # The FileWriter dumps the graph and the summaries to ./board.
        self.writer = tf.summary.FileWriter('./board', self.graph)

    def define_graph(self):
        """Define the computation graph: inputs, variables, model, loss, optimizer."""
        with self.graph.as_default():
            # Only the graph inputs and variables are declared here.
            with tf.name_scope('inputs'):
                self.tf_train_samples = tf.placeholder(
                    tf.float32,
                    shape=(self.batch_size, image_size, image_size, num_channels),
                    name='tf_train_samples'
                )
                self.tf_train_labels = tf.placeholder(
                    tf.float32, shape=(self.batch_size, num_labels), name='tf_train_labels'
                )
                self.tf_test_samples = tf.placeholder(
                    tf.float32,
                    shape=(self.test_batch_size, image_size, image_size, num_channels),
                    name='tf_test_samples'
                )

            # A convolution layer is not fully connected. Kernel shape is
            # [height, width, input depth (channels), output depth].
            # A fully connected layer has one bias per neuron, whereas a
            # convolution layer has one bias per kernel.
            with tf.name_scope('conv1'):
                conv1_weights = tf.Variable(
                    tf.truncated_normal(
                        [self.kernel_size, self.kernel_size, num_channels, self.conv1_depth],
                        stddev=0.1))
                conv1_biases = tf.Variable(tf.zeros([self.conv1_depth]))

            with tf.name_scope('conv2'):
                conv2_weights = tf.Variable(
                    tf.truncated_normal(
                        [self.kernel_size, self.kernel_size, self.conv1_depth, self.conv2_depth],
                        stddev=0.1))
                conv2_biases = tf.Variable(tf.constant(0.1, shape=[self.conv2_depth]))

            with tf.name_scope('conv3'):
                conv3_weights = tf.Variable(
                    tf.truncated_normal(
                        [self.kernel_size, self.kernel_size, self.conv2_depth, self.conv3_depth],
                        stddev=0.1))
                conv3_biases = tf.Variable(tf.constant(0.1, shape=[self.conv3_depth]))

            with tf.name_scope('conv4'):
                conv4_weights = tf.Variable(
                    tf.truncated_normal(
                        [self.kernel_size, self.kernel_size, self.conv3_depth, self.conv4_depth],
                        stddev=0.1))
                conv4_biases = tf.Variable(tf.constant(0.1, shape=[self.conv4_depth]))

            with tf.name_scope('fc1'):
                # The model pools twice with 'SAME' padding, and each pooling
                # maps a side of length n to ceil(n / stride). Computing the
                # size this way matches the flattened tensor even when
                # image_size is not a multiple of the total down-scaling.
                pooled_size = image_size
                for _ in range(2):
                    pooled_size = -(-pooled_size // self.pooling_stride)  # ceil division
                # Multiply by last_conv_depth because the feature maps are flattened.
                fc1_weights = tf.Variable(
                    tf.truncated_normal(
                        [pooled_size * pooled_size * self.last_conv_depth, self.num_hidden],
                        stddev=0.1)
                )
                fc1_biases = tf.Variable(tf.constant(0.1, shape=[self.num_hidden]))
                self.train_summaries.append(tf.summary.histogram('fc1_weights', fc1_weights))
                self.train_summaries.append(tf.summary.histogram('fc1_biases', fc1_biases))

            with tf.name_scope('fc2'):
                fc2_weights = tf.Variable(
                    tf.truncated_normal([self.num_hidden, num_labels], stddev=0.1),
                    name='fc2_weights'
                )
                fc2_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]), name='fc2_biases')
                self.train_summaries.append(tf.summary.histogram('fc2_weights', fc2_weights))
                self.train_summaries.append(tf.summary.histogram('fc2_biases', fc2_biases))

            # Operations of the graph.
            def model(data, train=True):
                """Return the logits for ``data``.

                With ``train=False`` an image summary of the first layer's
                activations is additionally registered in ``test_summaries``.
                """
                # The first convolution layer is connected to the input, but
                # only locally (according to the kernel size), not fully.
                with tf.name_scope('conv1_model'):
                    with tf.name_scope('convolution'):
                        conv1 = tf.nn.conv2d(
                            data, filter=conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
                        addition = conv1 + conv1_biases
                    hidden = tf.nn.relu(addition)

                    if not train:
                        # Turn the activations of the last sample in the batch
                        # into one grayscale image per filter:
                        # (height, width, depth) -> (depth, height, width, 1).
                        # Stride 1 with 'SAME' padding keeps the spatial size
                        # equal to image_size.
                        filter_map = hidden[-1]
                        filter_map = tf.transpose(filter_map, perm=[2, 0, 1])
                        filter_map = tf.reshape(
                            filter_map, (self.conv1_depth, image_size, image_size, 1))
                        self.test_summaries.append(
                            tf.summary.image(
                                'conv1_relu', tensor=filter_map, max_outputs=self.conv1_depth))

                with tf.name_scope('conv2_model'):
                    with tf.name_scope('convolution'):
                        conv2 = tf.nn.conv2d(
                            hidden, filter=conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
                        addition = conv2 + conv2_biases
                    hidden = tf.nn.relu(addition)
                    hidden = tf.nn.max_pool(
                        hidden,
                        ksize=[1, self.pooling_scale, self.pooling_scale, 1],
                        strides=[1, self.pooling_stride, self.pooling_stride, 1],
                        padding='SAME')

                with tf.name_scope('conv3_model'):
                    with tf.name_scope('convolution'):
                        conv3 = tf.nn.conv2d(
                            hidden, filter=conv3_weights, strides=[1, 1, 1, 1], padding='SAME')
                        addition = conv3 + conv3_biases
                    hidden = tf.nn.relu(addition)

                with tf.name_scope('conv4_model'):
                    with tf.name_scope('convolution'):
                        conv4 = tf.nn.conv2d(
                            hidden, filter=conv4_weights, strides=[1, 1, 1, 1], padding='SAME')
                        addition = conv4 + conv4_biases
                    hidden = tf.nn.relu(addition)
                    hidden = tf.nn.max_pool(
                        hidden,
                        ksize=[1, self.pooling_scale, self.pooling_scale, 1],
                        strides=[1, self.pooling_stride, self.pooling_stride, 1],
                        padding='SAME')

                # Fully connected layer 1: flatten every sample first.
                shape = hidden.get_shape().as_list()
                reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])

                with tf.name_scope('fc1_model'):
                    fc1_model = tf.matmul(reshape, fc1_weights) + fc1_biases
                    hidden = tf.nn.relu(fc1_model)

                # Fully connected layer 2 produces the logits.
                with tf.name_scope('fc2_model'):
                    return tf.matmul(hidden, fc2_weights) + fc2_biases

            # Training computation.
            logits = model(self.tf_train_samples)
            with tf.name_scope('loss'):
                self.loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        labels=self.tf_train_labels, logits=logits)
                )
                self.train_summaries.append(tf.summary.scalar('Loss', self.loss))

            # Optimizer.
            with tf.name_scope('optimizer'):
                self.optimizer = tf.train.GradientDescentOptimizer(0.0001).minimize(self.loss)

            # Predictions for the training and test data.
            with tf.name_scope('train'):
                self.train_prediction = tf.nn.softmax(logits, name='train_prediction')
            with tf.name_scope('test'):
                self.test_prediction = tf.nn.softmax(
                    model(self.tf_test_samples, train=False), name='test_prediction')

            self.merged_train_summary = tf.summary.merge(self.train_summaries)
            self.merged_test_summary = tf.summary.merge(self.test_summaries)

    @staticmethod
    def _confusion_stats(confusion):
        """Return ``(recalls, precisions, total, overall_accuracy)`` of a confusion matrix.

        Rows are treated as true classes and columns as predicted classes,
        matching how ``accuracy`` builds the matrix. A class with an empty
        row or column gets ``0.0`` instead of a division by zero.
        """
        confusion = np.asarray(confusion)
        diagonal = np.diag(confusion)
        row_totals = confusion.sum(axis=1)
        column_totals = confusion.sum(axis=0)
        total = confusion.sum()
        recalls = [float(hit) / count if count else 0.0
                   for hit, count in zip(diagonal, row_totals)]
        precisions = [float(hit) / count if count else 0.0
                      for hit, count in zip(diagonal, column_totals)]
        # Use the real number of evaluated samples rather than a fixed
        # data-set size, so the figure stays correct for any test set.
        overall = float(diagonal.sum()) / total if total else 0.0
        return recalls, precisions, total, overall

    @staticmethod
    def _print_confusion_matrix(confusionMatrix):
        """Print each row with its recall, each column's precision, then the totals."""
        recalls, precisions, total, overall = Network._confusion_stats(confusionMatrix)
        print('Confusion Matrix:')
        for line, recall in zip(confusionMatrix, recalls):
            print(line, recall)
        for precision in precisions:
            print(precision)
        print('\n', total, overall)

    def run(self):
        """Train on the training set once, then evaluate on the test set.

        Uses ``self.session`` as a context manager, so the session is closed
        when this method returns; the summary writer is closed as well.
        """
        try:
            with self.session as session:
                tf.global_variables_initializer().run()

                # Training
                print('Start Training')
                for i, samples, labels in get_chunk(
                        train_samples, train_labels, chunkSize=self.batch_size):
                    _, l, predictions, summary = session.run(
                        [self.optimizer, self.loss, self.train_prediction,
                         self.merged_train_summary],
                        feed_dict={self.tf_train_samples: samples, self.tf_train_labels: labels}
                    )
                    self.writer.add_summary(summary, i)
                    if i % 50 == 0:
                        # `labels` holds the true labels. The accuracy is only
                        # needed on the steps where it is printed.
                        accuracy, _ = self.accuracy(predictions, labels)
                        print('Minibatch loss at step %d: %f' % (i, l))
                        print('Minibatch accuracy: %.1f%%' % accuracy)

                # Testing
                accuracies = []
                confusionMatrices = []
                for i, samples, labels in get_chunk(
                        test_samples, test_labels, chunkSize=self.test_batch_size):
                    result, summary = session.run(
                        [self.test_prediction, self.merged_test_summary],
                        feed_dict={self.tf_test_samples: samples}
                    )
                    self.writer.add_summary(summary, i)
                    accuracy, cm = self.accuracy(result, labels, need_confusion_matrix=True)
                    accuracies.append(accuracy)
                    confusionMatrices.append(cm)
                    print('%d Test Accuracy: %.1f%%' % (i, accuracy))
                print(' Average Accuracy:', np.average(accuracies))
                print('Standard Deviation:', np.std(accuracies))
                self._print_confusion_matrix(np.add.reduce(confusionMatrices))
        finally:
            # Flush pending summaries and release the event file.
            self.writer.close()

    def accuracy(self, predictions, labels, need_confusion_matrix=False):
        """Compute the accuracy and, optionally, the confusion matrix.

        Args:
            predictions: 2-D array of per-class scores, one row per sample.
            labels: 2-D one-hot array of true labels, same shape as
                ``predictions``.
            need_confusion_matrix: When true, also build the confusion matrix.

        Returns:
            Tuple ``(accuracy, confusion_matrix)``; accuracy is a percentage
            and the matrix is ``None`` unless requested.
        """
        _predictions = np.argmax(predictions, 1)
        _labels = np.argmax(labels, 1)
        cm = None
        if need_confusion_matrix:
            # Pin the label set so every batch yields a matrix of the same
            # shape even when a class is absent from that batch; otherwise
            # the per-batch matrices cannot be summed reliably.
            cm = confusion_matrix(
                _labels, _predictions, labels=np.arange(predictions.shape[1]))
        # == is overloaded element-wise for numpy arrays.
        accuracy = (100.0 * np.sum(_predictions == _labels) / predictions.shape[0])
        return accuracy, cm
if __name__ == '__main__':
    # Script entry point: build the network with the tutorial's
    # hyper-parameters, then train and evaluate it once.
    net = Network(
        num_hidden=16,
        batch_size=64,
        conv_depth=16,
        kernel_size=3,
        pooling_scale=2,
    )
    net.run()
CNN网络的正确率好像越来越低了,,,
tensorboard 可视化的网络结构如下图所示
从上面的代码及tensorboard可视化图可以看出,搭建CNN网络的一般步骤是:
- 定义graph:主要包括输入的定义、卷积层的权重矩阵weights和bias的定义、全连接层的权重矩阵weights和bias的定义。
- 定义计算模型model:
(1)对1中的卷积层的运算的定义:使用1中定义的权重矩阵和bias进行卷积运算、激活函数的选择、pooling的选择。
(2)对1中的全连接层的定义:使用1中定义的权重矩阵和bias进行矩阵乘法运算、激活函数的选择。
- 定义优化器进行优化
- 定义session进行运算
从代码中可以看出,4个卷积层的定义和运算很相似、2个全连接层的定义和运算也很相似,故下一篇博客会将上面的代码抽象出一个CNN网络的计算框架。