以识别手语数字为例,创建卷积网络模型

导入相应模块

# Standard-library and scientific-stack imports for the CNN tutorial.
import math
import numpy as np
import h5py
import matplotlib.pyplot as plt
import scipy
from PIL import Image
from scipy import ndimage
import tensorflow as tf
from tensorflow.python.framework import ops
# Helpers from the course utilities: load_dataset, convert_to_one_hot,
# random_mini_batches (used below).
from cnn_utils import *

# NOTE(review): the next line is IPython/Jupyter magic — this file is a
# notebook export and will not run as a plain .py script as-is.
%matplotlib inline
np.random.seed(1)  # fix the NumPy seed for reproducibility

导入数据集

X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = load_dataset()

(图:手语数字数据集的样本示意图)


用one-hot编码如图所示对手势的六个数字进行编码

导出数据集的其中一个样本如图:

y=5

(图:数据集中的一个样本,对应标签 y=5)

归一化输入数据集并对输出分类采用one-hot编码

# Normalize pixel values into [0, 1] and one-hot encode the labels.
X_train = X_train_orig/255.# scale inputs into the (0, 1) range
X_test = X_test_orig/255.
# convert_to_one_hot returns (classes, m); transpose to (m, classes)
# so each row is one example's one-hot label (matches shapes printed below).
Y_train = convert_to_one_hot(Y_train_orig, 6).T
Y_test = convert_to_one_hot(Y_test_orig, 6).T
print ("number of training examples = " + str(X_train.shape[0]))
print ("number of test examples = " + str(X_test.shape[0]))
print ("X_train shape: " + str(X_train.shape))
print ("Y_train shape: " + str(Y_train.shape))
print ("X_test shape: " + str(X_test.shape))
print ("Y_test shape: " + str(Y_test.shape))
conv_layers = {}
"""
out:
number of training examples = 1080
number of test examples = 120
X_train shape: (1080, 64, 64, 3)
Y_train shape: (1080, 6)
X_test shape: (120, 64, 64, 3)
Y_test shape: (120, 6)
"""

创建占位符

为在创建会话时需要喂入的数据创建占位符

def create_placeholders(n_H0, n_W0, n_C0, n_y):
    """Create the TF placeholders fed at session time.

    Arguments:
    n_H0 -- height of an input image
    n_W0 -- width of an input image
    n_C0 -- number of channels of an input image
    n_y -- number of classes

    Returns:
    X -- placeholder for input images, shape [None, n_H0, n_W0, n_C0], dtype float32
    Y -- placeholder for one-hot labels, shape [None, n_y], dtype float32
    """
    # Leading dimension is None so any batch size can be fed.
    image_shape = (None, n_H0, n_W0, n_C0)
    label_shape = (None, n_y)

    X = tf.placeholder(tf.float32, shape=image_shape, name="X")
    Y = tf.placeholder(tf.float32, shape=label_shape, name="Y")
    return X, Y

初始化卷积核

这个分类器采用两层卷积层,由此应有两组卷积核 W1 和 W2,对其采用的初始化方法为 tf.contrib.layers.xavier_initializer(seed = 0)

def initialize_parameters():
    """Initialize the two convolution filter banks with Xavier init.

    Shapes:
        W1 : [4, 4, 3, 8]  -- eight 4x4x3 filters
        W2 : [2, 2, 8, 16] -- sixteen 2x2x8 filters

    Returns:
    parameters -- dict mapping "W1"/"W2" to the corresponding tf.Variable
    """
    tf.set_random_seed(1)  # graph-level seed for reproducible init

    # The initializer object is stateless, so one instance can be shared.
    xavier = tf.contrib.layers.xavier_initializer(seed = 0)
    W1 = tf.get_variable("W1", [4, 4, 3, 8], initializer=xavier)
    W2 = tf.get_variable("W2", [2, 2, 8, 16], initializer=xavier)

    return {"W1": W1, "W2": W2}

卷积网络前向传播

采用的tensorflow内建函数有:
tf.nn.conv2d(X,W1, strides = [1,s,s,1], padding = 'SAME'): 卷积层函数,给定输入参数X和一组卷积核W1,该函数将W1的卷积核与X卷积,第三个输入([1,f,f,1])表示卷积核对于输入X的每个维度(m,n_H_prev,n_W_prev,n_C_prev)的步幅

tf.nn.max_pool(A, ksize = [1,f,f,1], strides = [1,s,s,1], padding = 'SAME'): 池化层函数,最大池化处理
tf.nn.relu(Z1): 激活函数
tf.contrib.layers.flatten(P):将P按样本展平成维度为[batch_size, k]的二维张量
tf.contrib.layers.fully_connected(F, num_outputs):全连接层,输出维度为num_outputs,详见TensorFlow官方文档。

def forward_propagation(X, parameters):
    """Forward pass of the network.

    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED

    Arguments:
    X -- input batch placeholder/tensor
    parameters -- dict holding the filter banks "W1" and "W2"

    Returns:
    Z3 -- the 6-unit linear output (logits) of the final fully-connected layer
    """
    W1 = parameters['W1']
    W2 = parameters['W2']

    # First conv block: stride-1 convolution with SAME padding, ReLU,
    # then 8x8 max-pooling with stride 8.
    conv1 = tf.nn.conv2d(X, W1, strides = [1,1,1,1], padding = 'SAME')
    act1 = tf.nn.relu(conv1)
    pool1 = tf.nn.max_pool(act1, ksize = [1,8,8,1], strides = [1,8,8,1], padding = 'SAME')

    # Second conv block: stride-1 convolution, ReLU, 4x4 max-pooling.
    conv2 = tf.nn.conv2d(pool1, W2, strides = [1,1,1,1], padding = 'SAME')
    act2 = tf.nn.relu(conv2)
    pool2 = tf.nn.max_pool(act2, ksize = [1,4,4,1], strides = [1,4,4,1], padding = 'SAME')

    # Flatten to (batch, k) and map to 6 classes; no activation here
    # because the loss applies softmax itself.
    flat = tf.contrib.layers.flatten(pool2)
    Z3 = tf.contrib.layers.fully_connected(flat, 6, activation_fn=None)

    return Z3

计算代价

tf.nn.softmax_cross_entropy_with_logits(logits = Z3, labels = Y): 计算softmax交叉熵损失。该函数在内部先对logits做softmax,再与标签计算交叉熵,因此前面的全连接层不需要再加softmax激活。
tf.reduce_mean: 计算张量维度的元素均值。

def compute_cost(Z3, Y):
    """Return the mean softmax cross-entropy between logits Z3 and one-hot labels Y."""
    # softmax_cross_entropy_with_logits yields one loss per example;
    # reduce_mean averages over the batch.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits = Z3, labels = Y)
    return tf.reduce_mean(per_example_loss)

创建卷积模型

  • 创建占位符
  • 初始化参数
  • 向前传播
  • 计算代价
  • 创建优化器
# GRADED FUNCTION: model

def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.009,
          num_epochs = 100, minibatch_size = 64, print_cost = True):
    """
    Build and train the ConvNet:
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED

    Arguments:
    X_train -- training images, shape (None, 64, 64, 3)
    Y_train -- training labels (one-hot), shape (None, n_y = 6)
    X_test -- test images, shape (None, 64, 64, 3)
    Y_test -- test labels (one-hot), shape (None, n_y = 6)
    learning_rate -- learning rate for the Adam optimizer
    num_epochs -- number of full passes over the training set
    minibatch_size -- number of examples per mini-batch
    print_cost -- if True, print the cost every 5 epochs

    Returns:
    train_accuracy -- accuracy on the training set (X_train)
    test_accuracy -- accuracy on the test set (X_test)
    parameters -- the trained parameter dict, usable for prediction
    """
    
    ops.reset_default_graph()   # reset the default graph so the model can be re-run without tf variable name clashes
    
    tf.set_random_seed(1)       
    seed = 3               
    
    (m, n_H0, n_W0, n_C0) = X_train.shape             
    n_y = Y_train.shape[1]                            
    costs = []                           
    
    # Create the placeholders fed at session time
    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)
    
    # Initialize the filter banks W1, W2
    parameters = initialize_parameters()

    # Forward propagation: build the graph up to the logits Z3
    Z3 = forward_propagation(X,parameters)
 
    # Cost node: mean softmax cross-entropy
    cost = compute_cost(Z3, Y)
    
    # Backpropagation via the Adam optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    
    # Initializer for all global variables
    init = tf.global_variables_initializer()
     

    with tf.Session() as sess:
        
        # Run the variable initialization
        sess.run(init)
        
        # Training loop
        for epoch in range(num_epochs):

            minibatch_cost = 0.
            num_minibatches = int(m / minibatch_size) # number of full-size minibatches per epoch
            seed = seed + 1  # reshuffle differently each epoch (deterministically)
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in minibatches:
                # Select one minibatch
                (minibatch_X, minibatch_Y) = minibatch
               
                # One optimizer step; also fetch the cost for bookkeeping
                _ , temp_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})

                minibatch_cost += temp_cost / num_minibatches
                
            if print_cost == True and epoch % 5 == 0:
                print ("Cost after epoch %i: %f" % (epoch, minibatch_cost))
            if print_cost == True and epoch % 1 == 0:
                costs.append(minibatch_cost)
        
        
        # Plot the cost curve over epochs
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per tens)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()


        # Predicted class = argmax over the 6 logits; compare to the label argmax
        predict_op = tf.argmax(Z3, 1)
        correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
        
        # Fraction of correct predictions
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        # NOTE(review): this prints the accuracy *tensor* object, not its value
        print(accuracy)
        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
        print("Train Accuracy:", train_accuracy)
        print("Test Accuracy:", test_accuracy)
                
        return train_accuracy, test_accuracy, parameters