LeNet-5 network structure
import tensorflow as tf

def inference(input_tensor, train, regularizer):
# Layer 1: convolutional layer, 5×5 filters, depth 6, no zero padding (VALID), stride 1.
# Size change: 32×32×1 -> 28×28×6
with tf.variable_scope('layer1-conv1'):
conv1_weights = tf.get_variable('weight',[5,5,1,6],initializer=tf.truncated_normal_initializer(stddev=0.1))
conv1_biases = tf.get_variable('bias',[6],initializer=tf.constant_initializer(0.0))
conv1 = tf.nn.conv2d(input_tensor,conv1_weights,strides=[1,1,1,1],padding='VALID')
relu1 = tf.nn.relu(tf.nn.bias_add(conv1,conv1_biases))
# Layer 2: max-pooling layer, 2×2 filter, zero padding (SAME), stride 2.
# Size change: 28×28×6 -> 14×14×6
with tf.name_scope('layer2-pool1'):
pool1 = tf.nn.max_pool(relu1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
# Layer 3: convolutional layer, 5×5 filters, depth 16, no zero padding (VALID), stride 1.
# Size change: 14×14×6 -> 10×10×16
with tf.variable_scope('layer3-conv2'):
conv2_weights = tf.get_variable('weight',[5,5,6,16],initializer=tf.truncated_normal_initializer(stddev=0.1))
conv2_biases = tf.get_variable('bias',[16],initializer=tf.constant_initializer(0.0))
conv2 = tf.nn.conv2d(pool1,conv2_weights,strides=[1,1,1,1],padding='VALID')
relu2 = tf.nn.relu(tf.nn.bias_add(conv2,conv2_biases))
# Layer 4: max-pooling layer, 2×2 filter, zero padding (SAME), stride 2.
# Size change: 10×10×16 -> 5×5×16
with tf.variable_scope('layer4-pool2'):
pool2 = tf.nn.max_pool(relu2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
# Convert the output of the fourth (pooling) layer into the input format of the fifth (fully connected) layer.
# The fourth layer outputs a 5×5×16 feature map, but the fully connected layer expects a vector, so each
# image's 5×5×16 feature map is flattened into a vector of length 5×5×16.
# For example, with a batch of 64 images the pooling output has shape (64,5,5,16); after flattening,
# nodes = 5×5×16 = 400 and the shape becomes (64,400).
pool_shape = pool2.get_shape().as_list()
nodes = pool_shape[1]*pool_shape[2]*pool_shape[3]
reshaped = tf.reshape(pool2,[-1,nodes])
# Layer 5: fully connected layer, nodes = 5×5×16 = 400, a 400 -> 120 fully connected mapping.
# Size change: with a batch of 64 samples, 64×400 -> 64×120.
# During training, dropout is introduced: it randomly sets part of the activations to 0, which helps avoid overfitting.
# This follows the same idea as "simpler models overfit less", and the same idea as regularization, which limits the
# weight magnitudes so the model cannot fit arbitrary noise in the training data.
# The final training run in this article does not use dropout (the train argument was passed as False, because training
# and testing were written together rather than separated), but you are encouraged to try it.
with tf.variable_scope('layer5-fc1'):
fc1_weights = tf.get_variable('weight',[nodes,120],initializer=tf.truncated_normal_initializer(stddev=0.1))
if regularizer is not None:
tf.add_to_collection('losses',regularizer(fc1_weights))
fc1_biases = tf.get_variable('bias',[120],initializer=tf.constant_initializer(0.1))
fc1 = tf.nn.relu(tf.matmul(reshaped,fc1_weights) + fc1_biases)
if train:
fc1 = tf.nn.dropout(fc1,0.5)
# Layer 6: fully connected layer, a 120 -> 84 fully connected mapping.
# Size change: with a batch of 64 samples, 64×120 -> 64×84.
with tf.variable_scope('layer6-fc2'):
fc2_weights = tf.get_variable('weight',[120,84],initializer=tf.truncated_normal_initializer(stddev=0.1))
if regularizer is not None:
tf.add_to_collection('losses',regularizer(fc2_weights))
fc2_biases = tf.get_variable('bias',[84],initializer=tf.truncated_normal_initializer(stddev=0.1))
fc2 = tf.nn.relu(tf.matmul(fc1,fc2_weights) + fc2_biases)
if train:
fc2 = tf.nn.dropout(fc2,0.5)
# Layer 7: fully connected layer (an approximation of LeNet-5's original output layer), an 84 -> 10 fully connected mapping.
# Size change: with a batch of 64 samples, 64×84 -> 64×10. Finally, passing the 64×10 matrix through softmax gives,
# for each of the 64 images, the probability of belonging to each digit class, i.e. the final classification result.
with tf.variable_scope('layer7-fc3'):
fc3_weights = tf.get_variable('weight',[84,10],initializer=tf.truncated_normal_initializer(stddev=0.1))
if regularizer is not None:
tf.add_to_collection('losses',regularizer(fc3_weights))
fc3_biases = tf.get_variable('bias',[10],initializer=tf.truncated_normal_initializer(stddev=0.1))
logit = tf.matmul(fc2,fc3_weights) + fc3_biases
return logit
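To see how the regularizer argument and the 'losses' collection fit together, here is a minimal usage sketch; the placeholder shapes, the L2 scale 0.0001 and the Adam learning rate are illustrative assumptions, not part of the original code:
x = tf.placeholder(tf.float32, [None, 32, 32, 1], name='x-input')
y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')

# L2 regularizer whose penalty terms inference() adds to the 'losses' collection
regularizer = tf.contrib.layers.l2_regularizer(0.0001)
logits = inference(x, train=False, regularizer=regularizer)

# total loss = cross-entropy plus the collected L2 terms
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
loss = cross_entropy + tf.add_n(tf.get_collection('losses'))
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)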
AlexNet
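The AlexNet listing below calls a print_activations helper that is not defined in this excerpt; it simply prints each layer's name and output shape. A minimal version (following the TensorFlow alexnet_benchmark.py script) would be:
import tensorflow as tf

def print_activations(t):
    # print the op name and the output shape of a layer
    print(t.op.name, ' ', t.get_shape().as_list())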
def inference(images):
"""
构建一个AlexNet模型
"""
parameters = []
# Layer 1: convolution conv1
with tf.name_scope('conv1') as scope:
kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 96], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[96], dtype=tf.float32),
trainable=True, name='biases')
bias = tf.nn.bias_add(conv, biases)
conv1 = tf.nn.relu(bias, name=scope)
print_activations(conv1)
parameters += [kernel, biases]
# Layer 2: max pooling pool1
pool1 = tf.nn.max_pool(conv1,
ksize=[1, 3, 3, 1],
strides=[1, 2, 2, 1],
padding='VALID',
name='pool1')
print_activations(pool1)
# Layer 3: convolution conv2
with tf.name_scope('conv2') as scope:
kernel = tf.Variable(tf.truncated_normal([5, 5, 96, 256], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
trainable=True, name='biases')
bias = tf.nn.bias_add(conv, biases)
conv2 = tf.nn.relu(bias, name=scope)
parameters += [kernel, biases]
print_activations(conv2)
# Layer 4: max pooling pool2
pool2 = tf.nn.max_pool(conv2,
ksize=[1, 3, 3, 1],
strides=[1, 2, 2, 1],
padding='VALID',
name='pool2')
print_activations(pool2)
# Layer 5: convolution conv3
with tf.name_scope('conv3') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 384],
dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32),
trainable=True, name='biases')
bias = tf.nn.bias_add(conv, biases)
conv3 = tf.nn.relu(bias, name=scope)
parameters += [kernel, biases]
print_activations(conv3)
# Layer 6: convolution conv4
with tf.name_scope('conv4') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 384],
dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32),
trainable=True, name='biases')
bias = tf.nn.bias_add(conv, biases)
conv4 = tf.nn.relu(bias, name=scope)
parameters += [kernel, biases]
print_activations(conv4)
# Layer 7: convolution conv5
with tf.name_scope('conv5') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256],
dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
trainable=True, name='biases')
bias = tf.nn.bias_add(conv, biases)
conv5 = tf.nn.relu(bias, name=scope)
parameters += [kernel, biases]
print_activations(conv5)
# Layer 8: max pooling pool5
pool5 = tf.nn.max_pool(conv5,
ksize=[1, 3, 3, 1],
strides=[1, 2, 2, 1],
padding='VALID',
name='pool5')
print_activations(pool5)
return pool5, parameters
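A minimal sketch of exercising this inference function on a batch of random images; the batch size and the 224×224×3 input size are assumptions made for the example:
batch_size = 32
with tf.Graph().as_default():
    # random images standing in for real data
    images = tf.random_normal([batch_size, 224, 224, 3], stddev=0.1)
    pool5, parameters = inference(images)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        out = sess.run(pool5)
        print('pool5 output shape:', out.shape)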
VGG-16
import tensorflow as tf
import numpy as np
from scipy.misc import imread, imresize
from imagenet_classes import class_names
class vgg16:
def __init__(self, imgs, weights=None, sess=None):
self.imgs = imgs
self.convlayers()
self.fc_layers()
self.probs = tf.nn.softmax(self.fc3l)
if weights is not None and sess is not None:
self.load_weights(weights, sess)
def convlayers(self):
self.parameters = []
# zero-mean input
with tf.name_scope('preprocess') as scope:
mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean')
images = self.imgs-mean
# conv1_1
with tf.name_scope('conv1_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 3, 64], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv1_1 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv1_2
with tf.name_scope('conv1_2') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 64], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv1_1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv1_2 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# pool1
self.pool1 = tf.nn.max_pool(self.conv1_2,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool1')
# conv2_1
with tf.name_scope('conv2_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 128], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.pool1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv2_1 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv2_2
with tf.name_scope('conv2_2') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 128, 128], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv2_1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv2_2 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# pool2
self.pool2 = tf.nn.max_pool(self.conv2_2,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool2')
# conv3_1
with tf.name_scope('conv3_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 128, 256], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.pool2, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv3_1 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv3_2
with tf.name_scope('conv3_2') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv3_1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv3_2 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv3_3
with tf.name_scope('conv3_3') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv3_2, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv3_3 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# pool3
self.pool3 = tf.nn.max_pool(self.conv3_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool3')
# conv4_1
with tf.name_scope('conv4_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.pool3, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv4_1 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv4_2
with tf.name_scope('conv4_2') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv4_1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv4_2 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv4_3
with tf.name_scope('conv4_3') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv4_2, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv4_3 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# pool4
self.pool4 = tf.nn.max_pool(self.conv4_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool4')
# conv5_1
with tf.name_scope('conv5_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.pool4, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv5_1 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv5_2
with tf.name_scope('conv5_2') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv5_1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv5_2 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv5_3
with tf.name_scope('conv5_3') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv5_2, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv5_3 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# pool5
self.pool5 = tf.nn.max_pool(self.conv5_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool5')
def fc_layers(self):
# fc1
with tf.name_scope('fc1') as scope:
shape = int(np.prod(self.pool5.get_shape()[1:]))
fc1w = tf.Variable(tf.truncated_normal([shape, 4096],
dtype=tf.float32,
stddev=1e-1), name='weights')
fc1b = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
trainable=True, name='biases')
pool5_flat = tf.reshape(self.pool5, [-1, shape])
fc1l = tf.nn.bias_add(tf.matmul(pool5_flat, fc1w), fc1b)
self.fc1 = tf.nn.relu(fc1l)
self.parameters += [fc1w, fc1b]
# fc2
with tf.name_scope('fc2') as scope:
fc2w = tf.Variable(tf.truncated_normal([4096, 4096],
dtype=tf.float32,
stddev=1e-1), name='weights')
fc2b = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
trainable=True, name='biases')
fc2l = tf.nn.bias_add(tf.matmul(self.fc1, fc2w), fc2b)
self.fc2 = tf.nn.relu(fc2l)
self.parameters += [fc2w, fc2b]
# fc3
with tf.name_scope('fc3') as scope:
fc3w = tf.Variable(tf.truncated_normal([4096, 1000],
dtype=tf.float32,
stddev=1e-1), name='weights')
fc3b = tf.Variable(tf.constant(1.0, shape=[1000], dtype=tf.float32),
trainable=True, name='biases')
self.fc3l = tf.nn.bias_add(tf.matmul(self.fc2, fc3w), fc3b)
self.parameters += [fc3w, fc3b]
def load_weights(self, weight_file, sess):
weights = np.load(weight_file)
keys = sorted(weights.keys())
for i, k in enumerate(keys):
print (i, k, np.shape(weights[k]))
sess.run(self.parameters[i].assign(weights[k]))
if __name__ == '__main__':
sess = tf.Session()
imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
vgg = vgg16(imgs, 'vgg16_weights.npz', sess)
img1 = imread('laska.png', mode='RGB')
img1 = imresize(img1, (224, 224))
prob = sess.run(vgg.probs, feed_dict={vgg.imgs: [img1]})[0]
preds = (np.argsort(prob)[::-1])[0:5]
for p in preds:
print( class_names[p], prob[p])
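Because every kernel and bias is collected in self.parameters, the model size can be checked directly; a small sketch that could be appended after the graph is built (roughly 138 million parameters is the well-known VGG-16 figure):
# count the parameters gathered in vgg.parameters
total = sum(int(np.prod(p.get_shape().as_list())) for p in vgg.parameters)
print('VGG-16 parameters:', total)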
GoogLeNet
import tensorflow as tf
slim = tf.contrib.slim
# Weight-initializer helper used below (as in the TF-Slim reference implementation)
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
######## Define a function that provides default arguments for the layer functions used throughout the network ########
# Defaults: the convolutions' activation function, weight initialization, normalizer, etc.
def inception_v3_arg_scope(weight_decay=0.00004, # weight_decay for the L2 regularizer
stddev=0.1, # default standard deviation 0.1
batch_norm_var_collection='moving_vars'):
batch_norm_params = { # parameter dictionary for batch normalization
'decay': 0.9997, # decay coefficient for the moving averages
'epsilon': 0.001,
'updates_collections': tf.GraphKeys.UPDATE_OPS,
'variables_collections': {
'beta': None,
'gamma': None,
'moving_mean': [batch_norm_var_collection],
'moving_variance': [batch_norm_var_collection],
}
}
with slim.arg_scope([slim.conv2d, slim.fully_connected], # automatically give these functions certain default arguments
weights_regularizer=slim.l2_regularizer(weight_decay)): # applied to [slim.conv2d, slim.fully_connected]
# With slim.arg_scope the arguments no longer have to be repeated for every call; only overrides need to be written out
with slim.arg_scope( # a nested slim.arg_scope gives several arguments of the conv-layer function slim.conv2d default values
[slim.conv2d],
weights_initializer=trunc_normal(stddev), # weight initializer
activation_fn=tf.nn.relu, # activation function
normalizer_fn=slim.batch_norm, # normalizer
normalizer_params=batch_norm_params) as sc: # normalizer parameters set to the batch_norm_params defined above
return sc # finally, return the defined scope
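The returned scope is meant to wrap the construction of the whole network, so these defaults apply to every slim.conv2d call inside it; a minimal sketch of the intended usage with the inception_v3 function defined further below (the placeholder shape is an assumption):
inputs = tf.placeholder(tf.float32, [None, 299, 299, 3])
with slim.arg_scope(inception_v3_arg_scope()):
    logits, end_points = inception_v3(inputs, num_classes=1000, is_training=False)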
######## Define a function that builds the convolutional part of the Inception V3 network ########
def inception_v3_base(inputs, scope=None):
'''
Args:
inputs: the input tensor
scope: the scope that carries the function's default arguments
'''
end_points = {} # dictionary used to save certain key nodes for later use
with tf.variable_scope(scope, 'InceptionV3', [inputs]):
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], # set defaults for these three functions
stride=1, padding='VALID'):
# The Inception V3 structure proper starts here; first come the plain (non-Inception-module) convolutional layers
# 299 x 299 x 3
# Arguments: the input tensor, the number of output channels, the kernel size, the stride and the padding mode
net = slim.conv2d(inputs, 32, [3, 3], stride=2, scope='Conv2d_1a_3x3') # create a conv layer directly with slim.conv2d
# 149 x 149 x 32
'''
Thanks to slim and slim.arg_scope, a conv layer can be defined in a single line of code,
which is more convenient than AlexNet, where a conv layer takes several lines, or VGGNet,
where a dedicated helper function is written just to define conv layers.
'''
net = slim.conv2d(net, 32, [3, 3], scope='Conv2d_2a_3x3')
# 147 x 147 x 32
net = slim.conv2d(net, 64, [3, 3], padding='SAME', scope='Conv2d_2b_3x3')
# 147 x 147 x 64
net = slim.max_pool2d(net, [3, 3], stride=2, scope='MaxPool_3a_3x3')
# 73 x 73 x 64
net = slim.conv2d(net, 80, [1, 1], scope='Conv2d_3b_1x1')
# 73 x 73 x 80.
net = slim.conv2d(net, 192, [3, 3], scope='Conv2d_4a_3x3')
# 71 x 71 x 192.
net = slim.max_pool2d(net, [3, 3], stride=2, scope='MaxPool_5a_3x3')
# 35 x 35 x 192.
# The code above contains 5 conv layers and 2 pooling layers; it compresses the spatial size of the input and abstracts its features
'''
Next come three consecutive groups of Inception modules, each containing several Inception modules; this part is the
heart of Inception V3. Within a group the Inception modules are structurally very similar, but differ in some details.
'''
# Inception blocks
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], # default arguments for all module groups
stride=1, padding='SAME'): # set the stride of all conv, max-pool and avg-pool layers to 1
# mixed: 35 x 35 x 256.
# The first module group contains three structurally similar Inception modules
with tf.variable_scope('Mixed_5b'): # name of the first Inception module; each Inception module has four branches
with tf.variable_scope('Branch_0'): # first branch: a 64-channel 1*1 convolution
branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
with tf.variable_scope('Branch_1'): # second branch: a 48-channel 1*1 conv followed by a 64-channel 5*5 conv
branch_1 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='Conv2d_0b_5x5')
with tf.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
with tf.variable_scope('Branch_3'): # fourth branch: 3*3 average pooling followed by a 32-channel 1*1 conv
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) # concatenate the four branch outputs along dimension 3, i.e. the output channels
'''
Because every layer here uses stride 1 and SAME padding, the spatial size does not shrink; only the channel count grows.
The channel counts of the four branches sum to 64+64+96+32 = 256, so the final output tensor is 35*35*256.
All Inception modules of the first group keep the 35*35 spatial size, but the later two change the number of output channels.
'''
# mixed_1: 35 x 35 x 288.
with tf.variable_scope('Mixed_5c'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0b_1x1')
branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='Conv_1_0c_5x5')
with tf.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
with tf.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
# mixed_2: 35 x 35 x 288.
with tf.variable_scope('Mixed_5d'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='Conv2d_0b_5x5')
with tf.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
with tf.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
# Second Inception module group; its second to fifth Inception modules are structurally similar.
# mixed_3: 17 x 17 x 768.
with tf.variable_scope('Mixed_6a'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 384, [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_1x1') # the feature map is downsampled here
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
branch_1 = slim.conv2d(branch_1, 96, [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_1x1') # the feature map is downsampled
with tf.variable_scope('Branch_2'):
branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
scope='MaxPool_1a_3x3')
net = tf.concat([branch_0, branch_1, branch_2], 3) # the output size settles at 17 x 17 x 768
# mixed4: 17 x 17 x 768.
with tf.variable_scope('Mixed_6b'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 128, [1, 7], scope='Conv2d_0b_1x7') # a 1*7 conv followed by a 7*1 conv factorizes a 7*7 conv, reducing parameters and overfitting
branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_0c_7x1')
with tf.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1') # the 7*7 conv is factorized repeatedly
branch_2 = slim.conv2d(branch_2, 128, [7, 1], scope='Conv2d_0b_7x1')
branch_2 = slim.conv2d(branch_2, 128, [1, 7], scope='Conv2d_0c_1x7')
branch_2 = slim.conv2d(branch_2, 128, [7, 1], scope='Conv2d_0d_7x1')
branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='Conv2d_0e_1x7')
with tf.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
# mixed_5: 17 x 17 x 768.
with tf.variable_scope('Mixed_6c'):
with tf.variable_scope('Branch_0'):
'''
Each time the network passes through an Inception module, even when the output size stays the same,
the features are effectively refined once more; the rich convolutions and non-linearities here
contribute a great deal to the network's performance.
'''
branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 160, [1, 7], scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_0c_7x1')
with tf.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='Conv2d_0b_7x1')
branch_2 = slim.conv2d(branch_2, 160, [1, 7], scope='Conv2d_0c_1x7')
branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='Conv2d_0d_7x1')
branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='Conv2d_0e_1x7')
with tf.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
# mixed_6: 17 x 17 x 768.
with tf.variable_scope('Mixed_6d'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 160, [1, 7], scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_0c_7x1')
with tf.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='Conv2d_0b_7x1')
branch_2 = slim.conv2d(branch_2, 160, [1, 7], scope='Conv2d_0c_1x7')
branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='Conv2d_0d_7x1')
branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='Conv2d_0e_1x7')
with tf.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
# mixed_7: 17 x 17 x 768.
with tf.variable_scope('Mixed_6e'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 192, [1, 7], scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_0c_7x1')
with tf.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1')
branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='Conv2d_0c_1x7')
branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0d_7x1')
branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='Conv2d_0e_1x7')
with tf.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
end_points['Mixed_6e'] = net # save Mixed_6e in end_points; it feeds the Auxiliary Classifier that assists the final classification
# The third Inception module group contains three Inception modules
# mixed_8: 8 x 8 x 1280.
with tf.variable_scope('Mixed_7a'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
branch_0 = slim.conv2d(branch_0, 320, [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_3x3') # downsamples the feature map
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 192, [1, 7], scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_0c_7x1')
branch_1 = slim.conv2d(branch_1, 192, [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_3x3')
with tf.variable_scope('Branch_2'): # the pooling layer does not change the number of output channels
branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
scope='MaxPool_1a_3x3')
net = tf.concat([branch_0, branch_1, branch_2], 3) # the spatial size shrinks and the channel count grows; the total size of the tensor keeps decreasing
# mixed_9: 8 x 8 x 2048.
with tf.variable_scope('Mixed_7b'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 320, [1, 1], scope='Conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = tf.concat([
slim.conv2d(branch_1, 384, [1, 3], scope='Conv2d_0b_1x3'),
slim.conv2d(branch_1, 384, [3, 1], scope='Conv2d_0b_3x1')], 3)
with tf.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 448, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(
branch_2, 384, [3, 3], scope='Conv2d_0b_3x3')
branch_2 = tf.concat([
slim.conv2d(branch_2, 384, [1, 3], scope='Conv2d_0c_1x3'),
slim.conv2d(branch_2, 384, [3, 1], scope='Conv2d_0d_3x1')], 3)
with tf.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) # the number of output channels grows to 2048
# mixed_10: 8 x 8 x 2048.
with tf.variable_scope('Mixed_7c'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 320, [1, 1], scope='Conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = tf.concat([
slim.conv2d(branch_1, 384, [1, 3], scope='Conv2d_0b_1x3'),
slim.conv2d(branch_1, 384, [3, 1], scope='Conv2d_0c_3x1')], 3)
with tf.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 448, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(
branch_2, 384, [3, 3], scope='Conv2d_0b_3x3')
branch_2 = tf.concat([
slim.conv2d(branch_2, 384, [1, 3], scope='Conv2d_0c_1x3'),
slim.conv2d(branch_2, 384, [3, 1], scope='Conv2d_0d_3x1')], 3)
with tf.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, 192, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
return net, end_points
# This completes the core of the Inception V3 network, i.e. its convolutional part
'''
An important design principle of Inception nets is that the spatial size keeps shrinking: each Inception module
group simplifies the spatial structure and converts spatial information into higher-level, more abstract feature
information, i.e. it trades spatial dimensions for channel dimensions, which also lowers the amount of computation.
An Inception module combines four different degrees of feature abstraction and transformation - a simple
abstraction (branch 1), more complex abstractions (branches 2 and 3) and a simplified pooling branch (branch 4) -
to selectively retain high-level features at different levels and enrich the network's expressive power as much as possible.
'''
######## Global average pooling, Softmax and the Auxiliary Logits ########
def inception_v3(inputs,
num_classes=1000, # number of classes to predict (the number of categories in the dataset)
is_training=True, # whether this is the training phase; Batch Normalization and Dropout are only enabled during training
dropout_keep_prob=0.8, # fraction of activations kept by dropout
prediction_fn=slim.softmax, # the function used for the final classification
spatial_squeeze=True, # whether to squeeze the output (remove dimensions of size 1, e.g. turn 5*3*1 into 5*3)
reuse=None, # whether to reuse the network and its Variables
scope='InceptionV3'): # the scope that carries the function's default arguments
with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes], # define default values for the scope arguments
reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm, slim.dropout], # set the default value of the is_training flag
is_training=is_training):
# get the output of the last layer, net, and the dictionary of important nodes, end_points
net, end_points = inception_v3_base(inputs, scope=scope) # build the whole convolutional part with the function defined above
# The Auxiliary Head logits act as an auxiliary classifier and help the final prediction considerably
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1, padding='SAME'): # set the stride of conv, max-pool and avg-pool layers to 1
aux_logits = end_points['Mixed_6e'] # fetch Mixed_6e from end_points
with tf.variable_scope('AuxLogits'):
aux_logits = slim.avg_pool2d(
aux_logits, [5, 5], stride=3, padding='VALID', # average pooling after Mixed_6e; shrinks the feature map
scope='AvgPool_1a_5x5')
aux_logits = slim.conv2d(aux_logits, 128, [1, 1], # 1*1 convolution; reduces the number of channels to 128
scope='Conv2d_1b_1x1')
# Shape of feature map before the final layer.
aux_logits = slim.conv2d(
aux_logits, 768, [5,5],
weights_initializer=trunc_normal(0.01), # the weight initializer is reset to a normal distribution with stddev 0.01
padding='VALID', scope='Conv2d_2a_5x5')
aux_logits = slim.conv2d(
aux_logits, num_classes, [1, 1], activation_fn=None,
normalizer_fn=None, weights_initializer=trunc_normal(0.001), # the output becomes 1*1*1000
scope='Conv2d_2b_1x1')
if spatial_squeeze: # tf.squeeze removes the height and width dimensions, which are now 1
aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze')
end_points['AuxLogits'] = aux_logits # finally, store the auxiliary classifier output aux_logits in end_points
# Handle the normal classification prediction
# Final pooling and prediction
with tf.variable_scope('Logits'):
net = slim.avg_pool2d(net, [8, 8], padding='VALID',
scope='AvgPool_1a_8x8')
# 1 x 1 x 2048
net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
end_points['PreLogits'] = net
# 2048
logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, # 1000 output channels
normalizer_fn=None, scope='Conv2d_1c_1x1') # activation function and normalizer set to None
if spatial_squeeze: # tf.squeeze removes the dimensions of size 1 from the output tensor
logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
# 1000
end_points['Logits'] = logits
end_points['Predictions'] = prediction_fn(logits, scope='Predictions') # Softmax turns the logits into class predictions
return logits, end_points # finally, return logits and end_points, which includes the auxiliary nodes
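To sanity-check the construction, a forward pass on a small random batch can be run; this is only a sketch (the batch size and the use of random inputs are assumptions made for the example):
batch_size = 2
with tf.Graph().as_default():
    inputs = tf.random_uniform((batch_size, 299, 299, 3))
    with slim.arg_scope(inception_v3_arg_scope()):
        logits, end_points = inception_v3(inputs, is_training=False)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        out_logits, out_aux = sess.run([logits, end_points['AuxLogits']])
        print('logits:', out_logits.shape, 'aux logits:', out_aux.shape)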
ResNet
import numpy as np
import six
import tensorflow as tf
from tensorflow.python.training import moving_averages

class ResNet(object):
def __init__(self, hps, images, labels, mode):
self.hps = hps
self._images = images
self.labels = labels
self.mode = mode
self._extra_train_ops = []
# Build the model graph
def build_graph(self):
# Create the global step
self.global_step = tf.contrib.framework.get_or_create_global_step()
# Build the ResNet model
self._build_model()
# Build the training op
if self.mode == 'train':
self._build_train_op()
# Merge all summaries
self.summaries = tf.summary.merge_all()
# Build the model
def _build_model(self):
with tf.variable_scope('init'):
x = self._images
"""第一层卷积(3,3x3/1,16)"""
x = self._conv('init_conv', x, 3, 3, 16, self._stride_arr(1))
# 残差网络参数
strides = [1, 2, 2]
# 激活前置
activate_before_residual = [True, False, False]
if self.hps.use_bottleneck:
# bottleneck残差单元模块
res_func = self._bottleneck_residual
# 通道数量
filters = [16, 64, 128, 256]
else:
# 标准残差单元模块
res_func = self._residual
# 通道数量
filters = [16, 16, 32, 64]
# 第一组
with tf.variable_scope('unit_1_0'):
x = res_func(x, filters[0], filters[1],
self._stride_arr(strides[0]),
activate_before_residual[0])
for i in six.moves.range(1, self.hps.num_residual_units):
with tf.variable_scope('unit_1_%d' % i):
x = res_func(x, filters[1], filters[1], self._stride_arr(1), False)
# Second group
with tf.variable_scope('unit_2_0'):
x = res_func(x, filters[1], filters[2],
self._stride_arr(strides[1]),
activate_before_residual[1])
for i in six.moves.range(1, self.hps.num_residual_units):
with tf.variable_scope('unit_2_%d' % i):
x = res_func(x, filters[2], filters[2], self._stride_arr(1), False)
# Third group
with tf.variable_scope('unit_3_0'):
x = res_func(x, filters[2], filters[3], self._stride_arr(strides[2]),
activate_before_residual[2])
for i in six.moves.range(1, self.hps.num_residual_units):
with tf.variable_scope('unit_3_%d' % i):
x = res_func(x, filters[3], filters[3], self._stride_arr(1), False)
# Global average pooling
with tf.variable_scope('unit_last'):
x = self._batch_norm('final_bn', x)
x = self._relu(x, self.hps.relu_leakiness)
x = self._global_avg_pool(x)
# Fully connected layer + Softmax
with tf.variable_scope('logit'):
logits = self._fully_connected(x, self.hps.num_classes)
self.predictions = tf.nn.softmax(logits)
# Build the loss function
with tf.variable_scope('costs'):
# Cross entropy
xent = tf.nn.softmax_cross_entropy_with_logits(
logits=logits, labels=self.labels)
# Mean over the batch
self.cost = tf.reduce_mean(xent, name='xent')
# L2 regularization (weight decay)
self.cost += self._decay()
# Add a cost summary for TensorBoard
tf.summary.scalar('cost', self.cost)
# Build the training op
def _build_train_op(self):
# Learning rate
self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
tf.summary.scalar('learning_rate', self.lrn_rate)
# Compute the gradients of the trainable variables
trainable_variables = tf.trainable_variables()
grads = tf.gradients(self.cost, trainable_variables)
# Choose the optimization method
if self.hps.optimizer == 'sgd':
optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
elif self.hps.optimizer == 'mom':
optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)
# Op that applies the gradients
apply_op = optimizer.apply_gradients(
zip(grads, trainable_variables),
global_step=self.global_step,
name='train_step')
# Merge in the BN update ops
train_ops = [apply_op] + self._extra_train_ops
# Group all training ops
self.train_op = tf.group(*train_ops)
# Convert a scalar stride into the stride array expected by tf.nn.conv2d
def _stride_arr(self, stride):
return [1, stride, stride, 1]
# Standard residual unit
def _residual(self, x, in_filter, out_filter, stride, activate_before_residual=False):
# Whether to pre-activate (BN and ReLU before taking the residual shortcut)
if activate_before_residual:
with tf.variable_scope('shared_activation'):
# BN and ReLU activation first
x = self._batch_norm('init_bn', x)
x = self._relu(x, self.hps.relu_leakiness)
# take the residual shortcut
orig_x = x
else:
with tf.variable_scope('residual_only_activation'):
# take the residual shortcut
orig_x = x
# BN and ReLU activation afterwards
x = self._batch_norm('init_bn', x)
x = self._relu(x, self.hps.relu_leakiness)
# Sub-layer 1
with tf.variable_scope('sub1'):
# 3x3 conv with the given stride, channels in_filter -> out_filter
x = self._conv('conv1', x, 3, in_filter, out_filter, stride)
# Sub-layer 2
with tf.variable_scope('sub2'):
# BN and ReLU activation
x = self._batch_norm('bn2', x)
x = self._relu(x, self.hps.relu_leakiness)
# 3x3 conv, stride 1, channel count unchanged (out_filter)
x = self._conv('conv2', x, 3, out_filter, out_filter, [1, 1, 1, 1])
# Merge with the residual shortcut
with tf.variable_scope('sub_add'):
# when the channel count changes
if in_filter != out_filter:
# average pooling, no padding
orig_x = tf.nn.avg_pool(orig_x, stride, stride, 'VALID')
# pad the channels with zeros (symmetric padding on the 4th dimension)
orig_x = tf.pad(orig_x,
[[0, 0],
[0, 0],
[0, 0],
[(out_filter-in_filter)//2, (out_filter-in_filter)//2]
])
# add the shortcut
x += orig_x
tf.logging.debug('image after unit %s', x.get_shape())
return x
# Bottleneck residual unit
def _bottleneck_residual(self, x, in_filter, out_filter, stride,
activate_before_residual=False):
# Whether to pre-activate (BN and ReLU before taking the residual shortcut)
if activate_before_residual:
with tf.variable_scope('common_bn_relu'):
# BN and ReLU activation first
x = self._batch_norm('init_bn', x)
x = self._relu(x, self.hps.relu_leakiness)
# take the residual shortcut
orig_x = x
else:
with tf.variable_scope('residual_bn_relu'):
# take the residual shortcut
orig_x = x
# BN and ReLU activation afterwards
x = self._batch_norm('init_bn', x)
x = self._relu(x, self.hps.relu_leakiness)
# Sub-layer 1
with tf.variable_scope('sub1'):
# 1x1 conv with the given stride, channels in_filter -> out_filter/4
x = self._conv('conv1', x, 1, in_filter, out_filter // 4, stride)
# Sub-layer 2
with tf.variable_scope('sub2'):
# BN and ReLU activation
x = self._batch_norm('bn2', x)
x = self._relu(x, self.hps.relu_leakiness)
# 3x3 conv, stride 1, channel count unchanged (out_filter/4)
x = self._conv('conv2', x, 3, out_filter // 4, out_filter // 4, [1, 1, 1, 1])
# Sub-layer 3
with tf.variable_scope('sub3'):
# BN and ReLU activation
x = self._batch_norm('bn3', x)
x = self._relu(x, self.hps.relu_leakiness)
# 1x1 conv, stride 1, channels out_filter/4 -> out_filter
x = self._conv('conv3', x, 1, out_filter // 4, out_filter, [1, 1, 1, 1])
# Merge with the residual shortcut
with tf.variable_scope('sub_add'):
# when the channel count changes
if in_filter != out_filter:
# 1x1 conv with the given stride to project the shortcut, channels in_filter -> out_filter
orig_x = self._conv('project', orig_x, 1, in_filter, out_filter, stride)
# add the shortcut
x += orig_x
tf.logging.info('image after unit %s', x.get_shape())
return x
# Batch normalization
# y = ((x - mean) / sqrt(variance + epsilon)) * gamma + beta
def _batch_norm(self, name, x):
with tf.variable_scope(name):
# number of input channels
params_shape = [x.get_shape()[-1]]
# offset
beta = tf.get_variable('beta',
params_shape,
tf.float32,
initializer=tf.constant_initializer(0.0, tf.float32))
# scale
gamma = tf.get_variable('gamma',
params_shape,
tf.float32,
initializer=tf.constant_initializer(1.0, tf.float32))
if self.mode == 'train':
# compute the per-channel mean and variance of the batch
mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
# create the moving mean and variance used at test time
moving_mean = tf.get_variable('moving_mean',
params_shape, tf.float32,
initializer=tf.constant_initializer(0.0, tf.float32),
trainable=False)
moving_variance = tf.get_variable('moving_variance',
params_shape, tf.float32,
initializer=tf.constant_initializer(1.0, tf.float32),
trainable=False)
# add the update ops for the moving mean and variance (exponential moving average)
# moving_mean = moving_mean * decay + mean * (1 - decay)
# moving_variance = moving_variance * decay + variance * (1 - decay)
self._extra_train_ops.append(moving_averages.assign_moving_average(
moving_mean, mean, 0.9))
self._extra_train_ops.append(moving_averages.assign_moving_average(
moving_variance, variance, 0.9))
else:
# use the moving mean and variance accumulated during training
mean = tf.get_variable('moving_mean',
params_shape, tf.float32,
initializer=tf.constant_initializer(0.0, tf.float32),
trainable=False)
variance = tf.get_variable('moving_variance',
params_shape, tf.float32,
initializer=tf.constant_initializer(1.0, tf.float32),
trainable=False)
# add histogram summaries
tf.summary.histogram(mean.op.name, mean)
tf.summary.histogram(variance.op.name, variance)
# BN layer: y = ((x - mean) / sqrt(variance + epsilon)) * gamma + beta
y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001)
y.set_shape(x.get_shape())
return y
# Weight decay, i.e. the L2 regularization loss
def _decay(self):
costs = []
# iterate over all trainable variables
for var in tf.trainable_variables():
# only variables whose name contains 'DW' are included
if var.op.name.find(r'DW') > 0:
costs.append(tf.nn.l2_loss(var))
# sum them up and multiply by the decay factor
return tf.multiply(self.hps.weight_decay_rate, tf.add_n(costs))
# 2-D convolution
def _conv(self, name, x, filter_size, in_filters, out_filters, strides):
with tf.variable_scope(name):
n = filter_size * filter_size * out_filters
# get or create the convolution kernel, initialized from a normal distribution with stddev sqrt(2/n)
kernel = tf.get_variable(
'DW',
[filter_size, filter_size, in_filters, out_filters],
tf.float32,
initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0/n)))
# compute the convolution
return tf.nn.conv2d(x, kernel, strides, padding='SAME')
# leaky ReLU activation; with leakiness 0 it is the standard ReLU
def _relu(self, x, leakiness=0.0):
return tf.where(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')
# Fully connected layer, the last layer of the network
def _fully_connected(self, x, out_dim):
# reshape the input into a 2-D tensor of shape [N, -1]
x = tf.reshape(x, [self.hps.batch_size, -1])
# weights w, uniform unit-scaling initialization, [-sqrt(3/dim), sqrt(3/dim)]*factor
w = tf.get_variable('DW', [x.get_shape()[1], out_dim],
initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
# biases b, initialized to 0
b = tf.get_variable('biases', [out_dim], initializer=tf.constant_initializer())
# compute x*w + b
return tf.nn.xw_plus_b(x, w, b)
# Global average pooling
def _global_avg_pool(self, x):
assert x.get_shape().ndims == 4
# take the mean over the height and width dimensions, shrinking WxH to 1x1
return tf.reduce_mean(x, [1, 2])
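The hps object only needs to expose the hyper-parameter fields read above (batch_size, num_classes, lrn_rate, num_residual_units, use_bottleneck, weight_decay_rate, relu_leakiness, optimizer). A minimal sketch of wiring the class up, with a namedtuple standing in for the hyper-parameters and CIFAR-10-sized placeholders as assumptions:
from collections import namedtuple

HParams = namedtuple('HParams',
                     'batch_size, num_classes, lrn_rate, num_residual_units, '
                     'use_bottleneck, weight_decay_rate, relu_leakiness, optimizer')

hps = HParams(batch_size=128, num_classes=10, lrn_rate=0.1, num_residual_units=5,
              use_bottleneck=False, weight_decay_rate=0.0002, relu_leakiness=0.1,
              optimizer='mom')

# CIFAR-10-sized inputs; labels are one-hot
images = tf.placeholder(tf.float32, [128, 32, 32, 3])
labels = tf.placeholder(tf.float32, [128, 10])
model = ResNet(hps, images, labels, mode='train')
model.build_graph()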