TensorFlow Notes 5 - Model Reuse

Reference: https://github.com/ageron/handson-ml/blob/master/11_deep_learning.ipynb

I. Reusing a Model

Method 1: copy the original graph-construction code directly

# With the graph rebuilt, just restore the model's variables in a session;
# replace the path with your own checkpoint
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")
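
For completeness, here is a minimal end-to-end sketch of this approach. The one-hidden-layer architecture and the random stand-in batch are assumptions for illustration; the construction code must match the saved model exactly, because the checkpoint has to contain a value for every variable in the graph.

import numpy as np
import tensorflow as tf

# Re-run the ORIGINAL construction code (a one-hidden-layer net is assumed here)
n_inputs, n_hidden1, n_outputs = 28 * 28, 300, 10
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None,), name="y")
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
logits = tf.layers.dense(hidden1, n_outputs, name="outputs")
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
training_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
saver = tf.train.Saver()

with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")  # restore sets every variable, so no init is needed
    X_batch = np.random.rand(32, n_inputs).astype(np.float32)  # stand-in data
    y_batch = np.random.randint(0, n_outputs, size=32)
    sess.run(training_op, feed_dict={X: X_batch, y: y_batch})  # continue training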

Method 2: restore the model's graph

1. Restore the model's graph

# import_meta_graph() loads the saved graph structure into the default graph
# and returns a Saver that can then restore the variable values.
# The path should point to your own saved model.
saver = tf.train.import_meta_graph("./my_model_final.ckpt.meta")

2. Get handles on the model's ops

# Option 1: list the ops in the model, then look them up by name

# List the operations
for op in tf.get_default_graph().get_operations():
    print(op.name)
# List the variables
for var in tf.global_variables():
    print(var.name)

# Look up each handle by name; the names must match exactly
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
accuracy = tf.get_default_graph().get_tensor_by_name("eval/accuracy:0")
training_op = tf.get_default_graph().get_operation_by_name("GradientDescent")
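
A detail that trips people up: get_tensor_by_name() takes a tensor name, which is the op name plus an output index (e.g. "X:0"), while get_operation_by_name() takes the bare op name. A quick sketch:

graph = tf.get_default_graph()
x_tensor = graph.get_tensor_by_name("X:0")   # output 0 of the op named "X"
x_op = graph.get_operation_by_name("X")      # the op itself
print(x_tensor.op is x_op)                   # True: the tensor belongs to that op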

# Option 2: in the original model, store the ops you need in a collection;
# when reusing the model, retrieve them from that collection

# In the original model, add the needed ops to a named collection
for op in (X, y, accuracy, training_op):
    tf.add_to_collection("my_important_ops", op)

# When reusing the model, retrieve the ops from the collection
X, y, accuracy, training_op = tf.get_collection("my_important_ops")
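
The collection travels with the graph: add_to_collection() entries are serialized into the .meta file, so after import_meta_graph() a single get_collection() call recovers all the handles without your having to know any op names.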

3. Restore the model's variables in a session

with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")
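
Putting the pieces together, a minimal sketch of the whole Method 2 workflow. It assumes the original model stored its handles in the "my_important_ops" collection, and the 784-dimensional random stand-in batch with 10 classes is an assumption for illustration:

import numpy as np
import tensorflow as tf

saver = tf.train.import_meta_graph("./my_model_final.ckpt.meta")  # rebuild the graph
X, y, accuracy, training_op = tf.get_collection("my_important_ops")

with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")              # load the weights
    X_batch = np.random.rand(32, 28 * 28).astype(np.float32)  # stand-in batch
    y_batch = np.random.randint(0, 10, size=32)
    sess.run(training_op, feed_dict={X: X_batch, y: y_batch})  # continue training
    print(accuracy.eval(feed_dict={X: X_batch, y: y_batch}))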

Reusing variables:

# If the same layers must be built a second time in the same graph (e.g., once
# more for prediction), mark the current variable scope as reusing so that the
# existing variables are shared instead of raising an "already exists" error
tf.get_variable_scope().reuse_variables()
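
As a concrete illustration of the sharing behavior (the layer sizes here are assumptions): building the same named layer twice normally raises a "variable already exists" error; after marking the scope as reusing, the second call shares the first call's variables.

X = tf.placeholder(tf.float32, shape=(None, 784), name="X")
train_out = tf.layers.dense(X, 300, activation=tf.nn.relu, name="hidden1")  # creates hidden1/kernel, hidden1/bias
tf.get_variable_scope().reuse_variables()  # the current (root) scope now reuses
pred_out = tf.layers.dense(X, 300, activation=tf.nn.relu, name="hidden1")   # shares the same variables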


II. Reusing Only Part of a Model

Method 1: rebuild the graph, then restore only part of the model's variables

1. Rebuild the graph

# The layers to be reused must be defined exactly as in the original model
# Model parameters
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 20
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None,), name="y")

# Build the graph: the reused layers plus the new ones
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")       # reused
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2") # reused
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3") # reused
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4") # new!
    logits = tf.layers.dense(hidden4, n_outputs, name="outputs")                         # new!

# The rest of the graph (loss, evaluation, training op) is built exactly as in the original model

2. Restore part of the model's variables

# Create a saver that restores only the selected variables; keep it distinct
# from the saver used to save the new model
reuse_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                               scope="hidden[123]") # regular expression
restore_saver = tf.train.Saver(reuse_vars) # to restore layers 1-3

# Saver for the new model
saver = tf.train.Saver()

# Restore part of the model's variables. On its own this leaves the new layers
# (hidden4, outputs) uninitialized; run a global initializer first, as in the
# sketch below
with tf.Session() as sess:
    restore_saver.restore(sess, "./my_model_final.ckpt")
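
The rest of the program then looks like the original training loop. A minimal sketch, assuming (as in the source notebook) softmax cross-entropy loss and plain gradient descent with learning rate 0.01; initialize everything first, then let the restore overwrite layers 1-3:

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
with tf.name_scope("train"):
    training_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()                                            # initialize hidden4 and outputs
    restore_saver.restore(sess, "./my_model_final.ckpt")  # overwrite hidden1-3
    # [...] train on the new task as usual
    saver.save(sess, "./my_new_model_final.ckpt")         # save the complete new model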

Method 2: restore the graph, modify it, then restore the model's variables

1. Restore the graph and get handles on the ops you need

# Restore the graph structure
saver = tf.train.import_meta_graph("./my_model_final.ckpt.meta")

# Get the tensors you need; the names must match the original graph exactly
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
# Output of the last layer we want to reuse (the third hidden layer)
hidden3 = tf.get_default_graph().get_tensor_by_name("dnn/hidden3/Relu:0")

2. Modify the graph by adding new layers

# Parameters of the new layers
n_hidden4 = 20
n_outputs = 10

# Add the new layers on top of the reused part
new_hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="new_hidden4")
new_logits = tf.layers.dense(new_hidden4, n_outputs, name="new_outputs")

3. Restore the model's variables

# You can restore all of the old variables (the parts you do not use are simply
# ignored) or, as in Method 1, restore only selected layers. Initialize the new
# layers' variables first; see the sketch below
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")
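
The imported graph still contains the old loss and training op wired to the old output layer, so a new loss and training op are needed on top of new_logits, and the new layers' variables must be initialized before the restore. A minimal sketch, assuming softmax cross-entropy and gradient descent with learning rate 0.01:

with tf.name_scope("new_loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=new_logits)
    loss = tf.reduce_mean(xentropy, name="loss")
with tf.name_scope("new_train"):
    training_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

init = tf.global_variables_initializer()
new_saver = tf.train.Saver()  # saves the modified model

with tf.Session() as sess:
    init.run()                                    # initialize everything, new layers included
    saver.restore(sess, "./my_model_final.ckpt")  # overwrite all the old variables
    # [...] train on the new task
    new_saver.save(sess, "./my_new_model_final.ckpt")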

III. Using Weights from Another Framework

If you only have the model's weight values but not its graph, you can build the graph yourself and then load the weights into the corresponding layers.

Method 1: feed the weights through the variables' initialization ops

# Model parameters
n_inputs = 2
n_hidden1 = 3

# Weight data
original_w = [[1., 2., 3.], [4., 5., 6.]] # Load the weights from the other framework
original_b = [7., 8., 9.]                 # Load the biases from the other framework

# Build the graph
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
# [...] Build the rest of the model

# Grab each variable's initialization op ("<var>/Assign"); its second input
# is the initial-value tensor, which can be fed when running the initializer
graph = tf.get_default_graph()
assign_kernel = graph.get_operation_by_name("hidden1/kernel/Assign")
assign_bias = graph.get_operation_by_name("hidden1/bias/Assign")
init_kernel = assign_kernel.inputs[1]
init_bias = assign_bias.inputs[1]

init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Feed the external weights as the variables' initial values
    sess.run(init, feed_dict={init_kernel: original_w, init_bias: original_b})
    # [...] Train the model on your new task
    print(hidden1.eval(feed_dict={X: [[10.0, 11.0]]}))  # prints [[ 61.  83. 105.]]
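
The point of going through the Assign ops is that nothing new is added to the graph: the weights are injected by feeding the variables' existing initialization nodes, whereas Method 2 below creates extra placeholder and assignment nodes that stay in the graph.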

Method 2: create dedicated placeholders and assignment ops

n_inputs = 2
n_hidden1 = 3

original_w = [[1., 2., 3.], [4., 5., 6.]] # Load the weights from the other framework
original_b = [7., 8., 9.]                 # Load the biases from the other framework

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
# [...] Build the rest of the model

# Get a handle on the variables of layer hidden1
with tf.variable_scope("", default_name="", reuse=True):  # root scope
    hidden1_weights = tf.get_variable("hidden1/kernel")
    hidden1_biases = tf.get_variable("hidden1/bias")

# Create dedicated placeholders and assignment nodes
original_weights = tf.placeholder(tf.float32, shape=(n_inputs, n_hidden1))
original_biases = tf.placeholder(tf.float32, shape=n_hidden1)
assign_hidden1_weights = tf.assign(hidden1_weights, original_weights)
assign_hidden1_biases = tf.assign(hidden1_biases, original_biases)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    sess.run(assign_hidden1_weights, feed_dict={original_weights: original_w})
    sess.run(assign_hidden1_biases, feed_dict={original_biases: original_b})
    # [...] Train the model on your new task
    print(hidden1.eval(feed_dict={X: [[10.0, 11.0]]}))
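
Method 2 is more readable but permanently adds two placeholders and two assignment ops to the graph. TF 1.x also offers tf.Variable.load(), which pushes a value into a variable through its existing initializer without growing the graph; a brief sketch of that variant, reusing the handles obtained above:

with tf.Session() as sess:
    sess.run(init)
    hidden1_weights.load(original_w, session=sess)  # no extra graph nodes
    hidden1_biases.load(original_b, session=sess)
    print(hidden1.eval(feed_dict={X: [[10.0, 11.0]]}))  # prints [[ 61.  83. 105.]]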