TensorFlow Notes 5 – Model Reuse
Reference: https://github.com/ageron/handson-ml/blob/master/11_deep_learning.ipynb
1. Reusing a whole model
Method 1: copy the graph-construction code directly
# Rebuild the graph with exactly the same code, then just restore the model's
# data in a session (replace the path with your own):
import tensorflow as tf
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")
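End to end, the flow looks like this. A minimal sketch: for illustration it assumes an original model with a single hidden layer; your construction code, names, and shapes must match the checkpoint exactly:
# Minimal sketch; layer sizes and names are illustrative, they must match your checkpoint
n_inputs, n_hidden1, n_outputs = 28 * 28, 300, 10
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
logits = tf.layers.dense(hidden1, n_outputs, name="outputs")
# [...] loss, training op and eval, exactly as in the original model
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")  # no init needed: restore assigns every variable
    # [...] continue training or predict with the restored weights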
Method 2: restore the model's graph
1. Restore the graph
# The path should point to your own saved model
saver = tf.train.import_meta_graph("./my_model_final.ckpt.meta")
2. Get handles on the model's ops
# Option A: inspect the graph to see which ops it contains, then fetch them by name
# List the ops
for op in tf.get_default_graph().get_operations():
    print(op.name)
# List the variables
for var in tf.global_variables():
    print(var.name)
# Fetch the ops by name; the names must match the original graph exactly
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
accuracy = tf.get_default_graph().get_tensor_by_name("eval/accuracy:0")
training_op = tf.get_default_graph().get_operation_by_name("GradientDescent")
# Option B: in the original model, store the ops you need in a collection,
# then fetch them from the collection when reusing the model
# In the original script, before saving:
for op in (X, y, accuracy, training_op):
    tf.add_to_collection("my_important_ops", op)
# In the reuse script, after importing the meta graph:
X, y, accuracy, training_op = tf.get_collection("my_important_ops")
3. Restore the model's data in a session
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")
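Putting the three steps together, the whole reuse script reduces to a few lines (a sketch assuming the original script stored X, y, accuracy and training_op in the "my_important_ops" collection before saving):
saver = tf.train.import_meta_graph("./my_model_final.ckpt.meta")     # 1. restore the graph
X, y, accuracy, training_op = tf.get_collection("my_important_ops")  # 2. get the ops
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")                     # 3. restore the data
    # [...] continue training, e.g. sess.run(training_op, feed_dict={X: X_batch, y: y_batch})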
Reusing variables:
# If the construction code runs a second time in the same graph (e.g., when the
# model is rebuilt for prediction), mark the current scope as reusable first:
tf.get_variable_scope().reuse_variables()
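What the reuse flag actually does, in a minimal sketch (the scope "dnn" and variable "w" are made up for illustration):
with tf.variable_scope("dnn"):
    w = tf.get_variable("w", shape=(2, 3))   # first call: creates dnn/w
with tf.variable_scope("dnn", reuse=True):   # same effect as reuse_variables() inside the scope
    w_again = tf.get_variable("w")           # second call: returns the SAME variable
assert w is w_again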
2. Reusing only part of a model
Method 1: rebuild the graph, then restore only part of the model's data
1. Rebuild the graph
# The layers to be reused must be defined exactly as in the original model
# Model parameters
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 20
n_outputs = 10
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
# Build the graph, adding the layers
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")  # reused
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")  # reused
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")  # reused
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")  # new!
    logits = tf.layers.dense(hidden4, n_outputs, name="outputs")  # new!
# The rest of the graph (loss, training op, eval) is built exactly as in the original model; see the sketch below
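For reference, a sketch of that remaining part, assuming the original model used the usual cross-entropy loss, gradient descent, and in-top-k accuracy (learning_rate = 0.01 is an assumed value):
learning_rate = 0.01  # assumed hyperparameter
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")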
2. Restore part of the model's data
# Create a saver for just the variables to restore; note that it must be a
# different saver from the one used to save the new model
reuse_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                               scope="hidden[123]")  # regular expression
restore_saver = tf.train.Saver(reuse_vars)  # to restore layers 1-3
# Saver for saving the new model
saver = tf.train.Saver()
# Restore part of the model's data (full session flow shown below)
with tf.Session() as sess:
    restore_saver.restore(sess, "./my_model_final.ckpt")
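A fuller sketch of the session flow: run init first so the new layers (hidden4, outputs) get values, let restore then overwrite layers 1-3 with the pretrained weights, and save the result with the new saver ("./my_new_model_final.ckpt" is an assumed path):
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()                                            # initializes ALL variables
    restore_saver.restore(sess, "./my_model_final.ckpt")  # then overwrites hidden1-3
    # [...] train on the new task
    save_path = saver.save(sess, "./my_new_model_final.ckpt")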
Method 2: restore the graph, modify it, then restore the model's data
1. Restore the graph and get the ops you need
# Restore the graph
saver = tf.train.import_meta_graph("./my_model_final.ckpt.meta")
# Get the ops you need; mind the names
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
hidden3 = tf.get_default_graph().get_tensor_by_name("dnn/hidden3/Relu:0")  # output of the last reused layer; the name must match your original graph
2. Modify the graph by adding new layers
# Parameters of the layers to add
n_hidden4 = 20
n_outputs = 10
# Add the new layers on top of hidden3
new_hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="new_hidden4")
new_logits = tf.layers.dense(new_hidden4, n_outputs, name="new_outputs")
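Note that the loss/training/eval ops restored with the graph still point at the old output layer, so new ones have to be wired to new_logits (a sketch assuming the same loss and optimizer as the original model):
with tf.name_scope("new_loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=new_logits)
    loss = tf.reduce_mean(xentropy, name="loss")
with tf.name_scope("new_train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)  # assumed rate
    training_op = optimizer.minimize(loss)
with tf.name_scope("new_eval"):
    correct = tf.nn.in_top_k(new_logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")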
3. Restore the data (using only part of the model)
# Restore all of the original variables (using only part of the graph), or only
# some layers via a dedicated Saver as in Method 1; run init first so the new
# layers get values that the restore won't overwrite.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()  # initializes new_hidden4 / new_outputs
    saver.restore(sess, "./my_model_final.ckpt")
3. Using weights from another framework
If you only have the model's weights but not its graph, you can build the graph yourself and then load the weights into the corresponding layers.
Method 1: feed the weights in through the variables' initialization ops (adds no extra nodes to the graph)
# Model parameters
n_inputs = 2
n_hidden1 = 3
# The weight data to load
original_w = [[1., 2., 3.], [4., 5., 6.]] # Load the weights from the other framework
original_b = [7., 8., 9.] # Load the biases from the other framework
# Build the graph
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
# [...] Build the rest of the model
# Get each variable's initialization assign op; its second input is the initial value fed by the initializer
graph = tf.get_default_graph()
assign_kernel = graph.get_operation_by_name("hidden1/kernel/Assign")
assign_bias = graph.get_operation_by_name("hidden1/bias/Assign")
init_kernel = assign_kernel.inputs[1]
init_bias = assign_bias.inputs[1]
init = tf.global_variables_initializer()
with tf.Session() as sess:
    # Feed the weights in as the variables' initial values when running the initializer
    sess.run(init, feed_dict={init_kernel: original_w, init_bias: original_b})
    # [...] Train the model on your new task
    print(hidden1.eval(feed_dict={X: [[10.0, 11.0]]}))  # not shown in the book
Method 2: create dedicated placeholders and assignment nodes (simpler, but it adds extra nodes to the graph)
n_inputs = 2
n_hidden1 = 3
original_w = [[1., 2., 3.], [4., 5., 6.]] # Load the weights from the other framework
original_b = [7., 8., 9.] # Load the biases from the other framework
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
# [...] Build the rest of the model
# Get a handle on the variables of layer hidden1
with tf.variable_scope("", default_name="", reuse=True):  # root scope
    hidden1_weights = tf.get_variable("hidden1/kernel")
    hidden1_biases = tf.get_variable("hidden1/bias")
# Create dedicated placeholders and assignment nodes
original_weights = tf.placeholder(tf.float32, shape=(n_inputs, n_hidden1))
original_biases = tf.placeholder(tf.float32, shape=n_hidden1)
assign_hidden1_weights = tf.assign(hidden1_weights, original_weights)
assign_hidden1_biases = tf.assign(hidden1_biases, original_biases)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    sess.run(assign_hidden1_weights, feed_dict={original_weights: original_w})
    sess.run(assign_hidden1_biases, feed_dict={original_biases: original_b})
    # [...] Train the model on your new task
    print(hidden1.eval(feed_dict={X: [[10.0, 11.0]]}))
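Sanity check: with X = [[10., 11.]], both methods print relu([10, 11]·W + b) = [[61. 83. 105.]], since 10·1 + 11·4 + 7 = 61, 10·2 + 11·5 + 8 = 83, and 10·3 + 11·6 + 9 = 105 (all positive, so the ReLU leaves them unchanged).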