背景
在RM比赛中对于飞镖检测问题,虽然我可以通过运动物体检测和颜色检测筛除大部分干扰物体,但是依然会存在部分干扰物体。基于此考虑采用tensorflow训练飞镖头的模型(因为所有学校的飞镖头都一样,所以就不存在训练的模型最后无法使用的情况),没有采用pytorch的原因是opencv里面没办法直接调用他的pth模型,只能调用torch模型。
tensorflow
tensorflow的安装
对于这部分我采用的是anaconda3进行安装,安装的是tensorflow的cpu-1.9版本。对于anaconda3的了解百度搜索即可。下面列几个简单的常用的命令。conda create -n your_env_name python=X.X
在这里比如 your_env_name 可以修改成虚拟环境的名字。X.X可以修改成python的版本系列。conda env list
查看当前环境下有几个虚拟环境。source activate your_env_name(虚拟环境名称)
激活虚拟环境source deactivate your_env_name(虚拟环境名称)
退出虚拟环境
主体代码
卷积网络是我根据AlexNet的网络修改的,修改后变成了三个卷积层和两个全连接层、三个池化层、LRN层,采用交叉熵损失函数。
# Read the training file list and build shuffled input batches
# (40x40 RGB images, batch size 40, queue capacity 2048*3).
X_train, y_train = reader2.get_file("/home/demon/PycharmProjects/CNN/temp")
image_batch, label_batch = reader2.get_batch(X_train, y_train, 40, 40, 40, 2048*3)

# Build the whole graph on the CPU (this model was trained with tensorflow-cpu 1.9).
with tf.device('/cpu:0'):
    # hyper-parameters
    learning_rate = 1e-4
    training_iters = 200
    batch_size = 200
    display_step = 5
    n_classes = 2   # binary: dart head vs. interference object
    n_fc1 = 60      # width of the first fully-connected layer

    # graph inputs; "input_data" is the node name OpenCV's dnn module feeds later
    x = tf.placeholder(tf.float32, [None, 40, 40, 3], name="input_data")
    y = tf.placeholder(tf.int32, [None, n_classes])

    # weights for the three conv layers and two fully-connected layers
    W_conv = {
        'conv1': tf.Variable(tf.truncated_normal([5, 5, 3, 6], stddev=0.0001)),
        'conv2': tf.Variable(tf.truncated_normal([3, 3, 6, 10], stddev=0.01)),
        'conv3': tf.Variable(tf.truncated_normal([3, 3, 10, 14], stddev=0.01)),
        # 3*3*14 is the flattened size after three VALID conv+pool stages on a 40x40 input
        'fc1': tf.Variable(tf.truncated_normal([3 * 3 * 14, n_fc1], stddev=0.1)),
        'fc2': tf.Variable(tf.truncated_normal([n_fc1, n_classes], stddev=0.1)),
    }
    b_conv = {
        'conv1': tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[6])),
        'conv2': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[10])),
        'conv3': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[14])),
        'fc1': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[n_fc1])),
        'fc2': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[n_classes])),
    }

    # x already has this shape; the reshape is kept as a no-op safety net
    x_image = tf.reshape(x, [-1, 40, 40, 3])

    # conv layer 1
    conv1 = tf.nn.conv2d(x_image, W_conv['conv1'], strides=[1, 1, 1, 1], padding="VALID")
    conv1 = tf.nn.bias_add(conv1, b_conv['conv1'])
    # conv1 = batch_norm(conv1, True)  # batch norm disabled: its ops break OpenCV dnn import
    conv1 = tf.nn.relu(conv1)
    # pooling layer 1
    pool1 = tf.nn.avg_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
    # LRN layer (Local Response Normalization)
    norm1 = tf.nn.lrn(pool1, 5, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    # conv layer 2
    conv2 = tf.nn.conv2d(norm1, W_conv['conv2'], strides=[1, 1, 1, 1], padding="VALID")
    conv2 = tf.nn.bias_add(conv2, b_conv['conv2'])
    # conv2 = batch_norm(conv2, True)
    conv2 = tf.nn.relu(conv2)
    # pooling layer 2
    pool2 = tf.nn.avg_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
    # LRN layer
    norm2 = tf.nn.lrn(pool2, 5, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    # conv layer 3
    conv3 = tf.nn.conv2d(norm2, W_conv['conv3'], strides=[1, 1, 1, 1], padding="VALID")
    conv3 = tf.nn.bias_add(conv3, b_conv['conv3'])
    # conv3 = batch_norm(conv3, True)
    conv3 = tf.nn.relu(conv3)
    # pooling layer 3
    pool3 = tf.nn.avg_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
    # LRN layer
    norm3 = tf.nn.lrn(pool3, 5, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    # flatten for the fully-connected layers; shape is derived from the graph
    # instead of hard-coding 3*3*14 twice
    conv_shape = norm3.get_shape().as_list()
    node = conv_shape[1] * conv_shape[2] * conv_shape[3]
    reshaped = tf.reshape(norm3, [-1, node])

    # fully-connected layer 1
    fc1 = tf.add(tf.matmul(reshaped, W_conv['fc1']), b_conv['fc1'])
    # fc1 = batch_norm(fc1, True, False)
    fc1 = tf.nn.relu(fc1)
    # fully-connected layer 2 = classification layer; "output_data" is the node
    # OpenCV reads, so softmax (not argmax) is the exported output
    fc2 = tf.add(tf.matmul(fc1, W_conv['fc2']), b_conv['fc2'])
    y_conv = tf.nn.softmax(logits=fc2, name="output_data")

    # cross-entropy loss on the raw logits (softmax applied inside the op)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=fc2, labels=y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)
    # evaluation
    correct_pred = tf.equal(tf.argmax(fc2, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    init = tf.global_variables_initializer()

    # NOTE(review): train() is defined later in this post; in a single script it
    # must be defined before this call runs
    train(10)
编码
def onehot(labels):
    """One-hot encode a sequence of integer class labels.

    The number of classes is inferred as max(labels) + 1; row i of the
    returned float array is the one-hot vector for labels[i].
    """
    num_classes = max(labels) + 1
    # row-index an identity matrix: row k of eye(C) is the one-hot vector for k
    return np.eye(num_classes)[np.asarray(labels, dtype=int)]
正则化处理数据
def batch_norm(inputs, is_training,is_conv_out=True,decay = 0.999):
    """Batch normalization for TF1 graphs.

    At training time, normalizes with the current batch's statistics and
    updates running (population) mean/variance via an exponential moving
    average (decay=0.999). At inference time, normalizes with the stored
    population statistics.

    NOTE(review): each call creates fresh scale/beta/pop_* Variables, so this
    must be called exactly once per layer while building the graph.

    :param inputs: tensor to normalize (last axis is channels/features)
    :param is_training: Python bool chosen at graph-build time (not a placeholder)
    :param is_conv_out: True for conv activations (reduce over N,H,W), False
                        for fully-connected outputs (reduce over N only)
    :param decay: EMA decay for the population statistics
    """
    scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
    beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
    # running statistics used at inference; excluded from gradient updates
    pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
    pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)
    if is_training:
        if is_conv_out:
            batch_mean, batch_var = tf.nn.moments(inputs,[0,1,2])
        else:
            batch_mean, batch_var = tf.nn.moments(inputs,[0])
        train_mean = tf.assign(pop_mean,
                               pop_mean * decay + batch_mean * (1 - decay))
        train_var = tf.assign(pop_var,
                              pop_var * decay + batch_var * (1 - decay))
        # the control dependency forces the EMA updates to run whenever the
        # normalized output is evaluated
        with tf.control_dependencies([train_mean, train_var]):
            return tf.nn.batch_normalization(inputs,
                                             batch_mean, batch_var, beta, scale, 0.001)
    else:
        return tf.nn.batch_normalization(inputs,
                                         pop_mean, pop_var, beta, scale, 0.001)
模型训练
def train(opech):
    """Train the graph for `opech` iterations, then save ckpt and frozen .pb.

    Relies on the module-level graph nodes (x, y, loss, optimizer, init,
    image_batch, label_batch). Saves a checkpoint to ./model/CNNModel.ckpt,
    freezes the graph (output node "output_data") to ./model/123.pb, and
    writes a loss curve image.

    :param opech: number of training iterations (one batch each)
    """
    loss_history = []
    with tf.Session() as sess:
        sess.run(init)
        train_writer = tf.summary.FileWriter("./log", sess.graph)  # TensorBoard log dir
        saver = tf.train.Saver()
        start_time = time.time()
        # start the input-queue threads that feed image_batch/label_batch
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for i in range(opech):
            image, label = sess.run([image_batch, label_batch])
            labels = onehot(label)
            # fix: run optimizer and loss in ONE call so the forward pass is
            # not computed twice and the reported loss matches the step taken
            _, loss_record = sess.run([optimizer, loss],
                                      feed_dict={x: image, y: labels})
            print("now the loss is %f " % loss_record)
            loss_history.append(loss_record)
            end_time = time.time()
            print('time: ', (end_time - start_time))
            start_time = end_time
            print("---------------%d epoch is finished-------------------" % i)
        print("Optimization Finished!")
        # fix: save_model was previously undefined here (NameError at save time)
        save_model = "./model/CNNModel.ckpt"
        saver.save(sess, save_model)
        print("Model Save Finished!")
        # freeze variables into constants and export a .pb for OpenCV dnn
        output_graph_def = graph_util.convert_variables_to_constants(
            sess=sess,
            input_graph_def=sess.graph_def,
            output_node_names=['output_data']
        )
        with tf.gfile.GFile("./model/123.pb", 'wb') as fd:
            fd.write(output_graph_def.SerializeToString())
        coord.request_stop()
        coord.join(threads)
    # plot the training loss curve
    plt.plot(loss_history)
    plt.xlabel('Iter')
    plt.ylabel('loss')
    plt.title('lr=%f, ti=%d, bs=%d' % (learning_rate, training_iters, batch_size))
    plt.tight_layout()
    plt.savefig('0_and_d1_AlexNet.jpg', dpi=200)
验证图片分类结果
def per_class(imagefile):
    """Classify a single image file with the trained graph.

    Restores the latest checkpoint from ./model and runs the logits node fc2
    on the preprocessed image.

    :param imagefile: path to an image readable by PIL
    :return: "0" for class 0, "1" otherwise
    """
    # fix: force 3 channels so grayscale/RGBA inputs still reshape to [1,40,40,3]
    image = Image.open(imagefile).convert('RGB')
    image = image.resize([40, 40])
    image_array = np.array(image)
    image = tf.cast(image_array, tf.float32)
    image = tf.image.per_image_standardization(image)
    # fix: reshape once (the original reshaped the same tensor twice)
    image = tf.reshape(image, [1, 40, 40, 3])
    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt_path = tf.train.latest_checkpoint('./model')
        saver.restore(sess, ckpt_path)
        image = sess.run(image)
        # feed through the module-level placeholder x and read the logits fc2
        prediction = sess.run(fc2, feed_dict={x: image})
        max_index = np.argmax(prediction)
        return "0" if max_index == 0 else "1"
读取本地数据
这里是二分类,可以根据实际需要选择多个分类。
def get_file(file_dir):
    """Collect image paths and binary labels from a class-per-subfolder tree.

    file_dir is expected to contain one subfolder per class; files in the
    subfolder named '0' get label 0, every other subfolder gets label 1.
    Images and labels are shuffled together (pairing preserved).

    :param file_dir: root directory to walk
    :return: (image_list, label_list) — paths (str) and int labels
    """
    images = []
    class_dirs = []
    for root, sub_folders, files in os.walk(file_dir):
        for name in files:
            images.append(os.path.join(root, name))
        for name in sub_folders:
            class_dirs.append(os.path.join(root, name))
    labels = []
    for one_folder in class_dirs:
        n_img = len(os.listdir(one_folder))
        # fix: basename instead of split('/')[-1] — works on Windows paths too
        letter = os.path.basename(one_folder)
        if letter == '0':
            labels = np.append(labels, n_img * [0])
        else:
            labels = np.append(labels, n_img * [1])
    # shuffle paths and labels together by shuffling (path, label) rows
    temp = np.array([images, labels])
    temp = temp.transpose()
    np.random.shuffle(temp)
    image_list = list(temp[:, 0])
    # labels round-trip through a string array, hence int(float(...))
    label_list = [int(float(i)) for i in temp[:, 1]]
    return image_list, label_list
opencv调用模型
ckpt模型转为pb模型
通过上面的代码我们发现保存的模型有四个ckpt相关文件以及一个pb文件,但是也可以单独将ckpt文件转为pb文件。下面将展示代码。
from tensorflow.python.framework import graph_util
import tensorflow as tf
def freeze_graph(input_checkpoint, output_graph):
    '''
    Freeze a TF1 checkpoint into a single self-contained .pb GraphDef.

    :param input_checkpoint: checkpoint path prefix (a matching .meta must exist)
    :param output_graph: path where the frozen PB model is written
    :return: None
    '''
    # checkpoint = tf.train.get_checkpoint_state(model_folder)  # check whether a usable ckpt exists in the directory
    # input_checkpoint = checkpoint.model_checkpoint_path  # get the ckpt file path
    # Name of the output node to keep; it must exist in the original graph.
    output_node_names = "output_data"
    saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=True)
    with tf.Session() as sess:
        saver.restore(sess, input_checkpoint)  # restore the graph and its weights
        output_graph_def = graph_util.convert_variables_to_constants(  # freeze: bake variable values into constants
            sess=sess,
            input_graph_def=sess.graph_def,  # same as sess.graph_def
            output_node_names=output_node_names.split(","))  # comma-separate multiple output nodes
        with tf.gfile.GFile(output_graph, "wb") as f:  # save the frozen model
            f.write(output_graph_def.SerializeToString())  # serialize to disk
        print("%d ops in the final graph." % len(output_graph_def.node))  # op count in the frozen graph

# input ckpt model path
input_checkpoint='model/CNNModel.ckpt'
# output pb model path
out_pb_path="model/frozen_model.pb"
# call freeze_graph to convert the ckpt into a pb
freeze_graph(input_checkpoint,out_pb_path)
查看pb类型文件的节点名
根据我这几天研究搜集资料发现,当tensorflow的节点中出现div、sub、argmax的时候,opencv是没办法调用模型的。
import tensorflow as tf
from tensorflow.python.platform import gfile
from google.protobuf import text_format
import os
# model_dir = './'
# Location of the frozen PB model whose node names we want to inspect.
out_path = '/home/demon/PycharmProjects/my_tensorflow/model/'
model_name = '123.pb'

def create_graph():
    """Parse the frozen .pb file and import it into the default graph."""
    with tf.gfile.FastGFile(os.path.join(out_path + model_name), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        # name='' keeps the original node names (no "import/" prefix)
        tf.import_graph_def(graph_def, name='')

create_graph()
# Print every node name in the imported graph — used to find which ops
# (e.g. Div/Sub/ArgMax) would break OpenCV's dnn importer.
tensor_name_list = [tensor.name for tensor in tf.get_default_graph().as_graph_def().node]
for tensor_name in tensor_name_list:
    print(tensor_name, '\n')
比如我的输出:
input_data
Variable
Variable/read
Variable_1
Variable_1/read
Variable_2
Variable_2/read
Variable_3
Variable_3/read
Variable_4
Variable_4/read
Variable_5
Variable_5/read
Variable_6
Variable_6/read
Variable_7
Variable_7/read
Variable_8
Variable_8/read
Variable_9
Variable_9/read
Reshape_1/shape
Reshape_1
Conv2D
BiasAdd
Relu
AvgPool
LRN
Conv2D_1
BiasAdd_1
Relu_1
AvgPool_1
LRN_1
Conv2D_2
BiasAdd_2
Relu_2
AvgPool_2
LRN_2
Reshape_2/shape
Reshape_2
MatMul
Add
Relu_3
MatMul_1
Add_1
output_data
但是当我将代码中加入正则化(batch normalization)的时候,则会发现opencv无法调用模型。所以采用牺牲一点性能的方法,以方便opencv调用模型。
opencv调用模型的代码
class Classifier_ {
public:
Classifier_() {
std::cout<<"success to create a classifier class"<<std::endl;
}
Classifier_(std::string model_path) {
std::cout << model_path << std::endl;
net = cv::dnn::readNetFromTensorflow(model_path);
//net = cv::dnn::readNet(model_path);
if (!net.empty()) {
std::cout << "Model load success!" << std::endl;
} else {
std::cerr << "Model load failure!" << std::endl;
}
}
int Roi2ID(cv::Mat &roi) {
cv::Mat inputBlob = cv::dnn::blobFromImage(roi, 1.0 / 255., cv::Size(), cv::Scalar(), false, false);
//执行图像分类
cv::Mat prob;
net.setInput(inputBlob, "input_data"); //输入节点模型
prob = net.forward("output_data"); //输出模型
std::cout << prob << std::endl;
cv::Mat probMat = prob.reshape(1, 1);
cv::Point classNumber;
double classProb;
cv::minMaxLoc(probMat, NULL, &classProb, NULL, &classNumber);
return classNumber.x;
}
private:
cv::dnn::Net net;
};