本文为OpenCV DNN模块官方教程的扩展,介绍如何使用OpenCV加载TensorFlow Object Detection API训练的模型做目标检测,以SSD和Faster-RCNN为例。
TensorFlow Object Detection API的github链接地址如下:
https://github.com/tensorflow/models/tree/master/research/object_detection 现同时支持TensorFlow 1.x和TensorFlow 2.x版本。
本文以TensorFlow 1.x为例(TF2.x待OpenCV稳定支持后再介绍)。OpenCV DNN模块调用SSD和Faster-RCNN模型检测目标的步骤如下:
(1) 下载或自己训练生成 .pb 格式的模型文件。本文以Model Zoo中的ssd_mobilenet_v1_coco为例,下载解压后得到frozen_inference_graph.pb
(2) 使用指令用.pb文件生成.pbtxt文件, SSD模型使用tf_text_graph_ssd.py, Faster-RCNN模型使用tf_text_graph_faster_rcnn.py
SSD:
Faster-RCNN:
主要参数有三个:
--input 输入.pb模型文件完整路径;
--output 输出.pbtxt文件完整路径;
--config 输入config文件完整路径
完整指令:
python tf_text_graph_ssd.py --input E:\Practice\TensorFlow\model\ssd_mobilenet_v1_coco_2018_01_28\frozen_inference_graph.pb --output E:\Practice\TensorFlow\model\ssd_mobilenet_v1_coco_2018_01_28\frozen_inference_graph.pbtxt --config D:\models\research\object_detection\samples\configs\ssd_mobilenet_v1_coco.config
运行结果:
(3) 配置OpenCV4.4,加载图片测试 ,代码如下:
#include<opencv2/opencv.hpp>
#include<opencv2/dnn.hpp>
#include <iostream>
using namespace std;
using namespace cv;
using namespace dnn;
// Side length (in pixels) of the square network input blob.
const size_t blobSize = 300;

// Class-id -> label table, indexed directly by the class id reported by the
// network. Index 0 is "background", which the SSD variant requires.
// The table covers the complete COCO id range (0..91); entries such as
// "street sign" or "hat" are kept so that the array index stays equal to the
// class id even for ids that are absent from the 80-class COCO label map.
const char* classNames[] = { "background", "person", "bicycle", "car",
    "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
    "street sign", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
    "cow", "elephant", "bear", "zebra", "giraffe", "hat", "backpack", "umbrella", "shoe",
    "eye glasses", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
    "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    "bottle", "plate", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
    "couch", "potted plant", "bed", "mirror", "dining table", "window", "desk", "toilet", "door",
    "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster",
    "sink", "refrigerator", "blender", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush", "hair brush" };

// Frozen TensorFlow graph (.pb) and the text graph (.pbtxt) generated from it.
String weights = "./model2/frozen_inference_graph.pb";
String prototxt = "./model2/frozen_inference_graph.pbtxt";

// Image that main() runs the detector on.
string imgPath = "./imgs/test.jpg";

// The network is constructed during static initialization, i.e. before main()
// starts, so the model files above must be present when the program launches.
dnn::Net net = cv::dnn::readNetFromTensorflow(weights, prototxt);
// Runs the global `net` on `frame`, draws a box and a "<class>: <score>" label
// for every detection whose confidence exceeds 0.5, prints the elapsed time to
// stdout, and returns the annotated image.
//
// @param frame  Image to analyse; it is annotated in place.
// @return       `frame` itself (a Mat header sharing the same pixel data).
Mat object_detection(Mat &frame)
{
    const double start = (double)getTickCount();

    // TensorFlow detection graphs expect RGB input while OpenCV images are
    // BGR, so swap the R and B channels. crop=false resizes the whole image.
    const int inputSide = static_cast<int>(blobSize);
    cv::Mat blob = cv::dnn::blobFromImage(frame, 1.0, Size(inputSide, inputSide),
                                          Scalar(), true, false);
    //cout << "blob size: " << blob.size << endl;

    net.setPreferableBackend(DNN_BACKEND_OPENCV);
    net.setPreferableTarget(DNN_TARGET_CPU);
    net.setInput(blob);
    Mat output = net.forward();
    //cout << "output size: " << output.size << endl;

    // The loop below reads 7 values per detection; fail loudly instead of
    // reading out of bounds if the network output has a different layout.
    CV_Assert(output.dims == 4 && output.size[3] == 7);

    // View the last two dimensions as an N x 7 matrix without copying. The
    // columns read here are: 1 = class id, 2 = confidence, 3..6 = box corners
    // as fractions of the image width/height.
    Mat detectionMat(output.size[2], output.size[3], CV_32F, output.ptr<float>());

    const float confidenceThreshold = 0.50f;
    const size_t classCount = sizeof(classNames) / sizeof(classNames[0]);

    for (int i = 0; i < detectionMat.rows; i++)
    {
        const float* det = detectionMat.ptr<float>(i);
        const float confidence = det[2];
        if (!(confidence > confidenceThreshold))
            continue;

        const int xLeftBottom = static_cast<int>(det[3] * frame.cols);
        const int yLeftBottom = static_cast<int>(det[4] * frame.rows);
        const int xRightTop = static_cast<int>(det[5] * frame.cols);
        const int yRightTop = static_cast<int>(det[6] * frame.rows);

        const Rect object(xLeftBottom, yLeftBottom,
                          xRightTop - xLeftBottom,
                          yRightTop - yLeftBottom);
        rectangle(frame, object, Scalar(255, 0, 255), 2);

        // The class id comes from the network; never index classNames with a
        // value outside the table (e.g. a model with more classes than names).
        const int classId = static_cast<int>(det[1]);
        const bool knownClass = classId >= 0 && static_cast<size_t>(classId) < classCount;
        const String className = knownClass ? String(classNames[classId])
                                            : cv::format("id %d", classId);
        const String label = className + ": " + cv::format("%0.2f", confidence);

        int baseLine = 0;
        const Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.7, 1, &baseLine);

        // Keep the label inside the image when the box touches the top edge.
        const int labelY = (yLeftBottom < labelSize.height) ? labelSize.height : yLeftBottom;
        rectangle(frame, Rect(Point(xLeftBottom, labelY - labelSize.height),
                              Size(labelSize.width, labelSize.height + baseLine)),
                  Scalar(0, 255, 255), -1);
        putText(frame, label, Point(xLeftBottom, labelY),
                FONT_HERSHEY_SIMPLEX, 0.7, Scalar(255, 0, 0), 2);
    }

    const double end = (double)getTickCount();
    cout << "use_time :" << (end - start) * 1000.0 / cv::getTickFrequency() << " ms \n";
    return frame;
}
int main(int argc, char** agrv)
{
Mat frame = cv::imread(imgPath);
if (frame.empty())
{
cout << "img is empty......" << endl;
return 1;
}
Mat result = object_detection(frame);
imshow("OpenCV DNN Test", result);
imwrite("result.jpg", result);
waitKey(0);
return 0;
}
Faster-RCNN使用tf_text_graph_faster_rcnn.py进行转换,classNames[]中不需要设置"background".
更多资讯请关注公众号:OpenCV与AI深度学习