ubuntu下基于OpenCV4实现Mask R-CNN实例分割
- 1、模型下载
- 2、模型初始化
- 3、代码测试:
- 4、运行结果
- 5、参考链接
1、模型下载
模型数据
2、模型初始化
Mask R-CNN 算法的输出是一组边界框,每个边界框都带有一个置信度分数,置信度低于阈值参数 confThreshold 的边界框将被忽略。网络输出的对象掩码是灰度图像;由于本教程使用二值掩码,因此我们用 maskThreshold 参数对灰度掩码图像进行阈值化。降低该值会得到更大的掩码:这有时有助于补上边界附近遗漏的部分,但同时也可能把边界较尖锐处的背景像素包含进来。
文件 mscoco_labels.names 包含模型能够预测的所有对象类别名称;colors.txt 文件包含用于标记各类对象的颜色。
接下来,我们使用以下两个文件加载网络:
mask_rcnn_inception_v2_coco.pb:预先训练的权重;
mask_rcnn_inception_v2_coco.pbtxt:模型结构文件;
下载得到的模型数据中有一个 frozen_inference_graph.pb 文件,这里将其重命名为 mask_rcnn_inception_v2_coco.pb。
其余配置文件mscoco_labels.names、colors.txt、mask_rcnn_inception_v2_coco.pbtxt下载地址为:
https://github.com/luohenyueji/OpenCV-Practical-Exercise/tree/master/ITEM%2010-19/ITEM13%20Mask%20R-CNN%20in%20OpenCV/model
3、代码测试:
main.cpp
// Mask R-CNN in OpenCV.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//
#include <string.h>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
// Pull the OpenCV, OpenCV dnn (resolved as cv::dnn through the previous directive)
// and standard library names into the global scope for the rest of this file.
using namespace cv;
using namespace dnn;
using namespace std;
// Tunable parameters shared by main(), postprocess() and drawBox().
// Confidence threshold: postprocess() only keeps detections whose score is greater than this value.
float confThreshold = 0.5;
// Mask threshold: drawBox() treats mask values greater than this as belonging to the object.
float maskThreshold = 0.3;
// Class names, filled by main() with one entry per line of the labels file.
vector<string> classes;
// Overlay colors, filled by main() with one entry per line of the colors file.
vector<Scalar> colors;
// Draw the predicted bounding box, its label and the colorized mask onto the frame
void drawBox(Mat &frame, int classId, float conf, Rect box, Mat &objectMask);
// Postprocess the neural network's output for each frame
void postprocess(Mat &frame, const vector<Mat> &outs);
int main()
{
//0-image,1-video,2-camera
int read_file = 0;
// Load names of classes 导入分类名文件
string classesFile = "/home/bruce/study/OpenCV_MaskRCNN/model/mscoco_labels.names";
ifstream ifs(classesFile.c_str());
string line;
while (getline(ifs, line))
{
classes.push_back(line);
}
// Load the colors 导入颜色类文件
string colorsFile = "/home/bruce/study/OpenCV_MaskRCNN/model/colors.txt";
ifstream colorFptr(colorsFile.c_str());
while (getline(colorFptr, line))
{
char *pEnd;
double r, g, b;
//字符串转换成浮点数
r = strtod(line.c_str(), &pEnd);
g = strtod(pEnd, NULL);
b = strtod(pEnd, NULL);
Scalar color = Scalar(r, g, b, 255.0);
colors.push_back(Scalar(r, g, b, 255.0));
}
// Give the configuration and weight files for the model
String textGraph = "/home/bruce/study/OpenCV_MaskRCNN/model/mask_rcnn_inception_v2_coco.pbtxt";
String modelWeights = "/home/bruce/study/OpenCV_MaskRCNN/model/mask_rcnn_inception_v2_coco.pb";
// Load the network 导入网络
Net net = readNetFromTensorflow(modelWeights, textGraph);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
//只使用CPU
net.setPreferableTarget(DNN_TARGET_CPU);
// Open a video file or an image file or a camera stream.
string str, outputFile;
VideoCapture cap;
VideoWriter video;
Mat frame, blob;
try
{
//输出文件,默认是视频
outputFile = "mask_rcnn_out_cpp.avi";
if (read_file == 0)
{
// Open the image file 打开图像文件
str = "/home/bruce/study/OpenCV_MaskRCNN/image/cars.jpg";
//cout << "Image file input : " << str << endl;
ifstream ifile(str);
if (!ifile)
{
throw("error");
}
frame = imread(str);
str.replace(str.end() - 4, str.end(), "_mask_rcnn_out.jpg");
outputFile = str;
}
else if (read_file == 1)
{
// Open the video file 打开视频文件
str = "./image/cars.mp4";
ifstream ifile(str);
if (!ifile)
{
throw("error");
}
cap.open(str);
str.replace(str.end() - 4, str.end(), "_mask_rcnn_out.avi");
outputFile = str;
}
// Open the webcam 打开摄像头
else
{
cap.open(0);
}
}
catch (...)
{
cout << "Could not open the input image/video stream" << endl;
return 0;
}
// Get the video writer initialized to save the output video 如果读入的不是图像,生成输出视频
if (read_file != 0)
{
video.open(outputFile, VideoWriter::fourcc('M', 'J', 'P', 'G'), 28,
Size(cap.get(CAP_PROP_FRAME_WIDTH), cap.get(CAP_PROP_FRAME_HEIGHT)));
}
// Create a window 显示窗口
static const string kWinName = "Deep learning object detection in OpenCV";
//Process frames 处理图像
while (waitKey(1) < 0)
{
//如果是视频
if (read_file != 0)
{
// get frame from the video 获取单帧图像
cap >> frame;
}
// Stop the program if reached end of video 如果图像不存在
if (frame.empty())
{
cout << "Done processing !!!" << endl;
cout << "Output file is stored as " << outputFile << endl;
waitKey(0);
break;
}
// Create a 4D blob from a frame 获得深度学习的输入图像
blobFromImage(frame, blob, 1.0, Size(frame.cols, frame.rows), Scalar(), true, false);
//blobFromImage(frame, blob);
//Sets the input to the network 设置输入
net.setInput(blob);
// Runs the forward pass to get output from the output layers 获得输出层
std::vector<String> outNames(2);
outNames[0] = "detection_out_final";
outNames[1] = "detection_masks";
vector<Mat> outs;
net.forward(outs, outNames);
// Extract the bounding box and mask for each of the detected objects 提取预测框和掩模
postprocess(frame, outs);
// Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
string label = format("Mask-RCNN Inference time for a frame : %0.0f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
// Write the frame with the detection boxes 保存结果
Mat detectedFrame;
frame.convertTo(detectedFrame, CV_8U);
namedWindow(kWinName, WINDOW_NORMAL);
imshow(kWinName, frame);
//enter退出
if (waitKey(1000) == 27)
{
break;
}
if (read_file == 0)
{
imwrite(outputFile, detectedFrame);
break;
}
else
{
video.write(detectedFrame);
}
}
cap.release();
//释放生成的视频
if (read_file != 0)
{
video.release();
}
return 0;
}
/**
 * @brief For each frame, extract the bounding box and mask for each detected object
 *        and draw them onto the frame.
 *
 * Detections whose score is not greater than `confThreshold`, whose class id does
 * not index a mask channel, or whose clamped box is empty are skipped.
 *
 * @param frame Image the detections refer to; it is annotated in place.
 * @param outs  Network outputs: outs[0] holds the detections (7 floats per detection),
 *              outs[1] holds the masks.
 */
void postprocess(Mat &frame, const vector<Mat> &outs)
{
    CV_Assert(outs.size() >= 2);
    // Detection results
    Mat outDetections = outs[0];
    // Mask results
    Mat outMasks = outs[1];

    // Output size of masks is NxCxHxW where
    // N - number of detected boxes
    // C - number of classes (excluding background)
    // HxW - segmentation shape
    const int numDetections = outDetections.size[2];
    const int numClasses = outMasks.size[1];

    // One row of 7 values per detection; columns used below:
    // 1 = class id, 2 = score, 3..6 = left, top, right, bottom (scaled by the frame size below)
    outDetections = outDetections.reshape(1, static_cast<int>(outDetections.total() / 7));

    for (int i = 0; i < numDetections; ++i)
    {
        const float score = outDetections.at<float>(i, 2);
        if (!(score > confThreshold))
        {
            continue;
        }

        // The class id selects the mask channel below, so it must be a valid channel index
        const int classId = static_cast<int>(outDetections.at<float>(i, 1));
        if (classId < 0 || classId >= numClasses)
        {
            continue;
        }

        // Extract the bounding box
        int left = static_cast<int>(frame.cols * outDetections.at<float>(i, 3));
        int top = static_cast<int>(frame.rows * outDetections.at<float>(i, 4));
        int right = static_cast<int>(frame.cols * outDetections.at<float>(i, 5));
        int bottom = static_cast<int>(frame.rows * outDetections.at<float>(i, 6));

        // Keep the box inside the frame
        left = max(0, min(left, frame.cols - 1));
        top = max(0, min(top, frame.rows - 1));
        right = max(0, min(right, frame.cols - 1));
        bottom = max(0, min(bottom, frame.rows - 1));
        Rect box = Rect(left, top, right - left + 1, bottom - top + 1);

        // right < left or bottom < top yields an empty box that cannot be resized to or used as a ROI
        if (box.width <= 0 || box.height <= 0)
        {
            continue;
        }

        // Extract the mask for the object: a header over the HxW plane of detection i, class classId
        Mat objectMask(outMasks.size[2], outMasks.size[3], CV_32F, outMasks.ptr<float>(i, classId));

        // Draw bounding box, colorize and show the mask on the image
        drawBox(frame, classId, score, box, objectMask);
    }
}
/**
 * @brief Draw the predicted bounding box, its label, and the colorized mask on the image.
 *
 * @param frame      Image to annotate in place.
 * @param classId    Class index of the detection; selects the label from `classes`
 *                   and the overlay color from `colors`.
 * @param conf       Confidence score, printed with two decimals in the label.
 * @param box        Bounding box in frame coordinates; must lie inside the frame.
 * @param objectMask Mask of the object; it is resized in place to the box size.
 */
void drawBox(Mat &frame, int classId, float conf, Rect box, Mat &objectMask)
{
    // An empty box can neither be used as a resize target nor as a ROI
    if (box.width <= 0 || box.height <= 0)
    {
        return;
    }

    // Draw a rectangle displaying the bounding box
    rectangle(frame, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), Scalar(255, 178, 50), 3);

    // Get the label for the class name and its confidence
    string label = format("%.2f", conf);
    if (!classes.empty())
    {
        CV_Assert(classId >= 0 && classId < (int)classes.size());
        label = classes[classId] + ":" + label;
    }

    // Display the label at the top of the bounding box
    int baseLine;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    // Keep the label inside the image for boxes touching the top edge. This uses
    // its own variable: moving box.y itself would shift the mask ROI below and
    // could push it past the bottom of the frame.
    const int labelY = max(box.y, labelSize.height);
    rectangle(frame,
              Point(box.x, labelY - static_cast<int>(round(1.5 * labelSize.height))),
              Point(box.x + static_cast<int>(round(1.5 * labelSize.width)), labelY + baseLine),
              Scalar(255, 255, 255), FILLED);
    putText(frame, label, Point(box.x, labelY), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 0, 0), 1);

    // Overlay color for this class; fall back to the box color when no colors were loaded
    // (indexing with `% colors.size()` would otherwise be a modulo by zero)
    Scalar color = colors.empty() ? Scalar(255, 178, 50, 255.0) : colors[classId % colors.size()];

    // Resize the mask, threshold, color and apply it on the image
    resize(objectMask, objectMask, Size(box.width, box.height));
    Mat mask = (objectMask > maskThreshold);
    // Blend 30% class color with 70% of the original pixels
    Mat coloredRoi = (0.3 * color + 0.7 * frame(box));
    coloredRoi.convertTo(coloredRoi, CV_8UC3);

    // Draw the contours on the image
    vector<Mat> contours;
    Mat hierarchy;
    mask.convertTo(mask, CV_8U);
    findContours(mask, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE);
    drawContours(coloredRoi, contours, -1, color, 5, LINE_8, hierarchy, 100);
    // Copy the blended pixels back, only where the mask is set
    coloredRoi.copyTo(frame(box), mask);
}
CMakeLists.txt
# Build script for the OpenCV Mask R-CNN demo: one executable built from main.cpp.
# CMake 4.x rejects projects that request compatibility with versions below 3.5,
# and CMAKE_CXX_STANDARD needs at least 3.1.
cmake_minimum_required( VERSION 3.5 )
project( OpenCV_MaskRCNN )

# Default to an optimized build, but let -DCMAKE_BUILD_TYPE=... override it.
if( NOT CMAKE_BUILD_TYPE )
    set( CMAKE_BUILD_TYPE "Release" )
endif()

# Request C++11 through the portable variables instead of overwriting CMAKE_CXX_FLAGS.
set( CMAKE_CXX_STANDARD 11 )
set( CMAKE_CXX_STANDARD_REQUIRED ON )

list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake_modules )

# Location of the OpenCV CMake package config; override with -DOpenCV_DIR=<path>.
# NOTE(review): OpenCV 4 installs usually place the config under <prefix>/lib/cmake/opencv4 - confirm this path.
if( NOT OpenCV_DIR )
    set( OpenCV_DIR "/usr/local/opencv400/share/OpenCV" )
endif()
find_package( OpenCV 4.0 REQUIRED )

# main.cpp only includes OpenCV and standard library headers.
include_directories(
    ${OpenCV_INCLUDE_DIRS}
)

add_executable( OpenCV_MaskRCNN main.cpp )
target_link_libraries( OpenCV_MaskRCNN ${OpenCV_LIBS} )
4、运行结果
输入图片:
输出测试效果:
5、参考链接
https://github.com/luohenyueji/OpenCV-Practical-Exercise