(一)C++同步模式的代码详见:【pytorch】将yolov5模型通过openVINO2022部署至生产环境(二):C++版本 (二)异步模式:
异步模式是指启动第一张图片的AI推理计算后,无需等待AI推理计算结束,直接采集第二张图片,并完成图像预处理,然后再检查第一张图片推理计算是否完毕,若完毕,处理输出结果,这样的好处是,并行执行了第一张图片的AI推理计算任务和第二张图片的图像采集和预处理任务,减少了等待时间,提高了硬件的利用率,也提高了吞吐量。
C++的异步模式代码与Python版异步模式代码有较大差异:
【pytorch】将yolov5模型通过openVINO2022部署至生产环境(一):python版本+异步模式
(三)实现代码如下:
main.cpp:
#include <chrono>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "inference_engine.hpp"
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include "yolov5_6vino.h"
using namespace std;
using namespace cv;
using namespace dnn;
using namespace InferenceEngine;
// Video-capture loop driving the async YOLOv5 pipeline: reads frames from
// sample.mp4, runs detection, overlays boxes and an FPS estimate, and shows
// the result until the stream ends or the user presses any key.
int main(int argc, char** argv)
{
    YOLOVINO yolov5vino;
    int frameCount = 0;
    // double (was int): with an integer, the std::setprecision(2) formatting
    // below had no effect and fractional FPS was silently truncated.
    double fps = 0.0;
    Mat frame;
    Mat firstFrame;
    auto start = std::chrono::high_resolution_clock::now();
    VideoCapture capture("sample.mp4");
    // Bail out early if the video cannot be opened or the first frame is
    // unreadable — the original fed a potentially empty Mat to the detector.
    if (!capture.read(firstFrame) || firstFrame.empty())
    {
        std::cout << "failed to read first frame from sample.mp4\n";
        return 1;
    }
    // Prime the async pipeline with the first frame.
    yolov5vino.initCurrentImage(firstFrame);
    // Create the display window once instead of on every loop iteration.
    cv::namedWindow("output", cv::WINDOW_NORMAL);
    while (true)
    {
        capture.read(frame);
        if (frame.empty())
        {
            break;
        }
        ++frameCount;
        // Promote grayscale input to 3 channels so the network always gets RGB.
        if (frame.channels() < 3)
            cvtColor(frame, frame, COLOR_GRAY2RGB);
        std::vector<YOLOVINO::Detection> outputs;
        // In async mode detection and result post-processing are separated:
        // detect() submits this frame and returns the results of the frame
        // that was submitted on the previous call.
        yolov5vino.detect(frame, outputs);
        yolov5vino.drawRect(frame, outputs);
        // Refresh the FPS estimate every 15 frames.
        if (frameCount >= 15)
        {
            auto end = std::chrono::high_resolution_clock::now();
            fps = frameCount * 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
            frameCount = 0;
            start = std::chrono::high_resolution_clock::now();
        }
        if (fps > 0)
        {
            std::ostringstream fps_label;
            fps_label << std::fixed << std::setprecision(2);
            fps_label << "FPS: " << fps;
            std::string fps_label_str = fps_label.str();
            cv::putText(frame, fps_label_str.c_str(), cv::Point(20, 50), cv::FONT_HERSHEY_SIMPLEX, 2, cv::Scalar(0, 0, 255), 2);
        }
        cv::imshow("output", frame);
        // Any key press stops playback.
        if (cv::waitKey(1) != -1)
        {
            capture.release();
            std::cout << "finished by user\n";
            break;
        }
    }
    // 0 = success; the original returned 1, which conventionally signals failure.
    return 0;
}
yolov5_6vino.cpp:
#include "yolov5_6vino.h"
#include "samples/ocv_common.hpp"
//将OpenCV Mat对象中的图像数据传给为InferenceEngine Blob对象
// Copy the image data of an OpenCV Mat into the named input Blob of the
// given InferenceEngine request (plain 8-bit copy, no value scaling).
void frameToBlob(const cv::Mat& frame, InferRequest::Ptr& inferRequest, const std::string& inputName) {
    // Fetch the request's input blob, then fill it with the sample helper
    // from samples/ocv_common.hpp, which converts U8 0-255 data to a Blob.
    Blob::Ptr inputBlob = inferRequest->GetBlob(inputName);
    matU8ToBlob<uint8_t>(frame, inputBlob);
}
// Copy the image data of an OpenCV Mat into the named input Blob, using the
// custom helper that additionally divides pixel values by 255 (YOLO expects
// normalized 0..1 float input).
void frameToBlob2(const cv::Mat& frame, InferRequest::Ptr& inferRequest, const std::string& inputName) {
    Blob::Ptr destBlob = inferRequest->GetBlob(inputName);
    matU8ToBlob2<float>(frame, destBlob);
}
// Construct the detector: init() loads the network, creates the two infer
// requests used for async ping-pong, and reads the class-name list.
YOLOVINO::YOLOVINO()
{
init();
}
// Nothing to release explicitly: all members clean up via their destructors.
YOLOVINO::~YOLOVINO()
{
}
// Load the network, configure input/output precision, compile it for the
// target device, create the two infer requests used by the async pipeline,
// cache the input tensor geometry, and read the class list.
void YOLOVINO::init()
{
    InferenceEngine::Core ie;
    // Two ways to load the model: directly from ONNX, or from IR (.xml + .bin).
    // Direct ONNX load would be:
    //InferenceEngine::CNNNetwork network = ie.ReadNetwork(m_modelFilename);
    // NOTE(review): the IR file name is hard-coded here and does not use
    // m_modelFilename declared in the header — confirm which one is intended.
    InferenceEngine::CNNNetwork network = ie.ReadNetwork("exported_onnx_model.XML", "exported_onnx_model.bin");
    // Maps of input/output names to their info objects.
    InferenceEngine::InputsDataMap inputs = network.getInputsInfo();
    InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo();
    // const ref: the original copied every map entry per iteration.
    for (const auto& item : inputs)
    {
        m_inputName = item.first;
        auto input_data = item.second;
        input_data->setPrecision(Precision::FP32);
        // Default layout is derived from rank (C for 1-D, NC for 2-D, CHW for
        // 3-D, NCHW for 4-D, NCDHW for 5-D), so this is redundant for a 4-D
        // input but kept for explicitness.
        input_data->setLayout(Layout::NCHW);
        // Built-in preprocessing: declare the input as RGB.
        input_data->getPreProcess().setColorFormat(ColorFormat::RGB);
        //std::cout << "input name = " << m_inputName << std::endl;
    }
    // Print every output name; if there are several, the last one wins as
    // m_outputName (YOLOv5 exported with a single merged output).
    for (const auto& item : outputs)
    {
        auto output_data = item.second;
        output_data->setPrecision(Precision::FP32);
        m_outputName = item.first;
        std::cout << "output name = " << m_outputName << std::endl;
    }
    // Compile the network for the target device and create TWO infer requests
    // that ping-pong between the current and the next frame in async mode.
    auto executable_network = ie.LoadNetwork(network, m_deviceName);
    m_inferRequest = executable_network.CreateInferRequestPtr();
    m_inferRequestNext = executable_network.CreateInferRequestPtr();
    // Cache the input tensor geometry (NCHW -> dims[1..3] = C, H, W).
    m_inputData = m_inferRequest->GetBlob(m_inputName);
    m_numChannels = m_inputData->getTensorDesc().getDims()[1];
    m_inputH = m_inputData->getTensorDesc().getDims()[2];
    m_inputW = m_inputData->getTensorDesc().getDims()[3];
    //m_imageSize = m_inputH * m_inputW;
    loadClassList();
}
// Read class names from m_classfile, one label per line, into m_classNames.
// If the file is missing the list simply stays empty.
void YOLOVINO::loadClassList()
{
    std::ifstream ifs(m_classfile);
    for (std::string line; std::getline(ifs, line); )
        m_classNames.push_back(line);
}
// Square the image letterbox-style: embed it into the top-left corner of a
// black square whose side is the larger of the two dimensions, padding the
// remainder with zeros.
Mat YOLOVINO::formatYolov5(const Mat& source)
{
    const int side = MAX(source.cols, source.rows);
    Mat squared = Mat::zeros(side, side, CV_8UC3);
    source.copyTo(squared(Rect(0, 0, source.cols, source.rows)));
    return squared;
}
// Seed the async pipeline: preprocess the very first frame (square pad +
// BGR->RGB) and load it into the "current" infer request's input blob.
// The first detect() call will then start this request.
void YOLOVINO::initCurrentImage(Mat& image)
{
    cv::Mat squared = formatYolov5(image);
    cv::Mat rgb;
    cvtColor(squared, rgb, COLOR_BGR2RGB);
    frameToBlob2(rgb, m_inferRequest, m_inputName);
}
// One step of the async pipeline.
// `image` is always the NEXT frame; `outputs` receives the detections of the
// frame whose data already sits in m_inferRequest (the CURRENT frame).
void YOLOVINO::detect(Mat& image, vector<Detection>& outputs)
{
    auto start = std::chrono::high_resolution_clock::now();
    // FIX: kick off inference of the current frame FIRST, so the next frame's
    // preprocessing below genuinely overlaps with the AI computation. The
    // original only called StartAsync() after preprocessing, serializing the
    // two stages and defeating the purpose of async mode.
    m_inferRequest->StartAsync();
    // Preprocess the next frame while the current inference is running, and
    // load it into the "next" request (helper from samples/ocv_common.hpp).
    cv::Mat input_image = formatYolov5(image);
    cv::Mat blob_image;
    cvtColor(input_image, blob_image, COLOR_BGR2RGB);
    frameToBlob2(blob_image, m_inferRequestNext, m_inputName);
    // Block until the current frame's inference completes.
    m_inferRequest->Wait(IInferRequest::WaitMode::RESULT_READY);
    auto output = m_inferRequest->GetBlob(m_outputName);
    const float* detection_out = output->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();
    // Output dims: 1 x rows x dimensions, e.g. 1 x 25200 x (5 + num_classes).
    const SizeVector outputDims = output->getTensorDesc().getDims();
    auto end = std::chrono::high_resolution_clock::now();
    auto time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
    cout << "time = " << time << endl;
    // BUGFIX: cast BEFORE dividing. `float(input_image.cols / m_inputW)` did
    // integer division first, truncating the scale factors and misplacing
    // boxes whenever the padded image side is not an exact multiple of the
    // network input size.
    float x_factor = float(input_image.cols) / float(m_inputW);
    float y_factor = float(input_image.rows) / float(m_inputH);
    float* dataout = (float*)detection_out;
    const int dimensions = outputDims[2];
    const int rows = outputDims[1];
    vector<int> class_ids;
    vector<float> confidences;
    vector<Rect> boxes;
    // Each of the `rows` candidates is laid out as
    // [cx, cy, w, h, objectness, class_score_0, class_score_1, ...].
    for (int i = 0; i < rows; ++i)
    {
        float confidence = dataout[4];
        if (confidence >= m_confThreshold)
        {
            // Wrap the class scores in a Mat header (no copy) to find the best class.
            float* classes_scores = dataout + 5;
            Mat scores(1, m_classNames.size(), CV_32FC1, classes_scores);
            Point class_id;
            double max_class_score;
            minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
            if (max_class_score > m_scoreThreshold)
            {
                confidences.push_back(confidence);
                class_ids.push_back(class_id.x);
                // Convert center/size (network scale) to a top-left rect in
                // original (padded) image pixels.
                float x = dataout[0];
                float y = dataout[1];
                float w = dataout[2];
                float h = dataout[3];
                int left = int((x - 0.5 * w) * x_factor);
                int top = int((y - 0.5 * h) * y_factor);
                int width = int(w * x_factor);
                int height = int(h * y_factor);
                boxes.push_back(Rect(left, top, width, height));
            }
        }
        dataout += dimensions;
    }
    // Non-maximum suppression drops overlapping candidate boxes.
    vector<int> nms_result;
    NMSBoxes(boxes, confidences, m_scoreThreshold, m_nmsThreshold, nms_result);
    for (size_t i = 0; i < nms_result.size(); i++)
    {
        int idx = nms_result[i];
        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];
        result.box = boxes[idx];
        outputs.push_back(result);
    }
    // Promote the "next" request to "current" for the following call.
    m_inferRequest.swap(m_inferRequestNext);
}
// Draw each detection onto the image: the bounding box, a filled strip above
// it in the class color, and the class-name text on that strip.
void YOLOVINO::drawRect(Mat& image, vector<Detection>& outputs)
{
    for (const auto& detection : outputs)
    {
        const Rect& box = detection.box;
        const int classId = detection.class_id;
        // Cycle through the fixed palette by class id.
        const auto color = colors[classId % colors.size()];
        rectangle(image, box, color, 3);
        rectangle(image, Point(box.x, box.y - 40), Point(box.x + box.width, box.y), color, FILLED);
        putText(image, m_classNames[classId].c_str(), Point(box.x, box.y - 5), FONT_HERSHEY_SIMPLEX, 1.5, Scalar(0, 0, 0), 2);
    }
}
yolov5_6vino.h:
#ifndef YOLOV5VINO_H
#define YOLOV5VINO_H
#include <fstream>
#include <opencv2/opencv.hpp>
#include <inference_engine.hpp>
// NCS2 is intentionally NOT defined (only NOT_NCS2 is), so the
// HETERO:GPU,CPU branch below is the one compiled in.
#define NOT_NCS2
using namespace cv;
using namespace dnn;
using namespace std;
using namespace InferenceEngine;
// YOLOv5 detector running on OpenVINO (InferenceEngine 1.x API) in async
// mode: two InferRequests ping-pong so that preprocessing of the next frame
// can overlap inference of the current one.
class YOLOVINO
{
public:
// One detected object: class index into the class list, confidence score,
// and bounding box in image pixel coordinates.
struct Detection
{
int class_id;
float confidence;
Rect box;
};
public:
YOLOVINO();
~YOLOVINO();
// Load network, create the two infer requests, read class names.
void init();
// NOTE(review): declared but no definition appears in the implementation
// shown — presumably a leftover from the DNN-backend version; confirm.
void loadNet(bool is_cuda);
// Pad the image to a square (top-left aligned, zero fill).
Mat formatYolov5(const Mat& source);
// Seed the pipeline with the very first frame.
void initCurrentImage(Mat& image);
// Submit `image` as the next frame; returns the previous frame's results.
void detect(Mat& image, vector<Detection>& outputs);
// Overlay boxes and labels onto the image.
void drawRect(Mat& image, vector<Detection>& outputs);
// Read class names from m_classfile, one per line.
void loadClassList();
private:
// Minimum per-class score for a candidate to be kept.
float m_scoreThreshold = 0.4;
// NMS IoU threshold: boxes overlapping more than this count as duplicates.
float m_nmsThreshold = 0.7;
// Minimum objectness (probability the box contains any object).
float m_confThreshold = 0.6;
// Device selection: "CPU", "GPU", "MYRIAD" (NCS2 stick).
#ifdef NCS2
const std::string m_deviceName = "MYRIAD";
const std::string m_modelFilename = "configFiles/yolov5sNCS2.xml";
#else
// Device names as defined in OpenVINO's plugins.xml:
const std::string m_deviceName = "HETERO:GPU,CPU";// "HETERO";
const std::string m_modelFilename = "yolov5s.onnx";
#endif // NCS2
const std::string m_classfile = "classes.txt";
// Input tensor geometry, filled in by init() from the network (NCHW).
size_t m_numChannels = 0;
size_t m_inputH = 0;
size_t m_inputW = 0;
size_t m_imageSize = 0;
std::string m_inputName = "";
std::string m_outputName = "";
// Class labels loaded from m_classfile.
vector<std::string> m_classNames;
// Palette cycled by class id when drawing.
const vector<Scalar> colors = { Scalar(255, 255, 0), Scalar(0, 255, 0), Scalar(0, 255, 255), Scalar(255, 0, 0) };
// Async ping-pong: request for the current frame and for the next frame;
// detect() swaps them at the end of every call.
InferRequest::Ptr m_inferRequest;
InferRequest::Ptr m_inferRequestNext;
Blob::Ptr m_inputData;
};
#endif
效果如图: