Most of the code here comes from xindongzhang's GitHub project: https://github.com/xindongzhang/MNN-APPLICATIONS. In my view, the first step in learning something new is to get it working. To do that, start by studying how others use it, then decompose, reorganize, and encapsulate their code; through that process you can master the basic usage, and only once you can use it is it worth studying how it works internally.
This article uses the pfld keypoint-detection demo from MNN-APPLICATIONS as the example. A model inference pipeline generally has two parts: the first loads the model and initializes parameters; the second runs the forward pass plus some post-processing to obtain the detection or recognition results.
1. Decomposing the code
The first step is to split the code into those two parts: model loading and forward inference.
Part one:
// load and config mnn model
auto revertor = std::unique_ptr<Revert>(new Revert(model_name.c_str()));
revertor->initialize();
auto modelBuffer = revertor->getBuffer();
const auto bufferSize = revertor->getBufferSize();
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromBuffer(modelBuffer, bufferSize));
revertor.reset();
MNN::ScheduleConfig config;
config.numThread = threads;
config.type = static_cast<MNNForwardType>(forward);
MNN::BackendConfig backendConfig;
config.backendConfig = &backendConfig;
auto session = net->createSession(config);
net->releaseModel();
Part two:
// wrapping input tensor, convert nhwc to nchw
std::vector<int> dims{1, INPUT_SIZE, INPUT_SIZE, 3};
auto nhwc_Tensor = MNN::Tensor::create<float>(dims, NULL, MNN::Tensor::TENSORFLOW);
auto nhwc_data = nhwc_Tensor->host<float>();
auto nhwc_size = nhwc_Tensor->size();
::memcpy(nhwc_data, image.data, nhwc_size);
// the model's input tensor is named "data"; passing nullptr fetches the default input
auto inputTensor = net->getSessionInput(session, nullptr);
inputTensor->copyFromHostTensor(nhwc_Tensor);
// run network
net->runSession(session);
// get output data
std::string output_tensor_name0 = "conv5_fwd";
MNN::Tensor *tensor_lmks = net->getSessionOutput(session, output_tensor_name0.c_str());
MNN::Tensor tensor_lmks_host(tensor_lmks, tensor_lmks->getDimensionType());
tensor_lmks->copyToHostTensor(&tensor_lmks_host);
2. Reorganizing and encapsulating
Reorganizing means grouping the code into interfaces by function. As the classic C++ design principles put it, we should program to an interface, not to an implementation, so two interfaces are defined here: model loading and keypoint extraction:
int LoadModel(const char* root_path);
int ExtractKeypoints(const cv::Mat& img_face, std::vector<cv::Point2f>* keypoints);
The same principles also tell us to encapsulate points of variation: use encapsulation to create a boundary layer between objects, so that changes on one side do not ripple to the other, yielding loose coupling between layers. Borrowing the approach of the seetaface project, the concrete implementation is isolated in an Impl class:
class PFLD::Impl {
public:
    Impl() {
        device_ = 0;
        precision_ = 0;
        power_ = 0;
        memory_ = 0;
        initialized_ = false;
    }
    ~Impl() {
        if (landmarker_) {
            landmarker_->releaseModel();
            landmarker_->releaseSession(session_);
        }
        delete input_tensor_;  // created via MNN::Tensor::create in LoadModel
    }
    int LoadModel(const char* root_path);
    int ExtractKeypoints(const cv::Mat& img_face, std::vector<cv::Point2f>* keypoints);

    std::shared_ptr<MNN::Interpreter> landmarker_;
    const int inputSize_ = 96;
    int device_;
    int precision_;
    int power_;
    int memory_;
    MNN::Session* session_ = nullptr;
    MNN::Tensor* input_tensor_ = nullptr;
    bool initialized_;
};
After also renaming some of the variables, we get a fairly clean and complete picture of how MNN is used. The LoadModel interface becomes:
int PFLD::Impl::LoadModel(const char* root_path) {
    std::string model_file = std::string(root_path) + "/pfld-lite.mnn";
    landmarker_ = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(model_file.c_str()));
    MNN::ScheduleConfig config;
    config.numThread = 1;
    config.type = static_cast<MNNForwardType>(device_);
    MNN::BackendConfig backendConfig;
    backendConfig.precision = (MNN::BackendConfig::PrecisionMode)precision_;
    backendConfig.power = (MNN::BackendConfig::PowerMode)power_;
    backendConfig.memory = (MNN::BackendConfig::MemoryMode)memory_;
    config.backendConfig = &backendConfig;
    session_ = landmarker_->createSession(config);

    // create a host tensor in NHWC (TENSORFLOW) layout to stage the input
    std::vector<int> dims{1, inputSize_, inputSize_, 3};
    input_tensor_ = MNN::Tensor::create<float>(dims, NULL, MNN::Tensor::TENSORFLOW);

    initialized_ = true;
    return 0;
}
The work here is model loading and parameter initialization. Compared with the original blazeface code, the input tensor is created inside LoadModel, which avoids allocating memory for input_tensor_ on every inference. The actual inference, ExtractKeypoints, becomes:
int PFLD::Impl::ExtractKeypoints(const cv::Mat& img_face, std::vector<cv::Point2f>* keypoints) {
    std::cout << "start extract keypoints." << std::endl;
    keypoints->clear();
    if (!initialized_) {
        std::cout << "model uninitialized." << std::endl;
        return 10000;
    }
    if (img_face.empty()) {
        std::cout << "input empty." << std::endl;
        return 10001;
    }

    // image preprocessing: resize to the network input size,
    // then normalize with mean 123 and scale 58
    cv::Mat face_cpy = img_face.clone();
    int width = face_cpy.cols;
    int height = face_cpy.rows;
    float scale_x = static_cast<float>(width) / inputSize_;
    float scale_y = static_cast<float>(height) / inputSize_;
    cv::Mat face_resized;
    cv::resize(face_cpy, face_resized, cv::Size(inputSize_, inputSize_));
    face_resized.convertTo(face_resized, CV_32FC3);
    face_resized = (face_resized - 123.0f) / 58.0f;

    // fill the staged host tensor and copy it into the session input
    auto tensor_data = input_tensor_->host<float>();
    auto tensor_size = input_tensor_->size();
    ::memcpy(tensor_data, face_resized.data, tensor_size);
    auto input_tensor = landmarker_->getSessionInput(session_, nullptr);
    input_tensor->copyFromHostTensor(input_tensor_);

    // run network
    landmarker_->runSession(session_);

    // get output
    std::string output_tensor_name0 = "conv5_fwd";
    MNN::Tensor* tensor_landmarks = landmarker_->getSessionOutput(session_, output_tensor_name0.c_str());
    MNN::Tensor tensor_landmarks_host(tensor_landmarks, tensor_landmarks->getDimensionType());
    tensor_landmarks->copyToHostTensor(&tensor_landmarks_host);
    std::cout << "batch: " << tensor_landmarks->batch() << std::endl
              << "channels: " << tensor_landmarks->channel() << std::endl
              << "height: " << tensor_landmarks->height() << std::endl
              << "width: " << tensor_landmarks->width() << std::endl
              << "type: " << tensor_landmarks->getDimensionType() << std::endl;

    // the output is 98 (x, y) pairs in network coordinates;
    // scale them back to the original face crop
    auto landmarks_dataPtr = tensor_landmarks_host.host<float>();
    int num_of_points = 98;
    for (int i = 0; i < num_of_points; ++i) {
        cv::Point2f curr_pt(landmarks_dataPtr[2 * i + 0] * scale_x,
                            landmarks_dataPtr[2 * i + 1] * scale_y);
        keypoints->push_back(curr_pt);
    }
    std::cout << "end extract keypoints." << std::endl;
    return 0;
}
It mainly comprises input preprocessing (mean subtraction and normalization), setting the model input, running the forward pass, and fetching the output tensor.