简介:本文介绍在Ascend 310平台对MindSpore框架实现的网络进行推理的流程。以CV语义分割网络(HRNet)为例,使用的数据集是Cityscapes,数据文件为PNG图片。使用的中间表示(IR)为MindIR,数据格式为二进制流

建议:可以参考MindSpore ModelZoo master分支下的ResNet系列网络的310推理脚本,是一个不错的完整示例。

注:本文目前仅介绍基于MindIR和二进制流文件的310推理,其他方法随缘更新。仅涉及实际操作流程,具体原理请查阅其他资料。

总体流程

昇腾310和GPU_自动驾驶

相关脚本

推理过程涉及到以下相关脚本:

  • export.py
  • preprocess.py
  • main.cc
  • postprocess.py

接下来分别介绍各个脚本的作用。

Export

这一步的工作是将训练得到的checkpoint文件和网络模型转换为MindIR。

相关脚本:export.py

输入:checkpoint文件

输出(*.mindir)位置:自定义

def main():
    """Export mindir for 310 inference."""
    parser = argparse.ArgumentParser("HRNet Semantic Segmentation exporting.")
    parser.add_argument("--device_id", type=int, default=0, help="Device ID. ")
    parser.add_argument("--checkpoint_file", type=str, help="Checkpoint file path. ")
    parser.add_argument("--file_name", type=str, help="Output file name. ")
    parser.add_argument("--file_format", type=str, default="MINDIR",
                        choices=["AIR", "MINDIR"], help="Output file format. ")
    parser.add_argument("--device_target", type=str, default="Ascend",
                        choices=["Ascend", "GPU", "CPU"], help="Device target.")

    cli_args = parser.parse_args()

    # Configure the execution context: graph mode, and a device id on Ascend.
    context.set_context(mode=context.GRAPH_MODE, device_target=cli_args.device_target)
    if cli_args.device_target == "Ascend":
        context.set_context(device_id=cli_args.device_id)

    # Build the network, restore the trained checkpoint, switch to eval mode.
    net = get_seg_model(config)
    load_param_into_net(net, load_checkpoint(cli_args.checkpoint_file))
    net.set_train(False)

    # Dummy NCHW input tensor; it fixes the exported graph's input shape.
    h, w = config.eval.image_size[0], config.eval.image_size[1]
    dummy_input = Tensor(np.zeros([1, 3, h, w], dtype=np.float32))

    # Serialize the network to MINDIR (or AIR) for 310 inference.
    export(net, dummy_input, file_name=cli_args.file_name, file_format=cli_args.file_format)

Preprocess

这一步的工作是对数据集进行预处理,并将处理后的数据集保存为我们所需要的格式,本文以二进制格式保存。

注:此步骤亦可在execute步骤的C++脚本中实现,Python实现比较简单。

相关脚本:preprocess.py

输入:原始数据集

输出(*.bin)位置:./preprocess_Result/亦可自定义

def export_cityscapes_to_bin(args):
    """Convert data format from png to bin.

    Iterates the (already preprocessed) Cityscapes evaluation dataset and
    dumps each image/label pair as raw binary files under
    ``args.output_path``/image and ``args.output_path``/label.

    Args:
        args: Namespace with ``data_path`` (dataset root) and
            ``output_path`` (destination directory for the *.bin files).
    """
    image_path = os.path.join(args.output_path, "image")
    label_path = os.path.join(args.output_path, "label")
    # exist_ok avoids crashing when the output directories already exist
    # (e.g. when the script is re-run after a partial export).
    os.makedirs(image_path, exist_ok=True)
    os.makedirs(label_path, exist_ok=True)
    # Project-specific Cityscapes iterator; the samples it yields are
    # already preprocessed for evaluation.
    dataset = Cityscapes(args.data_path,
                         num_samples=None,
                         num_classes=config.dataset.num_classes,
                         multi_scale=False,
                         flip=False,
                         ignore_label=config.dataset.ignore_label,
                         base_size=config.eval.base_size,
                         crop_size=config.eval.image_size,
                         downsample_rate=1,
                         scale_factor=16,
                         mean=config.dataset.mean,
                         std=config.dataset.std,
                         is_train=False)
    for i, data in enumerate(dataset):
        # Each sample is a pair of numpy arrays: (image, label).
        image = data[0]
        label = data[1]
        file_name = "cityscapes_val_" + str(i) + ".bin"
        image_file_path = os.path.join(image_path, file_name)
        label_file_path = os.path.join(label_path, file_name)
        # ndarray.tofile writes the raw buffer with no header: dtype and
        # shape must be known when the file is read back (see postprocess).
        image.tofile(image_file_path)
        label.tofile(label_file_path)
        # Adapt this loop to the layout of your own dataset.
    print("Export bin files finished!")

Execute

这一步的工作是模型预测,将上一步得到的数据导入模型,计算得到预测结果,将预测结果保存为需要的格式。

相关脚本:main.cc,其余相关脚本一般均存放在/ascend310_infer目录下。除main.cc之外均不需要做修改。

输入:上一步处理后的数据集

输出(*.bin)位置:./results_Files/亦可自定义

文件具体流程如下,一般情况下只需要更改数据条目导入和推理的部分(for循环那里)。脚本中所用到的很多函数都在utils.cc脚本中有实现,直接将其他项目的脚本拿过来用就行。

注:如果数据预处理的部分也打算在此步骤实现,请参考Model_zoo中的其他网络,本示例没有展示这方面内容。

// Command-line flags (gflags): mindir model path, input dataset directory,
// and the Ascend device id to run on.
DEFINE_string(gmindir_path, "./hrnet-v2-w48.mindir", "mindir path");
DEFINE_string(gdataset_path, ".", "dataset path");
DEFINE_int32(gdevice_id, 0, "device id");

int main(int argc, char **argv) {
    //从命令行获取参数
    gflags::ParseCommandLineFlags(&argc, &argv, true);
    if (real_path(FLAGS_gmindir_path).empty()) {
        std::cout << "Invalid mindir." << std::endl;
        return 1;
    }
    //配置环境变量
    auto context = std::make_shared<Context>();
    auto ascend310 = std::make_shared<mindspore::Ascend310DeviceInfo>();
    ascend310->SetDeviceID(FLAGS_gdevice_id);
    context->MutableDeviceInfo().push_back(ascend310);
    //导入模型
    mindspore::Graph graph;
    Serialization::Load(FLAGS_gmindir_path, ModelType::kMindIR, &graph);
    Model model;
    Status ret = model.Build(GraphCell(graph), context);
    if (ret != kSuccess) {
        std::cout << "ERROR: Build failed." << std::endl;
        return 1;
    }
    auto all_files = get_all_files(FLAGS_gdataset_path);
    std::cout << typeid(all_files).name() << std::endl;
    if (all_files.empty()) {
        std::cout << "ERROR: no input data." << std::endl;
        return 1;
    }
    //获取模型输入格式
    std::vector<MSTensor> modelInputs = model.GetInputs();
    std::map<double, double> costTime_map;
    //逐个二进制文件导入
    size_t size = all_files.size();
    for (size_t i = 0; i < size; ++i) {
        struct timeval start = {0};
        struct timeval end = {0};
        double startTimeMs = 0;
        double endTimeMs = 0;
        std::vector<MSTensor> inputs;
        std::vector<MSTensor> outputs;
        std::cout << "==> Image: " << all_files[i] << std::endl;
        //导入image二进制流文件
        MSTensor image = read_file_to_tensor(all_files[i]);
        inputs.emplace_back(modelInputs[0].Name(), modelInputs[0].DataType(), modelInputs[0].Shape(),
                            image.Data().get(), image.DataSize());
        gettimeofday(&start, nullptr);
        //推理得到预测结果
        ret = model.Predict(inputs, &outputs);
        gettimeofday(&end, nullptr);
        if (ret != kSuccess) {
            std::cout << "Predict " << all_files[i] << " failed." << std::endl;
            return 1;
        }
        startTimeMs = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000;
        endTimeMs = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 1000;
        costTime_map.insert(std::pair<double, double>(startTimeMs, endTimeMs));
        //将预测结果保存为二进制流文件
        write_result(all_files[i], outputs);
    }
    //记录日志信息
    double average = 0.0;
    int inferCount = 0;
    for (auto iter = costTime_map.begin(); iter != costTime_map.end(); iter++) {
        average += iter->second - iter->first;
        inferCount++;
    }
    average = average / inferCount;
    std::stringstream timeCost;
    timeCost << "NN inference cost average time: " << average << " ms of infer_count " << inferCount << std::endl;
    std::cout << "NN inference cost average time: " << average << "ms of infer_count " << inferCount << std::endl;
    std::string fileName = "./time_Result" + std::string("/test_perform_static.txt");
    std::ofstream fileStream(fileName.c_str(), std::ios::trunc);
    fileStream << timeCost.str();
    fileStream.close();
    costTime_map.clear();
    return 0;
}

Postprocess

这一步的工作是用预测结果和对应标签计算评价指标(mIoU)。

相关脚本:postprocess.py

输入:标签文件(*.bin)和预测结果文件(*.bin)

输出:打印评价指标值。

此脚本可以仿照eval.py的流程更改。一定要注意二进制流文件读入时的数据格式dtype和数组的shape

def main(args):
    """Main function for mIoU calculation.

    Reads the label *.bin files from ``args.label_path`` and the matching
    prediction ``*_0.bin`` files from ``args.result_path``, accumulates a
    confusion matrix, and prints per-class IoU plus the mean IoU.
    """
    result_list = os.listdir(args.label_path)
    num_classes = config.dataset.num_classes
    confusion_matrix = np.zeros((num_classes, num_classes))
    ignore_label = config.dataset.ignore_label
    count = 0
    for result in result_list:
        # BUG FIX: the original used result.rstrip(".bin"), but rstrip strips
        # any trailing '.', 'b', 'i', 'n' CHARACTERS rather than the suffix,
        # corrupting stems that end in those letters. Slice the extension off.
        prefix = result[:-len(".bin")] if result.endswith(".bin") else result
        # dtype and shape must match exactly what was written before:
        # predictions are (num_classes, 256, 512) float32 score maps.
        # NOTE(review): 256x512 presumably equals config.eval.image_size — confirm.
        pred = np.fromfile(os.path.join(args.result_path, prefix + "_0.bin"),
                           dtype=np.float32).reshape(num_classes, 256, 512)
        # CHW -> HWC for cv2, then upsample back to full 1024x2048 resolution.
        output = pred.transpose(1, 2, 0)
        output = np.array([cv2.resize(output, (2048, 1024), interpolation=cv2.INTER_LINEAR)])
        label = np.fromfile(os.path.join(args.label_path, prefix + ".bin"), dtype=np.int32).reshape(1, 1024, 2048)
        # Accumulate the class-vs-class confusion counts for this image.
        confusion_matrix += get_confusion_matrix(label, output, [1, 1024, 2048], num_classes, ignore_label)
        count += 1
    print("Total number of images: ", count)

    # Per-class IoU = TP / (pred + gt - TP); mIoU is the mean over classes.
    pos = confusion_matrix.sum(1)
    res = confusion_matrix.sum(0)
    tp = np.diag(confusion_matrix)
    IoU_array = (tp / np.maximum(1.0, pos + res - tp))
    mean_IoU = IoU_array.mean()

    # Show results
    print("=========== 310 Inference Result ===========")
    print("mIoU:", mean_IoU)
    print("IoU array: \n", IoU_array)
    print("=========================================")

执行

在MindSpore Model_zoo的项目中,一般都会将执行脚本存放在/scripts目录下,命名有明显的310标识。通过执行脚本可以很清晰地了解310推理的执行流程。

只需要根据其他项目的脚本做执行相关的必要更改即可。

# Require exactly three positional arguments; print usage and abort otherwise.
if [[ $# != 3 ]]; then
    echo "Usage:"
    echo "sh scripts/ascend310_inference.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID]"
    exit 1
fi
# Resolve a path argument to an absolute path (already-absolute paths pass through).
get_real_path() {
    case "$1" in
        /*) echo "$1" ;;
        *)  echo "$(realpath -m $PWD/$1)" ;;
    esac
}

model=$(get_real_path $1)
data_path=$(get_real_path $2)
device_id=$3

echo "mindir name: "$model
echo "dataset path: "$data_path
echo "device id: "$device_id
# Configure Ascend toolchain environment variables. The toolkit layout differs
# between installs: newer ones have an ascend-toolkit subdirectory, older ones
# place atc/acllib directly under ASCEND_HOME.
export ASCEND_HOME=/usr/local/Ascend/
if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then
    export PATH=$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/atc/bin:$PATH
    export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/ascend-toolkit/latest/atc/lib64:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
    export TBE_IMPL_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe
    export PYTHONPATH=${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH
    export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp
else
    export PATH=$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH
    export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
    export PYTHONPATH=$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH
    export ASCEND_OPP_PATH=$ASCEND_HOME/opp
fi
# Build the C++ inference binary.
# NOTE: this function cd's into ./ascend310_infer/src/ and deliberately does
# NOT cd back — infer() below relies on that via `cd -`.
function compile_app()
{
    cd ./ascend310_infer/src/ || exit
    if [ -f "Makefile" ]; then
        make clean
    fi
    bash build.sh &> build.log
}
# Regenerate ./preprocess_Result from scratch with freshly converted bin files.
function preprocess_data()
{
    [ -d preprocess_Result ] && rm -rf ./preprocess_Result
    mkdir preprocess_Result
    python3.7 ./preprocess.py --data_path=$data_path --output_path=./preprocess_Result &> preprocess.log
}
# Run the compiled inference binary over the preprocessed dataset.
function infer()
{
    # `cd -` returns to the directory we were in before compile_app cd'd into
    # ascend310_infer/src — this function assumes compile_app ran just before it.
    cd - || exit
    if [ -d result_Files ]; then
        rm -rf ./result_Files
    fi
    if [ -d time_Result ]; then
        rm -rf ./time_Result
    fi
    # Fresh output directories for predictions and the timing report.
    mkdir result_Files
    mkdir time_Result
    ./ascend310_infer/src/main --gmindir_path=$model --gdataset_path=./preprocess_Result/image --gdevice_id=$device_id  &> infer.log
}
# Compare predictions against labels and compute the mIoU metric.
function cal_acc()
{
    if ! python3.7 ./postprocess.py --result_path=./result_Files --label_path=./preprocess_Result/label &> acc.log; then
        echo "Calculate accuracy failed."
        exit 1
    fi
}
# Run the full pipeline in order: preprocess -> compile -> infer -> evaluate.
# Each stage aborts the script on failure.
if ! preprocess_data; then
    echo "Dataset preprocessing failed."
    exit 1
fi
if ! compile_app; then
    echo "Compile app code failed."
    exit 1
fi
if ! infer; then
    echo "Execute inference failed."
    exit 1
fi
if ! cal_acc; then
    echo "Calculate mIoU failed."
    exit 1
fi

可能会出现的错误

错误:undefined reference to ‘google::FlagRegisterer::FlagRegisterer’。这通常是链接时未找到gflags库导致的,在链接选项中加入 -lgflags(必要时同时加入 -lpthread)即可解决。

为帮助过我们的同志们点赞!