前言
作为垃圾佬,我的CPU是E3-1230V2
本文适用于 Windows 系统;Linux 系统需要修改 OpenCV 打开摄像头部分的代码!
提供一个实验用的onnx模型,提取码:bs9m
百度网盘链接
安装环境
本人头铁,学习就要学最新的包,直接安装openvino2024.0.0
# Recommended: activate your own virtual environment before installing.
pip install opencv-python opencv-contrib-python
pip install openvino==2024.0.0
ONNX 模型转 OpenVINO IR 模型(以 YOLO-NAS 为例)
import openvino as ov
# Source ONNX model and destination OpenVINO IR file. Per the accompanying
# article text, a .bin file is generated alongside the .xml on success.
onnx_path = './yolo-nas-s.onnx'
ir_path = './yolo_nas.xml'

# Convert the ONNX model to an OpenVINO model and write it out as IR in one step.
ov.save_model(ov.convert_model(onnx_path), ir_path)
print("OpenVINO IR model saved to:", ir_path)
转换成功后,在当前路径下就会生成.xml和.bin文件
openvino加载IR模型+推理
代码我会尽可能注释全面,请耐心阅读
import cv2
import numpy as np
import openvino as ov
# Class labels for the 80 COCO categories, indexed by the model's class id.
# Some entries use alternative spellings (e.g. "motorbike", "aeroplane",
# "tvmonitor"); they are only used as display text.
class_names = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop",
    "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]
# Path to the OpenVINO IR model (.xml). The weights path below is derived from
# it by swapping the extension to .bin.
# NOTE(review): the conversion snippet earlier in this file saves the IR to
# './yolo_nas.xml'; make sure this path points at where your .xml/.bin files
# actually live.
ir_filename = "./yolonas_openvino_model/yolo_nas.xml"
# Create the OpenVINO Core object.
core = ov.Core()
# Read the model: topology from the .xml file, weights from the matching .bin file.
model = core.read_model(model=ir_filename, weights=ir_filename.replace(".xml", ".bin"))
# Compile the model for the CPU device. To use an Intel GPU, change "CPU" to
# "GPU", but make sure the graphics driver is installed properly.
compiled_model = core.compile_model(model=model, device_name="CPU")
# Inspect the model's first input. Per the author's notes the shape is
# (batch_size, channels, image height, image width), so indices 2 and 3 are
# taken as height and width.
input_layer = model.input(0)
input_h, input_w = input_layer.shape[2], input_layer.shape[3]
# Example output reported by the author: input shape: 640, 640
print(f"input shape: {input_h}, {input_w}")
# Example output reported by the author:
# input_layer: <Output: names[input.1] shape[1,3,640,640] type: f32>
print(f"input_layer: {input_layer}")
# Inspect the model's outputs. Per the author's notes, output 0 holds the box
# coordinates and output 1 holds the per-class scores for each box.
output_layer = model.output(0)
# Example output reported by the author:
# output shape: <Output: names[1630] shape[1,8400,4] type: f32>
print(f"output shape: {output_layer}")
output_layer1 = model.output(1)
# Example output reported by the author:
# output_layer1: <Output: names[1623] shape[1,8400,80] type: f32>
print(f"output_layer1: {output_layer1}")
# A box is kept as a candidate only if its best class score exceeds this value.
CANDIDATE_SCORE_THRESHOLD = 0.3
# Score threshold handed to cv2.dnn.NMSBoxes; candidates scoring below it are dropped.
NMS_SCORE_THRESHOLD = 0.5
# IoU threshold handed to cv2.dnn.NMSBoxes; boxes overlapping a better box by
# more than this are suppressed.
NMS_IOU_THRESHOLD = 0.7
# Key code returned by cv2.waitKey for the ESC key.
ESC_KEY = 27


def decode_detections(raw_boxes, raw_scores, x_factor, y_factor,
                      score_threshold=CANDIDATE_SCORE_THRESHOLD):
    """Convert raw model outputs into candidate boxes in original-frame pixels.

    Args:
        raw_boxes: Array reshapeable to (N, 4). Each row is (x1, y1, x2, y2),
            the top-left and bottom-right corners in model-input pixels.
        raw_scores: Array whose last axis is the per-class scores, reshapeable
            to (N, num_classes).
        x_factor: Horizontal scale from model-input pixels to frame pixels.
        y_factor: Vertical scale from model-input pixels to frame pixels.
        score_threshold: A box is kept only if its best class score is
            strictly greater than this value.

    Returns:
        A ``(boxes, class_ids, confidences)`` tuple of equally long lists:
        ``boxes`` holds ``(x, y, width, height)`` integer tuples in frame
        pixels, ``class_ids`` the index of the best-scoring class, and
        ``confidences`` that best score.
    """
    scores = np.asarray(raw_scores)
    scores = scores.reshape(-1, scores.shape[-1])
    corners = np.asarray(raw_boxes, dtype=np.float64).reshape(-1, 4)

    # Best class and its score for every box at once (replaces the per-box loop).
    best_ids = scores.argmax(axis=1)
    best_scores = scores[np.arange(scores.shape[0]), best_ids]
    keep = best_scores > score_threshold

    kept = corners[keep]
    x1, y1, x2, y2 = kept[:, 0], kept[:, 1], kept[:, 2], kept[:, 3]
    # Scale to frame pixels; the integer cast truncates toward zero like int().
    scaled = np.stack(
        [x1 * x_factor, y1 * y_factor, (x2 - x1) * x_factor, (y2 - y1) * y_factor],
        axis=1,
    ).astype(np.int64)

    boxes = [tuple(row) for row in scaled.tolist()]
    return boxes, best_ids[keep].tolist(), list(best_scores[keep])


def main():
    """Run detection on webcam frames until ESC is pressed or a read fails.

    Relies on the module-level ``compiled_model``, ``input_w``, ``input_h`` and
    ``class_names`` defined earlier in this script.
    """
    # Open the default webcam.
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Exit! Webcam fails to open!")
        raise SystemExit(1)

    try:
        while True:
            # Grab one frame from the webcam.
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            # Start timing this frame.
            start = cv2.getTickCount()

            # Preprocess: resize to the model input size, HWC -> CHW, add a
            # batch axis, convert to float32 and scale to [0, 1].
            # NOTE(review): OpenCV frames are BGR; if the model expects RGB
            # input a cv2.cvtColor step may be needed here - confirm.
            resized = cv2.resize(frame, (int(input_w), int(input_h)))
            chw = resized.transpose((2, 0, 1))
            input_image = np.expand_dims(chw, axis=0).astype(np.float32) / 255.0

            # Scale factors from model-input pixels back to frame pixels.
            x_factor = frame.shape[1] / input_w
            y_factor = frame.shape[0] / input_h

            # Run inference; output 0 is the boxes, output 1 the class scores.
            output = compiled_model(input_image)
            output_squeezed0 = np.squeeze(output[0])
            print('output_squeezed0 shape:', output_squeezed0.shape)
            output_squeezed1 = np.squeeze(output[1])
            print('output_squeezed1 shape:', output_squeezed1.shape)

            # Candidate boxes (x, y, w, h), their class ids and best scores.
            boxes, classIds, confidences = decode_detections(
                output_squeezed0, output_squeezed1, x_factor, y_factor
            )
            print(confidences)

            # Non-maximum suppression. No class ids are passed, so overlapping
            # boxes are suppressed regardless of their class.
            if boxes:
                indexes = cv2.dnn.NMSBoxes(
                    boxes, confidences, NMS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD
                )
                # Flatten so that both flat and Nx1 index arrays (as returned
                # by some older OpenCV releases) are handled the same way.
                indexes = np.asarray(indexes, dtype=np.int64).reshape(-1)
            else:
                indexes = np.empty(0, dtype=np.int64)
            print(indexes)

            # Draw the surviving detections on the original frame.
            for i in indexes:
                idx = classIds[i]
                x, y, width, height = boxes[i]
                # Detection box.
                cv2.rectangle(frame, (x, y), (x + width, y + height), (0, 0, 255), 2)
                # Filled label strip above the box for the class name and score.
                cv2.rectangle(frame, (x, y - 20), (x + width, y), (0, 255, 255), -1)
                cv2.putText(frame, class_names[idx], (x, y - 8),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
                cv2.putText(frame, str(confidences[i]), (x + 70, y - 8),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

            # Elapsed time for this frame (preprocess + inference + drawing).
            t = (cv2.getTickCount() - start) / cv2.getTickFrequency()
            print(f"Infer time(ms): {t * 1000:.2f}ms; Detections: {len(indexes)}")

            # Overlay the FPS and show the frame.
            cv2.putText(frame, f"FPS: {1.0 / t:.2f}", (20, 40),
                        cv2.FONT_HERSHEY_PLAIN, 2.0, (255, 0, 0), 2)
            cv2.imshow("YOLOnas + OpenVINO Python Demo", frame)

            # Mask to the low byte so the ESC check works across platforms.
            key = cv2.waitKey(1) & 0xFF
            if key == ESC_KEY:
                break
    finally:
        # Always release the camera and close the window, even on error.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
效果展示