Foreword
This post is mainly a personal record. If it helps anyone who needs to convert YOLOv7 to ONNX, run inference on it, and then convert the ONNX model to RKNN, I would be very pleased. There is plenty of material online about converting models to RKNN, but I find it rather scattered; this post simply pulls those articles together so you know which approach suits which kind of model. I am no expert on these models, so if anything here is wrong, corrections are very welcome. (Most people have probably moved on to v8 or v9 by now.) At the end of this post I also include the YOLOv8 code; v8 is simpler to handle.
Classifying YOLOv7 models
Before getting into the code itself, we need to understand one very important thing about a YOLOv7 model: its inputs and outputs. Their dimensions matter a great deal for inference and post-processing. I roughly divide today's YOLOv7 exports into three kinds: the official version, the plain version, and the YOLOv5-style version. They are defined below.
Also, a note up front: not every one of these three versions can do both ONNX inference and RKNN inference. I sorted these relationships out about a month ago and then got pulled onto other things. The official version runs ONNX inference, but it cannot be converted to RKNN (most likely because the end2end export bakes NMS into the graph with ops the converter does not handle), so naturally it cannot run on RKNN either. The plain version converts to ONNX, but I have no ONNX post-processing for it here; I do have post-processing for it after conversion to RKNN. The YOLOv5-style version has ONNX post-processing but no RKNN post-processing here.
I am not sure whether the ONNX and the RKNN post-processing could actually be shared, since the model outputs are the same; I will try it tomorrow after finishing this article (or maybe the day after, heh).
Official version
The input and output dimensions of the official version should look like the figure below:

Its output is ultimately an array with 7 values per detection. This ONNX model can be exported with the current official YOLOv7 export.py using the following command:
python export.py --weights ./ \
--grid --end2end --simplify \
--topk-all 100 --iou-thres 0.65 --conf-thres 0.35 \
--img-size 640 640 --max-wh 640
The flags should be fairly self-explanatory (--grid and --end2end bake the grid decode and the NMS into the exported graph).
Plain version
The input and output dimensions of the plain version should look like the figure below:

Looking at the output, there are three output heads. The first row is each output's name, and the second row is the shape of its anchor-box tensor, in NCHW format. This ONNX model can be exported with the current official YOLOv7 export.py using the following command:
python export.py --weights yolov7.pt
As for why I call the first one the official version and the second the plain version: the first comes from the Colab code the YOLOv7 authors open-sourced, while the second is what plain export.py produces without all those extra flags. Their ONNX outputs are also different.
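If you are not sure which variant a given .onnx file is, one quick check is to print its input and output shapes with onnxruntime. A minimal sketch (the file name is just a placeholder):
import onnxruntime as ort

sess = ort.InferenceSession("yolov7.onnx", providers=["CPUExecutionProvider"])
for i in sess.get_inputs():
    print("input :", i.name, i.shape)
for o in sess.get_outputs():
    print("output:", o.name, o.shape)
# Rough rule of thumb: the official (--grid --end2end) export has a single output whose
# rows are [batch_id, x0, y0, x1, y1, cls_id, score]; the plain export has three
# detection heads; the YOLOv5-style export has a single (1, 25200, 85)-like output.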
YOLOv5-style version
As the name suggests, this is a v7 export whose output is similar to v5's, as shown below:

You can see that its output dimensions are similar to v5's; the export method follows this article.
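For reference, each of the rows in this kind of output is laid out as [cx, cy, w, h, objectness, 80 class scores]. A minimal sketch of reading one row (the shape is assumed from a 640x640 export):
import numpy as np

pred = np.zeros((1, 25200, 85), dtype=np.float32)  # stand-in for the real model output
row = pred[0, 0]
cx, cy, w, h, obj = row[:5]
cls_id = int(np.argmax(row[5:]))
conf = float(obj * row[5 + cls_id])  # final confidence = objectness * class score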
ONNX inference
Official version
If you have the means, just go and try it here directly.
import cv2
cuda = True # whether a GPU is available
w = "/content/yolov7/yolov7-tiny.onnx"
img = cv2.imread('/content/yolov7/inference/images/horses.jpg')
# load the test image
import time
import requests
import random
import numpy as np
import onnxruntime as ort
from PIL import Image
from pathlib import Path
from collections import OrderedDict,namedtuple
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
session = ort.InferenceSession(w, providers=providers)
# the following resizes and pads the image to fit the model input
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, r, (dw, dh)
# class names matching the model (COCO)
names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush']
colors = {name:[random.randint(0, 255) for _ in range(3)] for i,name in enumerate(names)}
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
image = img.copy()
image, ratio, dwdh = letterbox(image, auto=False)
image = image.transpose((2, 0, 1)) # HWC -> CHW
image = np.expand_dims(image, 0) # add a batch dimension
image = np.ascontiguousarray(image)
im = image.astype(np.float32)
im /= 255
im.shape
outname = [i.name for i in session.get_outputs()]
outname
inname = [i.name for i in session.get_inputs()]
inname
inp = {inname[0]:im} # bind the input tensor to the model's input name
outputs = session.run(outname, inp)[0] # single output
ori_images = [img.copy()]
for i,(batch_id,x0,y0,x1,y1,cls_id,score) in enumerate(outputs):
image = ori_images[int(batch_id)]
box = np.array([x0,y0,x1,y1])
box -= np.array(dwdh*2)
box /= ratio
box = box.round().astype(np.int32).tolist()
cls_id = int(cls_id)
score = round(float(score),3)
name = names[cls_id]
color = colors[name]
name += ' '+str(score)
cv2.rectangle(image,box[:2],box[2:],color,2)
cv2.putText(image,name,(box[0], box[1] - 2),cv2.FONT_HERSHEY_SIMPLEX,0.75,[225, 255, 255],thickness=2) # post-processing
Image.fromarray(ori_images[0]) # display the result
The code above is copied from the official notebook.
YOLOv5-style version
import argparse
import time
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized
import onnxruntime
def detect(save_img=False):
source, weights, view_img, save_txt, imgsz, trace = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, not opt.no_trace
save_img = not opt.nosave and not source.endswith('.txt') # save_img is True when opt.nosave is False and source does not end with .txt
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://', 'https://')) # webcam is True if source is a number, ends with .txt, or starts with one of these four prefixes
# Directories
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Initialize
set_logging()
device = select_device(opt.device) # select the compute device
half = device.type != 'cpu' # half precision only supported on CUDA
cuda = torch.cuda.is_available()
check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
session = onnxruntime.InferenceSession(weights, providers=providers) # same provider/session setup as in onnx_inference
# Load model
model = session
# stride = int(model.stride.max()) # model stride
stride = 32 # same as in onnx_inference
imgsz = check_img_size(imgsz, s=stride) # make sure the input size is a multiple of stride
# Second-stage classifier
classify = False
if classify: # second-stage classifier loading is skipped
modelc = load_classifier(name='resnet101', n=2) # initialize
modelc.load_state_dict(torch.load('weights/', map_location=device)['model']).to(device).eval()
# Set Dataloader
vid_path, vid_writer = None, None
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride) # prepare images/videos for the model
# Get names and colors
names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush']
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # assign a random color to each class
# Run inference
if device.type != 'cpu': # GPU warm-up from the original detect.py; not needed here
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
t0 = time.time()
for path, img, im0s, vid_cap in dataset: # path: current image path; img: RGB image in (C, H, W); im0s: original BGR image; vid_cap: the video capture object
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0
if img.ndimension() == 3:
img = img.unsqueeze(0) # image is now (1, C, H, W) float32; pre-processing matches onnx_inference and ends here
# Inference
t1 = time_synchronized()
ts = time.time() # timestamp before inference
im = img.cpu().numpy() # back from a torch tensor to a numpy array; onnx_inference also feeds numpy
#print(f'output names: {session.get_outputs()}, input names: {session.get_inputs()}')
# outname = [i.name for i in session.get_outputs()] # all output node names
#
# inname = [i.name for i in session.get_inputs()] # all input node names
# print(f'output names: {outname}, input names: {inname}')
#output names: ['output'], input names: ['images']
pred = model.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: im})[0] # single-output inference, same idea as in onnx_inference
# pred = model.run([i.name for i in session.get_outputs()], {session.get_inputs()[0].name: im})[0]
te = time.time()
print('inference time : %.4f s' % (te - ts)) # report the inference time
# Apply NMS
pred = torch.from_numpy(pred).reshape(1, -1, 85)
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t2 = time_synchronized()
# Apply Classifier
if classify:
pred = apply_classifier(pred, modelc, img, im0s)
# Process detections
for i, det in enumerate(pred): # detections per image
if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt
s += '%gx%g ' % img.shape[2:] # print string
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
# Write results
for *xyxy, conf, cls in reversed(det):
if save_txt: # Write to file
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * len(line)).rstrip() % line + '\n')
if save_img or view_img: # Add bbox to image
label = f'{names[int(cls)]} {conf:.2f}'
plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
# Print time (inference + NMS)
print(f'{s}Done. ({t2 - t1:.3f}s)')
# Stream results
if view_img:
cv2.imshow(str(p), im0)
cv2.waitKey(0) # wait for a key press
# Save results (image with detections)
if save_img:
if dataset.mode == 'image':
cv2.imwrite(save_path, im0)
print(f" The image with the result is saved in: {save_path}")
else: # 'video' or 'stream'
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path += '.mp4'
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer.write(im0)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
# print(f"Results saved to {save_dir}{s}")
print(f'Done. ({time.time() - t0:.3f}s)')
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default='yolov7-v5.onnx', help='model.onnx path(s)')
parser.add_argument('--source', type=str, default='D:\Aplication\python\yolov7\\test.jpg', help='source') # file/folder, 0 for webcam
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='display results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default='runs/detect', help='save results to project/name')
parser.add_argument('--name', default='exp', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
opt = parser.parse_args()
print(opt)
# check_requirements(exclude=('pycocotools', 'thop'))
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
for opt.weights in ['']:
detect()
strip_optimizer(opt.weights)
else:
detect()
The drawback is that this script has to sit inside the official YOLOv7 repository to run, since it imports the repo's own utilities.
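Assuming the script above is saved inside the YOLOv7 repo as, say, detect_onnx.py (the file name is only an assumption), it can be run along these lines:
python detect_onnx.py --weights yolov7-v5.onnx --source test.jpg --view-img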
RKNN inference
General flow (without post-processing)
rknn = RKNN(verbose=True)
rknn.config(mean_values=[], std_values=[], target_platform=[]) # fill in the per-channel mean/std and the target chip (e.g. 'rk3566') for your model and board
ret = rknn.load_onnx(model=ONNX_MODEL) # outputs=['495', '497', '499']
ret = rknn.build(do_quantization=True, dataset=DATASET)
ret = rknn.init_runtime()
ret = rknn.export_rknn('./')
outputs_rknn = rknn.inference(inputs=[img_rknn_input], data_format=['nhwc'])
Plain version
import os
import urllib
import traceback
import time
import sys
import numpy as np
import cv2
from rknn.api import RKNN
ONNX_MODEL = 'yolov7.onnx'
RKNN_MODEL = 'yolov7.rknn'
IMG_PATH = 'bus.jpg'
DATASET = 'dataset.txt'
QUANTIZE_ON = True
BOX_THESH = 0.45
NMS_THRESH = 0.25
IMG_SIZE = 640
CLASSES = ["person", "bicycle", "car", "motorbike ", "aeroplane ", "bus ", "train", "truck ", "boat", "traffic light",
"fire hydrant", "stop sign ", "parking meter", "bench", "bird", "cat", "dog ", "horse ", "sheep", "cow", "elephant",
"bear", "zebra ", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
"baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife ",
"spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza ", "donut", "cake", "chair", "sofa",
"pottedplant", "bed", "diningtable", "toilet ", "tvmonitor", "laptop ", "mouse ", "remote ", "keyboard ", "cell phone", "microwave ",
"oven ", "toaster", "sink", "refrigerator ", "book", "clock", "vase", "scissors ", "teddy bear ", "hair drier", "toothbrush "]
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def xywh2xyxy(x):
# Convert [x, y, w, h] to [x1, y1, x2, y2]
y = np.copy(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
return y
imgsz=[640,640]
def process(input, mask, anchors):
anchors = [anchors[i] for i in mask]
grid_h, grid_w = map(int, input.shape[0:2])
box_confidence = sigmoid(input[..., 4])
box_confidence = np.expand_dims(box_confidence, axis=-1)
box_class_probs = sigmoid(input[..., 5:])
box_xy = sigmoid(input[..., :2]) * 2 - 0.5
col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
grid = np.concatenate((col, row), axis=-1)
box_xy += grid
box_xy *= int(IMG_SIZE / grid_h)
# col = np.tile(np.arange(0, grid_w), grid_h).reshape(-1, grid_w)
# row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_w)
# col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
# row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
# grid = np.concatenate((col, row), axis=-1)
# box_xy += grid
# # box_xy *= (int(imgsz[0] / grid_h), int(imgsz[1] / grid_w))
# box_xy *= (int(imgsz[0] / grid_h), int(imgsz[1] / grid_w))
box_wh = pow(sigmoid(input[..., 2:4]) * 2, 2)
box_wh = box_wh * anchors
box = np.concatenate((box_xy, box_wh), axis=-1)
return box, box_confidence, box_class_probs
def filter_boxes(boxes, box_confidences, box_class_probs):
"""Filter boxes with box threshold. It's a bit different with origin yolov5 post process!
# Arguments
boxes: ndarray, boxes of objects.
box_confidences: ndarray, confidences of objects.
box_class_probs: ndarray, class_probs of objects.
# Returns
boxes: ndarray, filtered boxes.
classes: ndarray, classes for boxes.
scores: ndarray, scores for boxes.
"""
box_classes = np.argmax(box_class_probs, axis=-1)
box_class_scores = np.max(box_class_probs, axis=-1)
pos = np.where(box_confidences[..., 0] >= BOX_THESH)
boxes = boxes[pos]
classes = box_classes[pos]
scores = box_class_scores[pos]
return boxes, classes, scores
def nms_boxes(boxes, scores):
"""Suppress non-maximal boxes.
# Arguments
boxes: ndarray, boxes of objects.
scores: ndarray, scores of objects.
# Returns
keep: ndarray, index of effective boxes.
"""
x = boxes[:, 0]
y = boxes[:, 1]
w = boxes[:, 2] - boxes[:, 0]
h = boxes[:, 3] - boxes[:, 1]
areas = w * h
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x[i], x[order[1:]])
yy1 = np.maximum(y[i], y[order[1:]])
xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])
w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
inter = w1 * h1
ovr = inter / (areas[i] + areas[order[1:]] - inter)
inds = np.where(ovr <= NMS_THRESH)[0]
order = order[inds + 1]
keep = np.array(keep)
return keep
def yolov5_post_process(input_data):
masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
yolov5_anchors = [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
yolov7_anchors = [[12, 16], [19, 36], [40, 28],
[36, 75], [75, 55], [72, 146],
[142, 110], [192, 243], [459, 401]]
boxes, classes, scores = [], [], []
for input, mask in zip(input_data, masks):
b, c, s = process(input, mask, yolov5_anchors) # pick the anchor set that matches your weights (yolov7_anchors for official YOLOv7 models)
b, c, s = filter_boxes(b, c, s)
boxes.append(b)
classes.append(c)
scores.append(s)
boxes = np.concatenate(boxes)
boxes = xywh2xyxy(boxes)
classes = np.concatenate(classes)
scores = np.concatenate(scores)
nboxes, nclasses, nscores = [], [], []
for c in set(classes):
inds = np.where(classes == c)
b = boxes[inds]
c = classes[inds]
s = scores[inds]
keep = nms_boxes(b, s)
nboxes.append(b[keep])
nclasses.append(c[keep])
nscores.append(s[keep])
if not nclasses and not nscores:
return None, None, None
boxes = np.concatenate(nboxes)
classes = np.concatenate(nclasses)
scores = np.concatenate(nscores)
return boxes, classes, scores
def draw(image, boxes, scores, classes):
"""Draw the boxes on the image.
# Argument:
image: original image.
boxes: ndarray, boxes of objects.
classes: ndarray, classes of objects.
scores: ndarray, scores of objects.
all_classes: all classes name.
"""
for box, score, cl in zip(boxes, scores, classes):
top, left, right, bottom = box
print('class: {}, score: {}'.format(CLASSES[cl], score))
print('box coordinate left,top,right,down: [{}, {}, {}, {}]'.format(top, left, right, bottom))
top = int(top)
left = int(left)
right = int(right)
bottom = int(bottom)
cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
(top, left + 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.6, (0, 0, 255), 2)
def letterbox(im, new_shape=(640, 640), color=(0, 0, 0)):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, ratio, (dw, dh)
if __name__ == '__main__':
# Create RKNN object
rknn = RKNN(verbose=True)
# pre-process config
print('--> Config model')
rknn.config(mean_values=[[128, 128, 128]], std_values=[[255, 255, 255]] ,target_platform='rk3566')
print('done')
# Load ONNX model
print('--> Loading model')
ret = rknn.load_onnx(model=ONNX_MODEL,outputs=['output', '492', '493'])#outputs=['495', '497', '499']
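# Note: the output tensor names ('output', '492', '493') are specific to one particular export
# and usually differ between exports; one way to find the right names for your own .onnx file
# is to open it in Netron and read the names of the three pre-decode convolution outputs.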
# ret = rknn.load_onnx(ONNX_MODEL)
if ret != 0:
print('Load model failed!')
exit(ret)
print('done')
# Build model
print('--> Building model')
ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET)
if ret != 0:
print('Build model failed!')
exit(ret)
print('done')
# Export RKNN model
print('--> Export rknn model')
ret = rknn.export_rknn(RKNN_MODEL)
if ret != 0:
print('Export rknn model failed!')
exit(ret)
print('done')
# Init runtime environment
print('--> Init runtime environment')
ret = rknn.init_runtime()
# ret = rknn.init_runtime('rk3566')
if ret != 0:
print('Init runtime environment failed!')
exit(ret)
print('done')
# Set inputs
img = cv2.imread(IMG_PATH)
print(img.shape)
# img, ratio, (dw, dh) = letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
#print(f'img is {img}')
# Inference
print('--> Running model')
outputs = rknn.inference(inputs=[img])
#print(len(outputs[4]))
# np.save('./onnx_yolov5_0.npy', outputs[0])
# np.save('./onnx_yolov5_1.npy', outputs[1])
# np.save('./onnx_yolov5_2.npy', outputs[2])
print('done')
# post process
input0_data = outputs[0]
input1_data = outputs[1]
input2_data = outputs[2]
input0_data = input0_data.reshape([3, -1] + list(input0_data.shape[-2:]))
input1_data = input1_data.reshape([3, -1] + list(input1_data.shape[-2:]))
input2_data = input2_data.reshape([3, -1] + list(input2_data.shape[-2:]))
input_data = list()
input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))
boxes, classes, scores = yolov5_post_process(input_data)
img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
if boxes is not None:
draw(img_1, boxes, scores, classes)
# show output
cv2.imshow("post process result", img_1)
cv2.waitKey(0)
cv2.destroyAllWindows()
rknn.release()
YOLOv8
The code is below. The v8 tooling handles this rather well; refer to this one. The RKNN code below is copied from this author, and the ONNX part follows this one.
ONNX inference
from ultralytics import YOLO
def yolov8_export():
# Load a model
model = YOLO(model="./runs/detect/bbb2/weights/")
model.export(format='onnx', imgsz=(608, 608), opset=12, simplify=True)

import cv2
import time
import onnxruntime as ort
from PIL import Image
import numpy as np
# confidence threshold
confidence_thres = 0.35
# IoU threshold for NMS
iou_thres = 0.5
# class names
classes = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck',
8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear',
22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase',
29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat',
35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle',
40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple',
48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut',
55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet',
62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave',
69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase',
76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}
# random color per class
color_palette = np.random.uniform(100, 255, size=(len(classes), 3))
# choose GPU or CPU execution providers
providers = [
('CUDAExecutionProvider', {
'device_id': 0, # GPU device id, in case you have several GPUs
}),
'CPUExecutionProvider', # CPU as a fallback
]
def calculate_iou(box, other_boxes):
"""
计算给定边界框与一组其他边界框之间的交并比(IoU)。
参数:
- box: 单个边界框,格式为 [x1, y1, width, height]。
- other_boxes: 其他边界框的数组,每个边界框的格式也为 [x1, y1, width, height]。
返回值:
- iou: 一个数组,包含给定边界框与每个其他边界框的IoU值。
"""
# top-left corner of the intersection
x1 = np.maximum(box[0], np.array(other_boxes)[:, 0])
y1 = np.maximum(box[1], np.array(other_boxes)[:, 1])
# bottom-right corner of the intersection
x2 = np.minimum(box[0] + box[2], np.array(other_boxes)[:, 0] + np.array(other_boxes)[:, 2])
y2 = np.minimum(box[1] + box[3], np.array(other_boxes)[:, 1] + np.array(other_boxes)[:, 3])
# area of the intersection
intersection_area = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
# area of the given box
box_area = box[2] * box[3]
# areas of the other boxes
other_boxes_area = np.array(other_boxes)[:, 2] * np.array(other_boxes)[:, 3]
# IoU values
iou = intersection_area / (box_area + other_boxes_area - intersection_area)
return iou
def custom_NMSBoxes(boxes, scores, confidence_threshold, iou_threshold):
# return an empty list if there are no boxes
if len(boxes) == 0:
return []
# convert scores and boxes to NumPy arrays
scores = np.array(scores)
boxes = np.array(boxes)
# filter boxes by the confidence threshold
mask = scores > confidence_threshold
filtered_boxes = boxes[mask]
filtered_scores = scores[mask]
# return an empty list if nothing survives the filter
if len(filtered_boxes) == 0:
return []
# sort boxes by confidence score
sorted_indices = np.argsort(filtered_scores)[::-1]
# indices of the boxes that get kept
indices = []
# loop while there are boxes left to process
while len(sorted_indices) > 0:
# pick the box with the highest score
current_index = sorted_indices[0]
indices.append(current_index)
# stop if only one box is left
if len(sorted_indices) == 1:
break
# current box vs. the remaining boxes
current_box = filtered_boxes[current_index]
other_boxes = filtered_boxes[sorted_indices[1:]]
# IoU of the current box against the others
iou = calculate_iou(current_box, other_boxes)
# boxes whose IoU is below the threshold, i.e. not overlapping the current box
non_overlapping_indices = np.where(iou <= iou_threshold)[0]
# keep only the non-overlapping boxes in sorted_indices
sorted_indices = sorted_indices[non_overlapping_indices + 1]
# return the selected indices
return indices
def draw_detections(img, box, score, class_id):
"""
在输入图像上绘制检测到的对象的边界框和标签。
参数:
img: 要在其上绘制检测结果的输入图像。
box: 检测到的边界框。
score: 对应的检测得分。
class_id: 检测到的对象的类别ID。
返回:
无
"""
# unpack the box coordinates
x1, y1, w, h = box
# color for this class ID
color = color_palette[class_id]
# draw the bounding box
cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
# label text: class name and score
label = f'{classes[class_id]}: {score:.2f}'
# size of the label text
(label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
# position of the label text
label_x = x1
label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
# filled rectangle as the label background
cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color, cv2.FILLED)
# draw the label text
cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
def preprocess(img, input_width, input_height):
"""
在执行推理之前预处理输入图像。
返回:
image_data: 为推理准备好的预处理后的图像数据。
"""
# original image height and width
img_height, img_width = img.shape[:2]
# convert BGR to RGB
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# resize to the model input size
img = cv2.resize(img, (input_width, input_height))
# normalize to [0, 1] by dividing by 255.0
image_data = np.array(img) / 255.0
# move the channel dimension first
image_data = np.transpose(image_data, (2, 0, 1)) # HWC -> CHW
# add a batch dimension to match the expected input shape
image_data = np.expand_dims(image_data, axis=0).astype(np.float32)
# return the pre-processed image data
return image_data, img_height, img_width
def postprocess(input_image, output, input_width, input_height, img_width, img_height):
"""
对模型输出进行后处理,提取边界框、得分和类别ID。
参数:
input_image (numpy.ndarray): 输入图像。
output (numpy.ndarray): 模型的输出。
input_width (int): 模型输入宽度。
input_height (int): 模型输入高度。
img_width (int): 原始图像宽度。
img_height (int): 原始图像高度。
返回:
numpy.ndarray: 绘制了检测结果的输入图像。
"""
# transpose and squeeze the output to the expected shape
outputs = np.transpose(np.squeeze(output[0]))
# number of rows in the output
rows = outputs.shape[0]
# lists for the detected boxes, scores and class IDs
boxes = []
scores = []
class_ids = []
# scaling factors for the box coordinates
x_factor = img_width / input_width
y_factor = img_height / input_height
# iterate over every row of the output
for i in range(rows):
# class scores of the current row
classes_scores = outputs[i][4:]
# highest class score
max_score = np.amax(classes_scores)
# keep it only if the best score passes the confidence threshold
if max_score >= confidence_thres:
# class ID with the highest score
class_id = np.argmax(classes_scores)
# box coordinates of the current row
x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]
# scale the box back to the original image
left = int((x - w / 2) * x_factor)
top = int((y - h / 2) * y_factor)
width = int(w * x_factor)
height = int(h * y_factor)
# store the class ID, score and box
class_ids.append(class_id)
scores.append(max_score)
boxes.append([left, top, width, height])
# non-maximum suppression to drop overlapping boxes
indices = custom_NMSBoxes(boxes, scores, confidence_thres, iou_thres)
# iterate over the indices kept by NMS
for i in indices:
# box, score and class ID for this index
box = boxes[i]
score = scores[i]
class_id = class_ids[i]
# draw the detection on the input image
draw_detections(input_image, box, score, class_id)
# return the annotated image
return input_image
def init_detect_model(model_path):
# create an inference session from the ONNX file with the chosen providers
session = ort.InferenceSession(model_path, providers=providers)
# model input info
model_inputs = session.get_inputs()
# input shape, used below
input_shape = model_inputs[0].shape
# input width and height taken from the input shape
# (note: in an NCHW shape, index 2 is the height and index 3 the width; for the usual square inputs this makes no difference)
input_width = input_shape[2]
input_height = input_shape[3]
# return the session, the input info, and the input width/height
return session, model_inputs, input_width, input_height
def detect_object(image, session, model_inputs, input_width, input_height):
# if the input is a PIL image, convert it to a NumPy array
if isinstance(image, Image.Image):
result_image = np.array(image)
else:
# otherwise use it as-is (assumed to already be a NumPy array)
result_image = image
# pre-process: resize, normalize, etc.
img_data, img_height, img_width = preprocess(result_image, input_width, input_height)
# run inference on the pre-processed data
outputs = session.run(None, {model_inputs[0].name: img_data})
# post-process: decode boxes, filter low-confidence detections, etc.
output_image = postprocess(result_image, outputs, input_width, input_height, img_width, img_height)
# return the annotated image
return output_image
if __name__ == '__main__':
# path to the model file
model_path = "yolov8n.onnx"
# initialize the detector: load the model and get its input node info and input width/height
session, model_inputs, input_width, input_height = init_detect_model(model_path)
# three modes: 1 = single image, show the result; 2 = webcam, show FPS live; 3 = video file, save the result video
mode = 1
if mode == 1:
# read the image file
image_data = cv2.imread("street.jpg")
# run object detection on the image
result_image = detect_object(image_data, session, model_inputs, input_width, input_height)
# save the annotated image
cv2.imwrite("output_image.jpg", result_image)
# show the annotated image in a window
cv2.imshow('Output', result_image)
# wait for a key press before closing the window
cv2.waitKey(0)
elif mode == 2:
# open the camera
cap = cv2.VideoCapture(0) # 0 is the default camera; try 1, 2, ... if you have more than one
# check that the camera opened
if not cap.isOpened():
print("Error: Could not open camera.")
exit()
# initialize the frame counter and start time
frame_count = 0
start_time = time.time()
# read frames from the camera in a loop
while True:
# grab one frame
ret, frame = cap.read()
# check that the frame was read
if not ret:
print("Error: Could not read frame.")
break
# run detection on the frame
output_image = detect_object(frame, session, model_inputs, input_width, input_height)
# compute the frame rate
frame_count += 1
end_time = time.time()
elapsed_time = end_time - start_time
fps = frame_count / elapsed_time
print(f"FPS: {fps:.2f}")
# draw the FPS on the frame
cv2.putText(output_image, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
# show the current frame
cv2.imshow("Video", output_image)
# press 'q' to quit
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# release the camera
cap.release()
# close the windows
cv2.destroyAllWindows()
elif mode == 3:
# input video path
input_video_path = 'kun.mp4'
# output video path
output_video_path = 'kun_det.mp4'
# open the video file
cap = cv2.VideoCapture(input_video_path)
# check that the video opened
if not cap.isOpened():
print("Error: Could not open video.")
exit()
# read basic video properties
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
fps = cap.get(cv2.CAP_PROP_FPS)
# choose a codec and create the VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # pick a codec appropriate for the file extension
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
# initialize the frame counter and start time
frame_count = 0
start_time = time.time()
while True:
ret, frame = cap.read()
if not ret:
print("Info: End of video file.")
break
# run detection on the frame
output_image = detect_object(frame, session, model_inputs, input_width, input_height)
# compute and print the frame rate
frame_count += 1
end_time = time.time()
elapsed_time = end_time - start_time
if elapsed_time > 0:
fps = frame_count / elapsed_time
print(f"FPS: {fps:.2f}")
# write the processed frame to the output video
out.write(output_image)
# (optional) show the processed frame live
cv2.imshow("Output Video", output_image)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# release resources
cap.release()
out.release()
cv2.destroyAllWindows()
else:
print("输入错误,请检查mode的赋值")Rknn推理
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/17 13:44:51
@Author : tm1
@IDE : PyCharm
@Project: onnx2rknn_YOLOv8
@Disc : Manually select the onnx output nodes.
Differences: 1. the onnx post-processing that gets dropped has to be re-implemented by hand;
2. the model can be quantized.
"""
import cv2
import numpy as np
import yaml
from rknn.api import RKNN
ONNX_MODEL = './onnx_model/VisDrone2019/best.onnx'
RKNN_MODEL = './onnx_model/VisDrone2019/best.rknn'
DATASET = './onnx_model/VisDrone2019/quantize.txt'
dataset = './onnx_model/VisDrone2019/VisDrone2019.yaml'
QUANTIZE_ON = True
# CLASSES = {0: "hogcote"} # 训练时的类别
CLASSES = {} # 训练时的类别
if CLASSES == {}:
with open(dataset, 'r') as f:
CLASSES = yaml.safe_load(f)['names']
nmsThresh = 0.45 # the larger the value, the more overlap is allowed
objectThresh = 0.5
# note: set these to the onnx model's input size
model_h = 608
model_w = 608
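# Optional alternative: read the input size from the onnx file instead of hard-coding it
# (assumes a static NCHW input), e.g.:
# import onnx
# _dims = onnx.load(ONNX_MODEL).graph.input[0].type.tensor_type.shape.dim
# model_h, model_w = _dims[2].dim_value, _dims[3].dim_value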
color_palette = np.random.uniform(0, 255, size=(len(CLASSES), 3))
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, ratio, (dw, dh)
def draw_detections(img, box, score, class_id):
"""
Draws bounding boxes and labels on the input image based on the detected objects.
Args:
img: The input image to draw detections on.
box: Detected bounding box.
score: Corresponding detection score.
class_id: Class ID for the detected object.
Returns:
None
"""
# Extract the coordinates of the bounding box
x1, y1, w, h = box
# Retrieve the color for the class ID
color = color_palette[class_id]
# Draw the bounding box on the image
cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
# Create the label text with class name and score
label = f'{CLASSES[class_id]}: {score:.2f}'
# Calculate the dimensions of the label text
(label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
# Calculate the position of the label text
label_x = x1
label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
# Draw a filled rectangle as the background for the label text
cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color,
cv2.FILLED)
# Draw the label text on the image
cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def postprocess(input_image, outputs):
img_h, img_w = input_image.shape[:2]
boxes0 = np.transpose(np.squeeze(outputs[0]))
scores0 = np.transpose(np.squeeze(outputs[1]))
if len(scores0.shape) == 1:
scores0 = np.expand_dims(scores0, axis=1)
scores = sigmoid(scores0)
max_scores = np.max(scores, axis=1) # highest score across classes
max_indices = np.argmax(scores, axis=1)
t = np.where(max_scores >= objectThresh)[0] # indices above the object threshold
boxes = boxes0[t]
scores = max_scores[t]
class_ids = max_indices[t]
# sort by score, from high to low
sorted_indices = np.argsort(scores)[::-1]
boxes = boxes[sorted_indices]
scores = scores[sorted_indices]
class_ids = class_ids[sorted_indices]
print(boxes)
print(scores)
print(class_ids)
# Get the number of rows in the outputs array
rows = boxes.shape[0]
# Lists to store the bounding boxes, scores, and class IDs of the detections
boxes_ = []
scores_ = []
class_ids_ = []
# Calculate the scaling factors for the bounding box coordinates
x_factor = img_w / model_w
y_factor = img_h / model_h
# Iterate over each row in the outputs array
for i in range(rows):
# Extract the class scores from the current row
classes_scores = scores[i]
# Find the maximum score among the class scores
max_score = np.amax(classes_scores)
# If the maximum score is above the confidence threshold
if max_score >= objectThresh:
# Get the class ID with the highest score
class_id = np.argmax(classes_scores)
# Extract the bounding box coordinates from the current row
x, y, w, h = boxes[i]
# Calculate the scaled coordinates of the bounding box
left = int((x - w / 2) * x_factor)
top = int((y - h / 2) * y_factor)
width = int(w * x_factor)
height = int(h * y_factor)
# Add the class ID, score, and box coordinates to the respective lists
class_ids_.append(class_id)
scores_.append(max_score)
boxes_.append([left, top, width, height])
print(boxes_)
print(scores_)
print(class_ids_)
# Apply non-maximum suppression to filter out overlapping bounding boxes
indices = cv2.dnn.NMSBoxes(boxes_, scores_, score_threshold=objectThresh, nms_threshold=nmsThresh)
# Iterate over the selected indices after non-maximum suppression
for i in indices:
# Get the box, score, and class ID corresponding to the index
box = boxes_[i]
score = scores_[i]
class_id = class_ids_[i]
# Draw the detection on the input image
draw_detections(input_image, box, score, class_id)
return input_image
def export_rknn():
rknn = RKNN(verbose=True)
rknn.config(
# see:ultralytics/yolo/data/utils.py
mean_values=[[0, 0, 0]],
std_values=[[255, 255, 255]],
# TODO: results were worse with the ImageNet mean/std below:
# mean_values=[[123.675, 116.28, 103.53]], # IMAGENET_MEAN = 0.485, 0.456, 0.406
# std_values=[[58.395, 57.12, 57.375]], # IMAGENET_STD = 0.229, 0.224, 0.225
quantized_algorithm='normal',
quantized_method='channel',
# optimization_level=2,
compress_weight=False, # compress the weights to shrink the rknn file; default is False
# single_core_mode=True,
# model_pruning=False, # prune the model to reduce its size; default is False
target_platform='rk3588'
)
rknn.load_onnx(
model=ONNX_MODEL,
outputs=[
'/model.22/Mul_2_output_0', '/model.22/Split_output_1',
]
)
rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET, rknn_batch_size=1)
rknn.export_rknn(RKNN_MODEL)
# # accuracy analysis
# rknn.accuracy_analysis(
# inputs=['/home/tm1/D/workspace/onnx2rknn_YOLOv8/onnx_model/official/zidane.jpg'],
# output_dir="./snapshot",
# target=None
# )
rknn.init_runtime()
return rknn
if __name__ == '__main__':
# prepare the input data
img_path = 'onnx_model/VisDrone2019/img.png'
orig_img = cv2.imread(img_path)
# img = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)
img = orig_img
img_h, img_w = img.shape[:2]
resized_img, ratio, (dw, dh) = letterbox(img, new_shape=(model_h, model_w)) # padding resize
# resized_img = cv2.resize(img, (model_w, model_h), interpolation=cv2.INTER_LINEAR) # direct resize
input = np.expand_dims(resized_img, axis=0)
# convert the model
rknn = export_rknn()
# run inference
outputs = rknn.inference(inputs=[input], data_format="nhwc")
# post-process
result_img = postprocess(resized_img, outputs)
# save the result
cv2.imwrite('./onnx_model/VisDrone2019/img_result.jpg', result_img)
# release resources
rknn.release()
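The two output tensor names passed to load_onnx above ('/model.22/Mul_2_output_0' and '/model.22/Split_output_1') belong to one particular YOLOv8 export; for another model they will likely differ. Besides opening the file in Netron, one way to look them up is to list every node's output names with the onnx package. A minimal sketch, reusing the path from the script above:
import onnx

m = onnx.load('./onnx_model/VisDrone2019/best.onnx')
for node in m.graph.node:
    # print each node's type, name and output tensor names; look around the last Detect block (model.22)
    print(node.op_type, node.name, list(node.output))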
















