引用库文件
import os
from hobot_dnn import pyeasy_dnn as dnn
from hobot_vio import libsrcampy as srcampy
import numpy as np
import cv2
import colorsys
from time import time
#导入算法推理模块hobot_dnn、视频输出模块hobot_vio、numpy、opencv、colorsys等模块
get_classes函数
def get_classes(): #目标类型
return np.array(["person", "bicycle", "car",
"motorcycle", "airplane", "bus",
"train", "truck", "boat",
"traffic light", "fire hydrant", "stop sign",
"parking meter", "bench", "bird",
"cat", "dog", "horse",
"sheep", "cow", "elephant",
"bear", "zebra", "giraffe",
"backpack", "umbrella", "handbag",
"tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball",
"kite", "baseball bat", "baseball glove",
"skateboard", "surfboard", "tennis racket",
"bottle", "wine glass", "cup",
"fork", "knife", "spoon",
"bowl", "banana", "apple",
"sandwich", "orange", "broccoli",
"carrot", "hot dog", "pizza",
"donut", "cake", "chair",
"couch", "potted plant", "bed",
"dining table", "toilet", "tv",
"laptop", "mouse", "remote",
"keyboard", "cell phone", "microwave",
"oven", "toaster", "sink",
"refrigerator", "book", "clock",
"vase", "scissors", "teddy bear",
"hair drier", "toothbrush"])
bgr2nv12_opencv函数
def bgr2nv12_opencv(image): #图像数据格式转换
height, width = image.shape[0], image.shape[1]
area = height * width
yuv420p = cv2.cvtColor(image, cv2.COLOR_BGR2YUV_I420).reshape((area * 3 // 2,))
y = yuv420p[:area]
uv_planar = yuv420p[area:].reshape((2, area // 4))
uv_packed = uv_planar.transpose((1, 0)).reshape((area // 2,))
nv12 = np.zeros_like(yuv420p)
nv12[:height * width] = y
nv12[height * width:] = uv_packed
return nv12
get_display_res函数
def get_display_res(): #获取显示结果(即显示框大小)
if os.path.exists("/usr/bin/8618_get_edid") == False:
return 1920, 1080
import subprocess
p = subprocess.Popen(["/usr/bin/8618_get_edid"], stdout=subprocess.PIPE)
result = p.communicate()
res = result[0].split(b',')
res[1] = max(min(int(res[1]), 1920), 0)
res[0] = max(min(int(res[0]), 1080), 0)
return int(res[1]), int(res[0])
print_properties函数
def print_properties(pro): #输出模型数据
print("tensor type:", pro.tensor_type)
print("data type:", pro.dtype)
print("layout:", pro.layout)
print("shape:", pro.shape)
decode函数
def decode(outputs, score_threshold, origin_shape, input_size=512):
def _distance2bbox(points, distance): #计算两点坐标,并将数据转换为4*1矩阵
x1 = points[..., 0] - distance[..., 0]
y1 = points[..., 1] - distance[..., 1]
x2 = points[..., 0] + distance[..., 2]
y2 = points[..., 1] + distance[..., 3]
return np.stack([x1, y1, x2, y2], -1)
def _scores(cls, ce): #使用sigmoid函数激活并相乘开根,求均值(使更鲁棒)
cls = 1 / (1 + np.exp(-cls))
ce = 1 / (1 + np.exp(-ce))
return np.sqrt(ce * cls)
def _bbox(bbox, stride, origin_shape, input_size): #图像长宽转换处理
h, w = bbox.shape[1:3] #获取初始长宽
yv, xv = np.meshgrid(np.arange(h), np.arange(w)) #创建坐标矩阵
xy = (np.stack((yv, xv), 2) + 0.5) * stride
bbox = _distance2bbox(xy, bbox) #计算得到两点数据的4*1矩阵
# opencv read, shape[1] is w, shape[0] is h
scale_w = origin_shape[1] / input_size #计算尺度
scale_h = origin_shape[0] / input_size
scale = max(origin_shape[0], origin_shape[1]) / input_size
# origin img is pad resized
#bbox = bbox * scale
# origin img is resized
bbox = bbox * [scale_w, scale_h, scale_w, scale_h] #尺度转换
return bbox
bboxes = list()
strides = [8, 16, 32, 64, 128]
for i in range(len(strides)): #矩阵计算
cls = outputs[i].buffer
bbox = outputs[i + 5].buffer
ce = outputs[i + 10].buffer
scores = _scores(cls, ce)
classes = np.argmax(scores, axis=-1)
classes = np.reshape(classes, [-1, 1])
max_score = np.max(scores, axis=-1)
max_score = np.reshape(max_score, [-1, 1])
bbox = _bbox(bbox, strides[i], origin_shape, input_size)
bbox = np.reshape(bbox, [-1, 4])
pred_bbox = np.concatenate([bbox, max_score, classes], axis=1)
index = pred_bbox[..., 4] > score_threshold
pred_bbox = pred_bbox[index]
bboxes.append(pred_bbox)
return np.concatenate(bboxes) #结果合并多个矩阵成为一个新矩阵
nms函数
def nms(bboxes, iou_threshold, sigma=0.3, method='nms'): #预测框大小设置
def bboxes_iou(boxes1, boxes2):
boxes1 = np.array(boxes1)
boxes2 = np.array(boxes2)
boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * \
(boxes1[..., 3] - boxes1[..., 1])
boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * \
(boxes2[..., 3] - boxes2[..., 1])
left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
inter_section = np.maximum(right_down - left_up, 0.0)
inter_area = inter_section[..., 0] * inter_section[..., 1]
union_area = boxes1_area + boxes2_area - inter_area
ious = np.maximum(1.0 * inter_area / union_area,
np.finfo(np.float32).eps)
return ious
classes_in_img = list(set(bboxes[:, 5]))
best_bboxes = []
for cls in classes_in_img:
cls_mask = (bboxes[:, 5] == cls)
cls_bboxes = bboxes[cls_mask]
while len(cls_bboxes) > 0:
max_ind = np.argmax(cls_bboxes[:, 4])
best_bbox = cls_bboxes[max_ind]
best_bboxes.append(best_bbox)
cls_bboxes = np.concatenate(
[cls_bboxes[:max_ind], cls_bboxes[max_ind + 1:]])
iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
weight = np.ones((len(iou),), dtype=np.float32)
assert method in ['nms', 'soft-nms']
if method == 'nms':
iou_mask = iou > iou_threshold
weight[iou_mask] = 0.0
if method == 'soft-nms':
weight = np.exp(-(1.0 * iou ** 2 / sigma))
cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
score_mask = cls_bboxes[:, 4] > 0.
cls_bboxes = cls_bboxes[score_mask]
return best_bboxes
draw_bboxs函数
def draw_bboxs(image, bboxes, gt_classes_index=None, classes=get_classes()): #预测框绘制
"""draw the bboxes in the original image
"""
num_classes = len(classes)
image_h, image_w, channel = image.shape
hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
colors = list(
map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
colors))
fontScale = 0.5
bbox_thick = int(0.6 * (image_h + image_w) / 600)
for i, bbox in enumerate(bboxes):
coor = np.array(bbox[:4], dtype=np.int32)
if gt_classes_index == None:
class_index = int(bbox[5])
score = bbox[4]
else:
class_index = gt_classes_index[i]
score = 1
bbox_color = colors[class_index]
c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
classes_name = classes[class_index]
bbox_mess = '%s: %.2f' % (classes_name, score)
t_size = cv2.getTextSize(bbox_mess,
0,
fontScale,
thickness=bbox_thick // 2)[0]
cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3),
bbox_color, -1)
cv2.putText(image,
bbox_mess, (c1[0], c1[1] - 2),
cv2.FONT_HERSHEY_SIMPLEX,
fontScale, (0, 0, 0),
bbox_thick // 2,
lineType=cv2.LINE_AA)
print("{} is in the picture with confidence:{:.4f}".format(
classes_name, score))
# cv2.imwrite("demo.jpg", image)
return image
postprocess函数
def postprocess(model_output, #根据输入数据设置预测图像框
model_hw_shape,
origin_image=None,
origin_img_shape=None,
score_threshold=0.5,
nms_threshold=0.6,
dump_image=False):
input_height = model_hw_shape[0]
input_width = model_hw_shape[1]
if origin_image is not None:
origin_image_shape = origin_image.shape[0:2]
else:
origin_image_shape = origin_img_shape #设置图像长宽
prediction_bbox = decode(outputs=model_output, #数据计算
score_threshold=score_threshold,
origin_shape=origin_image_shape,
input_size=512)
prediction_bbox = nms(prediction_bbox, iou_threshold=nms_threshold)
prediction_bbox = np.array(prediction_bbox)
topk = min(prediction_bbox.shape[0], 1000)
if topk != 0:
idx = np.argpartition(prediction_bbox[..., 4], -topk)[-topk:]
prediction_bbox = prediction_bbox[idx]
if dump_image and origin_image is not None:
draw_bboxs(origin_image, prediction_bbox)
return prediction_bbox
主函数部分
if __name__ == '__main__':
models = dnn.load('../models/fcos_512x512_nv12.bin') #载入模型
# 打印输入 tensor 的属性
print_properties(models[0].inputs[0].properties)
# 打印输出 tensor 的属性
print(len(models[0].outputs)) #此部分输出模型结构,数据类型以及格式
for output in models[0].outputs:
print_properties(output.properties)
# 打开 usb camera: /dev/video8 获取usb摄像头
cap = cv2.VideoCapture(8)
if(not cap.isOpened()):
exit(-1)
print("Open usb camera successfully")
# 设置usb camera的输出图像格式为 MJPEG, 分辨率 640 x 480
# 可以通过 v4l2-ctl -d /dev/video8 --list-formats-ext 命令查看摄像头支持的分辨率
# 根据应用需求调整该采集图像的分辨率
codec = cv2.VideoWriter_fourcc( 'M', 'J', 'P', 'G' ) #MJPG = Motion-JPEG也就是MJPEG
cap.set(cv2.CAP_PROP_FOURCC, codec) #设置编解码器
cap.set(cv2.CAP_PROP_FPS, 30) #设置帧率为30帧
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) #分辨率长
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480) #分辨率宽
# Get HDMI display object
disp = srcampy.Display() #使用Hobot-vio库display图像信息,应该可以用opencv替换
# For the meaning of parameters, please refer to the relevant documents of HDMI display
disp_w, disp_h = get_display_res() #窗口大小
disp.display(0, disp_w, disp_h) #设置窗口大小
start_time = time() #预设置参数
image_counter = 0
while True:
_ ,frame = cap.read() #获取frame值
# print(frame.shape)
if frame is None: #判断是否获取到图像信息
print("Failed to get image from usb camera")
# 把图片缩放到模型的输入尺寸
# 获取算法模型的输入tensor 的尺寸
h, w = models[0].inputs[0].properties.shape[2], models[0].inputs[0].properties.shape[3]
des_dim = (w, h)
resized_data = cv2.resize(frame, des_dim, interpolation=cv2.INTER_AREA) #修改输入图像尺寸,使适合模型
nv12_data = bgr2nv12_opencv(resized_data) #转换图像数据格式
# Forward
outputs = models[0].forward(nv12_data) #前向计算
# Do post process
input_shape = (h, w)
prediction_bbox = postprocess(outputs, input_shape, origin_img_shape=(disp_h, disp_w))
if frame.shape[0]!=disp_h or frame.shape[1]!=disp_w:
frame = cv2.resize(frame, (disp_w,disp_h), interpolation=cv2.INTER_AREA)
# Draw bboxs
box_bgr = draw_bboxs(frame, prediction_bbox) #绘制预测框
# cv2.imwrite("imf.jpg", box_bgr)
# Convert to nv12 for HDMI display
box_nv12 = bgr2nv12_opencv(box_bgr) #图像数据格式转换
disp.set_img(box_nv12.tobytes())
finish_time = time()
image_counter += 1
if finish_time - start_time > 10:
print(start_time, finish_time, image_counter)
print("FPS: {:.2f}".format(image_counter / (finish_time - start_time)))
start_time = finish_time
image_counter = 0
评论(0)
您还未登录,请登录后发表或查看评论