refactors into classes
- ImageMeta stores the image metadata (height, width, channels) - DataHandler loads the dataset and parses the label information - YoloProcessing handles image preprocessing and YOLO postprocessing - HailoHandler connects to the Hailo device, loads the desired network HEF file and runs the dataset on the Hailo chip
This commit is contained in:
parent
c64b8a27dd
commit
be89e739b4
554
inference.py
554
inference.py
@ -1,317 +1,302 @@
|
|||||||
import os
|
|
||||||
from multiprocessing import Process
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import time
|
import time
|
||||||
import numpy as np
|
t
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
from detection_tools.utils.visualization_utils import \
|
||||||
|
visualize_boxes_and_labels_on_image_array
|
||||||
|
|
||||||
|
from hailo_platform import (ConfigureParams, FormatType, HEF,
|
||||||
|
HailoStreamInterface, InferVStreams,
|
||||||
|
InputVStreamParams, OutputVStreamParams,
|
||||||
|
PcieDevice)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from tensorflow.image import combined_non_max_suppression
|
from tensorflow.image import combined_non_max_suppression
|
||||||
|
|
||||||
from detection_tools.utils.visualization_utils import visualize_boxes_and_labels_on_image_array
|
|
||||||
|
|
||||||
|
|
||||||
from hailo_platform import (HEF, PcieDevice, HailoStreamInterface, InferVStreams, ConfigureParams, \
|
|
||||||
InputVStreamParams, OutputVStreamParams, InputVStreams, OutputVStreams, FormatType)
|
|
||||||
|
|
||||||
# preprocess dataset for yolov5 size
|
|
||||||
# yolov5 640x640
|
|
||||||
# resnet18 320x320
|
|
||||||
|
|
||||||
|
|
||||||
def preproc(image, output_height=640, output_width=640, resize_side=256):
    """Resize ``image`` to a fixed height and crop a horizontally centred window.

    The image is scaled so that its height equals ``resize_side`` while the
    width follows the original aspect ratio (truncated to an int). A window
    of ``output_width`` x ``output_height`` is then cropped from the scaled
    image, centred horizontally and anchored at the top edge (y = 0).

    NOTE(review): with the default arguments the scaled image is 256 px tall
    while the crop window is 640 px tall, so the window reaches past the
    scaled image - confirm that this is intended.

    Args:
        image: object exposing ``width``, ``height``, ``resize`` and ``crop``
            (presumably a PIL image - verify against the caller).
        output_height: height of the crop window.
        output_width: width of the crop window.
        resize_side: height the image is scaled to before cropping.

    Returns:
        The result of ``crop`` on the scaled image.
    """
    scaled_width = int(image.width/image.height*resize_side)
    scaled_size = (scaled_width, resize_side)

    # Crop window: centred on the horizontal axis, starting at the top row.
    left = (scaled_width-output_width)/2
    top = 0
    crop_box = (left, top, left+output_width, top+output_height)

    resized = image.resize(scaled_size)
    return resized.crop(crop_box)
|
|
||||||
|
|
||||||
|
|
||||||
# Collect images from data files
|
# Collect images from data files
|
||||||
def dataset_read(hef):
    """Load the first ``.jpg`` found in ``./minimal_data`` as a one-image batch.

    The batch shape is taken from the first input vstream of ``hef``. Only
    the first listed image is read: the buffer holds a single image and the
    loop stops after one iteration. When the directory contains no ``.jpg``
    file the batch stays all zeros and ``names`` is empty.

    Args:
        hef: object whose ``get_input_vstream_infos()[0].shape`` unpacks to
            (height, width, channels).

    Returns:
        (dataset, names): a float32 array of shape
        (1, height, width, channels) and the list of loaded file names.
    """
    images_path = './minimal_data'
    images_list = [img_name for img_name in os.listdir(images_path)
                   if os.path.splitext(img_name)[1] == '.jpg']

    # Define dataset params
    input_vstream_info = hef.get_input_vstream_infos()[0]
    image_height, image_width, channels = input_vstream_info.shape

    dataset = np.zeros((1, image_height, image_width, channels),
                       dtype=np.float32)
    names = []

    # Only one image fits the single-slot buffer, so take at most the first.
    for idx, img_name in enumerate(images_list[:1]):
        # The context manager closes the underlying file once the pixels
        # have been copied into the batch.
        with Image.open(os.path.join(images_path, img_name)) as img:
            dataset[idx, :, :, :] = np.array(preproc(img))
        names.append(img_name)

    return dataset, names
|
|
||||||
|
|
||||||
# Generate random dataset
|
|
||||||
def dataset_random(image_height, image_width, channels):
    """Return a batch of 10 random images as a float32 array.

    Values are integers drawn by ``np.random.randint`` from [2, 20) and then
    cast to float32.

    Args:
        image_height: height of every generated image.
        image_width: width of every generated image.
        channels: number of channels of every generated image.

    Returns:
        Array of shape (10, image_height, image_width, channels).
    """
    batch_shape = (10, image_height, image_width, channels)
    samples = np.random.randint(2, 20, batch_shape)
    return samples.astype(np.float32)
|
|
||||||
|
|
||||||
def init_hailo(model_name='yolov5m'):
    """Open the PCIe device and configure it with ``hef/<model_name>.hef``.

    Args:
        model_name: base name of the HEF file inside the ``hef`` directory.

    Returns:
        (hef, network_group): the loaded HEF object and the first network
        group returned by configuring the device with it.
    """
    device = PcieDevice()
    hef = HEF(f'hef/{model_name}.hef')

    # Configure network groups; only the first one is used.
    params = ConfigureParams.create_from_hef(hef=hef, interface=HailoStreamInterface.PCIe)
    network_group = device.configure(hef, params)[0]

    return hef, network_group
|
class ImageMeta:
    """Plain container for the dimensions of an input image."""

    def __init__(self, image_height, image_width, channels):
        """Store height, width and channel count exactly as given."""
        self.image_height, self.image_width, self.channels = (
            image_height, image_width, channels)
|
||||||
|
|
||||||
|
|
||||||
'''
|
class DataHandler:
|
||||||
The target can be used as a context manager ("with" statement) to ensure it's released on time.
|
def __init__(self, path, image_meta):
|
||||||
Here it's avoided for the sake of simplicity
|
self.images_path = path
|
||||||
'''
|
self.image_meta = image_meta
|
||||||
def run_hailo(dataset, names, hef, network_group):
|
|
||||||
# Create input and output virtual streams params
|
def load_data(self, preprocess_fn):
|
||||||
# Quantized argument signifies whether or not the incoming data is already quantized.
|
names = []
|
||||||
# Data is quantized by HailoRT if and only if quantized == False .
|
|
||||||
input_vstreams_params = InputVStreamParams.make(network_group,
|
images_list = [img_name for img_name in os.listdir(self.images_path)
|
||||||
quantized=False,
|
if os.path.splitext(os.path.join(self.images_path, img_name))[1] == '.jpg']
|
||||||
format_type=FormatType.FLOAT32)
|
dataset = np.zeros((1, self.image_meta.image_height,
|
||||||
# TODO: change to FLOAT32
|
self.image_meta.image_width,
|
||||||
output_vstreams_params = OutputVStreamParams.make(network_group, quantized=False, format_type=FormatType.FLOAT32)
|
self.image_meta.channels),
|
||||||
# output_vstreams_params = OutputVStreamParams.make(network_group,
|
dtype=np.float32)
|
||||||
# quantized=True,
|
|
||||||
# format_type=FormatType.INT8)
|
for idx, img_name in enumerate(images_list):
|
||||||
|
img = Image.open(os.path.join(self.images_path, img_name))
|
||||||
|
img_preproc = preprocess_fn(img)
|
||||||
|
dataset[idx, :, :, :] = np.array(img_preproc)
|
||||||
|
names.append(img_name)
|
||||||
|
break
|
||||||
|
|
||||||
|
self.dataset = dataset
|
||||||
|
self.names = names
|
||||||
|
|
||||||
|
|
||||||
input_vstream_info = hef.get_input_vstream_infos()[0]
|
def _get_coco_labels(self):
|
||||||
output_vstream_infos = hef.get_output_vstream_infos()
|
coco_names = json.load(open(os.path.join(os.path.dirname(__file__), 'coco_names.json')))
|
||||||
input_data = {input_vstream_info.name: dataset}
|
coco_names = {int(k): {'id': int(k), 'name': str(v)} for (k, v) in coco_names.items()}
|
||||||
network_group_params = network_group.create_params()
|
return coco_names
|
||||||
|
|
||||||
with InferVStreams(network_group, input_vstreams_params, output_vstreams_params) as infer_pipeline:
|
|
||||||
with network_group.activate(network_group_params):
|
|
||||||
infer_results = infer_pipeline.infer(input_data)
|
|
||||||
|
|
||||||
out = [infer_results[i.name] for i in output_vstream_infos]
|
|
||||||
return out, names, dataset, names
|
|
||||||
|
|
||||||
|
|
||||||
# 20 x 20 -> 32
|
def _get_labels(self, label_name):
|
||||||
# stride = 32
|
filename = os.path.join(os.path.dirname(__file__), label_name + '.json')
|
||||||
def yolo_postprocess_numpy(net_out, anchors_for_stride, stride):
|
names = json.load(open(filename))
|
||||||
"""
|
names = {int(k): {'id': int(k), 'name': str(v)} for (k, v) in names.items()}
|
||||||
net_out is shape: [N, 19, 19, 255] or [N, 38, 38, 255] or [N, 76, 76, 255]
|
return names
|
||||||
first we reshape it to be as in gluon and then follow gluon's shapes.
|
|
||||||
output_ind = 0 for stride 32, 1 for stride 16, 2 for stride 8.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# net_out = net_out.astype(np.float32) / 256
|
COCO_17_14 = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9,
|
||||||
num_classes = 4
|
10: 10, 11: 11, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18,
|
||||||
BS = net_out.shape[0] # batch size
|
18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 27,
|
||||||
H = net_out.shape[1]
|
26: 28, 27: 31, 28: 32, 29: 33, 30: 34, 31: 35, 32: 36, 33: 37,
|
||||||
W = net_out.shape[2]
|
34: 38, 35: 39, 36: 40, 37: 41, 38: 42, 39: 43, 40: 44, 41: 46,
|
||||||
|
42: 47, 43: 48, 44: 49, 45: 50, 46: 51, 47: 52, 48: 53, 49: 54,
|
||||||
num_anchors = anchors_for_stride.size // 2 # 2 params for each anchor.
|
50: 55, 51: 56, 52: 57, 53: 58, 54: 59, 55: 60, 56: 61, 57: 62,
|
||||||
num_pred = 1 + 4 + num_classes # 2 box centers, 2 box scales, 1 objness, num_classes class scores
|
58: 63, 59: 64, 60: 65, 61: 67, 62: 70, 63: 72, 64: 73, 65: 74,
|
||||||
alloc_size = (128, 128)
|
66: 75, 67: 76, 68: 77, 69: 78, 70: 79, 71: 80, 72: 81, 73: 82,
|
||||||
|
74: 84, 75: 85, 76: 86, 77: 87, 78: 88, 79: 89, 80: 90}
|
||||||
grid_x = np.arange(alloc_size[1])
|
|
||||||
grid_y = np.arange(alloc_size[0])
|
|
||||||
grid_x, grid_y = np.meshgrid(grid_x, grid_y) # dims [128,128], [128,128]
|
|
||||||
|
|
||||||
offsets = np.concatenate((grid_x[:, :, np.newaxis], grid_y[:, :, np.newaxis]), axis=-1) # dim [128,128,2]
|
|
||||||
offsets = np.expand_dims(np.expand_dims(offsets, 0), 0) # dim [1,1,128,128,2]
|
|
||||||
|
|
||||||
pred = net_out.transpose((0, 3, 1, 2)) # now dims are: [N,C,H,W] as in Gluon.
|
|
||||||
pred = np.reshape(pred, (BS, num_anchors * num_pred, -1)) # dim [N, 255, HxW]
|
|
||||||
# dim [N, 361, 255], we did it so that the 255 be the last dim and can be reshaped.
|
|
||||||
pred = pred.transpose((0, 2, 1))
|
|
||||||
pred = np.reshape(pred, (BS, -1, num_anchors, num_pred)) # dim [N, HxW, 3, 85]]
|
|
||||||
|
|
||||||
raw_box_centers = pred[:, :, :, 0:2] # dim [N, HxW, 3, 2]
|
|
||||||
raw_box_scales = pred[:, :, :, 2:4] # dim [N,HxW, 3, 2]
|
|
||||||
|
|
||||||
objness = pred[:, :, :, 4:5] # dim [N, HxW, 3, 1]
|
|
||||||
class_pred = pred[:, :, :, 5:] # dim [N, HxW, 3, 80]
|
|
||||||
offsets = offsets[:, :, :H, :W, :] # dim [1, 1, H, W, 2]
|
|
||||||
offsets = np.reshape(offsets, (1, -1, 1, 2)) # dim [1, HxW, 1, 2]
|
|
||||||
box_centers, box_scales, confidence, class_pred = _yolo5_decode(
|
|
||||||
raw_box_centers=raw_box_centers,
|
|
||||||
raw_box_scales=raw_box_scales,
|
|
||||||
objness=objness,
|
|
||||||
class_pred=class_pred,
|
|
||||||
anchors_for_stride=anchors_for_stride,
|
|
||||||
offsets=offsets,
|
|
||||||
stride=stride)
|
|
||||||
|
|
||||||
class_score = class_pred * confidence # dim [N, HxW, 3, 80]
|
|
||||||
wh = box_scales / 2.0
|
|
||||||
# dim [N, HxW, 3, 4]. scheme xmin, ymin, xmax, ymax
|
|
||||||
bbox = np.concatenate((box_centers - wh, box_centers + wh), axis=-1)
|
|
||||||
|
|
||||||
detection_boxes = np.reshape(bbox, (BS, -1, 1, 4)) # dim [N, num_detections, 1, 4]
|
|
||||||
detection_scores = np.reshape(class_score, (BS, -1, num_classes)) # dim [N, num_detections, 80]
|
|
||||||
|
|
||||||
# switching scheme from xmin, ymin, xmanx, ymax to ymin, xmin, ymax, xmax:
|
|
||||||
detection_boxes_tmp = np.zeros(detection_boxes.shape)
|
|
||||||
detection_boxes_tmp[:, :, :, 0] = detection_boxes[:, :, :, 1]
|
|
||||||
detection_boxes_tmp[:, :, :, 1] = detection_boxes[:, :, :, 0]
|
|
||||||
detection_boxes_tmp[:, :, :, 2] = detection_boxes[:, :, :, 3]
|
|
||||||
detection_boxes_tmp[:, :, :, 3] = detection_boxes[:, :, :, 2]
|
|
||||||
|
|
||||||
detection_boxes = detection_boxes_tmp # now scheme is: ymin, xmin, ymax, xmax
|
|
||||||
return detection_boxes.astype(np.float32), detection_scores.astype(np.float32)
|
|
||||||
|
|
||||||
def _yolo5_decode(raw_box_centers, raw_box_scales, objness, class_pred, anchors_for_stride, offsets, stride):
    """Turn raw centre/scale predictions into box centres and box sizes.

    Centres: ``(raw * 2 - 0.5 + offsets) * stride``.
    Sizes:   ``(raw * 2) ** 2 * anchors_for_stride``.
    ``objness`` and ``class_pred`` are returned unchanged.

    Returns:
        (box_centers, box_scales, objness, class_pred)
    """
    cell_relative = raw_box_centers * 2. - 0.5
    box_centers = (cell_relative + offsets) * stride

    doubled_scales = raw_box_scales * 2
    box_scales = doubled_scales ** 2 * anchors_for_stride  # dim [N, HxW, 3, 2]

    return box_centers, box_scales, objness, class_pred
|
|
||||||
|
|
||||||
def postprocessing(endnodes):
|
|
||||||
"""
|
|
||||||
endnodes is a list of 3 output tensors:
|
|
||||||
endnodes[0] - stride 32 of input
|
|
||||||
endnodes[1] - stride 16 of input
|
|
||||||
endnodes[2] - stride 8 of input
|
|
||||||
Returns:
|
|
||||||
a tensor with dims: [BS, Total_num_of_detections_in_image, 6]
|
|
||||||
where:
|
|
||||||
total_num_of_detections_in_image = H*W*((1/32^2) + (1/16^2) + (1/8^2))*num_anchors*num_classes,
|
|
||||||
with H, W as input dims.
|
|
||||||
If H=W=608, num_anchors=3, num_classes=80 (coco 2017), we get:
|
|
||||||
total_num_of_detections = 1819440 ~ 1.8M detections per image for the NMS
|
|
||||||
"""
|
|
||||||
H_input = 640
|
|
||||||
W_input = 640
|
|
||||||
anchors_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
|
|
||||||
# TODO make prettier
|
|
||||||
strides = [8, 16, 32]
|
|
||||||
num_classes = 80
|
|
||||||
|
|
||||||
for output_ind, output_branch in enumerate(endnodes): # iterating over the output layers:
|
|
||||||
stride = strides[::-1][output_ind]
|
|
||||||
anchors_for_stride = np.array(anchors_list[::-1][output_ind])
|
|
||||||
anchors_for_stride = np.reshape(anchors_for_stride, (1, 1, -1, 2)) # dim [1, 1, 3, 2]
|
|
||||||
|
|
||||||
detection_boxes, detection_scores = yolo_postprocess_numpy(output_branch,
|
|
||||||
anchors_for_stride,
|
|
||||||
stride)
|
|
||||||
|
|
||||||
# detection_boxes is a [BS, num_detections, 1, 4] tensor, detection_scores is a
|
|
||||||
# [BS, num_detections, num_classes] tensor
|
|
||||||
detection_boxes = detection_boxes / H_input # normalization of box coordinates to 1
|
|
||||||
BS = endnodes[0].shape[0]
|
|
||||||
H = H_input // stride
|
|
||||||
W = W_input // stride
|
|
||||||
num_anchors = anchors_for_stride.size // 2
|
|
||||||
num_detections = H * W * num_anchors
|
|
||||||
# detection_boxes.set_shape((BS, num_detections, 1, 4))
|
|
||||||
# detection_scores.set_shape((BS, num_detections, num_classes))
|
|
||||||
# concatenating the detections from the different output layers:
|
|
||||||
if output_ind == 0:
|
|
||||||
detection_boxes_full = detection_boxes
|
|
||||||
detection_scores_full = detection_scores
|
|
||||||
else:
|
|
||||||
detection_boxes_full = tf.concat([detection_boxes_full, detection_boxes], axis=1)
|
|
||||||
detection_scores_full = tf.concat([detection_scores_full, detection_scores], axis=1)
|
|
||||||
|
|
||||||
score_threshold = 0.5
|
|
||||||
nms_iou_threshold = 0.5
|
|
||||||
labels_offset = 1
|
|
||||||
|
|
||||||
(nmsed_boxes, nmsed_scores, nmsed_classes, num_detections) = \
|
|
||||||
combined_non_max_suppression(boxes=detection_boxes_full,
|
|
||||||
scores=detection_scores_full,
|
|
||||||
score_threshold=score_threshold,
|
|
||||||
iou_threshold=nms_iou_threshold,
|
|
||||||
max_output_size_per_class=100,
|
|
||||||
max_total_size=100)
|
|
||||||
|
|
||||||
|
|
||||||
# adding offset to the class prediction and cast to integer
|
class YoloProcessing:
|
||||||
def translate_coco_2017_to_2014(nmsed_classes):
|
def __init__(self, imageMeta, classes):
|
||||||
return np.vectorize(COCO_2017_TO_2014_TRANSLATION.get)(nmsed_classes).astype(np.int32)
|
self.output_height = imageMeta.image_height
|
||||||
|
self.output_width = imageMeta.image_width
|
||||||
|
self.classes = classes
|
||||||
|
|
||||||
nmsed_classes = tf.cast(tf.add(nmsed_classes, labels_offset), tf.int16)
|
def preproc(self, image, resize_side=256):
|
||||||
nmsed_classes = translate_coco_2017_to_2014(nmsed_classes)
|
|
||||||
|
|
||||||
return {'detection_boxes': nmsed_boxes,
|
'''
|
||||||
'detection_scores': nmsed_scores,
|
imagenet-standard: aspect-preserving resize to 256px smaller-side,
|
||||||
'detection_classes': nmsed_classes,
|
then central-crop to 224px
|
||||||
'num_detections': num_detections}
|
'''
|
||||||
|
new_width = int(image.width/image.height*resize_side)
|
||||||
|
new_height = resize_side
|
||||||
|
x, y = (new_width-self.output_width)/2, 0
|
||||||
|
|
||||||
|
# Select area to crop
|
||||||
|
area = (x, y, x+self.output_width, y+self.output_height)
|
||||||
|
|
||||||
|
# Crop, show, and save image
|
||||||
|
cropped_img = image.resize((new_width, new_height)).crop(area)
|
||||||
|
return cropped_img
|
||||||
|
|
||||||
|
# 20 x 20 -> 32
|
||||||
|
# stride = 32
|
||||||
|
def yolo_postprocess_numpy(self, net_out, anchors_for_stride, stride):
|
||||||
|
"""
|
||||||
|
net_out is shape: [N, 19, 19, 255] or [N, 38, 38, 255] or [N, 76, 76, 255]
|
||||||
|
first we reshape it to be as in gluon and then follow gluon's shapes.
|
||||||
|
output_ind = 0 for stride 32, 1 for stride 16, 2 for stride 8.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# net_out = net_out.astype(np.float32) / 256
|
||||||
|
num_classes = 4
|
||||||
|
BS = net_out.shape[0] # batch size
|
||||||
|
H = net_out.shape[1]
|
||||||
|
W = net_out.shape[2]
|
||||||
|
|
||||||
|
num_anchors = anchors_for_stride.size // 2 # 2 params for each anchor.
|
||||||
|
num_pred = 1 + 4 + num_classes # 2 box centers, 2 box scales, 1 objness, num_classes class scores
|
||||||
|
alloc_size = (128, 128)
|
||||||
|
|
||||||
|
grid_x = np.arange(alloc_size[1])
|
||||||
|
grid_y = np.arange(alloc_size[0])
|
||||||
|
grid_x, grid_y = np.meshgrid(grid_x, grid_y) # dims [128,128], [128,128]
|
||||||
|
|
||||||
|
offsets = np.concatenate((grid_x[:, :, np.newaxis], grid_y[:, :, np.newaxis]), axis=-1) # dim [128,128,2]
|
||||||
|
offsets = np.expand_dims(np.expand_dims(offsets, 0), 0) # dim [1,1,128,128,2]
|
||||||
|
|
||||||
|
pred = net_out.transpose((0, 3, 1, 2)) # now dims are: [N,C,H,W] as in Gluon.
|
||||||
|
pred = np.reshape(pred, (BS, num_anchors * num_pred, -1)) # dim [N, 255, HxW]
|
||||||
|
# dim [N, 361, 255], we did it so that the 255 be the last dim and can be reshaped.
|
||||||
|
pred = pred.transpose((0, 2, 1))
|
||||||
|
pred = np.reshape(pred, (BS, -1, num_anchors, num_pred)) # dim [N, HxW, 3, 85]]
|
||||||
|
|
||||||
|
raw_box_centers = pred[:, :, :, 0:2] # dim [N, HxW, 3, 2]
|
||||||
|
raw_box_scales = pred[:, :, :, 2:4] # dim [N,HxW, 3, 2]
|
||||||
|
|
||||||
|
objness = pred[:, :, :, 4:5] # dim [N, HxW, 3, 1]
|
||||||
|
class_pred = pred[:, :, :, 5:] # dim [N, HxW, 3, 80]
|
||||||
|
offsets = offsets[:, :, :H, :W, :] # dim [1, 1, H, W, 2]
|
||||||
|
offsets = np.reshape(offsets, (1, -1, 1, 2)) # dim [1, HxW, 1, 2]
|
||||||
|
box_centers, box_scales, confidence, class_pred = self._yolo5_decode(
|
||||||
|
raw_box_centers=raw_box_centers,
|
||||||
|
raw_box_scales=raw_box_scales,
|
||||||
|
objness=objness,
|
||||||
|
class_pred=class_pred,
|
||||||
|
anchors_for_stride=anchors_for_stride,
|
||||||
|
offsets=offsets,
|
||||||
|
stride=stride)
|
||||||
|
|
||||||
|
class_score = class_pred * confidence # dim [N, HxW, 3, 80]
|
||||||
|
wh = box_scales / 2.0
|
||||||
|
# dim [N, HxW, 3, 4]. scheme xmin, ymin, xmax, ymax
|
||||||
|
bbox = np.concatenate((box_centers - wh, box_centers + wh), axis=-1)
|
||||||
|
|
||||||
|
detection_boxes = np.reshape(bbox, (BS, -1, 1, 4)) # dim [N, num_detections, 1, 4]
|
||||||
|
detection_scores = np.reshape(class_score, (BS, -1, num_classes)) # dim [N, num_detections, 80]
|
||||||
|
|
||||||
|
# switching scheme from xmin, ymin, xmanx, ymax to ymin, xmin, ymax, xmax:
|
||||||
|
detection_boxes_tmp = np.zeros(detection_boxes.shape)
|
||||||
|
detection_boxes_tmp[:, :, :, 0] = detection_boxes[:, :, :, 1]
|
||||||
|
detection_boxes_tmp[:, :, :, 1] = detection_boxes[:, :, :, 0]
|
||||||
|
detection_boxes_tmp[:, :, :, 2] = detection_boxes[:, :, :, 3]
|
||||||
|
detection_boxes_tmp[:, :, :, 3] = detection_boxes[:, :, :, 2]
|
||||||
|
|
||||||
|
detection_boxes = detection_boxes_tmp # now scheme is: ymin, xmin, ymax, xmax
|
||||||
|
return detection_boxes.astype(np.float32), detection_scores.astype(np.float32)
|
||||||
|
|
||||||
|
def _yolo5_decode(self, raw_box_centers, raw_box_scales, objness, class_pred, anchors_for_stride, offsets, stride):
|
||||||
|
box_centers = (raw_box_centers * 2. - 0.5 + offsets) * stride
|
||||||
|
box_scales = (raw_box_scales * 2) ** 2 * anchors_for_stride # dim [N, HxW, 3, 2]
|
||||||
|
return box_centers, box_scales, objness, class_pred
|
||||||
|
|
||||||
|
|
||||||
def _get_face_detection_visualization_data(logits):
|
def postprocessing(self, endnodes):
|
||||||
boxes = logits['detection_boxes'][0]
|
"""
|
||||||
|
endnodes is a list of 3 output tensors:
|
||||||
|
endnodes[0] - stride 32 of input
|
||||||
|
endnodes[1] - stride 16 of input
|
||||||
|
endnodes[2] - stride 8 of input
|
||||||
|
Returns:
|
||||||
|
a tensor with dims: [BS, Total_num_of_detections_in_image, 6]
|
||||||
|
where:
|
||||||
|
total_num_of_detections_in_image = H*W*((1/32^2) + (1/16^2) + (1/8^2))*num_anchors*num_classes,
|
||||||
|
with H, W as input dims.
|
||||||
|
If H=W=608, num_anchors=3, num_classes=80 (coco 2017), we get:
|
||||||
|
total_num_of_detections = 1819440 ~ 1.8M detections per image for the NMS
|
||||||
|
"""
|
||||||
|
H_input = 640
|
||||||
|
W_input = 640
|
||||||
|
anchors_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
|
||||||
|
# TODO make prettier
|
||||||
|
strides = [8, 16, 32]
|
||||||
|
|
||||||
face_landmarks = logits.get('face_landmarks')
|
for output_ind, output_branch in enumerate(endnodes): # iterating over the output layers:
|
||||||
if face_landmarks is not None:
|
stride = strides[::-1][output_ind]
|
||||||
face_landmarks = face_landmarks[0].reshape((-1, 5, 2))[:, :, (1, 0)]
|
anchors_for_stride = np.array(anchors_list[::-1][output_ind])
|
||||||
boxes = boxes[:, (1, 0, 3, 2)]
|
anchors_for_stride = np.reshape(anchors_for_stride, (1, 1, -1, 2)) # dim [1, 1, 3, 2]
|
||||||
# No name to prevent clobbering the visualization
|
|
||||||
labels = {1: {'id': 1, 'name': ''}}
|
detection_boxes, detection_scores = self.yolo_postprocess_numpy(output_branch,
|
||||||
return boxes, labels, face_landmarks
|
anchors_for_stride,
|
||||||
|
stride)
|
||||||
|
|
||||||
|
# detection_boxes is a [BS, num_detections, 1, 4] tensor, detection_scores is a
|
||||||
|
# [BS, num_detections, num_classes] tensor
|
||||||
|
detection_boxes = detection_boxes / H_input # normalization of box coordinates to 1
|
||||||
|
BS = endnodes[0].shape[0]
|
||||||
|
H = H_input // stride
|
||||||
|
W = W_input // stride
|
||||||
|
num_anchors = anchors_for_stride.size // 2
|
||||||
|
num_detections = H * W * num_anchors
|
||||||
|
# detection_boxes.set_shape((BS, num_detections, 1, 4))
|
||||||
|
# detection_scores.set_shape((BS, num_detections, num_classes))
|
||||||
|
# concatenating the detections from the different output layers:
|
||||||
|
if output_ind == 0:
|
||||||
|
detection_boxes_full = detection_boxes
|
||||||
|
detection_scores_full = detection_scores
|
||||||
|
else:
|
||||||
|
detection_boxes_full = tf.concat([detection_boxes_full, detection_boxes], axis=1)
|
||||||
|
detection_scores_full = tf.concat([detection_scores_full, detection_scores], axis=1)
|
||||||
|
|
||||||
|
score_threshold = 0.5
|
||||||
|
nms_iou_threshold = 0.5
|
||||||
|
labels_offset = 1
|
||||||
|
|
||||||
|
(nmsed_boxes, nmsed_scores, nmsed_classes, num_detections) = \
|
||||||
|
combined_non_max_suppression(boxes=detection_boxes_full,
|
||||||
|
scores=detection_scores_full,
|
||||||
|
score_threshold=score_threshold,
|
||||||
|
iou_threshold=nms_iou_threshold,
|
||||||
|
max_output_size_per_class=100,
|
||||||
|
max_total_size=100)
|
||||||
|
|
||||||
|
|
||||||
def _get_coco_labels():
|
# adding offset to the class prediction and cast to integer
|
||||||
coco_names = json.load(open(os.path.join(os.path.dirname(__file__), 'coco_names.json')))
|
def translate_coco_2017_to_2014(nmsed_classes):
|
||||||
coco_names = {int(k): {'id': int(k), 'name': str(v)} for (k, v) in coco_names.items()}
|
return np.vectorize(COCO_17_14.get)(nmsed_classes).astype(np.int32)
|
||||||
return coco_names
|
|
||||||
|
nmsed_classes = tf.cast(tf.add(nmsed_classes, labels_offset), tf.int16)
|
||||||
|
nmsed_classes = translate_coco_2017_to_2014(nmsed_classes)
|
||||||
|
|
||||||
|
return {'detection_boxes': nmsed_boxes,
|
||||||
|
'detection_scores': nmsed_scores,
|
||||||
|
'detection_classes': nmsed_classes,
|
||||||
|
'num_detections': num_detections}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class HailoHandler:
    """Configure a Hailo PCIe device with one HEF file and run inference on it."""

    def __init__(self, hef_path='hef/yolov5m.hef'):
        """Open the device, load ``hef_path`` and prepare the vstream parameters.

        Args:
            hef_path: path of the HEF file to load onto the device.
        """
        # Keep the device handle on the instance instead of a local so it
        # lives as long as the handler.
        # NOTE(review): presumably the configured network group depends on
        # the device staying open - confirm against the HailoRT docs.
        self.target = PcieDevice()

        self.hef = HEF(hef_path)

        # Configure network groups; only the first one is used.
        configure_params = ConfigureParams.create_from_hef(hef=self.hef,
                                                           interface=HailoStreamInterface.PCIe)
        network_groups = self.target.configure(self.hef, configure_params)
        self.network_group = network_groups[0]

        # Both directions are created with quantized=False and FLOAT32 format.
        self.input_vstreams_params = InputVStreamParams.make(self.network_group,
                                                             quantized=False,
                                                             format_type=FormatType.FLOAT32)
        self.output_vstreams_params = OutputVStreamParams.make(self.network_group,
                                                               quantized=False,
                                                               format_type=FormatType.FLOAT32)

        self.input_vstream_info = self.hef.get_input_vstream_infos()[0]
        self.output_vstream_infos = self.hef.get_output_vstream_infos()
        self.network_group_params = self.network_group.create_params()

    def run_hailo(self, dataset):
        """Run ``dataset`` through the configured network group.

        Args:
            dataset: input batch, fed to the first input vstream by name.

        Returns:
            List with one inference result per output vstream, in the order
            reported by the HEF.
        """
        input_data = {self.input_vstream_info.name: dataset}

        with InferVStreams(self.network_group, self.input_vstreams_params,
                           self.output_vstreams_params) as infer_pipeline:
            with self.network_group.activate(self.network_group_params):
                infer_results = infer_pipeline.infer(input_data)

        return [infer_results[info.name] for info in self.output_vstream_infos]
|
||||||
|
|
||||||
def _get_labels(label_name):
    """Load ``<label_name>.json`` from this module's directory as a label map.

    The JSON file must hold an object mapping id strings to names.

    Args:
        label_name: file name without the ``.json`` extension.

    Returns:
        Dict mapping ``int(id)`` to ``{'id': int(id), 'name': str(name)}``.
    """
    filename = os.path.join(os.path.dirname(__file__), label_name + '.json')
    # Context manager so the file handle is closed right after parsing.
    with open(filename) as label_file:
        names = json.load(label_file)
    return {int(k): {'id': int(k), 'name': str(v)} for (k, v) in names.items()}
|
|
||||||
|
|
||||||
|
|
||||||
def process_yolo5():
|
def process_yolo5():
|
||||||
|
|
||||||
hef, network_group = init_hailo("yolov5m_22_2")
|
imageMeta = ImageMeta(640, 640, 3)
|
||||||
|
processor = YoloProcessing(imageMeta, classes=3)
|
||||||
|
data = DataHandler('./minimal_data', imageMeta)
|
||||||
|
data.load_data(processor.preproc)
|
||||||
|
|
||||||
dataset, names = dataset_read(hef)
|
hailo = HailoHandler('hef/yolov5m_22_2.hef')
|
||||||
|
out = hailo.run_hailo(data.dataset)
|
||||||
|
|
||||||
samples = 1000
|
logits = processor.postprocessing(out)
|
||||||
start_time = time.time()
|
|
||||||
fps = 0
|
|
||||||
while samples > 0:
|
|
||||||
if start_time + 1 < time.time():
|
|
||||||
print("fps: " + str(fps))
|
|
||||||
start_time = time.time()
|
|
||||||
fps = 0
|
|
||||||
|
|
||||||
out, names, dataset, names = run_hailo(dataset, names, hef, network_group)
|
|
||||||
|
|
||||||
logits = postprocessing(out)
|
|
||||||
|
|
||||||
fps += 1
|
|
||||||
samples -= 1
|
|
||||||
|
|
||||||
|
|
||||||
labels = _get_labels("daria_names")
|
labels = data._get_labels("daria_names")
|
||||||
image = visualize_boxes_and_labels_on_image_array(
|
image = visualize_boxes_and_labels_on_image_array(
|
||||||
dataset[0],
|
data.dataset[0],
|
||||||
logits['detection_boxes'].numpy()[0],
|
logits['detection_boxes'].numpy()[0],
|
||||||
logits['detection_classes'][0],
|
logits['detection_classes'][0],
|
||||||
logits['detection_scores'].numpy()[0],
|
logits['detection_scores'].numpy()[0],
|
||||||
@ -324,16 +309,5 @@ def process_yolo5():
|
|||||||
|
|
||||||
Image.fromarray(np.uint8(image)).save('/home/maintenance/test.png')
|
Image.fromarray(np.uint8(image)).save('/home/maintenance/test.png')
|
||||||
|
|
||||||
# Lookup table from a class index (0..80) to the translated class id.
# The target ids are written as runs; the ids 12, 26, 29, 30, 45, 66, 68,
# 69, 71 and 83 never appear as targets.
COCO_2017_TO_2014_TRANSLATION = dict(enumerate([
    *range(0, 12),
    *range(13, 26),
    27, 28,
    *range(31, 45),
    *range(46, 66),
    67,
    70,
    *range(72, 83),
    *range(84, 91),
]))
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
process_yolo5()
|
process_yolo5()
|
||||||
|
Loading…
Reference in New Issue
Block a user