be89e739b4
- ImageMeta struct saves image metadata - DataHandler takes care of loading dataset and parsing the label information - YoloProcessing takes care of image preprocessing and yolo postprocessing - HailoHandler connects to Hailo device, pushes the desired network hef file and runs the dataset on the hailo chip
314 lines
13 KiB
Python
314 lines
13 KiB
Python
import json
|
|
import os
|
|
import time
|
|
t
|
|
from PIL import Image
|
|
|
|
from detection_tools.utils.visualization_utils import \
|
|
visualize_boxes_and_labels_on_image_array
|
|
|
|
from hailo_platform import (ConfigureParams, FormatType, HEF,
|
|
HailoStreamInterface, InferVStreams,
|
|
InputVStreamParams, OutputVStreamParams,
|
|
PcieDevice)
|
|
|
|
import numpy as np
|
|
|
|
import tensorflow as tf
|
|
from tensorflow.image import combined_non_max_suppression
|
|
|
|
# Collect images from data files
|
|
|
|
|
|
class ImageMeta:
    """Plain holder for the dimensions of an image.

    Attributes:
        image_height: value passed as ``image_height``.
        image_width: value passed as ``image_width``.
        channels: value passed as ``channels``.
    """

    def __init__(self, image_height, image_width, channels):
        """Store the three dimensions unchanged on the instance."""
        (self.image_height,
         self.image_width,
         self.channels) = image_height, image_width, channels
|
|
|
|
|
|
class DataHandler:
    """Load ``.jpg`` images from a directory and read label-name JSON files.

    Attributes:
        images_path: directory scanned by :meth:`load_data`.
        image_meta: object exposing ``image_height``, ``image_width`` and
            ``channels``; it fixes the shape of every frame in the dataset.
        dataset: float32 array of shape ``[N, height, width, channels]``;
            only available after :meth:`load_data` has run.
        names: file names of the loaded images, in dataset order; only
            available after :meth:`load_data` has run.
    """

    def __init__(self, path, image_meta):
        """Remember the image directory and the expected image dimensions."""
        self.images_path = path
        self.image_meta = image_meta

    def load_data(self, preprocess_fn, max_images=1):
        """Read, preprocess and stack images from ``self.images_path``.

        Only files whose extension is exactly ``.jpg`` are considered.  File
        names are sorted so that the selection is reproducible
        (``os.listdir`` returns entries in arbitrary order).

        Args:
            preprocess_fn: callable receiving the opened PIL image and
                returning something ``np.array`` can turn into a
                ``[height, width, channels]`` array.
            max_images: upper bound on the number of images to load.  The
                default of 1 keeps the historical single-image behaviour;
                ``None`` loads every matching file.

        Raises:
            ValueError: if ``max_images`` is negative.

        The result is stored in ``self.dataset`` and ``self.names``.  When no
        image is selected the dataset still holds one all-zero frame, as it
        always did, and ``self.names`` is empty.
        """
        if max_images is not None and max_images < 0:
            raise ValueError('max_images must be None or a non-negative integer')

        images_list = sorted(
            img_name for img_name in os.listdir(self.images_path)
            if os.path.splitext(img_name)[1] == '.jpg')
        if max_images is not None:
            images_list = images_list[:max_images]

        # Keep at least one (zero-filled) frame so that an empty directory
        # yields the same array shape as before.
        dataset = np.zeros((max(len(images_list), 1),
                            self.image_meta.image_height,
                            self.image_meta.image_width,
                            self.image_meta.channels),
                           dtype=np.float32)

        for idx, img_name in enumerate(images_list):
            # The context manager releases the file handle; the pixel data is
            # copied into ``dataset`` before the image is closed.
            with Image.open(os.path.join(self.images_path, img_name)) as img:
                dataset[idx, :, :, :] = np.array(preprocess_fn(img))

        self.dataset = dataset
        self.names = images_list

    def _get_coco_labels(self):
        """Return the label map stored in ``coco_names.json``.

        Same format as :meth:`_get_labels`.
        """
        return self._get_labels('coco_names')

    def _get_labels(self, label_name):
        """Read ``<label_name>.json`` located next to this module.

        The JSON file must be an object mapping integer-like keys to names.

        Returns:
            dict mapping ``int(key)`` to ``{'id': int(key), 'name': str(value)}``.
        """
        filename = os.path.join(os.path.dirname(__file__), label_name + '.json')
        with open(filename) as label_file:
            names = json.load(label_file)
        return {int(k): {'id': int(k), 'name': str(v)} for (k, v) in names.items()}
|
|
|
|
# Lookup table applied to the (offset) class indices after NMS.
# Keys are the consecutive integers 0..80.  Values are the integers 0..90 in
# increasing order, leaving out the ids listed in the tuple below.
COCO_17_14 = dict(enumerate(
    category_id for category_id in range(91)
    if category_id not in (12, 26, 29, 30, 45, 66, 68, 69, 71, 83)))
|
|
|
|
|
|
class YoloProcessing:
    """Image preprocessing and decoding / NMS of YOLOv5-style network outputs.

    Attributes:
        output_height: ``imageMeta.image_height``; height of the preprocessed
            image and the value used to normalise box y coordinates.
        output_width: ``imageMeta.image_width``; width of the preprocessed
            image and the value used to normalise box x coordinates.
        classes: value passed to the constructor.  It is stored only; the
            number of classes used for decoding is derived from the channel
            count of each output tensor.
    """

    def __init__(self, imageMeta, classes):
        """Copy the image dimensions from ``imageMeta`` and keep ``classes``."""
        self.output_height = imageMeta.image_height
        self.output_width = imageMeta.image_width
        self.classes = classes

    def preproc(self, image, resize_side=256):
        """Resize ``image`` to a height of ``resize_side`` and crop it.

        The width is scaled by the same factor as the height.  A window of
        ``output_width`` x ``output_height`` pixels is then cut out, centred
        horizontally and starting at the top edge.

        NOTE(review): when ``resize_side`` is smaller than ``output_height``
        (as with the default 256 and a 640x640 target) the crop window is
        larger than the resized image - confirm this is intended.

        Args:
            image: object with ``width``, ``height`` and a PIL-like
                ``resize(size)`` whose result offers ``crop(box)``.
            resize_side: height in pixels of the intermediate resized image.

        Returns:
            Whatever ``crop`` returns for the computed window.
        """
        new_width = int(image.width / image.height * resize_side)
        new_height = resize_side
        left, top = (new_width - self.output_width) / 2, 0

        # (left, upper, right, lower) window to cut out of the resized image.
        area = (left, top, left + self.output_width, top + self.output_height)
        return image.resize((new_width, new_height)).crop(area)

    def yolo_postprocess_numpy(self, net_out, anchors_for_stride, stride):
        """Decode one output branch into boxes and per-class scores.

        The values in ``net_out`` are used as they are; no activation is
        applied here (presumably it already happened upstream - TODO confirm).

        Args:
            net_out: array of shape ``[N, H, W, num_anchors * (5 + num_classes)]``.
                For every anchor the channels hold 2 box-centre values, 2
                box-scale values, 1 objectness value and ``num_classes`` class
                scores, in that order.
            anchors_for_stride: array holding ``num_anchors`` (width, height)
                pairs, shaped so that it broadcasts against
                ``[N, H*W, num_anchors, 2]`` (the caller uses ``[1, 1, A, 2]``).
            stride: factor converting grid-cell units into pixels.

        Returns:
            Tuple ``(detection_boxes, detection_scores)`` of float32 arrays:
            boxes have shape ``[N, H*W*num_anchors, 1, 4]`` in
            ``ymin, xmin, ymax, xmax`` order (pixels); scores have shape
            ``[N, H*W*num_anchors, num_classes]``.

        Raises:
            ValueError: if the channel count does not fit the anchor count.
        """
        batch_size, height, width, channels = net_out.shape

        num_anchors = anchors_for_stride.size // 2  # 2 params for each anchor.
        if num_anchors == 0:
            raise ValueError('anchors_for_stride must contain at least one anchor')
        # Values per anchor: 2 box centers, 2 box scales, 1 objness and the
        # class scores.  Deriving it from the tensor avoids a hard-coded
        # class count.
        num_pred, remainder = divmod(channels, num_anchors)
        if remainder or num_pred <= 5:
            raise ValueError(
                'cannot split %d channels into %d anchors of 5 + num_classes values'
                % (channels, num_anchors))
        num_classes = num_pred - 5

        # Cell coordinates (x, y) for every position of the feature map, in
        # the same row-major order as the flattened predictions.
        grid_x, grid_y = np.meshgrid(np.arange(width), np.arange(height))  # dims [H, W]
        offsets = np.concatenate((grid_x[:, :, np.newaxis], grid_y[:, :, np.newaxis]), axis=-1)
        offsets = np.reshape(offsets, (1, -1, 1, 2))  # dim [1, HxW, 1, 2]

        # [N, H, W, A*P] -> [N, HxW, A, P]; channels are already the last
        # axis, so a plain reshape yields the required layout.
        pred = np.reshape(net_out, (batch_size, height * width, num_anchors, num_pred))

        raw_box_centers = pred[:, :, :, 0:2]  # dim [N, HxW, A, 2]
        raw_box_scales = pred[:, :, :, 2:4]  # dim [N, HxW, A, 2]
        objness = pred[:, :, :, 4:5]  # dim [N, HxW, A, 1]
        class_pred = pred[:, :, :, 5:]  # dim [N, HxW, A, num_classes]

        box_centers, box_scales, confidence, class_pred = self._yolo5_decode(
            raw_box_centers=raw_box_centers,
            raw_box_scales=raw_box_scales,
            objness=objness,
            class_pred=class_pred,
            anchors_for_stride=anchors_for_stride,
            offsets=offsets,
            stride=stride)

        class_score = class_pred * confidence  # dim [N, HxW, A, num_classes]
        half_size = box_scales / 2.0
        # dim [N, HxW, A, 4]. scheme xmin, ymin, xmax, ymax
        bbox = np.concatenate((box_centers - half_size, box_centers + half_size), axis=-1)

        detection_boxes = np.reshape(bbox, (batch_size, -1, 1, 4))  # dim [N, num_detections, 1, 4]
        detection_scores = np.reshape(class_score, (batch_size, -1, num_classes))

        # switching scheme from xmin, ymin, xmax, ymax to ymin, xmin, ymax, xmax
        detection_boxes = detection_boxes[:, :, :, [1, 0, 3, 2]]
        return detection_boxes.astype(np.float32), detection_scores.astype(np.float32)

    def _yolo5_decode(self, raw_box_centers, raw_box_scales, objness, class_pred, anchors_for_stride, offsets, stride):
        """Turn raw centre / scale values into pixel centres and sizes.

        Centres are ``(raw * 2 - 0.5 + cell offset) * stride`` and sizes are
        ``(raw * 2) ** 2 * anchor``.  ``objness`` and ``class_pred`` are
        returned unchanged.
        """
        box_centers = (raw_box_centers * 2. - 0.5 + offsets) * stride
        box_scales = (raw_box_scales * 2) ** 2 * anchors_for_stride  # dim [N, HxW, A, 2]
        return box_centers, box_scales, objness, class_pred

    def postprocessing(self, endnodes, score_threshold=0.5, nms_iou_threshold=0.5):
        """Decode all output branches and run combined non-max suppression.

        Args:
            endnodes: sequence of up to 3 output tensors, expected in the
                order stride 32, stride 16, stride 8 (the anchors and strides
                are paired with the tensors by position).
            score_threshold: minimum score for a detection to be kept by NMS.
            nms_iou_threshold: IoU threshold used by NMS.

        Returns:
            dict with the keys

            * ``'detection_boxes'``: NMS boxes, ``ymin, xmin, ymax, xmax``
              divided by the configured output height / width,
            * ``'detection_scores'``: NMS scores,
            * ``'detection_classes'``: int32 numpy array; the NMS class index
              plus 1, mapped through ``COCO_17_14``,
            * ``'num_detections'``: number of valid detections per image.

            At most 100 detections per class and 100 in total are kept.

        Raises:
            ValueError: if ``endnodes`` is empty or has more than 3 entries.
        """
        # Anchor (width, height) values and strides, listed from the finest
        # (stride 8) to the coarsest (stride 32) branch.
        anchors_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        strides = [8, 16, 32]

        if not 0 < len(endnodes) <= len(strides):
            raise ValueError('expected between 1 and %d output tensors, got %d'
                             % (len(strides), len(endnodes)))

        # Boxes come out as ymin, xmin, ymax, xmax in pixels; dividing by the
        # matching input dimension normalises them to 1.  float32 keeps the
        # dtype accepted by the NMS op.
        box_norm = np.array([self.output_height, self.output_width,
                             self.output_height, self.output_width],
                            dtype=np.float32)

        boxes_per_branch = []
        scores_per_branch = []
        # endnodes start with the coarsest branch, hence the reversed lists.
        for output_branch, stride, anchors in zip(endnodes, reversed(strides), reversed(anchors_list)):
            anchors_for_stride = np.reshape(np.array(anchors), (1, 1, -1, 2))  # dim [1, 1, 3, 2]
            detection_boxes, detection_scores = self.yolo_postprocess_numpy(output_branch,
                                                                            anchors_for_stride,
                                                                            stride)
            # detection_boxes is [BS, num_detections, 1, 4],
            # detection_scores is [BS, num_detections, num_classes]
            boxes_per_branch.append(detection_boxes / box_norm)
            scores_per_branch.append(detection_scores)

        # concatenating the detections from the different output layers
        detection_boxes_full = np.concatenate(boxes_per_branch, axis=1)
        detection_scores_full = np.concatenate(scores_per_branch, axis=1)

        labels_offset = 1

        (nmsed_boxes, nmsed_scores, nmsed_classes, num_detections) = \
            combined_non_max_suppression(boxes=detection_boxes_full,
                                         scores=detection_scores_full,
                                         score_threshold=score_threshold,
                                         iou_threshold=nms_iou_threshold,
                                         max_output_size_per_class=100,
                                         max_total_size=100)

        def translate_coco_2017_to_2014(nmsed_classes):
            """Map every class id through the ``COCO_17_14`` table."""
            return np.vectorize(COCO_17_14.get)(nmsed_classes).astype(np.int32)

        # adding offset to the class prediction and cast to integer
        nmsed_classes = tf.cast(tf.add(nmsed_classes, labels_offset), tf.int16)
        nmsed_classes = translate_coco_2017_to_2014(nmsed_classes)

        return {'detection_boxes': nmsed_boxes,
                'detection_scores': nmsed_scores,
                'detection_classes': nmsed_classes,
                'num_detections': num_detections}
|
|
|
|
|
|
|
|
class HailoHandler:
    """Configure a Hailo PCIe device with a HEF file and run inference on it.

    Attributes:
        hef: the ``HEF`` object created from ``hef_path``.
        network_group: first network group returned when configuring the
            device with the HEF.
        input_vstreams_params / output_vstreams_params: virtual-stream
            parameters created with ``quantized=False`` and
            ``FormatType.FLOAT32``.
        input_vstream_info: first input vstream info reported by the HEF.
        output_vstream_infos: all output vstream infos reported by the HEF.
        network_group_params: parameters used to activate the network group.
    """

    def __init__(self, hef_path='hef/yolov5m.hef'):
        """Open the PCIe device, load ``hef_path`` and prepare the streams."""
        device = PcieDevice()

        self.hef = HEF(hef_path)

        # Configure the device with the network groups of the HEF and keep
        # the first one.
        params = ConfigureParams.create_from_hef(
            hef=self.hef, interface=HailoStreamInterface.PCIe)
        self.network_group = device.configure(self.hef, params)[0]

        # Both stream directions use the same non-quantized float32 format.
        stream_format = {'quantized': False, 'format_type': FormatType.FLOAT32}
        self.input_vstreams_params = InputVStreamParams.make(
            self.network_group, **stream_format)
        self.output_vstreams_params = OutputVStreamParams.make(
            self.network_group, **stream_format)

        input_infos = self.hef.get_input_vstream_infos()
        self.input_vstream_info = input_infos[0]
        self.output_vstream_infos = self.hef.get_output_vstream_infos()
        self.network_group_params = self.network_group.create_params()

    def run_hailo(self, dataset):
        """Run ``dataset`` through the network and collect the outputs.

        Args:
            dataset: data fed to the (single) input vstream.

        Returns:
            list with one inference result per output vstream, in the order
            of ``self.output_vstream_infos``.
        """
        feed = {self.input_vstream_info.name: dataset}

        pipeline_cm = InferVStreams(self.network_group,
                                    self.input_vstreams_params,
                                    self.output_vstreams_params)
        with pipeline_cm as infer_pipeline, \
                self.network_group.activate(self.network_group_params):
            results = infer_pipeline.infer(feed)
            outputs = []
            for stream_info in self.output_vstream_infos:
                outputs.append(results[stream_info.name])

        return outputs
|
|
|
|
|
|
|
|
def process_yolo5():
    """Run the YOLOv5 demo end to end and save the annotated image.

    Steps: load the data from ``./minimal_data``, run it on the Hailo device
    with ``hef/yolov5m_22_2.hef``, post-process the outputs, draw the
    detections for the first image and write the result to
    ``/home/maintenance/test.png``.
    """
    meta = ImageMeta(640, 640, 3)
    yolo = YoloProcessing(meta, classes=3)

    loader = DataHandler('./minimal_data', meta)
    loader.load_data(yolo.preproc)

    device = HailoHandler('hef/yolov5m_22_2.hef')
    raw_outputs = device.run_hailo(loader.dataset)

    detections = yolo.postprocessing(raw_outputs)

    label_map = loader._get_labels("daria_names")

    # Boxes and scores are converted with .numpy(); the classes are indexed
    # directly, exactly as returned by postprocessing.
    first_boxes = detections['detection_boxes'].numpy()[0]
    first_classes = detections['detection_classes'][0]
    first_scores = detections['detection_scores'].numpy()[0]

    annotated = visualize_boxes_and_labels_on_image_array(
        loader.dataset[0],
        first_boxes,
        first_classes,
        first_scores,
        label_map,
        use_normalized_coordinates=True,
        max_boxes_to_draw=100,
        min_score_thresh=.5,
        agnostic_mode=False,
        line_thickness=4)

    output_image = Image.fromarray(np.uint8(annotated))
    output_image.save('/home/maintenance/test.png')


# Run the demo only when the file is executed as a script.
if __name__ == "__main__":
    process_yolo5()
|