"""YOLOv5 demo on a Hailo PCIe device: data loading, numpy post-processing, and
synchronous / threaded inference loops."""
import json
import os
import time
import ipdb
from PIL import Image
from threading import Thread

from detection_tools.utils.visualization_utils import \
    visualize_boxes_and_labels_on_image_array
from hailo_platform import (ConfigureParams, FormatType, HEF, HailoStreamInterface,
                            InferVStreams, InputVStreamParams, OutputVStreamParams,
                            PcieDevice)
import numpy as np
import tensorflow as tf
from tensorflow.image import combined_non_max_suppression


# Collect images from data files
class ImageMeta:
    """Plain container for the network input geometry (height, width, channels)."""

    def __init__(self, image_height, image_width, channels):
        self.image_height = image_height
        self.image_width = image_width
        self.channels = channels


class DataHandler:
    """Loads input images and label maps from disk."""

    def __init__(self, path, image_meta):
        """Remember the image directory and the expected input geometry.

        Args:
            path: directory that is scanned for ``.jpg`` files.
            image_meta: an ``ImageMeta`` describing the dataset array shape.
        """
        self.images_path = path
        self.image_meta = image_meta

    def load_data(self, preprocess_fn):
        """Load the first ``.jpg`` found in ``images_path`` into ``self.dataset``.

        Only a single image is loaded: ``self.dataset`` always has shape
        ``(1, height, width, channels)`` (float32) and stays all-zero when the
        directory holds no ``.jpg`` file. ``self.names`` lists the loaded file
        name (empty when nothing was loaded).

        Args:
            preprocess_fn: callable applied to the opened PIL image; its result
                must be convertible to an array of the dataset's image shape.
        """
        meta = self.image_meta
        dataset = np.zeros((1, meta.image_height, meta.image_width, meta.channels),
                           dtype=np.float32)
        names = []
        first_jpg = next((name for name in os.listdir(self.images_path)
                          if os.path.splitext(name)[1] == '.jpg'), None)
        if first_jpg is not None:
            # Convert inside the ``with`` so the file is still open if PIL
            # deferred decoding, and is closed deterministically afterwards.
            with Image.open(os.path.join(self.images_path, first_jpg)) as img:
                dataset[0, :, :, :] = np.array(preprocess_fn(img))
            names.append(first_jpg)
        self.dataset = dataset
        self.names = names

    def _get_coco_labels(self):
        """Return the label map stored in ``coco_names.json`` next to this file."""
        return self.get_labels('coco_names.json')

    def get_labels(self, path):
        """Read a JSON ``{id: name}`` file and return ``{id: {'id': id, 'name': name}}``.

        Args:
            path: file path, resolved relative to the directory of this module.
        """
        filename = os.path.join(os.path.dirname(__file__), path)
        with open(filename, encoding='utf-8') as label_file:
            raw_names = json.load(label_file)
        return {int(k): {'id': int(k), 'name': str(v)} for (k, v) in raw_names.items()}


# Lookup table used by YoloProcessing.postprocessing to translate class ids
# (the original code names this a COCO 2017 -> COCO 2014 translation).
COCO_17_14 = {
    0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9,
    10: 10, 11: 11, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20,
    20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 27, 26: 28, 27: 31, 28: 32, 29: 33,
    30: 34, 31: 35, 32: 36, 33: 37, 34: 38, 35: 39, 36: 40, 37: 41, 38: 42, 39: 43,
    40: 44, 41: 46, 42: 47, 43: 48, 44: 49, 45: 50, 46: 51, 47: 52, 48: 53, 49: 54,
    50: 55, 51: 56, 52: 57, 53: 58, 54: 59, 55: 60, 56: 61, 57: 62, 58: 63, 59: 64,
    60: 65, 61: 67, 62: 70, 63: 72, 64: 73, 65: 74, 66: 75, 67: 76, 68: 77, 69: 78,
    70: 79, 71: 80, 72: 81, 73: 82, 74: 84, 75: 85, 76: 86, 77: 87, 78: 88, 79: 89,
    80: 90,
}


class YoloProcessing:
    """Pre- and post-processing for a YOLOv5-style detector."""

    def __init__(self, imageMeta, classes):
        """Store the network input size and the class count.

        Args:
            imageMeta: ``ImageMeta`` whose height/width are used as the crop size.
            classes: number of classes; stored on ``self.classes``.
        """
        self.output_height = imageMeta.image_height
        self.output_width = imageMeta.image_width
        self.classes = classes

    def preproc(self, image, resize_side=256):
        """Resize ``image`` to height ``resize_side`` (aspect preserved), then crop.

        The crop box is ``output_width`` x ``output_height``, horizontally
        centred and anchored at the top edge.

        NOTE(review): with the default ``resize_side=256`` and a 640x640 output
        the crop box is larger than the resized image; this looks like a
        leftover from an ImageNet 256/224 recipe -- confirm the intended value.

        Args:
            image: PIL image.
            resize_side: target height in pixels after the resize.

        Returns:
            The resized and cropped PIL image.
        """
        new_width = int(image.width/image.height*resize_side)
        new_height = resize_side
        x, y = (new_width-self.output_width)/2, 0
        # Select area to crop
        area = (x, y, x+self.output_width, y+self.output_height)
        return image.resize((new_width, new_height)).crop(area)

    def yolo_postprocess_numpy(self, net_out, anchors_for_stride, stride):
        """Decode one output branch into boxes and per-class scores.

        Args:
            net_out: array of shape ``[N, H, W, num_anchors * num_pred]`` where
                ``num_pred = 4 box params + 1 objectness + num_classes``.
            anchors_for_stride: array reshaped by the caller to ``[1, 1, A, 2]``.
            stride: ratio between network input size and this branch's grid.

        Returns:
            ``(detection_boxes, detection_scores)`` as float32 arrays of shape
            ``[N, H*W*A, 1, 4]`` (ymin, xmin, ymax, xmax, in input pixels) and
            ``[N, H*W*A, num_classes]``.
        """
        # NOTE(review): the class count is hard-coded here and self.classes is
        # ignored; it must match the channel count of the loaded model.
        num_classes = 4
        num_pred = 1 + 4 + num_classes  # 2 centers, 2 scales, 1 objness, class scores
        batch_size, grid_h, grid_w = net_out.shape[:3]
        num_anchors = anchors_for_stride.size // 2  # 2 params for each anchor.

        # Per-cell (x, y) grid offsets, built at the actual feature-map size.
        grid_x, grid_y = np.meshgrid(np.arange(grid_w), np.arange(grid_h))  # [H, W] each
        offsets = np.stack((grid_x, grid_y), axis=-1)  # dim [H, W, 2]
        offsets = np.reshape(offsets, (1, -1, 1, 2))  # dim [1, HxW, 1, 2]

        # Channels are the last axis, so one reshape splits them per anchor;
        # this equals the NCHW transpose / reshape / transpose round-trip.
        pred = np.reshape(net_out, (batch_size, -1, num_anchors, num_pred))  # [N, HxW, A, P]
        raw_box_centers = pred[:, :, :, 0:2]  # dim [N, HxW, A, 2]
        raw_box_scales = pred[:, :, :, 2:4]  # dim [N, HxW, A, 2]
        objness = pred[:, :, :, 4:5]  # dim [N, HxW, A, 1]
        class_pred = pred[:, :, :, 5:]  # dim [N, HxW, A, num_classes]

        box_centers, box_scales, confidence, class_pred = self._yolo5_decode(
            raw_box_centers=raw_box_centers,
            raw_box_scales=raw_box_scales,
            objness=objness,
            class_pred=class_pred,
            anchors_for_stride=anchors_for_stride,
            offsets=offsets,
            stride=stride)

        class_score = class_pred * confidence  # dim [N, HxW, A, num_classes]
        half_wh = box_scales / 2.0
        # dim [N, HxW, A, 4]. scheme xmin, ymin, xmax, ymax
        bbox = np.concatenate((box_centers - half_wh, box_centers + half_wh), axis=-1)
        detection_boxes = np.reshape(bbox, (batch_size, -1, 1, 4))  # [N, num_detections, 1, 4]
        detection_scores = np.reshape(class_score, (batch_size, -1, num_classes))

        # switching scheme from xmin, ymin, xmax, ymax to ymin, xmin, ymax, xmax:
        detection_boxes = detection_boxes[:, :, :, [1, 0, 3, 2]]
        return detection_boxes.astype(np.float32), detection_scores.astype(np.float32)

    def _yolo5_decode(self, raw_box_centers, raw_box_scales, objness, class_pred,
                      anchors_for_stride, offsets, stride):
        """Apply the YOLOv5 box formulas; objectness and class scores pass through."""
        box_centers = (raw_box_centers * 2. - 0.5 + offsets) * stride
        box_scales = (raw_box_scales * 2) ** 2 * anchors_for_stride  # dim [N, HxW, A, 2]
        return box_centers, box_scales, objness, class_pred

    def process_to_picture(self, endnodes, data):
        """Post-process ``endnodes`` and save the annotated first dataset image."""
        logits = self.postprocessing(endnodes)
        self.visualize_image(logits, data)

    def visualize_image(self, logits, data):
        """Draw the detections on ``data.dataset[0]`` and save the result as PNG.

        Args:
            logits: dict as returned by ``postprocessing``.
            data: ``DataHandler`` providing the image and the label map.
        """
        labels = data.get_labels("data/daria_labels.json")
        image = visualize_boxes_and_labels_on_image_array(
            data.dataset[0],
            logits['detection_boxes'].numpy()[0],
            logits['detection_classes'][0],
            logits['detection_scores'].numpy()[0],
            labels,
            use_normalized_coordinates=True,
            max_boxes_to_draw=100,
            min_score_thresh=.5,
            agnostic_mode=False,
            line_thickness=4)
        Image.fromarray(np.uint8(image)).save('/home/maintenance/test.png')
        print("Successfully saved image")

    def postprocessing(self, endnodes, count=None):
        """Decode all output branches and run combined non-max suppression.

        Args:
            endnodes: list of 3 output tensors:
                endnodes[0] - stride 32 of input
                endnodes[1] - stride 16 of input
                endnodes[2] - stride 8 of input
            count: optional tag (e.g. frame index) printed once processing is
                done; nothing is printed when omitted.

        Returns:
            dict with keys ``'detection_boxes'``, ``'detection_scores'``,
            ``'detection_classes'`` and ``'num_detections'``. Boxes are divided
            by the input size (640), i.e. normalized to 1.
        """
        H_input = 640  # the input is assumed square; boxes are normalized by this
        anchors_list = [[10, 13, 16, 30, 33, 23],
                        [30, 61, 62, 45, 59, 119],
                        [116, 90, 156, 198, 373, 326]]
        # TODO make prettier
        strides = [8, 16, 32]

        boxes_per_branch = []
        scores_per_branch = []
        for output_ind, output_branch in enumerate(endnodes):
            # endnodes run from the coarsest stride to the finest, hence [::-1]
            stride = strides[::-1][output_ind]
            anchors_for_stride = np.array(anchors_list[::-1][output_ind])
            anchors_for_stride = np.reshape(anchors_for_stride, (1, 1, -1, 2))  # [1, 1, 3, 2]
            # detection_boxes is [BS, num_detections, 1, 4],
            # detection_scores is [BS, num_detections, num_classes]
            detection_boxes, detection_scores = self.yolo_postprocess_numpy(
                output_branch, anchors_for_stride, stride)
            boxes_per_branch.append(detection_boxes / H_input)  # normalize coordinates to 1
            scores_per_branch.append(detection_scores)

        # concatenating the detections from the different output layers:
        detection_boxes_full = tf.concat(boxes_per_branch, axis=1)
        detection_scores_full = tf.concat(scores_per_branch, axis=1)

        score_threshold = 0.5
        nms_iou_threshold = 0.5
        labels_offset = 1
        (nmsed_boxes, nmsed_scores, nmsed_classes, num_detections) = \
            combined_non_max_suppression(boxes=detection_boxes_full,
                                         scores=detection_scores_full,
                                         score_threshold=score_threshold,
                                         iou_threshold=nms_iou_threshold,
                                         max_output_size_per_class=100,
                                         max_total_size=100)

        # adding offset to the class prediction and cast to integer,
        # then translate the ids through the COCO_17_14 table
        nmsed_classes = tf.cast(tf.add(nmsed_classes, labels_offset), tf.int16)
        nmsed_classes = np.vectorize(COCO_17_14.get)(nmsed_classes).astype(np.int32)

        if count is not None:
            print(count)
        return {'detection_boxes': nmsed_boxes,
                'detection_scores': nmsed_scores,
                'detection_classes': nmsed_classes,
                'num_detections': num_detections}


class HailoHandler:
    """Configures a Hailo PCIe device from a HEF and runs inference on it,
    either synchronously (``run_hailo``) or from a background thread."""

    def __init__(self, hef_path='hef/yolov5m.hef'):
        """Load the HEF, configure the device and prepare the vstream params.

        Args:
            hef_path: path of the compiled ``.hef`` model file.
        """
        target = PcieDevice()
        self.hef = HEF(hef_path)

        # Configure network groups
        configure_params = ConfigureParams.create_from_hef(
            hef=self.hef, interface=HailoStreamInterface.PCIe)
        network_groups = target.configure(self.hef, configure_params)
        self.network_group = network_groups[0]
        self.input_vstreams_params = InputVStreamParams.make(
            self.network_group, quantized=False, format_type=FormatType.FLOAT32)
        self.output_vstreams_params = OutputVStreamParams.make(
            self.network_group, quantized=False, format_type=FormatType.FLOAT32)
        self.input_vstream_info = self.hef.get_input_vstream_infos()[0]
        self.output_vstream_infos = self.hef.get_output_vstream_infos()
        self.network_group_params = self.network_group.create_params()

    def run_hailo(self, dataset):
        """Run one blocking inference.

        Args:
            dataset: input array, fed to the first input vstream.

        Returns:
            List of output arrays, ordered like ``self.output_vstream_infos``.
        """
        input_data = {self.input_vstream_info.name: dataset}
        with InferVStreams(self.network_group, self.input_vstreams_params,
                           self.output_vstreams_params) as infer_pipeline:
            with self.network_group.activate(self.network_group_params):
                infer_results = infer_pipeline.infer(input_data)
        return [infer_results[info.name] for info in self.output_vstream_infos]

    def start_hailo_thread(self):
        """Start the background inference thread (see ``hailo_input``/``hailo_output``)."""
        self.hailo_async = True
        self.hailo_block = False
        self.input_data = None
        self.hailo_thread = Thread(target=self._hailo_async)
        self.hailo_thread.start()

    def _hailo_async(self):
        """Thread body: open the pipeline, activate the network, run the loop."""
        with InferVStreams(self.network_group, self.input_vstreams_params,
                           self.output_vstreams_params) as infer_pipeline:
            with self.network_group.activate(self.network_group_params):
                self._hailo_async_loop(infer_pipeline)

    def _hailo_async_loop(self, infer_pipeline):
        """Infer every input posted by ``hailo_input`` until the thread is stopped.

        ``self.hailo_block`` is held True while an inference is in flight;
        results are published on ``self.infer_results``.
        """
        while self.hailo_async:
            if not self.hailo_block and self.input_data is not None:
                self.infer_results = None
                self.hailo_block = True
                infer_results = infer_pipeline.infer(self.input_data)
                self.infer_results = [infer_results[info.name]
                                      for info in self.output_vstream_infos]
                self.input_data = None
                self.hailo_block = False
            else:
                # Nothing to do: yield instead of spinning at full CPU, which
                # would starve the producer and post-processing threads.
                time.sleep(0.0001)

    def hailo_input(self, input_data):
        """Post ``input_data`` for the background thread, waiting while it is busy."""
        while self.hailo_block:
            time.sleep(0.01)
        self.hailo_block = True
        self.input_data = {self.input_vstream_info.name: input_data}
        self.infer_results = None
        self.hailo_block = False

    def hailo_output(self):
        """Return the latest results, or ``None`` if none are available yet.

        Waits while an inference (or an input hand-over) is in progress.
        """
        while self.hailo_block:
            time.sleep(0.01)
        return self.infer_results

    def stop_hailo_thread(self):
        """Ask the background thread to finish and wait for it."""
        self.hailo_async = False
        self.hailo_thread.join()


def test_async_yolo5():
    """Run 100 inferences through the background thread, post-processing each
    result in its own thread (which prints the frame index when done)."""
    imageMeta = ImageMeta(640, 640, 3)
    processor = YoloProcessing(imageMeta, classes=3)
    data = DataHandler('./data', imageMeta)
    data.load_data(processor.preproc)
    hailo = HailoHandler('hef/yolov5m_daria.hef')
    hailo.start_hailo_thread()
    for i in range(100):
        hailo.hailo_input(data.dataset)
        out = None
        while out is None:
            time.sleep(0.0001)
            out = hailo.hailo_output()
        Thread(target=processor.postprocessing, args=[out, i]).start()
    hailo.stop_hailo_thread()


def test_process_yolo5():
    """Run 100 blocking inferences with post-processing and box drawing,
    printing the number of completed iterations roughly once per second."""
    imageMeta = ImageMeta(640, 640, 3)
    processor = YoloProcessing(imageMeta, classes=4)
    data = DataHandler('./data', imageMeta)
    data.load_data(processor.preproc)
    hailo = HailoHandler('hef/yolov5m_daria.hef')
    labels = data.get_labels("data/daria_labels.json")  # loop-invariant
    now = time.time()
    fps = 0
    for i in range(100):
        fps += 1
        if now + 1 < time.time():
            print(fps)
            fps = 0
            now = time.time()
        out = hailo.run_hailo(data.dataset)
        logits = processor.postprocessing(out)
        image = visualize_boxes_and_labels_on_image_array(
            data.dataset[0],
            logits['detection_boxes'].numpy()[0],
            logits['detection_classes'][0],
            logits['detection_scores'].numpy()[0],
            labels,
            use_normalized_coordinates=True,
            max_boxes_to_draw=100,
            min_score_thresh=.5,
            agnostic_mode=False,
            line_thickness=4)
    print("Successfully saved image")


if __name__ == "__main__":
    test_async_yolo5()