diff --git a/inference.py b/inference.py index 70c5a5a..4532e3f 100644 --- a/inference.py +++ b/inference.py @@ -1,317 +1,302 @@ -import os -from multiprocessing import Process import json +import os import time -import numpy as np +t from PIL import Image + +from detection_tools.utils.visualization_utils import \ + visualize_boxes_and_labels_on_image_array + +from hailo_platform import (ConfigureParams, FormatType, HEF, + HailoStreamInterface, InferVStreams, + InputVStreamParams, OutputVStreamParams, + PcieDevice) + +import numpy as np + import tensorflow as tf from tensorflow.image import combined_non_max_suppression -from detection_tools.utils.visualization_utils import visualize_boxes_and_labels_on_image_array - - -from hailo_platform import (HEF, PcieDevice, HailoStreamInterface, InferVStreams, ConfigureParams, \ - InputVStreamParams, OutputVStreamParams, InputVStreams, OutputVStreams, FormatType) - -# preprocess dataset for yolov5 size -# yolov5 640x640 -# resnet18 320x320 - - -def preproc(image, output_height=640, output_width=640, resize_side=256): - - ''' - imagenet-standard: aspect-preserving resize to 256px smaller-side, - then central-crop to 224px - ''' - new_width = int(image.width/image.height*resize_side) - new_height = resize_side - x , y = (new_width-output_width)/2, 0 - - # Select area to crop - area = (x, y, x+output_width, y+output_height) - - # Crop, show, and save image - cropped_img = image.resize((new_width, new_height)).crop(area) - return cropped_img - - # Collect images from data files -def dataset_read(hef): - - images_path = './minimal_data' - names = [] - images_list = [img_name for img_name in os.listdir(images_path) if - os.path.splitext(os.path.join(images_path, img_name))[1] == '.jpg'] - - # Define dataset params - input_vstream_info = hef.get_input_vstream_infos()[0] - output_vstream_infos = hef.get_output_vstream_infos() - image_height, image_width, channels = input_vstream_info.shape - - # dataset = np.zeros((len(images_list), image_height, image_width, channels), - # dtype=np.float32) - dataset = np.zeros((1, image_height, image_width, channels), - dtype=np.float32) - - for idx, img_name in enumerate(images_list): - img = Image.open(os.path.join(images_path, img_name)) - img_preproc = preproc(img) - dataset[idx,:,:,:] = np.array(img_preproc) - names.append(img_name) - break - - return dataset, names - -# Generate random dataset -def dataset_random(image_height, image_width, channels): - num_of_images = 10 - low, high = 2, 20 - dataset = np.random.randint(low, high, (num_of_images, image_height, - image_width, channels)).astype(np.float32) - return dataset - -def init_hailo(model_name='yolov5m'): - target = PcieDevice() - - hef_path = f'hef/{model_name}.hef' - hef = HEF(hef_path) - - # Configure network groups - configure_params = ConfigureParams.create_from_hef(hef=hef, interface=HailoStreamInterface.PCIe) - network_groups = target.configure(hef, configure_params) - network_group = network_groups[0] - return hef, network_group +class ImageMeta: + def __init__(self, image_height, image_width, channels): + self.image_height = image_height + self.image_width = image_width + self.channels = channels -''' -The target can be used as a context manager ("with" statement) to ensure it's released on time. -Here it's avoided for the sake of simplicity -''' -def run_hailo(dataset, names, hef, network_group): - # Create input and output virtual streams params - # Quantized argument signifies whether or not the incoming data is already quantized. - # Data is quantized by HailoRT if and only if quantized == False . - input_vstreams_params = InputVStreamParams.make(network_group, - quantized=False, - format_type=FormatType.FLOAT32) - # TODO: change to FLOAT32 - output_vstreams_params = OutputVStreamParams.make(network_group, quantized=False, format_type=FormatType.FLOAT32) - # output_vstreams_params = OutputVStreamParams.make(network_group, - # quantized=True, - # format_type=FormatType.INT8) +class DataHandler: + def __init__(self, path, image_meta): + self.images_path = path + self.image_meta = image_meta + + def load_data(self, preprocess_fn): + names = [] + + images_list = [img_name for img_name in os.listdir(self.images_path) + if os.path.splitext(os.path.join(self.images_path, img_name))[1] == '.jpg'] + dataset = np.zeros((1, self.image_meta.image_height, + self.image_meta.image_width, + self.image_meta.channels), + dtype=np.float32) + + for idx, img_name in enumerate(images_list): + img = Image.open(os.path.join(self.images_path, img_name)) + img_preproc = preprocess_fn(img) + dataset[idx, :, :, :] = np.array(img_preproc) + names.append(img_name) + break + + self.dataset = dataset + self.names = names - input_vstream_info = hef.get_input_vstream_infos()[0] - output_vstream_infos = hef.get_output_vstream_infos() - input_data = {input_vstream_info.name: dataset} - network_group_params = network_group.create_params() - - with InferVStreams(network_group, input_vstreams_params, output_vstreams_params) as infer_pipeline: - with network_group.activate(network_group_params): - infer_results = infer_pipeline.infer(input_data) - - out = [infer_results[i.name] for i in output_vstream_infos] - return out, names, dataset, names + def _get_coco_labels(self): + coco_names = json.load(open(os.path.join(os.path.dirname(__file__), 'coco_names.json'))) + coco_names = {int(k): {'id': int(k), 'name': str(v)} for (k, v) in coco_names.items()} + return coco_names -# 20 x 20 -> 32 -# stride = 32 -def yolo_postprocess_numpy(net_out, anchors_for_stride, stride): - """ - net_out is shape: [N, 19, 19, 255] or [N, 38, 38, 255] or [N, 76, 76, 255] - first we reshape it to be as in gluon and then follow gluon's shapes. - output_ind = 0 for stride 32, 1 for stride 16, 2 for stride 8. - """ + def _get_labels(self, label_name): + filename = os.path.join(os.path.dirname(__file__), label_name + '.json') + names = json.load(open(filename)) + names = {int(k): {'id': int(k), 'name': str(v)} for (k, v) in names.items()} + return names - # net_out = net_out.astype(np.float32) / 256 - num_classes = 4 - BS = net_out.shape[0] # batch size - H = net_out.shape[1] - W = net_out.shape[2] - - num_anchors = anchors_for_stride.size // 2 # 2 params for each anchor. - num_pred = 1 + 4 + num_classes # 2 box centers, 2 box scales, 1 objness, num_classes class scores - alloc_size = (128, 128) - - grid_x = np.arange(alloc_size[1]) - grid_y = np.arange(alloc_size[0]) - grid_x, grid_y = np.meshgrid(grid_x, grid_y) # dims [128,128], [128,128] - - offsets = np.concatenate((grid_x[:, :, np.newaxis], grid_y[:, :, np.newaxis]), axis=-1) # dim [128,128,2] - offsets = np.expand_dims(np.expand_dims(offsets, 0), 0) # dim [1,1,128,128,2] - - pred = net_out.transpose((0, 3, 1, 2)) # now dims are: [N,C,H,W] as in Gluon. - pred = np.reshape(pred, (BS, num_anchors * num_pred, -1)) # dim [N, 255, HxW] - # dim [N, 361, 255], we did it so that the 255 be the last dim and can be reshaped. - pred = pred.transpose((0, 2, 1)) - pred = np.reshape(pred, (BS, -1, num_anchors, num_pred)) # dim [N, HxW, 3, 85]] - - raw_box_centers = pred[:, :, :, 0:2] # dim [N, HxW, 3, 2] - raw_box_scales = pred[:, :, :, 2:4] # dim [N,HxW, 3, 2] - - objness = pred[:, :, :, 4:5] # dim [N, HxW, 3, 1] - class_pred = pred[:, :, :, 5:] # dim [N, HxW, 3, 80] - offsets = offsets[:, :, :H, :W, :] # dim [1, 1, H, W, 2] - offsets = np.reshape(offsets, (1, -1, 1, 2)) # dim [1, HxW, 1, 2] - box_centers, box_scales, confidence, class_pred = _yolo5_decode( - raw_box_centers=raw_box_centers, - raw_box_scales=raw_box_scales, - objness=objness, - class_pred=class_pred, - anchors_for_stride=anchors_for_stride, - offsets=offsets, - stride=stride) - - class_score = class_pred * confidence # dim [N, HxW, 3, 80] - wh = box_scales / 2.0 - # dim [N, HxW, 3, 4]. scheme xmin, ymin, xmax, ymax - bbox = np.concatenate((box_centers - wh, box_centers + wh), axis=-1) - - detection_boxes = np.reshape(bbox, (BS, -1, 1, 4)) # dim [N, num_detections, 1, 4] - detection_scores = np.reshape(class_score, (BS, -1, num_classes)) # dim [N, num_detections, 80] - - # switching scheme from xmin, ymin, xmanx, ymax to ymin, xmin, ymax, xmax: - detection_boxes_tmp = np.zeros(detection_boxes.shape) - detection_boxes_tmp[:, :, :, 0] = detection_boxes[:, :, :, 1] - detection_boxes_tmp[:, :, :, 1] = detection_boxes[:, :, :, 0] - detection_boxes_tmp[:, :, :, 2] = detection_boxes[:, :, :, 3] - detection_boxes_tmp[:, :, :, 3] = detection_boxes[:, :, :, 2] - - detection_boxes = detection_boxes_tmp # now scheme is: ymin, xmin, ymax, xmax - return detection_boxes.astype(np.float32), detection_scores.astype(np.float32) - -def _yolo5_decode(raw_box_centers, raw_box_scales, objness, class_pred, anchors_for_stride, offsets, stride): - box_centers = (raw_box_centers * 2. - 0.5 + offsets) * stride - box_scales = (raw_box_scales * 2) ** 2 * anchors_for_stride # dim [N, HxW, 3, 2] - return box_centers, box_scales, objness, class_pred - -def postprocessing(endnodes): - """ - endnodes is a list of 3 output tensors: - endnodes[0] - stride 32 of input - endnodes[1] - stride 16 of input - endnodes[2] - stride 8 of input - Returns: - a tensor with dims: [BS, Total_num_of_detections_in_image, 6] - where: - total_num_of_detections_in_image = H*W*((1/32^2) + (1/16^2) + (1/8^2))*num_anchors*num_classes, - with H, W as input dims. - If H=W=608, num_anchors=3, num_classes=80 (coco 2017), we get: - total_num_of_detections = 1819440 ~ 1.8M detections per image for the NMS - """ - H_input = 640 - W_input = 640 - anchors_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]] - # TODO make prettier - strides = [8, 16, 32] - num_classes = 80 - - for output_ind, output_branch in enumerate(endnodes): # iterating over the output layers: - stride = strides[::-1][output_ind] - anchors_for_stride = np.array(anchors_list[::-1][output_ind]) - anchors_for_stride = np.reshape(anchors_for_stride, (1, 1, -1, 2)) # dim [1, 1, 3, 2] - - detection_boxes, detection_scores = yolo_postprocess_numpy(output_branch, - anchors_for_stride, - stride) - - # detection_boxes is a [BS, num_detections, 1, 4] tensor, detection_scores is a - # [BS, num_detections, num_classes] tensor - detection_boxes = detection_boxes / H_input # normalization of box coordinates to 1 - BS = endnodes[0].shape[0] - H = H_input // stride - W = W_input // stride - num_anchors = anchors_for_stride.size // 2 - num_detections = H * W * num_anchors - # detection_boxes.set_shape((BS, num_detections, 1, 4)) - # detection_scores.set_shape((BS, num_detections, num_classes)) - # concatenating the detections from the different output layers: - if output_ind == 0: - detection_boxes_full = detection_boxes - detection_scores_full = detection_scores - else: - detection_boxes_full = tf.concat([detection_boxes_full, detection_boxes], axis=1) - detection_scores_full = tf.concat([detection_scores_full, detection_scores], axis=1) - - score_threshold = 0.5 - nms_iou_threshold = 0.5 - labels_offset = 1 - - (nmsed_boxes, nmsed_scores, nmsed_classes, num_detections) = \ - combined_non_max_suppression(boxes=detection_boxes_full, - scores=detection_scores_full, - score_threshold=score_threshold, - iou_threshold=nms_iou_threshold, - max_output_size_per_class=100, - max_total_size=100) +COCO_17_14 = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, + 10: 10, 11: 11, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, + 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 27, + 26: 28, 27: 31, 28: 32, 29: 33, 30: 34, 31: 35, 32: 36, 33: 37, + 34: 38, 35: 39, 36: 40, 37: 41, 38: 42, 39: 43, 40: 44, 41: 46, + 42: 47, 43: 48, 44: 49, 45: 50, 46: 51, 47: 52, 48: 53, 49: 54, + 50: 55, 51: 56, 52: 57, 53: 58, 54: 59, 55: 60, 56: 61, 57: 62, + 58: 63, 59: 64, 60: 65, 61: 67, 62: 70, 63: 72, 64: 73, 65: 74, + 66: 75, 67: 76, 68: 77, 69: 78, 70: 79, 71: 80, 72: 81, 73: 82, + 74: 84, 75: 85, 76: 86, 77: 87, 78: 88, 79: 89, 80: 90} - # adding offset to the class prediction and cast to integer - def translate_coco_2017_to_2014(nmsed_classes): - return np.vectorize(COCO_2017_TO_2014_TRANSLATION.get)(nmsed_classes).astype(np.int32) +class YoloProcessing: + def __init__(self, imageMeta, classes): + self.output_height = imageMeta.image_height + self.output_width = imageMeta.image_width + self.classes = classes - nmsed_classes = tf.cast(tf.add(nmsed_classes, labels_offset), tf.int16) - nmsed_classes = translate_coco_2017_to_2014(nmsed_classes) + def preproc(self, image, resize_side=256): - return {'detection_boxes': nmsed_boxes, - 'detection_scores': nmsed_scores, - 'detection_classes': nmsed_classes, - 'num_detections': num_detections} + ''' + imagenet-standard: aspect-preserving resize to 256px smaller-side, + then central-crop to 224px + ''' + new_width = int(image.width/image.height*resize_side) + new_height = resize_side + x, y = (new_width-self.output_width)/2, 0 + + # Select area to crop + area = (x, y, x+self.output_width, y+self.output_height) + + # Crop, show, and save image + cropped_img = image.resize((new_width, new_height)).crop(area) + return cropped_img + + # 20 x 20 -> 32 + # stride = 32 + def yolo_postprocess_numpy(self, net_out, anchors_for_stride, stride): + """ + net_out is shape: [N, 19, 19, 255] or [N, 38, 38, 255] or [N, 76, 76, 255] + first we reshape it to be as in gluon and then follow gluon's shapes. + output_ind = 0 for stride 32, 1 for stride 16, 2 for stride 8. + """ + + # net_out = net_out.astype(np.float32) / 256 + num_classes = 4 + BS = net_out.shape[0] # batch size + H = net_out.shape[1] + W = net_out.shape[2] + + num_anchors = anchors_for_stride.size // 2 # 2 params for each anchor. + num_pred = 1 + 4 + num_classes # 2 box centers, 2 box scales, 1 objness, num_classes class scores + alloc_size = (128, 128) + + grid_x = np.arange(alloc_size[1]) + grid_y = np.arange(alloc_size[0]) + grid_x, grid_y = np.meshgrid(grid_x, grid_y) # dims [128,128], [128,128] + + offsets = np.concatenate((grid_x[:, :, np.newaxis], grid_y[:, :, np.newaxis]), axis=-1) # dim [128,128,2] + offsets = np.expand_dims(np.expand_dims(offsets, 0), 0) # dim [1,1,128,128,2] + + pred = net_out.transpose((0, 3, 1, 2)) # now dims are: [N,C,H,W] as in Gluon. + pred = np.reshape(pred, (BS, num_anchors * num_pred, -1)) # dim [N, 255, HxW] + # dim [N, 361, 255], we did it so that the 255 be the last dim and can be reshaped. + pred = pred.transpose((0, 2, 1)) + pred = np.reshape(pred, (BS, -1, num_anchors, num_pred)) # dim [N, HxW, 3, 85]] + + raw_box_centers = pred[:, :, :, 0:2] # dim [N, HxW, 3, 2] + raw_box_scales = pred[:, :, :, 2:4] # dim [N,HxW, 3, 2] + + objness = pred[:, :, :, 4:5] # dim [N, HxW, 3, 1] + class_pred = pred[:, :, :, 5:] # dim [N, HxW, 3, 80] + offsets = offsets[:, :, :H, :W, :] # dim [1, 1, H, W, 2] + offsets = np.reshape(offsets, (1, -1, 1, 2)) # dim [1, HxW, 1, 2] + box_centers, box_scales, confidence, class_pred = self._yolo5_decode( + raw_box_centers=raw_box_centers, + raw_box_scales=raw_box_scales, + objness=objness, + class_pred=class_pred, + anchors_for_stride=anchors_for_stride, + offsets=offsets, + stride=stride) + + class_score = class_pred * confidence # dim [N, HxW, 3, 80] + wh = box_scales / 2.0 + # dim [N, HxW, 3, 4]. scheme xmin, ymin, xmax, ymax + bbox = np.concatenate((box_centers - wh, box_centers + wh), axis=-1) + + detection_boxes = np.reshape(bbox, (BS, -1, 1, 4)) # dim [N, num_detections, 1, 4] + detection_scores = np.reshape(class_score, (BS, -1, num_classes)) # dim [N, num_detections, 80] + + # switching scheme from xmin, ymin, xmanx, ymax to ymin, xmin, ymax, xmax: + detection_boxes_tmp = np.zeros(detection_boxes.shape) + detection_boxes_tmp[:, :, :, 0] = detection_boxes[:, :, :, 1] + detection_boxes_tmp[:, :, :, 1] = detection_boxes[:, :, :, 0] + detection_boxes_tmp[:, :, :, 2] = detection_boxes[:, :, :, 3] + detection_boxes_tmp[:, :, :, 3] = detection_boxes[:, :, :, 2] + + detection_boxes = detection_boxes_tmp # now scheme is: ymin, xmin, ymax, xmax + return detection_boxes.astype(np.float32), detection_scores.astype(np.float32) + + def _yolo5_decode(self, raw_box_centers, raw_box_scales, objness, class_pred, anchors_for_stride, offsets, stride): + box_centers = (raw_box_centers * 2. - 0.5 + offsets) * stride + box_scales = (raw_box_scales * 2) ** 2 * anchors_for_stride # dim [N, HxW, 3, 2] + return box_centers, box_scales, objness, class_pred -def _get_face_detection_visualization_data(logits): - boxes = logits['detection_boxes'][0] + def postprocessing(self, endnodes): + """ + endnodes is a list of 3 output tensors: + endnodes[0] - stride 32 of input + endnodes[1] - stride 16 of input + endnodes[2] - stride 8 of input + Returns: + a tensor with dims: [BS, Total_num_of_detections_in_image, 6] + where: + total_num_of_detections_in_image = H*W*((1/32^2) + (1/16^2) + (1/8^2))*num_anchors*num_classes, + with H, W as input dims. + If H=W=608, num_anchors=3, num_classes=80 (coco 2017), we get: + total_num_of_detections = 1819440 ~ 1.8M detections per image for the NMS + """ + H_input = 640 + W_input = 640 + anchors_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]] + # TODO make prettier + strides = [8, 16, 32] - face_landmarks = logits.get('face_landmarks') - if face_landmarks is not None: - face_landmarks = face_landmarks[0].reshape((-1, 5, 2))[:, :, (1, 0)] - boxes = boxes[:, (1, 0, 3, 2)] - # No name to prevent clobbering the visualization - labels = {1: {'id': 1, 'name': ''}} - return boxes, labels, face_landmarks + for output_ind, output_branch in enumerate(endnodes): # iterating over the output layers: + stride = strides[::-1][output_ind] + anchors_for_stride = np.array(anchors_list[::-1][output_ind]) + anchors_for_stride = np.reshape(anchors_for_stride, (1, 1, -1, 2)) # dim [1, 1, 3, 2] + + detection_boxes, detection_scores = self.yolo_postprocess_numpy(output_branch, + anchors_for_stride, + stride) + + # detection_boxes is a [BS, num_detections, 1, 4] tensor, detection_scores is a + # [BS, num_detections, num_classes] tensor + detection_boxes = detection_boxes / H_input # normalization of box coordinates to 1 + BS = endnodes[0].shape[0] + H = H_input // stride + W = W_input // stride + num_anchors = anchors_for_stride.size // 2 + num_detections = H * W * num_anchors + # detection_boxes.set_shape((BS, num_detections, 1, 4)) + # detection_scores.set_shape((BS, num_detections, num_classes)) + # concatenating the detections from the different output layers: + if output_ind == 0: + detection_boxes_full = detection_boxes + detection_scores_full = detection_scores + else: + detection_boxes_full = tf.concat([detection_boxes_full, detection_boxes], axis=1) + detection_scores_full = tf.concat([detection_scores_full, detection_scores], axis=1) + + score_threshold = 0.5 + nms_iou_threshold = 0.5 + labels_offset = 1 + + (nmsed_boxes, nmsed_scores, nmsed_classes, num_detections) = \ + combined_non_max_suppression(boxes=detection_boxes_full, + scores=detection_scores_full, + score_threshold=score_threshold, + iou_threshold=nms_iou_threshold, + max_output_size_per_class=100, + max_total_size=100) -def _get_coco_labels(): - coco_names = json.load(open(os.path.join(os.path.dirname(__file__), 'coco_names.json'))) - coco_names = {int(k): {'id': int(k), 'name': str(v)} for (k, v) in coco_names.items()} - return coco_names + # adding offset to the class prediction and cast to integer + def translate_coco_2017_to_2014(nmsed_classes): + return np.vectorize(COCO_17_14.get)(nmsed_classes).astype(np.int32) + + nmsed_classes = tf.cast(tf.add(nmsed_classes, labels_offset), tf.int16) + nmsed_classes = translate_coco_2017_to_2014(nmsed_classes) + + return {'detection_boxes': nmsed_boxes, + 'detection_scores': nmsed_scores, + 'detection_classes': nmsed_classes, + 'num_detections': num_detections} + + + +class HailoHandler: + def __init__(self, hef_path='hef/yolov5m.hef'): + target = PcieDevice() + + self.hef = HEF(hef_path) + + # Configure network groups + configure_params = ConfigureParams.create_from_hef(hef=self.hef, + interface=HailoStreamInterface.PCIe) + network_groups = target.configure(self.hef, configure_params) + self.network_group = network_groups[0] + + self.input_vstreams_params = InputVStreamParams.make(self.network_group, + quantized=False, + format_type=FormatType.FLOAT32) + + self.output_vstreams_params = OutputVStreamParams.make(self.network_group, quantized=False, format_type=FormatType.FLOAT32) + + self.input_vstream_info = self.hef.get_input_vstream_infos()[0] + self.output_vstream_infos = self.hef.get_output_vstream_infos() + self.network_group_params = self.network_group.create_params() + + def run_hailo(self, dataset): + + input_data = {self.input_vstream_info.name: dataset} + + with InferVStreams(self.network_group, self.input_vstreams_params, self.output_vstreams_params) as infer_pipeline: + with self.network_group.activate(self.network_group_params): + infer_results = infer_pipeline.infer(input_data) + + out = [infer_results[i.name] for i in self.output_vstream_infos] + return out -def _get_labels(label_name): - filename = os.path.join(os.path.dirname(__file__), label_name + '.json') - names = json.load(open(filename)) - names = {int(k): {'id': int(k), 'name': str(v)} for (k, v) in names.items()} - return names def process_yolo5(): - hef, network_group = init_hailo("yolov5m_22_2") + imageMeta = ImageMeta(640, 640, 3) + processor = YoloProcessing(imageMeta, classes=3) + data = DataHandler('./minimal_data', imageMeta) + data.load_data(processor.preproc) - dataset, names = dataset_read(hef) + hailo = HailoHandler('hef/yolov5m_22_2.hef') + out = hailo.run_hailo(data.dataset) - samples = 1000 - start_time = time.time() - fps = 0 - while samples > 0: - if start_time + 1 < time.time(): - print("fps: " + str(fps)) - start_time = time.time() - fps = 0 - - out, names, dataset, names = run_hailo(dataset, names, hef, network_group) - - logits = postprocessing(out) - - fps += 1 - samples -= 1 + logits = processor.postprocessing(out) - labels = _get_labels("daria_names") + labels = data._get_labels("daria_names") image = visualize_boxes_and_labels_on_image_array( - dataset[0], + data.dataset[0], logits['detection_boxes'].numpy()[0], logits['detection_classes'][0], logits['detection_scores'].numpy()[0], @@ -324,16 +309,5 @@ def process_yolo5(): Image.fromarray(np.uint8(image)).save('/home/maintenance/test.png') -COCO_2017_TO_2014_TRANSLATION = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, - 11: 11, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, - 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 27, 26: 28, - 27: 31, 28: 32, 29: 33, 30: 34, 31: 35, 32: 36, 33: 37, 34: 38, - 35: 39, 36: 40, 37: 41, 38: 42, 39: 43, 40: 44, 41: 46, 42: 47, - 43: 48, 44: 49, 45: 50, 46: 51, 47: 52, 48: 53, 49: 54, 50: 55, - 51: 56, 52: 57, 53: 58, 54: 59, 55: 60, 56: 61, 57: 62, 58: 63, - 59: 64, 60: 65, 61: 67, 62: 70, 63: 72, 64: 73, 65: 74, 66: 75, - 67: 76, 68: 77, 69: 78, 70: 79, 71: 80, 72: 81, 73: 82, 74: 84, - 75: 85, 76: 86, 77: 87, 78: 88, 79: 89, 80: 90} - if __name__ == "__main__": process_yolo5()