refactors into classes
- ImageMeta struct saves image metadata
- DataHandler takes care of loading the dataset and parsing the label information
- YoloProcessing takes care of image preprocessing and YOLO postprocessing
- HailoHandler connects to the Hailo device, pushes the desired network HEF file, and runs the dataset on the Hailo chip
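For orientation, the refactored pieces compose as in process_yolo5() below (a minimal sketch; the data path, HEF path, and class count are taken from this commit's own example code):

    imageMeta = ImageMeta(640, 640, 3)               # input geometry for the network
    processor = YoloProcessing(imageMeta, classes=3) # pre/postprocessing helpers
    data = DataHandler('./minimal_data', imageMeta)  # loads .jpg images from disk
    data.load_data(processor.preproc)
    hailo = HailoHandler('hef/yolov5m_22_2.hef')     # configures the PCIe device
    out = hailo.run_hailo(data.dataset)              # raw output tensors
    logits = processor.postprocessing(out)           # NMS'd boxes/scores/classes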
inference.py (554)
@@ -1,317 +1,302 @@
import json
import os
import time
from multiprocessing import Process

import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.image import combined_non_max_suppression

from detection_tools.utils.visualization_utils import \
    visualize_boxes_and_labels_on_image_array

from hailo_platform import (ConfigureParams, FormatType, HEF,
                            HailoStreamInterface, InferVStreams,
                            InputVStreamParams, OutputVStreamParams,
                            PcieDevice)


# preprocess dataset for yolov5 size
# yolov5 640x640
# resnet18 320x320
# Generate random dataset
def dataset_random(image_height, image_width, channels):
    num_of_images = 10
    low, high = 2, 20
    dataset = np.random.randint(low, high, (num_of_images, image_height,
        image_width, channels)).astype(np.float32)
    return dataset

class ImageMeta:
    def __init__(self, image_height, image_width, channels):
        self.image_height = image_height
        self.image_width = image_width
        self.channels = channels

class DataHandler:
    def __init__(self, path, image_meta):
        self.images_path = path
        self.image_meta = image_meta

    def load_data(self, preprocess_fn):
        names = []

        images_list = [img_name for img_name in os.listdir(self.images_path)
                       if os.path.splitext(os.path.join(self.images_path, img_name))[1] == '.jpg']

        dataset = np.zeros((1, self.image_meta.image_height,
                            self.image_meta.image_width,
                            self.image_meta.channels),
                           dtype=np.float32)

        # NOTE: only the first image is loaded; dataset is allocated for a single frame.
        for idx, img_name in enumerate(images_list):
            img = Image.open(os.path.join(self.images_path, img_name))
            img_preproc = preprocess_fn(img)
            dataset[idx, :, :, :] = np.array(img_preproc)
            names.append(img_name)
            break

        self.dataset = dataset
        self.names = names
    def _get_coco_labels(self):
        coco_names = json.load(open(os.path.join(os.path.dirname(__file__), 'coco_names.json')))
        coco_names = {int(k): {'id': int(k), 'name': str(v)} for (k, v) in coco_names.items()}
        return coco_names
    def _get_labels(self, label_name):
        filename = os.path.join(os.path.dirname(__file__), label_name + '.json')
        names = json.load(open(filename))
        names = {int(k): {'id': int(k), 'name': str(v)} for (k, v) in names.items()}
        return names

# Translation of class ids from COCO 2017 (80 contiguous ids) to COCO 2014 ids,
# as expected by the visualization label map.
COCO_17_14 = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9,
              10: 10, 11: 11, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18,
              18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 27,
              26: 28, 27: 31, 28: 32, 29: 33, 30: 34, 31: 35, 32: 36, 33: 37,
              34: 38, 35: 39, 36: 40, 37: 41, 38: 42, 39: 43, 40: 44, 41: 46,
              42: 47, 43: 48, 44: 49, 45: 50, 46: 51, 47: 52, 48: 53, 49: 54,
              50: 55, 51: 56, 52: 57, 53: 58, 54: 59, 55: 60, 56: 61, 57: 62,
              58: 63, 59: 64, 60: 65, 61: 67, 62: 70, 63: 72, 64: 73, 65: 74,
              66: 75, 67: 76, 68: 77, 69: 78, 70: 79, 71: 80, 72: 81, 73: 82,
              74: 84, 75: 85, 76: 86, 77: 87, 78: 88, 79: 89, 80: 90}

class YoloProcessing:
    def __init__(self, imageMeta, classes):
        self.output_height = imageMeta.image_height
        self.output_width = imageMeta.image_width
        self.classes = classes

    def preproc(self, image, resize_side=256):
        '''
        imagenet-standard: aspect-preserving resize to 256px smaller-side,
        then central-crop to 224px (here the crop size comes from
        self.output_width/self.output_height, e.g. 640x640 for yolov5)
        '''
        new_width = int(image.width/image.height*resize_side)
        new_height = resize_side
        x, y = (new_width-self.output_width)/2, 0

        # Select area to crop
        area = (x, y, x+self.output_width, y+self.output_height)

        # Crop the resized image to the output size
        cropped_img = image.resize((new_width, new_height)).crop(area)
        return cropped_img

    # 20 x 20 -> 32
    # stride = 32
    def yolo_postprocess_numpy(self, net_out, anchors_for_stride, stride):
        """
        net_out is shape: [N, 19, 19, 255] or [N, 38, 38, 255] or [N, 76, 76, 255]
        first we reshape it to be as in gluon and then follow gluon's shapes.
        output_ind = 0 for stride 32, 1 for stride 16, 2 for stride 8.
        """

        # net_out = net_out.astype(np.float32) / 256
        num_classes = 4
        BS = net_out.shape[0]  # batch size
        H = net_out.shape[1]
        W = net_out.shape[2]

        num_anchors = anchors_for_stride.size // 2  # 2 params for each anchor.
        num_pred = 1 + 4 + num_classes  # 2 box centers, 2 box scales, 1 objness, num_classes class scores
        alloc_size = (128, 128)

        grid_x = np.arange(alloc_size[1])
        grid_y = np.arange(alloc_size[0])
        grid_x, grid_y = np.meshgrid(grid_x, grid_y)  # dims [128,128], [128,128]

        offsets = np.concatenate((grid_x[:, :, np.newaxis], grid_y[:, :, np.newaxis]), axis=-1)  # dim [128,128,2]
        offsets = np.expand_dims(np.expand_dims(offsets, 0), 0)  # dim [1,1,128,128,2]

        pred = net_out.transpose((0, 3, 1, 2))  # now dims are: [N,C,H,W] as in Gluon.
        pred = np.reshape(pred, (BS, num_anchors * num_pred, -1))  # dim [N, 255, HxW]
        # dim [N, HxW, 255], we did it so that the 255 be the last dim and can be reshaped.
        pred = pred.transpose((0, 2, 1))
        pred = np.reshape(pred, (BS, -1, num_anchors, num_pred))  # dim [N, HxW, 3, 85]

        raw_box_centers = pred[:, :, :, 0:2]  # dim [N, HxW, 3, 2]
        raw_box_scales = pred[:, :, :, 2:4]  # dim [N, HxW, 3, 2]

        objness = pred[:, :, :, 4:5]  # dim [N, HxW, 3, 1]
        class_pred = pred[:, :, :, 5:]  # dim [N, HxW, 3, 80]
        offsets = offsets[:, :, :H, :W, :]  # dim [1, 1, H, W, 2]
        offsets = np.reshape(offsets, (1, -1, 1, 2))  # dim [1, HxW, 1, 2]
        box_centers, box_scales, confidence, class_pred = self._yolo5_decode(
            raw_box_centers=raw_box_centers,
            raw_box_scales=raw_box_scales,
            objness=objness,
            class_pred=class_pred,
            anchors_for_stride=anchors_for_stride,
            offsets=offsets,
            stride=stride)

        class_score = class_pred * confidence  # dim [N, HxW, 3, 80]
        wh = box_scales / 2.0
        # dim [N, HxW, 3, 4]. scheme xmin, ymin, xmax, ymax
        bbox = np.concatenate((box_centers - wh, box_centers + wh), axis=-1)

        detection_boxes = np.reshape(bbox, (BS, -1, 1, 4))  # dim [N, num_detections, 1, 4]
        detection_scores = np.reshape(class_score, (BS, -1, num_classes))  # dim [N, num_detections, 80]

        # switching scheme from xmin, ymin, xmax, ymax to ymin, xmin, ymax, xmax:
        detection_boxes_tmp = np.zeros(detection_boxes.shape)
        detection_boxes_tmp[:, :, :, 0] = detection_boxes[:, :, :, 1]
        detection_boxes_tmp[:, :, :, 1] = detection_boxes[:, :, :, 0]
        detection_boxes_tmp[:, :, :, 2] = detection_boxes[:, :, :, 3]
        detection_boxes_tmp[:, :, :, 3] = detection_boxes[:, :, :, 2]

        detection_boxes = detection_boxes_tmp  # now scheme is: ymin, xmin, ymax, xmax
        return detection_boxes.astype(np.float32), detection_scores.astype(np.float32)

    def _yolo5_decode(self, raw_box_centers, raw_box_scales, objness, class_pred, anchors_for_stride, offsets, stride):
        box_centers = (raw_box_centers * 2. - 0.5 + offsets) * stride
        box_scales = (raw_box_scales * 2) ** 2 * anchors_for_stride  # dim [N, HxW, 3, 2]
        return box_centers, box_scales, objness, class_pred
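
    # A note on _yolo5_decode (a sketch, assuming the standard YOLOv5 head):
    # per anchor and grid cell the network emits sigmoid-activated offsets,
    # which are mapped to pixel units as
    #   box_centers = (2*t_xy - 0.5 + cell_offset) * stride
    #   box_scales  = (2*t_wh)**2 * anchor_wh
    # i.e. net_out is assumed to hold already-sigmoided values, so only this
    # affine/square mapping remains (2*t - 0.5 ranges over (-0.5, 1.5) for
    # sigmoided t, letting a center fall slightly outside its emitting cell).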
    def postprocessing(self, endnodes):
        """
        endnodes is a list of 3 output tensors:
        endnodes[0] - stride 32 of input
        endnodes[1] - stride 16 of input
        endnodes[2] - stride 8 of input
        Returns:
        a tensor with dims: [BS, Total_num_of_detections_in_image, 6]
        where:
            total_num_of_detections_in_image = H*W*((1/32^2) + (1/16^2) + (1/8^2))*num_anchors*num_classes,
            with H, W as input dims.
            If H=W=608, num_anchors=3, num_classes=80 (coco 2017), we get:
            total_num_of_detections = 1819440 ~ 1.8M detections per image for the NMS
        """
        H_input = 640
        W_input = 640
        anchors_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        # TODO make prettier
        strides = [8, 16, 32]

        for output_ind, output_branch in enumerate(endnodes):  # iterating over the output layers:
            stride = strides[::-1][output_ind]
            anchors_for_stride = np.array(anchors_list[::-1][output_ind])
            anchors_for_stride = np.reshape(anchors_for_stride, (1, 1, -1, 2))  # dim [1, 1, 3, 2]

            detection_boxes, detection_scores = self.yolo_postprocess_numpy(output_branch,
                                                                            anchors_for_stride,
                                                                            stride)

            # detection_boxes is a [BS, num_detections, 1, 4] tensor, detection_scores is a
            # [BS, num_detections, num_classes] tensor
            detection_boxes = detection_boxes / H_input  # normalization of box coordinates to 1
            BS = endnodes[0].shape[0]
            H = H_input // stride
            W = W_input // stride
            num_anchors = anchors_for_stride.size // 2
            num_detections = H * W * num_anchors
            # detection_boxes.set_shape((BS, num_detections, 1, 4))
            # detection_scores.set_shape((BS, num_detections, num_classes))
            # concatenating the detections from the different output layers:
            if output_ind == 0:
                detection_boxes_full = detection_boxes
                detection_scores_full = detection_scores
            else:
                detection_boxes_full = tf.concat([detection_boxes_full, detection_boxes], axis=1)
                detection_scores_full = tf.concat([detection_scores_full, detection_scores], axis=1)

        score_threshold = 0.5
        nms_iou_threshold = 0.5
        labels_offset = 1

        (nmsed_boxes, nmsed_scores, nmsed_classes, num_detections) = \
            combined_non_max_suppression(boxes=detection_boxes_full,
                                         scores=detection_scores_full,
                                         score_threshold=score_threshold,
                                         iou_threshold=nms_iou_threshold,
                                         max_output_size_per_class=100,
                                         max_total_size=100)
        # adding offset to the class prediction and cast to integer
        def translate_coco_2017_to_2014(nmsed_classes):
            return np.vectorize(COCO_17_14.get)(nmsed_classes).astype(np.int32)

        nmsed_classes = tf.cast(tf.add(nmsed_classes, labels_offset), tf.int16)
        nmsed_classes = translate_coco_2017_to_2014(nmsed_classes)

        return {'detection_boxes': nmsed_boxes,
                'detection_scores': nmsed_scores,
                'detection_classes': nmsed_classes,
                'num_detections': num_detections}


def _get_face_detection_visualization_data(logits):
    boxes = logits['detection_boxes'][0]

    face_landmarks = logits.get('face_landmarks')
    if face_landmarks is not None:
        face_landmarks = face_landmarks[0].reshape((-1, 5, 2))[:, :, (1, 0)]
    boxes = boxes[:, (1, 0, 3, 2)]
    # No name to prevent clobbering the visualization
    labels = {1: {'id': 1, 'name': ''}}
    return boxes, labels, face_landmarks

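# HailoRT flow used below (a sketch of the sequence, as exercised by run_hailo):
# configure the PCIe device with a compiled HEF, build the input/output vstream
# params once, then run inference inside an activated network group via
# InferVStreams. Keeping the params on self lets repeated run_hailo calls
# reuse the already-configured network group.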
'''
The target can be used as a context manager ("with" statement) to ensure it's
released on time. Here it's avoided for the sake of simplicity.
'''
class HailoHandler:
    def __init__(self, hef_path='hef/yolov5m.hef'):
        target = PcieDevice()

        self.hef = HEF(hef_path)

        # Configure network groups
        configure_params = ConfigureParams.create_from_hef(hef=self.hef,
                                        interface=HailoStreamInterface.PCIe)
        network_groups = target.configure(self.hef, configure_params)
        self.network_group = network_groups[0]

        # The quantized argument signifies whether or not the incoming data is
        # already quantized. Data is quantized by HailoRT if and only if
        # quantized == False.
        self.input_vstreams_params = InputVStreamParams.make(self.network_group,
                                                             quantized=False,
                                                             format_type=FormatType.FLOAT32)

        self.output_vstreams_params = OutputVStreamParams.make(self.network_group,
                                                               quantized=False,
                                                               format_type=FormatType.FLOAT32)

        self.input_vstream_info = self.hef.get_input_vstream_infos()[0]
        self.output_vstream_infos = self.hef.get_output_vstream_infos()
        self.network_group_params = self.network_group.create_params()

    def run_hailo(self, dataset):

        input_data = {self.input_vstream_info.name: dataset}

        with InferVStreams(self.network_group, self.input_vstreams_params, self.output_vstreams_params) as infer_pipeline:
            with self.network_group.activate(self.network_group_params):
                infer_results = infer_pipeline.infer(input_data)

        out = [infer_results[i.name] for i in self.output_vstream_infos]
        return out

def process_yolo5():

    imageMeta = ImageMeta(640, 640, 3)
    processor = YoloProcessing(imageMeta, classes=3)
    data = DataHandler('./minimal_data', imageMeta)
    data.load_data(processor.preproc)

    hailo = HailoHandler('hef/yolov5m_22_2.hef')
    out = hailo.run_hailo(data.dataset)

    samples = 1000
    start_time = time.time()
    fps = 0
    while samples > 0:
        if start_time + 1 < time.time():
            print("fps: " + str(fps))
            start_time = time.time()
            fps = 0

        out = hailo.run_hailo(data.dataset)

        fps += 1
        samples -= 1

    logits = processor.postprocessing(out)

    labels = data._get_labels("daria_names")
    image = visualize_boxes_and_labels_on_image_array(
        data.dataset[0],
        logits['detection_boxes'].numpy()[0],
        logits['detection_classes'][0],
        logits['detection_scores'].numpy()[0],
@@ -324,16 +309,5 @@ def process_yolo5():
    Image.fromarray(np.uint8(image)).save('/home/maintenance/test.png')

if __name__ == "__main__":
    process_yolo5()