refactors into classes

- ImageMeta stores the image metadata
  (height, width, channels)
- DataHandler takes care of loading the dataset
  and parsing the label information
- YoloProcessing takes care of image preprocessing
  and YOLO postprocessing
- HailoHandler connects to the Hailo device, loads
  the desired network HEF file and runs the dataset
  on the Hailo chip
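Roughly, the new classes compose like this. This is a minimal usage sketch mirroring the new process_yolo5() in the diff below; the data path, HEF file and class count are the values used in this commit, not general defaults:

    imageMeta = ImageMeta(640, 640, 3)                # yolov5 input geometry
    processor = YoloProcessing(imageMeta, classes=3)  # pre-/postprocessing helper
    data = DataHandler('./minimal_data', imageMeta)   # where the .jpg images live
    data.load_data(processor.preproc)                 # load and preprocess the images

    hailo = HailoHandler('hef/yolov5m_22_2.hef')      # configure the device with the HEF
    out = hailo.run_hailo(data.dataset)               # run inference on the Hailo chip
    logits = processor.postprocessing(out)            # decode the YOLO output tensors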
raphael 2022-03-02 11:44:50 +01:00
parent c64b8a27dd
commit be89e739b4


@@ -1,24 +1,89 @@
import json
import os
import time

from PIL import Image
from detection_tools.utils.visualization_utils import \
    visualize_boxes_and_labels_on_image_array
from hailo_platform import (ConfigureParams, FormatType, HEF,
                            HailoStreamInterface, InferVStreams,
                            InputVStreamParams, OutputVStreamParams,
                            PcieDevice)
import numpy as np
import tensorflow as tf
from tensorflow.image import combined_non_max_suppression


# Collect images from data files
class ImageMeta:
    def __init__(self, image_height, image_width, channels):
        self.image_height = image_height
        self.image_width = image_width
        self.channels = channels


class DataHandler:
    def __init__(self, path, image_meta):
        self.images_path = path
        self.image_meta = image_meta

    def load_data(self, preprocess_fn):
        names = []
        images_list = [img_name for img_name in os.listdir(self.images_path)
                       if os.path.splitext(os.path.join(self.images_path, img_name))[1] == '.jpg']

        dataset = np.zeros((1, self.image_meta.image_height,
                            self.image_meta.image_width,
                            self.image_meta.channels),
                           dtype=np.float32)

        for idx, img_name in enumerate(images_list):
            img = Image.open(os.path.join(self.images_path, img_name))
            img_preproc = preprocess_fn(img)
            dataset[idx, :, :, :] = np.array(img_preproc)
            names.append(img_name)
            break

        self.dataset = dataset
        self.names = names

    def _get_coco_labels(self):
        coco_names = json.load(open(os.path.join(os.path.dirname(__file__), 'coco_names.json')))
        coco_names = {int(k): {'id': int(k), 'name': str(v)} for (k, v) in coco_names.items()}
        return coco_names

    def _get_labels(self, label_name):
        filename = os.path.join(os.path.dirname(__file__), label_name + '.json')
        names = json.load(open(filename))
        names = {int(k): {'id': int(k), 'name': str(v)} for (k, v) in names.items()}
        return names


COCO_17_14 = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9,
              10: 10, 11: 11, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18,
              18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 27,
              26: 28, 27: 31, 28: 32, 29: 33, 30: 34, 31: 35, 32: 36, 33: 37,
              34: 38, 35: 39, 36: 40, 37: 41, 38: 42, 39: 43, 40: 44, 41: 46,
              42: 47, 43: 48, 44: 49, 45: 50, 46: 51, 47: 52, 48: 53, 49: 54,
              50: 55, 51: 56, 52: 57, 53: 58, 54: 59, 55: 60, 56: 61, 57: 62,
              58: 63, 59: 64, 60: 65, 61: 67, 62: 70, 63: 72, 64: 73, 65: 74,
              66: 75, 67: 76, 68: 77, 69: 78, 70: 79, 71: 80, 72: 81, 73: 82,
              74: 84, 75: 85, 76: 86, 77: 87, 78: 88, 79: 89, 80: 90}


class YoloProcessing:
    def __init__(self, imageMeta, classes):
        self.output_height = imageMeta.image_height
        self.output_width = imageMeta.image_width
        self.classes = classes

    def preproc(self, image, resize_side=256):
        '''
        imagenet-standard: aspect-preserving resize to 256px smaller-side,
@@ -26,100 +91,18 @@ def preproc(image, output_height=640, output_width=640, resize_side=256):
        '''
        new_width = int(image.width/image.height*resize_side)
        new_height = resize_side
        x, y = (new_width-self.output_width)/2, 0
        # Select area to crop
        area = (x, y, x+self.output_width, y+self.output_height)
        # Crop, show, and save image
        cropped_img = image.resize((new_width, new_height)).crop(area)
        return cropped_img
# (removed by this commit: the module-level dataset_read, dataset_random,
#  init_hailo and run_hailo helpers; their responsibilities move into
#  DataHandler and HailoHandler)
    # 20 x 20 -> 32
    # stride = 32
    def yolo_postprocess_numpy(self, net_out, anchors_for_stride, stride):
        """
        net_out is shape: [N, 19, 19, 255] or [N, 38, 38, 255] or [N, 76, 76, 255]
        first we reshape it to be as in gluon and then follow gluon's shapes.
@@ -156,7 +139,7 @@ def yolo_postprocess_numpy(net_out, anchors_for_stride, stride):
        class_pred = pred[:, :, :, 5:]  # dim [N, HxW, 3, 80]
        offsets = offsets[:, :, :H, :W, :]  # dim [1, 1, H, W, 2]
        offsets = np.reshape(offsets, (1, -1, 1, 2))  # dim [1, HxW, 1, 2]
        box_centers, box_scales, confidence, class_pred = self._yolo5_decode(
            raw_box_centers=raw_box_centers,
            raw_box_scales=raw_box_scales,
            objness=objness,
@@ -183,12 +166,13 @@ def yolo_postprocess_numpy(net_out, anchors_for_stride, stride):
        detection_boxes = detection_boxes_tmp  # now scheme is: ymin, xmin, ymax, xmax
        return detection_boxes.astype(np.float32), detection_scores.astype(np.float32)

    def _yolo5_decode(self, raw_box_centers, raw_box_scales, objness, class_pred, anchors_for_stride, offsets, stride):
        box_centers = (raw_box_centers * 2. - 0.5 + offsets) * stride
        box_scales = (raw_box_scales * 2) ** 2 * anchors_for_stride  # dim [N, HxW, 3, 2]
        return box_centers, box_scales, objness, class_pred

    def postprocessing(self, endnodes):
        """
        endnodes is a list of 3 output tensors:
        endnodes[0] - stride 32 of input
@@ -207,14 +191,13 @@ def postprocessing(endnodes):
        anchors_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        # TODO make prettier
        strides = [8, 16, 32]
        # (the hard-coded num_classes = 80 is dropped in this commit)

        for output_ind, output_branch in enumerate(endnodes):  # iterating over the output layers:
            stride = strides[::-1][output_ind]
            anchors_for_stride = np.array(anchors_list[::-1][output_ind])
            anchors_for_stride = np.reshape(anchors_for_stride, (1, 1, -1, 2))  # dim [1, 1, 3, 2]
            detection_boxes, detection_scores = self.yolo_postprocess_numpy(output_branch,
                                                                            anchors_for_stride,
                                                                            stride)
@@ -251,7 +234,7 @@ def postprocessing(endnodes):
        # adding offset to the class prediction and cast to integer
        def translate_coco_2017_to_2014(nmsed_classes):
            return np.vectorize(COCO_17_14.get)(nmsed_classes).astype(np.int32)

        nmsed_classes = tf.cast(tf.add(nmsed_classes, labels_offset), tf.int16)
        nmsed_classes = translate_coco_2017_to_2014(nmsed_classes)
@@ -262,56 +245,58 @@ def postprocessing(endnodes):
                'num_detections': num_detections}


# (removed by this commit: the module-level _get_face_detection_visualization_data,
#  _get_coco_labels and _get_labels helpers; label lookup now lives in DataHandler)

class HailoHandler:
    def __init__(self, hef_path='hef/yolov5m.hef'):
        target = PcieDevice()

        self.hef = HEF(hef_path)

        # Configure network groups
        configure_params = ConfigureParams.create_from_hef(hef=self.hef,
                                                           interface=HailoStreamInterface.PCIe)
        network_groups = target.configure(self.hef, configure_params)
        self.network_group = network_groups[0]

        self.input_vstreams_params = InputVStreamParams.make(self.network_group,
                                                             quantized=False,
                                                             format_type=FormatType.FLOAT32)
        self.output_vstreams_params = OutputVStreamParams.make(self.network_group, quantized=False, format_type=FormatType.FLOAT32)

        self.input_vstream_info = self.hef.get_input_vstream_infos()[0]
        self.output_vstream_infos = self.hef.get_output_vstream_infos()

        self.network_group_params = self.network_group.create_params()

    def run_hailo(self, dataset):
        input_data = {self.input_vstream_info.name: dataset}

        with InferVStreams(self.network_group, self.input_vstreams_params, self.output_vstreams_params) as infer_pipeline:
            with self.network_group.activate(self.network_group_params):
                infer_results = infer_pipeline.infer(input_data)
                out = [infer_results[i.name] for i in self.output_vstream_infos]
        return out
def process_yolo5():
    imageMeta = ImageMeta(640, 640, 3)
    processor = YoloProcessing(imageMeta, classes=3)
    data = DataHandler('./minimal_data', imageMeta)
    data.load_data(processor.preproc)

    hailo = HailoHandler('hef/yolov5m_22_2.hef')
    out = hailo.run_hailo(data.dataset)

    logits = processor.postprocessing(out)

    # (removed by this commit: the fps benchmarking loop that re-ran the old
    #  run_hailo/postprocessing helpers for 1000 samples and printed the fps)

    labels = data._get_labels("daria_names")
    image = visualize_boxes_and_labels_on_image_array(
        data.dataset[0],
        logits['detection_boxes'].numpy()[0],
        logits['detection_classes'][0],
        logits['detection_scores'].numpy()[0],
@@ -324,16 +309,5 @@ def process_yolo5():
    Image.fromarray(np.uint8(image)).save('/home/maintenance/test.png')
# (removed by this commit: the module-level COCO_2017_TO_2014_TRANSLATION dict;
#  the same mapping is kept above as COCO_17_14)
if __name__ == "__main__":
    process_yolo5()