hailo-inference/ros_inference.py
2022-03-17 11:49:00 +01:00

497 lines
19 KiB
Python

import copy
import io
import json
import os
import time
from multiprocessing import Process
from threading import Thread

from PIL import Image
import ipdb
from detection_tools.utils.visualization_utils import \
    visualize_boxes_and_labels_on_image_array
from hailo_platform import (ConfigureParams, FormatType, HEF,
                            HailoStreamInterface, InferVStreams,
                            InputVStreamParams, OutputVStreamParams,
                            PcieDevice)
import numpy as np
import tensorflow as tf
from tensorflow.image import combined_non_max_suppression
import rclpy
from rclpy.node import Node
from std_msgs.msg import String
from sensor_msgs.msg import Image as ImageMsg
from vision_msgs.msg import Detection2DArray, Detection2D, BoundingBox2D, ObjectHypothesisWithPose
from geometry_msgs.msg import Pose2D
from cv_bridge import CvBridge, CvBridgeError
# Collect images from data files
class ImageMeta:
    """Holds the height, width and channel count of an image."""

    def __init__(self, image_height, image_width, channels):
        # Values are stored verbatim; no validation or conversion happens here.
        self.image_height, self.image_width, self.channels = (
            image_height,
            image_width,
            channels,
        )
class DataHandler:
    """Load a JPEG image from a directory into a batch-of-one array.

    Also offers helpers that read ``{index: name}`` label maps from JSON
    files located relative to this module.
    """

    def __init__(self, path, image_meta):
        """Remember the image directory and the expected image dimensions.

        Args:
            path: Directory that is scanned for ``.jpg`` files.
            image_meta: Object exposing ``image_height``, ``image_width`` and
                ``channels``; used to size the batch array.
        """
        self.images_path = path
        self.image_meta = image_meta

    def load_data(self, preprocess_fn):
        """Load and preprocess the first ``.jpg`` listed in ``images_path``.

        The batch array has a fixed batch dimension of 1, so only a single
        image is ever loaded. When the directory holds no ``.jpg`` file the
        array stays all-zero and ``names`` stays empty.

        Args:
            preprocess_fn: Callable applied to the opened PIL image; its
                result must be convertible with ``np.array`` to the shape
                described by ``image_meta``.

        Side effects:
            Sets ``self.dataset`` (float32 array) and ``self.names`` (list of
            the loaded file names).
        """
        names = []
        images_list = [img_name for img_name in os.listdir(self.images_path)
                       if os.path.splitext(img_name)[1] == '.jpg']
        dataset = np.zeros((1, self.image_meta.image_height,
                            self.image_meta.image_width,
                            self.image_meta.channels),
                           dtype=np.float32)
        if images_list:
            img_name = images_list[0]
            # Convert to an array inside the ``with`` so the pixel data is
            # read before the underlying file is closed.
            with Image.open(os.path.join(self.images_path, img_name)) as img:
                dataset[0, :, :, :] = np.array(preprocess_fn(img))
            names.append(img_name)
        self.dataset = dataset
        self.names = names

    def _get_coco_labels(self):
        """Return the label map stored in ``coco_names.json`` next to this module."""
        return self.get_labels('coco_names.json')

    def get_labels(self, path):
        """Read a JSON label map and normalise it.

        Args:
            path: JSON file path, resolved relative to this module's
                directory (an absolute path is used as-is by ``os.path.join``).

        Returns:
            dict mapping ``int(key)`` to ``{'id': int(key), 'name': str(value)}``.
        """
        filename = os.path.join(os.path.dirname(__file__), path)
        # Context manager closes the file; the original left the handle open.
        with open(filename) as label_file:
            raw_names = json.load(label_file)
        return {int(k): {'id': int(k), 'name': str(v)} for (k, v) in raw_names.items()}
def get_labels(path):
    """Read a JSON label map located relative to this module.

    Args:
        path: JSON file path, joined onto this module's directory (an
            absolute path is used as-is by ``os.path.join``).

    Returns:
        dict mapping ``int(key)`` to ``{'id': int(key), 'name': str(value)}``.
    """
    filename = os.path.join(os.path.dirname(__file__), path)
    # Context manager closes the file; the original left the handle open.
    with open(filename) as label_file:
        raw_names = json.load(label_file)
    return {int(k): {'id': int(k), 'name': str(v)} for (k, v) in raw_names.items()}
# Lookup table from contiguous class indices (0-80) to sparse category ids
# (0-90); the target numbering skips some ids (e.g. 12, 26, 29, 30).
# Applied element-wise to the NMS class output in YoloProcessing.postprocessing.
# NOTE(review): the name suggests a COCO-2017 -> COCO-2014 id translation —
# confirm against the label files in use.
COCO_17_14 = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9,
10: 10, 11: 11, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18,
18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 27,
26: 28, 27: 31, 28: 32, 29: 33, 30: 34, 31: 35, 32: 36, 33: 37,
34: 38, 35: 39, 36: 40, 37: 41, 38: 42, 39: 43, 40: 44, 41: 46,
42: 47, 43: 48, 44: 49, 45: 50, 46: 51, 47: 52, 48: 53, 49: 54,
50: 55, 51: 56, 52: 57, 53: 58, 54: 59, 55: 60, 56: 61, 57: 62,
58: 63, 59: 64, 60: 65, 61: 67, 62: 70, 63: 72, 64: 73, 65: 74,
66: 75, 67: 76, 68: 77, 69: 78, 70: 79, 71: 80, 72: 81, 73: 82,
74: 84, 75: 85, 76: 86, 77: 87, 78: 88, 79: 89, 80: 90}
class YoloProcessing:
    """Image pre-processing and YOLOv5-style output decoding / NMS."""

    def __init__(self, imageMeta, classes):
        """Store the target image size and the class count.

        Args:
            imageMeta: Object exposing ``image_height`` and ``image_width``.
            classes: Number of classes. It is only stored; decoding derives
                the class count from the output tensor's channel dimension.
        """
        self.output_height = imageMeta.image_height
        self.output_width = imageMeta.image_width
        self.classes = classes

    def preproc(self, image, resize_side=256):
        """Resize ``image`` to height ``resize_side`` and crop the output window.

        The width is scaled by the same factor as the height. The crop window
        is ``output_width`` x ``output_height``, horizontally centred and
        anchored at the top edge.

        NOTE(review): with the default ``resize_side=256`` and a larger
        ``output_height`` (e.g. 640) the crop window extends past the resized
        image — confirm this is intended.

        Args:
            image: PIL image (needs ``width``, ``height``, ``resize``).
            resize_side: Height in pixels of the resized image.

        Returns:
            The cropped PIL image.
        """
        new_width = int(image.width/image.height*resize_side)
        new_height = resize_side
        x, y = (new_width-self.output_width)/2, 0
        # Select area to crop
        area = (x, y, x+self.output_width, y+self.output_height)
        cropped_img = image.resize((new_width, new_height)).crop(area)
        return cropped_img

    def yolo_postprocess_numpy(self, net_out, anchors_for_stride, stride):
        """Decode one output branch into boxes and per-class scores.

        Args:
            net_out: Array of shape ``[N, H, W, C]`` with
                ``C == num_anchors * (5 + num_classes)``; per anchor the
                layout is 2 box centers, 2 box scales, 1 objectness, then the
                class scores.
            anchors_for_stride: NumPy array holding ``(w, h)`` anchor pairs,
                broadcastable against ``[N, H*W, num_anchors, 2]``.
            stride: Stride of this branch relative to the network input.

        Returns:
            Tuple ``(detection_boxes, detection_scores)`` of float32 arrays:
            boxes are ``[N, H*W*num_anchors, 1, 4]`` in
            ``ymin, xmin, ymax, xmax`` order (input pixel units), scores are
            ``[N, H*W*num_anchors, num_classes]``.

        Raises:
            ValueError: If ``C`` cannot be split into ``num_anchors`` groups
                of at least 6 values.
        """
        batch_size, grid_h, grid_w, channels = net_out.shape
        num_anchors = anchors_for_stride.size // 2  # 2 params for each anchor.
        box_params = 5  # 2 box centers, 2 box scales, 1 objness
        if num_anchors == 0 or channels % num_anchors != 0:
            raise ValueError(
                "cannot split %d output channels across %d anchors"
                % (channels, num_anchors))
        # Derive the layout from the tensor itself instead of hard-coding the
        # class count, so the decoder follows whatever network is loaded.
        num_pred = channels // num_anchors
        num_classes = num_pred - box_params
        if num_classes < 1:
            raise ValueError(
                "expected at least %d values per anchor, got %d"
                % (box_params + 1, num_pred))

        # Cell offsets as (x, y) pairs in row-major cell order.
        grid_x, grid_y = np.meshgrid(np.arange(grid_w), np.arange(grid_h))  # dims [H, W]
        offsets = np.stack((grid_x, grid_y), axis=-1)  # dim [H, W, 2]
        offsets = np.reshape(offsets, (1, -1, 1, 2))  # dim [1, HxW, 1, 2]

        # NHWC is already cell-major, so one reshape separates the anchors.
        pred = np.reshape(net_out, (batch_size, -1, num_anchors, num_pred))  # dim [N, HxW, A, P]
        raw_box_centers = pred[:, :, :, 0:2]  # dim [N, HxW, A, 2]
        raw_box_scales = pred[:, :, :, 2:4]  # dim [N, HxW, A, 2]
        objness = pred[:, :, :, 4:5]  # dim [N, HxW, A, 1]
        class_pred = pred[:, :, :, 5:]  # dim [N, HxW, A, num_classes]

        box_centers, box_scales, confidence, class_pred = self._yolo5_decode(
            raw_box_centers=raw_box_centers,
            raw_box_scales=raw_box_scales,
            objness=objness,
            class_pred=class_pred,
            anchors_for_stride=anchors_for_stride,
            offsets=offsets,
            stride=stride)
        class_score = class_pred * confidence  # dim [N, HxW, A, num_classes]
        wh = box_scales / 2.0
        # dim [N, HxW, A, 4]. scheme xmin, ymin, xmax, ymax
        bbox = np.concatenate((box_centers - wh, box_centers + wh), axis=-1)
        # switching scheme from xmin, ymin, xmax, ymax to ymin, xmin, ymax, xmax:
        bbox = bbox[..., [1, 0, 3, 2]]
        detection_boxes = np.reshape(bbox, (batch_size, -1, 1, 4))  # dim [N, num_detections, 1, 4]
        detection_scores = np.reshape(class_score, (batch_size, -1, num_classes))
        return detection_boxes.astype(np.float32), detection_scores.astype(np.float32)

    def _yolo5_decode(self, raw_box_centers, raw_box_scales, objness, class_pred, anchors_for_stride, offsets, stride):
        """Apply the box-center and box-scale formulas to raw predictions.

        Objectness and class predictions are returned unchanged.
        """
        box_centers = (raw_box_centers * 2. - 0.5 + offsets) * stride
        box_scales = (raw_box_scales * 2) ** 2 * anchors_for_stride  # dim [N, HxW, A, 2]
        return box_centers, box_scales, objness, class_pred

    def visualize_image(self, logits, image):
        """Draw the detections on ``image``, save the result and display it.

        Args:
            logits: Dict as returned by ``postprocessing``.
            image: Image array to draw on.

        Side effects:
            Reads ``data/daria_labels.json``, writes
            ``/home/maintenance/test.png`` and opens an image viewer.
        """
        labels = get_labels("data/daria_labels.json")
        image = visualize_boxes_and_labels_on_image_array(
            image,
            logits['detection_boxes'].numpy()[0],
            logits['detection_classes'][0],
            logits['detection_scores'].numpy()[0],
            labels,
            use_normalized_coordinates=True,
            max_boxes_to_draw=100,
            min_score_thresh=.5,
            agnostic_mode=False,
            line_thickness=4)
        Image.fromarray(np.uint8(image)).save('/home/maintenance/test.png')
        Image.fromarray(np.uint8(image)).show()

    def postprocessing(self, endnodes):
        """Decode all output branches and run combined non-max suppression.

        Args:
            endnodes: Sequence of up to 3 output tensors, ordered
                endnodes[0] - stride 32 of input,
                endnodes[1] - stride 16 of input,
                endnodes[2] - stride 8 of input.

        Returns:
            Dict with keys ``detection_boxes``, ``detection_scores`` and
            ``num_detections`` (outputs of ``combined_non_max_suppression``)
            and ``detection_classes`` (int32 NumPy array of class ids after
            the offset and the ``COCO_17_14`` translation). Box coordinates
            are normalised by the 640 pixel input size.

        Raises:
            ValueError: If ``endnodes`` is empty.
            IndexError: If more than 3 branches are supplied.
        """
        if len(endnodes) == 0:
            raise ValueError("endnodes must contain at least one output tensor")

        # The input is square, so one divisor normalises both axes.
        H_input = 640
        # (stride, flattened anchor pairs), in the order of ``endnodes``.
        branch_specs = (
            (32, [116, 90, 156, 198, 373, 326]),
            (16, [30, 61, 62, 45, 59, 119]),
            (8, [10, 13, 16, 30, 33, 23]),
        )
        boxes_per_branch = []
        scores_per_branch = []
        for output_ind, output_branch in enumerate(endnodes):  # iterating over the output layers:
            stride, anchors = branch_specs[output_ind]
            anchors_for_stride = np.reshape(np.array(anchors), (1, 1, -1, 2))  # dim [1, 1, 3, 2]
            # detection_boxes is a [BS, num_detections, 1, 4] tensor, detection_scores is a
            # [BS, num_detections, num_classes] tensor
            detection_boxes, detection_scores = self.yolo_postprocess_numpy(output_branch,
                                                                            anchors_for_stride,
                                                                            stride)
            boxes_per_branch.append(detection_boxes / H_input)  # normalization of box coordinates to 1
            scores_per_branch.append(detection_scores)
        # concatenating the detections from the different output layers:
        detection_boxes_full = np.concatenate(boxes_per_branch, axis=1)
        detection_scores_full = np.concatenate(scores_per_branch, axis=1)

        score_threshold = 0.5
        nms_iou_threshold = 0.5
        labels_offset = 1
        (nmsed_boxes, nmsed_scores, nmsed_classes, num_detections) = \
            combined_non_max_suppression(boxes=detection_boxes_full,
                                         scores=detection_scores_full,
                                         score_threshold=score_threshold,
                                         iou_threshold=nms_iou_threshold,
                                         max_output_size_per_class=100,
                                         max_total_size=100)
        # adding offset to the class prediction and cast to integer
        nmsed_classes = tf.cast(tf.add(nmsed_classes, labels_offset), tf.int16)
        # translate the class ids through the COCO_17_14 lookup table
        nmsed_classes = np.vectorize(COCO_17_14.get)(nmsed_classes).astype(np.int32)
        return {'detection_boxes': nmsed_boxes,
                'detection_scores': nmsed_scores,
                'detection_classes': nmsed_classes,
                'num_detections': num_detections}
class HailoHandler:
    """Run inference on a Hailo PCIe device, synchronously or via a worker thread.

    The asynchronous path coordinates through plain boolean attributes
    (``hailo_async``, ``hailo_block``) that are polled by both sides.
    NOTE(review): these flags are not real locks; confirm whether stronger
    synchronisation is needed for the intended call pattern.
    """

    def __init__(self, hef_path='hef/yolov5m.hef'):
        """Load the HEF file and configure the first network group.

        Args:
            hef_path: Path of the HEF file to load.
        """
        target = PcieDevice()
        self.hef = HEF(hef_path)
        # Configure network groups
        configure_params = ConfigureParams.create_from_hef(hef=self.hef,
                                                           interface=HailoStreamInterface.PCIe)
        network_groups = target.configure(self.hef, configure_params)
        self.network_group = network_groups[0]
        self.input_vstreams_params = InputVStreamParams.make(self.network_group,
                                                             quantized=False,
                                                             format_type=FormatType.FLOAT32)
        self.output_vstreams_params = OutputVStreamParams.make(self.network_group,
                                                               quantized=False,
                                                               format_type=FormatType.FLOAT32)
        self.input_vstream_info = self.hef.get_input_vstream_infos()[0]
        self.output_vstream_infos = self.hef.get_output_vstream_infos()
        self.network_group_params = self.network_group.create_params()

    def run_hailo(self, dataset):
        """Run one blocking inference.

        Args:
            dataset: Input data for the first input vstream.

        Returns:
            List of output arrays, ordered like ``output_vstream_infos``.
        """
        input_data = {self.input_vstream_info.name: dataset}
        with InferVStreams(self.network_group, self.input_vstreams_params,
                           self.output_vstreams_params) as infer_pipeline:
            with self.network_group.activate(self.network_group_params):
                infer_results = infer_pipeline.infer(input_data)
        out = [infer_results[i.name] for i in self.output_vstream_infos]
        return out

    def start_hailo_thread(self):
        """Reset the shared state and start the inference worker thread."""
        self.hailo_async = True
        self.hailo_block = False
        self.input_data = None
        self._infer_results = None
        self.hailo_thread = Thread(target=self._hailo_async)
        self.hailo_thread.start()

    def _hailo_async(self):
        """Worker entry point: open the pipeline, activate the network, loop."""
        with InferVStreams(self.network_group, self.input_vstreams_params,
                           self.output_vstreams_params) as infer_pipeline:
            with self.network_group.activate(self.network_group_params):
                self._hailo_async_loop(infer_pipeline)

    def _hailo_async_loop(self, infer_pipeline):
        """Poll for pending input and run inference until ``hailo_async`` is cleared.

        Args:
            infer_pipeline: Object whose ``infer(input_dict)`` returns a dict
                keyed by output vstream name.
        """
        while self.hailo_async:
            if not self.hailo_block and self.input_data is not None:
                self._infer_results = None
                self.hailo_block = True
                infer_results = infer_pipeline.infer(self.input_data)
                self._infer_results = [infer_results[i.name] for i in self.output_vstream_infos]
                self.input_data = None
                self.hailo_block = False
            else:
                # Nothing to do: yield instead of spinning at full speed,
                # matching the polling interval used by the other wait loops.
                time.sleep(0.001)

    def hailo_input(self, input_data):
        """Hand a new input to the worker thread.

        Waits until ``hailo_block`` is clear, then stores ``input_data``
        wrapped in a dict keyed by the input vstream name.
        """
        while self.hailo_block:
            time.sleep(0.001)
        self.hailo_block = True
        # Store only the final dict; the worker must never see the raw array.
        self.input_data = {self.input_vstream_info.name: input_data}
        self.hailo_block = False

    def hailo_output(self):
        """Wait until ``hailo_block`` is clear and return the latest results.

        Returns:
            The list stored by the worker, or ``None`` if no result is
            currently available.
        """
        while self.hailo_block:
            time.sleep(0.001)
        return self._infer_results

    def stop_hailo_thread(self):
        """Ask the worker loop to exit and wait for the thread to finish."""
        self.hailo_async = False
        self.hailo_thread.join()
class HailoNode(Node):
    """ROS 2 node that feeds camera images to the Hailo device and publishes detections.

    Subscribes to ``/r3_cam_left_0`` and publishes ``Detection2DArray``
    messages on ``/hailo_bounding_boxes``. Post-processing and publishing run
    in two worker threads that coordinate through polled boolean attributes.
    """

    def __init__(self):
        self._ros_init()
        self._metadata_init()
        self._object_init()
        self._thread_init()

    def __del__(self):
        self._stop_threads()

    def destroy_node(self):
        """Stop the worker threads, then destroy the node as usual."""
        self._stop_threads()
        return super().destroy_node()

    def _stop_threads(self):
        """Signal all worker threads to exit and wait for them.

        Safe to call more than once and on a partially initialised instance.
        """
        self._thread_run = False
        for worker in (getattr(self, '_post_process', None),
                       getattr(self, 'publish_thread', None)):
            if worker is not None and worker.is_alive():
                worker.join()
        # Stop the device thread last: the post-processing thread polls it.
        hailo = getattr(self, 'hailo', None)
        if hailo is not None and hasattr(hailo, 'hailo_thread'):
            hailo.stop_hailo_thread()

    def _ros_init(self):
        """Initialise the ROS node, its subscription and its publishers."""
        super().__init__('hailo_image_subscriber')
        self.sub = self.create_subscription(ImageMsg, '/r3_cam_left_0', self._image_callback, 10)
        self.pub = self.create_publisher(Detection2DArray, '/hailo_bounding_boxes', 10)
        self.pub_ping = self.create_publisher(String, '/ping', 1)

    def _metadata_init(self):
        """Set the image dimensions, the processor and the HEF path."""
        # TODO into yaml file
        classes = 3
        self.image_meta = ImageMeta(640, 640, 3)
        self.processor = YoloProcessing(self.image_meta, classes)
        self.hailo_hef = 'hef/yolov5m_daria.hef'
        # Label map, loaded on first publish and reused afterwards.
        self._labels = None

    def _object_init(self):
        """Create the Hailo handler and the cv_bridge converter."""
        self.hailo = HailoHandler(self.hailo_hef)
        self.bridge = CvBridge()

    def _thread_init(self):
        """Reset the shared state and start all worker threads."""
        self._thread_run = True
        self._new_input = False
        self.yolo_image = None
        self.hailo.start_hailo_thread()
        self.detections = None
        self.detections_new = False
        self.detections_mutex = False
        # Keep the Thread objects: Thread.start() returns None, so chaining
        # it onto the constructor would lose the handle needed for join().
        self._post_process = Thread(target=self._thread_postprocessing)
        self.publish_thread = Thread(target=self._thread_publish)
        self._post_process.start()
        self.publish_thread.start()

    def _image_callback(self, ros_image):
        """Convert, preprocess and submit one incoming camera image."""
        image = self._convert_ros_to_pil(ros_image)
        if image is None:
            # Conversion failed (already reported); drop this frame.
            return
        self.yolo_image = self._preprocess(image)
        self.image_infer(self.yolo_image)
        self._new_input = True

    def _preprocess(self, image):
        """Crop/resize ``image`` and wrap it in a batch-of-one array."""
        image = self.processor.preproc(image)
        return self._dataset_from_image(image)

    def image_infer(self, data):
        """Hand ``data`` to the Hailo worker thread."""
        self.hailo.hailo_input(data)

    def _thread_postprocessing(self):
        """Worker: turn raw network output into detections until stopped."""
        while self._thread_run:
            output = None
            # Wait for a result that belongs to a new input, but stay
            # responsive to a stop request.
            while self._thread_run and (output is None or not self._new_input):
                time.sleep(0.001)
                output = self.hailo.hailo_output()
            if not self._thread_run:
                break
            self._new_input = False
            now = time.time()
            self.detections_mutex = True
            self.detections = self.processor.postprocessing(output)
            self.detections_new = True
            self.detections_mutex = False
            print("postprocessing time: ", time.time() - now)
            self.processor.visualize_image(self.detections, self.yolo_image[0])

    def _thread_publish(self):
        """Worker: publish each new set of detections until stopped."""
        while self._thread_run:
            while self._thread_run and (self.detections_mutex or not self.detections_new):
                time.sleep(0.001)
            if not self._thread_run:
                break
            self._publish_detection(self.detections)
            self.detections_new = False

    def _publish_ping(self, msg="ping"):
        """Publish ``msg`` as a ``String`` on ``/ping``."""
        s = String()
        s.data = msg
        self.pub_ping.publish(s)

    def _publish_detection(self, detections):
        """Publish all detections scoring above 0.01 as one ``Detection2DArray``.

        Args:
            detections: Dict as returned by ``YoloProcessing.postprocessing``;
                only batch element 0 is published.
        """
        if self._labels is None:
            # Read the label file once instead of on every publish.
            self._labels = get_labels("data/daria_labels.json")
        labels = self._labels
        # Convert once up front rather than on every loop iteration.
        all_boxes = detections['detection_boxes'].numpy()[0]
        all_classes = detections['detection_classes'][0]
        all_scores = detections['detection_scores'].numpy()[0]
        detection_array = Detection2DArray()
        for box, class_id, score in zip(all_boxes, all_classes, all_scores):
            if score > 0.01:
                # NOTE(review): the four box values are copied as-is into
                # center.x, center.y, size_x, size_y. Postprocessing produces
                # ymin, xmin, ymax, xmax, so these are corner values rather
                # than a centre and a size — confirm what subscribers expect
                # before changing the mapping.
                bbox = BoundingBox2D()
                bbox.center = Pose2D()
                bbox.center.x = float(box[0])
                bbox.center.y = float(box[1])
                bbox.size_x = float(box[2])
                bbox.size_y = float(box[3])
                oh = ObjectHypothesisWithPose(id=str(labels[class_id]), score=float(score))
                detection = Detection2D(results=[oh], bbox=bbox)
                detection_array.detections.append(detection)
        self.pub.publish(detection_array)

    def _convert_ros_to_pil(self, ros_image):
        """Convert a ROS image message to an RGB PIL image.

        Returns:
            The PIL image, or ``None`` when cv_bridge fails to convert the
            message (the error is printed).
        """
        try:
            img = self.bridge.imgmsg_to_cv2(ros_image, "rgb8")
        except CvBridgeError as e:
            print(e)
            return None
        return Image.fromarray(img)

    def _dataset_from_image(self, image):
        """Copy ``image`` into a new float32 array of shape ``(1, H, W, C)``."""
        dataset = np.zeros((1, self.image_meta.image_height,
                            self.image_meta.image_width,
                            self.image_meta.channels),
                           dtype=np.float32)
        dataset[0, :, :, :] = np.array(image)
        return dataset
def main(args=None):
    """Initialise rclpy, spin a ``HailoNode`` and clean up when spinning ends.

    Args:
        args: Command-line arguments forwarded to ``rclpy.init``.
    """
    rclpy.init(args=args)
    hailo_node = HailoNode()
    try:
        rclpy.spin(hailo_node)
    finally:
        # Release the node even when spin() exits through an exception
        # such as KeyboardInterrupt.
        hailo_node.destroy_node()
        # The context may already have been shut down by a signal handler.
        if rclpy.ok():
            rclpy.shutdown()


if __name__ == "__main__":
    main()