hailo-inference/cpp_inference/yolov5.cpp
2022-03-31 11:10:10 +02:00

201 lines
9.0 KiB
C++

#include "yolov5.hpp"
#include <cmath>
#include <stdint.h>
#include <stdio.h>
float fix_scale(float32_t input, float32_t qp_scale, float32_t qp_zp)
{
return (input - qp_zp) * qp_scale;
}
inline float minm(float n1, float n2)
{
if (n1<n2) {
return n1;
}
return n2;
}
inline float maxm(float n1, float n2)
{
if (n1>n2) {
return n1;
}
return n2;
}
float32_t iou_calc_c(float32_t *box_1, float32_t *box_2)
{
float width_of_overlap_area = minm(box_1[XMAX], box_2[XMAX]) - maxm(box_1[XMIN], box_2[XMIN]);
float height_of_overlap_area = minm(box_1[YMAX], box_2[YMAX]) - maxm(box_1[YMIN], box_2[YMIN]);
float positive_width_of_overlap_area = maxm(width_of_overlap_area + 1, 0.0f);
float positive_height_of_overlap_area = maxm(height_of_overlap_area + 1, 0.0f);
float area_of_overlap = positive_width_of_overlap_area * positive_height_of_overlap_area;
float box_1_area = (box_1[YMAX] - box_1[YMIN] + 1) * (box_1[XMAX] - box_1[XMIN] + 1);
float box_2_area = (box_2[YMAX] - box_2[YMIN] + 1) * (box_2[XMAX] - box_2[XMIN] + 1);
return area_of_overlap / (box_1_area + box_2_area - area_of_overlap);
}
#if 1
static void get_class(uint8_t *fm, uint32_t row, uint32_t col, uint32_t anchor, uint32_t feature_map_size,
uint32_t *class_id, uint32_t *class_prob)
{
uint32_t cls_prob, prob_max = 0;
uint32_t selected_class_id = 1;
for (uint32_t cls_id = 1; cls_id <= CLASSES_COUNT; ++cls_id)
{
uint32_t cls_prob_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor + CLASS_CHANNEL_OFFSET + cls_id - 1;
cls_prob = fm[cls_prob_tensor_index];
if (cls_prob > prob_max)
{
selected_class_id = cls_id;
prob_max = cls_prob;
}
}
*class_prob = prob_max;
*class_id = selected_class_id;
return;
}
void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale, uint32_t feature_map_size,
int* anchors, float32_t thr, uint32_t *box_index, float32_t (*box_array)[6])
{
float32_t confidence, x, y, h, w, xmin, ymin, xmax, ymax = 0.0f;
uint32_t confidence_tensor_index = 0;
uint32_t class_id = 0;
uint32_t class_prob_int = 0;
float32_t class_prob = 0.0f;
uint32_t x_tensor_index = 0;
for (uint32_t row = 0; row < feature_map_size; ++row) {
for (uint32_t col = 0; col < feature_map_size; ++col) {
for (uint32_t anchor = 0; anchor < ANCHORS_NUM; ++anchor) {
confidence_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor + CONF_CHANNEL_OFFSET;
if (feature_map_size==20 && confidence_tensor_index >= 9600) {
printf("row: %u col %u anchor %u\n", row, col, anchor);
}
confidence = fix_scale(fm[confidence_tensor_index], qp_scale, qp_zp);
if (confidence < thr) {
continue;
}
get_class(fm, row, col, anchor, feature_map_size, &class_id, &class_prob_int);
class_prob = fix_scale(class_prob_int, qp_scale, qp_zp);
confidence = class_prob * confidence;
if (confidence > thr)
{
//printf("class_prob: %f, confidence: %f\n", class_prob, confidence);
x_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor;
if (row == 1 && col == 1 && anchor == 1) {
printf("x index: %u\n", x_tensor_index);
}
x = (fix_scale(fm[x_tensor_index], qp_scale, qp_zp) * 2.0f - 0.5f + col) / feature_map_size;
y = (fix_scale(fm[x_tensor_index + 1], qp_scale, qp_zp) * 2.0f - 0.5f + row) / feature_map_size;
w = pow(2.0f * (fix_scale(fm[x_tensor_index + 2], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2] / IMAGE_SIZE;
h = pow(2.0f * (fix_scale(fm[x_tensor_index + 3], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2 + 1] / IMAGE_SIZE;
xmin = (x - (w / 2.0f)) * IMAGE_SIZE;
ymin = (y - (h / 2.0f)) * IMAGE_SIZE;
xmax = (x + (w / 2.0f)) * IMAGE_SIZE;
ymax = (y + (h / 2.0f)) * IMAGE_SIZE;
if (*box_index < MAX_BOXES)
{
//printf("class type %d\n", chosen_cls);
box_array[*box_index][0] = ymin;
box_array[*box_index][1] = xmin;
box_array[*box_index][2] = ymax;
box_array[*box_index][3] = xmax;
box_array[*box_index][4] = confidence;
box_array[*box_index][5] = class_id;
*box_index = *box_index + 1;
}
}
}
}
}
}
#endif
#if 0
void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale, int feature_map_size,
int* anchors, float32_t thr, uint32_t *box_index, float32_t (*box_array)[6])
{
float32_t confidence, x, y, h, w, xmin, ymin, xmax, ymax, conf_max = 0.0f;
int add = 0, anchor = 0, chosen_row = 0, chosen_col = 0, chosen_cls = -1;
float32_t cls_prob, prob_max = 0.0f;
// channels 0-3 are box coordinates, channel 4 is the confidence, and channels 5-84 are classes
for (int row = 0; row < feature_map_size; ++row) {
for (int col = 0; col < feature_map_size; ++col) {
prob_max = 0;
for (int a = 0; a < ANCHORS_NUM; ++a) {
add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * a + CONF_CHANNEL_OFFSET;
//confidence = fix_scale(fm[add], qp_scale, qp_zp);
confidence = (fm[add])*qp_scale;
if (confidence > thr)
//printf("no way we are here %f\n",qp_scale);
for (int c = CLASS_CHANNEL_OFFSET; c < FEATURE_MAP_CHANNELS; ++c) {
add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * a + c;
// final confidence: box confidence * class probability
cls_prob = fm[add];
if (cls_prob > prob_max)
{
conf_max = fix_scale(cls_prob, qp_scale, qp_zp) * confidence;
chosen_cls = c - CLASS_CHANNEL_OFFSET + 1;
prob_max = cls_prob;
anchor = a;
chosen_row = row;
chosen_col = col;
}
}
}
float basetemp;// = 2.0f * (fix_scale(fm[add + 2], qp_scale, qp_zp));
float tempresult = 1.0;
//float exptemp = 2.0;
if (conf_max > thr) {
add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * chosen_row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * chosen_col + FEATURE_MAP_CHANNELS * anchor;
x = (fix_scale(fm[add], qp_scale, qp_zp) * 2.0f - 0.5f + chosen_col) / feature_map_size;
y = (fix_scale(fm[add + 1], qp_scale, qp_zp) * 2.0f - 0.5f + chosen_row) / feature_map_size;
basetemp = 2.0f * (fix_scale(fm[add + 2], qp_scale, qp_zp));
//tempresult = 1.0f;
//exptemp = 2.0f;
//while (exptemp != 0) {
// tempresult *= basetemp;
// --exptemp;
//}
tempresult = basetemp*basetemp;
w = tempresult * anchors[anchor * 2] / IMAGE_SIZE;
basetemp = 2.0f * (fix_scale(fm[add + 3], qp_scale, qp_zp));
//tempresult = 1.0f;
//exptemp = 2.0f;
//while (exptemp != 0) {
// tempresult *= basetemp;
// --exptemp;
//}
tempresult = basetemp*basetemp;
h = tempresult * anchors[anchor * 2 + 1] / IMAGE_SIZE;
xmin = (x - (w / 2.0f)) * IMAGE_SIZE;
ymin = (y - (h / 2.0f)) * IMAGE_SIZE;
xmax = (x + (w / 2.0f)) * IMAGE_SIZE;
ymax = (y + (h / 2.0f)) * IMAGE_SIZE;
if (*box_index < MAX_BOXES)
{
//printf("class type %d\n", chosen_cls);
box_array[*box_index][0] = ymin;
box_array[*box_index][1] = xmin;
box_array[*box_index][2] = ymax;
box_array[*box_index][3] = xmax;
box_array[*box_index][4] = conf_max;
box_array[*box_index][5] = chosen_cls;
*box_index = *box_index + 1;
}
}
}
}
}
#endif