// hailo-inference/cpp_inference/yolov5.cpp

#include "yolov5.hpp"
#include <cmath>
#include <stdint.h>
#include <stdio.h>


// Dequantize a single value: subtract qp_zp from the input, then multiply
// the difference by qp_scale.
float fix_scale(float32_t input, float32_t qp_scale, float32_t qp_zp)
{
    float32_t shifted = input - qp_zp;
    return shifted * qp_scale;
}

// Return the smaller of the two values; when n1 is not strictly less than n2
// (including when they are equal) n2 is returned.
inline float minm(float n1, float n2)
{
    return (n1 < n2) ? n1 : n2;
}

// Return the larger of the two values; when n1 is not strictly greater than n2
// (including when they are equal) n2 is returned.
inline float maxm(float n1, float n2)
{
    return (n1 > n2) ? n1 : n2;
}

// Intersection-over-union of two boxes.
//
// Each box is an array indexed with the XMIN / YMIN / XMAX / YMAX constants.
// Every side length (of the overlap and of the boxes themselves) has 1 added
// to it -- presumably because coordinates are treated as inclusive; confirm
// against the callers.
float32_t iou_calc_c(float32_t *box_1, float32_t *box_2)
{
    // Raw overlap extents; these go negative when the boxes are apart.
    float overlap_w = minm(box_1[XMAX], box_2[XMAX]) - maxm(box_1[XMIN], box_2[XMIN]);
    float overlap_h = minm(box_1[YMAX], box_2[YMAX]) - maxm(box_1[YMIN], box_2[YMIN]);

    // Clamp each extent at zero before multiplying so that two negative
    // extents cannot produce a positive area.
    float clamped_w = maxm(overlap_w + 1, 0.0f);
    float clamped_h = maxm(overlap_h + 1, 0.0f);
    float intersection = clamped_w * clamped_h;

    float area_1 = (box_1[YMAX] - box_1[YMIN] + 1)  * (box_1[XMAX] - box_1[XMIN] + 1);
    float area_2 = (box_2[YMAX] - box_2[YMIN] + 1)  * (box_2[XMAX] - box_2[XMIN] + 1);

    // Union = sum of both areas minus the part counted twice.
    float union_area = area_1 + area_2 - intersection;
    return intersection / union_area;
}

#if 1
// Pick the class with the largest raw (still quantized) score for one
// (row, col, anchor) entry of the feature map.
//
// fm               quantized feature map
// row, col, anchor position of the entry inside the map
// feature_map_size number of columns per row of the map
// class_id         out: 1-based id of the winning class (1 if no score is above 0)
// class_prob       out: raw score of the winning class (0 if no score is above 0)
//
// On ties the lowest class id wins, because a later class only replaces the
// current best when its score is strictly greater.
static void get_class(uint8_t *fm, uint32_t row, uint32_t col, uint32_t anchor, uint32_t feature_map_size,
uint32_t *class_id, uint32_t *class_prob)
{
    // Index of the first class channel of this entry.
    const uint32_t first_class_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor + CLASS_CHANNEL_OFFSET;

    uint32_t best_id = 1;
    uint32_t best_score = 0;
    for (uint32_t candidate = 1; candidate <= CLASSES_COUNT; ++candidate) {
        // Class ids are 1-based, channel offsets are 0-based.
        const uint32_t score = fm[first_class_index + candidate - 1];
        if (score <= best_score) {
            continue;
        }
        best_id = candidate;
        best_score = score;
    }

    *class_prob = best_score;
    *class_id = best_id;
}

void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale, uint32_t feature_map_size,
		   int* anchors, float32_t thr, uint32_t *box_index, float32_t (*box_array)[6])
{
    float32_t confidence, x, y, h, w, xmin, ymin, xmax, ymax = 0.0f;
    uint32_t confidence_tensor_index = 0;
    uint32_t class_id = 0;
    uint32_t class_prob_int = 0;
    float32_t class_prob = 0.0f;
    uint32_t x_tensor_index = 0;
    for (uint32_t row = 0; row < feature_map_size; ++row) {
        for (uint32_t col = 0; col < feature_map_size; ++col) {
            for (uint32_t anchor = 0; anchor < ANCHORS_NUM; ++anchor) {
                confidence_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor + CONF_CHANNEL_OFFSET;
                if (feature_map_size==20 && confidence_tensor_index >= 9600) {
                    printf("row: %u col %u anchor %u\n", row, col, anchor);
                }
                confidence = fix_scale(fm[confidence_tensor_index], qp_scale, qp_zp);
                if (confidence < thr) {
                    continue;
                }
                get_class(fm, row, col, anchor, feature_map_size, &class_id, &class_prob_int);
                class_prob = fix_scale(class_prob_int, qp_scale, qp_zp);
                confidence = class_prob * confidence;
                if (confidence > thr)
                {
                    //printf("class_prob: %f, confidence: %f\n", class_prob, confidence);
                    x_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor;
                    if (row == 1 && col == 1 && anchor == 1) {
                        printf("x index: %u\n", x_tensor_index);
                    }
                    x = (fix_scale(fm[x_tensor_index], qp_scale, qp_zp) * 2.0f - 0.5f + col) / feature_map_size;
                    y = (fix_scale(fm[x_tensor_index + 1], qp_scale, qp_zp) * 2.0f - 0.5f +  row) / feature_map_size;
                    w = pow(2.0f * (fix_scale(fm[x_tensor_index + 2], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2] / IMAGE_SIZE;
                    h = pow(2.0f * (fix_scale(fm[x_tensor_index + 3], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2 + 1] / IMAGE_SIZE;

                    xmin = (x - (w / 2.0f)) * IMAGE_SIZE;
                    ymin = (y - (h / 2.0f)) * IMAGE_SIZE;
                    xmax = (x + (w / 2.0f)) * IMAGE_SIZE;
                    ymax = (y + (h / 2.0f)) * IMAGE_SIZE;
                    if (*box_index < MAX_BOXES)
                    {
                        //printf("class type %d\n", chosen_cls);
                        box_array[*box_index][0] = ymin;
                        box_array[*box_index][1] = xmin;
                        box_array[*box_index][2] = ymax;
                        box_array[*box_index][3] = xmax;
                        box_array[*box_index][4] = confidence;
                        box_array[*box_index][5] = class_id;
                        *box_index = *box_index + 1;
                    }
                }
            }
        }
    }
}
#endif

#if 0
// Alternative box extractor. This definition sits inside an `#if 0` region,
// so it is not compiled.
//
// For every (row, col) cell it looks at all anchors, picks the class channel
// with the largest raw value among the anchors whose box confidence exceeds
// thr, and emits at most one box for the cell. Box rows are written as
// {ymin, xmin, ymax, xmax, confidence, class_id} with a 1-based class id.
//
// NOTE(review): conf_max and the chosen_* variables are never reset between
// cells, so once conf_max has exceeded thr a later cell that selects nothing
// appears to re-emit the previous selection -- confirm before re-enabling.
void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale, int feature_map_size,
		   int* anchors, float32_t thr, uint32_t *box_index, float32_t (*box_array)[6])
{
    // Only conf_max is initialized by this declaration; the others are assigned before use.
    float32_t confidence, x, y, h, w, xmin, ymin, xmax, ymax, conf_max = 0.0f;
    int add = 0, anchor = 0, chosen_row = 0, chosen_col = 0, chosen_cls = -1;
    float32_t cls_prob, prob_max = 0.0f;

    // channels 0-3 are box coordinates, channel 4 is the confidence, and channels 5-84 are classes
    for (int row = 0; row < feature_map_size; ++row) {
        for (int col = 0; col < feature_map_size; ++col) {
            // Best raw class value is tracked per cell, across all of its anchors.
            prob_max = 0;
            for (int a = 0; a < ANCHORS_NUM; ++a) {
                add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * a + CONF_CHANNEL_OFFSET;
                //confidence = fix_scale(fm[add], qp_scale,  qp_zp);
                // Unlike fix_scale(), this scales the raw value without subtracting qp_zp.
                confidence = (fm[add])*qp_scale;
                // The class scan below is the body of this `if` (no braces).
                if (confidence > thr)
                //printf("no way we are here %f\n",qp_scale);
                for (int c = CLASS_CHANNEL_OFFSET; c < FEATURE_MAP_CHANNELS; ++c) {
                    add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * a + c;
                    // final confidence: box confidence * class probability
                    cls_prob = fm[add];
                    // Compares raw quantized values; the dequantized score is only computed for a new maximum.
                    if (cls_prob > prob_max)
                    {
		        conf_max = fix_scale(cls_prob, qp_scale, qp_zp) * confidence;
                        chosen_cls = c - CLASS_CHANNEL_OFFSET + 1;
                        prob_max = cls_prob;
                        anchor = a;
                        chosen_row = row;
                        chosen_col = col;
                    }
                }
            }
            float basetemp;// = 2.0f * (fix_scale(fm[add + 2], qp_scale,  qp_zp));
            float tempresult = 1.0;
            //float exptemp = 2.0;

            if (conf_max > thr) {
                // Index of channel 0 (x) of the selected entry.
                add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * chosen_row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * chosen_col + FEATURE_MAP_CHANNELS * anchor;
                x = (fix_scale(fm[add], qp_scale, qp_zp) * 2.0f - 0.5f + chosen_col) / feature_map_size;
                y = (fix_scale(fm[add + 1], qp_scale, qp_zp) * 2.0f - 0.5f +  chosen_row) / feature_map_size;
                basetemp = 2.0f * (fix_scale(fm[add + 2], qp_scale, qp_zp));
                //tempresult = 1.0f;
                //exptemp = 2.0f;

                //while (exptemp != 0) {
                //      tempresult *= basetemp;
                //      --exptemp;
                //}
                // Width: square of (2 * dequantized value), scaled by the anchor width.
                tempresult = basetemp*basetemp;
                w = tempresult * anchors[anchor * 2] / IMAGE_SIZE;

                basetemp = 2.0f * (fix_scale(fm[add + 3], qp_scale, qp_zp));
                //tempresult = 1.0f;
                //exptemp = 2.0f;

                //while (exptemp != 0) {
                //      tempresult *= basetemp;
                //      --exptemp;
                //}
                // Height: same computation with the anchor height.
                tempresult = basetemp*basetemp;
                h = tempresult * anchors[anchor * 2 + 1] / IMAGE_SIZE;

                // Centre/size -> corner form, scaled by IMAGE_SIZE.
                xmin = (x - (w / 2.0f)) * IMAGE_SIZE;
                ymin = (y - (h / 2.0f)) * IMAGE_SIZE;
                xmax = (x + (w / 2.0f)) * IMAGE_SIZE;
                ymax = (y + (h / 2.0f)) * IMAGE_SIZE;
                // Boxes beyond MAX_BOXES are silently dropped.
                if (*box_index < MAX_BOXES)
                {
                    //printf("class type %d\n", chosen_cls);
                    box_array[*box_index][0] = ymin;
                    box_array[*box_index][1] = xmin;
                    box_array[*box_index][2] = ymax;
                    box_array[*box_index][3] = xmax;
                    box_array[*box_index][4] = conf_max;
                    box_array[*box_index][5] = chosen_cls;
                    *box_index = *box_index + 1;
                }
            }
        }
    }
}
#endif