hailo-inference/cpp_inference/yolov5.cpp

#include "yolov5.hpp"
#include <cmath>
#include <stdint.h>
#include <stdio.h>


float fix_scale(float32_t input, float32_t qp_scale, float32_t qp_zp)
{
  return (input - qp_zp) * qp_scale;
}

inline float minm(float n1, float n2)
{
     if (n1<n2) {
         return n1;
     }
     return n2;
}

inline float maxm(float n1, float n2)
{
     if (n1>n2) {
         return n1;
     }
     return n2;
}

float32_t iou_calc_c(float32_t *box_1, float32_t *box_2) 
{
    float width_of_overlap_area  = minm(box_1[XMAX], box_2[XMAX]) - maxm(box_1[XMIN], box_2[XMIN]);
    float height_of_overlap_area = minm(box_1[YMAX], box_2[YMAX]) - maxm(box_1[YMIN], box_2[YMIN]);
    float positive_width_of_overlap_area = maxm(width_of_overlap_area + 1, 0.0f);
    float positive_height_of_overlap_area = maxm(height_of_overlap_area + 1, 0.0f);
    float area_of_overlap = positive_width_of_overlap_area * positive_height_of_overlap_area;
    float box_1_area = (box_1[YMAX] - box_1[YMIN] + 1)  * (box_1[XMAX] - box_1[XMIN] + 1);
    float box_2_area = (box_2[YMAX] - box_2[YMIN] + 1)  * (box_2[XMAX] - box_2[XMIN] + 1);
    return area_of_overlap / (box_1_area + box_2_area - area_of_overlap);

}

#if 1
static void get_class(uint8_t *fm, uint32_t row, uint32_t col, uint32_t anchor, uint32_t feature_map_size, 
uint32_t *class_id, uint32_t *class_prob)
{
    uint32_t cls_prob, prob_max = 0;
    uint32_t selected_class_id = 1;
    for (uint32_t cls_id = 1; cls_id <= CLASSES_COUNT; ++cls_id)
    {
        uint32_t cls_prob_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor + CLASS_CHANNEL_OFFSET + cls_id - 1;
        cls_prob = fm[cls_prob_tensor_index];
        if (cls_prob > prob_max)
        {
            selected_class_id = cls_id;
            prob_max = cls_prob;
        }
    }
    *class_prob = prob_max;
    *class_id = selected_class_id;
    return;
}

void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale, uint32_t feature_map_size, 
		   int* anchors, float32_t thr, uint32_t *box_index, float32_t (*box_array)[6]) 
{
    float32_t confidence, x, y, h, w, xmin, ymin, xmax, ymax = 0.0f;
    uint32_t confidence_tensor_index = 0;
    uint32_t class_id = 0;
    uint32_t class_prob_int = 0;
    float32_t class_prob = 0.0f;
    uint32_t x_tensor_index = 0;
    for (uint32_t row = 0; row < feature_map_size; ++row) {
        for (uint32_t col = 0; col < feature_map_size; ++col) {
            for (uint32_t anchor = 0; anchor < ANCHORS_NUM; ++anchor) {
                confidence_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor + CONF_CHANNEL_OFFSET;
                if (feature_map_size==20 && confidence_tensor_index >= 9600) {
                    printf("row: %u col %u anchor %u\n", row, col, anchor);
                }
                confidence = fix_scale(fm[confidence_tensor_index], qp_scale, qp_zp);
                if (confidence < thr) {
                    continue;
                }
                get_class(fm, row, col, anchor, feature_map_size, &class_id, &class_prob_int);
                class_prob = fix_scale(class_prob_int, qp_scale, qp_zp);
                confidence = class_prob * confidence;
                if (confidence > thr)
                {
                    //printf("class_prob: %f, confidence: %f\n", class_prob, confidence);
                    x_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor;
                    if (row == 1 && col == 1 && anchor == 1) {
                        printf("x index: %u\n", x_tensor_index);
                    }
                    x = (fix_scale(fm[x_tensor_index], qp_scale, qp_zp) * 2.0f - 0.5f + col) / feature_map_size;
                    y = (fix_scale(fm[x_tensor_index + 1], qp_scale, qp_zp) * 2.0f - 0.5f +  row) / feature_map_size;
                    w = pow(2.0f * (fix_scale(fm[x_tensor_index + 2], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2] / IMAGE_SIZE;
                    h = pow(2.0f * (fix_scale(fm[x_tensor_index + 3], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2 + 1] / IMAGE_SIZE;

                    xmin = (x - (w / 2.0f)) * IMAGE_SIZE;
                    ymin = (y - (h / 2.0f)) * IMAGE_SIZE;
                    xmax = (x + (w / 2.0f)) * IMAGE_SIZE;
                    ymax = (y + (h / 2.0f)) * IMAGE_SIZE;
                    if (*box_index < MAX_BOXES)
                    {
                        //printf("class type %d\n", chosen_cls);
                        box_array[*box_index][0] = ymin;
                        box_array[*box_index][1] = xmin;
                        box_array[*box_index][2] = ymax;
                        box_array[*box_index][3] = xmax;
                        box_array[*box_index][4] = confidence;
                        box_array[*box_index][5] = class_id;
                        *box_index = *box_index + 1;
                    }
                }
            }
        }
    }
}
#endif

#if 0
void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale, int feature_map_size, 
		   int* anchors, float32_t thr, uint32_t *box_index, float32_t (*box_array)[6]) 
{
    float32_t confidence, x, y, h, w, xmin, ymin, xmax, ymax, conf_max = 0.0f;
    int add = 0, anchor = 0, chosen_row = 0, chosen_col = 0, chosen_cls = -1;
    float32_t cls_prob, prob_max = 0.0f;

    // channels 0-3 are box coordinates, channel 4 is the confidence, and channels 5-84 are classes
    for (int row = 0; row < feature_map_size; ++row) {
        for (int col = 0; col < feature_map_size; ++col) {
            prob_max = 0;
            for (int a = 0; a < ANCHORS_NUM; ++a) {
                add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * a + CONF_CHANNEL_OFFSET;
                //confidence = fix_scale(fm[add], qp_scale,  qp_zp);
                confidence = (fm[add])*qp_scale;
                if (confidence > thr)
                //printf("no way we are here %f\n",qp_scale);
                for (int c = CLASS_CHANNEL_OFFSET; c < FEATURE_MAP_CHANNELS; ++c) {
                    add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * a + c;
                    // final confidence: box confidence * class probability
                    cls_prob = fm[add];
                    if (cls_prob > prob_max) 
                    {
		        conf_max = fix_scale(cls_prob, qp_scale, qp_zp) * confidence;
                        chosen_cls = c - CLASS_CHANNEL_OFFSET + 1;
                        prob_max = cls_prob;
                        anchor = a;
                        chosen_row = row;
                        chosen_col = col;
                    }
                }
            }
            float basetemp;// = 2.0f * (fix_scale(fm[add + 2], qp_scale,  qp_zp)); 
            float tempresult = 1.0;
            //float exptemp = 2.0;

            if (conf_max > thr) {
                add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * chosen_row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * chosen_col + FEATURE_MAP_CHANNELS * anchor;
                x = (fix_scale(fm[add], qp_scale, qp_zp) * 2.0f - 0.5f + chosen_col) / feature_map_size;
                y = (fix_scale(fm[add + 1], qp_scale, qp_zp) * 2.0f - 0.5f +  chosen_row) / feature_map_size;
                basetemp = 2.0f * (fix_scale(fm[add + 2], qp_scale, qp_zp));
                //tempresult = 1.0f;
                //exptemp = 2.0f;

                //while (exptemp != 0) {
                //      tempresult *= basetemp;
                //      --exptemp;
                //}
                tempresult = basetemp*basetemp;                      
                w = tempresult * anchors[anchor * 2] / IMAGE_SIZE;

                basetemp = 2.0f * (fix_scale(fm[add + 3], qp_scale, qp_zp));
                //tempresult = 1.0f;
                //exptemp = 2.0f;

                //while (exptemp != 0) {
                //      tempresult *= basetemp;
                //      --exptemp;
                //}
                tempresult = basetemp*basetemp;                      
                h = tempresult * anchors[anchor * 2 + 1] / IMAGE_SIZE;

                xmin = (x - (w / 2.0f)) * IMAGE_SIZE;
                ymin = (y - (h / 2.0f)) * IMAGE_SIZE;
                xmax = (x + (w / 2.0f)) * IMAGE_SIZE;
                ymax = (y + (h / 2.0f)) * IMAGE_SIZE;
                if (*box_index < MAX_BOXES)
                {
                    //printf("class type %d\n", chosen_cls);
                    box_array[*box_index][0] = ymin;
                    box_array[*box_index][1] = xmin;
                    box_array[*box_index][2] = ymax;
                    box_array[*box_index][3] = xmax;
                    box_array[*box_index][4] = conf_max;
                    box_array[*box_index][5] = chosen_cls;
                    *box_index = *box_index + 1;
                }
            }
        }
    }
}
#endif
adds cpp inference example 2022-03-31 11:10:10 +02:00			`#include "yolov5.hpp"`
			`#include <cmath>`
			`#include <stdint.h>`
			`#include <stdio.h>`


			`float fix_scale(float32_t input, float32_t qp_scale, float32_t qp_zp)`
			`{`
			`return (input - qp_zp) * qp_scale;`
			`}`

			`inline float minm(float n1, float n2)`
			`{`
			`if (n1<n2) {`
			`return n1;`
			`}`
			`return n2;`
			`}`

			`inline float maxm(float n1, float n2)`
			`{`
			`if (n1>n2) {`
			`return n1;`
			`}`
			`return n2;`
			`}`

			`float32_t iou_calc_c(float32_t box_1, float32_t box_2)`
			`{`
			`float width_of_overlap_area = minm(box_1[XMAX], box_2[XMAX]) - maxm(box_1[XMIN], box_2[XMIN]);`
			`float height_of_overlap_area = minm(box_1[YMAX], box_2[YMAX]) - maxm(box_1[YMIN], box_2[YMIN]);`
			`float positive_width_of_overlap_area = maxm(width_of_overlap_area + 1, 0.0f);`
			`float positive_height_of_overlap_area = maxm(height_of_overlap_area + 1, 0.0f);`
			`float area_of_overlap = positive_width_of_overlap_area * positive_height_of_overlap_area;`
			`float box_1_area = (box_1[YMAX] - box_1[YMIN] + 1) * (box_1[XMAX] - box_1[XMIN] + 1);`
			`float box_2_area = (box_2[YMAX] - box_2[YMIN] + 1) * (box_2[XMAX] - box_2[XMIN] + 1);`
			`return area_of_overlap / (box_1_area + box_2_area - area_of_overlap);`

			`}`

			`#if 1`
			`static void get_class(uint8_t *fm, uint32_t row, uint32_t col, uint32_t anchor, uint32_t feature_map_size,`
			`uint32_t class_id, uint32_t class_prob)`
			`{`
			`uint32_t cls_prob, prob_max = 0;`
			`uint32_t selected_class_id = 1;`
			`for (uint32_t cls_id = 1; cls_id <= CLASSES_COUNT; ++cls_id)`
			`{`
			`uint32_t cls_prob_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor + CLASS_CHANNEL_OFFSET + cls_id - 1;`
			`cls_prob = fm[cls_prob_tensor_index];`
			`if (cls_prob > prob_max)`
			`{`
			`selected_class_id = cls_id;`
			`prob_max = cls_prob;`
			`}`
			`}`
			`*class_prob = prob_max;`
			`*class_id = selected_class_id;`
			`return;`
			`}`

			`void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale, uint32_t feature_map_size,`
			`int* anchors, float32_t thr, uint32_t box_index, float32_t (box_array)[6])`
			`{`
			`float32_t confidence, x, y, h, w, xmin, ymin, xmax, ymax = 0.0f;`
			`uint32_t confidence_tensor_index = 0;`
			`uint32_t class_id = 0;`
			`uint32_t class_prob_int = 0;`
			`float32_t class_prob = 0.0f;`
			`uint32_t x_tensor_index = 0;`
			`for (uint32_t row = 0; row < feature_map_size; ++row) {`
			`for (uint32_t col = 0; col < feature_map_size; ++col) {`
			`for (uint32_t anchor = 0; anchor < ANCHORS_NUM; ++anchor) {`
			`confidence_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor + CONF_CHANNEL_OFFSET;`
			`if (feature_map_size==20 && confidence_tensor_index >= 9600) {`
			`printf("row: %u col %u anchor %u\n", row, col, anchor);`
			`}`
			`confidence = fix_scale(fm[confidence_tensor_index], qp_scale, qp_zp);`
			`if (confidence < thr) {`
			`continue;`
			`}`
			`get_class(fm, row, col, anchor, feature_map_size, &class_id, &class_prob_int);`
			`class_prob = fix_scale(class_prob_int, qp_scale, qp_zp);`
			`confidence = class_prob * confidence;`
			`if (confidence > thr)`
			`{`
			`//printf("class_prob: %f, confidence: %f\n", class_prob, confidence);`
			`x_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor;`
			`if (row == 1 && col == 1 && anchor == 1) {`
			`printf("x index: %u\n", x_tensor_index);`
			`}`
			`x = (fix_scale(fm[x_tensor_index], qp_scale, qp_zp) * 2.0f - 0.5f + col) / feature_map_size;`
			`y = (fix_scale(fm[x_tensor_index + 1], qp_scale, qp_zp) * 2.0f - 0.5f + row) / feature_map_size;`
			`w = pow(2.0f * (fix_scale(fm[x_tensor_index + 2], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2] / IMAGE_SIZE;`
			`h = pow(2.0f * (fix_scale(fm[x_tensor_index + 3], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2 + 1] / IMAGE_SIZE;`

			`xmin = (x - (w / 2.0f)) * IMAGE_SIZE;`
			`ymin = (y - (h / 2.0f)) * IMAGE_SIZE;`
			`xmax = (x + (w / 2.0f)) * IMAGE_SIZE;`
			`ymax = (y + (h / 2.0f)) * IMAGE_SIZE;`
			`if (*box_index < MAX_BOXES)`
			`{`
			`//printf("class type %d\n", chosen_cls);`
			`box_array[*box_index][0] = ymin;`
			`box_array[*box_index][1] = xmin;`
			`box_array[*box_index][2] = ymax;`
			`box_array[*box_index][3] = xmax;`
			`box_array[*box_index][4] = confidence;`
			`box_array[*box_index][5] = class_id;`
			`box_index = box_index + 1;`
			`}`
			`}`
			`}`
			`}`
			`}`
			`}`
			`#endif`

			`#if 0`
			`void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale, int feature_map_size,`
			`int* anchors, float32_t thr, uint32_t box_index, float32_t (box_array)[6])`
			`{`
			`float32_t confidence, x, y, h, w, xmin, ymin, xmax, ymax, conf_max = 0.0f;`
			`int add = 0, anchor = 0, chosen_row = 0, chosen_col = 0, chosen_cls = -1;`
			`float32_t cls_prob, prob_max = 0.0f;`

			`// channels 0-3 are box coordinates, channel 4 is the confidence, and channels 5-84 are classes`
			`for (int row = 0; row < feature_map_size; ++row) {`
			`for (int col = 0; col < feature_map_size; ++col) {`
			`prob_max = 0;`
			`for (int a = 0; a < ANCHORS_NUM; ++a) {`
			`add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * a + CONF_CHANNEL_OFFSET;`
			`//confidence = fix_scale(fm[add], qp_scale, qp_zp);`
			`confidence = (fm[add])*qp_scale;`
			`if (confidence > thr)`
			`//printf("no way we are here %f\n",qp_scale);`
			`for (int c = CLASS_CHANNEL_OFFSET; c < FEATURE_MAP_CHANNELS; ++c) {`
			`add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * a + c;`
			`// final confidence: box confidence * class probability`
			`cls_prob = fm[add];`
			`if (cls_prob > prob_max)`
			`{`
			`conf_max = fix_scale(cls_prob, qp_scale, qp_zp) * confidence;`
			`chosen_cls = c - CLASS_CHANNEL_OFFSET + 1;`
			`prob_max = cls_prob;`
			`anchor = a;`
			`chosen_row = row;`
			`chosen_col = col;`
			`}`
			`}`
			`}`
			`float basetemp;// = 2.0f * (fix_scale(fm[add + 2], qp_scale, qp_zp));`
			`float tempresult = 1.0;`
			`//float exptemp = 2.0;`

			`if (conf_max > thr) {`
			`add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * chosen_row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * chosen_col + FEATURE_MAP_CHANNELS * anchor;`
			`x = (fix_scale(fm[add], qp_scale, qp_zp) * 2.0f - 0.5f + chosen_col) / feature_map_size;`
			`y = (fix_scale(fm[add + 1], qp_scale, qp_zp) * 2.0f - 0.5f + chosen_row) / feature_map_size;`
			`basetemp = 2.0f * (fix_scale(fm[add + 2], qp_scale, qp_zp));`
			`//tempresult = 1.0f;`
			`//exptemp = 2.0f;`

			`//while (exptemp != 0) {`
			`// tempresult *= basetemp;`
			`// --exptemp;`
			`//}`
			`tempresult = basetemp*basetemp;`
			`w = tempresult * anchors[anchor * 2] / IMAGE_SIZE;`

			`basetemp = 2.0f * (fix_scale(fm[add + 3], qp_scale, qp_zp));`
			`//tempresult = 1.0f;`
			`//exptemp = 2.0f;`

			`//while (exptemp != 0) {`
			`// tempresult *= basetemp;`
			`// --exptemp;`
			`//}`
			`tempresult = basetemp*basetemp;`
			`h = tempresult * anchors[anchor * 2 + 1] / IMAGE_SIZE;`

			`xmin = (x - (w / 2.0f)) * IMAGE_SIZE;`
			`ymin = (y - (h / 2.0f)) * IMAGE_SIZE;`
			`xmax = (x + (w / 2.0f)) * IMAGE_SIZE;`
			`ymax = (y + (h / 2.0f)) * IMAGE_SIZE;`
			`if (*box_index < MAX_BOXES)`
			`{`
			`//printf("class type %d\n", chosen_cls);`
			`box_array[*box_index][0] = ymin;`
			`box_array[*box_index][1] = xmin;`
			`box_array[*box_index][2] = ymax;`
			`box_array[*box_index][3] = xmax;`
			`box_array[*box_index][4] = conf_max;`
			`box_array[*box_index][5] = chosen_cls;`
			`box_index = box_index + 1;`
			`}`
			`}`
			`}`
			`}`
			`}`
			`#endif`