201 lines
9.0 KiB
C++
201 lines
9.0 KiB
C++
#include "yolov5.hpp"
|
|
#include <cmath>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
|
|
|
|
/**
 * Dequantize a tensor value using its quantization parameters.
 *
 * @param input    value to convert (callers in this file pass raw uint8 tensor entries)
 * @param qp_scale quantization scale
 * @param qp_zp    quantization zero point
 * @return (input - qp_zp) * qp_scale
 */
float fix_scale(float32_t input, float32_t qp_scale, float32_t qp_zp)
{
    // Remove the zero point first, then apply the scale.
    const float32_t centered = input - qp_zp;
    return centered * qp_scale;
}
|
|
|
|
/**
 * Smaller of two floats.
 *
 * Returns n1 only when n1 < n2 holds; in every other case (equal values,
 * or an unordered comparison) n2 is returned.
 */
inline float minm(float n1, float n2)
{
    return (n1 < n2) ? n1 : n2;
}
|
|
|
|
/**
 * Larger of two floats.
 *
 * Returns n1 only when n1 > n2 holds; in every other case (equal values,
 * or an unordered comparison) n2 is returned.
 */
inline float maxm(float n1, float n2)
{
    return (n1 > n2) ? n1 : n2;
}
|
|
|
|
/**
 * Intersection-over-union of two boxes.
 *
 * Each box is an array indexed with the XMIN / YMIN / XMAX / YMAX constants.
 * Every extent (overlap and box sides) has 1 added to it before areas are
 * computed, and the overlap extents are clamped at zero.
 *
 * @param box_1 first box
 * @param box_2 second box
 * @return overlap area divided by (area of box_1 + area of box_2 - overlap area)
 */
float32_t iou_calc_c(float32_t *box_1, float32_t *box_2)
{
    // Signed extents of the intersection rectangle; they go negative when
    // the boxes are apart along that axis.
    const float overlap_w = minm(box_1[XMAX], box_2[XMAX]) - maxm(box_1[XMIN], box_2[XMIN]);
    const float overlap_h = minm(box_1[YMAX], box_2[YMAX]) - maxm(box_1[YMIN], box_2[YMIN]);

    // Apply the +1 extent convention and clamp so that separated boxes
    // contribute no intersection.
    const float clamped_w = maxm(overlap_w + 1.0f, 0.0f);
    const float clamped_h = maxm(overlap_h + 1.0f, 0.0f);
    const float intersection = clamped_w * clamped_h;

    const float area_1 = (box_1[YMAX] - box_1[YMIN] + 1.0f) * (box_1[XMAX] - box_1[XMIN] + 1.0f);
    const float area_2 = (box_2[YMAX] - box_2[YMIN] + 1.0f) * (box_2[XMAX] - box_2[XMIN] + 1.0f);

    const float union_area = area_1 + area_2 - intersection;
    return intersection / union_area;
}
|
|
|
|
#if 1
|
|
static void get_class(uint8_t *fm, uint32_t row, uint32_t col, uint32_t anchor, uint32_t feature_map_size,
|
|
uint32_t *class_id, uint32_t *class_prob)
|
|
{
|
|
uint32_t cls_prob, prob_max = 0;
|
|
uint32_t selected_class_id = 1;
|
|
for (uint32_t cls_id = 1; cls_id <= CLASSES_COUNT; ++cls_id)
|
|
{
|
|
uint32_t cls_prob_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor + CLASS_CHANNEL_OFFSET + cls_id - 1;
|
|
cls_prob = fm[cls_prob_tensor_index];
|
|
if (cls_prob > prob_max)
|
|
{
|
|
selected_class_id = cls_id;
|
|
prob_max = cls_prob;
|
|
}
|
|
}
|
|
*class_prob = prob_max;
|
|
*class_id = selected_class_id;
|
|
return;
|
|
}
|
|
|
|
void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale, uint32_t feature_map_size,
|
|
int* anchors, float32_t thr, uint32_t *box_index, float32_t (*box_array)[6])
|
|
{
|
|
float32_t confidence, x, y, h, w, xmin, ymin, xmax, ymax = 0.0f;
|
|
uint32_t confidence_tensor_index = 0;
|
|
uint32_t class_id = 0;
|
|
uint32_t class_prob_int = 0;
|
|
float32_t class_prob = 0.0f;
|
|
uint32_t x_tensor_index = 0;
|
|
for (uint32_t row = 0; row < feature_map_size; ++row) {
|
|
for (uint32_t col = 0; col < feature_map_size; ++col) {
|
|
for (uint32_t anchor = 0; anchor < ANCHORS_NUM; ++anchor) {
|
|
confidence_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor + CONF_CHANNEL_OFFSET;
|
|
if (feature_map_size==20 && confidence_tensor_index >= 9600) {
|
|
printf("row: %u col %u anchor %u\n", row, col, anchor);
|
|
}
|
|
confidence = fix_scale(fm[confidence_tensor_index], qp_scale, qp_zp);
|
|
if (confidence < thr) {
|
|
continue;
|
|
}
|
|
get_class(fm, row, col, anchor, feature_map_size, &class_id, &class_prob_int);
|
|
class_prob = fix_scale(class_prob_int, qp_scale, qp_zp);
|
|
confidence = class_prob * confidence;
|
|
if (confidence > thr)
|
|
{
|
|
//printf("class_prob: %f, confidence: %f\n", class_prob, confidence);
|
|
x_tensor_index = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * anchor;
|
|
if (row == 1 && col == 1 && anchor == 1) {
|
|
printf("x index: %u\n", x_tensor_index);
|
|
}
|
|
x = (fix_scale(fm[x_tensor_index], qp_scale, qp_zp) * 2.0f - 0.5f + col) / feature_map_size;
|
|
y = (fix_scale(fm[x_tensor_index + 1], qp_scale, qp_zp) * 2.0f - 0.5f + row) / feature_map_size;
|
|
w = pow(2.0f * (fix_scale(fm[x_tensor_index + 2], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2] / IMAGE_SIZE;
|
|
h = pow(2.0f * (fix_scale(fm[x_tensor_index + 3], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2 + 1] / IMAGE_SIZE;
|
|
|
|
xmin = (x - (w / 2.0f)) * IMAGE_SIZE;
|
|
ymin = (y - (h / 2.0f)) * IMAGE_SIZE;
|
|
xmax = (x + (w / 2.0f)) * IMAGE_SIZE;
|
|
ymax = (y + (h / 2.0f)) * IMAGE_SIZE;
|
|
if (*box_index < MAX_BOXES)
|
|
{
|
|
//printf("class type %d\n", chosen_cls);
|
|
box_array[*box_index][0] = ymin;
|
|
box_array[*box_index][1] = xmin;
|
|
box_array[*box_index][2] = ymax;
|
|
box_array[*box_index][3] = xmax;
|
|
box_array[*box_index][4] = confidence;
|
|
box_array[*box_index][5] = class_id;
|
|
*box_index = *box_index + 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#if 0
|
|
void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale, int feature_map_size,
|
|
int* anchors, float32_t thr, uint32_t *box_index, float32_t (*box_array)[6])
|
|
{
|
|
float32_t confidence, x, y, h, w, xmin, ymin, xmax, ymax, conf_max = 0.0f;
|
|
int add = 0, anchor = 0, chosen_row = 0, chosen_col = 0, chosen_cls = -1;
|
|
float32_t cls_prob, prob_max = 0.0f;
|
|
|
|
// channels 0-3 are box coordinates, channel 4 is the confidence, and channels 5-84 are classes
|
|
for (int row = 0; row < feature_map_size; ++row) {
|
|
for (int col = 0; col < feature_map_size; ++col) {
|
|
prob_max = 0;
|
|
for (int a = 0; a < ANCHORS_NUM; ++a) {
|
|
add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * a + CONF_CHANNEL_OFFSET;
|
|
//confidence = fix_scale(fm[add], qp_scale, qp_zp);
|
|
confidence = (fm[add])*qp_scale;
|
|
if (confidence > thr)
|
|
//printf("no way we are here %f\n",qp_scale);
|
|
for (int c = CLASS_CHANNEL_OFFSET; c < FEATURE_MAP_CHANNELS; ++c) {
|
|
add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * col + FEATURE_MAP_CHANNELS * a + c;
|
|
// final confidence: box confidence * class probability
|
|
cls_prob = fm[add];
|
|
if (cls_prob > prob_max)
|
|
{
|
|
conf_max = fix_scale(cls_prob, qp_scale, qp_zp) * confidence;
|
|
chosen_cls = c - CLASS_CHANNEL_OFFSET + 1;
|
|
prob_max = cls_prob;
|
|
anchor = a;
|
|
chosen_row = row;
|
|
chosen_col = col;
|
|
}
|
|
}
|
|
}
|
|
float basetemp;// = 2.0f * (fix_scale(fm[add + 2], qp_scale, qp_zp));
|
|
float tempresult = 1.0;
|
|
//float exptemp = 2.0;
|
|
|
|
if (conf_max > thr) {
|
|
add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * chosen_row + FEATURE_MAP_CHANNELS * ANCHORS_NUM * chosen_col + FEATURE_MAP_CHANNELS * anchor;
|
|
x = (fix_scale(fm[add], qp_scale, qp_zp) * 2.0f - 0.5f + chosen_col) / feature_map_size;
|
|
y = (fix_scale(fm[add + 1], qp_scale, qp_zp) * 2.0f - 0.5f + chosen_row) / feature_map_size;
|
|
basetemp = 2.0f * (fix_scale(fm[add + 2], qp_scale, qp_zp));
|
|
//tempresult = 1.0f;
|
|
//exptemp = 2.0f;
|
|
|
|
//while (exptemp != 0) {
|
|
// tempresult *= basetemp;
|
|
// --exptemp;
|
|
//}
|
|
tempresult = basetemp*basetemp;
|
|
w = tempresult * anchors[anchor * 2] / IMAGE_SIZE;
|
|
|
|
basetemp = 2.0f * (fix_scale(fm[add + 3], qp_scale, qp_zp));
|
|
//tempresult = 1.0f;
|
|
//exptemp = 2.0f;
|
|
|
|
//while (exptemp != 0) {
|
|
// tempresult *= basetemp;
|
|
// --exptemp;
|
|
//}
|
|
tempresult = basetemp*basetemp;
|
|
h = tempresult * anchors[anchor * 2 + 1] / IMAGE_SIZE;
|
|
|
|
xmin = (x - (w / 2.0f)) * IMAGE_SIZE;
|
|
ymin = (y - (h / 2.0f)) * IMAGE_SIZE;
|
|
xmax = (x + (w / 2.0f)) * IMAGE_SIZE;
|
|
ymax = (y + (h / 2.0f)) * IMAGE_SIZE;
|
|
if (*box_index < MAX_BOXES)
|
|
{
|
|
//printf("class type %d\n", chosen_cls);
|
|
box_array[*box_index][0] = ymin;
|
|
box_array[*box_index][1] = xmin;
|
|
box_array[*box_index][2] = ymax;
|
|
box_array[*box_index][3] = xmax;
|
|
box_array[*box_index][4] = conf_max;
|
|
box_array[*box_index][5] = chosen_cls;
|
|
*box_index = *box_index + 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif
|