#include "yolov5.hpp"

/*
 * NOTE(review): the three system header names were unreadable in the source
 * as received (bare "#include" directives). The headers below are the ones the
 * code in this file needs (pow, fixed-width integers, printf) -- confirm
 * against the original file.
 */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Convert a quantized value to its real value: (input - qp_zp) * qp_scale.
 *
 * Note the parameter order here is (scale, zero-point), while extract_boxes()
 * receives (zero-point, scale).
 */
float fix_scale(float32_t input, float32_t qp_scale, float32_t qp_zp)
{
    return (input - qp_zp) * qp_scale;
}

/* Return the smaller of n1 and n2 (n2 when they compare equal or unordered). */
inline float minm(float n1, float n2)
{
    if (n1 < n2) {
        return n1;
    }
    return n2;
}

/*
 * Return the larger of n1 and n2 (n2 when they compare equal or unordered).
 *
 * NOTE(review): the body of minm() and the whole of maxm() were garbled in the
 * source as received; both were reconstructed from how iou_calc_c() uses them.
 */
inline float maxm(float n1, float n2)
{
    if (n1 > n2) {
        return n1;
    }
    return n2;
}

/*
 * Intersection-over-union of two boxes.
 *
 * Each box is an array indexed with the XMIN / XMAX / YMIN / YMAX constants.
 * Widths and heights are computed as (max - min + 1), and a negative overlap
 * extent is clamped to zero before the overlap area is formed.
 *
 * Returns overlap_area / (area_1 + area_2 - overlap_area).
 */
float32_t iou_calc_c(float32_t *box_1, float32_t *box_2)
{
    float width_of_overlap_area =
        minm(box_1[XMAX], box_2[XMAX]) - maxm(box_1[XMIN], box_2[XMIN]);
    float height_of_overlap_area =
        minm(box_1[YMAX], box_2[YMAX]) - maxm(box_1[YMIN], box_2[YMIN]);

    float positive_width_of_overlap_area = maxm(width_of_overlap_area + 1, 0.0f);
    float positive_height_of_overlap_area = maxm(height_of_overlap_area + 1, 0.0f);
    float area_of_overlap =
        positive_width_of_overlap_area * positive_height_of_overlap_area;

    float box_1_area =
        (box_1[YMAX] - box_1[YMIN] + 1) * (box_1[XMAX] - box_1[XMIN] + 1);
    float box_2_area =
        (box_2[YMAX] - box_2[YMIN] + 1) * (box_2[XMAX] - box_2[XMIN] + 1);

    return area_of_overlap / (box_1_area + box_2_area - area_of_overlap);
}

#if 1
/*
 * Offset of channel 0 for the given (row, col, anchor) entry of a feature map.
 *
 * The indexing used throughout this file is row-major over rows, then columns,
 * then anchors, with FEATURE_MAP_CHANNELS consecutive values per anchor.
 */
static uint32_t fm_cell_offset(uint32_t feature_map_size, uint32_t row,
                               uint32_t col, uint32_t anchor)
{
    return FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row +
           FEATURE_MAP_CHANNELS * ANCHORS_NUM * col +
           FEATURE_MAP_CHANNELS * anchor;
}

/*
 * Find the class with the highest raw (still quantized) score for one
 * (row, col, anchor) entry.
 *
 * Class ids are 1-based. Only a strictly greater score replaces the current
 * best, so the lowest id wins ties; if every score is 0 the result is
 * class 1 with score 0.
 *
 * @param fm               feature map bytes
 * @param row, col, anchor entry to inspect
 * @param feature_map_size number of rows (and columns) of the feature map
 * @param class_id         out: 1-based id of the best class
 * @param class_prob       out: raw score of that class
 */
static void get_class(uint8_t *fm, uint32_t row, uint32_t col, uint32_t anchor,
                      uint32_t feature_map_size, uint32_t *class_id,
                      uint32_t *class_prob)
{
    /* Class scores start CLASS_CHANNEL_OFFSET values into the entry. */
    const uint32_t first_class_index =
        fm_cell_offset(feature_map_size, row, col, anchor) + CLASS_CHANNEL_OFFSET;
    uint32_t prob_max = 0;
    uint32_t selected_class_id = 1;

    for (uint32_t cls_id = 1; cls_id <= CLASSES_COUNT; ++cls_id) {
        uint32_t cls_prob = fm[first_class_index + cls_id - 1];
        if (cls_prob > prob_max) {
            selected_class_id = cls_id;
            prob_max = cls_prob;
        }
    }

    *class_prob = prob_max;
    *class_id = selected_class_id;
}

/*
 * Decode one feature map into candidate boxes and append them to box_array.
 *
 * For every (row, col, anchor) entry the box confidence is dequantized and
 * compared with thr; surviving entries get their best class from get_class(),
 * and the final score is class score * box confidence, which must again
 * exceed thr. Accepted boxes are written as
 *     [ymin, xmin, ymax, xmax, score, class_id]
 * at box_array[*box_index], and *box_index is incremented. Once *box_index
 * reaches MAX_BOXES further boxes are silently dropped.
 *
 * @param fm               feature map bytes
 * @param qp_zp            quantization zero-point
 * @param qp_scale         quantization scale
 * @param feature_map_size number of rows (and columns) of the feature map
 * @param anchors          anchor sizes, read as anchors[2 * a] for width and
 *                         anchors[2 * a + 1] for height
 * @param thr              score threshold
 * @param box_index        in/out: number of boxes already stored in box_array
 * @param box_array        output rows of 6 values each
 */
void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale,
                   uint32_t feature_map_size, int* anchors, float32_t thr,
                   uint32_t *box_index, float32_t (*box_array)[6])
{
    for (uint32_t row = 0; row < feature_map_size; ++row) {
        for (uint32_t col = 0; col < feature_map_size; ++col) {
            for (uint32_t anchor = 0; anchor < ANCHORS_NUM; ++anchor) {
                /* Channel 0 of this entry; the box coordinates start here. */
                const uint32_t x_tensor_index =
                    fm_cell_offset(feature_map_size, row, col, anchor);
                const uint32_t confidence_tensor_index =
                    x_tensor_index + CONF_CHANNEL_OFFSET;

                /* NOTE(review): looks like a leftover debug trace -- confirm
                 * whether it can be removed. Arguments are cast so that "%u"
                 * is correct on targets where uint32_t is not unsigned int. */
                if (feature_map_size==20 && confidence_tensor_index >= 9600) {
                    printf("row: %u col %u anchor %u\n",
                           (unsigned)row, (unsigned)col, (unsigned)anchor);
                }

                float32_t confidence =
                    fix_scale(fm[confidence_tensor_index], qp_scale, qp_zp);
                if (confidence < thr) {
                    continue;
                }

                uint32_t class_id = 0;
                uint32_t class_prob_int = 0;
                get_class(fm, row, col, anchor, feature_map_size,
                          &class_id, &class_prob_int);

                /* Final score: class score * box confidence. */
                const float32_t class_prob =
                    fix_scale(class_prob_int, qp_scale, qp_zp);
                confidence = class_prob * confidence;

                if (confidence > thr) {
                    //printf("class_prob: %f, confidence: %f\n", class_prob, confidence);

                    /* NOTE(review): looks like a leftover debug trace --
                     * confirm whether it can be removed. */
                    if (row == 1 && col == 1 && anchor == 1) {
                        printf("x index: %u\n", (unsigned)x_tensor_index);
                    }

                    /* Box centre, expressed as a fraction of the feature map. */
                    float32_t x = (fix_scale(fm[x_tensor_index], qp_scale, qp_zp) * 2.0f - 0.5f + col) / feature_map_size;
                    float32_t y = (fix_scale(fm[x_tensor_index + 1], qp_scale, qp_zp) * 2.0f - 0.5f + row) / feature_map_size;

                    /* Box size: (2 * value)^2 scaled by the anchor size and
                     * divided by IMAGE_SIZE. */
                    float32_t w = pow(2.0f * (fix_scale(fm[x_tensor_index + 2], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2] / IMAGE_SIZE;
                    float32_t h = pow(2.0f * (fix_scale(fm[x_tensor_index + 3], qp_scale, qp_zp)), 2.0f) * anchors[anchor * 2 + 1] / IMAGE_SIZE;

                    /* Corner coordinates, scaled up by IMAGE_SIZE. */
                    float32_t xmin = (x - (w / 2.0f)) * IMAGE_SIZE;
                    float32_t ymin = (y - (h / 2.0f)) * IMAGE_SIZE;
                    float32_t xmax = (x + (w / 2.0f)) * IMAGE_SIZE;
                    float32_t ymax = (y + (h / 2.0f)) * IMAGE_SIZE;

                    if (*box_index < MAX_BOXES) {
                        //printf("class type %d\n", chosen_cls);
                        box_array[*box_index][0] = ymin;
                        box_array[*box_index][1] = xmin;
                        box_array[*box_index][2] = ymax;
                        box_array[*box_index][3] = xmax;
                        box_array[*box_index][4] = confidence;
                        box_array[*box_index][5] = class_id;
                        *box_index = *box_index + 1;
                    }
                }
            }
        }
    }
}
#endif

#if 0
/*
 * Disabled alternative implementation, kept for reference. It tracks a
 * running best class score across the anchors of a cell and emits at most one
 * box per cell after the anchor loop, instead of one box per qualifying
 * anchor.
 */
void extract_boxes(uint8_t *fm, float32_t qp_zp, float32_t qp_scale, int feature_map_size,
                   int* anchors, float32_t thr, uint32_t *box_index, float32_t (*box_array)[6])
{
    float32_t confidence, x, y, h, w, xmin, ymin, xmax, ymax, conf_max = 0.0f;
    int add = 0, anchor = 0, chosen_row = 0, chosen_col = 0, chosen_cls = -1;
    float32_t cls_prob, prob_max = 0.0f;

    // channels 0-3 are box coordinates, channel 4 is the confidence, and channels 5-84 are classes
    for (int row = 0; row < feature_map_size; ++row) {
        for (int col = 0; col < feature_map_size; ++col) {
            prob_max = 0;
            for (int a = 0; a < ANCHORS_NUM; ++a) {
                add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row +
                      FEATURE_MAP_CHANNELS * ANCHORS_NUM * col +
                      FEATURE_MAP_CHANNELS * a + CONF_CHANNEL_OFFSET;
                //confidence = fix_scale(fm[add], qp_scale, qp_zp);
                confidence = (fm[add])*qp_scale;
                if (confidence > thr)
                    //printf("no way we are here %f\n",qp_scale);
                    for (int c = CLASS_CHANNEL_OFFSET; c < FEATURE_MAP_CHANNELS; ++c) {
                        add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * row +
                              FEATURE_MAP_CHANNELS * ANCHORS_NUM * col +
                              FEATURE_MAP_CHANNELS * a + c;
                        // final confidence: box confidence * class probability
                        cls_prob = fm[add];
                        if (cls_prob > prob_max) {
                            conf_max = fix_scale(cls_prob, qp_scale, qp_zp) * confidence;
                            chosen_cls = c - CLASS_CHANNEL_OFFSET + 1;
                            prob_max = cls_prob;
                            anchor = a;
                            chosen_row = row;
                            chosen_col = col;
                        }
                    }
            }

            float basetemp;// = 2.0f * (fix_scale(fm[add + 2], qp_scale, qp_zp));
            float tempresult = 1.0;
            //float exptemp = 2.0;
            if (conf_max > thr) {
                add = FEATURE_MAP_CHANNELS * ANCHORS_NUM * feature_map_size * chosen_row +
                      FEATURE_MAP_CHANNELS * ANCHORS_NUM * chosen_col +
                      FEATURE_MAP_CHANNELS * anchor;
                x = (fix_scale(fm[add], qp_scale, qp_zp) * 2.0f - 0.5f + chosen_col) / feature_map_size;
                y = (fix_scale(fm[add + 1], qp_scale, qp_zp) * 2.0f - 0.5f + chosen_row) / feature_map_size;

                basetemp = 2.0f * (fix_scale(fm[add + 2], qp_scale, qp_zp));
                //tempresult = 1.0f;
                //exptemp = 2.0f;
                //while (exptemp != 0) {
                //    tempresult *= basetemp;
                //    --exptemp;
                //}
                tempresult = basetemp*basetemp;
                w = tempresult * anchors[anchor * 2] / IMAGE_SIZE;

                basetemp = 2.0f * (fix_scale(fm[add + 3], qp_scale, qp_zp));
                //tempresult = 1.0f;
                //exptemp = 2.0f;
                //while (exptemp != 0) {
                //    tempresult *= basetemp;
                //    --exptemp;
                //}
                tempresult = basetemp*basetemp;
                h = tempresult * anchors[anchor * 2 + 1] / IMAGE_SIZE;

                xmin = (x - (w / 2.0f)) * IMAGE_SIZE;
                ymin = (y - (h / 2.0f)) * IMAGE_SIZE;
                xmax = (x + (w / 2.0f)) * IMAGE_SIZE;
                ymax = (y + (h / 2.0f)) * IMAGE_SIZE;

                if (*box_index < MAX_BOXES) {
                    //printf("class type %d\n", chosen_cls);
                    box_array[*box_index][0] = ymin;
                    box_array[*box_index][1] = xmin;
                    box_array[*box_index][2] = ymax;
                    box_array[*box_index][3] = xmax;
                    box_array[*box_index][4] = conf_max;
                    box_array[*box_index][5] = chosen_cls;
                    *box_index = *box_index + 1;
                }
            }
        }
    }
}
#endif