手撕 NMS（非极大值抑制） IoU（交并比） Python/C++

IoU 概念

在这里插入图片描述

IoU 代码

Python代码

def iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is indexable as (x1, y1, x2, y2): top-left corner followed by
    bottom-right corner. Extents are measured as ``hi - lo + 1``, i.e. the
    corner coordinates are counted as inclusive.

    Returns:
        Intersection area divided by union area, as a float.
    """
    def _extent(lo, hi):
        # Length of the span lo..hi with both end points included.
        return hi - lo + 1

    # Corners of the overlap rectangle: innermost of each pair of edges.
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    # A negative extent means the boxes are disjoint on that axis; clamp to 0.
    overlap_w = max(0, _extent(left, right))
    overlap_h = max(0, _extent(top, bottom))
    overlap = overlap_w * overlap_h

    area_a = _extent(box1[0], box1[2]) * _extent(box1[1], box1[3])
    area_b = _extent(box2[0], box2[2]) * _extent(box2[1], box2[3])

    # Union = sum of both areas minus the part counted twice.
    union = area_a + area_b - overlap
    return overlap / float(union)

C++代码

#include <iostream>
#include <vector>
#include <algorithm>

// Axis-aligned bounding box described by two corner points.
// Member order matters: the struct is brace-initialised as {x1, y1, x2, y2}.
struct Box {
    int x1;  // x of the top-left corner
    int y1;  // y of the top-left corner
    int x2;  // x of the bottom-right corner
    int y2;  // y of the bottom-right corner
};

// Computes the intersection-over-union (IoU) of two axis-aligned boxes.
//
// Coordinates are treated as inclusive indices, so a box spanning x1..x2 is
// (x2 - x1 + 1) units wide; the overlap rectangle uses the same convention.
//
// Returns intersection / union in [0, 1]. Returns 0 when the boxes do not
// overlap or when the union is empty (both boxes degenerate), instead of
// dividing by zero.
float iou(const Box& box1, const Box& box2) {
    // Inclusive length of lo..hi, clamped at 0 so an inverted (hi < lo) span
    // contributes no area. Widened to 64 bits before subtracting so extreme
    // coordinates cannot overflow `int`.
    const auto extent = [](int lo, int hi) -> long long {
        return std::max(0LL, static_cast<long long>(hi) - lo + 1);
    };

    // Overlap rectangle: innermost of each pair of edges.
    const long long overlap_w = extent(std::max(box1.x1, box2.x1), std::min(box1.x2, box2.x2));
    const long long overlap_h = extent(std::max(box1.y1, box2.y1), std::min(box1.y2, box2.y2));

    // Areas are accumulated in double: the products of two 64-bit extents
    // cannot overflow there, and for ordinary box sizes the values are exact.
    const double intersection = static_cast<double>(overlap_w) * static_cast<double>(overlap_h);
    const double area1 = static_cast<double>(extent(box1.x1, box1.x2)) *
                         static_cast<double>(extent(box1.y1, box1.y2));
    const double area2 = static_cast<double>(extent(box2.x1, box2.x2)) *
                         static_cast<double>(extent(box2.y1, box2.y2));

    const double union_area = area1 + area2 - intersection;
    if (union_area <= 0.0) {
        return 0.0f;  // nothing to compare; avoid 0/0
    }
    return static_cast<float>(intersection / union_area);
}

IoU代码注解

首先，我们将输入的两个边界框的坐标表示为 box1 和 box2
接下来，我们计算交叠区域的左上角和右下角坐标：左上角取两个框左上角坐标的较大值（max()），右下角取两个框右下角坐标的较小值（min()）。如果两个框不相交，这样得到的宽度或高度会是负数，这种情况在下一步处理。
然后，我们计算交叠区域的宽度和高度，使用 max() 函数确保它们不会小于 0。
接着，我们计算交叠区域的面积，即宽度乘以高度。
接下来，我们计算两个边界框的面积。我们首先计算每个边界框的宽度和高度，然后将它们相乘得到面积。
最后，我们计算交并比：用交叠区域的面积除以两个边界框的并集面积，即 IoU = 交集面积 / (面积1 + 面积2 − 交集面积)。

在计算宽度和高度时，代码中的 +1 并不是为了避免除以 0，而是因为这里把坐标当作像素的闭区间索引：一个从 x1 到 x2 的框覆盖 x1, x1+1, …, x2 共 x2 - x1 + 1 个像素。例如 x1 = 3、x2 = 5 时宽度是 3 而不是 2。

对交叠区域同理：x2 - x1 的结果为 0 表示交叠区域恰好有 1 个像素宽，并不是没有交叠；只有当 x2 - x1 + 1 <= 0（或 y2 - y1 + 1 <= 0）时两个框才不相交，此时 max(0, ...) 会把宽度或高度截断为 0，交集面积也就是 0。

在实际的项目中，坐标通常是连续的浮点数，这时一般不加 1，而是在分母上加一个很小的常数（例如 1e-7）来防止除以 0。
在这里插入图片描述

NMS概念

在这里插入图片描述

NMS代码

Python代码

def nms(boxes, scores, threshold):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is greater than ``threshold``.
    IoU uses the inclusive ``+1`` extent convention (``x2 - x1 + 1``).

    Args:
        boxes: array-like of rows whose first four columns are
            (x1, y1, x2, y2).
        scores: 1-D array-like with one score per box.
        threshold: boxes whose IoU with a kept box is at most this value
            survive; larger overlaps are suppressed.

    Returns:
        List of kept box indices, highest score first.
    """
    # Imported locally so the function is usable on its own.
    import numpy as np

    # Accept plain Python sequences as well as ndarrays.
    scores = np.asarray(scores)
    if scores.size == 0:
        return []
    boxes = np.asarray(boxes, dtype=np.float64)

    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    # Box areas never change, so compute them once up front.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    # Candidate indices sorted by descending score.
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        # The best remaining candidate is always kept.
        i = order[0]
        keep.append(i)
        rest = order[1:]

        # IoU of box i against every remaining candidate, in one vector op.
        inter_w = np.maximum(0.0, np.minimum(x2[i], x2[rest]) - np.maximum(x1[i], x1[rest]) + 1)
        inter_h = np.maximum(0.0, np.minimum(y2[i], y2[rest]) - np.maximum(y1[i], y1[rest]) + 1)
        inter = inter_w * inter_h
        ious = inter / (areas[i] + areas[rest] - inter)

        # Survivors keep their relative (score) order for the next round.
        order = rest[ious <= threshold]

    return keep

当我们使用 order = order[1:][inds] 这行代码时，可以举一个简单的例子来说明其作用。

假设原始的 order 数组为 [3, 1, 4, 2, 0]，而 inds 数组为 [0, 2]。现在我们将这行代码应用于这个例子。

首先，我们对 order 数组应用切片操作 order[1:]，它会返回一个新的数组 [1, 4, 2, 0]。注意，这里我们去除了原始数组的第一个元素。

接下来，我们使用 inds 数组来索引新的数组 order[1:]。根据 inds 数组的值，我们选择在索引 0 和索引 2 处的元素。因此，我们最终得到的 order 数组为 [1, 2]。

换句话说，这行代码的作用是从原始的 order 数组中去除第一个元素，并根据 inds 数组选择保留的索引，返回一个更新后的 order 数组。

所以，对于给定的示例，order = order[1:][inds] 操作会将原始的 order 数组 [3, 1, 4, 2, 0] 更新为 [1, 2]。

C++ 代码

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

// Axis-aligned bounding box described by two corner points.
// Member order matters: the struct is brace-initialised as {x1, y1, x2, y2}.
struct Box {
    int x1;  // x of the top-left corner
    int y1;  // y of the top-left corner
    int x2;  // x of the bottom-right corner
    int y2;  // y of the bottom-right corner
};

// Computes the intersection-over-union (IoU) of two axis-aligned boxes.
//
// Coordinates are treated as inclusive indices, so a box spanning x1..x2 is
// (x2 - x1 + 1) units wide; the overlap rectangle uses the same convention.
//
// Returns intersection / union in [0, 1]. Returns 0 when the boxes do not
// overlap or when the union is empty (both boxes degenerate), instead of
// dividing by zero.
float iou(const Box& box1, const Box& box2) {
    // Inclusive length of lo..hi, clamped at 0 so an inverted (hi < lo) span
    // contributes no area. Widened to 64 bits before subtracting so extreme
    // coordinates cannot overflow `int`.
    const auto extent = [](int lo, int hi) -> long long {
        return std::max(0LL, static_cast<long long>(hi) - lo + 1);
    };

    // Overlap rectangle: innermost of each pair of edges.
    const long long overlap_w = extent(std::max(box1.x1, box2.x1), std::min(box1.x2, box2.x2));
    const long long overlap_h = extent(std::max(box1.y1, box2.y1), std::min(box1.y2, box2.y2));

    // Areas are accumulated in double: the products of two 64-bit extents
    // cannot overflow there, and for ordinary box sizes the values are exact.
    const double intersection = static_cast<double>(overlap_w) * static_cast<double>(overlap_h);
    const double area1 = static_cast<double>(extent(box1.x1, box1.x2)) *
                         static_cast<double>(extent(box1.y1, box1.y2));
    const double area2 = static_cast<double>(extent(box2.x1, box2.x2)) *
                         static_cast<double>(extent(box2.y1, box2.y2));

    const double union_area = area1 + area2 - intersection;
    if (union_area <= 0.0) {
        return 0.0f;  // nothing to compare; avoid 0/0
    }
    return static_cast<float>(intersection / union_area);
}

// NMS算法实现
std::vector<int> nms(const std::vector<Box>& boxes, const std::vector<float>& scores, float threshold) {
    
    
    std::vector<int> order(scores.size());
    for (int i = 0; i < scores.size(); i++) {
    
    
        order[i] = i;
    }

    // 按照得分降序排列边界框的索引
    std::sort(order.begin(), order.end(), [&](int a, int b) {
    
     return scores[a] > scores[b]; });

    std::vector<int> keep;  // 用于存储保留的边界框索引
    while (!order.empty()) {
    
    
        int i = order[0];  // 选择得分最高的边界框
        keep.push_back(i);  // 将该边界框的索引添加到保留列表中

        std::vector<float> ious;  // 存储当前边界框与其他边界框的交并比
        for (int t = 1; t < order.size(); t++) {
    
    
            ious.push_back(iou(boxes[i], boxes[order[t]]));  // 计算当前边界框与其他边界框的交并比
        }

        std::vector<int> inds;  // 存储交并比小于阈值的边界框的索引
        for (int t = 0; t < ious.size(); t++) {
    
    
            if (ious[t] <= threshold) {
    
    
                inds.push_back(t);  // 将交并比小于阈值的边界框的索引添加到列表中
            }
        }

        std::vector<int> new_order;  // 存储更新后的order数组
        for (int t = 0; t < inds.size(); t++) {
    
    
            new_order.push_back(order[inds[t] + 1]);  // 更新order数组，去除保留的边界框的索引
        }
        order = new_order;
    }

    return keep;  // 返回保留的边界框索引
}

int main() {
    // Example input: four boxes given as {x1, y1, x2, y2}, one score each.
    const std::vector<Box> boxes = {
        {50, 50, 100, 100},
        {60, 60, 120, 120},
        {70, 70, 130, 130},
        {80, 80, 140, 140},
    };
    const std::vector<float> scores = {0.9, 0.75, 0.8, 0.95};
    const float threshold = 0.5;

    // Run NMS to obtain the indices of the boxes that survive.
    const std::vector<int> keep = nms(boxes, scores, threshold);

    // Print the kept indices on one line (the label reads "kept box indices:").
    std::cout << "保留的边界框索引：";
    for (const int index : keep) {
        std::cout << index << " ";
    }
    std::cout << std::endl;

    return 0;
}

参考链接

Enzo_Mi
Enzo_Mi 课件地址： https://yyzc83.axshare.com