求图像连通区域的最大内接矩形

获取图像连通区域的最大内接矩形

1.背景

在做一些图像识别的任务的时候，遇到了一个需求就是获取一个不规则轮廓内的最大内接矩形，需求示例：
在这里插入图片描述
一个不规则轮廓里面，想提取最主要的矩形部分。

2.数据准备

1.该图像是由一个轮廓数据生成的，生成上述需求图像的代码如下：

# Sample contour used by every snippet below: polygon vertices as (x, y) pixel
# coordinates, drawn onto a 1000-row x 4600-column canvas.
# NOTE(review): this relies on `np` (numpy), which is only imported further down
# in this file - import numpy before running this snippet on its own.
contour = np.array([[ 301,  300],[ 300,  301],[ 300,  626],[ 301,  627],[1071,  627],[1072,  628],[1072, 637],[1073,  638],[1251,638],
 [1252, 637],[1252,  628],[1253,  627],[1297,  627], [1298,  626],[1298,  625],[1299,  624],[1788, 624],[1790,  622],[1792,622],
 [1793,  621],[2244,  621],[2245,  622],[2247,  622],[2249,  624],[3161,  624],[3162, 625],[3162,  636],[3163,  637],[3433,637],
 [3434,  636],[3434,  625],[3435,  624],[4285,  624],[4286,  623],[4286,  378],[4285,  377],[4179,  377],[4178,  376],[4178,322],
 [4177,  321],[4168,  321],[4167,  320],[4167,  310],[4166,  309],[4030,  309],[4029,  310],[4029,  320],[4028,  321],[3811,  321],
 [3810,  320],[3810, 310],[3809,  309],[3673,  309],[3672,  310],[3672,  320],[3671, 321],[3662,  321],[3661,  322],[3661,  376],
 [3660,  377],[3396, 377],[3394,  375],[3230,  375],[3228,  377],[2964,  377],[2963 , 376],[2963 , 322],[2962,  321],[2953,  321],
 [2952 , 320],[2952 , 310],[2951,  309],[2816 , 309],[2815 , 310],[2815 , 320],[2814 , 321],[2596 , 321],[2595 , 320],[2595 , 310],
 [2594 , 309],[2458,  309],[2457 , 310],[2457 , 320],[2456 , 321],[2447 , 321],[2446 , 322],[2446 , 376],[2445 , 377],[2181 , 377],
 [2179 , 375],[2016 , 375],[2014 , 377],[1750 , 377],[1749 , 376],[1749 , 322],[1748 , 321],[1738 , 321],[1737 , 320],[1737 , 310],
 [1736,  309],[1601 , 309],[1600 , 310],[1600 , 320],[1599 , 321],[1381 , 321],[1380 , 320],[1380 , 310],[1379 , 309],[1243 , 309],
 [1242 , 310],[1242 , 320],[1241 , 321],[1232 , 321],[1231 , 322],[1231,  376],[1230 , 377],[ 966 , 377],[ 964 , 375],[ 801 , 375],
 [ 799 , 377],[ 535 , 377],[ 534 , 376],[ 534 , 301],[ 533 , 300]])

import cv2
# Black canvas of 1000 rows x 4600 columns.
img = np.zeros((1000, 4600))
# Draw the contour as a closed polyline with value 255 and a thickness of 10.
cv2.polylines(img, [contour.reshape(-1, 1, 2).astype('int')], True, 255, 10)
cv2.imwrite('xxx/xxx/Desktop/1.png', img)
# Running the code above writes an image named 1.png to the desktop; it looks
# like the picture shown in the requirement example.
# The contour itself came from edge detection on an image. For the record, this
# is how to save/load it as numpy-readable test data:
# np.savetxt('xxx/xxx/output.txt', contour,fmt='%d')
# contour = np.loadtxt('xxx/xxx/output.txt', dtype=int)

3.使用中心外扩的思想

中心外扩的思想适用于凸多边形的轮廓；对于凹多边形，只有在轮廓的质心落在轮廓内部时才适用。具体思想如下：
1.用图像的思想进行查找，以黑色图像为背景，将轮廓部分填充成白色，生成二值图像。
2.取轮廓多边形的质心为起点
3.以图形的质心为起点向四周逐步外扩，逐像素点去判断是否仍在连通区域内。
注意⚠️：该方法的缺点有2个：
1.就是起点必须在轮廓内，所以当图形是凹多边形的时候很有可能找到的质心在轮廓外，这种情况下这个方法就失效了。
2.找到的轮廓内的最大内接矩形只能是正矩形，当示例的图形轮廓旋转一定角度的时候，其内部的最大内接矩形应该是倾斜矩形，但是该方法找不到想要的结果。

def move_edge(img, edge, edge_id):
    """
    Try to push one side of the rectangle outward by a single pixel.

    The side moves only if it is not already on the image border and every
    pixel of the row/column it would move onto is non-zero.

    :param img: image array; pixels equal to 0 are treated as background
    :param edge: rectangle as [bottom_row, right_col, top_row, left_col]
                 (inclusive pixel indices); updated in place on success
    :param edge_id: side to move: 0 = bottom, 1 = right, 2 = top,
                    any other value = left
    :return: tuple (moved, edge) - ``moved`` is True when the side advanced,
             ``edge`` is the same list object that was passed in
    """
    height, width = img.shape[:2]

    # Per side: which slot of `edge` it lives in, which way it moves, whether
    # it already touches the image border, and whether it is a horizontal side
    # (scans a row) or a vertical one (scans a column).
    if edge_id == 0:
        slot, step, at_border, scans_row = 0, 1, edge[0] >= height - 1, True
    elif edge_id == 1:
        slot, step, at_border, scans_row = 1, 1, edge[1] >= width - 1, False
    elif edge_id == 2:
        slot, step, at_border, scans_row = 2, -1, edge[2] <= 0, True
    else:
        slot, step, at_border, scans_row = 3, -1, edge[3] <= 0, False

    if at_border:
        return False, edge

    target = edge[slot] + step
    if scans_row:
        # Candidate row, restricted to the rectangle's current column span.
        blocked = any(img[target, x] == 0 for x in range(edge[3], edge[1] + 1))
    else:
        # Candidate column, restricted to the rectangle's current row span.
        blocked = any(img[y, target] == 0 for y in range(edge[2], edge[0] + 1))

    if blocked:
        return False, edge

    edge[slot] = target
    return True, edge


def find_max_inner_rectangle(img, center, move_direction='both', shrink=20):
    """
    Grow an axis-aligned rectangle outward from a seed pixel inside a region.

    The rectangle starts as the single pixel ``center`` and its sides are
    pushed outward one pixel at a time with ``move_edge`` until no side in the
    current phase can move any further.

    Note that the seed pixel itself is never tested: only the rows/columns the
    rectangle grows onto are checked, so ``center`` must already lie inside
    the region for the result to be meaningful.

    :param img: single-channel image; pixels equal to 0 are background
    :param center: seed point as integer (x, y), e.g. the contour centroid
    :param move_direction: growth order -
        "both": the four sides take turns (bottom, right, top, left);
        "horizontal": left/right first, then top/bottom;
        any other value (e.g. "vertical"): top/bottom first, then left/right
    :param shrink: non-negative number of pixels by which the rectangle is
        temporarily pulled in along the first axis while the second axis
        grows, so that small notches near its ends do not block the second
        phase. Defaults to 20. It is clamped to half of the rectangle's
        extent so that at least one row/column is always checked. Because the
        pulled-in strips are not checked during the second phase, with a
        non-zero inset the corners of the returned rectangle may fall outside
        the region.
    :return: [left, top, right, bottom] as inclusive pixel coordinates
    """
    seed_x, seed_y = center[0], center[1]
    # Layout expected by move_edge: [bottom_row, right_col, top_row, left_col].
    edge = [seed_y, seed_x, seed_y, seed_x]
    can_grow = [True] * 4

    def grow(side_ids):
        """Take turns pushing the given sides until none of them can move."""
        nonlocal edge
        turn = 0
        while any(can_grow[side] for side in side_ids):
            side = side_ids[turn % len(side_ids)]
            can_grow[side], edge = move_edge(img, edge, side)
            turn += 1

    if move_direction == 'both':
        grow((0, 1, 2, 3))
    elif move_direction == 'horizontal':
        grow((1, 3))
        # Without the clamp, a rectangle narrower than 2 * shrink would end up
        # with an empty column span and the vertical phase would run unchecked
        # to the image border.
        inset = min(shrink, (edge[1] - edge[3]) // 2)
        edge[3] += inset
        edge[1] -= inset
        grow((0, 2))
        edge[3] -= inset
        edge[1] += inset
    else:
        grow((0, 2))
        # Same clamp as above, applied to the row span.
        inset = min(shrink, (edge[0] - edge[2]) // 2)
        edge[2] += inset
        edge[0] -= inset
        grow((1, 3))
        edge[2] -= inset
        edge[0] += inset

    return [edge[3], edge[2], edge[1], edge[0]]

# The shapely library is needed to compute the centroid of the contour.
from shapely.geometry import Polygon
center = Polygon(contour).centroid
# Truncate the centroid to integer (x, y) pixel coordinates.
center = list(map(int, [center.x, center.y]))
img = np.zeros((1000, 4600))
# Fill the inside of the contour with 255 to get a binary mask.
cv2.fillPoly(img, [contour.reshape(-1, 1, 2).astype('int')], 255)
res = find_max_inner_rectangle(img, center)
# res is the bbox of the inscribed rectangle that was found, given by its
# top-left and bottom-right points.

示例代码里面的轮廓使用该方法找到的最大内接矩形结果如图
（玫红色的矩形即为该方法找到的最大内接矩形）：
在这里插入图片描述

4.使用最大矩形

采用的是 LeetCode 题库第 85 题（最大矩形）的思想，在 0/1 矩阵中寻找只包含 1 的最大矩形。
具体的思想过程不赘述可以直接去看题目哈哈。
该方法同样有一个缺点没有办法获取倾斜矩形，也只能在轮廓中获取正矩形。
注意⚠️：使用该思想可以递归获取一个轮廓内的若干个面积大于阈值的矩形。

import numpy as np
def _largest_foreground_rectangle(rows):
    """
    Find the largest axis-aligned rectangle made only of 255-valued cells.

    Each row is turned into a histogram of consecutive-255 column heights and
    the largest rectangle under that histogram is found with a monotonic
    stack (the approach of LeetCode problems 84/85).

    Args:
        rows: image as a list of equally long rows of pixel values
    Returns:
        [x0, y0, x1, y1] with exclusive x1/y1, or None when there is no
        255-valued cell (or the image is empty). On ties the first rectangle
        met in scan order wins.
    """
    if not rows or not rows[0]:
        return None

    col = len(rows[0])
    # One zero sentinel on each side: index p of `height` is image column p - 1.
    height = [0] * (col + 2)
    best_area = 0
    best_bbox = None
    for i, row in enumerate(rows):
        stack = []  # indices into `height`, heights non-decreasing
        for j in range(col + 2):
            if 1 <= j <= col:
                height[j] = height[j] + 1 if row[j - 1] == 255 else 0
            while stack and height[stack[-1]] > height[j]:
                cur = stack.pop()
                left = stack[-1]  # the left sentinel is never popped
                area = (j - left - 1) * height[cur]
                if area > best_area:
                    best_area = area
                    # Padded columns left+1 .. j-1 are image columns
                    # left .. j-2; the rows are i-height+1 .. i.
                    best_bbox = [left, i - height[cur] + 1, j - 1, i + 1]
            stack.append(j)
    return best_bbox


def get_max_inner_rectangles(matrix_np: np.ndarray, rectangle_bbox: list, area_value: int, result_list: list,
                             cur_area: float = float('inf')) -> list:
    """
    Repeatedly extract the largest inscribed rectangles of a region.

    Each round finds the largest all-255 rectangle inside ``rectangle_bbox``,
    sets it to 0 in ``matrix_np`` and, if its area is greater than
    ``area_value``, appends it to ``result_list``. The search stops once a
    rectangle smaller than ``area_value`` has been found or no 255-valued
    pixel is left.

    ``matrix_np`` is modified in place: every rectangle that was found
    (including the last, too-small one) is set to 0.

    Args:
        matrix_np: single-channel image in which the region has value 255
        rectangle_bbox: search window [xmin, ymin, xmax, ymax] with exclusive
            xmax/ymax, e.g. [x, y, x + w, y + h] of the bounding rectangle;
            only used to limit the scanned area
        area_value: minimum-area threshold
        result_list: list that receives the rectangles as
            [xmin, ymin, xmax, ymax] (exclusive xmax/ymax, image coordinates)
        cur_area: area of the previously found rectangle; callers normally
            leave the default. A value below ``area_value`` stops the search
            immediately.
    Returns:
        ``result_list`` (the same list object), in order of discovery
    """
    xmin, ymin, xmax, ymax = rectangle_bbox

    # A loop instead of one recursion level per rectangle, so that a region
    # yielding many rectangles cannot hit the recursion limit.
    while cur_area >= area_value:
        # Re-crop every round: the previous round changed matrix_np.
        crop_rows = matrix_np[ymin:ymax, xmin:xmax].tolist()
        bbox_rec = _largest_foreground_rectangle(crop_rows)
        if bbox_rec is None:
            break

        # Map from crop coordinates back to the full image.
        src_min_x = xmin + bbox_rec[0]
        src_min_y = ymin + bbox_rec[1]
        src_max_x = xmin + bbox_rec[2]
        src_max_y = ymin + bbox_rec[3]

        # Erase exactly the rectangle that was found so the next round looks
        # for the next largest one.
        matrix_np[src_min_y:src_max_y, src_min_x:src_max_x] = 0

        cur_area = (src_max_x - src_min_x) * (src_max_y - src_min_y)
        if cur_area > area_value:
            result_list.append([src_min_x, src_min_y, src_max_x, src_max_y])

    return result_list
# Bounding rectangle of the sample contour, as [xmin, ymin, xmax, ymax].
# cv2.boundingRect expects a 32-bit integer (or float32) point set, hence the cast.
x, y, w, h = cv2.boundingRect(contour.reshape(-1, 1, 2).astype(np.int32))
cnt_bbox = [x, y, x + w, y + h]
# img must be the filled mask (region pixels == 255); the call blackens the
# rectangles it finds in img itself.
res_list = get_max_inner_rectangles(img, cnt_bbox, 100, [])
# Largest rectangle first.
res_list = sorted(res_list, key=lambda box: (box[2] - box[0]) * (box[3] - box[1]), reverse=True)
# None when no rectangle exceeds the area threshold.
res = res_list[0] if res_list else None

示例代码里面的轮廓使用该方法找到的最大内接矩形结果如图
（玫红色的矩形即为该方法找到的最大内接矩形）：
在这里插入图片描述

5.总结

上述两种方法其实第二种的思想更加巧妙，第一种就是利用图像的思想主要是硬逻辑去找的，但是在某些特殊场景下可以根据需求去沿着x方向或者是y方向去找，两种方法的缺陷都是不能找到倾斜的最大内接矩形，单纯记录一下自己遇到的问题，有想法的可以交流哦。