Ajuste automaticamente a orientação da imagem e o corte da janela

❝
Recentemente, na competição "2021 Guangdong Industrial Intelligent Manufacturing Innovation Competition — Competição de algoritmos inteligentes: inspeção de qualidade de defeitos de superfície de ladrilhos", encontrei imagens com diferentes desvios de ângulo. Assim como nas imagens de satélite, a resolução é extremamente alta, mas os alvos são extremamente pequenos, o que exige ajuste automático do ângulo, divisão em janelas e o mapeamento correspondente das coordenadas de volta para a imagem original.
❞

Leia fotos

Para imagens grandes, usar `cv2.imread` diretamente é cerca de 30% mais lento do que ler com PIL e converter para `numpy array`. Recomenda-se aqui usar `Image.open` para a leitura.

import numpy as np
import cv2
from PIL import Image
# cv2.imread(BASE_DIR + img_file) would also work; PIL decoding is used here
# instead because it is reported to be faster on very large images.
with Image.open(BASE_DIR + img_file) as pil_img:
    # cv2.COLOR_RGB2BGR assumes RGB channel order, so grayscale, palette or
    # CMYK files are normalised first. RGB files are passed through untouched
    # to avoid an extra copy of a very large image.
    if pil_img.mode != 'RGB':
        pil_img = pil_img.convert('RGB')
    org_img = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR)

Detectar quadro externo

1. Converta para imagem em tons de cinza

# Convert the BGR image to a grayscale image.
greyPic = cv2.cvtColor(org_img, cv2.COLOR_BGR2GRAY)

2. Binarize a imagem

O limite aqui usa o valor médio da imagem, que pode atender à maioria dos cenários e pode ser ajustado por você mesmo em ocasiões especiais.

# threshold(src, thresh, maxval, type, dst=None)
# src is the input array, thresh is the threshold value, and maxval is the
# maximum value used when type is THRESH_BINARY or THRESH_BINARY_INV.
# type has 5 variants; 0 (THRESH_BINARY) is used here: a pixel above the
# threshold becomes maxval (the previous argument), otherwise it becomes 0.
# The image mean is used as the threshold.
# The first return value is the threshold, the second is the thresholded image.
ret, binPic = cv2.threshold(greyPic, greyPic.mean(), 255, cv2.THRESH_BINARY)

3. Filtragem mediana

# Median blur with kernel size 5 applied to the binarized image.
median = cv2.medianBlur(binPic, 5)

4. Encontre os contornos

# findContours() takes three arguments: the input image, the contour
# retrieval (hierarchy) mode and the contour approximation method.
# The function may modify the input image in some OpenCV versions; pass
# median.copy() if `median` is still needed afterwards.
# The retrieval mode controls the hierarchy that is returned: cv2.RETR_TREE
# builds the full hierarchy of nested contours, cv2.RETR_EXTERNAL keeps only
# the outermost contours. cv2.RETR_CCOMP is the mode used here.
# OpenCV 3.x returns (image, contours, hierarchy) whereas 2.x and 4.x return
# (contours, hierarchy); taking the last two items works with all of them.
contours, hierarchy = cv2.findContours(median, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

5. Obtenha o retângulo delimitador mínimo

# Index of the contour with the largest area (the first one wins on ties).
# Each area is computed once instead of twice per loop iteration; default=0
# keeps the index at 0 when no contour was found, as before.
areas = [cv2.contourArea(contour) for contour in contours]
maxArea = max(range(len(areas)), key=areas.__getitem__, default=0)

# Convex hull of that contour as an (N, 2) point array. reshape(-1, 2) keeps
# the array two-dimensional even when the hull consists of a single point.
hull = cv2.convexHull(contours[maxArea]).reshape(-1, 2)

# Minimum-area bounding rectangle: (center (x, y), (width, height), rotation angle).
rect = cv2.minAreaRect(hull)
# Corner points of that rectangle as integers.
# np.int0 was an alias of np.intp that NumPy 2.0 removed, so cast explicitly.
box = cv2.boxPoints(rect).astype(np.intp)

Ajustar o ângulo da imagem

Obtenha o desvio angular, calcule a matriz afim e transforme as coordenadas `box` do retângulo circunscrito.

# Centre and rotation angle of the minimum-area rectangle.
center, angle = rect[0], rect[2]
# Angles above 45 degrees are shifted by -90 so the smaller rotation is used.
angle = angle - 90 if angle > 45 else angle

# Rotation matrix about the rectangle centre, scale factor 1.
M = cv2.getRotationMatrix2D(center, angle, 1)
h, w, c = org_img.shape
# Rotate the image onto a canvas of the original size.
dst = cv2.warpAffine(org_img, M, (w, h))
# Apply the same affine transform to the rectangle corners.
(m00, m01, m02), (m10, m11, m12) = M
poly_r = np.asarray([(m00 * x + m01 * y + m02,
                      m10 * x + m11 * y + m12) for (x, y) in box])

Cortar a foto

# Axis-aligned bounds of the rotated rectangle, truncated to integers.
# np.int0 was an alias of np.intp that NumPy 2.0 removed, so cast explicitly.
x_s, y_s = poly_r.min(axis=0).astype(np.intp)
x_e, y_e = poly_r.max(axis=0).astype(np.intp)
# Margin (in pixels) kept around the rectangle.
border = 100
# Grow the box by the margin and clamp it to the image size (w, h).
x_s = int(max((x_s - border), 0))
y_s = int(max((y_s - border), 0))
x_e = int(min((x_e + border), w))
y_e = int(min((y_e + border), h))
# Crop the rotated image.
cut_img = dst[y_s:y_e, x_s:x_e, :]

segmentação de janela

Depois que a imagem for endireitada, ela poderá ser dividida em janelas conforme necessário. Depois de especificar o tamanho da janela, a taxa de sobreposição e o diretório de saída, você pode obter várias imagens pequenas.

def slice(img, img_file, window_l=1024, overlap=0.2, out_dir=""):
    """Cut ``img`` into square windows and write each one to disk.

    Windows are ``window_l`` pixels on a side and advance by
    ``int(window_l - window_l * overlap)`` pixels. A window that would run
    past the right or bottom edge is moved back so that it ends exactly on
    that edge. Each window is written with ``cv2.imwrite`` to
    ``out_dir + name``, where ``name`` is ``img_file`` without its last four
    characters followed by ``_<column>_<row>.jpg`` (three-digit, zero-padded
    indices), e.g. ``xxx_000_000.jpg``.

    Args:
        img: three-dimensional image array (rows, columns, channels).
        img_file: file name of the source image.
        window_l: side length of a window in pixels.
        overlap: fraction of ``window_l`` shared by neighbouring windows.
        out_dir: prefix prepended to every output file name.

    Raises:
        AssertionError: if the image is narrower or shorter than ``window_l``.
    """
    height, width, _ = img.shape

    stride = int(window_l - window_l * overlap)  # step between windows
    cols = int(np.ceil(max((width - window_l) / stride, 0))) + 1
    rows = int(np.ceil(max((height - window_l) / stride, 0))) + 1

    # The grid always has at least one cell, so checking once here is
    # equivalent to checking before every write.
    assert width >= window_l
    assert height >= window_l

    stem = img_file[:-4]
    for col in range(cols):
        left = col * stride
        right = left + window_l
        if right > width:
            # Pull the last column back inside the image.
            left, right = width - window_l, width
        for row in range(rows):
            top = row * stride
            bottom = top + window_l
            if bottom > height:
                # Pull the last row back inside the image.
                top, bottom = height - window_l, height
            tile_name = stem + '_%03d_%03d.jpg' % (col, row)
            cv2.imwrite(out_dir + tile_name, img[top:bottom, left:right, :])

Processamento em lote

Encapsule a lógica em uma função, percorra o diretório inteiro e salve, junto com as imagens ajustadas, o arquivo de configuração correspondente a cada imagem original, para permitir a restauração das coordenadas posteriormente.

def adjust_angle(org_img, img_file, border=100):
    """Deskew an image, crop it with a margin and describe the crop.

    Images whose width or height is below 4000 pixels are first enlarged by
    a factor of two. ``getCornerPoint`` (defined elsewhere) then supplies a
    bounding box, a rectangle description and a new image; presumably the
    rotated image and the box of the detected frame inside it (confirm
    against ``getCornerPoint``). The box is grown by ``border`` pixels on
    every side, clamped to the image size, and used to crop the new image.

    Args:
        org_img: three-dimensional image array (rows, columns, channels).
        img_file: file name stored in the returned record.
        border: margin in pixels added around the box.

    Returns:
        ``(data, img)`` where ``data`` is a dict with the keys ``name``,
        ``xyxy`` (crop box), ``rect``, ``border`` and ``scale``, and ``img``
        is the cropped image.
    """
    height, width, _ = org_img.shape
    # Normalise the scale: anything under 4000 px on a side is doubled.
    scale = 2 if (width < 4000 or height < 4000) else 1
    if scale != 1:
        width = int(width * scale)
        height = int(height * scale)
        org_img = cv2.resize(org_img, (width, height), interpolation=cv2.INTER_LINEAR)

    left, top, right, bottom, rect, rotated = getCornerPoint(org_img)

    # Keep a margin of `border` pixels around the box, clamped to the image.
    left = int(max((left - border), 0))
    top = int(max((top - border), 0))
    right = int(min((right + border), width))
    bottom = int(min((bottom + border), height))

    cropped = rotated[top:bottom, left:right, :]

    info = {
        'name': img_file,
        'xyxy': [left, top, right, bottom],
        'rect': rect,
        'border': border,
        'scale': scale,
    }
    return info, cropped

Defina `BASE_DIR` como o diretório das imagens originais e `OUT_ADJUST` como o diretório de saída após o ajuste do ângulo; `adjust.json` é o arquivo de configuração.

# Deskew, crop and slice every file in BASE_DIR, collecting one record per
# image so detections can be mapped back to the original later.
result_json = []
img_list = os.listdir(BASE_DIR)
for img_file in tqdm(img_list):
    # The context manager releases the file handle once the pixels are read.
    with Image.open(BASE_DIR + img_file) as pil_img:
        # cv2.COLOR_RGB2BGR assumes RGB channel order, so grayscale, palette
        # or CMYK files are normalised first; RGB files are not copied.
        if pil_img.mode != 'RGB':
            pil_img = pil_img.convert('RGB')
        org_img = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR)
    data, img = adjust_angle(org_img, img_file, border=100)
    result_json.append(data)
    cv2.imwrite(OUT_ADJUST + img_file, img)

    slice(img, img_file, TARGET, overlap=OVERLAP, out_dir=OUT_SLICE)

# NOTE(review): ensure_ascii=False writes non-ASCII file names with the
# platform default encoding; whoever reads adjust.json must open it the same way.
with open(OUT_DIR + 'adjust.json', 'w') as fp:
    json.dump(result_json, fp, indent=4, ensure_ascii=False)

Restauração de coordenadas

1. Leia a lista de imagens fatiadas

# Load the list of sliced images and index it by file name:
# test_imgs_dict maps each 'file_name' to its position in the 'images' list.
with open("instances_test2017_1024.json", 'r') as f:
    test_imgs = json.load(f)['images']
test_imgs_dict = {obj['file_name']: i for i, obj in enumerate(test_imgs)}

2. Leia as informações do arquivo original

# Load the per-image adjustment records and index them by image name:
# img_info_dict maps each 'name' to its position in img_info.
with open(OUT_DIR + 'adjust.json', 'r') as fp:
    img_info = json.load(fp)
img_info_dict = {obj['name']: i for i, obj in enumerate(img_info)}

3. Leia o arquivo de resultados da inferência

Ao fazer a inferência de várias subimagens em conjunto, é possível aproveitar ao máximo o `DataLoader` multithread do `mmdetection` e um `batch size` grande (graças à ampla memória de vídeo) para acelerar o processo de inferência.

# Load the pickled inference results into pred_set.
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open result files that come from a trusted source.
with open("result_1024-20.pkl", 'rb') as f:
    pred_set = pickle.load(f)

4. Mesclar coordenadas no mapa de ajuste de ângulo

A largura e a altura da imagem são obtidas e, com os mesmos parâmetros de janelamento, as coordenadas podem ser restauradas somando as coordenadas de base `x_s` e `y_s` de cada subimagem.

`test_imgs_dict` é um dicionário com os nomes de arquivo das subimagens, e `pred_set` é a lista de resultados de previsão. A partir do nome do arquivo no formato `XXX_000_000.jpg`, o conjunto de resultados de inferência correspondente é obtido após um mapeamento em dois níveis.

def merge_result(info, pred_set, test_imgs_dict, img_file, window_l=1024, overlap=0.2,
                 num_classes=6):
    """Merge per-window detections into coordinates of the angle-adjusted image.

    The window grid is rebuilt from the crop size stored in ``info['xyxy']``
    with the same ``window_l`` / ``overlap`` rule used when the image was
    sliced, so no image has to be read. For every window the matching
    prediction is looked up via its file name
    (``img_file[:-4] + '_<column>_<row>.jpg'``), its box columns 0-3 are
    shifted by the window origin, and the boxes are stacked per class.

    The arrays inside ``pred_set`` are left untouched: offsets are applied
    to copies, so calling this function repeatedly gives the same result.

    Args:
        info: record of the image; ``info['name']`` must equal ``img_file``
            and ``info['xyxy']`` holds the crop box ``[x1, y1, x2, y2]``.
        pred_set: sequence of predictions; each prediction is a sequence of
            per-class arrays whose columns 0-3 are ``x1, y1, x2, y2``.
        test_imgs_dict: maps a window file name to its index in ``pred_set``.
        img_file: file name of the image being merged.
        window_l: side length of a window in pixels.
        overlap: fraction of ``window_l`` shared by neighbouring windows.
        num_classes: minimum number of entries in the result; the list grows
            if a prediction contains more classes.

    Returns:
        A list with one array per class holding all shifted boxes of that
        class; classes without detections are float32 arrays of shape (0, 5).

    Raises:
        AssertionError: if ``info`` belongs to another image or the crop is
            smaller than ``window_l`` in either direction.
        KeyError: if a window file name is missing from ``test_imgs_dict``.
    """
    assert info['name'] == img_file
    # Only the image size is needed; read it from the record instead of
    # loading the image, which would be slow.
    x1, y1, x2, y2 = info['xyxy']
    w = x2 - x1
    h = y2 - y1

    step_l = int(window_l - window_l * overlap)  # step between windows
    x_num = int(np.ceil(max((w - window_l) / step_l, 0))) + 1
    y_num = int(np.ceil(max((h - window_l) / step_l, 0))) + 1

    # Loop-invariant checks: the grid always has at least one window.
    assert w >= window_l
    assert h >= window_l

    # Every class starts with an empty (0, 5) block so that classes without
    # detections still come out with that shape and dtype.
    empty = np.zeros((0, 5), dtype=np.float32)
    parts_per_class = [[empty] for _ in range(num_classes)]

    stem = img_file[:-4]
    for i in range(x_num):
        x_s = i * step_l
        if x_s + window_l > w:
            # Last column is pulled back inside the image.
            x_s = w - window_l
        for j in range(y_num):
            y_s = j * step_l
            if y_s + window_l > h:
                # Last row is pulled back inside the image.
                y_s = h - window_l

            new_img_file = stem + '_%03d_%03d.jpg' % (i, j)
            pred = pred_set[test_imgs_dict[new_img_file]]  # prediction of this window

            for label_id, bboxes in enumerate(pred):
                # Work on a copy so the caller's predictions are not shifted.
                shifted = np.array(bboxes)
                # Move the boxes from window to image coordinates.
                for col, offset in ((0, x_s), (1, y_s), (2, x_s), (3, y_s)):
                    shifted[:, col] = shifted[:, col] + offset
                while label_id >= len(parts_per_class):
                    parts_per_class.append([empty])
                parts_per_class[label_id].append(shifted)

    # One stack per class instead of re-stacking after every window.
    return [np.vstack(parts) for parts in parts_per_class]

5. Mapeamento de coordenadas para a imagem original

Primeiro, obtenha as informações `info` da imagem original — parâmetros do retângulo externo, ângulo de rotação, fator de escala, tamanho da borda etc. —, construa a matriz afim inversa `M` e aplique a transformação de coordenadas a todas as caixas de detecção.

def generate_json(pred, info, img_file, score_threshold=0.05, out_dir="", vis=False):
    """Map merged detections back onto the original image as result records.

    For every class the boxes are filtered with ``nms`` (IoU threshold 0.5),
    shifted by the crop origin stored in ``info['xyxy']``, rotated back with
    the inverse of the deskew rotation, and handed to ``poly2ann`` together
    with the stored scale. ``nms`` and ``poly2ann`` are defined elsewhere;
    ``poly2ann`` presumably turns the four rotated corners into a box and
    undoes the upscaling (confirm against its definition).

    Args:
        pred: per-class arrays with columns ``x1, y1, x2, y2, score`` in
            coordinates of the cropped, angle-adjusted image.
        info: record of the image with the keys ``xyxy``, ``rect`` and
            ``scale``.
        img_file: file name written into every record.
        score_threshold: detections scoring below this value are dropped.
        out_dir: not used by this function; kept for interface compatibility.
        vis: not used by this function; kept for interface compatibility.

    Returns:
        A list of dicts with the keys ``name``, ``category`` (1-based class
        index), ``bbox`` (four floats) and ``score``.
    """
    # Only the crop origin is needed from the stored box.
    base_x, base_y, _, _ = info['xyxy']
    rect = info['rect']
    scale = info['scale']
    center = tuple(rect[0])
    angle = rect[2]
    # Same angle normalisation as used for the forward rotation.
    if angle > 45:
        angle = angle - 90

    # Inverse rotation: same centre, negated angle.
    M = cv2.getRotationMatrix2D(center, -angle, 1)

    json_results = []
    for label_id, bboxes in enumerate(pred):
        # A class without detections yields no records; skip it rather than
        # handing an empty array to nms.
        if len(bboxes) == 0:
            continue
        # Suppress duplicates coming from overlapping windows.
        bboxes = nms(np.array(bboxes[:, :4]), np.array(bboxes[:, 4]), iou_threshold=0.5)[0]
        # Move from crop coordinates to coordinates of the rotated image.
        bboxes[:, 0] = bboxes[:, 0] + base_x
        bboxes[:, 1] = bboxes[:, 1] + base_y
        bboxes[:, 2] = bboxes[:, 2] + base_x
        bboxes[:, 3] = bboxes[:, 3] + base_y

        for x1, y1, x2, y2, score in bboxes:
            if score < score_threshold:
                continue

            # Rotate the four box corners back into the unrotated image.
            poly_r = np.asarray([(M[0][0] * x + M[0][1] * y + M[0][2],
                                  M[1][0] * x + M[1][1] * y + M[1][2]) for (x, y) in
                                 [(x1, y1), (x1, y2), (x2, y1), (x2, y2)]])

            # Undo the scaling applied to small images.
            ann = poly2ann(poly_r, score, scale=scale)

            json_results.append({
                'name': img_file,
                'category': label_id + 1,
                'bbox': [float(ann[0]), float(ann[1]), float(ann[2]), float(ann[3])],
                'score': float(score),
            })

    return json_results

Finalmente, após uma série de etapas de pós-processamento, como `nms`, os resultados podem ser mapeados para a imagem original.

Acabamento perfeito!