Automatically adjust picture orientation and window cropping

Recently, in the "2021 Guangdong Industrial Intelligent Manufacturing Innovation Competition - Intelligent Algorithm Competition: Quality Inspection of Tile Surface Defects", I encountered pictures with varying angle deviations. As with satellite images, the resolution is extremely high while the targets are extremely small. The original images therefore need automatic angle adjustment, window slicing, and a corresponding coordinate mapping back to the original image.

Read pictures

For large images, reading them directly with cv2.imread is about 30% slower than opening them with PIL and converting the result to a numpy array. Reading with Image.open is therefore recommended here.

import numpy as np
import cv2
from PIL import Image
# Decode with PIL instead of cv2.imread(BASE_DIR + img_file). The context
# manager closes the underlying file handle once the pixels have been copied
# into the numpy array.
with Image.open(BASE_DIR + img_file) as pil_img:
    # COLOR_RGB2BGR needs colour channels, so grayscale/palette files are
    # normalised to RGB first; images that are already RGB are left untouched.
    if pil_img.mode != "RGB":
        pil_img = pil_img.convert("RGB")
    # PIL yields RGB channel order; convert to the BGR order OpenCV expects.
    org_img = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR)

Detect outer frame

1. Convert to grayscale image

# Grayscale version of the BGR image.
greyPic = cv2.cvtColor(src=org_img, code=cv2.COLOR_BGR2GRAY)

2. Binarize the image

The threshold used here is the mean pixel value of the image, which works for most scenarios; adjust it yourself for special cases.

# cv2.threshold(src, thresh, maxval, type, dst=None)
#   src    - input array
#   thresh - the threshold value
#   maxval - value assigned by THRESH_BINARY / THRESH_BINARY_INV
#   type   - THRESH_BINARY (0): pixels above thresh become maxval, all others 0
# It returns (threshold value, thresholded image).
# The mean grey level of the image is used as the threshold.
mean_level = greyPic.mean()
ret, binPic = cv2.threshold(src=greyPic, thresh=mean_level, maxval=255, type=cv2.THRESH_BINARY)

3. Median filtering

# Median filter with aperture size 5 applied to the binary image.
median = cv2.medianBlur(src=binPic, ksize=5)

4. Find the outline

# cv2.findContours(image, mode, method)
#   mode   - RETR_CCOMP retrieves all contours in a two-level hierarchy;
#            RETR_EXTERNAL would keep only the outermost contours and
#            RETR_TREE would build the full nesting hierarchy.
#   method - CHAIN_APPROX_SIMPLE stores only the end points of straight segments.
# OpenCV releases before 3.2 modify the input image, so pass median.copy() there
# if `median` is still needed afterwards.
# OpenCV 3.x returns (image, contours, hierarchy) whereas 2.x and 4.x return
# (contours, hierarchy); taking the last two items works on every version.
contours, hierarchy = cv2.findContours(median, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]

5. Get the minimum bounding rectangle

# Fail with a clear message instead of an IndexError when nothing was detected.
if len(contours) == 0:
    raise ValueError("no contours found; cannot locate the outer frame")

# Index of the contour enclosing the largest area (the first one wins on ties).
maxArea = max(range(len(contours)), key=lambda idx: cv2.contourArea(contours[idx]))
# Convex hull as an (N, 2) point array. reshape is used instead of np.squeeze
# so that a single-point hull keeps its two dimensions.
hull = cv2.convexHull(contours[maxArea]).reshape(-1, 2)

# Minimum-area bounding rectangle: ((center_x, center_y), (width, height), rotation angle).
rect = cv2.minAreaRect(hull)
# Corner points of that rectangle, truncated to integer pixel coordinates.
# np.int0 was removed in NumPy 2.0; np.intp is the type it used to alias.
box = cv2.boxPoints(rect).astype(np.intp)

Adjust picture angle

Obtain the angle deviation, calculate the affine matrix, and transform the coordinates of the circumscribed rectangle box.

# Rotation centre and angle are taken from the minimum-area rectangle.
center, angle = rect[0], rect[2]
# Angles above 45 degrees are shifted down by 90 degrees.
angle = angle - 90 if angle > 45 else angle

# 2x3 affine matrix: rotate about `center` by `angle`, scale factor 1.
M = cv2.getRotationMatrix2D(center, angle, 1)
h, w, c = org_img.shape
# Rotate the picture onto a canvas of the original width and height.
dst = cv2.warpAffine(org_img, M, (w, h))
# Apply the same affine transform to every corner of the bounding box.
rotated_corners = []
for px, py in box:
    rotated_corners.append((M[0, 0] * px + M[0, 1] * py + M[0, 2],
                            M[1, 0] * px + M[1, 1] * py + M[1, 2]))
poly_r = np.asarray(rotated_corners)

Crop picture

# Axis-aligned extent of the rotated box, truncated to integer pixels.
# np.int0 was removed in NumPy 2.0; astype(np.intp) is the equivalent conversion.
x_s, y_s = poly_r.min(axis=0).astype(np.intp)
x_e, y_e = poly_r.max(axis=0).astype(np.intp)
# Margin, in pixels, kept around the detected frame.
border = 100
# Grow the window by the margin and clamp it to the image bounds.
x_s = int(max(x_s - border, 0))
y_s = int(max(y_s - border, 0))
x_e = int(min(x_e + border, w))
y_e = int(min(y_e + border, h))
# Crop the rotated image.
cut_img = dst[y_s:y_e, x_s:x_e, :]

window segmentation

After the image has been straightened, it can be divided into windows as needed. After specifying the window size, overlap ratio and output directory, you can get a bunch of small pictures.

def slice(img, img_file, window_l=1024, overlap=0.2, out_dir=""):
    """Cut ``img`` into overlapping square windows and write each one as a JPEG.

    Windows are laid out on a grid with stride ``window_l * (1 - overlap)``.
    A window that would run past the right or bottom edge is moved back so that
    it ends exactly on the edge, so every tile is ``window_l`` x ``window_l``.

    Each tile is written to ``out_dir + name`` where ``name`` is ``img_file``
    without its last four characters followed by ``_<column>_<row>.jpg``
    (zero-padded to three digits), e.g. ``xxx_000_000.jpg``.

    Args:
        img: image array with three dimensions (height, width, channels).
        img_file: file name of the source image; the last four characters
            (assumed to be an extension such as ``.jpg``) are dropped.
        window_l: side length of the square window in pixels.
        overlap: fraction of ``window_l`` shared by neighbouring windows.
        out_dir: prefix prepended verbatim to every output file name
            (include a trailing path separator).

    Raises:
        AssertionError: if the image is narrower or shorter than ``window_l``.
        IOError: if ``cv2.imwrite`` reports that a tile could not be written.
    """
    h, w, _ = img.shape

    step_l = int(window_l - window_l * overlap)  # stride between window origins
    x_num = int(np.ceil(max((w - window_l) / step_l, 0))) + 1
    y_num = int(np.ceil(max((h - window_l) / step_l, 0))) + 1

    # Checked once, before any tile is written, instead of on every iteration.
    assert w >= window_l
    assert h >= window_l

    stem = img_file[:-4]
    for i in range(x_num):
        # Clamp the origin so the last window ends on the image edge.
        x_s = min(i * step_l, w - window_l)
        for j in range(y_num):
            y_s = min(j * step_l, h - window_l)
            new_img_file = stem + '_%03d_%03d.jpg' % (i, j)
            tile = img[y_s:y_s + window_l, x_s:x_s + window_l, :]
            # cv2.imwrite signals failure through its return value; surface it
            # instead of silently producing an incomplete tile set.
            if not cv2.imwrite(out_dir + new_img_file, tile):
                raise IOError("failed to write tile: " + out_dir + new_img_file)

Batch processing

Encapsulate the function, scan the entire directory, and save the corresponding configuration file with the original image to prepare for coordinate restoration later.

def adjust_angle(org_img, img_file, border=100):
    """Upscale small images, straighten them via ``getCornerPoint`` and crop.

    Args:
        org_img: image array with three dimensions (height, width, channels).
        img_file: file name recorded in the returned metadata.
        border: margin in pixels kept around the region reported by
            ``getCornerPoint``.

    Returns:
        ``(data, img)`` where ``data`` is a dict with the keys ``name``,
        ``xyxy`` (crop window ``[x_s, y_s, x_e, y_e]``), ``rect``, ``border``
        and ``scale``, and ``img`` is the cropped image.
    """
    height, width, _ = org_img.shape

    # Normalise the scale: images with a side shorter than 4000 px are doubled.
    scale = 2 if (width < 4000 or height < 4000) else 1
    if scale != 1:
        width = int(width * scale)
        height = int(height * scale)
        org_img = cv2.resize(org_img, (width, height), interpolation=cv2.INTER_LINEAR)

    # NOTE(review): getCornerPoint is defined elsewhere; from the unpacking it
    # appears to return the frame extent, the rotated rect and the rotated
    # image. Confirm against its definition.
    left, top, right, bottom, rect, rotated = getCornerPoint(org_img)

    # Grow the window by `border` and clamp it to the image bounds.
    left = int(max(left - border, 0))
    top = int(max(top - border, 0))
    right = int(min(right + border, width))
    bottom = int(min(bottom + border, height))

    cropped = rotated[top:bottom, left:right, :]

    meta = {
        'name': img_file,
        'xyxy': [left, top, right, bottom],
        'rect': rect,
        'border': border,
        'scale': scale,
    }
    return meta, cropped

Set BASE_DIR to the original image directory and OUT_ADJUST to the directory for the angle-adjusted images; adjust.json is the configuration file that is written out.

# Straighten, crop and slice every image in BASE_DIR, collecting the
# per-image adjustment parameters needed later for coordinate restoration.
result_json = []
img_list = os.listdir(BASE_DIR)
for img_file in tqdm(img_list):
    # Close each file handle promptly instead of leaking one per image.
    with Image.open(BASE_DIR + img_file) as pil_img:
        # COLOR_RGB2BGR needs colour channels; normalise other modes to RGB.
        if pil_img.mode != "RGB":
            pil_img = pil_img.convert("RGB")
        org_img = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR)
    data, img = adjust_angle(org_img, img_file, border=100)
    result_json.append(data)
    cv2.imwrite(OUT_ADJUST + img_file, img)

    slice(img, img_file, TARGET, overlap=OVERLAP, out_dir=OUT_SLICE)

# Persist the adjustment parameters of all images.
with open(OUT_DIR + 'adjust.json', 'w') as fp:
    json.dump(result_json, fp, indent=4, ensure_ascii=False)

Coordinate restoration

1. Read the sliced image list

# Load the image records of the sliced test set and index them by file name:
# test_imgs_dict maps each 'file_name' to its position in test_imgs.
with open("instances_test2017_1024.json", 'r') as f:
    test_imgs = json.load(f)['images']
test_imgs_dict = {entry['file_name']: pos for pos, entry in enumerate(test_imgs)}

2. Read original file information

# Load the per-image adjustment records and index them by original file name:
# img_info_dict maps each 'name' to its position in img_info.
with open(OUT_DIR + 'adjust.json', 'r') as fp:
    img_info = json.load(fp)
img_info_dict = {entry['name']: pos for pos, entry in enumerate(img_info)}

3. Read the inference result file

By running inference on all the sub-images together, you can make full use of mmdetection's multi-threaded DataLoader and a large batch size (given enough video memory) to speed up the inference process.

# Load the pickled inference results.
# NOTE: pickle.load can execute arbitrary code, so only load result files
# that you produced yourself.
with open("result_1024-20.pkl", 'rb') as result_file:
    pred_set = pickle.load(result_file)

4. Merge coordinates into angle adjustment map

Given the width and height of the image and the same windowing parameters, the base coordinates x_s and y_s of each sub-image can be restored.

test_imgs_dict is a dictionary keyed by the file names of the sub-images, and pred_set is the list of prediction results. Given a file name of the form XXX_000_000.jpg, the corresponding inference results can be obtained through this two-level mapping.

def merge_result(info, pred_set, test_imgs_dict, img_file, window_l=1024, overlap=0.2,
                 num_classes=6):
    """Merge per-tile detections into the coordinates of the straightened image.

    The window grid is recomputed from the crop size stored in ``info`` using
    the same ``window_l`` / ``overlap`` rule that produced the tiles, so no
    image has to be read. The boxes of every tile are shifted by that tile's
    origin and concatenated per class.

    The arrays inside ``pred_set`` are not modified, so the function can be
    called repeatedly on the same predictions.

    Args:
        info: adjustment record with the keys ``name`` and ``xyxy``
            (``[x1, y1, x2, y2]`` of the cropped image).
        pred_set: sequence of per-tile predictions; each prediction is a
            sequence with one ``(k, 5)`` array per class, whose first four
            columns are shifted as x1, y1, x2, y2 and whose fifth column is
            passed through unchanged.
        test_imgs_dict: maps a tile file name to its index in ``pred_set``.
        img_file: file name of the original image; must equal ``info['name']``.
        window_l: side length of the square window in pixels.
        overlap: fraction of ``window_l`` shared by neighbouring windows.
        num_classes: number of per-class arrays in the result.

    Returns:
        A list of ``num_classes`` arrays of shape ``(n, 5)``.

    Raises:
        AssertionError: if ``info['name']`` differs from ``img_file`` or the
            crop is smaller than ``window_l`` in either dimension.
        KeyError: if an expected tile name is missing from ``test_imgs_dict``.
        IndexError: if a prediction holds more than ``num_classes`` classes.
    """
    assert info['name'] == img_file
    # Only the width and height are needed; take them from the record rather
    # than reading the (large) image.
    x1, y1, x2, y2 = info['xyxy']
    w = x2 - x1
    h = y2 - y1

    step_l = int(window_l - window_l * overlap)  # stride between window origins
    x_num = int(np.ceil(max((w - window_l) / step_l, 0))) + 1
    y_num = int(np.ceil(max((h - window_l) / step_l, 0))) + 1

    # Checked once, before any prediction is touched.
    assert w >= window_l
    assert h >= window_l

    stem = img_file[:-4]
    # One bucket per class, seeded with an empty (0, 5) float32 array so that
    # classes without detections still yield an array of the right shape.
    buckets = [[np.empty((0, 5), dtype=np.float32)] for _ in range(num_classes)]
    for i in range(x_num):
        # Clamp the origin so the last window ends on the image edge.
        x_s = min(i * step_l, w - window_l)
        for j in range(y_num):
            y_s = min(j * step_l, h - window_l)

            new_img_file = stem + '_%03d_%03d.jpg' % (i, j)
            pred = pred_set[test_imgs_dict[new_img_file]]  # predictions of this tile

            for label_id, bboxes in enumerate(pred):
                # Work on a copy: shifting in place would corrupt pred_set.
                shifted = np.array(bboxes)
                shifted[:, 0] += x_s
                shifted[:, 1] += y_s
                shifted[:, 2] += x_s
                shifted[:, 3] += y_s
                buckets[label_id].append(shifted)

    # Concatenate once per class instead of re-stacking on every tile.
    return [np.vstack(parts) for parts in buckets]

5. Coordinate mapping to original image

First, obtain the original image information info, obtain the external rectangle parameters, rotation angle, scaling ratio, border size, etc., build an inverse affine matrix M, and perform coordinate transformation on all detection frames.

def generate_json(pred, info, img_file, score_threshold=0.05, out_dir="", vis=False):
    """Map merged detections back to the original image and build result records.

    For every class the boxes are de-duplicated with ``nms``, shifted by the
    crop origin stored in ``info['xyxy']``, rotated back with the inverse of
    the straightening rotation, and passed to ``poly2ann`` together with
    ``info['scale']``.

    Args:
        pred: sequence with one ``(n, 5)`` array per class
            (x1, y1, x2, y2, score).
        info: adjustment record with the keys ``xyxy``, ``rect`` and ``scale``.
        img_file: file name stored in every result record.
        score_threshold: detections scoring below this value are dropped.
        out_dir: unused; kept for interface compatibility.
        vis: unused; kept for interface compatibility.

    Returns:
        A list of dicts with the keys ``name``, ``category`` (class index + 1),
        ``bbox`` (four floats taken from ``poly2ann``'s result) and ``score``.
    """
    base_x, base_y = info['xyxy'][:2]
    rect = info['rect']
    scale = info['scale']
    center = tuple(rect[0])
    angle = rect[2]
    if angle > 45:
        angle = angle - 90

    # Inverse rotation: same centre, negated angle.
    M = cv2.getRotationMatrix2D(center, -angle, 1)

    json_results = []
    for label_id, bboxes in enumerate(pred):
        # Nothing to suppress or convert for a class without detections.
        if len(bboxes) == 0:
            continue
        # Overlapping windows report the same object several times; keep one.
        bboxes = nms(np.array(bboxes[:, :4]), np.array(bboxes[:, 4]), iou_threshold=0.5)[0]
        # Shift from cropped-image coordinates to rotated-image coordinates.
        bboxes[:, 0] = bboxes[:, 0] + base_x
        bboxes[:, 1] = bboxes[:, 1] + base_y
        bboxes[:, 2] = bboxes[:, 2] + base_x
        bboxes[:, 3] = bboxes[:, 3] + base_y

        for x1, y1, x2, y2, score in bboxes:
            if score < score_threshold:
                continue

            # Rotate the four box corners back into the un-rotated image.
            poly_r = np.asarray([(M[0][0] * x + M[0][1] * y + M[0][2],
                                  M[1][0] * x + M[1][1] * y + M[1][2]) for (x, y) in
                                 [(x1, y1), (x1, y2), (x2, y1), (x2, y2)]])

            # NOTE(review): poly2ann is defined elsewhere; it is given `scale`,
            # presumably to undo the upscaling of small images. Confirm.
            restored = poly2ann(poly_r, score, scale=scale)

            json_results.append({
                'name': img_file,
                'category': label_id + 1,
                'bbox': [float(restored[0]), float(restored[1]),
                         float(restored[2]), float(restored[3])],
                'score': float(score),
            })

    return json_results

Finally, after nms and a series of post-processing steps, the results can be mapped back to the original image.

Perfect finish!

Guess you like

Origin blog.csdn.net/weixin_47479625/article/details/113449495
Recommended