Predicting the actual size of objects in a picture with Python and OpenCV (computer vision)

Summary

This experiment requires predicting the length and width of the book in the picture, and the outer diameter of the circle drawn in pencil in the upper right corner of the book, based on the known diameter of a coin. First correct (rectify) the picture, find the outline of the coin and measure the coin's diameter in pixels to obtain the scale factor between actual size and pixels. Then find the outlines of the book and of the pencil-drawn circle, and multiply their pixel sizes by the scale factor to estimate their actual sizes. The predicted length of the book is 20.150000 cm, the width of the book is 15.250000 cm, and the outer diameter of the pencil-drawn circle in the upper right of the book is 4.100000 cm.

1 Introduction


Restatement of the problem: We know that a one-yuan coin (diameter 2.5 cm) is placed in the lower right corner of the book. Please use computer vision technology to predict the actual size of the targets in the picture.

1. Predict the length and width of the book in the picture (unit: centimeters).

2. Predict the outer diameter (unit: centimeters) of the circle drawn with a pencil in the upper right corner of the book.

2. How to solve

In this experiment, I first made modifications based on existing relevant solutions and referring to the existing code.

Among them, the first part to be modified is the threshold of Canny operator edge detection.

# Canny edge detection; the 35/70 thresholds were chosen empirically
imgCanny = cv2.Canny(imgBlur, 35, 70)

I find the above results to be the best.

The second modification is in the shape detection function. During testing, I found that the outlines of the book, the hand-drawn circle, and the coin were all treated as circles (perhaps because the outline of the book was not completely closed), so I defined a Circle list to store these outlines. From the contour map, it can be seen that the book, the hand-drawn circle, and the coin happen to be the largest contours.

So you can first get the pixel size of the book outline, and then delete the book outline from the Circle list. In the same way, we obtain the pixel sizes of the pencil-drawn circle and of the coin outline in turn. Finally, substituting the actual diameter of the coin, 2.5 centimeters, gives the scale factor between actual size and pixels; multiplying it by the pixel sizes of the book outline and of the pencil-drawn circle gives their estimated actual sizes. In practice, the drawn bounding boxes contain errors caused by shooting angle, reflection, shadow, etc., so the bounding boxes of the coin and of the pencil-drawn circle are not square, which leads to estimation errors. I therefore used the average of the bounding box's width and height in place of the diameter when calculating the scale factor and the estimated actual size of the pencil-drawn circle.

# Find the largest contour (book, hand-drawn circle or coin) and draw its bounding box
def draw_Shape(image, counters):
    """Outline the largest-area contour on ``image`` and box it.

    Args:
        image: Image the contour and its bounding rectangle are drawn onto.
        counters: Iterable of contours, e.g. the result of ``cv2.findContours``.

    Returns:
        ``(shape_contour, w, h)``: the selected contour plus the width and
        height of the bounding rectangle of its polygonal approximation.

    Raises:
        ValueError: If ``counters`` holds no contour with a positive area.
    """
    max_area = 0
    shape_contour = None
    for contour in counters:
        area = cv2.contourArea(contour)
        if max_area < area:
            max_area = area
            shape_contour = contour
    if shape_contour is None:
        # Fail with a clear message instead of an opaque OpenCV error below.
        raise ValueError("draw_Shape: no contour with a positive area was supplied")
    # drawContours expects a *list* of contours; wrapping the single contour
    # draws it as one connected outline rather than as isolated points.
    cv2.drawContours(image, [shape_contour], -1, (255, 0, 0), 4)
    perimeter = cv2.arcLength(shape_contour, True)  # closed-contour perimeter
    approx = cv2.approxPolyDP(shape_contour, 0.02 * perimeter, True)  # corner points
    x, y, w, h = cv2.boundingRect(approx)  # top-left corner, width and height
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)  # bounding box
    return shape_contour, w, h


# Drop the given (largest) contour from a collection of contours
def del_max_contours(contours, contour):
    """Return a new list with every contour that is not equal to ``contour``.

    Equality is element-wise (``np.array_equal``), so all copies of the
    contour are removed; ``contours`` itself is left untouched.
    """
    return [
        candidate
        for candidate in contours
        if not np.array_equal(candidate, contour)
    ]


# Shape detection function
def ShapeDetection(img):
    """Collect every external contour of ``img`` that has more than four corners.

    Each contour is also drawn onto the global ``imgContour``. Only the
    collection step is shown in this excerpt.
    """
    contours, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)  # find the contour points
    Circle = []
    for obj in contours:
        area = cv2.contourArea(obj)  # area enclosed by the contour
        cv2.drawContours(imgContour, obj, -1, (255, 0, 0), 4)  # draw the contour
        perimeter = cv2.arcLength(obj, True)  # contour perimeter
        approx = cv2.approxPolyDP(obj, 0.02 * perimeter, True)  # corner points of the contour
        CornerNum = len(approx)  # number of corner points
        x, y, w, h = cv2.boundingRect(approx)  # top-left coordinates, width and height

        # More than four corners: treated as a circle candidate
        if CornerNum > 4:
            Circle.append(obj)

Final Results:

However, because the image has not been corrected, there will be some error in the predicted value. So, I went online to check how to correct the picture and then measured it. Through repeated testing, the following parameters are used for complete drawing of the contour.

# Gaussian blur
imgBlur = cv2.GaussianBlur(imgGray, (9, 9), 0)
# Canny edge detection
imgCanny = cv2.Canny(imgBlur, 20, 52)
kernel = np.ones((5, 5))
# Dilate, then erode, to fill small holes and connect broken contours
imgDial = cv2.dilate(imgCanny, kernel, iterations=6)  # dilation
imgThre = cv2.erode(imgDial, kernel, iterations=5)  # erosion

 Then the book is corrected through the edge points of the book, and the size (pixels) of the outline of the book can be obtained during the correction process.

# Put the four corner points of a contour into a fixed order
def reorder(myPoints):
    """Return the four points of ``myPoints`` in a canonical order.

    The result has the same shape and dtype as the input. Slot 0 holds the
    point with the smallest x + y, slot 3 the one with the largest x + y,
    slot 1 the one with the smallest y - x and slot 2 the one with the
    largest y - x.
    """
    ordered = np.zeros_like(myPoints)
    corners = myPoints.reshape((4, 2))

    coord_sum = corners.sum(axis=1)
    coord_diff = np.diff(corners, axis=1)  # y - x for every point

    ordered[0] = corners[np.argmin(coord_sum)]
    ordered[1] = corners[np.argmin(coord_diff)]
    ordered[2] = corners[np.argmax(coord_diff)]
    ordered[3] = corners[np.argmax(coord_sum)]
    return ordered


# Perspective-correct (rectify) the image
def warpImg(img, points, w, h, pad=6):
    """Warp the quadrilateral ``points`` of ``img`` onto a ``w`` x ``h`` rectangle.

    The corner points are first put into canonical order with ``reorder``;
    after the warp, ``pad`` pixels are trimmed from every border of the
    result.
    """
    src_corners = np.float32(reorder(points))
    dst_corners = np.float32([[0, 0], [w, 0], [0, h], [w, h]])
    transform = cv2.getPerspectiveTransform(src_corners, dst_corners)
    warped = cv2.warpPerspective(img, transform, (w, h))

    rows, cols = warped.shape[0], warped.shape[1]
    return warped[pad:rows - pad, pad:cols - pad]


# Shape detection: locate the book and rectify the image around it
def ShapeDetection(img):
    """Find the largest four-cornered contour in ``img`` and warp to it.

    Every external contour is drawn onto the global ``imgContour``; that
    annotated image is then perspective-corrected with ``warpImg`` using the
    corner points of the largest quadrilateral.

    Args:
        img: Binary edge image passed to ``cv2.findContours``.

    Returns:
        ``(img_warp, h, w)``: the rectified image and the bounding-rectangle
        height and width of the selected quadrilateral.

    Raises:
        ValueError: If no contour approximates to exactly four corners.
    """
    # [-2] picks the contour list whether findContours returns 2 or 3 values
    # (the return signature differs between OpenCV major versions).
    contours = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
    Rectangle = []
    for obj in contours:
        area = cv2.contourArea(obj)  # area enclosed by the contour
        cv2.drawContours(imgContour, obj, -1, (255, 0, 0), 4)  # draw the contour
        perimeter = cv2.arcLength(obj, True)  # contour perimeter
        approx = cv2.approxPolyDP(obj, 0.02 * perimeter, True)  # corner points

        if len(approx) == 4:
            x, y, w, h = cv2.boundingRect(approx)  # top-left corner, width, height
            Rectangle.append([obj, area, approx, h, w])

    if not Rectangle:
        # Without this guard the lookup below fails with a bare IndexError.
        raise ValueError("ShapeDetection: no four-cornered contour found")

    # Largest area wins; on ties max() keeps the first one, like the stable sort did.
    _, _, book_approx, h, w = max(Rectangle, key=lambda item: item[1])
    img_warp = warpImg(imgContour, book_approx, w, h)
    return img_warp, h, w

Picture after correction:

 The task after correction is to find the outline of the coin and the outline of the large circle. In this step, I originally used the shape recognition method from the beginning to find shapes with more than four corner points, but the drawn result was not ideal: the detected circle outlines looked more like ellipses. So I used the Hough transform (Hough circle detection) instead.

# Gaussian blur
imgBlur = cv2.GaussianBlur(imgGray, (7, 7), 0)
# Hough transform (circle detection)
circles = cv2.HoughCircles(imgBlur, cv2.HOUGH_GRADIENT, 1, 50,
                           param1=20, param2=54, minRadius=10, maxRadius=1000)

if circles is not None:
    # Print only after the None check: HoughCircles returns None when it
    # finds no circle, and indexing None would raise a TypeError.
    print(circles[0])
    circles = np.round(circles[0, :]).astype("int")
    for (x, y, r) in circles:
        cv2.circle(imgWarpContour, (x, y), r, (0, 255, 0), 2)

Plot the result:

Finally, the diameter of the coin's outline detected based on the Hough transform is compared with the actual diameter to obtain the proportional coefficient, and then the predicted length and width of the book and the diameter of the circle drawn by the pencil are obtained.​ 

3. Experimental results

I think the results of this experiment are in line with expectations.

Before correcting the image, the outline I drew would have large errors due to interference from image reflections, shadows, tilt angles, etc. After the image is corrected, some interfering factors are eliminated, such as reducing the tilt angle, reflection, etc., so that the error of the obtained contour is reduced.

However, there are still some interference factors in the corrected image. For example, the circles drawn by pencils and the boundary lines of coins have ghosting problems, resulting in errors in the drawn outlines.

4 Conclusion

In this experiment, the main problem is how to extract the outlines of the book, the coin, and the pencil-drawn circle. The outlines drawn on the uncorrected picture have large errors, but the errors of the outlines drawn on the corrected picture are significantly reduced. I proposed a detection method based on OpenCV and demonstrated the specific implementation process through Python code. This method can be used to detect the position and radius of graphic objects in pictures and then predict the object size.

First, Gaussian filtering is applied to the image and the Canny operator detects the edges; dilation and erosion are then used to fill small holes in the objects and connect the broken contours. This yields the four corner points of the book, and a perspective transformation based on these points corrects the picture. Then the Hough transform is used for circle detection to obtain the radii of the coin and of the large circle, which finally gives the result.

Because most of the interference factors have been eliminated, I feel that the prediction results are in line with expectations.

Complete code

import cv2
import numpy as np


# Put the four corner points of a contour into a fixed order
def reorder(myPoints):
    """Return the four points of ``myPoints`` in a canonical order.

    The result has the same shape and dtype as the input. Slot 0 holds the
    point with the smallest x + y, slot 3 the one with the largest x + y,
    slot 1 the one with the smallest y - x and slot 2 the one with the
    largest y - x.
    """
    ordered = np.zeros_like(myPoints)
    corners = myPoints.reshape((4, 2))

    coord_sum = corners.sum(axis=1)
    coord_diff = np.diff(corners, axis=1)  # y - x for every point

    ordered[0] = corners[np.argmin(coord_sum)]
    ordered[1] = corners[np.argmin(coord_diff)]
    ordered[2] = corners[np.argmax(coord_diff)]
    ordered[3] = corners[np.argmax(coord_sum)]
    return ordered


# Perspective-correct (rectify) the image
def warpImg(img, points, w, h, pad=6):
    """Warp the quadrilateral ``points`` of ``img`` onto a ``w`` x ``h`` rectangle.

    The corner points are first put into canonical order with ``reorder``;
    after the warp, ``pad`` pixels are trimmed from every border of the
    result.
    """
    src_corners = np.float32(reorder(points))
    dst_corners = np.float32([[0, 0], [w, 0], [0, h], [w, h]])
    transform = cv2.getPerspectiveTransform(src_corners, dst_corners)
    warped = cv2.warpPerspective(img, transform, (w, h))

    rows, cols = warped.shape[0], warped.shape[1]
    return warped[pad:rows - pad, pad:cols - pad]


# Shape detection -- book: locate the book and rectify the image around it
def ShapeDetection(img):
    """Find the largest four-cornered contour in ``img`` and warp to it.

    Every external contour is drawn onto the global ``imgContour``; that
    annotated image is then perspective-corrected with ``warpImg`` using the
    corner points of the largest quadrilateral.

    Args:
        img: Binary edge image passed to ``cv2.findContours``.

    Returns:
        ``(img_warp, h, w)``: the rectified image and the bounding-rectangle
        height and width of the selected quadrilateral.

    Raises:
        ValueError: If no contour approximates to exactly four corners.
    """
    # [-2] picks the contour list whether findContours returns 2 or 3 values
    # (the return signature differs between OpenCV major versions).
    contours = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
    Rectangle = []
    for obj in contours:
        area = cv2.contourArea(obj)  # area enclosed by the contour
        cv2.drawContours(imgContour, obj, -1, (255, 0, 0), 4)  # draw the contour
        perimeter = cv2.arcLength(obj, True)  # contour perimeter
        approx = cv2.approxPolyDP(obj, 0.02 * perimeter, True)  # corner points

        if len(approx) == 4:
            x, y, w, h = cv2.boundingRect(approx)  # top-left corner, width, height
            Rectangle.append([obj, area, approx, h, w])

    if not Rectangle:
        # Without this guard the lookup below fails with a bare IndexError.
        raise ValueError("ShapeDetection: no four-cornered contour found")

    # Largest area wins; on ties max() keeps the first one, like the stable sort did.
    _, _, book_approx, h, w = max(Rectangle, key=lambda item: item[1])
    img_warp = warpImg(imgContour, book_approx, w, h)
    return img_warp, h, w


# Shape detection -- circles
def new_ShapeDetection(img):
    """Collect circle-like contours of ``img``, largest first, and draw two of them.

    Every external contour is drawn onto the global ``imgContour``. Contours
    whose polygonal approximation has more than four corners are kept as
    circle candidates and sorted by area in descending order. The second and
    third largest candidates (when present) are drawn onto the global
    ``imgWarpContour``, which is then shown in the "IMG" window.

    Args:
        img: Binary edge image passed to ``cv2.findContours``.

    Returns:
        List of ``[contour, area, approx, x, y, w, h]`` entries sorted by
        area, largest first. May be empty.
    """
    # [-2] picks the contour list whether findContours returns 2 or 3 values
    # (the return signature differs between OpenCV major versions).
    contours = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
    circle = []
    for obj in contours:
        area = cv2.contourArea(obj)  # area enclosed by the contour
        cv2.drawContours(imgContour, obj, -1, (255, 0, 0), 4)  # draw the contour
        perimeter = cv2.arcLength(obj, True)  # contour perimeter
        approx = cv2.approxPolyDP(obj, 0.02 * perimeter, True)  # corner points

        if len(approx) > 4:
            x, y, w, h = cv2.boundingRect(approx)  # top-left corner, width, height
            circle.append([obj, area, approx, x, y, w, h])

    circle = sorted(circle, key=lambda x: x[1], reverse=True)
    # Index 1 is the large circle and index 2 the small one; index 0 is
    # skipped (presumably the book outline -- TODO confirm). Slicing instead
    # of indexing avoids an IndexError when fewer than three candidates exist.
    for candidate in circle[1:3]:
        cv2.drawContours(imgWarpContour, candidate[0], -1, (255, 0, 0), 4)

    cv2.imshow("IMG", imgWarpContour)
    return circle


path = 'D:\\fzu\\task.jpg'
img = cv2.imread(path)
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError("Could not read image: " + path)
# Shrink the image so that all of it fits on screen
img = cv2.resize(img, (0, 0), fx=0.3, fy=0.3)

# Stays at module level: ShapeDetection draws onto the global imgContour.
imgContour = img.copy()

# Convert to grayscale
# NOTE(review): cv2.imread yields BGR channel order, so COLOR_BGR2GRAY would be
# the matching conversion. Left unchanged because the thresholds below were
# tuned against this output -- confirm before switching.
imgGray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

# Gaussian blur
imgBlur = cv2.GaussianBlur(imgGray, (9, 9), 0)

# Canny edge detection
imgCanny = cv2.Canny(imgBlur, 20, 52)

kernel = np.ones((5, 5))

# Dilate, then erode, to fill small holes and connect broken contours
imgDial = cv2.dilate(imgCanny, kernel, iterations=6)  # dilation

imgThre = cv2.erode(imgDial, kernel, iterations=5)  # erosion

cv2.imshow("imgThre", imgThre)

imgWarp, book_h, book_w = ShapeDetection(imgThre)  # locate and rectify the book
cv2.imshow("ImgWarp", imgWarp)

# Stays at module level: new_ShapeDetection draws onto the global imgWarpContour.
imgWarpContour = imgWarp.copy()

imgGray = cv2.cvtColor(imgWarp, cv2.COLOR_RGB2GRAY)

# Gaussian blur
imgBlur = cv2.GaussianBlur(imgGray, (7, 7), 0)

# Hough circle detection on the rectified image
circles = cv2.HoughCircles(imgBlur, cv2.HOUGH_GRADIENT, 1, 50,
                           param1=20, param2=54, minRadius=10, maxRadius=1000)

# Both the coin and the drawn circle are needed below; stop with a clear
# message instead of a TypeError/IndexError when they were not detected.
if circles is None or circles.shape[1] < 2:
    raise RuntimeError("HoughCircles found fewer than two circles; "
                       "cannot measure the coin and the drawn circle")

circles = np.round(circles[0, :]).astype("int")
for (x, y, r) in circles:
    cv2.circle(imgWarpContour, (x, y), r, (0, 255, 0), 2)

cv2.imshow("IMG", imgWarpContour)

# NOTE(review): assumes the second detected circle is the coin and the first
# one the pencil-drawn circle -- confirm for other input images.
coin_diameter_px = circles[1][2] * 2
coin_size_cm = 2.5  # one-yuan coin, diameter 25 mm
scale_factor = coin_size_cm / coin_diameter_px  # centimeters per pixel

draw_circle_diameter_px = circles[0][2] * 2
draw_circle_diameter_cm = draw_circle_diameter_px * scale_factor

book_w_cm = book_w * scale_factor
book_h_cm = book_h * scale_factor
print("书本的长为:%f" % book_h_cm + "厘米,书本的宽为:%f" % book_w_cm + "厘米")
print("书本右上方用铅笔画的圆圈的外圆直径为:%f" % draw_circle_diameter_cm + "厘米")

cv2.waitKey(0)
cv2.destroyAllWindows()

Guess you like

Origin blog.csdn.net/HUmOrOus02/article/details/131294955