机器学习笔记 - 基于OpenCV图像处理和手写数字识别进行数独求解

一、说明参考代码

在上一篇（https://blog.csdn.net/bashendixie5/article/details/109754138）中简单了解了手写数字训练，可以把训练结果应用到数独识别，并使用python的py-sudoku包进行求解。
参考代码如下
在使用epochs=20的训练结果的时候，识别效果很差(无法求解)，
在使用epochs=50的训练结果的时候有个别识别的不对(无法求解)，
最后使用epochs=100的训练结果的时候求解正确。
# 导入相关的包
from imutils.perspective import four_point_transform
from skimage.segmentation import clear_border
import numpy as np
import imutils
import cv2
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
# pip install py-sudoku 并且把包内的sudoku.py文件copy到当前程序的同级目录
from sudoku import Sudoku
import tensorflow as tf

# 处理图片
def find_puzzle(image, debug=False):
    """Locate the Sudoku grid in *image* and return a top-down view of it.

    The image is converted to grayscale, blurred, adaptively thresholded and
    inverted. External contours are then examined from largest to smallest
    area, and the first one whose polygon approximation has four vertices is
    taken as the puzzle outline.

    Args:
        image: Colour image; it is converted with ``cv2.COLOR_BGR2GRAY``.
        debug: When true, show each intermediate result in a window and wait
            for a key press before continuing.

    Returns:
        A 2-tuple ``(puzzle, warped)``: the four-point perspective transform
        of the colour image and of its grayscale version.

    Raises:
        Exception: If no contour approximates to a four-point polygon.
    """
    def _preview(title, frame):
        # Debug-only: display an intermediate image and block until a key press.
        if debug:
            cv2.imshow(title, frame)
            cv2.waitKey(0)

    # Grayscale + Gaussian blur, then adaptive threshold; the result is
    # inverted with bitwise_not before contour detection.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(gray, (7, 7), 3)
    binary = cv2.bitwise_not(
        cv2.adaptiveThreshold(smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    )
    _preview("Puzzle Thresh", binary)

    # Collect the external contours, largest area first.
    found = cv2.findContours(binary.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    by_area = list(imutils.grab_contours(found))
    by_area.sort(key=cv2.contourArea, reverse=True)

    # Lazily approximate each contour by a polygon (tolerance: 2% of its
    # perimeter) and stop at the first quadrilateral.
    polygons = (
        cv2.approxPolyDP(contour, 0.02 * cv2.arcLength(contour, True), True)
        for contour in by_area
    )
    puzzle_outline = next((poly for poly in polygons if len(poly) == 4), None)

    if puzzle_outline is None:
        raise Exception(("Could not find Sudoku puzzle outline. "
                         "Try debugging your thresholding and contour steps."))

    if debug:
        # Draw the detected outline on a copy so the input stays untouched.
        annotated = image.copy()
        cv2.drawContours(annotated, [puzzle_outline], -1, (0, 255, 0), 2)
        _preview("Puzzle Outline", annotated)

    # Apply the same four-point perspective transform to both the colour and
    # the grayscale image to obtain a bird's-eye view of the puzzle.
    corners = puzzle_outline.reshape(4, 2)
    puzzle = four_point_transform(image, corners)
    warped = four_point_transform(gray, corners)
    _preview("Puzzle Transform", puzzle)

    return puzzle, warped


# 提取数字
def extract_digit(cell, debug=False):
    """Isolate the digit drawn inside a single Sudoku cell, if there is one.

    The cell is binarised with an inverted Otsu threshold and anything
    connected to the cell border is cleared. The largest remaining external
    contour is kept as the digit candidate.

    Args:
        cell: Image of one cell, as cropped from the warped puzzle image.
        debug: When true, show the thresholded cell and the masked digit in
            windows and wait for a key press after each.

    Returns:
        The thresholded cell masked down to its largest contour, or ``None``
        when no contour exists or the contour fills less than 3% of the cell.
    """
    # Inverted Otsu threshold, then drop blobs touching the cell border.
    _, binary = cv2.threshold(cell, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    binary = clear_border(binary)
    if debug:
        cv2.imshow("Cell Thresh", binary)
        cv2.waitKey(0)

    contours = imutils.grab_contours(
        cv2.findContours(binary.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    )
    # No contours at all means the cell is empty.
    if len(contours) < 1:
        return None

    # Fill the largest contour into a blank mask of the same size.
    largest = max(contours, key=cv2.contourArea)
    mask = np.zeros(binary.shape, dtype="uint8")
    cv2.drawContours(mask, [largest], -1, 255, -1)

    # Fraction of the cell covered by the mask; below 3% is treated as noise.
    rows, cols = binary.shape
    fill_ratio = cv2.countNonZero(mask) / (rows * cols)
    if fill_ratio < 0.03:
        return None

    # Keep only the thresholded pixels that fall inside the mask.
    digit = cv2.bitwise_and(binary, binary, mask=mask)
    if debug:
        cv2.imshow("Digit", digit)
        cv2.waitKey(0)
    return digit

# 加载训练模型并识别
def loadmodelandocr(model_path='C:\\Users\\xiaomao\\Desktop\\mnist.h5',
                    image_path='C:\\Users\\xiaomao\\Desktop\\sudoku.jpg'):
    """Recognise the digits of a Sudoku photo, solve it and display the result.

    Loads a Keras digit classifier and a puzzle image, finds the puzzle with
    ``find_puzzle``, splits the warped grayscale view into a 9x9 grid,
    classifies every non-empty cell, solves the resulting board with
    ``Sudoku`` and draws the solved board onto the colour puzzle image.

    Args:
        model_path: Path of the saved digit-recognition model passed to
            ``load_model``. Defaults to the original hard-coded location.
        image_path: Path of the Sudoku photo passed to ``cv2.imread``.
            Defaults to the original hard-coded location.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``image_path``.
        Exception: Propagated from ``find_puzzle`` when no puzzle outline is
            found.
    """
    # Load the handwritten-digit model from disk.
    model = load_model(model_path)
    # Load the Sudoku picture. cv2.imread signals failure by returning None
    # instead of raising, so check explicitly to fail with a clear message.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(
            "Could not read Sudoku image: {}".format(image_path))
    # Resize to a fixed width of 600.
    image = imutils.resize(image, width=600)
    # Get the perspective-transformed colour and grayscale puzzle images.
    (puzzleImage, warped) = find_puzzle(image, False)
    # Initialise the 9x9 board; cells with no recognised digit stay 0.
    board = np.zeros((9, 9), dtype="int")
    # A Sudoku puzzle is a 9x9 grid (81 cells), so the location of each cell
    # is inferred by dividing the warped image into a 9x9 grid.
    stepX = warped.shape[1] // 9
    stepY = warped.shape[0] // 9
    # (startX, startY, endX, endY) of every cell, one list per grid row.
    cellLocs = []
    # Loop over the grid locations.
    for y in range(0, 9):
        # Cell locations of the current row.
        row = []
        for x in range(0, 9):
            # Starting and ending (x, y)-coordinates of the current cell.
            startX = x * stepX
            startY = y * stepY
            endX = (x + 1) * stepX
            endY = (y + 1) * stepY
            row.append((startX, startY, endX, endY))
            # Crop the cell from the warped image and try to extract a digit.
            cell = warped[startY:endY, startX:endX]
            digit = extract_digit(cell, False)
            # extract_digit returns None for an empty cell.
            if digit is not None:
                # Resize to 28x28 and flatten to a single row of 784 values
                # before handing it to the model.
                roi = cv2.resize(digit, (28, 28))
                roi = tf.reshape(roi, (1, 28 * 28))
                # Index of the highest-scoring output is the predicted digit.
                pred = model.predict(roi).argmax(axis=1)[0]
                board[y, x] = pred
        # Add the finished row to the cell locations.
        cellLocs.append(row)

    # Build a Sudoku board from the recognised digits and print it.
    puzzle = Sudoku(3, 3, board=board.tolist())
    puzzle.show()
    # Solve the puzzle and print the full solution.
    solution = puzzle.solve()
    solution.show_full()

    # Walk the cell locations and the solved board in lockstep and draw every
    # digit of the solution onto the puzzle image.
    for (cellRow, boardRow) in zip(cellLocs, solution.board):
        for (box, digit) in zip(cellRow, boardRow):
            # Unpack the cell coordinates.
            startX, startY, endX, endY = box
            # Text origin: 33% of the cell width in from the left edge and
            # 20% of the cell height up from the bottom edge.
            textX = int((endX - startX) * 0.33)
            textY = int((endY - startY) * -0.2)
            textX += startX
            textY += endY
            cv2.putText(puzzleImage, str(digit), (textX, textY), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 255), 2)
    # Show the output image and wait for a key press.
    cv2.imshow("Sudoku Result", puzzleImage)
    cv2.waitKey(0)


# Module-level entry point: the whole pipeline runs as soon as this file is
# executed (or imported), using the function's default arguments.
loadmodelandocr()
机器学习笔记 - 基于OpenCV图像处理和手写数字识别进行数独求解

一、说明参考代码

二、效果如下

猜你喜欢