普通数字加字母验证码破解

from pytesseract import pytesseract
from PIL import Image, ImageDraw
import random
# Noise reduction
# Binarised pixel map shared by the helpers in this file: (x, y) -> 0 or 1
t2val = {}
def twoValue(image, G):
    """Binarise ``image`` into the module-level ``t2val`` map.

    Every pixel is read with ``image.getpixel`` and compared with the
    threshold ``G``: values strictly greater than ``G`` are stored as 1,
    everything else as 0.  Results are keyed by ``(x, y)`` and overwrite any
    entry already present for that coordinate.

    Args:
        image: object exposing ``size`` as (width, height) and ``getpixel``;
            the pixel value must be comparable with ``G`` (the script in this
            file passes a mode "L" image).
        G: binarisation threshold.
    """
    for row in range(0, image.size[1]):
        for col in range(0, image.size[0]):
            coord = (col, row)
            t2val[coord] = 1 if image.getpixel(coord) > G else 0
def clearNoise(image, N, Z):
    """Remove isolated pixels from the binarised map held in ``t2val``.

    For every interior pixel A, count how many of its 8 neighbours carry the
    same binary value as A.  If fewer than ``N`` neighbours match, A is
    treated as noise and set to 1.  The map is updated in place, so a pixel
    cleared earlier in a pass influences the pixels visited after it.

    Args:
        image: only ``image.size`` (width, height) is used, to bound the scan.
        N: noise threshold; a pixel with fewer than ``N`` matching neighbours
            is cleared (meaningful range 0 < N < 8).
        Z: number of passes to run.

    Returns:
        None; the result is left in the module-level ``t2val`` map.
    """
    # Offsets of the 8 surrounding pixels, column by column.
    around = [
        (dx, dy)
        for dx in (-1, 0, 1)
        for dy in (-1, 0, 1)
        if (dx, dy) != (0, 0)
    ]
    for _ in range(0, Z):
        # The top-left and bottom-right corners are forced to 1 on each pass.
        t2val[(0, 0)] = 1
        t2val[(image.size[0] - 1, image.size[1] - 1)] = 1
        # Border pixels are skipped because they lack a full 8-neighbourhood.
        for x in range(1, image.size[0] - 1):
            for y in range(1, image.size[1] - 1):
                centre = t2val[(x, y)]
                same = sum(
                    1 for dx, dy in around if centre == t2val[(x + dx, y + dy)]
                )
                if same < N:
                    t2val[(x, y)] = 1
def saveImage(filename, size):
    """Render the binarised map in ``t2val`` to a 1-bit image file.

    Args:
        filename: destination path handed to ``Image.save``.
        size: (width, height) of the image; every coordinate inside it must
            already have an entry in ``t2val``.
    """
    canvas = Image.new("1", size)
    pen = ImageDraw.Draw(canvas)
    for col in range(0, size[0]):
        for row in range(0, size[1]):
            spot = (col, row)
            pen.point(spot, t2val[spot])
    canvas.save(filename)
# Source CAPTCHA image and the location where the denoised copy is written.
path = r"C:\Users\Administrator\PycharmProjects\测试\he.jpg"
path1 = r"C:\Users\Administrator\PycharmProjects\测试\he2.jpg"

# The pipeline is deterministic for a given input file, so it runs once;
# repeating it on the same path only rewrites an identical output.
with Image.open(path) as source:
    # Convert to 8-bit grayscale so each pixel is a single value to threshold.
    image = source.convert("L")
twoValue(image, 80)
clearNoise(image, 3, 2)
saveImage(path1, image.size)

# OCR the file that was just written. Opening the same absolute path (rather
# than a working-directory-relative 'he2.jpg') guarantees the fresh output is
# the one that gets recognised.
with Image.open(path1) as out:
    result = pytesseract.image_to_string(out).split(' ')
print(result)

# Post-processing heuristic: the first space-separated token that is exactly
# '0' is replaced with the letter 'o'; other tokens are left untouched.
math_num = '0'
if math_num in result:
    result[result.index(math_num)] = 'o'
res = ''.join(result)
print(res)

如果识别结果有误差,可以调节以下两个函数调用中的参数:twoValue 的二值化阈值,以及 clearNoise 的降噪率和降噪次数。

twoValue(image, 80)
clearNoise(image, 3, 2)

猜你喜欢

转载自www.cnblogs.com/542684416-qq/p/10904621.html