本文使用 PIL + pytesseract 识别简单的图片验证码,目标是识别其中的红色字符。思路如下:先下载验证码图片,把非红色的干扰像素替换成白色,再将图片灰度化、二值化并清除孤立噪点,最后交给 Tesseract 识别出字符串。
话不多说,直接上代码:
"""
# author: ****
# date : 2018.7.10
# desc : 图片验证码识别
"""
import requests
from PIL import Image
import pytesseract
import os
# File extension shared by the default captcha file names below.
img_type = ".png"
# Default file name of the raw captcha (default argument of downloads_pic
# and noise_reduction).
default_name = "verifying_code" + img_type
# Default file name of the colour-filtered captcha: noise_reduction saves to
# it, and get_clear_bin_image / img_to_str use it as their default argument.
default_out_name = "test_" + "verifying_code" + img_type
# Working-directory prefix. Paths are built by plain string concatenation,
# so the trailing slash is required.
fp = "image/"
def downloads_pic(pic_name=default_name):
    """
    Download one captcha image and store it in the working directory.

    The response body is streamed to disk in 1 KiB chunks. The working
    directory is created if it does not exist yet.

    :param pic_name: file name (inside ``fp``) to save the image under
    :return: None
    :raises requests.RequestException: on connection errors, timeouts or a
        non-2xx HTTP status
    """
    url = 'http://www.bhi.com.cn/Public/Isvalid.ashx'
    os.makedirs(fp, exist_ok=True)
    # The context manager releases the streamed connection even when writing
    # fails; the timeout keeps a dead server from hanging the script forever.
    with requests.get(url, stream=True, timeout=10) as res:
        # Fail loudly instead of saving an HTML error page as a ".png".
        res.raise_for_status()
        with open(os.path.join(fp, pic_name), 'wb') as f:
            for chunk in res.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
def img_to_str(image_name=default_out_name):
    """
    Run Tesseract OCR on the binarised captcha and return the text found.

    The file read is ``fp + 'b' + image_name``, i.e. the output written by
    get_clear_bin_image for the same ``image_name``.

    :param image_name: name of the colour-filtered image (without the ``b``
        prefix)
    :return: recognised text with every whitespace character removed
    """
    # NOTE(review): the Tesseract locations are hard-coded for one Windows
    # installation and must be adapted on other machines.
    pytesseract.pytesseract.tesseract_cmd = 'D:\\Program Files\\Tesseract-OCR\\tesseract.exe'
    tessdata_dir_config = '--tessdata-dir "D:\\Program Files\\Tesseract-OCR\\tessdata"'
    binary_image = Image.open(fp + 'b' + image_name)
    recognised = pytesseract.image_to_string(binary_image, config=tessdata_dir_config)
    # Splitting on whitespace and re-joining drops spaces and newlines.
    return "".join(recognised.split())
def noise_reduction(image_name=default_name):
    """
    Blank out every pixel whose value lies outside the range 210..230.

    Such pixels are overwritten with the value 251; the original author
    describes this as replacing non-red interference pixels with white.
    The result is converted to RGB and always saved as
    ``fp + default_out_name``, regardless of ``image_name``.

    :param image_name: name of the captcha image to read from ``fp``
    :return: None
    """
    img = Image.open(fp + image_name)
    width, height = img.size
    for row in range(height):
        for col in range(width):
            position = (col, row)
            # NOTE(review): comparing against ints assumes a single-band
            # image (e.g. palette indices) - confirm for the target captcha.
            value = img.getpixel(position)
            if value < 210 or value > 230:
                img.putpixel(position, 251)
    img.convert("RGB").save(fp + default_out_name)
def remove_noise_pixel(img, noise_point_list):
    """
    Overwrite every listed noise pixel of a binary image with the value 1.

    :type img: Image
    :param img: image to modify in place
    :param noise_point_list: iterable of ``(x, y)`` positions to clear
    :return: None
    """
    for point in noise_point_list:
        x, y = point[0], point[1]
        img.putpixel((x, y), 1)
def get_clear_bin_image(image_name=default_out_name):
    """
    Produce a cleaned, binarised copy of a captcha image.

    Pre-processing steps:
      1. convert to greyscale
      2. binarise through the lookup table from get_bin_table
      3. clear isolated noise pixels

    The result is saved as ``fp + 'b' + image_name``.
    Reference: http://python.jobbole.com/84625/

    :param image_name: name of the image to read from ``fp``
    :return: None
    """
    bname = fp + 'b' + image_name
    # convert() returns an independent image, so the source file can be
    # closed as soon as the greyscale copy exists.
    with Image.open(fp + image_name) as im:
        imgry = im.convert('L')
    out = imgry.point(get_bin_table(), '1')
    # Collect all noise positions first and clear them afterwards, so that
    # clearing one pixel cannot change the neighbour count of the next.
    # A pixel is noise when it is dark and its neighbourhood count is 1 or 2.
    noise_point_list = [
        (x, y)
        for x in range(out.width)
        for y in range(out.height)
        if 0 < sum_9_region(out, x, y) < 3 and out.getpixel((x, y)) == 0
    ]
    remove_noise_pixel(out, noise_point_list)
    out.save(bname)
def get_bin_table(threshold=140):
    """
    Build the 256-entry lookup table used to binarise a greyscale image.

    :param threshold: grey levels below this value map to 0, all others to 1
    :return: list of 256 integers, each 0 or 1
    """
    return [0 if level < threshold else 1 for level in range(256)]
def sum_9_region(img, x, y):
    """
    Count the dark pixels in the 3x3 neighbourhood centred on ``(x, y)``.

    The neighbourhood is clipped at the image border, so it covers 4 cells
    in a corner, 6 on an edge and 9 in the interior. Images that are only
    one pixel wide or high are handled as well. The count is computed as
    "number of cells minus the sum of their values", which equals the number
    of 0-valued pixels when every pixel is 0 or 1.

    :param img: binary image exposing ``width``, ``height`` and ``getpixel``
    :param x: x position of the centre pixel
    :param y: y position of the centre pixel
    :return: 0 if the centre pixel has the value 1, otherwise the number of
        dark pixels in the clipped neighbourhood (centre included)
    """
    if img.getpixel((x, y)) == 1:
        # A light centre pixel can never be a noise candidate.
        return 0
    # Clamp the 3x3 window to the image bounds.
    columns = range(max(x - 1, 0), min(x + 2, img.width))
    rows = range(max(y - 1, 0), min(y + 2, img.height))
    total = sum(img.getpixel((col, row)) for col in columns for row in rows)
    return len(columns) * len(rows) - total
def get_verifying_code():
    """
    Download a fresh captcha and return the text recognised in it.

    The working directory ``fp`` is created if necessary and emptied of
    leftover files first. The pipeline is then download, colour filter,
    binarise and denoise, OCR.

    :return: recognised captcha string
    """
    os.makedirs(fp, exist_ok=True)
    for entry in os.listdir(fp):
        path = os.path.join(fp, entry)
        # os.remove cannot delete directories; leave them in place instead
        # of aborting the whole run.
        if os.path.isdir(path) and not os.path.islink(path):
            continue
        os.remove(path)
    downloads_pic()
    noise_reduction()
    get_clear_bin_image()
    return img_to_str()
if __name__ == '__main__':
    # Run the whole pipeline once and show the recognised text.
    code = get_verifying_code()
    print(code)