识别上图所示的验证码
#!/usr/bin/python
# encoding: utf-8
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import requests
from lxml import etree
import time,datetime
import re
from switch import *
from PIL import Image
import sys
import pyocr
import pyocr.builders
import pytesseract
# Binarise a captcha image and run OCR on the result.
image_path = 'XXX.png'
im = Image.open(image_path)

# Convert to mode 'L' so each pixel is a single 0-255 grey level.
imgry = im.convert('L')

# Lookup table for Image.point(): grey levels below the threshold map to 0,
# all others map to 1. Thresholding strips the faint background noise that
# otherwise confuses the OCR engine.
threshold = 140
table = [0 if i < threshold else 1 for i in range(256)]

# Apply the table and emit a mode '1' (bilevel) image.
out = imgry.point(table, '1')
out.show()

# Read the text out of the cleaned-up image.
cap_str = pytesseract.image_to_string(out)
# Parenthesised form prints the same text on Python 2 and also parses on
# Python 3.
print(cap_str)
然后使用 image_to_string 识别，准确率就提高很多了。
图片降噪后的处理结果
<img src="https://img-blog.csdn.net/20160604175525790?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQv/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/Center" alt="" />