Python uses Baidu OCR interface for verification code image recognition

Last time I got started with OCR image recognition using the pytesseract software and its Python library, covering image reading, format conversion and image processing. I also ran verification code recognition experiments, including verification code acquisition, login verification, and tests of how different image processing steps affect recognition. For the details, see: Python uses pytesseract for verification code image recognition_Cameback_Tang's Blog-CSDN Blog_Verification Code Image Recognition

This time, the OCR recognition interface from Baidu is used, i.e. an online (HTTP) service instead of locally installed software and its bindings. It is also free and can be called directly. On the same interference-free verification codes as last time, the direct recognition rate reaches 99%, which is much better than pytesseract's 76%. Of course, if the verification code contains interference, the recognition rate only improves after the interference has been removed.

The source of the OCR interface used this time: Text Recognition_General Scene Text Recognition-Baidu AI Open Platform

There are four main types of simple OCR text recognition, chiefly "general text recognition"; you can read about them through the link above.

In the code, you select the OCR interface through the `type` field of the request data. By default, the "general text recognition, high-precision without location" interface is used.

As last time, I wrote two recognition functions here: one recognizes from an image file, the other from a base64-encoded image string.

 

import requests
import base64
from urllib.parse import urlencode

def get_result_by_baiduOCR(file_path):
    """Recognize the text in an image file through Baidu's online OCR demo endpoint.

    The file is read, base64-encoded and posted as a URL-encoded form to
    ``https://cloud.baidu.com/aidemo``. The ``type`` form field selects the
    OCR interface (here ``accurate_basic``).

    Args:
        file_path: Path of the image file to recognize.

    Returns:
        The ``words`` of the first recognized entry with every character that
        fails ``str.isalnum`` removed, or ``''`` when the service reports a
        non-zero ``errno`` or returns no recognized text.

    Raises:
        OSError: If the file cannot be read.
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = 'https://cloud.baidu.com/aidemo'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36 Edg/94.0.992.47',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'cloud.baidu.com',
        'Origin': 'https://cloud.baidu.com',
        # For the other interfaces see https://cloud.baidu.com/product/ocr_general
        'Referer': 'https://cloud.baidu.com/product/ocr/general',  # general text recognition, high-precision, without location
        # 'Referer': 'https://cloud.baidu.com/product/ocr_others/handwriting', # handwriting interface
        # 'Referer': 'https://cloud.baidu.com/product/ocr/doc_analysis_office', # document interface
        # 'Referer': 'https://cloud.baidu.com/product/ocr_others/webimage', # web image interface
    }

    # Read and encode the file first so it is not held open during the
    # network round trip.
    with open(file_path, 'rb') as f:
        img_base64 = base64.b64encode(f.read()).decode()

    data = {
        'image':f'data:image/png;base64,{img_base64}',
        'image_url':'xxxxxx',
        'type':'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic',
        # 'type':'https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting',
        # 'type':'https://aip.baidubce.com/rest/2.0/ocr/v1/doc_analysis_office',
        # 'type':'https://aip.baidubce.com/rest/2.0/ocr/v1/webimage',
        'detect_direction': 'false',
        # 'language_type':'CHN_ENG',
        'language_type': 'ENG',
    }
    data = urlencode(data)
    # The placeholder value is stripped after encoding so the form body
    # carries a bare ``image_url`` key with no value.
    data = data.replace('image_url=xxxxxx', 'image_url')

    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.post(url, data, headers=headers, timeout=30)

    # Example of a successful response:
    # {
    #     "errno": 0,
    #     "msg": "success",
    #     "data": {
    #         "words_result": [{"words": "Pi15"}],
    #         "words_result_num": 1,
    #         "log_id": "1515968155725851265"}
    # }
    payload = response.json()
    if payload.get('errno') != 0:
        return ''

    # A successful reply may still contain no recognized text; guard against
    # a missing ``data`` object and an empty ``words_result`` list.
    words_result = (payload.get('data') or {}).get('words_result') or []
    if not words_result:
        return ''
    words = words_result[0].get('words') or ''
    return ''.join(filter(str.isalnum, words))  # keep only alphanumeric characters

def get_result_by_baiduOCR_base64(img_base64):
    """Recognize the text in a base64-encoded image through Baidu's online OCR demo endpoint.

    The string is embedded in a ``data:image/png;base64,...`` URI and posted
    as a URL-encoded form to ``https://cloud.baidu.com/aidemo``.

    Args:
        img_base64: Base64 text of the image (``str``), without the
            ``data:`` URI prefix.

    Returns:
        The ``words`` of the first recognized entry with every character that
        fails ``str.isalnum`` removed, or ``''`` when the service reports a
        non-zero ``errno`` or returns no recognized text.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = 'https://cloud.baidu.com/aidemo'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36 Edg/94.0.992.47',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'cloud.baidu.com',
        'Origin': 'https://cloud.baidu.com',
        'Referer': 'https://cloud.baidu.com/product/ocr/general',
        # 'Connection': 'close'   # Connections are keep-alive by default; if the number of
        #                         # HTTP connections exceeds the limit the server holds too
        #                         # many of them and cannot open new ones.
    }
    data = {
        'image':f'data:image/png;base64,{img_base64}',
        'type':'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic',
        'detect_direction':'false',
        # 'language_type':'CHN_ENG',
        'language_type':'ENG',
    }
    data = urlencode(data)

    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.post(url, data, headers=headers, timeout=30)
    payload = response.json()
    if payload.get('errno') != 0:
        return ''

    # A successful reply may still contain no recognized text; guard against
    # a missing ``data`` object and an empty ``words_result`` list.
    words_result = (payload.get('data') or {}).get('words_result') or []
    if not words_result:
        return ''
    words = words_result[0].get('words') or ''
    return ''.join(filter(str.isalnum, words))  # keep only alphanumeric characters

Also, you may need to convert between image objects and base64-encoded image strings; otherwise you have to save the image to a file and read it back.

import base64
from PIL import Image
from io import BytesIO

# image: a PIL image object


    def image_to_base64(image, fmt='JPEG'):
        """Serialize an image object into a base64 text string.

        Args:
            image: Object exposing ``save(file_obj, format=...)``; a PIL image
                is what the surrounding code passes in.
            fmt: Format name forwarded to ``image.save``.

        Returns:
            The base64 encoding of the saved bytes, decoded as UTF-8 text.
        """
        # Save into memory instead of a file on disk.
        buffer = BytesIO()
        image.save(buffer, format=fmt)
        encoded = base64.b64encode(buffer.getvalue())
        return encoded.decode('utf-8')

    def base64_to_image(base64_str):
        """Decode a base64 string and open the resulting bytes as an image.

        Args:
            base64_str: Base64 text (or bytes) of an encoded image file.

        Returns:
            The object returned by ``Image.open`` for the decoded bytes.
        """
        raw_bytes = base64.b64decode(base64_str)
        # Wrap the bytes in a file-like object so they can be opened without
        # touching the disk.
        return Image.open(BytesIO(raw_bytes))

A short digression here about acquiring the verification code:




# Fetch the verification code and save the response body to an image file.
# NOTE(review): `headers` is not defined at module level in this snippet; it
# must be a request-headers dict supplied by the surrounding script.
session = requests.session()
vpic_url = 'https://xxxxxxx/getVerify'
html = session.get(vpic_url, headers=headers)
with open("py016.jpeg", "wb") as f:
    f.write(html.content)
# Open the image only after the `with` block has closed the file, so the
# written bytes are guaranteed to be flushed to disk before they are read.
img = Image.open("py016.jpeg")


# Fetch the verification code with selenium.webdriver and the Chrome browser.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
url = 'https://xxxxxx/login'
driver = webdriver.Chrome()
driver.get(url)

# The find_element_by_* helpers were removed in Selenium 4.3; locate the
# element with a By locator instead, and reuse it for both screenshots.
img = driver.find_element(By.TAG_NAME, 'img')
img.screenshot('aaa.jpeg')  # the neat trick: save the element as an image file
verifyCode = get_result_by_baiduOCR_base64(
    img.screenshot_as_base64  # the neat trick: get the element directly as base64 text
)

Another digression, this time about processing the verification code: I found that the background interference differs from image to image, but it always has a single, relatively light color. Converting to grayscale and then binarizing to black and white works very well.

 Using a threshold of 100, the black-and-white binarization result is as follows:

# Grayscale conversion followed by black-and-white binarization with a custom threshold
def gray_processing(img, threshold = 127):
    """Convert an image to grayscale and binarize it with a threshold.

    Args:
        img: Image object providing ``convert`` and ``point`` (a PIL image in
            the surrounding code).
        threshold: Gray levels below this value map to 0, all others to 1.

    Returns:
        The result of applying the 256-entry lookup table to the grayscale
        image with output mode ``'1'``.
    """
    grayscale = img.convert('L')
    # One table entry per possible gray level (0..255).
    table = []
    for level in range(256):
        table.append(0 if level < threshold else 1)
    return grayscale.point(table, '1')


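# If there are interference lines, 3x3-neighborhood denoising can also be used (run it twice if once is not enough); dilation and erosion are a further option
# Remove noise points with the 3x3-neighborhood method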
def denoise(image, pixel_node):
    """Clear isolated pixels using a 3x3 neighborhood count.

    For every interior position, the pixels in its 3x3 neighborhood (the
    position itself included) whose value is not 1 are counted. Positions
    whose count is at most ``pixel_node`` are collected first and then all set
    to 1, so earlier changes do not influence later counts.

    NOTE(review): the code treats the value 1 as background; confirm that the
    images passed in actually read back background pixels as 1.

    Args:
        image: Object providing ``size`` (a 2-tuple), ``getpixel((x, y))`` and
            ``putpixel((x, y), value)``.
        pixel_node: Maximum neighborhood count for a position to be cleared.

    Returns:
        The same ``image`` object, modified in place.
    """
    # ``size`` is unpacked in the same order used for the coordinate tuples.
    extent_x, extent_y = image.size
    flagged = []

    for x in range(1, extent_x - 1):
        for y in range(1, extent_y - 1):
            non_background = sum(
                1
                for nx in (x - 1, x, x + 1)
                for ny in (y - 1, y, y + 1)
                if image.getpixel((nx, ny)) != 1
            )
            if non_background <= pixel_node:
                flagged.append((x, y))

    for position in flagged:
        image.putpixel(position, 1)
    return image

 No more digressions at this point; that is probably all.

 

Guess you like

Origin blog.csdn.net/Cameback_Tang/article/details/126442051