[python]百度云批量调用ocr接口识别

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/yang332233/article/details/83411336

图片命名方式:140_24_4_长期负债合计.png

# -*- coding: utf-8 -*-

import sys
sys.path.append('/usr/local/lib/python2.7/dist-packages')

from aip import AipOcr
import os
import shutil
import time
# Pool of Baidu OCR credentials. Each entry is a list of three strings that
# the script passes positionally to AipOcr(...), i.e. in the order
# (APP_ID, API_KEY, SECRET_KEY). The script starts with entry 0 and moves on
# to the next entry when the API answers with error_code 17
# ("Open api daily request limit reached").
# NOTE(review): the entry at index 11 is identical to the entry at index 0,
# so rotating onto it re-uses a credential that was already tried.
# NOTE(review): secrets are hard-coded in source; consider loading them from
# an untracked config file or environment variables instead.
L_id = [["11251755","0yGxZfkXBADO4GSqu45UUYtEO","oqFGYEoqXCl0Al5GbQkNjrWURGGomYAKF"],["14558774","0XUjxnyLurAtq1VqRC1tkvqvB","vqAzSvyyNvwTrE1ZdYGgoac9STrGW9Gbk"],
        ["14558623","0gyyZqcqvgASVfU1M01sKUGuK","Hqm2yhe3XIvNq2YtLw5vrsqHOCz9v1guC"],["14558656","0ARwQ0RloVgcSVSZlKH3W0L3B","gqrRXr8ViHTdpgNAksZvkOwu3N6WgFfYP"],
        ["14558705","0CwOiX1VRrX4ueA9g0XoklAjo","PqCGoZG5DCPTGZ85IdW9RHmBwQzmnbHM9"],["14391307","08U3loMb3xqvsDzX1WSOogQiE","LqKGsGR5zwknwtq83uWhi5SgKZ8mnafB6"],
        ["14559968","0q2R7RHpqNZG0wzcogBUO67T7","rq18WRtGttCMOPG70zHelPuxDcenG1Qyg"],["14560105","0KX7QtFLNTkfQbaR5e9kZGw7c","SqHSD7en7wgbz5Po6Gb85GwqwcnNakMtG"],
        ["14561611","06oYssZPLKFXr2FGbMIGZEvq0","7qoOT1IQ25un9eIvTRpiDzdUb67U0ulhm"],["14563236","0Lb0a3XQgHaUWemWXGwueBLY1","iqD88xE8NN1hHGEOImshrv4nBO3psvj76"],
        ["14306795","0uAvAe3EA6wyrWFGdO5B9ifHH","FqEbg7djDxOojRlEyzWgS0igjKHSdsOto"],["11251755","0yGxZfkXBADO4GSqu45UUYtEO","oqFGYEoqXCl0Al5GbQkNjrWURGGomYAKF"],
        ["14557197","0cHY1O4pGBeGMas6mxYzoQK1U","QqhDDtg8nzcWeyIdccOXI15jaBlPwzPRS"],["14310442","0RhDWkPAPo26Xnhy2GZlKb4ma","kqzR1QbkWyfNGoqQkEoNMW19CMduHsiIO"],
        ["14374610","0fioGkSgNAYwRu7SoHDlyVILc","PqlV3agNn5Amp5MRZGMlZvcYifjfkFjfZ"],["14558616","0EuoOxV1tMDBXe5rFhNfFvw93","zqW5ZN6jRjkdvGsjLVyl18MKMyRSok8Mb"],
        ["14563626","035lcdq7pYbLg5GwnijGPq77S","hqd3UNhEdE7cpLtGWGvoKckNZKTz1gTOf"]]

def get_file_content(filePath):
    """Return the raw bytes stored in the file at *filePath*.

    The file is opened in binary mode and read in one go; the handle is
    closed before the function returns.
    """
    with open(filePath, mode='rb') as image_file:
        data = image_file.read()
    return data

# Batch OCR driver.
#
# Walks the sorted listing of ``srcdir``, sends every image to
# ``client.basicAccurate`` and appends one line per image to
# ``<savedir>merge.txt`` in the form
#     <image path>######<expected answer>######<recognised text>
# The expected answer is the last '_'-separated field of the file name
# (without its extension); the recognised text is the ``words`` value of the
# first ``words_result`` entry, or the literal 'nothing' when there is none.
srcdir = "/media/d_2/everyday/1025/print_data_1W/merge_cs_xingye/"
savedir = "/media/d_2/everyday/1025/print_data_1W/merge_cs_xingye_save/"
imglist = os.listdir(srcdir)
imglist.sort()
num = 0
L_id_idx = 0

# Images whose 1-based position in the sorted listing is below this value are
# skipped (presumably to resume an earlier, interrupted run -- confirm).
START_NUM = 4002

# error_code that the script reports as "Open api daily request limit reached".
QUOTA_ERROR_CODE = 17

#client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
client = AipOcr(L_id[L_id_idx][0], L_id[L_id_idx][1], L_id[L_id_idx][2])
for img in imglist:
    imgpath = srcdir + img
    num += 1

    if num < START_NUM:
        continue

    print ("num=",num)
    print ("img_path=",imgpath)

    image = get_file_content(imgpath)
    options = {}
    options["detect_direction"] = "false"
    options["probability"] = "false"
    # Named ``result`` rather than ``re`` so the stdlib module name is not shadowed.
    result = client.basicAccurate(image, options)
    print("re=", result)

    # Rotate through the credential pool for as long as the current key is
    # out of quota. Stop cleanly (instead of indexing past the end of L_id)
    # once every remaining key has been tried.
    quota_exhausted = False
    while result.get('error_code') == QUOTA_ERROR_CODE:
        print("my::: Open api daily request limit reached")
        if L_id_idx + 1 >= len(L_id):
            quota_exhausted = True
            break
        L_id_idx += 1
        client = AipOcr(L_id[L_id_idx][0], L_id[L_id_idx][1], L_id[L_id_idx][2])
        result = client.basicAccurate(image, options)
    if quota_exhausted:
        # Nothing is written for this image, so a later run can resume at ``num``.
        print("my::: all credentials exhausted, stopping at num=", num)
        break

    # Expected answer = last '_' field of the file name without extension.
    # splitext also copes with extensions that are not exactly 3 characters.
    ans = os.path.splitext(img)[0].split('_')[-1]
    print("ans=",ans)

    rec_str = 'nothing'
    words_result = result.get('words_result')
    if words_result and result.get('words_result_num', 0) != 0:
        # Only the first recognised line is kept.
        rec_str = words_result[0].get('words', rec_str)

    print("rec=",rec_str)
    record = imgpath + '######' + ans + '######' + rec_str
    # Opened in append mode for every image so results already written
    # survive an interruption.
    with open(savedir + 'merge.txt', 'a') as f:
        f.write(record + '\n')

    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`")
    # Pause between requests.
    time.sleep(1.5)

保存的结果txt(每行格式为:图片路径######标注答案######识别结果),例如:/media/d_2/everyday/1025/print_data_1W/1008_part2_500/140_13_0_一年内到期的长期债券投资.png######一年内到期的长期债券投资######一年内到期的长期债券投资

识别结果与标注答案比对:

代码中做了格式转换:标注答案里的数字和符号似乎都是全角的,而识别出来的都是半角的,所以先用函数把两者统一成同一种形式,然后再进行比对。

# -*- coding: utf-8 -*-


# Distance between a full-width ASCII variant (U+FF01..U+FF5E) and the
# corresponding half-width character.
_FULLWIDTH_OFFSET = 0xFEE0

# Maps characters to the form used for comparison.
# NOTE(review): previously every key except "✳" was identical to its value,
# which made the table a no-op. The keys are rebuilt here as the full-width
# variants of the same digits and symbols, which is presumably what was
# intended (answers full-width, OCR output half-width) -- confirm.
symbol_dict = {
    chr(ord(half) + _FULLWIDTH_OFFSET): half
    for half in "0123456789-():.*/"
}
symbol_dict["✳"] = "★"


def my_deplace(str):
    """Return *str* with every character listed in ``symbol_dict`` replaced.

    Characters that are not keys of ``symbol_dict`` are copied unchanged, so
    text that is already half-width passes through untouched.

    Args:
        str: the text to normalise. The name shadows the builtin ``str``;
            it is kept so existing keyword callers keep working.

    Returns:
        A new string of the same length with the mapped characters swapped in.
    """
    return "".join(symbol_dict.get(ch, ch) for ch in str)



def strQ2B(ustring):
    """Convert full-width characters in *ustring* to their half-width form.

    The ideographic space (U+3000) becomes an ordinary space, and every code
    point in U+FF01..U+FF5E is shifted down by 0xFEE0. All other characters
    are returned unchanged.
    """
    def _narrow(code_point):
        # Ideographic space has its own mapping outside the regular range.
        if code_point == 0x3000:
            return 0x20
        if 0xFF01 <= code_point <= 0xFF5E:
            return code_point - 0xFEE0
        return code_point

    return "".join(chr(_narrow(ord(uchar))) for uchar in ustring)


# Compare the labelled answer with the OCR result for every record in
# part1.txt. Each record is expected to look like
#     <image path>######<answer>######<recognised text>
# Both fields are passed through my_deplace() before being compared. Records
# that do not match are appended verbatim to part1_no.txt, and the totals are
# printed at the end.
num_all = 0
num_right = 0

# ``with`` guarantees the input file is closed even if processing fails.
with open("/media/d_2/everyday/1025/hand_1000/part1_save/part1.txt") as txt:
    for line in txt:
        line = line.strip('\n')
        #print (line,end='')
        L = line.split('######')
        if len(L) < 3:
            # Blank or truncated record: report it and move on rather than
            # failing with an IndexError; it is not counted in the totals.
            print("skip malformed line:", line)
            continue
        ans = my_deplace(L[1])
        rec = my_deplace(L[2])

        num_all += 1

        if ans == rec:
            print ("yes")
            num_right += 1
        else:
            print ("no")
            with open('/media/d_2/everyday/1025/hand_1000/part1_save/part1_no.txt', 'a') as f:
                f.write(line + '\n')

print ("\n~down~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print ("all=",num_all)
print("right=",num_right)

猜你喜欢

转载自blog.csdn.net/yang332233/article/details/83411336