版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/yang332233/article/details/83411336
图片命名方式:140_24_4_长期负债合计.png
# -*- coding: utf-8 -*-
import sys
sys.path.append('/usr/local/lib/python2.7/dist-packages')
from aip import AipOcr
import os
import shutil
import time
# Pool of Baidu OCR credentials. Each entry is passed positionally to
# AipOcr, i.e. [APP_ID, API_KEY, SECRET_KEY]. The script moves on to the next
# entry when the current one reports that its daily request quota is used up.
#
# The original list contained the "11251755" entry twice; the second copy was
# removed because rotating onto an already exhausted key only wastes a request.
#
# NOTE(review): secrets are hard-coded in source. They should be loaded from
# the environment or an untracked config file, and these keys rotated.
L_id = [
    ["11251755", "0yGxZfkXBADO4GSqu45UUYtEO", "oqFGYEoqXCl0Al5GbQkNjrWURGGomYAKF"],
    ["14558774", "0XUjxnyLurAtq1VqRC1tkvqvB", "vqAzSvyyNvwTrE1ZdYGgoac9STrGW9Gbk"],
    ["14558623", "0gyyZqcqvgASVfU1M01sKUGuK", "Hqm2yhe3XIvNq2YtLw5vrsqHOCz9v1guC"],
    ["14558656", "0ARwQ0RloVgcSVSZlKH3W0L3B", "gqrRXr8ViHTdpgNAksZvkOwu3N6WgFfYP"],
    ["14558705", "0CwOiX1VRrX4ueA9g0XoklAjo", "PqCGoZG5DCPTGZ85IdW9RHmBwQzmnbHM9"],
    ["14391307", "08U3loMb3xqvsDzX1WSOogQiE", "LqKGsGR5zwknwtq83uWhi5SgKZ8mnafB6"],
    ["14559968", "0q2R7RHpqNZG0wzcogBUO67T7", "rq18WRtGttCMOPG70zHelPuxDcenG1Qyg"],
    ["14560105", "0KX7QtFLNTkfQbaR5e9kZGw7c", "SqHSD7en7wgbz5Po6Gb85GwqwcnNakMtG"],
    ["14561611", "06oYssZPLKFXr2FGbMIGZEvq0", "7qoOT1IQ25un9eIvTRpiDzdUb67U0ulhm"],
    ["14563236", "0Lb0a3XQgHaUWemWXGwueBLY1", "iqD88xE8NN1hHGEOImshrv4nBO3psvj76"],
    ["14306795", "0uAvAe3EA6wyrWFGdO5B9ifHH", "FqEbg7djDxOojRlEyzWgS0igjKHSdsOto"],
    ["14557197", "0cHY1O4pGBeGMas6mxYzoQK1U", "QqhDDtg8nzcWeyIdccOXI15jaBlPwzPRS"],
    ["14310442", "0RhDWkPAPo26Xnhy2GZlKb4ma", "kqzR1QbkWyfNGoqQkEoNMW19CMduHsiIO"],
    ["14374610", "0fioGkSgNAYwRu7SoHDlyVILc", "PqlV3agNn5Amp5MRZGMlZvcYifjfkFjfZ"],
    ["14558616", "0EuoOxV1tMDBXe5rFhNfFvw93", "zqW5ZN6jRjkdvGsjLVyl18MKMyRSok8Mb"],
    ["14563626", "035lcdq7pYbLg5GwnijGPq77S", "hqd3UNhEdE7cpLtGWGvoKckNZKTz1gTOf"],
]
def get_file_content(filePath):
    """Return the raw bytes stored in the file at *filePath*.

    The file is opened in binary mode, so no text decoding is applied.
    Errors raised by ``open`` (for example when the path does not exist)
    propagate to the caller.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
# Directory holding the images to recognise, and the directory that receives
# the result log. Both end with a separator because paths are built with "+".
srcdir = "/media/d_2/everyday/1025/print_data_1W/merge_cs_xingye/"
savedir = "/media/d_2/everyday/1025/print_data_1W/merge_cs_xingye_save/"

# Sorted so that the position counter below refers to the same image on
# every run.
imglist = os.listdir(srcdir)
imglist.sort()

num = 0       # 1-based position of the current image in imglist
L_id_idx = 0  # index into L_id of the credential triple in use

# AipOcr takes (APP_ID, API_KEY, SECRET_KEY).
client = AipOcr(L_id[L_id_idx][0], L_id[L_id_idx][1], L_id[L_id_idx][2])

for img in imglist:
    imgpath = srcdir + img
    num += 1
    # Skip the first 4001 entries (presumably a resume point from an
    # earlier, interrupted run -- adjust as needed).
    if num < 4002:
        continue
    print("num=", num)
    print("img_path=", imgpath)

    image = get_file_content(imgpath)
    options = {}
    options["detect_direction"] = "false"
    options["probability"] = "false"
    result = client.basicAccurate(image, options)
    print("re=", result)

    # Error code 17 is reported when the daily request limit of the current
    # credentials is reached. Keep switching to the next credential triple
    # and retrying until a request is accepted or the pool is used up.
    while 'error_code' in result and result['error_code'] == 17:
        print("my::: Open api daily request limit reached")
        if L_id_idx + 1 >= len(L_id):
            break
        L_id_idx += 1
        client = AipOcr(L_id[L_id_idx][0], L_id[L_id_idx][1], L_id[L_id_idx][2])
        result = client.basicAccurate(image, options)
    if 'error_code' in result and result['error_code'] == 17:
        # Every credential is exhausted, so any further request would fail
        # the same way. Stop instead of indexing past the end of L_id.
        print("my::: all credentials exhausted, stopping at num=", num)
        break

    # The expected text is the last "_"-separated field of the file name
    # without its extension, e.g. "140_24_4_<answer>.png".
    img_t1 = os.path.splitext(img)[0]
    img_t2_L = img_t1.split('_')
    ans = img_t2_L[-1]
    print("ans=", ans)

    # Take the first recognised line, or the placeholder 'nothing' when the
    # response carries no usable result.
    rec_str = 'nothing'
    words_result_num = 0
    if 'words_result_num' in result:
        words_result_num = result['words_result_num']
    if 'words_result' in result and words_result_num != 0:
        rec_str_t = result['words_result'][0]
        if 'words' in rec_str_t:
            rec_str = rec_str_t['words']
    print("rec=", rec_str)

    # One record per image: <path>######<expected>######<recognised>
    record = imgpath + '######' + ans + '######' + rec_str
    with open(savedir + 'merge.txt', 'a') as f:
        f.write(record + '\n')
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`")

    # Pause between requests to stay under the service's rate limit.
    time.sleep(1.5)
保存的结果txt:/media/d_2/everyday/1025/print_data_1W/1008_part2_500/140_13_0_一年内到期的长期债券投资.png######一年内到期的长期债券投资######一年内到期的长期债券投资
识别结果与答案比对:
代码中做了格式转换:标注答案里的数字和符号似乎都是全角的,而识别出来的都是半角的,所以先用函数把两者统一成同一种格式,然后再进行比对。
# -*- coding: utf-8 -*-
# Maps full-width digits and punctuation to their half-width (ASCII)
# equivalents so that labelled answers and OCR output can be compared in one
# form. Keys are written as \u escapes so the full-width code points cannot be
# silently folded to ASCII by an editor or text pipeline, which would turn the
# table into a no-op. Requires Python 3 string literals.
symbol_dict = {
    "\uff10": "0",  # FULLWIDTH DIGIT ZERO
    "\uff11": "1",
    "\uff12": "2",
    "\uff13": "3",
    "\uff14": "4",
    "\uff15": "5",
    "\uff16": "6",
    "\uff17": "7",
    "\uff18": "8",
    "\uff19": "9",  # FULLWIDTH DIGIT NINE
    "\uff0d": "-",  # FULLWIDTH HYPHEN-MINUS
    "\uff08": "(",  # FULLWIDTH LEFT PARENTHESIS
    "\uff09": ")",  # FULLWIDTH RIGHT PARENTHESIS
    "\uff1a": ":",  # FULLWIDTH COLON
    "\uff0e": ".",  # FULLWIDTH FULL STOP
    "\uff0a": "*",  # FULLWIDTH ASTERISK
    "\u2733": "\u2605",  # EIGHT SPOKED ASTERISK -> BLACK STAR
    "\uff0f": "/",  # FULLWIDTH SOLIDUS
}


def my_deplace(str):
    """Return *str* with every character listed in ``symbol_dict`` replaced.

    Characters that are not keys of ``symbol_dict`` are copied unchanged, so
    text that is already half-width passes through untouched.

    Args:
        str: The text to normalise. The parameter name is kept for
            compatibility with existing keyword callers even though it
            shadows the builtin.

    Returns:
        A new string of the same length with the mapped replacements applied.
    """
    return "".join(symbol_dict.get(ch, ch) for ch in str)
def strQ2B(ustring):
    """Convert full-width characters in *ustring* to half-width.

    The ideographic space U+3000 becomes an ASCII space, and every code point
    in U+FF01..U+FF5E is shifted down by 0xFEE0 onto its ASCII counterpart
    (U+0021..U+007E). All other characters are returned unchanged.

    Args:
        ustring: The text to convert.

    Returns:
        A new string of the same length as *ustring*.
    """
    converted = []
    for uchar in ustring:
        inside_code = ord(uchar)
        if inside_code == 0x3000:
            # Ideographic space has no fixed offset to ASCII; map it directly.
            inside_code = 0x20
        elif 0xFF01 <= inside_code <= 0xFF5E:
            inside_code -= 0xFEE0
        converted.append(chr(inside_code))
    # Join once at the end instead of growing a string with repeated "+=".
    return "".join(converted)
# Counts how many OCR results match their expected answer. Each input line is
# "<path>######<expected>######<recognised>"; both text fields are normalised
# with my_deplace before comparison, and mismatching lines are appended to a
# separate file for inspection.
num_all = 0
num_right = 0
# "with" guarantees the input file is closed even if processing fails.
with open("/media/d_2/everyday/1025/hand_1000/part1_save/part1.txt") as txt:
    for line in txt:
        line = line.strip('\n')
        L = line.split('######')
        if len(L) < 3:
            # Blank or malformed line: there is nothing to compare, and
            # indexing L[1] / L[2] would raise IndexError.
            continue
        ans = my_deplace(L[1])
        rec = my_deplace(L[2])
        num_all += 1
        if ans == rec:
            print("yes")
            num_right += 1
        else:
            print("no")
            with open('/media/d_2/everyday/1025/hand_1000/part1_save/part1_no.txt', 'a') as f:
                f.write(line + '\n')
print("\n~down~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("all=", num_all)
print("right=", num_right)