There are many kinds of verification codes (CAPTCHAs); the method presented here only works for CAPTCHAs whose sole obstacle is noise.
First, here is the original picture I am going to recognise, 4.png:
The implementation code is as follows:
import time

import tesserocr
from PIL import Image, ImageDraw
# image = Image.open("img/4_1.png")
# fh = open("img/1.txt", "w")
# w, h = image.size
# Dump the image as text (for testing)
# for i in range(h):
#     for j in range(w):
#         cl = image.getpixel((j, i))
#         clall = cl[0] + cl[1] + cl[2]
#         # clall == 0 means the current pixel is black
#         if clall == 0:
#             fh.write("0")
#         else:
#             fh.write("1")
#     fh.write("\n")
# fh.close()
# Convert the picture to pure black and white.
def black_white(image, threshold=155):
    """Binarise an RGB image in place to pure black and white.

    A pixel whose first three channels sum to at least ``threshold * 3``
    is painted white ``(255, 255, 255)``; every other pixel is painted
    black ``(0, 0, 0)``.

    Args:
        image: Object exposing ``size`` (width, height), ``getpixel`` and
            ``putpixel``. ``getpixel`` must return an indexable pixel
            with at least three channels.
        threshold: Per-channel brightness cut-off; tune it for the
            specific picture. Defaults to 155, the value the script was
            written with.
    """
    width, height = image.size
    cutoff = threshold * 3
    for row in range(height):
        for col in range(width):
            pixel = image.getpixel((col, row))
            # A sum of 0 means the pixel is pure black.
            brightness = pixel[0] + pixel[1] + pixel[2]
            colour = (255, 255, 255) if brightness >= cutoff else (0, 0, 0)
            image.putpixel((col, row), colour)
# Binarised image: maps each (x, y) coordinate to 0 or 1.
t2val = {}


def twoValue(image, G):
    """Threshold ``image`` into the module-level ``t2val`` map.

    For every coordinate of the image, ``t2val[(x, y)]`` is set to 1 when
    the pixel value is strictly greater than ``G`` and to 0 otherwise.

    Args:
        image: Object exposing ``size`` (width, height) and ``getpixel``;
            pixel values must be comparable with ``G``.
        G: Binarisation threshold.
    """
    width, height = image.size[0], image.size[1]
    for y in range(height):
        for x in range(width):
            # Keep the pixel in its own name so the threshold G is not
            # overwritten by the value it is compared against.
            pixel = image.getpixel((x, y))
            t2val[(x, y)] = 1 if pixel > G else 0
# Noise removal.
# Compare the value of a point A with the values of its eight neighbours.
# Given a threshold N (0 < N < 8), A is treated as noise when fewer than N
# of the eight neighbours have the same value as A.
# G: Integer, binarisation threshold (used by twoValue)
# N: Integer, noise threshold, 0 < N < 8
# Z: Integer, number of denoising passes
def clearNoise(image, N, Z):
    """Remove isolated pixels from the module-level ``t2val`` map.

    Each pass sets the corners ``(0, 0)`` and ``(width - 1, height - 1)``
    to 1, then visits every interior coordinate. A point that has fewer
    than ``N`` of its eight neighbours equal to itself is set to 1.

    The map is updated in place during the scan, so neighbours visited
    earlier in the same pass are compared in their updated state. Border
    coordinates other than the two corners above are never modified.

    Args:
        image: Object exposing ``size`` (width, height); only the size is
            read, the pixel data comes from ``t2val``.
        N: Minimum number of equal neighbours a point needs to be kept.
        Z: Number of passes to run; 0 leaves ``t2val`` untouched.
    """
    width, height = image.size[0], image.size[1]
    neighbours = (
        (-1, -1), (-1, 0), (-1, 1),
        (0, -1), (0, 1),
        (1, -1), (1, 0), (1, 1),
    )
    for _ in range(Z):
        t2val[(0, 0)] = 1
        t2val[(width - 1, height - 1)] = 1
        for x in range(1, width - 1):
            for y in range(1, height - 1):
                value = t2val[(x, y)]
                nearDots = sum(
                    1
                    for dx, dy in neighbours
                    if t2val[(x + dx, y + dy)] == value
                )
                if nearDots < N:
                    t2val[(x, y)] = 1
def saveImage(filename, size):
    """Render the module-level ``t2val`` map into a 1-bit image and save it.

    Args:
        filename: Destination path handed to ``Image.save``.
        size: ``(width, height)`` of the image to create; ``t2val`` must
            hold an entry for every coordinate inside that area.
    """
    canvas = Image.new("1", size)
    pen = ImageDraw.Draw(canvas)
    width, height = size[0], size[1]
    for col in range(width):
        for row in range(height):
            # Each point is drawn with the 0/1 value stored for it.
            pen.point((col, row), t2val[(col, row)])
    canvas.save(filename)
def start(img_path, save_img_path):
    """Denoise a CAPTCHA image, save the cleaned copy and OCR it.

    The image is binarised with ``black_white``, converted to grayscale,
    thresholded at 100 with ``twoValue``, denoised with one
    ``clearNoise`` pass (N = 4), written to ``save_img_path`` and finally
    passed to ``tesserocr.file_to_text``.

    Args:
        img_path: Path of the source picture.
        save_img_path: Path the cleaned picture is written to.

    Returns:
        The text recognised by tesserocr, which is also printed.
    """
    # Normalise to RGB first: black_white indexes three channels per
    # pixel, which fails on grayscale or palette images where getpixel
    # returns a single integer.
    with Image.open(img_path) as source:
        image = source.convert("RGB")
    black_white(image)
    image = image.convert("L")
    twoValue(image, 100)
    clearNoise(image, 4, 1)
    saveImage(save_img_path, image.size)
    text = tesserocr.file_to_text(save_img_path)
    print(text)
    return text
# Script entry: run the pipeline on img/4.png and write the cleaned
# picture to img/4_1.png.
img_path, save_img_path = "img/4.png", "img/4_1.png"
start(img_path, save_img_path)
After processing we get the following picture, 4_1.png:
Console output:
ziri
However, the above only works in the ideal case; for some pictures the recognition rate is not high.
Other algorithms can be added later to improve the recognition rate.