版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
写这段代码主要是为了提高 OCR 识别率:先筛选出图像质量极差的二值图,直接返回、不做识别;只识别质量较高的二值图、灰度图以及 RGB 图片。
统计以下四个参考指标:'白底方差'、'图案方差'、'像素波动方差'、'图像大小/分辨率'。
若像素波动方差小于 1,且图像大小/分辨率小于 0.01,则判定为质量极差的图片。
import cv2,os,csv
import numpy as np
def get_img(Img_path):
    """Recursively collect the paths of image files under a directory.

    Args:
        Img_path: Root directory to walk.

    Returns:
        A list of paths, each built as ``<directory>/<file name>``, for every
        file whose name ends in .jpg, .png, .jpeg or .tif. The extension is
        matched case-insensitively, so ``scan.JPG`` is included. The order is
        whatever ``os.walk`` yields; it is not sorted. A missing directory
        produces an empty list.
    """
    extensions = ('.jpg', '.png', '.jpeg', '.tif')
    image_paths = []
    # Named ``root`` rather than ``dir`` so the builtin is not shadowed.
    for root, _dirnames, filenames in os.walk(Img_path):
        for img_file in filenames:
            # Lower-case only for the comparison; the stored path keeps the
            # file's real name.
            if img_file.lower().endswith(extensions):
                # Joined with a literal '/' so the returned path format is
                # unchanged.
                image_paths.append(root + '/' + img_file)
    return image_paths
def Calculate_QD(img):
    """Compute three pixel-spread statistics used to judge image quality.

    Args:
        img: Image array. A multi-channel array is converted to grayscale
            with ``cv2.COLOR_BGR2GRAY``; a 2-D array is used as is and is
            assumed to already hold non-negative integer gray levels.

    Returns:
        A tuple ``(var_255, var_000, var_all)``:

        * ``var_255`` - mean squared deviation of every pixel from the most
          frequent gray level.
        * ``var_000`` - mean squared deviation of every pixel from the second
          most frequent gray level. If the image contains a single gray
          level, the most frequent level is used instead, giving 0.
        * ``var_all`` - sum of squared differences between neighbouring
          pixels along the last axis, divided by the total pixel count.
    """
    if img.ndim == 2:
        gray = img
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Widen before doing any arithmetic: on uint8 data the differences and
    # their squares wrap around modulo 256 and corrupt every statistic.
    pixels = gray.astype(np.int64)

    numdiff = np.diff(pixels)
    var_all = np.sum(numdiff ** 2) / pixels.size

    # minlength=256 guarantees at least two bins, so indexing [-2] is safe
    # even for an all-black image.
    baseline = np.bincount(pixels.reshape(-1), minlength=256)
    # A stable sort makes the choice deterministic when counts tie (the
    # higher gray level wins).
    sortNum = np.argsort(baseline, kind='stable')
    most_common = sortNum[-1]
    second_common = sortNum[-2]
    if baseline[second_common] == 0:
        # Only one gray level is present; there is no real runner-up.
        second_common = most_common

    var_255 = np.sum((pixels - most_common) ** 2) / pixels.size
    var_000 = np.sum((pixels - second_common) ** 2) / pixels.size
    return var_255, var_000, var_all
if __name__ == '__main__':
    # Score every image found under ./idcard and write one CSV row per image.
    Img_path = './idcard'
    csv_path = './idcard.csv'
    Imgpaths = get_img(Img_path)
    Result_CSV = []
    # A separate loop name keeps the root directory in Img_path intact.
    for img_file_path in Imgpaths:
        imgArray = cv2.imread(img_file_path)
        if imgArray is None:
            # cv2.imread signals an unreadable or corrupt file by returning
            # None rather than raising; skip it so one bad file does not
            # abort the whole run.
            print('skip unreadable image: ' + img_file_path)
            continue
        filename = os.path.basename(img_file_path)
        image_size = os.path.getsize(img_file_path)
        # File size in bytes divided by the number of decoded array elements.
        px_size = round(image_size / imgArray.size, 4)
        var_255, var_000, var_all = Calculate_QD(imgArray)
        Result_CSV.append((filename, var_255, var_000, var_all, px_size))
    # newline='' lets the csv module control line endings itself. The
    # encoding is left at the platform default so existing output is
    # unchanged.
    with open(csv_path, 'w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(('filename','白底方差', '图案方差', '像素波动方差','图像大小/分辨率'))
        csv_writer.writerows(Result_CSV)