java 验证码识别 ocr

本程序根据 http://ykf.iteye.com/blog/212431 的内容整理而成。
Java编写的验证码识别程序
简介:先用 Java 对验证码图片进行预处理(去除干扰、转成黑白图),然后调用 OCR 进行识别。
正确率:取决于验证码的复杂程度;对于只含干扰点的验证码,识别效果比较好。
代码结构如下图:

Src:java源码
Img:存放由 Java 处理验证码后生成的黑白图的目录
Lib:所需要的jar包
Tesseract:ocr相关资源包

主要包括下边几个类:


ImageFilter.java

package com.goma.readimage;

import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.color.ColorSpace;
import java.awt.geom.AffineTransform;
import java.awt.image.AffineTransformOp;
import java.awt.image.BufferedImage;
import java.awt.image.ColorConvertOp;
import java.awt.image.ColorModel;
import java.awt.image.MemoryImageSource;
import java.awt.image.PixelGrabber;
import java.util.HashMap;
import java.util.Map;

/**
 * Pre-processing filters for captcha images: per-channel thresholding, edge
 * extraction, horizontal median filtering, brightening, grayscale conversion
 * and scaling.
 *
 * <p>Every filter reads the image handed to the constructor and returns a new
 * image, with one exception: {@link #removeBackgroud()} writes into the pixel
 * data of the source image.
 */
public class ImageFilter {
    /** Per-channel cut-off used by {@link #changeGrey()}: values above it become 255, others 0. */
    private static final int BINARY_THRESHOLD = 150;

    /** Number of equal-width vertical strips processed by {@link #removeBackgroud()}. */
    private static final int SEGMENT_COUNT = 5;

    private final BufferedImage image;
    private final int iw, ih; // image width and height

    /**
     * @param image the image all filters of this instance operate on; must not be null
     */
    public ImageFilter(BufferedImage image) {
        this.image = image;
        iw = image.getWidth();
        ih = image.getHeight();
    }

    /**
     * Thresholds each colour channel independently against
     * {@link #BINARY_THRESHOLD}; alpha is kept unchanged.
     *
     * @return a new {@code TYPE_INT_RGB} image whose channels are each 0 or 255
     */
    public BufferedImage changeGrey() {
        int[] px = grabPixels();
        for (int i = 0; i < px.length; i++) {
            int p = px[i];
            px[i] = pack(p, threshold(red(p)), threshold(green(p)), threshold(blue(p)));
        }
        return toImage(px);
    }

    /**
     * Replaces every interior pixel by the sum of the absolute differences to
     * its right and lower neighbours (per channel, capped at 255). The
     * outermost one-pixel border is copied unchanged; alpha is kept.
     *
     * @return a new {@code TYPE_INT_RGB} image
     */
    public BufferedImage sharp() {
        int[] src = grabPixels();
        int[] out = src.clone();
        for (int y = 1; y < ih - 1; y++) {
            for (int x = 1; x < iw - 1; x++) {
                int here = src[y * iw + x];
                int right = src[y * iw + x + 1];
                int below = src[(y + 1) * iw + x];
                out[y * iw + x] = pack(here,
                        gradient(red(here), red(right), red(below)),
                        gradient(green(here), green(right), green(below)),
                        gradient(blue(here), blue(right), blue(below)));
            }
        }
        return toImage(out);
    }

    /**
     * Tells whether a colour counts as "white" (red + green + blue &gt; 600).
     *
     * @param colorInt packed RGB value; the alpha bits are ignored
     * @return 1 if the colour is white, 0 otherwise
     */
    public static int isWhite(int colorInt) {
        Color color = new Color(colorInt);
        if (color.getRed() + color.getGreen() + color.getBlue() > 600) {
            return 1;
        }
        return 0;
    }

    /**
     * Crops a one-pixel border, splits the rest into {@link #SEGMENT_COUNT}
     * vertical strips and, per strip, paints the most frequent non-white colour
     * black and everything else white.
     *
     * <p>The returned image is a sub-image that shares its pixel data with the
     * image given to the constructor, so that image is modified as well.
     *
     * @return the cropped, two-colour view of the source image
     */
    public BufferedImage removeBackgroud() {
        BufferedImage img = image.getSubimage(1, 1, image.getWidth() - 2, image.getHeight() - 2);
        int width = img.getWidth();
        int height = img.getHeight();
        double subWidth = (double) width / SEGMENT_COUNT;
        for (int i = 0; i < SEGMENT_COUNT; i++) {
            // NOTE(review): the strip bounds skip the first and the last column of
            // the cropped image; kept as in the original implementation - confirm
            // whether that extra margin is intended.
            int from = (int) (1 + i * subWidth);
            double to = (i + 1) * subWidth;
            int dominant = dominantColor(img, from, to, width, height);
            for (int x = from; x < to && x < width - 1; ++x) {
                for (int y = 0; y < height; ++y) {
                    if (img.getRGB(x, y) != dominant) {
                        img.setRGB(x, y, Color.WHITE.getRGB());
                    } else {
                        img.setRGB(x, y, Color.BLACK.getRGB());
                    }
                }
            }
        }
        return img;
    }

    /**
     * Horizontal 3-tap median filter applied to every interior pixel, per
     * channel; alpha is kept and the outermost one-pixel border is copied
     * unchanged.
     *
     * @return a new {@code TYPE_INT_RGB} image
     */
    public BufferedImage median() {
        int[] src = grabPixels();
        // Write into a copy: filtering in place would make each pixel see the
        // already-filtered value of its left neighbour.
        int[] out = src.clone();
        for (int y = 1; y < ih - 1; y++) {
            for (int x = 1; x < iw - 1; x++) {
                int left = src[y * iw + x - 1];
                int here = src[y * iw + x];
                int right = src[y * iw + x + 1];
                out[y * iw + x] = pack(here,
                        median3(red(left), red(here), red(right)),
                        median3(green(left), green(here), green(right)),
                        median3(blue(left), blue(here), blue(right)));
            }
        }
        return toImage(out);
    }

    /**
     * Brightens the image with the linear map {@code v -> 1.1 * v + 30} per
     * channel, capped at 255; alpha is kept.
     *
     * @return a new {@code TYPE_INT_RGB} image
     */
    public BufferedImage lineGrey() {
        int[] px = grabPixels();
        for (int i = 0; i < px.length; i++) {
            int p = px[i];
            px[i] = pack(p, brighten(red(p)), brighten(green(p)), brighten(blue(p)));
        }
        return toImage(px);
    }

    /**
     * Converts the image to the {@code CS_GRAY} colour space.
     *
     * @return a new grayscale image
     */
    public BufferedImage grayFilter() {
        ColorSpace cs = ColorSpace.getInstance(ColorSpace.CS_GRAY);
        ColorConvertOp op = new ColorConvertOp(cs, null);
        return op.filter(image, null);
    }

    /**
     * Scales the image by {@code s} in both directions using bilinear
     * interpolation.
     *
     * @param s scale factor
     * @return a new scaled image
     */
    public BufferedImage scaling(double s) {
        AffineTransform tx = new AffineTransform();
        tx.scale(s, s);
        AffineTransformOp op = new AffineTransformOp(tx,
                AffineTransformOp.TYPE_BILINEAR);
        return op.filter(image, null);
    }

    /**
     * Scales the image by {@code s} without interpolation: first every target
     * column is copied from its source column, then every target row from its
     * source row.
     *
     * @param s scale factor; must not be null and must yield a size of at least 1x1
     * @return a new image of size {@code round(width * s) x round(height * s)}
     * @throws NullPointerException if {@code s} is null
     */
    public BufferedImage scale(Float s) {
        if (s == null) {
            throw new NullPointerException("scale factor must not be null");
        }
        int srcW = image.getWidth();
        int srcH = image.getHeight();
        int newW = Math.round(srcW * s);
        int newH = Math.round(srcH * s);
        int type = image.getType();
        if (type == BufferedImage.TYPE_CUSTOM) {
            // TYPE_CUSTOM cannot be passed to the BufferedImage constructor.
            type = BufferedImage.TYPE_INT_ARGB;
        }

        // Horizontal pass. The intermediate image keeps the full source height;
        // sizing it with newH would drop source rows when shrinking.
        BufferedImage tmp = new BufferedImage(newW, srcH, type);
        Graphics2D g = tmp.createGraphics();
        try {
            for (int x = 0; x < newW; x++) {
                g.setClip(x, 0, 1, srcH);
                // Shift the source so that its matching column lands on column x.
                g.drawImage(image, x - x * srcW / newW, 0, null);
            }
        } finally {
            g.dispose();
        }

        // Vertical pass.
        BufferedImage dst = new BufferedImage(newW, newH, type);
        g = dst.createGraphics();
        try {
            for (int y = 0; y < newH; y++) {
                g.setClip(0, y, newW, 1);
                // Shift the intermediate image so that its matching row lands on row y.
                g.drawImage(tmp, 0, y - y * srcH / newH, null);
            }
        } finally {
            g.dispose();
        }
        return dst;
    }

    /** Reads all pixels of the source image as packed ARGB values, row by row. */
    private int[] grabPixels() {
        return image.getRGB(0, 0, iw, ih, null, 0, iw);
    }

    /**
     * Builds a {@code TYPE_INT_RGB} image from packed ARGB pixels. The pixels
     * are drawn onto the (black) RGB image, so translucent pixels are blended
     * with black.
     */
    private BufferedImage toImage(int[] argb) {
        BufferedImage withAlpha = new BufferedImage(iw, ih, BufferedImage.TYPE_INT_ARGB);
        withAlpha.setRGB(0, 0, iw, ih, argb, 0, iw);
        BufferedImage result = new BufferedImage(iw, ih, BufferedImage.TYPE_INT_RGB);
        Graphics2D g = result.createGraphics();
        try {
            g.drawImage(withAlpha, 0, 0, null);
        } finally {
            g.dispose();
        }
        return result;
    }

    /** Returns the most frequent non-white colour of one strip, or 0 if the strip has none. */
    private static int dominantColor(BufferedImage img, int from, double to, int width, int height) {
        Map<Integer, Integer> histogram = new HashMap<Integer, Integer>();
        for (int x = from; x < to && x < width - 1; ++x) {
            for (int y = 0; y < height; ++y) {
                int rgb = img.getRGB(x, y);
                if (isWhite(rgb) == 1) {
                    continue;
                }
                Integer seen = histogram.get(rgb);
                histogram.put(rgb, seen == null ? 1 : seen + 1);
            }
        }
        int max = 0;
        int colorMax = 0;
        for (Map.Entry<Integer, Integer> entry : histogram.entrySet()) {
            if (max < entry.getValue()) {
                max = entry.getValue();
                colorMax = entry.getKey();
            }
        }
        return colorMax;
    }

    private static int red(int argb) {
        return (argb >> 16) & 0xFF;
    }

    private static int green(int argb) {
        return (argb >> 8) & 0xFF;
    }

    private static int blue(int argb) {
        return argb & 0xFF;
    }

    /** Packs the channels into one pixel, taking the alpha bits from {@code alphaSource}. */
    private static int pack(int alphaSource, int r, int g, int b) {
        return (alphaSource & 0xFF000000) | r << 16 | g << 8 | b;
    }

    private static int threshold(int value) {
        return value > BINARY_THRESHOLD ? 255 : 0;
    }

    private static int gradient(int here, int right, int below) {
        return Math.min(255, Math.abs(right - here) + Math.abs(below - here));
    }

    private static int median3(int a, int b, int c) {
        return Math.max(Math.min(a, b), Math.min(Math.max(a, b), c));
    }

    private static int brighten(int value) {
        return Math.min(255, (int) (1.1 * value + 30));
    }
}

ImageIOHelper.java
package com.goma.readimage;

import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.Toolkit;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferByte;
import java.awt.image.ImageProducer;
import java.awt.image.WritableRaster;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.ImageOutputStream;
import javax.swing.JOptionPane;

import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;

/**
 * Static helpers for reading images, converting them to uncompressed TIFF
 * files and moving between {@link Image}, {@link ImageProducer} and
 * {@link BufferedImage}.
 */
public class ImageIOHelper {

    public ImageIOHelper() {
    }

    /**
     * Re-encodes an image file as an uncompressed TIFF stored next to it (see
     * {@link #tempImageFile(File)} for the name).
     *
     * @param imageFile   the image to convert
     * @param imageFormat ImageIO format name used to pick the reader (e.g. "png")
     * @return the written TIFF file, or {@code null} if no reader/writer is
     *         available or the conversion failed (the cause is printed to stderr)
     */
    public static File createImage(File imageFile, String imageFormat) {
        ImageReader reader = null;
        ImageWriter writer = null;
        ImageInputStream iis = null;
        ImageOutputStream ios = null;
        File tempFile = null;
        boolean written = false;
        try {
            Iterator<ImageReader> readers = ImageIO
                    .getImageReadersByFormatName(imageFormat);
            Iterator<ImageWriter> writers = ImageIO
                    .getImageWritersByFormatName("tiff");
            if (!readers.hasNext()) {
                throw new IOException("No image reader for format: " + imageFormat);
            }
            if (!writers.hasNext()) {
                throw new IOException("No image writer for format: tiff");
            }
            reader = readers.next();
            writer = writers.next();

            iis = ImageIO.createImageInputStream(imageFile);
            if (iis == null) {
                throw new IOException("Cannot open image file: " + imageFile);
            }
            reader.setInput(iis);
            IIOMetadata streamMetadata = reader.getStreamMetadata();
            BufferedImage bi = reader.read(0);
            IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));

            tempFile = tempImageFile(imageFile);
            ios = ImageIO.createImageOutputStream(tempFile);
            if (ios == null) {
                throw new IOException("Cannot open output file: " + tempFile);
            }
            writer.setOutput(ios);
            writer.write(streamMetadata, image, uncompressedParam(writer));
            // Close on the success path so that a failing flush is reported.
            ios.close();
            ios = null;
            written = true;
        } catch (Exception exc) {
            exc.printStackTrace();
        } finally {
            closeQuietly(ios);
            closeQuietly(iis);
            if (writer != null) {
                writer.dispose();
            }
            if (reader != null) {
                reader.dispose();
            }
        }
        if (!written) {
            if (tempFile != null) {
                tempFile.delete(); // do not leave a partial file behind
            }
            return null;
        }
        return tempFile;
    }

    /**
     * Writes an image as an uncompressed TIFF into a fresh temporary file that
     * is registered for deletion on JVM exit.
     *
     * @param bi the image to write
     * @return the written TIFF file, or {@code null} if no TIFF writer is
     *         available or writing failed (the cause is printed to stderr)
     */
    public static File createImage(BufferedImage bi) {
        ImageWriter writer = null;
        ImageOutputStream ios = null;
        File tempFile = null;
        boolean written = false;
        try {
            Iterator<ImageWriter> writers = ImageIO
                    .getImageWritersByFormatName("tiff");
            if (!writers.hasNext()) {
                throw new IOException("No image writer for format: tiff");
            }
            writer = writers.next();

            tempFile = File.createTempFile("tempImageFile", ".tif");
            tempFile.deleteOnExit();
            ios = ImageIO.createImageOutputStream(tempFile);
            if (ios == null) {
                throw new IOException("Cannot open output file: " + tempFile);
            }
            writer.setOutput(ios);
            writer.write(null, new IIOImage(bi, null, null), uncompressedParam(writer));
            ios.close();
            ios = null;
            written = true;
        } catch (Exception exc) {
            exc.printStackTrace();
        } finally {
            closeQuietly(ios);
            if (writer != null) {
                writer.dispose();
            }
        }
        if (!written) {
            if (tempFile != null) {
                tempFile.delete();
            }
            return null;
        }
        return tempFile;
    }

    /**
     * Derives the name of the TIFF working copy of a file: same directory,
     * base name followed by "0", extension "tif" (e.g. {@code a/b.png} becomes
     * {@code a/b0.tif}). Only the file name is inspected, so dots in directory
     * names and names without an extension are handled.
     *
     * @param imageFile the original file
     * @return the path of the working copy (nothing is created on disk)
     */
    public static File tempImageFile(File imageFile) {
        String name = imageFile.getName();
        int dot = name.lastIndexOf('.');
        String base = dot < 0 ? name : name.substring(0, dot);
        return new File(imageFile.getParentFile(), base + "0.tif");
    }

    /**
     * Reads the first image of a file, choosing the reader by file extension.
     *
     * @param imageFile the file to read
     * @return the decoded image, or {@code null} if no reader exists for the
     *         extension or reading failed (the cause is printed to stderr)
     */
    public static BufferedImage getImage(File imageFile) {
        BufferedImage al = null;
        ImageReader reader = null;
        ImageInputStream iis = null;
        try {
            String imageFileName = imageFile.getName();
            String imageFormat = imageFileName.substring(imageFileName
                    .lastIndexOf('.') + 1);
            Iterator<ImageReader> readers = ImageIO
                    .getImageReadersByFormatName(imageFormat);
            if (!readers.hasNext()) {
                String hint = "Need to install JAI Image I/O package.\nhttps://jai-imageio.dev.java.net";
                if (java.awt.GraphicsEnvironment.isHeadless()) {
                    // No display available: a dialog would throw HeadlessException.
                    System.err.println(hint);
                } else {
                    JOptionPane.showConfirmDialog(null, hint);
                }
                return null;
            }
            reader = readers.next();

            iis = ImageIO.createImageInputStream(imageFile);
            if (iis == null) {
                throw new IOException("Cannot open image file: " + imageFile);
            }
            reader.setInput(iis);
            al = reader.read(0);
        } catch (Exception e) {
            System.err.println(e.getMessage());
        } finally {
            closeQuietly(iis);
            if (reader != null) {
                reader.dispose();
            }
        }
        return al;
    }

    /**
     * Copies an image into a new {@code TYPE_INT_RGB} image of the same size.
     *
     * @param image the image to copy; its size must already be known
     * @return the RGB copy
     */
    public static BufferedImage imageToBufferedImage(Image image) {
        BufferedImage bufferedImage = new BufferedImage(image.getWidth(null),
                image.getHeight(null), BufferedImage.TYPE_INT_RGB);
        Graphics2D g = bufferedImage.createGraphics();
        try {
            g.drawImage(image, 0, 0, null);
        } finally {
            g.dispose();
        }
        return bufferedImage;
    }

    /**
     * Renders an image producer into a new {@code TYPE_INT_RGB} image.
     *
     * @param imageProducer the pixel source
     * @return the rendered image
     */
    public static BufferedImage imageProducerToBufferedImage(
            ImageProducer imageProducer) {
        return imageToBufferedImage(Toolkit.getDefaultToolkit().createImage(
                imageProducer));
    }

    /**
     * Returns the backing byte array of an image.
     *
     * @param image an image whose raster is backed by a {@link DataBufferByte}
     * @return the raster's data array (not a copy)
     * @throws ClassCastException if the image is not byte-backed
     */
    public static byte[] image_byte_data(BufferedImage image) {
        WritableRaster raster = image.getRaster();
        DataBufferByte buffer = (DataBufferByte) raster.getDataBuffer();
        return buffer.getData();
    }

    /** Returns the writer's default parameters with compression switched off where supported. */
    private static ImageWriteParam uncompressedParam(ImageWriter writer) {
        ImageWriteParam param = writer.getDefaultWriteParam();
        if (param.canWriteCompressed()) {
            param.setCompressionMode(ImageWriteParam.MODE_DISABLED);
        }
        return param;
    }

    /** Closes a stream during cleanup; a failure here must not mask the primary error. */
    private static void closeQuietly(ImageInputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // best-effort cleanup
        }
    }
}

OCR.java
package com.goma.readimage;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Runs the external Tesseract executable on an image file and returns the
 * recognised text.
 */
public class OCR {
    protected transient final Logger logger = LoggerFactory.getLogger(this.getClass());
    private final String LANG_OPTION = "-l";
    private final String EOL = System.getProperty("line.separator");
    /** Directory that contains the tesseract executable ("tesseract" under the working directory). */
    private String tessPath = new File("tesseract").getAbsolutePath();

    /**
     * Converts the image to a temporary TIFF, runs
     * {@code tesseract <tiff> output -l eng} in the image's directory and
     * reads the resulting {@code output.txt} as UTF-8.
     *
     * <p>The temporary TIFF and {@code output.txt} are deleted before the
     * method returns, whether it succeeds or fails.
     *
     * @param imageFile   the image to recognise
     * @param imageFormat ImageIO format name of {@code imageFile}
     * @return the recognised text, each line terminated by the platform line separator
     * @throws RuntimeException if the image cannot be converted or Tesseract
     *                          exits with a non-zero status
     * @throws Exception        if the process cannot be started, the wait is
     *                          interrupted or the result file cannot be read
     */
    public String recognizeText(File imageFile, String imageFormat)
            throws Exception {
        File tempImage = ImageIOHelper.createImage(imageFile, imageFormat);
        if (tempImage == null) {
            throw new RuntimeException("Unsupported image format.");
        }

        File outputFile = new File(imageFile.getParentFile(), "output");
        File resultFile = new File(outputFile.getAbsolutePath() + ".txt");
        try {
            List<String> cmd = new ArrayList<String>();
            // File(parent, child) picks the separator of the current platform.
            cmd.add(new File(tessPath, "tesseract").getPath());
            cmd.add(tempImage.getName());
            cmd.add(outputFile.getName());
            cmd.add(LANG_OPTION);
            cmd.add("eng");

            ProcessBuilder pb = new ProcessBuilder(cmd);
            pb.directory(imageFile.getParentFile());
            pb.redirectErrorStream(true);
            Process process = pb.start();

            int w;
            try {
                // Consume the merged stdout/stderr first: an unread pipe can fill
                // up and block the child, which would make waitFor() hang.
                drainOutput(process);
                w = process.waitFor();
            } catch (InterruptedException e) {
                process.destroy();
                Thread.currentThread().interrupt();
                throw e;
            }
            logger.debug("Exit value = {}", w);

            if (w != 0) {
                throw new RuntimeException(describeExitCode(w));
            }

            StringBuilder strB = new StringBuilder();
            BufferedReader in = new BufferedReader(new InputStreamReader(
                    new FileInputStream(resultFile), "UTF-8"));
            try {
                String str;
                while ((str = in.readLine()) != null) {
                    strB.append(str).append(EOL);
                }
            } finally {
                in.close();
            }
            logger.info("图像识别结果:{}", strB);
            return strB.toString();
        } finally {
            // delete temp working files
            tempImage.delete();
            resultFile.delete();
        }
    }

    /** Reads the process output until end of stream, logging each line at debug level. */
    private void drainOutput(Process process) throws java.io.IOException {
        BufferedReader out = new BufferedReader(new InputStreamReader(
                process.getInputStream()));
        try {
            String line;
            while ((line = out.readLine()) != null) {
                logger.debug("tesseract: {}", line);
            }
        } finally {
            out.close();
        }
    }

    /** Maps a non-zero Tesseract exit status to the message reported to the caller. */
    private static String describeExitCode(int exitCode) {
        switch (exitCode) {
        case 1:
            return "Errors accessing files. There may be spaces in your image's filename.";
        case 29:
            return "Cannot recognize the image or its selected region.";
        case 31:
            return "Unsupported image format.";
        default:
            return "Errors occurred.";
        }
    }
}

ImageRead.java
package com.goma.readimage;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;

public class ImageRead {

public static String read(BufferedImage bi,int ii){
    ImageFilter imgFliter = new ImageFilter(bi);
     BufferedImage ss = imgFliter.changeGrey();

     imgFliter = new ImageFilter(ss);
     ss = imgFliter.median();

     imgFliter = new ImageFilter(ss);
     ss = imgFliter.grayFilter();
    
     File xx = ImageIOHelper.createImage(ss);
    
    try{
        FileInputStream input = new FileInputStream(xx);
            FileOutputStream output = new FileOutputStream(System.getProperty("user.dir")+"\\img\\yzm"+ii+".tiff");// 把扩展名添加到原来文件的后面
        int in = input.read();
        while (in != -1) {
         output.write(in);
         in = input.read();
        }
        input.close();
        output.close();
        OCR ocr = new OCR();
        String rlt = ocr.recognizeText(xx, "tiff");
        StringBuffer str = new StringBuffer();
        for(int i=0;i<rlt.length();i++){
        String s = rlt.substring(i,i+1);
        try{
           int t = Integer.valueOf(s);
           str.append(t);
        }catch (Exception e) {
}
        }
        return str.toString();
    }catch (Exception e) {
    e.printStackTrace();
    return null;
}
}
}


RegeitTest.java
package com.goma.readimage;

import java.awt.image.BufferedImage;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.imageio.ImageIO;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;

public class RegeitTest {
static public void main(String[] args) {
HttpClient httpclient = new DefaultHttpClient();
HttpUriRequest getMethod = new HttpGet("验证码URL");
for(int i=1;i<=10;i++){
    try {
    String yzm = "";
    HttpResponse res = httpclient.execute(getMethod);
    HttpEntity entity = res.getEntity();
    if (entity != null) {
    InputStream instream = entity.getContent();
BufferedImage bi = ImageIO.read(instream);
    instream.close();
    /************************************/
    yzm = ImageRead.read(bi,i);
/********************************************/
    }
                System.out.println(yzm+":===="+i+"   ");
    } catch (Exception e) {
    e.printStackTrace();
    }
    }
}

}

详见:http://ykf.iteye.com/blog/212431

猜你喜欢

转载自oma1989.iteye.com/blog/1168433