Calling Tesseract from Java to recognize text in images

1. Download the language pack (Simplified Chinese)

Address: https://github.com/tesseract-ocr/tessdata/blob/master/chi_sim.traineddata

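2. Put the language pack (chi_sim.traineddata) in a folder named tessdata under resources in IDEA, since the code in step 4 loads it with LoadLibs.extractTessResources("tessdata")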


3. Add maven dependencies

<!-- OCR: Tess4J (Tesseract bindings for Java) plus an explicitly declared JNA version -->
<!-- JNA is declared directly so that its version is fixed by this POM -->
<dependency>
   <groupId>net.java.dev.jna</groupId>
   <artifactId>jna</artifactId>
   <version>4.1.0</version>
</dependency>
<dependency>
   <groupId>net.sourceforge.tess4j</groupId>
   <artifactId>tess4j</artifactId>
   <version>2.0.1</version>
   <!-- Keep the com.sun.jna:jna artifact out of Tess4J's transitive dependencies;
        JNA is supplied by the explicit dependency declared above -->
   <exclusions>
      <exclusion>
         <groupId>com.sun.jna</groupId>
         <artifactId>jna</artifactId>
      </exclusion>
   </exclusions>
</dependency>

4. Implement the code

package com.xinjian.x.modules;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.util.ImageHelper;
import net.sourceforge.tess4j.util.LoadLibs;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
/**
 * Demo: crops a region out of an image, pre-processes it (grayscale, binarize,
 * upscale) and runs Tesseract OCR with the Simplified Chinese language pack via
 * Tess4J, printing the recognized text to standard output.
 */
public class Test {

    /** Image to scan. */
    private static final String IMAGE_PATH = "D:/imagesImage/6/1.jpg";

    /** Top-left corner of the scanned region, as a fraction of image width/height. */
    private static final double CROP_X_RATIO = 0.2;
    private static final double CROP_Y_RATIO = 0.05;

    /** Size of the scanned region in pixels (clamped to the image bounds). */
    private static final int CROP_WIDTH = 250;
    private static final int CROP_HEIGHT = 100;

    /** Magnification applied before OCR. */
    private static final int SCALE_FACTOR = 5;

    /**
     * Runs the OCR demo on {@link #IMAGE_PATH}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            File imageFile = new File(IMAGE_PATH);
            BufferedImage img = ImageIO.read(imageFile);
            // ImageIO.read returns null (it does not throw) when no registered
            // reader can decode the file; fail with a clear message instead of an NPE.
            if (img == null) {
                System.err.println("No ImageIO reader could decode: " + imageFile.getAbsolutePath());
                return;
            }

            img = preprocess(img);
            // To inspect the pre-processed image while tuning, write it out with
            // ImageIO.write(img, "jpg", new File(...)).

            ITesseract instance = new Tesseract();
            // Extract the bundled "tessdata" resources to a folder on disk.
            File tessDataFolder = LoadLibs.extractTessResources("tessdata");
            // Select the language pack (Simplified Chinese).
            instance.setLanguage("chi_sim");
            // Point Tesseract at the folder containing the language pack.
            instance.setDatapath(tessDataFolder.getAbsolutePath());

            String result = instance.doOCR(img);
            System.out.println(result);
        } catch (Exception e) {
            // Report the exception type as well: getMessage() alone is frequently null.
            System.err.println("OCR failed: " + e);
            e.printStackTrace();
        }
    }

    /**
     * Crops the region of interest and prepares it for recognition.
     *
     * @param source the full decoded image
     * @return the cropped, grayscaled, binarized and upscaled image
     */
    private static BufferedImage preprocess(BufferedImage source) {
        int x = (int) (source.getWidth() * CROP_X_RATIO);
        int y = (int) (source.getHeight() * CROP_Y_RATIO);
        // Clamp the crop so that images smaller than the nominal region do not
        // make getSubImage throw a RasterFormatException.
        int cropWidth = Math.min(CROP_WIDTH, source.getWidth() - x);
        int cropHeight = Math.min(CROP_HEIGHT, source.getHeight() - y);

        // Cut out the position to be scanned.
        BufferedImage img = ImageHelper.getSubImage(source, x, y, cropWidth, cropHeight);
        // Convert to grayscale.
        img = ImageHelper.convertImageToGrayscale(img);
        // Binarize (black and white).
        img = ImageHelper.convertImageToBinary(img);
        // Upscale to improve the recognition rate: many images that cannot be
        // recognized at their native size are recognized once enlarged. The original
        // author chose 5x because of broken strokes in dot-matrix printer output.
        return ImageHelper.getScaledInstance(
                img, img.getWidth() * SCALE_FACTOR, img.getHeight() * SCALE_FACTOR);
    }
}

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325654230&siteId=291194637