1. Download the language pack (Simplified Chinese)
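Download URL: https://github.com/tesseract-ocr/tessdata/blob/master/chi_sim.traineddata (this link opens the GitHub file page; use the download/raw button there to save the actual `chi_sim.traineddata` file)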
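3. Put the language pack in a `tessdata` folder under `resources` in the IDEA project (i.e. `resources/tessdata/chi_sim.traineddata`); the code in step 4 loads it from the resource folder named `tessdata`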
3. Add maven dependencies
<!-- OCR via Tesseract (Tess4J Java wrapper).
     JNA is declared explicitly at 4.1.0, and the com.sun.jna:jna artifact is
     excluded from tess4j's transitive dependencies.
     NOTE(review): presumably this avoids two JNA versions ending up on the
     classpath; confirm against the tess4j 2.0.1 POM. -->
<dependency>
    <groupId>net.java.dev.jna</groupId>
    <artifactId>jna</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>net.sourceforge.tess4j</groupId>
    <artifactId>tess4j</artifactId>
    <version>2.0.1</version>
    <exclusions>
        <exclusion>
            <groupId>com.sun.jna</groupId>
            <artifactId>jna</artifactId>
        </exclusion>
    </exclusions>
</dependency>
4. Implement the code
package com.xinjian.x.modules; import net.sourceforge.tess4j.ITesseract; import net.sourceforge.tess4j.Tesseract; import net.sourceforge.tess4j.util.ImageHelper; import net.sourceforge.tess4j.util.LoadLibs; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.File; public class Test { public static void main(String[] args){ try{ File imageFile = new File ( "D:/imagesImage/6/1.jpg" ); BufferedImage img = ImageIO . read ( imageFile ); int width = new Double ( img . getWidth ()* 0.2 ). intValue (); int height = new Double ( img . getHeight ()* 0.05 ). intValue (); // Take a picture, the position to be scanned img = ImageHelper . getSubImage ( img , width , height , 250 , 100 ); // Image gray img = ImageHelper . convertImageToGrayscale ( img ); // Image sharpening img = ImageHelper . convertImageToBinary ( img ); // Image magnification 5 times , Enhanced recognition rate ( many pictures cannot be recognized by themselves, and can be easily recognized when magnified 5 times , but the test filter shows that the customer's computer configuration is low , 针式打印机打印不连贯的问题,这里就放大5倍) img = ImageHelper.getScaledInstance(img, img.getWidth() * 5, img.getHeight() * 5); //ImageIO.write(img, "jpg", new File("D:/imagesImage/jcaptcha0.jpg")); ITesseract instance = new Tesseract(); //获取tessdata下的文件 File tessDataFolder = LoadLibs.extractTessResources("tessdata"); //设置语言包 instance.setLanguage("chi_sim"); //设置语言包位置 instance.setDatapath(tessDataFolder.getAbsolutePath()); String result = instance.doOCR(img); System.out.println(result); }catch(Exception e){ System.out.println(e.getMessage()); } } }