1. Implementation logic
package com.vue.demo.service.serviceimpl; import com.vue.demo.service.OCRService; import net.sourceforge.tess4j.Tesseract; import net.sourceforge.tess4j.TesseractException; import net.sourceforge.tess4j.util.ImageHelper; import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import org.springframework.web.multipart.MultipartFile; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; /** * @author yangwj * @date 2020/4/1 9:29 */ @Service public class OCRServiceImpl implements OCRService { private static final Logger ocrServiceImplLog = LoggerFactory.getLogger(OCRServiceImpl.class); String language = ""; /** * method one * @param file * @return */ @Override public String getCharacterFromPic(MultipartFile file) { // String modelPath = "D:\\software\\ocr-tesseract\\tessdata"; String modelPath = "/root/project/java/tesseract_model"; Tesseract tessreact = new Tesseract (); // You need to specify the training set to download the training set to https://github.com/tesseract-ocr/tessdata . tessreact.setDatapath (modelPath); if (language.equals ("ch" )) { // Note that the default is English recognition, if you do Chinese recognition, you need to set it separately. 
tessreact.setLanguage ("chi_sim" ); } try { File imageFile = new File(file.getOriginalFilename()); FileUtils.copyInputStreamToFile(file.getInputStream(), imageFile); String result = tessreact.doOCR(imageFile); ocrServiceImplLog.info(result); System.out.println("----------------"); String handleResult = this.ocr(imageFile,modelPath); ocrServiceImplLog.info(handleResult); return result+"----------------------------------\n\r"+handleResult; } catch (TesseractException e) { System.err.println(e.getMessage()); } catch (IOException e) { e.printStackTrace (); } return null; } @Override public String getLanguage(String language) { if(language == null || language == "" ) { return null; } this.language = language; return "success"; } /** * Method Two * @param file * @param modelPath * @return */ private String ocr(File file,String modelPath) { String result = null; try { double start = System.currentTimeMillis(); BufferedImage textImage = ImageIO.read (file); // Here, the image is processed in black and white to enhance the recognition rate. 
Here, first take a screenshot to intercept the part of the image that needs to be recognized textImage = ImageHelper.convertImageToGrayscale (textImage); // Image sharpening textImage = ImageHelper.convertImageToBinary (textImage); // Image magnification, enhanced recognition rate (many pictures themselves cannot be recognized, they can be easily recognized when magnified 5 times, but the problem is that the customer's computer configuration is low and the dot printer does not print consistently Here it is enlarged by 5 times) textImage = ImageHelper.getScaledInstance (textImage, textImage.getWidth () * 1, textImage.getHeight () * 1 ); textImage = ImageHelper.convertImageToBinary(textImage); String saveImgPath = "/root/project/java/tesseract_model/temp_img"; // String saveImgPath = "D:\\software\\ocr-tesseract\\img_tem\\temp.img"; ImageIO.write(textImage, "png", new File(saveImgPath)); Tesseract instance = new Tesseract (); // Set the location of the training library // String modelPath = "/ root / project / java / tesseract_model"; instance.setDatapath(modelPath); // Chinese recognition instance.setLanguage ("chi_sim" ); result = instance.doOCR(textImage); double end = System.currentTimeMillis(); System.out.println("耗时" + (end - start) / 1000 + " s"); } catch (Exception e) { e.printStackTrace (); } return result; } }
2. After deploying to CentOS, you will see the following problem: