阿里云 OCR 文字识别 demo

1、OCR (Optical Character Recognition,光学字符识别)是指电子设备(例如扫描仪或数码相机)检查纸上打印的字符,通过检测暗、亮的模式确定其形状,然后用字符识别方法将形状翻译成计算机文字的过程;


这里讲的是通过代码识别图片里的文字。


2、准备

 String host = "https://ocrapi-document.taobao.com";// fixed endpoint host; leave unchanged
	    String path = "/ocrservice/document";// fixed API path; leave unchanged
	    String method = "POST";// fixed HTTP method; leave unchanged
	    String appcode = "27b621bd285de290";// must be replaced with your own AppCode



3、pom.xml

<dependencies>
         <!-- JSON parsing of the OCR response (com.alibaba.fastjson.JSONObject / JSONArray). -->
         <!-- NOTE(review): 1.2.15 is an old release; check the fastjson security advisories before reusing this version. -->
         <dependency>
			<groupId>com.alibaba</groupId>
			<artifactId>fastjson</artifactId>
			<version>1.2.15</version>
		</dependency>
        <!-- Apache HttpClient; the demo's HttpResponse / EntityUtils types come from the org.apache.http packages. -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpcore</artifactId>
            <version>4.2.1</version>
        </dependency>
        <!-- Not referenced by the demo class itself; presumably required by the downloaded HttpUtils helper (confirm against its source). -->
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.6</version>
        </dependency>
        <!-- Not referenced by the demo class itself; presumably required by the downloaded HttpUtils helper (confirm against its source). -->
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-util</artifactId>
            <version>9.3.7.v20160115</version>
        </dependency>
        <!-- Test-scope only; the demo shown here runs from main() and has no JUnit tests. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.5</version>
            <scope>test</scope>
        </dependency>
    
    
    
  </dependencies>




4、代码

package com.ekz.aliocr.aliocr;

import java.util.HashMap;
import java.util.Map;

import org.apache.http.HttpResponse;
import org.apache.http.util.EntityUtils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;


/**
 * Demo client for the Alibaba Cloud document OCR API: posts an image URL to the
 * OCR endpoint and prints each recognized text line.
 *
 * @author py
 * @version 2018-03-16 14:50:32
 */
public class AliOcrTest {

	/**
	 * Sends one OCR request, prints the raw response, then prints every
	 * {@code word} found in the {@code prism_wordsInfo} array of the JSON body.
	 *
	 * @param args optional; when {@code args[0]} is present it is used as the AppCode
	 *             instead of the built-in sample value
	 */
	public static void main(String[] args) {
		String host = "https://ocrapi-document.taobao.com"; // fixed
		String path = "/ocrservice/document"; // fixed
		String method = "POST"; // fixed
		// The sample AppCode must be replaced with your own; pass it as the first
		// command-line argument to avoid editing the source.
		String appcode = (args != null && args.length > 0) ? args[0] : "27b621bd285de290";

		Map<String, String> headers = new HashMap<String, String>();
		// Final header format (single ASCII space in the middle):
		// Authorization:APPCODE 83359fd73fe94948385f570e3c139105
		headers.put("Authorization", "APPCODE " + appcode);
		// Content-Type as required by the API.
		headers.put("Content-Type", "application/json; charset=UTF-8");
		Map<String, String> querys = new HashMap<String, String>();

		// Request body fields:
		//   img  - image data, base64 encoded; at most 4M after encoding, shortest side
		//          at least 15px, longest side at most 4096px, jpg/png/bmp supported;
		//          mutually exclusive with "url".
		//   url  - full image URL, at most 1024 bytes; the referenced image has the same
		//          size/format limits as "img"; mutually exclusive with "img".
		//   prob - whether each recognized line should carry a confidence value;
		//          default is false (not needed).
		String bodys = "{"
				+ "\"img\":\"\","
				+ "\"url\":\"https://ss.oss-cn-shenzhen.aliyuncs.com/test/%E6%89%8B%E6%9C%AF%E8%AE%B0%E5%BD%95.jpg\","
				+ "\"prob\":false}";

		try {
			/*
			 * Important: download HttpUtils from
			 * https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/src/main/java/com/aliyun/api/gateway/demo/util/HttpUtils.java
			 *
			 * For the matching dependencies see
			 * https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/pom.xml
			 */
			HttpResponse response = HttpUtils.doPost(host, path, method, headers, querys, bodys);
			System.out.println(response.toString());

			// Read the response body. UTF-8 is supplied as the default charset so that
			// Chinese text is decoded correctly when the server declares no charset.
			String message = EntityUtils.toString(response.getEntity(), "UTF-8");
			System.out.println(message);

			JSONObject parseObject = JSONObject.parseObject(message);
			JSONArray jan = (parseObject == null) ? null : parseObject.getJSONArray("prism_wordsInfo");
			// The original condition used "||", which dereferenced jan when it was null.
			// A null check alone is enough: an empty array simply skips the loop.
			if (jan != null) {
				for (int i = 0; i < jan.size(); i++) {
					JSONObject jo = jan.getJSONObject(i);
					if (jo == null) {
						continue;
					}
					System.out.println(jo.getString("word"));
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}


本次测试所用图片的 URL(下面的打印结果应来自这张图片;运行时需将代码中请求体的 url 字段替换为该地址):

https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1523367775393&di=4cd8a2c97a08eeba1ed6b60213735be3&imgtype=0&src=http%3A%2F%2Fimg5q.duitang.com%2Fuploads%2Fitem%2F201504%2F08%2F20150408H2909_4vuxK.thumb.700_0.jpeg

5、打印结果

{"sid":"fa25cfae90d563319258ee98702ae785286c209d319256977ac03b63f2390ccf26ab2529","prism_version":"1.0.6","prism_wnum":14,"prism_wordsInfo":[{"word":"的流而","pos":[{"x":162,"y":74},{"x":367,"y":74},{"x":367,"y":111},{"x":162,"y":111}]},{"word":"公的种子从远处回","pos":[{"x":10,"y":157},{"x":451,"y":147},{"x":452,"y":199},{"x":11,"y":209}]},{"word":"聚成伞的旋样","pos":[{"x":469,"y":137},{"x":695,"y":137},{"x":695,"y":193},{"x":469,"y":193}]},{"word":"大阳边斗起,向东方","pos":[{"x":68,"y":227},{"x":467,"y":210},{"x":469,"y":256},{"x":70,"y":273}]},{"word":"动员日列起跑线上","pos":[{"x":69,"y":363},{"x":415,"y":344},{"x":417,"y":385},{"x":71,"y":405}]},{"word":"我友网录取通和书,高3t年窗","pos":[{"x":52,"y":436},{"x":615,"y":402},{"x":618,"y":452},{"x":55,"y":486}]},{"word":"厨房里飘来饭的香","pos":[{"x":70,"y":507},{"x":412,"y":484},{"x":415,"y":534},{"x":73,"y":557}]},{"word":"你美掉电视帮戒把书包背上","pos":[{"x":78,"y":573},{"x":568,"y":537},{"x":572,"y":585},{"x":81,"y":621}]},{"word":"龙退回记平我","pos":[{"x":77,"y":644},{"x":355,"y":626},{"x":358,"y":670},{"x":80,"y":688}]},{"word":"成走园乡间小路上","pos":[{"x":389,"y":624},{"x":686,"y":605},{"x":689,"y":648},{"x":392,"y":666}]},{"word":"闭着眼哪着峰","pos":[{"x":69,"y":712},{"x":348,"y":691},{"x":351,"y":740},{"x":73,"y":761}]},{"word":"戒大","pos":[{"x":364,"y":691},{"x":547,"y":675},{"x":550,"y":716},{"x":368,"y":732}]},{"word":"你还在戒身","pos":[{"x":88,"y":776},{"x":342,"y":769},{"x":343,"y":819},{"x":89,"y":826}]},{"word":"(借用一封微情书)","pos":[{"x":453,"y":871},{"x":610,"y":867},{"x":611,"y":889},{"x":453,"y":893}]}]}
的流而
公的种子从远处回
聚成伞的旋样
大阳边斗起,向东方
动员日列起跑线上
我友网录取通和书,高3t年窗
厨房里飘来饭的香
你美掉电视帮戒把书包背上
龙退回记平我
成走园乡间小路上
闭着眼哪着峰
戒大
你还在戒身
(借用一封微情书)


6、手写字的识别错误率比较高,OCR 一般用来识别印刷字。同时,OCR 技术在各个云平台都有提供,比如腾讯云、百度云等。


猜你喜欢

转载自blog.csdn.net/u014520797/article/details/79885711