在说分词之前,笔者先来介绍下何为分词:分词就是将连续的字序列按照一定的规范重新组合成词序列的过程。英文中,单词之间是以空格作为自然分界符的,但是中文的分词就复杂多了,要涉及一些算法,对于初学者来说,还是有很多难度的。这里笔者只介绍一种最简单的方式,有兴趣的朋友可以看下,直接上代码,python实现方式
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# -*- coding: utf-8 -*-
# flake8: noqa
__author__ = 'wukong'
import urllib
from urllib import urlencode
#配置您申请的appKey和openId
app_key="***"
open_id="***"
"""
request_url 请求地址
params 请求参数
method 请求方法
"""
def request_content(request_url,params,method):
params = urlencode(params)
if method and method.lower() =="get":
f = urllib.urlopen("%s?%s" % (request_url, params))
else:
f = urllib.urlopen(request_url, params)
content = f.read()
print content
def main():
domain="http://api.xiaocongjisuan.com/"
servlet="data/chinesekeyword/analysis"
method="get"
request_url=domain+servlet
#字典
params ={}
params["appKey"]=app_key
params["openId"]=open_id
#变动部分
params["content"]="我是一个中国人,你知道嘛"
request_content(request_url,params,method)
if __name__ == '__main__':
main()
java 为例:
复制代码
package com.xiaocongjisuan.module.example;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Map;
public class Application {
public static final String DEF_CHATSET = "UTF-8";
public static final int DEF_CONN_TIMEOUT = 30000;
public static final int DEF_READ_TIMEOUT = 30000;
public static String userAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36";
//配置您申请的appKey和openId
public static final String APP_KEY ="yours";
public static final String OPEN_ID ="yours";
//将map型转为请求参数型
public static String urlEncode(Map<String,Object> params) {
if(params==null){return "";};
StringBuilder sb = new StringBuilder();
for (Map.Entry<String,Object> i : params.entrySet()) {
try {
sb.append(i.getKey()).append("=").append(URLEncoder.encode(i.getValue()+"","UTF-8")).append("&");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
}
String r=sb.toString();
if(r.endsWith("&")){
r = r.substring(0,r.length()-1);
}
return r;
}
/**
*
* @param requestUrl 请求地址
* @param params 请求参数
* @param method 请求方法
* @return 请求结果
* @throws Exception
*/
public static String requestContent(String requestUrl, Map<String,Object> params,String method) throws Exception {
HttpURLConnection conn = null;
BufferedReader reader = null;
String rs = null;
try {
//组装请求链接
StringBuffer sb = new StringBuffer();
if(method!=null&&method.equalsIgnoreCase("get")){
requestUrl = requestUrl+"?"+urlEncode(params);
}
//默认get
URL url = new URL(requestUrl);
conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("GET");
if(method!=null&&method.equalsIgnoreCase("post")){
conn.setRequestMethod("POST");
conn.setDoOutput(true);
conn.setDoInput(true);
}
//参数配置
conn.setRequestProperty("User-agent", userAgent);
conn.setUseCaches(false);
conn.setConnectTimeout(DEF_CONN_TIMEOUT);
conn.setReadTimeout(DEF_READ_TIMEOUT);
conn.setInstanceFollowRedirects(false);
conn.connect();
if (params!= null && method.equalsIgnoreCase("post")) {
try {
DataOutputStream out = new DataOutputStream(conn.getOutputStream());
out.writeBytes(urlEncode(params));
} catch (Exception e) {
e.printStackTrace();
}
}
//读取数据
InputStream is = conn.getInputStream();
reader = new BufferedReader(new InputStreamReader(is, DEF_CHATSET));
String strRead = null;
while ((strRead = reader.readLine()) != null) {
sb.append(strRead);
}
rs = sb.toString();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (reader != null) {
reader.close();
}
if (conn != null) {
conn.disconnect();
}
}
return rs;
}
public static void main(String[] args) throws Exception{
String domain="http://api.xiaocongjisuan.com/";
String servlet="data/skydriverdata/get";
String method="get";
String requestUrl=domain+servlet;
Map<String,Object> params=new HashMap<String,Object>();
params.put("appKey",APP_KEY);
params.put("openId",OPEN_ID);
//变动部分
params.put("q","a");
params.put("currentPage",1);
params.put("pageSize",20);
String result=requestContent(requestUrl,params,method);
System.out.println(result);
let nextIds = [];
this.relationList.forEach(item =www.jintianxuesha.com> {
// 源
if (item.source =www.baichuangyul.com== sId) {
if (!this.nodesObj[item.target].xIndex) {
this.nodesObj[item.target].xIndex = xIndex;
nextIds.push(item.target);
}
}
// 目的
if (item.target === sId) www.xinxingyLpt.com{
if (!this.nodesObj[item.source].xIndex)www.pingguoyul.cn {
this.nodesObj[item.source].xIndex = xIndex;
nextIds.push(item.source);
}
}
});
let nextIdsLen = nextIds.length;
// 1、没有当前的队列,没有后续的队列,有nextIds
if (
!this.currentQueue.length &&
!this.afterQueue.length &&
nextIdsLen
) {
this.currentQueue www.baiweiegw.com= nextIds;
this.currentXindex = this.currentXindex + 1;
this.setNextOrder(www.qjljdgt.cn);
} else if (this.currentQueue.length && nextIdsLen) {
// 2、有当前的队列在遍历,排队
this.afterQueue = this.afterQueue.concat(nextIds);
} else if (!this.currentQueue.length && this.afterQueue.length) {
// 3、没有当前的队列了,有排队的队列,则排队的进去
if (nextIdsLen) www.yuanyangyuL.com {
this.afterQueue www.sangyuLpt.com= this.afterQueue.concat(nextIds);
}
this.currentQueue = JSON.parse(JSON.stringify(this.afterQueue));
this.afterQueue = [www.yuanyangyuL.com ];
this.currentXindex = this.currentXindex + 1;
}
复制代码
原理主要是调用接口,直接输入一串字符串,然后接口会自动把结果以json或者xml的形式返回,具体文档可以点我查看。这种实现方式很简单,省去了大量的开发时间,屏蔽了语言之间的差异性,值得推荐。