java相似度判断(余弦相似度)

业务逻辑:

输入某一企业,返回跟该企业相似的企业列表。

大体思路:

1、确定使用企业的哪些字段来构建相似度字典;

2、将某个企业各字段的具体值转换成向量,用于计算相似度;

3、根据向量计算的值来排序。

代码如下:

/**
 * Finds hidden-danger records similar to the record with the given id and returns one
 * page of them, most similar first.
 *
 * <p>Steps: load the target row, load up to 8000 candidate rows, build a feature
 * dictionary from the candidates with {@code RestructureUtil.dictionaries}, convert the
 * target and every candidate into a vector, score each candidate against the target with
 * {@code RestructureUtil.similarityDegree}, sort by score (descending, via
 * {@code sortByComparator}) and keep at most the first 100 records before paging.
 *
 * @param id value of the {@code ID} column of the record to compare against; it is bound
 *     as a query parameter, never concatenated into the SQL text
 * @param hiddenDangerListVO supplies the paging parameters; the page defaults to 1 when
 *     null or below 1, the limit defaults to 5 when null or negative
 * @return the page produced by {@code PageUtil.getPage} over the ranked records; the page
 *     is built from an empty list when no row has the given id or no vector can be built
 *     for that row
 */
// Raw List/Map are unavoidable here: sortByComparator returns a raw Map, and the parameter
// type of PageUtil.getPage is not visible from this method.
@SuppressWarnings({"rawtypes", "unchecked"})
@Override
public Page findAndOrderBySimilar(String id, HiddenDangerListVO hiddenDangerListVO) {
    Integer page = hiddenDangerListVO.getPage();
    Integer limit = hiddenDangerListVO.getLimit();
    if (page == null || page < 1) {
        page = 1;
    }
    if (limit == null || limit < 0) {
        limit = 5;
    }

    // Upper bound on the number of ranked records handed to the pager.
    final int maxResults = 100;

    // Column list shared by the target query and the candidate query, so both always
    // produce rows with the same keys.
    final String selectColumns = "SELECT" +
            " HY,YHBW,YHLY, companyname '企业名称', " +
            " HCRQ '核查日期', " +
            "CASE WHEN YHDJ = '1' THEN " +
            " '一般隐患' " +
            " WHEN YHDJ = '2' THEN " +
            " '重大隐患' ELSE '无类型' " +
            " END '隐患级别', " +
            "CASE WHEN ZGZT = '1' THEN " +
            " '未整改' " +
            " WHEN ZGZT = '2' THEN " +
            " '整改中' " +
            " WHEN ZGZT = '3' THEN " +
            " '已整改' ELSE '无整改状态' " +
            " END '整改状态', " +
            " YHMS '隐患描述', " +
            " YHDD '隐患地点'," +
            " PCRE '排查人'," +
            " CASE WHEN ZGLX = '1' THEN '立即整改' " +
            " WHEN ZGLX = '2' THEN '限期整改' " +
            " WHEN ZGLX = '3' THEN '停业停产整顿' ELSE '无整改信息' " +
            " END '整改类型'," +
            " ZGWCRQ '整改完成时间' ";

    List ranked = new ArrayList();

    // Load the target first: if it does not exist there is nothing to compare against,
    // and the larger candidate query can be skipped. The id is bound with "?" so that a
    // caller-supplied value cannot alter the statement.
    List<JSONObject> targets = toJsonObjects(jdbcTemplate.queryForList(
            selectColumns + " FROM t_hidden_danger_list WHERE ID = ? ", id));
    if (targets.isEmpty()) {
        return PageUtil.getPage(page, limit, ranked);
    }

    List<JSONObject> candidates = toJsonObjects(jdbcTemplate.queryForList(
            selectColumns + " FROM t_hidden_danger_list limit 8000 "));

    // Fields that feed the similarity dictionary; YHLY is passed separately as the
    // second column list.
    List<String> column = new ArrayList<>();
    column.add("HY");
    column.add("YHBW");
    column.add("整改类型");
    column.add("隐患级别");
    column.add("整改状态");
    column.add("隐患地点");
    List<String> numColumn = new ArrayList<>();
    numColumn.add("YHLY");

    Set<DicVO> dictionaries = RestructureUtil.dictionaries(candidates, column, numColumn);

    List<Double> targetVector = RestructureUtil.vectorFromJson(dictionaries, targets.get(0));
    if (targetVector == null) {
        // vectorFromJson can yield null (candidates are checked for it below); without a
        // target vector no score can be computed.
        return PageUtil.getPage(page, limit, ranked);
    }

    // NOTE(review): the map is keyed by the record itself, so records that compare equal
    // as JSONObjects presumably share a single entry - confirm this is intended.
    Map<JSONObject, Double> scores = new HashMap<>();
    for (JSONObject candidate : candidates) {
        List<Double> candidateVector = RestructureUtil.vectorFromJson(dictionaries, candidate);
        if (candidateVector != null) {
            scores.put(candidate, RestructureUtil.similarityDegree(targetVector, candidateVector));
        }
    }

    Map<JSONObject, Double> sortedScores = sortByComparator(scores);
    for (JSONObject record : sortedScores.keySet()) {
        ranked.add(record);
        if (ranked.size() == maxResults) {
            break;
        }
    }

    return PageUtil.getPage(page, limit, ranked);
}

/**
 * Converts each row map returned by the JDBC template into a {@code JSONObject},
 * preserving row order.
 */
private static List<JSONObject> toJsonObjects(List<Map<String, Object>> rows) {
    List<JSONObject> converted = new ArrayList<>(rows.size());
    for (Map<String, Object> row : rows) {
        converted.add(JSONObject.fromObject(row));
    }
    return converted;
}

/**
 * Returns a new map holding the entries of {@code unsortMap} ordered by value, largest
 * value first.
 *
 * <p>The input map is not modified. Entries with equal values keep the relative order in
 * which the input map iterates them, because the sort is stable.
 *
 * @param unsortMap map whose values are mutually {@link Comparable}; may be null or empty
 * @return a {@link LinkedHashMap} whose iteration order is descending by value; empty
 *     when {@code unsortMap} is null or empty
 * @throws ClassCastException if a value does not implement {@link Comparable} or values
 *     are not comparable with each other
 */
// The raw Map signature is kept for existing callers; generics are used internally only.
@SuppressWarnings({"rawtypes", "unchecked"})
public static Map sortByComparator(Map unsortMap) {
    Map<Object, Object> sortedMap = new LinkedHashMap<Object, Object>();
    if (unsortMap == null || unsortMap.isEmpty()) {
        return sortedMap;
    }

    List<Map.Entry<Object, Object>> entries =
            new ArrayList<Map.Entry<Object, Object>>(unsortMap.entrySet());
    Collections.sort(entries, new Comparator<Map.Entry<Object, Object>>() {
        @Override
        public int compare(Map.Entry<Object, Object> left, Map.Entry<Object, Object> right) {
            // Operands are swapped on purpose: comparing right to left sorts descending.
            return ((Comparable<Object>) right.getValue()).compareTo(left.getValue());
        }
    });

    for (Map.Entry<Object, Object> entry : entries) {
        sortedMap.put(entry.getKey(), entry.getValue());
    }
    return sortedMap;
}

猜你喜欢

转载自blog.csdn.net/qq_18769269/article/details/83685988