Highlighting Elasticsearch search results: Java and Kibana implementations

/**
 * Runs the "guess you want" suggestion search against the {@code INDEX_NAME} index and
 * returns one page of hits with the matched terms in {@code question_info} highlighted.
 *
 * <p>The query always requires {@code base_info_id} to match the current application's id.
 * When a search text is supplied, at least one of the phrase/term matches on
 * {@code question_info}, {@code answer_info} or {@code keyword} must also hit, and results
 * keep the default relevance ordering. Without a search text, results are sorted by
 * {@code update_timestamp} descending.
 *
 * <p>Failures while talking to Elasticsearch are logged and recorded via {@code Traces};
 * they are not rethrown. In that case the returned object carries whatever rows were
 * collected before the failure (normally none).
 *
 * @param searchReqVO search criteria: search text, page number and page size (rows)
 * @return the page of results; {@code total} is taken from a separate count request
 */
public EsSearchPageInfoResVO guessYouWantListForClient(EsSearchRequestVO searchReqVO) {
    BaseInfo baseInfo = getApp();
    // NOTE(review): the returned list is not used in this method. The call is kept in case
    // getAccesses() has side effects (e.g. an access check) — confirm and remove if not.
    List<Long> catalogues = getAccesses();
    EsSearchPageInfoResVO result = new EsSearchPageInfoResVO();

    SearchRequest request = new SearchRequest();
    CountRequest countRequest = new CountRequest();
    countRequest.indices(INDEX_NAME);
    request.indices(INDEX_NAME);

    // Wrap every matched term of question_info in the configured pre/post tags.
    HighlightBuilder highlightBuilder = new HighlightBuilder();
    highlightBuilder.preTags("<em style='color: red'>");
    highlightBuilder.postTags("</em>");
    highlightBuilder.field("question_info");

    String searchText = searchReqVO.getSearchText();
    boolean hasSearchText = !StringUtils.isEmpty(searchText);

    BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
    boolQueryBuilder.must(QueryBuilders.matchQuery("base_info_id", baseInfo.getId()));
    if (hasSearchText) {
        boolQueryBuilder.should(QueryBuilders.matchPhraseQuery("question_info", searchText));
        boolQueryBuilder.should(QueryBuilders.matchPhraseQuery("answer_info", searchText));
        boolQueryBuilder.should(QueryBuilders.matchPhraseQuery("keyword", searchText));
        boolQueryBuilder.should(QueryBuilders.matchQuery("question_info", searchText));
        boolQueryBuilder.should(QueryBuilders.matchQuery("answer_info", searchText));
        boolQueryBuilder.should(QueryBuilders.matchQuery("keyword", searchText));
    }
    // With a search text at least one should-clause must match; without one there are no
    // should-clauses, so the minimum is 0.
    boolQueryBuilder.minimumShouldMatch(hasSearchText ? 1 : 0);
    countRequest.query(boolQueryBuilder);

    // Pagination: offset = (page - 1) * rows. Clamp to 0 so that a page number below 1
    // yields the first page instead of a negative offset, which Elasticsearch rejects.
    int from = Math.max(0, searchReqVO.getRows() * (searchReqVO.getPage() - 1));
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.from(from);
    searchSourceBuilder.size(searchReqVO.getRows());
    searchSourceBuilder.query(boolQueryBuilder);
    searchSourceBuilder.highlighter(highlightBuilder);

    // No search text: sort by update time. With search text: keep the default ordering
    // by Elasticsearch relevance score.
    if (!hasSearchText) {
        searchSourceBuilder.sort("update_timestamp", SortOrder.DESC);
    }
    request.source(searchSourceBuilder);

    List<EsSearchResponseVO> resultList = new ArrayList<>();
    try {
        CountResponse countResponse = highLevelClient.count(countRequest, RequestOptions.DEFAULT);
        Long totalCount = countResponse.getCount();
        result.setTotal(totalCount);

        SearchResponse searchResponse = highLevelClient.search(request, RequestOptions.DEFAULT);
        SearchHit[] searchHits = searchResponse.getHits().getHits();
        for (SearchHit searchHit : searchHits) {
            // Replace the plain field value from _source with the highlighted fragment
            // that Elasticsearch produced for the same field.
            Map<String, HighlightField> highlightFields = searchHit.getHighlightFields();
            HighlightField highlightTitle = highlightFields.get("question_info");
            Map<String, Object> sourceMap = searchHit.getSourceAsMap();
            if (highlightTitle != null) {
                // Fragments are returned as an array; only the first one is used.
                Text[] fragments = highlightTitle.getFragments();
                if (fragments != null && fragments.length > 0) {
                    sourceMap.replace("question_info", fragments[0].toString());
                }
            }
            ESQuestionAnswerVersionDTO esResult =
                    JSON.parseObject(JSON.toJSONString(sourceMap), ESQuestionAnswerVersionDTO.class);
            EsSearchResponseVO vo = new EsSearchResponseVO();
            vo.setQuestionInfo(esResult.getQuestion_info());
            vo.setKnowledgeId(esResult.getKnowledge_id());
            vo.setId(esResult.getId());
            vo.setBaseInfoId(esResult.getBase_info_id());
            resultList.add(vo);
        }
    } catch (Exception e) {
        // Best effort: a failed suggestion search must not break the caller. The format
        // string needs the {} placeholder, otherwise the search criteria are never logged.
        log.error("联想搜索知识失败,搜索条件: {}", JSONUtil.toJsonStr(searchReqVO), e);
        Traces.recordException(e);
    }
    result.setRows(resultList);
    return result;
}

Compare the results before and after highlighting:

Before highlighting:

After highlighting:

After adding the highlighting code, every matched keyword in the returned JSON is wrapped in a <em style='color: red'>xxx</em> tag, i.e. the preTags and postTags configured in advance.

Of course, highlighting supports multiple fields; in the Java code you only need to register each field on the builder:

highlightBuilder.field("aaaa");

highlightBuilder.field("bbb");

…

The corresponding fields in each query result can then be replaced one by one with their highlighted fragments.


Running the equivalent single-field highlight query in Kibana returns the following response from Elasticsearch:

{
  "took" : 4,
  "timed_out" : false,
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 22.00881,
    "hits" : [
      {
        "_index" : "knowledge_question_answer",
        "_type" : "_doc",
        "_id" : "12494",
        "_score" : 22.00881,
        "_source" : {
          "id" : 12494,
          "question_info" : "香肠/腊肠/金字火腿常见问题",
          "answer_info" : "万有全广式香肠蒸出来口感很粉,&nbsp; 是面粉放多了吗",
          "keyword" : "香肠发酸,香肠,腊肠,金字火腿,火腿,金字金华香肠,腊肠发酸,万有全广式香肠"
        },
        "highlight" : {
          "question_info" : [
            "<em style='color: red'>香肠</em>/腊肠/金字火腿常见问题"
          ]
        }
      }
    ]
  }
}

Only one field is highlighted above: whenever the field contains a matching keyword, the highlighted fragment is placed in the "highlight" section of the hit. To highlight several fields, list each of them under "fields" in the Kibana query, as follows:

GET /knowledge_question_answer/_doc/_search
{
  "from": 0,
  "size": 20,
  "query": {
    "bool": {
      "should": [
        // query conditions omitted
        ...
      ],
      "adjust_pure_negative": true,
      "minimum_should_match": "1",
      "boost": 1
    }
  },
  "highlight": {
    "pre_tags": [
      "<em style='color: red'>"
    ],
    "post_tags": [
      "</em>"
    ],
    "fields": {
      "question_info": {},
      "answer_info": {},
      "keyword": {}
    }
  }
}

The results of multiple highlighted queries are as follows:

{
  "took" : 4,
  "timed_out" : false,
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 22.00881,
    "hits" : [
      {
        "_index" : "knowledge_question_answer",
        "_type" : "_doc",
        "_id" : "12494",
        "_score" : 22.00881,
        "_source" : {
          "id" : 12494,
          "question_info" : "香肠/腊肠/金字火腿常见问题",
          "answer_info" : "万有全广式香肠蒸出来口感很粉,&nbsp; 是面粉放多了吗",
          "keyword" : "香肠发酸,香肠,腊肠,金字火腿,火腿,金字金华香肠,腊肠发酸,万有全广式香肠"
        },
        "highlight" : {
          "answer_info" : [
            "万有全广式<em style='color: red'>香肠</em>蒸出来口感很粉,&nbsp; 是面粉放多了吗?"
          ],
          "question_info" : [
            "<em style='color: red'>香肠</em>/腊肠/金字火腿常见问题"
          ],
          "keyword" : [
            "<em style='color: red'>香肠</em>发酸,<em style='color: red'>香肠</em>,腊肠,金字火腿,火腿,金字金华<em style='color: red'>香肠</em>,腊肠发酸,万有全广式<em style='color: red'>香肠</em>"
          ]
        }
      }
    ]
  }
}

As shown, when the search term "香肠" (sausage) appears in several highlighted fields, each occurrence is wrapped in the configured pre and post tags, so the front-end page can display the matches highlighted in every field.

Guess you like

Origin blog.csdn.net/qq_23974323/article/details/129745172