默认分词使用的是IK分词
但对于英文,比如 nike,IK 会把它整体切成一个词 nike,所以如果只搜索其中一部分(比如 nik),就不会命中
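对于英文,我使用模糊(wildcard)搜索来解决:额外添加一个字段,用 keyword 分词器加 lowercase 过滤器,整个值不切分、只统一转成小写,从而做到不区分大小写的匹配(注意:下面配置里这个分析器虽然名叫 caseSensitive,实际效果是不区分大小写)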
下面贴上代码
{
  "index": {
    "analysis": {
      "filter": {
        // Emits the leading prefixes (1 to 50 characters) of each token.
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 50
        },
        // Pinyin conversion (requires the pinyin analysis plugin).
        "pinyin_simple_filter": {
          "type": "pinyin",
          "first_letter": "prefix",
          "padding_char": " ",
          "limit_first_letter_length": 50,
          "lowercase": true
        }
      },
      "char_filter": {
        // NOTE(review): "t2s" is presumably traditional -> simplified Chinese
        // (requires the STConvert plugin) - confirm against the plugin docs.
        "tsconvert": {
          "type": "stconvert",
          "convert_type": "t2s"
        }
      },
      "analyzer": {
        // IK tokenization applied after the tsconvert char filter.
        "ikSearchAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "char_filter": [
            "tsconvert"
          ]
        },
        // Whole value as one token -> pinyin -> edge n-grams -> lowercase.
        "pinyinSimpleIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "pinyin_simple_filter",
            "edge_ngram_filter",
            "lowercase"
          ]
        },
        // Despite its name, this analyzer is case-INsensitive: the keyword
        // tokenizer keeps the value as a single token and it is lowercased.
        "caseSensitive": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase"
          ]
        }
      }
    }
  }
}
{
  "search": {
    "_all": {
      "enabled": true
    },
    "properties": {
      "id": {
        "type": "text"
      },
      // Single lowercased token per value; this is the field the wildcard query targets.
      "keyword": {
        "type": "text",
        "analyzer": "caseSensitive",
        "search_analyzer": "caseSensitive"
      },
      "keywordEn": {
        // text vs. keyword: a "text" field is analyzed (tokenized) when it is
        // indexed, while a "keyword" field is indexed as-is without tokenization.
        "type": "text",
        "analyzer": "ikSearchAnalyzer",
        "search_analyzer": "ikSearchAnalyzer",
        "fields": {
          // Sub-field addressed as "keywordEn.pinyin".
          "pinyin": {
            "type": "text",
            "analyzer": "pinyinSimpleIndexAnalyzer",
            "search_analyzer": "pinyinSimpleIndexAnalyzer"
          }
        }
      }
    }
  }
}
// 构建查询 NativeSearchQueryBuilder searchQuery = new NativeSearchQueryBuilder(); // 索引查询 searchQuery.withIndices("search-entity"); BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); //boost 设置权重 //分词查询 QueryBuilder queryBuilder = QueryBuilders.matchQuery("keywordEn", searchVo.getKeyword()).boost(2f); boolQueryBuilder.should(queryBuilder); //拼音查询 QueryBuilder queryBuilder2 = QueryBuilders.matchQuery("keywordEn.pinyin", searchVo.getKeyword()).boost(2f); boolQueryBuilder.should(queryBuilder2); //模糊查询,不区分大小写 QueryBuilder queryBuilder1 = QueryBuilders.wildcardQuery("keyword", "*"+searchVo.getKeyword().toLowerCase()+"*").boost(2f); boolQueryBuilder.should(queryBuilder1); boolQueryBuilder.must(QueryBuilders.matchQuery("module",searchVo.getModule())); //必须满足should其中一个条件 boolQueryBuilder.minimumShouldMatch(1); searchQuery.withQuery(boolQueryBuilder);
使用 should 来组合这几种搜索。注意:必须通过 minimumShouldMatch 指定 should 至少要命中的数量;否则在同时存在 must 的情况下,should 只是可选条件,所有满足 must 的文档都会被返回——命中 should 的只是排在前面,没有命中任何 should 的也会出现在结果里
关于高亮
拼音命中的结果我不做高亮,只对中英文分词命中和模糊命中做高亮;其中模糊检索的高亮是用正则匹配后做字符串替换来实现的
//高亮字段(拼音不做高亮,拼音的高亮有问题,会将整个字符串高亮) if (!StringUtils.isEmpty(searchHit.getHighlightFields().get("keywordEn"))) { Text[] text = searchHit.getHighlightFields().get("keywordEn").getFragments(); model.setKeyword(text[0].toString()); } //模糊搜索高亮,不区分大小写直接字符串替换 if(!model.getKeyword().contains("<high>")){ Pattern pattern = Pattern.compile("(?i)"+searchVo.getKeyword()); Matcher matcher = pattern.matcher(model.getKeyword()); if(matcher.find()){ String s = matcher.group(); model.setKeyword(model.getKeyword().replace(s,"<high>"+s+"</high>")); } }