Elasticsearch5基于completion suggester实现提示词(类京东淘宝)

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/wwd0501/article/details/80885987

支持拼音首字母、全拼和中文三种方式的匹配提示,效果如下:



中文搜索使用IK分词器,IK分词器安装:https://blog.csdn.net/wwd0501/article/details/78258274

因要支持拼音提示,故需安装拼音插件,拼音插件安装以及IK+拼音使用地址:https://blog.csdn.net/wwd0501/article/details/80622669

1、创建index,设置setting

curl -XPUT "http://localhost:9200/medcl/" -d '
{
	"index": {
		"analysis": {
			"analyzer": {
				"default": {
					"tokenizer": "ik_max_word"
				},
				"pinyin_analyzer": {
					"tokenizer": "shopmall_pinyin"
				},
				"first_py_letter_analyzer": {
					"tokenizer": "first_py_letter"
				},
				"full_pinyin_letter_analyzer": {
					"tokenizer": "full_pinyin_letter"
				}
			},
			"tokenizer": {
				"shopmall_pinyin": {
					"keep_joined_full_pinyin": "true",
					"keep_first_letter": "true",
					"keep_separate_first_letter": "false",
					"lowercase": "true",
					"type": "pinyin",
					"limit_first_letter_length": "16",
					"keep_original": "true",
					"keep_full_pinyin": "true",
					"keep_none_chinese_in_joined_full_pinyin": "true"
				},
				"first_py_letter": {
					"type": "pinyin",
					"keep_first_letter": true,
					"keep_full_pinyin": false,
					"keep_original": false,
					"limit_first_letter_length": 16,
					"lowercase": true,
					"trim_whitespace": true,
					"keep_none_chinese_in_first_letter": false,
					"none_chinese_pinyin_tokenize": false,
					"keep_none_chinese": true,
					"keep_none_chinese_in_joined_full_pinyin": true
				},
				"full_pinyin_letter": {
					"type": "pinyin",
					"keep_separate_first_letter": false,
					"keep_full_pinyin": false,
					"keep_original": false,
					"limit_first_letter_length": 16,
					"lowercase": true,
					"keep_first_letter": false,
					"keep_none_chinese_in_first_letter": false,
					"none_chinese_pinyin_tokenize": false,
					"keep_none_chinese": true,
					"keep_joined_full_pinyin": true,
					"keep_none_chinese_in_joined_full_pinyin": true
				}
			}
		}
	}
}'

2、mapping

curl -XPOST http://localhost:9200/medcl/folks/_mapping -d'
{
	"folks": {
		"properties": {
			"name": {
				"type": "completion",
				"fields": {
					"pinyin": {
						"type": "completion",
						"analyzer": "pinyin_analyzer"
					},
					"keyword_pinyin": {
						"type": "completion",
						"analyzer": "full_pinyin_letter_analyzer"
					},
					"keyword_first_py": {
						"type": "completion",
						"analyzer": "first_py_letter_analyzer"
					}
				}
			}
		}
	}
}'

3、初始化测试数据

curl -XPOST http://localhost:9200/medcl/folks/ -d'{"name":"苹果"}'

4、搜索

curl -XPOST http://localhost:9200/medcl/folks/_search -d '
{
  "size": 0,
  "_source": "name",
  "suggest": {
    "my-suggest-1": {
      "text": "苹",
      "completion": {
        "field": "name",
        "size": 20
      }
    }
  }
}'

5、结果

{
	"took": 2,
	"timed_out": false,
	"_shards": {
		"total": 5,
		"successful": 5,
		"failed": 0
	},
	"hits": {
		"total": 0,
		"max_score": 0,
		"hits": []
	},
	"suggest": {
		"my-suggest-1": [{
			"text": "苹",
			"offset": 0,
			"length": 1,
			"options": [{
					"text": "苹果",
					"_index": "medcl",
					"_type": "folks",
					"_id": "AWRLJ9lrsB4QSA8b-FrJ",
					"_score": 1,
					"_source": {
						"name": "苹果"
					}
				}
			]
		}]
	}
}

6、代码实现,基于elasticsearch5.x版本

* 1、检测搜索词是中文还是拼音
* 2、若是中文,直接按照name字段提示
* 3、若是拼音(拼音+汉字),先按照name.keyword_pinyin获取,若是无结果按照首字母name.keyword_first_py获取

Java代码:

package test;

import java.net.UnknownHostException;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;

import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class EsSuggestTest01 {

    private static Client client;

    @Before
    public void init() throws UnknownHostException {
        client = ElasticsearchConfiguration.getClient();
    }

    @After
    public void close() {
    	ElasticsearchConfiguration.close();
    }
    
    @Test
    public void test(){
    	String index = "medcl";
    	String type = "folks";
    	QueryBuilder queryBuilder = QueryBuilders.matchAllQuery();
    	String text = "苹";
    	String field = "name";
		
    	if(checkLetter(text)) {
    	    field = "name.keyword_pinyin";	
    	} else if(checkChinese(text)) {
    	    field = "name";
    	} else {
    	    field = "name.keyword_pinyin";
	}
		
	Set<String> results = getSuggestWord(index, type, field, text, queryBuilder);
	//结果为空且是拼音,可以尝试拼音首字母提示
	if(results.size() == 0 && checkLetter(text)) {
		field = "nicknames.nicknameSuggest.keyword_first_py";
		results = getSuggestWord(index, type, field, text, queryBuilder);
	}
		
    	for (String result : results) {
    	    System.out.println(result);
    	}
    }
    
	/**
	  * Description:提示词,支持中文、拼音、首字母等(注意要去掉_source信息)
	  * 
	  * 1、检测搜索词是中文还是拼音
      * 2、若是中文,直接按照name字段提示
      * 3、若是拼音(拼音+汉字),先按照name.keyword_pinyin获取,若是无结果按照首字母name.keyword_first_py获取
	  * 
	  * SearchRequestBuilder的size要设置为0,否则显示hits结果
	  * searchRequestBuilder.setSize(0);
	  * 
	  * _source 由于磁盘读取和网络传输开销,可以影响性能的大小,为了节省一些网络开销,请从_source 使用源过滤中过滤掉不必要的字段以最小化 _source大小
	  * 可以采用过滤的形式,也可以直接不显示_source
	  * 1、searchRequestBuilder.setFetchSource("name", null);     过滤形式
	  * 2、searchRequestBuilder.setFetchSource(false)   直接不显示_source
	  * 
	  * @author wangweidong
	  * CreateTime: 2018年6月28日 下午2:39:47
	  *
	  * @param index
	  * @param type
	  * @param field
	  * @param text
	  * @return
	 */
	 public static Set<String> getSuggestWord(String index, String type, String field, String text, QueryBuilder queryBuilder) {
		//过滤相同的提示词,Es5.2版本不支持过滤掉重复的建议,故需自己对ES返回做去重处理,Es6.1以上版本可以通过skip_duplicates字段处理,skip_duplicates表示是否应过滤掉重复的建议(默认为false)
		Set<String> results = new TreeSet<String>();
		CompletionSuggestionBuilder suggestionBuilder = new CompletionSuggestionBuilder(field);
	   	suggestionBuilder.text(text);
	   	suggestionBuilder.size(20);
	      
	   	SuggestBuilder suggestBuilder = new SuggestBuilder();
	   	suggestBuilder.addSuggestion("my-suggest-1", suggestionBuilder);
	   	
	   	SearchRequestBuilder searchRequestBuilder = client.prepareSearch(index).setTypes(type);
	   	searchRequestBuilder.setExplain(false);
	   	searchRequestBuilder.setSize(0);
	   	searchRequestBuilder.setQuery(queryBuilder);
	   	searchRequestBuilder.suggest(suggestBuilder);
	   	searchRequestBuilder.setFetchSource(false);
	//   	searchRequestBuilder.setFetchSource("name", null);
	   	
	   	SearchResponse resp = searchRequestBuilder.execute().actionGet();
	   	Suggest sugg = resp.getSuggest();
	   	CompletionSuggestion suggestion = sugg.getSuggestion("my-suggest-1");
	   	List<CompletionSuggestion.Entry> list = suggestion.getEntries();
	   	for (int i = 0; i < list.size(); i++) {
	   		List<? extends Suggest.Suggestion.Entry.Option> options = list.get(i).getOptions();  
	   		for (Suggest.Suggestion.Entry.Option op : options) {
	   			results.add(op.getText().toString());
			}
	   	}
	   	return results;
    }
    
    /**
     * 只包含字母
     * @return 验证成功返回true,验证失败返回false
     */
    public static boolean checkLetter(String cardNum) { 
        String regex = "^[A-Za-z]+$";
        return Pattern.matches(regex, cardNum); 
    }
    
    /**
     * 验证中文
     * @param chinese 中文字符
     * @return 验证成功返回true,验证失败返回false
     */ 
    public static boolean checkChinese(String chinese) { 
        String regex = "^[\u4E00-\u9FA5]+$"; 
        return Pattern.matches(regex,chinese); 
    } 
}

import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import java.net.InetAddress;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.List;

/**
 * Holds a single shared {@link TransportClient} for the configured cluster.
 *
 * <p>The client is created on the first call to {@link #getClient()} and reused until
 * {@link #close()} is called. Access to the shared client is not synchronized.
 */
public class ElasticsearchConfiguration {

    private static final String CLUSTER_NAME = "shop-es";

    // Written as URIs only so that host and port can be parsed with java.net.URI;
    // just getHost() and getPort() are read from them.
    private static final List<String> CLUSTER_NODES =
            Arrays.asList("http://172.16.32.6:9300", "http://172.16.32.8:9300");

    private static TransportClient client;

    private ElasticsearchConfiguration() {
    }

    /**
     * Returns the shared client, creating and configuring it on first use.
     *
     * <p>Reusing the existing instance avoids leaking a transport client each time this
     * method is called. If adding a node address fails, the partially configured client
     * is closed before the exception propagates.
     *
     * @return the shared client connected to the configured node addresses
     * @throws UnknownHostException if a configured node host cannot be resolved
     */
    public static Client getClient() throws UnknownHostException {
        if (client != null) {
            return client;
        }

        Settings settings = Settings.builder().put("cluster.name", CLUSTER_NAME).build();
        TransportClient created = new PreBuiltTransportClient(settings);
        try {
            for (String node : CLUSTER_NODES) {
                URI host = URI.create(node);
                created.addTransportAddress(
                        new InetSocketTransportAddress(InetAddress.getByName(host.getHost()), host.getPort()));
            }
        } catch (UnknownHostException | RuntimeException e) {
            // Do not leave a half-configured client open.
            created.close();
            throw e;
        }

        client = created;
        return client;
    }

    /**
     * Closes the shared client if one exists; a later {@link #getClient()} creates a new one.
     * Calling this when no client was created is a no-op.
     */
    public static void close() {
        if (client != null) {
            client.close();
            client = null;
        }
    }
}
参考官网地址:https://www.elastic.co/guide/en/elasticsearch/reference/5.2/search-suggesters-completion.html

猜你喜欢

转载自blog.csdn.net/wwd0501/article/details/80885987