Elasticsearch hot words (new words/custom words) update configuration

Internet slang changes by the day. How can we get new hot words (or domain-specific words) into our search results in near real time?

First, test the current behavior with ik. The text being analyzed is 成龙原名陈港生 ("Jackie Chan was originally named Chen Gangsheng"):

curl -XGET 'http://localhost:9200/_analyze?pretty&analyzer=ik_max_word' -d '
成龙原名陈港生
'
# Response:
{
  "tokens" : [ {
    "token" : "成龙",
    "start_offset" : 1,
    "end_offset" : 3,
    "type" : "CN_WORD",
    "position" : 0
  }, {
    "token" : "原名",
    "start_offset" : 3,
    "end_offset" : 5,
    "type" : "CN_WORD",
    "position" : 1
  }, {
    "token" : "陈",
    "start_offset" : 5,
    "end_offset" : 6,
    "type" : "CN_CHAR",
    "position" : 2
  }, {
    "token" : "港",
    "start_offset" : 6,
    "end_offset" : 7,
    "type" : "CN_WORD",
    "position" : 3
  }, {
    "token" : "生",
    "start_offset" : 7,
    "end_offset" : 8,
    "type" : "CN_CHAR",
    "position" : 4
  } ]
}
ik's main dictionary does not contain the word "陈港生" (Chen Gangsheng), so it was split into single characters.
Now let's fix that by modifying IK's configuration file under the ES directory: plugins/ik/config/ik/IKAnalyzer.cfg.xml

Modify as follows:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer Extended Configuration</comment>
    <!-- Users can configure their own extension dictionary here -->
    <entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry>
    <!-- Users can configure their own extended stop word dictionary here -->
    <entry key="ext_stopwords">custom/ext_stopword.dic</entry>
    <!-- Users can configure the remote extension dictionary here -->
    <entry key="remote_ext_dict">http://192.168.1.136/hotWords.php</entry>
    <!-- Users can configure the remote extended stop word dictionary here -->
    <!-- <entry key="remote_ext_stopwords">words_location</entry> -->
</properties>
Here I use the remote extension dictionary, because another program can then update the words without restarting ES; extending the dictionary with a local file requires an ES restart. Of course, the custom mydict.dic dictionary is also very convenient: just add one word per line (for example, a line containing 陈港生).
Since it is a remote dictionary, the URL must be reachable from the ES nodes; it can be a dynamic page or a plain txt file, but make sure the output is UTF-8 encoded.

Contents of hotWords.php

$s = <<<'EOF'  
陈港生
Yuan Lou  
blue and thin  
EOF;  
header('Last-Modified: '.gmdate('D, d M Y H:i:s', time()).' GMT', true, 200);  
header('ETag: "5816f349-19"');  
echo $s;

ik checks two response headers, Last-Modified and ETag: as long as either of them changes, an update is triggered. ik polls the remote dictionary about once a minute.
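
To confirm that the remote endpoint really returns the two headers ik checks, here is a quick look from Java (a minimal sketch; the URL is the hotWords.php address configured above):

import java.net.HttpURLConnection;
import java.net.URL;

public class CheckDictHeaders {
    public static void main(String[] args) throws Exception {
        URL url = new URL("http://192.168.1.136/hotWords.php");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("HEAD"); // ik's dictionary monitor polls with HEAD requests
        // ik reloads the remote dictionary when either of these values changes between polls
        System.out.println("Last-Modified: " + conn.getHeaderField("Last-Modified"));
        System.out.println("ETag: " + conn.getHeaderField("ETag"));
    }
}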

Restart Elasticsearch and check the startup log; you can see that the three words have been loaded:

[2016-10-31 15:08:57,749][INFO ][ik-analyzer ] 陈港生
[2016-10-31 15:08:57,749][INFO ][ik-analyzer ] Yuan Lou
[2016-10-31 15:08:57,749][INFO ][ik-analyzer ] blue and thin

Now test again: run the same request as before, and the response contains

...  
  }, {  
    "token" : "陈港生",  
    "start_offset" : 5,  
    "end_offset" : 8,  
    "type" : "CN_WORD",  
    "position" : 2  
  }, {  
...

It can be seen that the ik tokenizer now recognizes "陈港生" (Chen Gangsheng) as a single word.


Java server-side implementation: endpoints for loading extension words, adding extension words, and refreshing extension words. Point remote_ext_dict at the load endpoint:

<!-- Users can configure the remote extension dictionary here -->
<entry key="remote_ext_dict">http://ip:port/es/dic/loadExtDict</entry>
@RestController
@RequestMapping("/es/dic")
public class DicController {
	
	private static final Logger logger = LoggerFactory.getLogger(DicController.class);
	
	@Autowired
	private DictRedis dictRedis;
	
	private static final String EXT_DICT_PATH = "E:\\ext_dict.txt";
	
	/**
	  * Description: Load extension words (returned to ik, one word per line)
	  * @param response
	 */
	@RequestMapping(value = "/loadExtDict")
	public void loadExtDict(HttpServletResponse response) {
		logger.error("extDict get start");
		long count = dictRedis.incr(RedisKeyConstants.ES_EXT_DICT_FLUSH);
		// A Redis counter ensures every node in the cluster pulls the new words once;
		// after all nodes have pulled them, skip writing the response body
		if(count > getEsClusterNodesNum()) {
			return;
		}
		
		String result = FileUtil.read(EXT_DICT_PATH);
		if(StringUtils.isEmpty(result)) {
			return;
		}
		
// String result = "Yellow Braised Chicken Rice\nTengchong Rescue\nChen Gangsheng\nBig Watermelon\nBig Pumpkin";
		try {
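			// Update Last-Modified and ETag on every response so that ik treats the dictionary as changed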
			response.setHeader("Last-Modified", TimeUtil.currentTimeHllDT().toString());
			response.setHeader("ETag",TimeUtil.currentTimeHllDT().toString());
			response.setContentType("text/plain; charset=UTF-8");
            PrintWriter out = response.getWriter();
            out.write(result);
            out.flush();
        } catch (IOException e) {
            logger.error("DicController loadExtDict exception" , e);
        }
		
		logger.error("extDict get end,result:{}", result);
	}
	
	/**
	  * Description: Extended word refresh
	  * @param response
	  * @return
	 */
	@RequestMapping(value = "/extDictFlush")
	public String extDictFlush() {
		String result = "ok";
		try {
			dictRedis.del(RedisKeyConstants.ES_EXT_DICT_FLUSH);
        } catch (Exception e) {
        	result = e.getMessage();
        }
		return result;
	}
	
	/**
	  * Description: Add an extended dictionary, multiple words are separated by commas ","
	  * @param dict
	  * @return
	 */
	@RequestMapping(value = "/addExtDict")
	public String addExtDict(String dict) {
		String result = "ok";
		if(StringUtils.isEmpty(dict)) {
			return "Add word cannot be empty";
		}
		
		StringBuilder sb = new StringBuilder();
		String[] dicts = dict.split(",");
		for (String str : dicts) {
			sb.append("\n").append(str);
		}
		
		boolean flag = FileUtil.write(EXT_DICT_PATH, sb.toString());
		if(flag) {
			extDictFlush();
		} else {
			result = "fail";
		}
		
		return result;
	}
	
	/**
	  * Description: Get the number of cluster nodes; defaults to 10 if it cannot be determined
	  * @return
	 */
	private int getEsClusterNodesNum() {
		int num = 10;
		String esAddress = PropertyConfigurer.getString("es.address","http://172.16.32.69:9300,http://172.16.32.48:9300");
		List<String> clusterNodes = Arrays.asList(esAddress.split(","));
		if(clusterNodes != null && clusterNodes.size() != 0) {
			num = clusterNodes.size();
		}
		return num;
	}
}
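
To push new hot words from another program, simply call the addExtDict endpoint over HTTP. Below is a minimal sketch; the host and port are assumptions, so adjust them to wherever DicController is deployed:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

public class AddExtDictClient {
    public static void main(String[] args) throws Exception {
        // Comma-separated words, URL-encoded so Chinese characters survive the query string
        String dict = URLEncoder.encode("陈港生", "UTF-8");
        URL url = new URL("http://localhost:8080/es/dic/addExtDict?dict=" + dict);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            System.out.println(in.readLine()); // prints "ok" on success
        }
    }
}

After addExtDict appends the words and resets the Redis counter, each ES node picks up the new words on its next poll of loadExtDict, with no restart needed.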

File read and write tool class:

public class FileUtil {

	private static final Logger logger = LoggerFactory.getLogger(FileUtil.class);

	/**
	  * Description: file read
	  *
	  * @param path
	  * @return
	  * @throws Exception
	 */
	public static String read(String path) {
		StringBuilder sb = new StringBuilder();
		BufferedReader reader = null;
		try {
			BufferedInputStream fis = new BufferedInputStream(new FileInputStream(new File(path)));
			reader = new BufferedReader(new InputStreamReader(fis, "utf-8"), 512);// Read the text file with a buffer of 512

			String line = "";
			while ((line = reader.readLine()) != null) {
				sb.append(line).append("\n");
			}
		} catch (Exception e) {
			logger.error("FileUtil read exception", e);
		} finally {
			if(reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return sb.toString();
	}

	/**
	  * Description: Append write to file
	  *
	 */
	public static boolean write(String path, String content) {
		boolean flag = true;
		BufferedWriter out = null;
		try {
			out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(path), true), "utf-8")); // append mode; write as UTF-8 to match the dictionary encoding
			out.write(content);
		} catch (IOException e) {
			flag = false;
			logger.error("FileUtil write exception", e);
		} finally {
			try {
				if(out != null) {
					out.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
		return flag;
	}

}
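
As a quick check of how the pieces fit together, the sketch below appends a word the same way addExtDict does and reads the file back the way loadExtDict does before returning it to ik (assuming FileUtil is on the classpath and using the same E:\ext_dict.txt path as the controller):

public class ExtDictFileDemo {
    public static void main(String[] args) {
        String path = "E:\\ext_dict.txt";
        // addExtDict prefixes each word with "\n" and appends it to the file
        FileUtil.write(path, "\n陈港生");
        // loadExtDict reads the whole file and writes it to the HTTP response, one word per line
        System.out.println(FileUtil.read(path));
    }
}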


