Example (IKAnalyzer word segmentation):
import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.wltea.analyzer.lucene.IKAnalyzer; public class IkAnalyzerTest { public static void main(String[] args) { // String keyWord = // "What is the effect of IKAnalyzer's word segmentation, let's take a look at the effect of IKAnalyzer's word segmentation, let's take a look, we we we we we we we we we we"; String keyWord = "write of pork ribs, pork, pork, pork, pork, pork"; // String keyWord = ""; // Create IKAnalyzer Chinese word segmentation object IKAnalyzer analyzer = new IKAnalyzer(); // use smart word segmentation analyzer.setUseSmart(true); // print word segmentation result try { printAnalysisResult(analyzer, keyWord); } catch (Exception e) { e.printStackTrace (); } } /** * Print out the segmentation result of the given tokenizer * * @param analyzer tokenizer * @param keyWord keyword * @throws Exception */ private static String[] printAnalysisResult(Analyzer analyzer, String keyWord) throws Exception { System.out.println("[" + keyWord + "]The word segmentation effect is as follows"); String logString = "GetKeyWordArray getKeyWordArray "; String[] returnMsgArray = null; String returnMsgTemp = ""; TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(keyWord)); tokenStream.addAttribute(CharTermAttribute.class); try { while (tokenStream.incrementToken()) { CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class); if (charTermAttribute != null) { System.out.println(logString + "charTermAttribute ====== " + charTermAttribute.toString()); if (charTermAttribute.toString() != null) { returnMsgTemp += charTermAttribute.toString(); returnMsgTemp += ","; } else { System.out.println(logString + "charTermAttribute.toString() is null"); } } else { 
System.out.println(logString + "charTermAttribute is null"); } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace (); } System.out.println(logString + "returnMsgTemp == " + returnMsgTemp); returnMsgArray = returnMsgTemp.split(","); if (returnMsgArray == null) { System.out.println(logString + "returnMsgArray is null"); return null; } System.out.println(logString + "returnMsgArray len == " + returnMsgArray.length); // remove duplicate elements from the array List<String> list = new ArrayList<String>(); for (int i = 0; i < returnMsgArray.length; i++) { if (!list.contains(returnMsgArray[i])) {// If the array list does not contain the current item, add the item to the array if (returnMsgArray[i].equals("")) { System.out.println(logString + "returnMsgArray[" + i + "].equals(\"\")"); continue; } list.add(returnMsgArray[i]); } } String[] newStr = list.toArray(new String[1]); if (newStr == null) { System.out.println(logString + "newStr is null"); return null; } returnMsgTemp = ""; System.out.println(logString + "newStr.length ==" + newStr.length); for (int i = 0; i < newStr.length; i++) { if (newStr[i] == null) { System.out.println(logString + "newStr[" + i + "] is null"); } returnMsgTemp = returnMsgTemp + "[" + newStr[i] + "]"; } System.out.println(logString + "returnMsgArray ==" + returnMsgTemp); System.out.println(logString + "newStr ==" + newStr.toString()); return newStr; } }
JAR package download: see the attachment.