基于 Lucene5 的搜索结果高亮显示

前言

全文检索方面,使用基于 Lucene 的框架(ElasticSearch/Solr)是很方便的,但还是那句话:

框架会用就好,基础才是最重要的

下面来个 demo

package test;

import java.io.StringReader;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demo that queries a Lucene index on the "desc" field and prints every hit
 * together with the best highlighted fragment of its "desc" text.
 */
public class Searcher {

    /** Index location used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INDEX_DIR = "C:\\LLLLLLLLLLLLLLLLLLL\\886";

    /** Query used by {@link #main(String[])} when no second argument is given. */
    private static final String DEFAULT_QUERY = "语言";

    /** Name of the stored field that is both searched and highlighted. */
    private static final String DESC_FIELD = "desc";

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 10;

    /**
     * Searches the index at {@code indexDir} for {@code q} and prints the
     * "name" field, the "desc" field and a highlighted fragment of "desc" for
     * each of the top hits.
     *
     * @param indexDir file-system path of the Lucene index directory
     * @param q        query string, parsed against the "desc" field
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or searching/highlighting fails
     */
    public static void search(String indexDir, String q) throws Exception {
        // try-with-resources guarantees the directory, reader and analyzer are
        // released even when parsing, searching or highlighting throws.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(dir);
             SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {

            IndexSearcher is = new IndexSearcher(reader);
            QueryParser parser = new QueryParser(DESC_FIELD, analyzer);
            Query query = parser.parse(q);

            long start = System.currentTimeMillis();
            TopDocs hits = is.search(query, MAX_HITS);
            long end = System.currentTimeMillis();
            System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");

            QueryScorer scorer = new QueryScorer(query);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            // NOTE(review): the "<" / ">" markers look like HTML tags that were
            // stripped when this snippet was published - confirm intended tags.
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<", ">");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
            highlighter.setTextFragmenter(fragmenter);

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                // Read the stored field once; it is printed raw and then highlighted.
                String desc = doc.get(DESC_FIELD);
                System.out.println(doc.get("name"));
                System.out.println(desc);
                if (desc != null) {
                    TokenStream tokenStream = analyzer.tokenStream(DESC_FIELD, new StringReader(desc));
                    System.out.println(highlighter.getBestFragment(tokenStream, desc));
                }
                System.out.println("<hr>----------------");
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args optional overrides: {@code args[0]} is the index directory,
     *             {@code args[1]} is the query; missing values fall back to
     *             the built-in demo defaults
     */
    public static void main(String[] args) {
        String indexDir = args.length > 0 ? args[0] : DEFAULT_INDEX_DIR;
        String q = args.length > 1 ? args[1] : DEFAULT_QUERY;
        try {
            search(indexDir, q);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

效果

(此处原为运行效果截图:控制台输出中命中的关键词被高亮标记包裹)

猜你喜欢

转载自blog.csdn.net/larger5/article/details/79935930