前言
全文检索方面,使用基于 Lucene 的 框架 ElasticSearch/Solr
,是很方便的,但是还是那句话
框架会用就好,基础才是最重要的
下面来个 demo
package test;
import java.io.StringReader;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class Searcher {
public static void search(String indexDir, String q) throws Exception {
Directory dir = FSDirectory.open(Paths.get(indexDir));
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher is = new IndexSearcher(reader);
SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
QueryParser parser = new QueryParser("desc", analyzer);
Query query = parser.parse(q);
long start = System.currentTimeMillis();
TopDocs hits = is.search(query, 10);
long end = System.currentTimeMillis();
System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");
QueryScorer scorer = new QueryScorer(query);
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<", ">");
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
highlighter.setTextFragmenter(fragmenter);
for (ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = is.doc(scoreDoc.doc);
System.out.println(doc.get("name"));
System.out.println(doc.get("desc"));
String desc = doc.get("desc");
if (desc != null) {
TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc));
System.out.println(highlighter.getBestFragment(tokenStream, desc));
}
System.out.println("<hr>----------------");
}
reader.close();
}
public static void main(String[] args) {
String indexDir = "C:\\LLLLLLLLLLLLLLLLLLL\\886";
String q = "语言";
try {
search(indexDir, q);
} catch (Exception e) {
e.printStackTrace();
}
}
}