目录
Lucene简介
https://baike.baidu.com/item/Lucene/6753302
简单来说,Lucene 是 Apache 旗下的一个开源全文检索工具包(类库):它提供建立索引和执行查询的 API,本身并不是一个开箱即用的完整搜索应用。
使用 Lucene 进行检索所需的依赖(jar 包):
pom.xml:
<!-- Lucene artifacts used by the examples in this article; all three are pinned to version 7.4.0. -->
<!-- NOTE(review): the test classes below import org.junit.*, but no JUnit dependency is declared in this snippet; presumably it is declared elsewhere (confirm). -->
<dependencies>
<!-- Core library: the examples import its document, index, search and store packages. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>7.4.0</version>
</dependency>
<!-- Query parser module: SearchTest imports org.apache.lucene.queryparser.classic.QueryParser. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>7.4.0</version>
</dependency>
<!-- Common analyzers module. NOTE(review): the examples only use StandardAnalyzer; confirm whether this artifact is actually required for them. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>7.4.0</version>
</dependency>
</dependencies>
Lucene 建立索引:
下面用一个 JUnit 测试类演示索引的创建、读取、删除和更新:
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/*
lucene测试类
*/
public class IndexingTest {
private String ids[]={"1","2","3"};
private String citys[]={"qingdao","nanjing","shanghai"};
private String descs[]={
"Qingdao is a beautiful city.",
"Nanjing is a city of culture.",
"Shanghai is a bustling city."
};
private Directory dir;
@Before
public void setUp() throws Exception {
dir=FSDirectory.open(Paths.get("D:\\lucene2"));
IndexWriter writer=getWriter();
for(int i=0;i<ids.length;i++) {
Document doc=new Document();
doc.add(new StringField("id",ids[i], Field.Store.YES));
doc.add(new StringField("city",citys[i],Field.Store.YES));
doc.add(new TextField("desc",descs[i],Field.Store.NO));
writer.addDocument(doc);
}
writer.close();
}
/**
* 获取IndexWriter实例
*/
private IndexWriter getWriter()throws Exception{
Analyzer analyzer=new StandardAnalyzer();
IndexWriterConfig iwc=new IndexWriterConfig(analyzer);
IndexWriter writer=new IndexWriter(dir, iwc);
return writer;
}
/**
* 测试写了几个文档
* @throws Exception
*/
@Test
public void testIndexWriter()throws Exception{
IndexWriter writer=getWriter();
System.out.println("写入了"+writer.numDocs()+"个文档");
writer.close();
}
/**
* 测试读取文档实例
*/
@Test
public void testIndexReader()throws Exception{
IndexReader reader=DirectoryReader.open(dir);
System.out.println("最大文档数:"+reader.maxDoc());
System.out.println("实际文档数:"+reader.numDocs());
reader.close();
}
/**
* 测试删除,在合并之前
*
*/
@Test
public void testDeleteBeforeMerge()throws Exception{
IndexWriter writer=getWriter();
System.out.println("删除之前的文档数量:"+writer.numDocs());
writer.deleteDocuments(new Term("id","1"));
writer.commit();
System.out.println("writer.numDocs:"+writer.numDocs());
System.out.println("writer.maxDocs:"+writer.maxDoc());
writer.close();
}
/**
* 测试删除,在合并之后
*/
@Test
public void testDeleteAfterMerge()throws Exception{
IndexWriter writer=getWriter();
System.out.println("删除之前的文档数量:"+writer.numDocs());
writer.deleteDocuments(new Term("id","1"));
writer.forceMergeDeletes(); //强制删除
writer.commit();
System.out.println("writer.numDocs:"+writer.numDocs());
System.out.println("writer.maxDocs:"+writer.maxDoc());
writer.close();
}
/**
* 测试更新
*/
@Test
public void testUpdate()throws Exception{
IndexWriter writer=getWriter();
Document doc=new Document();
doc.add(new StringField("id","1",Field.Store.YES));
doc.add(new StringField("city","qingdao",Field.Store.YES));
doc.add(new TextField("desc","ddd is a city",Field.Store.NO));
writer.updateDocument(new Term("id","1"), doc);
writer.close();
}
}
结果:
Lucene 的几种检索方式(按词项查询 TermQuery 与解析查询表达式 QueryParser):
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * JUnit tests demonstrating two ways of querying an existing Lucene index:
 * an exact term lookup with TermQuery and a parsed query expression with
 * QueryParser.
 *
 * <p>The index at {@link #INDEX_PATH} must already exist; these tests only
 * read it. NOTE(review): the tests assume the indexed documents have a
 * "contents" field and a stored "fullPath" field; confirm against the code
 * that built the index.
 */
public class SearchTest {

    /** File-system location of the index that is searched. */
    private static final String INDEX_PATH = "D:\\lucene3";

    /** Name of the field the queries are run against. */
    private static final String SEARCH_FIELD = "contents";

    /** Maximum number of hits requested from each search. */
    private static final int MAX_HITS = 10;

    private Directory dir;
    private IndexReader reader;
    private IndexSearcher is;

    /**
     * Opens the index directory, a reader on it and a searcher over the reader.
     *
     * @throws Exception if the index cannot be opened
     */
    @Before
    public void setUp() throws Exception {
        dir = FSDirectory.open(Paths.get(INDEX_PATH));
        reader = DirectoryReader.open(dir);
        is = new IndexSearcher(reader);
    }

    /**
     * Closes the reader and the directory opened by {@link #setUp()}.
     *
     * @throws Exception if closing either resource fails
     */
    @After
    public void tearDown() throws Exception {
        // Either field may still be null when setUp failed part-way through;
        // the directory is closed even if closing the reader throws.
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            if (dir != null) {
                dir.close();
            }
        }
    }

    /**
     * Searches for a single exact term and prints the matching documents.
     *
     * @throws Exception if the search fails
     */
    @Test
    public void TestTermQuery() throws Exception {
        String q = "particular";
        // A TermQuery is not analysed: the term is matched exactly as given.
        Query query = new TermQuery(new Term(SEARCH_FIELD, q));
        TopDocs hits = is.search(query, MAX_HITS);
        System.out.println("匹配" + q + "共查询到" + hits.totalHits + "个文档。。。");
        printFullPaths(hits);
    }

    /**
     * Parses a query expression with QueryParser and prints the matching
     * documents.
     *
     * @throws Exception if the expression cannot be parsed or the search fails
     */
    @Test
    public void TestQueryParser() throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        String q = "particular AND Requirements AND benchmarks";
        QueryParser parser = new QueryParser(SEARCH_FIELD, analyzer);
        Query query = parser.parse(q);
        TopDocs hits = is.search(query, MAX_HITS);
        System.out.println("匹配" + q + "共查询到" + hits.totalHits + "个记录。。。");
        printFullPaths(hits);
    }

    /** Prints the stored "fullPath" field of every returned hit, one per line. */
    private void printFullPaths(TopDocs hits) throws Exception {
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            Document doc = is.doc(scoreDoc.doc);
            System.out.println(doc.get("fullPath"));
        }
    }
}