Java EE Lucene:全文检索,索引库的维护(删、改、查)

Lucene的Jar包下载:https://pan.baidu.com/s/1ekc7ZWqukUjkSXxQp09hDA  密码:yvj3

Test.java(测试类,索引库的删、改、查):

package com.xxx.lucene;

import static org.junit.Assert.*;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Index maintenance examples for an on-disk Lucene index: delete, update and query.
 * (Adding documents is covered in the previous blog post.)
 *
 * <p>Every writer and reader is opened in a try-with-resources block so that it is closed
 * even when the operation in between throws; an unclosed {@link IndexWriter} keeps the
 * index locked for any writer opened afterwards.
 *
 * <p>NOTE(review): this class shares its simple name with the imported
 * {@code org.junit.Test} annotation. JLS §7.5.1 makes it a compile-time error for a
 * compilation unit to both single-type-import a type named {@code n} and declare a
 * top-level type named {@code n}, so the class (and file) should be renamed, e.g. to
 * {@code IndexMaintenanceTest}, before compiling.
 */
public class Test {

	/** Filesystem location of the index; shared by the writer and the searcher. */
	private static final String INDEX_PATH = "D:\\temp\\index";

	/** Maximum number of hits fetched and printed per query. */
	private static final int MAX_HITS = 10;

	// Names of the fields (Lucene "Field"s) this class queries and prints.
	private static final String FIELD_FILE_NAME = "fileName";
	private static final String FIELD_FILE_CONTENT = "fileContent";
	private static final String FIELD_FILE_SIZE = "fileSize";
	private static final String FIELD_FILE_PATH = "filePath";

	/** Stored fields printed for every hit, in output order. */
	private static final String[] PRINTED_FIELDS = {
		FIELD_FILE_NAME, FIELD_FILE_CONTENT, FIELD_FILE_SIZE, FIELD_FILE_PATH
	};

	/**
	 * Opens an {@link IndexWriter} on the index at {@link #INDEX_PATH}. Adding, deleting
	 * and updating documents all go through this object.
	 *
	 * @return a new writer configured with the IK analyzer; the caller must close it
	 * @throws Exception if the directory or the writer cannot be opened
	 */
	private IndexWriter getIndexWriter() throws Exception {
		// To keep the index in memory instead of on disk, a RAMDirectory can be used here.
		Directory directory = FSDirectory.open(new File(INDEX_PATH));
		Analyzer analyzer = new IKAnalyzer();
		// Version must match the Lucene jar in use; LATEST means the newest one.
		IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
		return new IndexWriter(directory, config);
	}

	/** Deletes every document in the index. */
	@Test
	public void testAllDelete() throws Exception {
		try (IndexWriter indexWriter = getIndexWriter()) {
			indexWriter.deleteAll();
		}
	}

	/** Deletes the documents matching a query. */
	@Test
	public void testDelete() throws Exception {
		// TermQuery is an exact-match condition on one term of the given field.
		Query query = new TermQuery(new Term(FIELD_FILE_NAME, "关键词"));
		try (IndexWriter indexWriter = getIndexWriter()) {
			indexWriter.deleteDocuments(query);
		}
	}

	/** Updates a document; Lucene implements this as delete-then-add. */
	@Test
	public void testUpdate() throws Exception {
		// NOTE(review): the replacement uses fields "fileN"/"fileC", which printResult
		// never reads (it reads fileName/fileContent) — confirm this is intentional.
		Document doc = new Document();
		doc.add(new TextField("fileN", "测试文件名", Store.YES));
		doc.add(new TextField("fileC", "测试文件内容", Store.YES));

		try (IndexWriter indexWriter = getIndexWriter()) {
			// Removes the documents containing the term, then adds doc. The writer is
			// already configured with the IK analyzer, so no analyzer is passed again.
			indexWriter.updateDocument(new Term(FIELD_FILE_NAME, "被修改的"), doc);
		}
	}

	/**
	 * Opens an {@link IndexSearcher} over the on-disk index at {@link #INDEX_PATH}.
	 *
	 * @return a new searcher; the caller must close {@code getIndexReader()} when done
	 * @throws Exception if the index cannot be opened
	 */
	private IndexSearcher getIndexSearcher() throws Exception {
		Directory directory = FSDirectory.open(new File(INDEX_PATH));
		IndexReader indexReader = DirectoryReader.open(directory); // queries read through a reader
		return new IndexSearcher(indexReader);
	}

	/**
	 * Runs {@code query} and prints the stored fields of the first {@link #MAX_HITS} hits.
	 *
	 * @param indexSearcher searcher to run the query on
	 * @param query         condition to search for
	 * @throws Exception if searching or loading a document fails
	 */
	private void printResult(IndexSearcher indexSearcher, Query query) throws Exception {
		TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
		for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
			// scoreDoc.doc is the document id (auto-incremented, starting at 0).
			Document document = indexSearcher.doc(scoreDoc.doc);
			for (String field : PRINTED_FIELDS) {
				System.out.println(document.get(field));
			}
			System.out.println("------------");
		}
	}

	/**
	 * Opens a searcher, prints the hits for {@code query} and closes the underlying
	 * reader, also when printing fails.
	 *
	 * @param query condition to search for
	 * @throws Exception if the index cannot be opened or searched
	 */
	private void searchAndPrint(Query query) throws Exception {
		IndexSearcher indexSearcher = getIndexSearcher();
		try (IndexReader indexReader = indexSearcher.getIndexReader()) {
			printResult(indexSearcher, query);
		}
	}

	/** Queries all documents (MatchAllDocsQuery). */
	@Test
	public void testMatchAllDocsQuery() throws Exception {
		// For an exact match on one term use e.g. new TermQuery(new Term("fileName", ...)).
		searchAndPrint(new MatchAllDocsQuery());
	}

	/** Queries by numeric range (NumericRangeQuery). */
	@Test
	public void testNumericRangeQuery() throws Exception {
		// Arguments: field (file size), min, max, include min?, include max?
		Query query = NumericRangeQuery.newLongRange(FIELD_FILE_SIZE, 47L, 200L, false, true);
		searchAndPrint(query);
	}

	/** Queries by a combination of conditions (BooleanQuery). */
	@Test
	public void testBooleanQuery() throws Exception {
		BooleanQuery booleanQuery = new BooleanQuery();
		// MUST: required; SHOULD: optional; MUST_NOT: must be absent.
		booleanQuery.add(new TermQuery(new Term(FIELD_FILE_NAME, "关键字")), Occur.MUST);
		booleanQuery.add(new TermQuery(new Term(FIELD_FILE_CONTENT, "关键字")), Occur.SHOULD);
		searchAndPrint(booleanQuery);
	}

	/**
	 * The commonly used approach: QueryParser analyzes (tokenizes) the user's query text
	 * before searching. Requires lucene-queryparser-4.10.3.jar.
	 *
	 * <p>Query syntax: {@code *:*} matches everything; {@code field:value} targets a field,
	 * and terms without a field prefix use the default field; {@code fileSize:[1 TO 1000]}
	 * is a range; upper-case {@code AND} or a {@code +} prefix means required (Occur.MUST),
	 * a {@code -} prefix means must not match, upper-case {@code OR} means optional
	 * (Occur.SHOULD).
	 */
	@Test
	public void testQueryParser() throws Exception {
		// Arguments: default field, analyzer (must match the one used to build the index).
		QueryParser queryParser = new QueryParser(FIELD_FILE_NAME, new IKAnalyzer());
		// A field prefix applies only to the term directly after it, so the phrase is
		// grouped in parentheses to search every term in the named field.
		Query query = queryParser.parse(
				"fileName:(lucene is apache) OR fileContent:(lucene is apache)");
		searchAndPrint(query);
	}

	/** Queries through MultiFieldQueryParser, which takes several default fields. */
	@Test
	public void testMultiFieldQueryParser() throws Exception {
		String[] fields = {FIELD_FILE_NAME, FIELD_FILE_CONTENT};
		// Arguments: default fields, analyzer.
		MultiFieldQueryParser queryParser = new MultiFieldQueryParser(fields, new IKAnalyzer());
		searchAndPrint(queryParser.parse("lucene is apache"));
	}

}

src/IKAnalyzer.cfg.xml(IK分词器的配置文件):

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">  
<properties>  
	<comment>IK Analyzer 扩展配置</comment>
	<!--用户可以在这里配置自己的扩展字典 -->
	<entry key="ext_dict">ext.dic;</entry> 
	
	<!--用户可以在这里配置自己的扩展停止词字典-->
	<entry key="ext_stopwords">stopword.dic;</entry> 
	
</properties>

src/ext.dic(扩展字典):

高富帅
流行词汇
新词

src/stopword.dic(停止词字典):

啊
的
吗
a
and
the

猜你喜欢

转载自blog.csdn.net/houyanhua1/article/details/82845945