Lucene06---查询

前面总结了很多Lucene上的东西,建立索引、高亮等等这些都是为了查询做准备和服务的,下面我们来说说查询,我们知道Lucene的主要功能就是查询功能,所以Lucene里的查询做的非常强大,可以有各种各样的查询。

org.apache.lucene.search包下的Query类有许多子类(查询对象),这里我们说其中几个比较重要的:

       TermQuery:关键字查询

       TermRangeQuery:范围查询

       WildcardQuery:通配符查询

       PhraseQuery:短语查询

       BooleanQuery:Boolean查询(最重要的)

 

FirstLucene04SearchByQuery.java

package com.iflytek.lucene;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * @author xudongwang 2012-2-10
 * 
 *         Email:[email protected]
 */
public class FirstLucene04SearchByQuery {

	/**
	 * 源文件路径
	 */
	private String filePath01 = "F:\\Workspaces\\workspaceSE\\BlogDemo\\luceneDatasource\\HelloLucene01.txt";
	private String filePath02 = "F:\\Workspaces\\workspaceSE\\BlogDemo\\luceneDatasource\\HelloLucene04.txt";

	/**
	 * 索引路径
	 */
	private String indexPath = "F:\\Workspaces\\workspaceSE\\BlogDemo\\luceneIndex";

	/**
	 * 分词器,这里我们使用默认的分词器,标准分析器(好几个,但对中文的支持都不好)
	 */
	private Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);

	private Directory ramDir = null;

	/**
	 * 搜索
	 * 
	 * @param queryStr
	 *            搜索的关键词
	 * @throws Exception
	 */
	public void search(String queryStr) throws Exception {

		// 1、把要搜索的文本解析为Query对象
		String[] fields = { "name", "content" };
		QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
		Query query = queryParser.parse(queryStr);
		search(query);
	}
	
	/**
	 * 搜索
	 * 
	 * @param query
	 *            Query对象
	 * @throws Exception
	 */
	public void search(Query query) throws Exception {
		// 2、进行查询
		IndexReader indexReader = IndexReader.open(ramDir);
		IndexSearcher indexSearcher = new IndexSearcher(indexReader);
		Filter filter = null;
		TopDocs topDocs = indexSearcher.search(query, filter, 10000);
		System.out.println("总共有【" + topDocs.totalHits + "】条匹配的结果");// 注意这里的匹配结果是指文档的个数,而不是文档中包含搜索结果的个数

		// 3、取出数据,并打印结果
		for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
			int docSn = scoreDoc.doc;// 文档内部编号
			Document document = indexSearcher.doc(docSn);// 根据文档编号取出相应的文档
			File2Document.printDocumentInfo(document);// 打印出文档信息
		}

	}

	/**
	 * 优化创建索引,将索引存在在内存和磁盘配合使用
	 * 
	 * @throws Exception
	 */
	public void createIndexByYouHua() throws Exception {
		File indexFile = new File(indexPath);
		Directory fsDir = FSDirectory.open(indexFile);

		// 1、启动时,将磁盘中的索引读取到内存中
		ramDir = new RAMDirectory(fsDir);
		IndexWriterConfig ramConf = new IndexWriterConfig(Version.LUCENE_35, analyzer);

		// 运行程序时操作内存中的索引
		IndexWriter ramIndexWriter = new IndexWriter(ramDir, ramConf);
		Document document = File2Document.file2Document(filePath01);
		Document document2 = File2Document.file2Document(filePath02);
		ramIndexWriter.addDocument(document);
		ramIndexWriter.addDocument(document2);
		ramIndexWriter.close();

		// 2、退出时将内存中的索引保存到磁盘中
		IndexWriterConfig fsConf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
		IndexWriter fsIndexWriter = new IndexWriter(fsDir, fsConf);
		fsIndexWriter.addIndexes(ramDir);// 把另外几个索引库中的所有索引数据合并到当前的索引库中
		fsIndexWriter.commit();
		//fsIndexWriter.optimize();// 对索引文件进行优化,从而减少IO操作
		fsIndexWriter.forceMerge(1);
		fsIndexWriter.close();
	}

	public static void main(String[] args) throws Exception {
		FirstLucene04SearchByQuery lucene = new FirstLucene04SearchByQuery();
		//lucene.createIndexByYouHua();
		lucene.search("iteye");
	}

}

 

 

 

关键词查询(TermQuery):

 

	/**
	 * Keyword (single-term) query: looks up the exact term "iteye" in the
	 * "content" field and prints the query's string form before running it.
	 * 
	 * @throws Exception
	 *             if building the index or searching fails
	 */
	public void testTermQuery() throws Exception {
		// The keyword is written entirely in lower case, with no upper-case
		// characters.
		Query termQuery = new TermQuery(new Term("content", "iteye"));
		System.out.println("对应的查询字符串:" + termQuery);
		byQuery.createIndexByYouHua();
		byQuery.search(termQuery);
	}

 运行结果:

对应的查询字符串:content:iteye

总共有【2】条匹配的结果

name -->HelloLucene01.txt

content -->Hello, my name is wang xudong, I in iteye blog address is xdwangiflytek.iteye.com.

 

path -->F:\Workspaces\workspaceSE\BlogDemo\luceneDatasource\HelloLucene01.txt

size -->84

name -->HelloLucene04.txt

content -->iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

 

path -->F:\Workspaces\workspaceSE\BlogDemo\luceneDatasource\HelloLucene04.txt

size -->738

 

 

范围查询(TermRangeQuery):

	/**
	 * Range query over the "size" field.
	 * <p>
	 * TermRangeQuery takes its bounds as strings, so they are passed as string
	 * literals here (int literals do not compile).
	 * NOTE(review): string bounds compare lexicographically, so a stored value
	 * such as "84" would also sort between "200" and "900" - confirm against
	 * the indexed data.
	 * 
	 * @throws Exception
	 *             if building the index or searching fails
	 */
	public void testTermRangeQuery() throws Exception {
		// The two `true` flags make the lower and upper bounds inclusive
		Query query = new TermRangeQuery("size", "200", "900", true, true);
		byQuery.createIndexByYouHua();
		byQuery.search(query);
	}

运行结果:

总共有【1】条匹配的结果

name -->HelloLucene04.txt

content -->iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

 

path -->F:\Workspaces\workspaceSE\BlogDemo\luceneDatasource\HelloLucene04.txt

size -->738

但是上面需要注意的是如果上面的范围是100-1000,则就不会有结果,这是为什么呢?

大家需要注意,TermRangeQuery是按字符串(字典序)来比较的:"738"和"1000"按字符串排序谁大?应该是"738"大,因为字符'7'的ASCII码比'1'大,所以738落在了100-1000这个范围之外。也就是说这里它是按照字符串进行排序的,因为它在储存的时候里面全部都是字符串。这时怎么办呢?解决方法就是让索引的时候是相同的宽度,搜索的时候也是相同的宽度。

Test里的方法改为:

	/**
	 * Range query over the "size" field with both bounds encoded through
	 * NumberTools.longToString, matching how the field is encoded at index
	 * time.
	 * 
	 * @throws Exception
	 *             if building the index or searching fails
	 */
	public void testTermRangeQuery() throws Exception {
		String lowerBound = NumberTools.longToString(100);
		String upperBound = NumberTools.longToString(1000);
		// The two `true` flags make both bounds inclusive
		Query sizeRange = new TermRangeQuery("size", lowerBound, upperBound, true, true);
		byQuery.createIndexByYouHua();
		byQuery.search(sizeRange);
	}

 同时在建立索引的地方,File2Document.java中,需要将

// Adds the file length as a plain decimal string in the "size" field:
// stored (Store.YES) and indexed without analysis (Index.NOT_ANALYZED).
document.add(new Field("size", String.valueOf(file.length()),
				Store.YES, Index.NOT_ANALYZED));

 改为:

// Same "size" field, but the length is encoded with NumberTools.longToString
// before indexing, so that indexed values and range bounds use the same encoding.
document.add(new Field("size", NumberTools.longToString(file.length()),
				Store.YES, Index.NOT_ANALYZED));

 但是这里的方法提示过时了,在3.5中我还没找到类似的方法,知道的可以告诉我一下。(补充:NumberTools在Lucene 3.x中已被标记为过时,官方推荐的替代方案是索引时使用NumericField,查询时使用NumericRangeQuery。)

 

通配符查询(WildcardQuery):

 

	/**
	 * Wildcard query on the "content" field.
	 * 
	 * '?' stands for exactly one character.
	 * 
	 * '*' stands for zero or more characters.
	 * 
	 * @throws Exception
	 *             if building the index or searching fails
	 */
	public void testWildcardQuery() throws Exception {
		// Alternative single-character pattern: "itey?"
		String pattern = "ite*";
		Query wildcard = new WildcardQuery(new Term("content", pattern));
		byQuery.createIndexByYouHua();
		byQuery.search(wildcard);
	}

运行结果:

总共有【2】条匹配的结果

name -->HelloLucene01.txt

content -->Hello, my name is wang xudong, I in iteye blog address is xdwangiflytek.iteye.com.

 

path -->F:\Workspaces\workspaceSE\BlogDemo\luceneDatasource\HelloLucene01.txt

size -->84

name -->HelloLucene04.txt

content -->iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

iteye too other.iteye too other.iteye too other.iteye too other.iteye too other.

 

path -->F:\Workspaces\workspaceSE\BlogDemo\luceneDatasource\HelloLucene04.txt

size -->738

 

 

短语查询(PhraseQuery):

/**
	 * Phrase query: matches documents whose "content" field contains "iteye"
	 * and "address" at the given relative positions.
	 * 
	 * @throws Exception
	 *             if building the index or searching fails
	 */
	public void testPhraseQuery() throws Exception {
		Term first = new Term("content", "iteye");
		Term second = new Term("content", "address");

		PhraseQuery phrase = new PhraseQuery();
		// Positions 1 and 3 are relative to each other
		phrase.add(first, 1);
		phrase.add(second, 3);

		// Alternative form: add both terms without positions and call
		// setSlop(2) to set how many words may at most lie between them.

		byQuery.createIndexByYouHua();
		byQuery.search(phrase);
	}

 运行结果:

总共有【1】条匹配的结果

name -->HelloLucene01.txt

content -->Hello, my name is wang xudong, I in iteye blog address is xdwangiflytek.iteye.com.

 

path -->F:\Workspaces\workspaceSE\BlogDemo\luceneDatasource\HelloLucene01.txt

size -->84

 

 

Boolean查询(BooleanQuery):

/**
	 * Boolean query combining a phrase query (required) with a range query
	 * (optional).
	 * 
	 * @throws Exception
	 *             if building the index or searching fails
	 */
	public void testBooleanQuery() throws Exception {

		// Clause 1: phrase query on "content"
		PhraseQuery requiredPhrase = new PhraseQuery();
		requiredPhrase.add(new Term("content", "iteye"), 1);
		requiredPhrase.add(new Term("content", "address"), 3);

		// Clause 2: inclusive range query on "size"
		String lowerBound = NumberTools.longToString(100);
		String upperBound = NumberTools.longToString(1000);
		Query optionalRange = new TermRangeQuery("size", lowerBound, upperBound, true, true);

		// Clause 1 must match; clause 2 may match
		BooleanQuery combined = new BooleanQuery();
		combined.add(requiredPhrase, Occur.MUST);
		combined.add(optionalRange, Occur.SHOULD);

		byQuery.createIndexByYouHua();
		byQuery.search(combined);
	}

运行结果:

总共有【1】条匹配的结果

name -->HelloLucene01.txt

content -->Hello, my name is wang xudong, I in iteye blog address is xdwangiflytek.iteye.com.

 

path -->F:\Workspaces\workspaceSE\BlogDemo\luceneDatasource\HelloLucene01.txt

size -->84

说明:

Occur用于表示布尔查询子句关系,包括:

Occur.MUST、Occur.MUST_NOT、Occur.SHOULD

 

1、MUST和MUST,取得两个查询子句的交集;

2、MUST和MUST_NOT,包含MUST并且查询结果中不包含MUST_NOT的检索结果;

3、SHOULD和SHOULD,表示“或”关系,最终检索结果为所有检索子句的并集;

 

使用时注意:

       1、MUST和SHOULD:此时SHOULD无意义,结果为MUST子句的检索结果;

       2、MUST_NOT和MUST_NOT,无意义,检索无结果;

       3、MUST_NOT和SHOULD,此时SHOULD相当于MUST,结果同MUST和MUST_NOT;

       4、单独使用SHOULD,结果相当于MUST

       5、单独使用MUST_NOT,无意义,检索无结果;

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

猜你喜欢

转载自xdwangiflytek.iteye.com/blog/1402858