Lucene3.0 学习笔记（2）

今天针对Lucene3.0的一些新增的特性，做了简单的实践。主要实现了两种索引的建立方法：1.对某一个txt文档进行建立索引并进行搜索。2.对某一个文件夹下的所有txt文件进行建立索引并进行搜索。
有两点发现，在此share一下：
1.在向索引中添加新的field时，如果使用的是new Field("***",Reader reader)这个构造方法，该field的内容只会被索引（分词），不会被存储。所以在搜索结果中调用doc.get("***")时无法取出内容（返回null），这时就需要自己写一个方法，先将reader读取为字符串，再用Store.YES的方式添加field。
2.在对某一个文件夹下的所有txt文档进行建立索引时，需要对每一个文件都构建一个Document对象，然后将field域分别add到该Document中。否则在搜索的时候将会出错（未知原因，还望哪位高人指点一二），并且用Luke工具查看的时候，对于是否给每个文本构建Document，两种结果内容一样，但是顺序会不同。

[color=green][/color]

package test3;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.junit.Test;

/**
 * Lucene 3.0 demo: builds an index from a single text file or from every file
 * in a folder, then runs a keyword search against the "content" field.
 *
 * <p>Each file becomes one {@link Document} with two stored, analyzed fields:
 * "name" (the file name) and "content" (the full text of the file).
 */
public class IndexTxt {

    /** Single text file indexed by {@link #createIndex()}. */
    private final String dataPath = "E:\\testlucene\\test\\test.txt";
    /** Folder whose files are indexed by {@link #createIndex1()}. */
    private final String dataPath1 = "E:\\testlucene\\test";
    /** Location of the on-disk index. */
    private final String indexPath = "E:\\testlucene\\fileIndex";

    private Directory indexDir = null;
    private final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

    /**
     * Opens the index directory at {@code indexPath}.
     *
     * @throws IOException if the directory cannot be opened
     */
    public IndexTxt() throws IOException {
        File file = new File(indexPath);
        indexDir = FSDirectory.open(file);
    }

    /**
     * Indexes the single file at {@code dataPath}, replacing any existing index.
     *
     * @throws IOException if the file cannot be read or the index cannot be written
     */
    @Test
    public void createIndex() throws CorruptIndexException, LockObtainFailedException, IOException {
        // Read the file before opening the writer so a read failure never
        // leaves the index write lock held.
        Document doc = toDocument(new File(dataPath));

        IndexWriter writer = new IndexWriter(indexDir, analyzer, true, MaxFieldLength.LIMITED);
        try {
            writer.addDocument(doc);
        } finally {
            // Always release the writer (and its write lock), even on failure.
            writer.close();
        }
    }

    /**
     * Indexes every regular file directly inside {@code dataPath1}, one
     * {@link Document} per file, replacing any existing index. If the path is
     * not a directory, a message is printed and the index is left untouched.
     *
     * @throws IOException if a file cannot be read or the index cannot be written
     */
    @Test
    public void createIndex1() throws IOException {
        File folder = new File(dataPath1);
        // Check before opening the writer: previously the writer was opened
        // first and never closed on this branch, leaking the write lock.
        if (!folder.isDirectory()) {
            System.out.println("-----folder.isDirectory():false.");
            return;
        }

        // File.list() returns null on an I/O error; treat that as "nothing to index".
        String[] files = folder.list();
        if (files == null) {
            files = new String[0];
        }

        IndexWriter writer = new IndexWriter(indexDir, analyzer, true, MaxFieldLength.LIMITED);
        try {
            for (int i = 0; i < files.length; i++) {
                File file = new File(folder, files[i]);
                // Skip sub-directories and other non-regular entries; opening
                // them as a stream would throw and abort the whole run.
                if (!file.isFile()) {
                    continue;
                }
                writer.addDocument(toDocument(file));
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Builds the document for one file: stored and analyzed "name" and
     * "content" fields.
     *
     * <p>The content is added as a String with {@code Store.YES} rather than
     * via {@code new Field("content", reader)}, so that it can be read back
     * with {@code doc.get("content")} from search results.
     */
    private Document toDocument(File file) throws IOException {
        Document doc = new Document();
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        doc.add(new Field("content", filecontent(file), Store.YES, Index.ANALYZED));
        return doc;
    }

    /**
     * Reads a whole text file into a String, terminating every line with "\n".
     *
     * <p>NOTE(review): decoding uses the platform default charset, as before;
     * confirm this matches the encoding of the files being indexed.
     *
     * @param file the file to read
     * @return the file's text, each line followed by a single "\n"
     * @throws IOException if the file cannot be opened or read
     */
    private String filecontent(File file) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\n");
            }
            return content.toString();
        } finally {
            // Closing the reader also closes the underlying file stream.
            reader.close();
        }
    }

    /**
     * Searches the "content" field of the index for the keyword "game" and
     * prints the name and content of up to 100 matching documents.
     *
     * @throws IOException if the index cannot be read
     * @throws ParseException if the keyword cannot be parsed into a query
     */
    @Test
    public void createSearch() throws CorruptIndexException, IOException, ParseException {
        IndexSearcher searcher = new IndexSearcher(indexDir);
        try {
            String key = "game";
            QueryParser parser = new QueryParser(Version.LUCENE_30, "content", analyzer);
            Query query = parser.parse(key);

            TopDocs hits = searcher.search(query, 100);
            // totalHits counts matching documents, not term occurrences.
            if (hits.totalHits == 0) {
                System.out.println("no such a file");
                return;
            }

            for (int i = 0; i < hits.scoreDocs.length; i++) {
                ScoreDoc scoreDoc = hits.scoreDocs[i];
                // Fetch the stored fields of the matching document.
                Document doc = searcher.doc(scoreDoc.doc);
                System.out.println(doc.get("name"));
                System.out.println(doc.get("content"));
                System.out.println("----------------");
            }
        } finally {
            searcher.close();
        }
    }
}

Lucene3.0 学习笔记（2）

猜你喜欢