由于我们学校这期课程需要基于源码工具包搭建一个搜索引擎,上课的老师强烈推荐使用 Lucene 工具,并建议读一读 Lucene 的源码。小编准备先完成搜索引擎这个任务,学习 Lucene 的使用,等有时间再研究一下 Lucene 这份优秀的源码。
1.lucene学习框架
2.基本的创建索引和搜索框架
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Minimal Lucene example: builds a file-system index from the files found in a
 * sample directory, then runs a keyword query against the indexed content.
 */
public class HelloLucene {

    /** Parent directory of the on-disk index. */
    private static final String INDEX_ROOT = "d:/lucene";
    /** Name of the index directory below {@link #INDEX_ROOT}. */
    private static final String INDEX_NAME = "index";
    /** Directory whose files are read and indexed. */
    private static final String SAMPLE_DIR = "d:/lucene/index_sample";

    /** Field holding the simple file name. */
    private static final String FIELD_FILENAME = "filename";
    /** Field holding the full text of the file; this is the field that is searched. */
    private static final String FIELD_CONTENT = "content";
    /** Field holding the absolute path of the file. */
    private static final String FIELD_FILEPATH = "filepath";

    /** Maximum number of hits requested from a search. */
    private static final int MAX_HITS = 10;

    /**
     * Builds the index.
     *
     * <p>Steps: open the index {@link Directory}, create an {@link IndexWriter},
     * and for every regular file in the sample directory create a
     * {@link Document} with the file name, content and absolute path as stored
     * text fields, then add it through the writer.
     *
     * <p>NOTE(review): the writer is created with the default
     * {@link IndexWriterConfig}; if that appends to an existing index, calling
     * this method repeatedly will add the same files again - confirm whether
     * the index should be recreated on each run.
     *
     * @throws IOException if the sample directory cannot be listed, a file
     *         cannot be read, or the index cannot be written
     */
    public void index() throws IOException {
        File sampleDir = new File(SAMPLE_DIR);
        File[] files = sampleDir.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list files in " + sampleDir.getAbsolutePath());
        }

        // try-with-resources closes writer, analyzer and directory (in that order)
        // even when indexing fails part-way. The index contents become visible
        // only after the IndexWriter has been closed.
        try (Directory directory = FSDirectory.open(Paths.get(INDEX_ROOT, INDEX_NAME));
             Analyzer analyzer = new StandardAnalyzer();
             IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            for (File file : files) {
                if (!file.isFile()) {
                    // Sub-directories cannot be opened as a stream; skip them
                    // instead of indexing an empty document.
                    continue;
                }
                Document doc = new Document();
                doc.add(new Field(FIELD_FILENAME, file.getName(), TextField.TYPE_STORED));
                doc.add(new Field(FIELD_CONTENT, readFile(file), TextField.TYPE_STORED));
                doc.add(new Field(FIELD_FILEPATH, file.getAbsolutePath(), TextField.TYPE_STORED));
                writer.addDocument(doc);
            }
        }
    }

    /**
     * Reads a whole text file into a string, terminating every line with
     * {@code "\n"}.
     *
     * <p>NOTE(review): the bytes are decoded with the platform default charset;
     * confirm that this matches the encoding of the sample files.
     *
     * @param file the file to read
     * @return the file content, one {@code "\n"} after each line
     * @throws IOException if the file does not exist or cannot be read
     */
    private static String readFile(File file) throws IOException {
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\n");
            }
        }
        return content.toString();
    }

    /**
     * Searches the {@code content} field of the index for {@code keyword} and
     * prints the total hit count followed by the file name and path of each of
     * the top hits.
     *
     * <p>Steps: open the index {@link Directory} and a {@link DirectoryReader},
     * create an {@link IndexSearcher}, parse the keyword into a {@link Query},
     * fetch the {@link TopDocs}, then load each hit's {@link Document} and read
     * its stored fields.
     *
     * <p>I/O failures are reported on standard error and do not propagate,
     * because the method signature only declares {@link ParseException}.
     *
     * @param keyword query string in the classic query parser syntax
     * @throws ParseException if {@code keyword} cannot be parsed as a query
     */
    public void searcher(String keyword) throws ParseException {
        // All three resources are closed on every exit path, including when
        // parsing the query throws.
        try (Directory directory = FSDirectory.open(Paths.get(INDEX_ROOT, INDEX_NAME));
             DirectoryReader reader = DirectoryReader.open(directory);
             Analyzer analyzer = new StandardAnalyzer()) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            Query query = new QueryParser(FIELD_CONTENT, analyzer).parse(keyword);

            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
            System.out.println("查找到的文档总共有:" + topDocs.totalHits);

            for (ScoreDoc hit : topDocs.scoreDocs) {
                Document doc = indexSearcher.doc(hit.doc);
                System.out.println(doc.get(FIELD_FILENAME) + doc.get(FIELD_FILEPATH));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Rebuilds the index from the sample directory and runs one example query.
     *
     * @param args unused
     * @throws IOException if indexing fails
     * @throws ParseException if the example query cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        HelloLucene lucene = new HelloLucene();
        lucene.index();
        lucene.searcher("设置");
    }
}