全文检索小案例

1.什么是全文检索?
全文检索是计算机程序通过扫描文章中的每一个词,对每一个词建立一个索引,指明该词在文章中出现的次数和位置。当用户查询时根据建立的索引查找,类似于通过字典的检索字表查字的过程。
2.搭建全文检索环境
 lucene-core-3.0.1.jar(核心包)
 contrib\analyzers\common\lucene-analyzers-3.0.1.jar(分词器)
 contrib\highlighter\lucene-highlighter-3.0.1.jar(高亮)
 contrib\memory\lucene-memory-3.0.1.jar(高亮功能依赖的包)
3.具体案例:
建立实体类:
Student.java
package cn.laibin.util;


/**
 * Mutable bean describing a student by identifier, name and free-text description.
 *
 * <p>All properties start out as {@code null} and are changed through the setters.
 */
public class Student {

    /** Identifier of the student; {@code null} until assigned. */
    private Integer id;

    /** Name of the student; {@code null} until assigned. */
    private String name;

    /** Free-text description of the student; {@code null} until assigned. */
    private String description;

    /**
     * @return the identifier, or {@code null} if none has been assigned
     */
    public Integer getId() {
        return this.id;
    }

    /**
     * @param id the identifier to store (may be {@code null})
     */
    public void setId(final Integer id) {
        this.id = id;
    }

    /**
     * @return the name, or {@code null} if none has been assigned
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param name the name to store (may be {@code null})
     */
    public void setName(final String name) {
        this.name = name;
    }

    /**
     * @return the description, or {@code null} if none has been assigned
     */
    public String getDescription() {
        return this.description;
    }

    /**
     * @param description the description to store (may be {@code null})
     */
    public void setDescription(final String description) {
        this.description = description;
    }
}
建立测试类:
TestLu.java
package cn.laibin.test;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import cn.laibin.util.Student;

public class TestLu {
private static Directory directory;
private static Analyzer a;
static{
try {
directory=FSDirectory.open(new File("./Dir"));//目录   
a=new StandardAnalyzer(Version.LUCENE_30);//lucene版本       
} catch (IOException e) {
throw new RuntimeException(e);
}
}
//建立索引
@Test
public void testCreateIndex() throws Exception{
//准备数据
Student student=new Student();
student.setId(1);
student.setName("周雨佳");
student.setDescription("【如何让当前页面3秒以后自动跳转到其他页面?JS页面自动

跳转】");
//放到索引库中

//建立索引
//1.把Article转为Document
Document document=new Document();
String idString=student.getId().toString();
document.add(new Field("name",student.getName() , Store.YES,

Index.ANALYZED));
document.add(new Field("id",idString, Store.YES, Index.ANALYZED));
document.add(new Field("description",student.getDescription() , Store.YES,

Index.ANALYZED));

//2把document放到索引库中


IndexWriter indexWriter=new IndexWriter(directory, a,

MaxFieldLength.LIMITED);
indexWriter.addDocument(document);
indexWriter.close();
}


//搜索
@Test
public void testSearch() throws Exception {
//准备查询条件
String queryString="周雨佳";
//2执行搜索
List<Student> list=new ArrayList<Student>();
//===================================
//第一:把查询字符串转成Query对象(默认只从title中查询)
QueryParser queryParser=new QueryParser(Version.LUCENE_30, "name", a);
Query query= queryParser.parse(queryString);
//第二:执行查询,得到中间结果
IndexSearcher indexSearcher= new IndexSearcher(directory);//指定索引库
TopDocs topDocs=indexSearcher.search(query, 100);//n,返回前n条结果

int count=topDocs.totalHits;
ScoreDoc[] scoreDocs=topDocs.scoreDocs;


//3.处理结果
for(int i=0;i<scoreDocs.length;i++){
ScoreDoc scoreDoc=scoreDocs[i];
float score=scoreDoc.score;//相关度得分
int docId=scoreDoc.doc;//document的内部编号

//根据编号拿到Document数据
Document doc=indexSearcher.doc(docId);
//把Document转为Article
String idString=doc.get("id");
String name=doc.get("name");
String description=doc.get("description");//等价于doc。getField()。

StringValue();
Student student=new Student();
student.setId(Integer.parseInt(idString));
student.setName(name);
student.setDescription(description);
list.add(student);
}
indexSearcher.close();



[color=darkred][/color]

//===================================
//3显示结果
System.out.println("总结果"+count);
for (Student a:list) {
System.out.println("----------------------------------");
System.out.println("id="+a.getId());
System.out.println("name="+a.getName());
System.out.println("description="+a.getDescription());
}
}
}

 

猜你喜欢

转载自792836647.iteye.com/blog/2191662
今日推荐