搜索思路:
1. 用一个IndexSearcher对象,去预先建立的索引目录(indexDir)中查找关键字(keyword)。
2. 其中要先用解析器(parser)配合特定的分词器(analyzer)对关键字进行解析,解析结果作为Query对象,再用它去查询。
3.遍历结果集,根据docId去doc集合中找相应的doc.
4. 设置高亮显示的格式以及每条匹配记录显示的字符数。用Highlighter对象取得文章或数据的片段,并将其中与关键字相同的文字标红。
关于如何建立索引,请查看:搜索技术--建立索引(lucene3.0)
package com.jrj.datamart.action;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.jrj.datamart.model.ApiIndexEntity;
import com.jrj.datamart.model.Pagination;
//
/**
 * Struts2 action that runs a keyword search against a pre-built Lucene index
 * and exposes the highlighted hits to the view.
 *
 * <p>The keyword is parsed with a {@link QueryParser} backed by a
 * {@link SmartChineseAnalyzer}, the top {@code TOP_NUM} hits are fetched, and
 * the "icnname" and "contents" fields of every hit are wrapped in red
 * {@code <FONT>} tags wherever they match the query.
 */
public class SearcherAction extends BaseAction {
    // Directory that holds the Lucene index.
    private String INDEX_DIR = "f:\\lucene\\luceneIndexDir";
    // Search keyword; overwritten through setKeyword.
    private String keyword = "行情";
    // Maximum number of hits fetched from the index per search.
    private int TOP_NUM = 100;
    private int numIndexed;
    // List that accumulates the search results.
    List<ApiIndexEntity> apiIndexEntityLists = new ArrayList<ApiIndexEntity>();
    // Total number of documents matching the query (may exceed TOP_NUM).
    private int totalHitsNumber;
    // Elapsed search time in milliseconds.
    private long spentTime;

    /**
     * Struts2 entry point: validates the index directory, runs the search and
     * records how long it took.
     *
     * @return the result name {@code "success"}
     * @throws Exception if the index directory is missing or is not a
     *         directory, or if {@link #search(File, String)} fails
     */
    public String execute() throws Exception {
        System.out.println("execue of SearcherAction started...keyword: "
                + keyword);
        long start = System.currentTimeMillis();
        File indexDir = new File(INDEX_DIR);
        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new Exception(indexDir
                    + " does not exist or is not a directory.");
        }
        search(indexDir, keyword);
        spentTime = System.currentTimeMillis() - start;
        System.out.println("searing spent: " + spentTime + " milliseconds");
        return "success";
    }

    /**
     * Searches the "contents" field of the index for {@code q}, highlights the
     * matching text of each hit and stores the resulting entities both in
     * {@code apiIndexEntityLists} and in the HTTP session.
     *
     * <p>I/O failures while reading the index are logged and swallowed, so the
     * caller still completes; query-parsing and highlighting errors propagate.
     *
     * @param indexDir directory of the Lucene index
     * @param q        query string to search for
     * @throws Exception if the query cannot be parsed or highlighting fails
     */
    public void search(File indexDir, String q) throws Exception {
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(
                Version.LUCENE_30);
        String field = "contents";
        IndexSearcher indexSearch = null;
        try {
            indexSearch = new IndexSearcher(FSDirectory.open(indexDir));
            QueryParser parser = new QueryParser(Version.LUCENE_30, field,
                    analyzer);
            // Terms are combined with OR rather than AND.
            parser.setDefaultOperator(QueryParser.OR_OPERATOR);
            Query query = parser.parse(q);

            TopDocs topDocs = indexSearch.search(query, TOP_NUM);
            totalHitsNumber = topDocs.totalHits;
            System.out.println("命中:" + totalHitsNumber);

            // Highlighting setup: matched terms get the prefix/suffix below,
            // and each returned fragment is limited to 150 characters.
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
                    "<FONT COLOR='#FF0000'>", "</FONT>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
                    new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(150));

            // Iterate over the hits actually returned. totalHits counts every
            // match in the index and can be larger than scoreDocs.length,
            // which is capped at TOP_NUM.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document targetDoc = indexSearch.doc(scoreDoc.doc);
                ApiIndexEntity apiIndexEntity = new ApiIndexEntity();

                String icnname = targetDoc.get("icnname");
                if (icnname != null) {
                    apiIndexEntity.setIcnname(highlightOrRaw(highlighter,
                            analyzer, "icnname", icnname));
                }
                String contents = targetDoc.get("contents");
                if (contents != null) {
                    apiIndexEntity.setContents(highlightOrRaw(highlighter,
                            analyzer, "contents", contents));
                }
                apiIndexEntity.setApiid(Integer.parseInt(targetDoc.get("apiid")));

                // String concatenation tolerates a missing (null) field.
                System.out.println("ApiId: " + targetDoc.get("apiid"));
                System.out.println("API中文名: " + icnname);
                System.out.println("内容:" + contents);
                apiIndexEntityLists.add(apiIndexEntity);
            }
            // Put the results into the session,
            getSession().setAttribute("apiIndexEntityLists", apiIndexEntityLists);
            // and keep the session alive for one hour.
            getSession().setMaxInactiveInterval(60 * 60);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the index reader opened by the searcher.
            if (indexSearch != null) {
                try {
                    indexSearch.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Returns the best highlighted fragment of {@code storedValue}, or the
     * unmodified value when the highlighter finds no match in it.
     */
    private String highlightOrRaw(Highlighter highlighter,
            SmartChineseAnalyzer analyzer, String fieldName,
            String storedValue) throws Exception {
        TokenStream tokenStream = analyzer.tokenStream(fieldName,
                new StringReader(storedValue));
        String fragment = highlighter.getBestFragment(tokenStream, storedValue);
        return fragment == null ? storedValue : fragment;
    }

    public int getNumIndexed() {
        return numIndexed;
    }

    public void setNumIndexed(int numIndexed) {
        this.numIndexed = numIndexed;
    }

    public String getINDEX_DIR() {
        return INDEX_DIR;
    }

    public String getKeyword() {
        return keyword;
    }

    public int getTOP_NUM() {
        return TOP_NUM;
    }

    public void setINDEX_DIR(String iNDEX_DIR) {
        INDEX_DIR = iNDEX_DIR;
    }

    public void setKeyword(String keyword) {
        this.keyword = keyword;
    }

    public void setTOP_NUM(int tOP_NUM) {
        TOP_NUM = tOP_NUM;
    }

    public List<ApiIndexEntity> getApiIndexEntityLists() {
        return apiIndexEntityLists;
    }

    public void setApiIndexEntityLists(List<ApiIndexEntity> apiIndexEntityLists) {
        this.apiIndexEntityLists = apiIndexEntityLists;
    }

    public int getTotalHitsNumber() {
        return totalHitsNumber;
    }

    public void setTotalHitsNumber(int totalHitsNumber) {
        this.totalHitsNumber = totalHitsNumber;
    }

    public long getSpentTime() {
        return spentTime;
    }

    public void setSpentTime(int spentTime) {
        this.spentTime = spentTime;
    }
}
搜索技术--搜索、排序并高亮显示(lucene3.0)
猜你喜欢
转载自binbinwudi.iteye.com/blog/1122377
今日推荐
周排行