lucene 3.6 全文检索

/**

  *索引字段,可根据需要修改

*/

package com.beyondbit.entity;

import java.util.Date;

/**
 * Plain data holder for one searchable record.
 *
 * <p>Instances are turned into Lucene documents when indexing and are rebuilt
 * from stored fields when rendering search results. All fields are nullable.
 */
public class ResultInfo {

    private Long id;             // primary key id
    private String ct_title;     // title
    private String ct_brief;     // summary
    private String ct_content;   // body text
    private String sj_name;      // section name, or category name
    private String url;          // address of the static file
    private String topid;        // second-level section id (call center) or category id (knowledge categories)
    private Date create_time;    // publication date of the knowledge / information item
    private String recommend;    // recommendation flag; "1" means recommended
    private Integer bscredit;    // enterprise credit value

    /** Creates an empty record; populate it through the setters. */
    public ResultInfo() {
    }

    /**
     * Creates a fully populated record.
     *
     * @param id          primary key id
     * @param ct_title    title
     * @param ct_brief    summary
     * @param ct_content  body text
     * @param sj_name     section or category name
     * @param url         address of the static file
     * @param topid       second-level section / category id
     * @param create_time publication date
     * @param recommend   recommendation flag ("1" = recommended)
     * @param bscredit    enterprise credit value
     */
    public ResultInfo(Long id, String ct_title, String ct_brief, String ct_content,
            String sj_name, String url, String topid, Date create_time, String recommend, Integer bscredit) {
        this.id = id;
        this.ct_title = ct_title;
        this.ct_brief = ct_brief;
        this.ct_content = ct_content;
        this.sj_name = sj_name;
        this.url = url;
        this.topid = topid;
        this.create_time = create_time;
        this.recommend = recommend;
        this.bscredit = bscredit;
    }

    // Accessors. The names keep the underscore style of the fields because the
    // indexing and search code calls them by exactly these names.

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public String getCt_title() {
        return ct_title;
    }

    public void setCt_title(String ct_title) {
        this.ct_title = ct_title;
    }

    public String getCt_brief() {
        return ct_brief;
    }

    public void setCt_brief(String ct_brief) {
        this.ct_brief = ct_brief;
    }

    public String getCt_content() {
        return ct_content;
    }

    public void setCt_content(String ct_content) {
        this.ct_content = ct_content;
    }

    public String getSj_name() {
        return sj_name;
    }

    public void setSj_name(String sj_name) {
        this.sj_name = sj_name;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getTopid() {
        return topid;
    }

    public void setTopid(String topid) {
        this.topid = topid;
    }

    public Date getCreate_time() {
        return create_time;
    }

    public void setCreate_time(Date create_time) {
        this.create_time = create_time;
    }

    public String getRecommend() {
        return recommend;
    }

    public void setRecommend(String recommend) {
        this.recommend = recommend;
    }

    public Integer getBscredit() {
        return bscredit;
    }

    public void setBscredit(Integer bscredit) {
        this.bscredit = bscredit;
    }

}

package com.beyondbit.util;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.springframework.transaction.annotation.Transactional;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.beyondbit.entity.ResultInfo;

/**

  *操作索引的类

   */
public class LuceneContent {
 
 public static final String LUCENE_PATH="lucene";

 public  Document createDocument(ResultInfo c) {
  Document doc = new Document();
  doc.add(new Field("id",c.getId().toString(), Field.Store.YES,Field.Index.NOT_ANALYZED));
  doc.add(new Field("title",c.getCt_title()==null?"":c.getCt_title(),Field.Store.YES, Field.Index.ANALYZED));
  doc.add(new Field("brief",c.getCt_brief()==null?"":c.getCt_brief(),Field.Store.YES, Field.Index.ANALYZED));
  doc.add(new Field("content",c.getCt_content()==null?"":c.getCt_content(),Field.Store.YES, Field.Index.ANALYZED));
  doc.add(new Field("url",c.getUrl()==null?"":c.getUrl(),Field.Store.YES,Field.Index.NOT_ANALYZED));
  doc.add(new Field("sjname",c.getSj_name()==null?"":c.getSj_name(),Field.Store.YES, Field.Index.ANALYZED));
  doc.add(new Field("topid",c.getTopid()==null?"":c.getTopid(),Field.Store.YES,Field.Index.ANALYZED));
  doc.add(new Field("date", DateTools.dateToString(c.getCreate_time(), Resolution.DAY), Field.Store.YES,Field.Index.NOT_ANALYZED));
  doc.add(new Field("recommend",c.getRecommend()==null?"0":c.getRecommend().toString(), Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
  doc.add(new Field("bscredit",c.getBscredit()==null?"0":c.getBscredit().toString(), Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
  return doc;
 }
 
 /**
  * 创建索引文件
  * @param content
  * @throws IOException
  */
 @Transactional(readOnly = true)
 public void createIndex(ResultInfo content) throws IOException {
  Directory dir = new SimpleFSDirectory(new File(Constants.luceneIndex));
  createIndex(content, dir);
 }

 
 /**
  * 创建索引文件
  * @param content
  * @param dir
  * @throws IOException
  */
 @Transactional(readOnly = true)
 public void createIndex(ResultInfo content, Directory dir) throws IOException {
  boolean exist = IndexReader.indexExists(dir);
  IndexWriterConfig iwconfig=new IndexWriterConfig(Version.LUCENE_36,new IKAnalyzer());
  IndexWriter writer=new IndexWriter(dir, iwconfig);
  try {
   writer.addDocument(createDocument(content));
  } finally {
   writer.close();
  }
 }
 
 /**
  * 删除索引文件
  * @param contentId
  * @throws IOException
  * @throws ParseException
  */
 @Transactional(readOnly = true)
 public void deleteIndex(Long contentId) throws IOException,
   ParseException {
  Directory dir = new SimpleFSDirectory(new File(Constants.luceneIndex));
  deleteIndex(contentId, dir);
 }
 
 /**
  * 根据索引文件id删除一条索引文件的信息
  * @param contentId
  * @param dir  索引文件的存放目录
  * @throws IOException
  * @throws ParseException
  */
 @Transactional(readOnly = true)
 public void deleteIndex(Long contentId, Directory dir)
   throws IOException, ParseException {
  boolean exist = IndexReader.indexExists(dir);
  if (exist) {
   IndexWriterConfig iwconfig=new IndexWriterConfig(Version.LUCENE_36,new IKAnalyzer());
   IndexWriter writer=new IndexWriter(dir, iwconfig);
   try {
    delete(contentId, writer);
   } finally {
    writer.close();
   }
  }
 }
 
 /**
  * 根据一条索引文件的id删除索引文件
  * @param contentId
  * @param writer
  * @throws CorruptIndexException
  * @throws IOException
  * @throws ParseException
  */
 public  void delete(Long contentId, IndexWriter writer)
   throws CorruptIndexException, IOException, ParseException {
  writer.deleteDocuments(new Term("id", contentId.toString()));
 }
 
 /**
  * 更新索引文件
  * @param content
  * @throws IOException
  * @throws ParseException
  */
 public void updateIndex(ResultInfo content) throws IOException, ParseException {
  Directory dir = new SimpleFSDirectory(new File(Constants.luceneIndex));
  updateIndex(content, dir);
 }

 /**
  * 更新索引文件
  * @param content
  * @param dir  索引文件存放的目录
  * @throws IOException
  * @throws ParseException
  */
 public void updateIndex(ResultInfo content, Directory dir) throws IOException,
   ParseException {
  boolean exist = IndexReader.indexExists(dir);
  IndexWriterConfig iwconfig=new IndexWriterConfig(Version.LUCENE_36,new IKAnalyzer());
  IndexWriter writer=new IndexWriter(dir, iwconfig);
  try {
   if (exist) {
    delete(content.getId(), writer);
   }
   writer.addDocument(createDocument(content));
  } finally {
   writer.close();
  }
 }
}

public String execute() throws Exception {
  page.setPageSize(getCookieCount());
  Query query=null;
  Analyzer analyzer=new IKAnalyzer();
  try {
   IndexSearcher searcher =new IndexSearcher(IndexReader.open(FSDirectory.open(new File(PropertyManager.getProperty("articleindex")))));
   TopScoreDocCollector topCollector = TopScoreDocCollector.create(searcher.maxDoc(),false);
   
   if(content==null||content.equals("")){
    QueryParser parse = new MultiFieldQueryParser(Version.LUCENE_36,new String[]{"title","content"}, analyzer);
    query=parse.parse(keyword);
   }
   if(content!=null&&content.equals("1")){
    if(sjid.equals("0")){
     //到全文中检索关健字
     query=MultiFieldQueryParser.parse(Version.LUCENE_36,new String[]{keyword},new String[]{"content"},new BooleanClause.Occur[]{BooleanClause.Occur.MUST},analyzer);
    }else{
     //根据关健字和栏目去查找
     query=MultiFieldQueryParser.parse(Version.LUCENE_36,new String[]{keyword,sjid},new String[]{"content","topid"},new BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.MUST},analyzer);
    }
   }if(content!=null&&content.equals("2")){
    if(sjid.equals("0")){
     //到标题中检索关健字
     query=MultiFieldQueryParser.parse(Version.LUCENE_36,new String[]{keyword},new String[]{"title"},new BooleanClause.Occur[]{BooleanClause.Occur.MUST},analyzer);
    }else{
     //根据关健字和栏目去查找
     query=MultiFieldQueryParser.parse(Version.LUCENE_36,new String[]{keyword,sjid},new String[]{"title","topid"},new BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.MUST},analyzer);
    }
   }
   searcher.search(query, topCollector);
   SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
   Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query));
   ScoreDoc[] docs=topCollector.topDocs((page.getPageNo()-1)*page.getPageSize(),page.getPageSize()).scoreDocs;
   ResultInfo info = null;
   for (int i = 0; i < docs.length; i++) {
    Document doc=searcher.doc(docs[i].doc);
    String content2 = doc.get("content");
    String title2 = doc.get("title");
    TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content2));
    TokenStream tokenStream1 = analyzer.tokenStream("title", new StringReader(title2));
    String content = highlighter.getBestFragment(tokenStream,content2);
    String title = highlighter.getBestFragment(tokenStream1, title2);
    info = new ResultInfo();
    info.setId(Long.parseLong(doc.get("id")));
    info.setCt_title(title==null?title2:title);
    info.setCt_content(content==null?content2:content);
    info.setUrl(doc.get("url")==null?"":doc.get("url"));
    info.setSj_name(doc.get("sjname")==null?"":doc.get("sjname"));
       info.setCreate_time(DateTools.stringToDate(doc.get("date")));
    infos.add(info);
    info = null;
   }
   page.setTotalCount(topCollector.getTotalHits());
   //subs=subjectMananger.getAllLeafSubject();
  }catch (Exception e) {
   e.printStackTrace();
  }
  return SUCCESS;
 }

还可以使用 Sort 对查询结果进行排序,例如:

// Search the title, brief and content fields with the same keyword.
String[] fields = { "title", "brief", "content" };
QueryParser parse = new MultiFieldQueryParser(Version.LUCENE_36, fields, analyzer);

// keyword is the text to search for.
Query query = parse.parse(keyword);

// Sort recommended entries first ("recommend" descending; the boolean is the
// "reverse" flag, false meaning natural order), then by relevance score.
// A score sort ignores the field name, so one score criterion is sufficient.
Sort sort = new Sort(new SortField("recommend", SortField.INT, true), SortField.FIELD_SCORE);
// The search rejects a result count of 0, which maxDoc() returns for an empty index.
TopFieldDocs topFieldDocs = searcher.search(query, Math.max(1, searcher.maxDoc()), sort);

// Set up highlighting: matches are wrapped in a red <font> tag.
SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
ScoreDoc[] docs = topFieldDocs.scoreDocs;

// Convert only the hits that fall on the requested page, clamped to the hits found.
int pageStart = Math.max(0, (page.getPageNo() - 1) * page.getPageSize());
int pageEnd = Math.min(docs.length, page.getPageNo() * page.getPageSize());
for (int i = pageStart; i < pageEnd; i++) {
    Document doc = searcher.doc(docs[i].doc);
    String content2 = doc.get("content");
    String title2 = doc.get("title");
    String content = null;
    String title = null;
    if (content2 != null && !content2.equals("")) {
        TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content2));
        // Use a fragment as long as the whole text so the highlighter does not cut it
        // to its default fragment length; the display length is then controlled by the caller.
        highlighter.setTextFragmenter(new SimpleFragmenter(content2.length()));
        content = highlighter.getBestFragment(tokenStream, content2);
    }
    if (title2 != null && !title2.equals("")) {
        TokenStream tokenStream1 = analyzer.tokenStream("title", new StringReader(title2));
        title = highlighter.getBestFragment(tokenStream1, title2);
    }

    ResultInfo hitInfo = new ResultInfo();
    hitInfo.setId(Long.parseLong(doc.get("id")));
    // Fall back to the raw text when the highlighter found no fragment.
    hitInfo.setCt_title(title == null ? title2 : title);
    hitInfo.setCt_content(content == null ? content2 : content);
    hitInfo.setUrl(doc.get("url") == null ? "" : doc.get("url"));
    // The section name is indexed under "sjname"; reading "name" always came back empty.
    hitInfo.setSj_name(doc.get("sjname") == null ? "" : doc.get("sjname"));
    String day = doc.get("date");
    if (day != null && !day.equals("")) {
        hitInfo.setCreate_time(DateTools.stringToDate(day));
    }
    hitInfo.setRecommend(doc.get("recommend") == null ? "" : doc.get("recommend"));
    infos.add(hitInfo);
}

猜你喜欢

转载自yuan-bin1990.iteye.com/blog/1700272