A Lucene 2.4 Example

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Set;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;

import com.ole.factory.BeanFactory;

public class TestLucene {
// Index directory
public static final String INDEX_DIR = System.getProperty("user.dir") + "/index_dir";
public static final String LUCENE_DATA = System.getProperty("user.dir") + "/lucene_data";
private String[] columnNameArr ={"id","ordercode","incompanyname","outcompanyname",
    "buydate","saledate","buygoodsnum","salegoodsnum","buyprice","saleprice",
    "trucknum","empcode1","empcode2","losskg","goodscode","orderemp",
    "orderdate","orderstate","batchno_out","batchno_in","ys","ss","ysye",
    "yf","sf","yfye","carry_in","carry_out"};
/**
 * Fetch the order data from the database
 */
@SuppressWarnings("unchecked")
public List<Map<String,Object>> queryOrderIO(){
   BeanFactory beanFac = BeanFactory.getInstance();
   // IOrderIOService and BeanFactory are project-specific classes resolved from the Spring context
   IOrderIOService orderService = (IOrderIOService) beanFac
     .getApplicationnContext().getBean("orderIOService");
   return (List<Map<String,Object>>) orderService.queryOrderIO();
}

/**
 * Create the index
 */
public void createIndex() {
   synchronized (INDEX_DIR) {
    List<Map<String,Object>> resultList = queryOrderIO();
    Date start = new Date();
    Analyzer analyzer = new StandardAnalyzer();
    try {
     IndexWriter writer = new IndexWriter(INDEX_DIR, analyzer,
       true, MaxFieldLength.UNLIMITED);
     // one Lucene Document per database row, one Field per column
     for(Map<String,Object> rowItem : resultList){
      Document doc = new Document();
//      Set<String> columns = rowItem.keySet();
      for(String columnItem : columnNameArr){
       Field fieldvalue = new Field(columnItem,
         rowItem.get(columnItem) != null ? rowItem.get(columnItem).toString() : "",
         Field.Store.YES, Field.Index.ANALYZED,
         Field.TermVector.WITH_POSITIONS_OFFSETS); // use Field.TermVector.NO if term vectors are not needed
       doc.add(fieldvalue);
      }
      writer.addDocument(doc);
     }

     writer.optimize();
     writer.close();
     Date end = new Date();
     long indexTime = end.getTime() - start.getTime();
     System.out.println("Time taken to build the index (ms):");
     System.out.println(indexTime);
    } catch (CorruptIndexException e) {
     e.printStackTrace();
    } catch (LockObtainFailedException e) {
     e.printStackTrace();
    } catch (IOException e) {
     e.printStackTrace();
    }
    System.out.println("Index creation complete!");
   }
}

/**
 * Search the index by keyword
 */
public void searchIndex(String queryString, String columnName){
   try{
    IndexSearcher isearcher = new IndexSearcher(INDEX_DIR);
//    BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD };
//    TopDocCollector collector = new TopDocCollector(10000);
//    QueryParser parser = new QueryParser(INDEX_DIR, new StandardAnalyzer());
//    Query query = parser.parse(queryString);
    Query query = new QueryParser(columnName, new StandardAnalyzer()).parse(queryString);
//    MultiFieldQueryParser.parse(queryString,
//      columnName, new StandardAnalyzer());
//    isearcher.search(query, collector);
    ScoreDoc[] hits = isearcher.search(query, isearcher.maxDoc()).scoreDocs;
    System.out.println("hits.length=" + hits.length);
    for (int i = 0; i < hits.length; i++) {
     Document doc = isearcher.doc(hits[i].doc);
     for(String column : columnNameArr){
      System.out.println(column + "=" + doc.get(column));
     }
     System.out.println("=========================");
    }
    isearcher.close();
   }catch (Exception e) {
    e.printStackTrace();
   }
}
  
   
// /**
//  * Paoding (庖丁解牛) Chinese analyzer
//  */
// public synchronized Analyzer getAnalyzer() {
//      return new PaodingAnalyzer();
// }



public static void main(String[] args){
   TestLucene testLucene = new TestLucene();
//   testLucene.createIndex();
   testLucene.searchIndex("2009-08-01","buydate");
//   testLucene.searchIndex("ordercode","S200908040062",
//     new StandardAnalyzer());
}
}  
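
The commented-out MultiFieldQueryParser call inside searchIndex hints at querying several columns at once. Below is a minimal sketch of that variant against the Lucene 2.4 API, written as a method that could sit alongside searchIndex in TestLucene; the three field names are an assumed subset of columnNameArr and the hit count of 10 is arbitrary:

// Sketch only: search several fields with one query string (Lucene 2.4 static parse).
public void searchMultiField(String queryString) throws Exception {
   IndexSearcher isearcher = new IndexSearcher(INDEX_DIR);
   // assumed example fields taken from columnNameArr
   String[] fields = {"ordercode", "incompanyname", "outcompanyname"};
   // SHOULD: a match in any one of the fields is enough
   BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
     BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
   Query query = MultiFieldQueryParser.parse(queryString, fields, flags, new StandardAnalyzer());
   ScoreDoc[] hits = isearcher.search(query, 10).scoreDocs;
   for (int i = 0; i < hits.length; i++) {
    Document doc = isearcher.doc(hits[i].doc);
    System.out.println(doc.get("ordercode"));
   }
   isearcher.close();
}

The per-field SHOULD flags give the same effect as OR-ing the query across the listed columns, which is usually what a simple "search everything" box needs.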
/*
 * Fields: a Field is the basic building block of a Document. Each Field holds the actual text data,
 *   which is turned into index terms internally by the Analyzer.
 * Searching within a Field ultimately works at the level of index terms; nothing smaller than a term can be matched.
 * For Chinese text the index terms are the results of Chinese word segmentation; for English they are individual
 *   words. The term is the smallest searchable unit.
 * 1. public Field(String name, byte[] value, Store store)
 * 2. public Field(String name, byte[] value, int offset, int length, Store store)
 * 3. public Field(String name, String value, Store store, Index index)
 * 4. public Field(String name, String value, Store store, Index index, TermVector termVector)
 * 5. public Field(String name, Reader reader)
 * 6. public Field(String name, Reader reader, TermVector termVector)
 * 7. public Field(String name, TokenStream tokenStream)
 * 8. public Field(String name, TokenStream tokenStream, TermVector termVector)
 * Constructors 1 and 2 index binary data; 3 and 4 index a string given directly; 5 and 6 index content read
 *   from a Reader stream, e.g. file content (commonly used); 7 and 8 take an already-built TokenStream.
 * name - the field name, a fixed identifier used to label the field, limit the search scope, or retrieve the value
 * value - the field content
 * Store - controls whether the original data itself is stored (note: this is not about whether the index is stored)
 *       1. Store.NO       keep only the index, not the original data, to save space
 *       2. Store.YES      keep the index and store the original data
 *       3. Store.COMPRESS store the original data in compressed form
 * Index - controls how the field is indexed, i.e. whether and how the field can be searched
 *       1. Index.NO                    not indexed, auxiliary information only
 *       2. Index.ANALYZED              replaces 6 as of version 2.4
 *       3. Index.NOT_ANALYZED          replaces 7 as of version 2.4
 *       4. Index.ANALYZED_NO_NORMS
 *       5. Index.NOT_ANALYZED_NO_NORMS
 *       6. Index.TOKENIZED             run the analyzer and index the resulting tokens
 *       7. Index.UN_TOKENIZED          index the whole value as a single token, without analysis
 *       8. Index.NO_NORMS              index without analyzer processing and without norms
 * TermVector - controls whether term-level information for the field is stored; for Chinese, word segmentation
 *   is the basis of indexing. A term vector keeps Token.getPositionIncrement(), Token.startOffset() and
 *   Token.endOffset() information.
 *       1. Field.TermVector.NO                      do not store term vectors
 *       2. Field.TermVector.YES                     store term vectors
 *       3. Field.TermVector.WITH_POSITIONS          store term vectors with token positions
 *       4. Field.TermVector.WITH_OFFSETS            store term vectors with token offsets
 *       5. Field.TermVector.WITH_POSITIONS_OFFSETS  store term vectors with token positions and offsets
 * (A short example of these combinations follows this comment.)
 */
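
To make the Store / Index / TermVector combinations above concrete, here is a small sketch using the Lucene 2.4 constants. The field names come from columnNameArr, but the values and the chosen combinations are made up purely for illustration:

Document doc = new Document();
// analyzed and stored: searchable by the tokens the analyzer produces, original text kept
doc.add(new Field("incompanyname", "some company name", Field.Store.YES, Field.Index.ANALYZED));
// stored, indexed as a single token: good for exact-match codes such as order numbers
doc.add(new Field("ordercode", "S200908040062", Field.Store.YES, Field.Index.NOT_ANALYZED));
// analyzed, not stored, with positions and offsets kept in the term vector (useful for highlighting)
doc.add(new Field("orderemp", "free text ...", Field.Store.NO, Field.Index.ANALYZED,
  Field.TermVector.WITH_POSITIONS_OFFSETS));
// stored only, never searchable
doc.add(new Field("carry_in", "auxiliary value", Field.Store.YES, Field.Index.NO));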


Reposted from ganlangreen-163-com.iteye.com/blog/757496