Lucene 排序、过滤、高亮显示

package com.nanjing.chaoxing.lucene;

import com.nanjing.chaoxing.lucene.model.Book;
import com.nanjing.chaoxing.lucene.model.BookUtil;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * JUnit demonstrations of three Lucene 3.6 search features: sorting results,
 * filtering them with a numeric range, and highlighting matched terms.
 *
 * <p>Both demonstrations read the index located at {@code BookUtil.indexFile} and
 * map each hit back to an entry of {@code BookUtil.bookList} through the stored
 * {@code bookid} field.
 */
public class SortAndFilterAndHighligher {
    private static final Logger logger = Logger.getLogger(SortAndFilterAndHighligher.class);

    /**
     * Runs once before the tests: calls {@code createIndexWriter()} and then
     * {@code createDocument()} on a fresh {@link BookUtil}.
     * NOTE(review): presumably this writes the index that the tests read — confirm
     * against BookUtil.
     *
     * @throws IOException if BookUtil fails while preparing the index
     */
    @BeforeClass
    public static void init() throws IOException {
        BookUtil bookUtil = new BookUtil();
        bookUtil.createIndexWriter();
        bookUtil.createDocument();
    }

    /**
     * Sort and filter: runs a fuzzy query on {@code bookid}, keeps only documents
     * whose {@code year} lies in [2010, 2012], orders them by {@code bookid} as a
     * string, and logs every matching {@link Book}.
     *
     * @throws IOException    if the index cannot be opened, searched or closed
     * @throws ParseException if the query string cannot be parsed
     */
    @Test
    public void sort() throws IOException, ParseException {
        // Directory, reader and searcher all hold file handles; close each one even
        // when the search throws.
        FSDirectory directory = FSDirectory.open(BookUtil.indexFile);
        try {
            IndexReader indexReader = IndexReader.open(directory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                try {
                    logSortedAndFilteredBooks(indexSearcher);
                } finally {
                    // Does not close the reader that was passed to the constructor.
                    indexSearcher.close();
                }
            } finally {
                indexReader.close();
            }
        } finally {
            directory.close();
        }
    }

    /** Executes the sorted, filtered search on the given searcher and logs the matching books. */
    private void logSortedAndFilteredBooks(IndexSearcher indexSearcher) throws IOException, ParseException {
        Query query = new QueryParser(Version.LUCENE_36, "bookid", new StandardAnalyzer(Version.LUCENE_36))
                .parse("6270000~");
        // Numeric filter: year between 2010 and 2012, both bounds inclusive.
        NumericRangeFilter<Integer> yearFilter = NumericRangeFilter.newIntRange("year", 2010, 2012, true, true);
        // Sort by bookid, compared as a string.
        Sort sort = new Sort(new SortField("bookid", SortField.STRING));

        logger.info("sort begin....");
        TopDocs topDocs = indexSearcher.search(query, yearFilter, 1000, sort);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // Load the stored document once per hit instead of once per book.
            String storedBookId = indexSearcher.doc(scoreDoc.doc).get("bookid");
            if (storedBookId == null) {
                // bookid is not stored for this document, so it cannot match any book.
                continue;
            }
            for (Book book : BookUtil.bookList) {
                if (storedBookId.equals(book.getBookid())) {
                    logger.info(book.toString());
                }
            }
        }
        logger.info("sort end....\n");
    }


    /**
     * Highlighting: runs a fuzzy query on {@code author} and prints every matching
     * {@link Book} with the matched part of its author wrapped in a red
     * {@code <font>} tag.
     *
     * <p>Side effect: the highlighted text is written into the matching entries of
     * {@code BookUtil.bookList} via {@code setAuthor}.
     *
     * @throws ParseException               if the query string cannot be parsed
     * @throws IOException                  if the index cannot be opened, searched or closed
     * @throws InvalidTokenOffsetsException if the highlighter meets inconsistent token offsets
     */
    // NOTE(review): @Test is disabled in the original source; the reason is not shown.
//    @Test
    public void highligher() throws ParseException, IOException, InvalidTokenOffsetsException {
        // Directory, reader and searcher all hold file handles; close each one even
        // when the search throws.
        FSDirectory directory = FSDirectory.open(BookUtil.indexFile);
        try {
            IndexReader indexReader = IndexReader.open(directory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                try {
                    printHighlightedBooks(indexSearcher);
                } finally {
                    // Does not close the reader that was passed to the constructor.
                    indexSearcher.close();
                }
            } finally {
                indexReader.close();
            }
        } finally {
            directory.close();
        }
    }

    /** Executes the author search on the given searcher and prints each matching book with its author highlighted. */
    private void printHighlightedBooks(IndexSearcher indexSearcher)
            throws ParseException, IOException, InvalidTokenOffsetsException {
        // One analyzer serves both query parsing and highlighting.
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        Query query = new QueryParser(Version.LUCENE_36, "author", analyzer).parse("jam~");

        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(new SimpleFragmenter(100));

        TopDocs topDocs = indexSearcher.search(query, 1000);
        System.out.println("高亮显示:");
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // Relevance score of the current hit.
            System.out.println("score is : " + scoreDoc.score);
            Document doc = indexSearcher.doc(scoreDoc.doc);
            String storedBookId = doc.get("bookid");
            String author = doc.get("author");

            // getBestFragment returns null when no fragment scores above zero; keep the
            // stored author in that case rather than overwriting the book's author with null.
            String highlightedAuthor = author;
            if (author != null) {
                String fragment = highlighter.getBestFragment(analyzer, "author", author);
                if (fragment != null) {
                    highlightedAuthor = fragment;
                }
            }

            for (Book book : BookUtil.bookList) {
                if (book.getBookid().equals(storedBookId)) {
                    if (highlightedAuthor != null) {
                        book.setAuthor(highlightedAuthor);
                    }
                    System.out.println(book.toString());
                }
            }
        }
    }
}

猜你喜欢

转载自qq510219366.iteye.com/blog/1674999
今日推荐