Lucene 3 搜索引擎:索引建立、搜索、排序、分页、高亮显示,使用 IKAnalyzer 分词

package com.zjr.service.impl; 
     
    import java.io.File; 
    import java.io.IOException; 
    import java.io.StringReader; 
    import java.lang.reflect.InvocationTargetException; 
    import java.util.ArrayList; 
    import java.util.List; 
     
    import org.apache.commons.beanutils.BeanUtils; 
    import org.apache.commons.logging.Log; 
    import org.apache.commons.logging.LogFactory; 
    import org.apache.lucene.analysis.Analyzer; 
    import org.apache.lucene.analysis.TokenStream; 
    import org.apache.lucene.document.Document; 
    import org.apache.lucene.document.Field; 
    import org.apache.lucene.document.Field.Index; 
    import org.apache.lucene.document.Field.Store; 
    import org.apache.lucene.index.CorruptIndexException; 
    import org.apache.lucene.index.IndexReader; 
    import org.apache.lucene.index.IndexWriter; 
    import org.apache.lucene.index.Term; 
    import org.apache.lucene.search.BooleanClause; 
    import org.apache.lucene.search.IndexSearcher; 
    import org.apache.lucene.search.Query; 
    import org.apache.lucene.search.ScoreDoc; 
    import org.apache.lucene.search.Sort; 
    import org.apache.lucene.search.SortField; 
    import org.apache.lucene.search.TopDocs; 
    import org.apache.lucene.search.TopScoreDocCollector; 
    import org.apache.lucene.search.highlight.Highlighter; 
    import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; 
    import org.apache.lucene.search.highlight.QueryScorer; 
    import org.apache.lucene.search.highlight.SimpleHTMLFormatter; 
    import org.apache.lucene.store.Directory; 
    import org.apache.lucene.store.FSDirectory; 
    import org.wltea.analyzer.lucene.IKAnalyzer; 
    import org.wltea.analyzer.lucene.IKQueryParser; 
    import org.wltea.analyzer.lucene.IKSimilarity; 
     
    import com.zjr.model.User; 
     
    public class UserIndexService { 
     
        private final Log logger = LogFactory.getLog(UserIndexService.class); 
        private final String dirPath = "d:/temp/user"; 
     
        Analyzer analyzer = new IKAnalyzer(); 
        Directory directory = null; 
        IndexWriter writer = null; 
        IndexSearcher indexSearcher = null; 
     
        private void confirmDirs() { 
            File indexFile = new File(dirPath); 
            if (!indexFile.exists()) { 
                indexFile.mkdirs(); 
            } 
            if (!indexFile.exists() || !indexFile.canWrite()) { 
                if (logger.isDebugEnabled()) 
                    logger.error("索引文件目录创建失败或不可写入!"); 
            } 
        } 
     
        public void init() { 
            confirmDirs(); 
            try { 
                File f = new File(dirPath); 
                directory = FSDirectory.open(f); 
                 
            } catch (Exception e) { 
                if (logger.isDebugEnabled()) { 
                    logger.error("解除索引文件锁定失败!" + e.getCause()); 
                } 
            } 
        } 
     
        public void createIndex(List<User> userList) { 
            init(); 
            try { 
                 
    //           第一个参数是存放索引目录有FSDirectory(存储到磁盘上)和RAMDirectory(存储到内存中), 
    //          第二个参数是使用的分词器, 第三个:true,建立全新的索引,false,建立增量索引,第四个是建立的索引的最大长度 
                writer = new IndexWriter(directory, analyzer, true,IndexWriter.MaxFieldLength.LIMITED); 
                writer.setMergeFactor(500); 
                writer.setMaxBufferedDocs(155); 
                writer.setMaxFieldLength(Integer.MAX_VALUE); 
                writeIndex(writer, userList); 
                writer.optimize(); 
                writer.close(); 
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
        } 
     
        public List<User> search(String keyword) { 
     
            File indexFile = new File(dirPath); 
            if (!indexFile.exists()) { 
                return null; 
            } 
            Directory dir; 
            try { 
                dir = FSDirectory.open(indexFile); 
                indexSearcher = new IndexSearcher(dir); 
                indexSearcher.setSimilarity(new IKSimilarity()); 
                // 单字段查询,单条件查询 
                // Query query = IKQueryParser.parse("userInfo", keyword); 
     
                // 多字段,单条件查询 
                String[] fields = new String[] { "userInfo", "parameter1" }; 
                Query query = IKQueryParser.parseMultiField(fields, keyword); 
     
                // 多字体,单条件,多BooleanClause.Occur[] flags , 查询条件的组合方式(Or/And) 
                // BooleanClause.Occur[]数组,它表示多个条件之间的关系, 
                // BooleanClause.Occur.MUST表示 and, 
                // BooleanClause.Occur.MUST_NOT表示not, 
                // BooleanClause.Occur.SHOULD表示or. 
                // String[] fields =new String[]{"userInfo","parameter1"}; 
                // BooleanClause.Occur[] flags=new 
                // BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD}; 
                // Query query = IKQueryParser.parseMultiField(fields, 
                // keyword,flags); 
     
                // //多Field,多条件查询分析 
                // String[] fields =new String[]{"userInfo","parameter1"}; 
                // String[] queries = new String[]{keyword,keyword}; 
                // Query query = IKQueryParser.parseMultiField(fields,queries); 
     
                // 多Field,多条件,多Occur 查询 
                // String[] fields =new String[]{"userInfo","parameter1"}; 
                // String[] queries = new String[]{keyword,keyword}; 
                // BooleanClause.Occur[] flags=new 
                // BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.SHOULD}; 
                // Query query = 
                // IKQueryParser.parseMultiField(fields,queries,flags); 
     
                // 搜索相似度最高的20条记录 
                TopDocs topDocs = indexSearcher.search(query, 20); 
                ScoreDoc[] hits = topDocs.scoreDocs; 
                return hitsToQuery(hits, query); 
     
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
     
            return null; 
        } 
     
        private List<User> hitsToQuery(ScoreDoc[] hits, Query query) { 
            List<User> list = new ArrayList<User>(); 
            try { 
                for (int i = 0; i < hits.length; i++) { 
                    User u = new User(); 
                    Document doc = indexSearcher.doc(hits[i].doc); 
                    u.setUserId(Integer.parseInt(doc.get("userId"))); 
                    u.setUserName(doc.get("userName")); 
                    u.setUserAge(Integer.parseInt(doc.get("userAge"))); 
                    // 高亮设置 
                    SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter( 
                            "<font color=\"red\">", "</font>"); 
                    Highlighter highlighter = new Highlighter(simpleHtmlFormatter, 
                            new QueryScorer(query)); 
                    TokenStream tokenStream = analyzer.tokenStream("text", 
                            new StringReader(doc.get("userInfo"))); 
                    String userInfo = highlighter.getBestFragment(tokenStream, doc 
                            .get("userInfo")); 
                    if (userInfo != null) { 
                        u.setUserInfo(userInfo); 
                    } else { 
                        u.setUserInfo(doc.get("userInfo")); 
                    } 
     
                    SimpleHTMLFormatter simpleHtmlFormatter1 = new SimpleHTMLFormatter( 
                            "<font color=\"red\">", "</font>"); 
                    Highlighter highlighter1 = new Highlighter( 
                            simpleHtmlFormatter1, new QueryScorer(query)); 
                    TokenStream tokenStream1 = analyzer.tokenStream("text1", 
                            new StringReader(doc.get("parameter1"))); 
                    String p1 = highlighter1.getBestFragment(tokenStream1, doc 
                            .get("parameter1")); 
                    if (p1 != null) { 
                        u.setParameter1(p1); 
                    } else { 
                        u.setParameter1(doc.get("parameter1")); 
                    } 
     
                    u.setParameter2(doc.get("parameter2")); 
                    u.setParameter3(doc.get("parameter3")); 
                    u.setParameter4(doc.get("parameter4")); 
                    list.add(u); 
                } 
     
                indexSearcher.close(); 
                return list; 
            } catch (CorruptIndexException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } catch (InvalidTokenOffsetsException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
            return null; 
        } 
     
        public void writeIndex(IndexWriter writer, List<User> userList) { 
     
            try { 
                for (User u : userList) { 
                    Document doc = getDoc(u); 
                    writer.addDocument(doc); 
                } 
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
     
        } 
     
        private Document getDoc(User user) { 
            System.out.println("用户ID 为" + user.getUserId() + " 索引被创建"); 
            Document doc = new Document(); 
            addField2Doc(doc, user, "userId", Store.YES, Index.NOT_ANALYZED); 
            addField2Doc(doc, user, "userName", Store.YES, Index.NOT_ANALYZED);// Index.NOT_ANALYZED 
                                                                                // 不分词,但建立索引 
            addField2Doc(doc, user, "userAge", Store.YES, Index.NOT_ANALYZED);// Index.ANALYZED 
                                                                                // 分词并且建立索引 
            addField2Doc(doc, user, "userInfo", Store.YES, Index.ANALYZED); 
            addField2Doc(doc, user, "parameter1", Store.YES, Index.ANALYZED); 
            addField2Doc(doc, user, "parameter2", Store.YES, Index.ANALYZED); 
            addField2Doc(doc, user, "parameter3", Store.YES, Index.ANALYZED); 
            addField2Doc(doc, user, "parameter4", Store.YES, Index.ANALYZED); 
            return doc; 
        } 
     
        private void addField2Doc(Document doc, Object bean, String name, Store s, 
                Index i) { 
            String value; 
            try { 
                value = BeanUtils.getProperty(bean, name); 
                if (value != null) { 
                    doc.add(new Field(name, value, s, i, 
                            Field.TermVector.WITH_POSITIONS_OFFSETS)); 
                } 
            } catch (IllegalAccessException e) { 
                logger.error("get bean property error", e); 
            } catch (InvocationTargetException e) { 
                logger.error("get bean property error", e); 
            } catch (NoSuchMethodException e) { 
                logger.error("get bean property error", e); 
            } 
        } 
     
        /**
         * 没有排序,有高亮,有分页
         * 
         * @param pageNo
         * @param pageSize
         * @param keyword
         * @return
         */ 
        public PageBean getPageQuery(int pageNo, int pageSize, String keyword) { 
            List result = new ArrayList(); 
            File indexFile = new File(dirPath); 
            if (!indexFile.exists()) { 
                return null; 
            } 
            Directory dir; 
            try { 
                dir = FSDirectory.open(indexFile); 
                indexSearcher = new IndexSearcher(dir); 
                indexSearcher.setSimilarity(new IKSimilarity()); 
     
                String[] fields = new String[] { "userInfo", "parameter1" }; 
                BooleanClause.Occur[] flags = new BooleanClause.Occur[] { 
                        BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD }; 
                Query query = IKQueryParser.parseMultiField(fields, keyword, flags); 
     
                TopScoreDocCollector topCollector = TopScoreDocCollector.create( 
                        indexSearcher.maxDoc(), true); 
                indexSearcher.search(query, topCollector); 
                // 查询当页的记录 
                ScoreDoc[] docs = topCollector.topDocs((pageNo - 1) * pageSize, 
                        pageSize).scoreDocs; 
     
                // String[] highlightCol = {"userInfo", "parameter1"}; 
                // 高亮设置 
                SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter( 
                        "<font color=\"red\">", "</font>"); 
                Highlighter highlighter = new Highlighter(simpleHtmlFormatter, 
                        new QueryScorer(query)); 
     
                for (ScoreDoc scdoc : docs) { 
                    User u = new User(); 
                    Document doc = indexSearcher.doc(scdoc.doc); 
                    //               
                    // for (Fieldable fa : doc.getFields()) { 
                    // System.out.println(fa.name()); 
                    // String value = doc.get(fa.name()); 
                    // for (String col : highlightCol) { 
                    // if(fa.name().equals(col)) { 
                    // //设置高显内容 
                    // TokenStream tokenStream = analyzer.tokenStream("text",new 
                    // StringReader(value)); 
                    // value = highlighter.getBestFragment(tokenStream, value); 
                    // } 
                    // } 
                    //                   
                    // } 
     
                    u.setUserId(Integer.parseInt(doc.get("userId"))); 
                    u.setUserName(doc.get("userName")); 
                    u.setUserAge(Integer.parseInt(doc.get("userAge"))); 
     
                    TokenStream tokenStream = analyzer.tokenStream("text", 
                            new StringReader(doc.get("userInfo"))); 
                    String userInfo = highlighter.getBestFragment(tokenStream, doc 
                            .get("userInfo")); 
                    if (userInfo != null) { 
                        u.setUserInfo(userInfo); 
                    } else { 
                        u.setUserInfo(doc.get("userInfo")); 
                    } 
     
                    TokenStream tokenStream1 = analyzer.tokenStream("text1", 
                            new StringReader(doc.get("parameter1"))); 
                    String p1 = highlighter.getBestFragment(tokenStream1, doc 
                            .get("parameter1")); 
                    if (p1 != null) { 
                        u.setParameter1(p1); 
                    } else { 
                        u.setParameter1(doc.get("parameter1")); 
                    } 
     
                    u.setParameter2(doc.get("parameter2")); 
                    u.setParameter3(doc.get("parameter3")); 
                    u.setParameter4(doc.get("parameter4")); 
                    result.add(u); 
     
                } 
                PageBean pb = new PageBean(); 
                pb.setCurrentPage(pageNo);// 当前页 
                pb.setPageSize(pageSize); 
                pb.setAllRow(topCollector.getTotalHits());// hit中的记录数目 
                pb.setList(result); 
                return pb; 
     
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } catch (InvalidTokenOffsetsException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
     
            return null; 
        } 
     
        /**
         * 排序,有高亮,有分页
         * 
         * @param pageNo
         * @param pageSize
         * @param keyword
         * @return
         */ 
        public PageBean getPageQuery2(int pageNo, int pageSize, String keyword) { 
            List result = new ArrayList(); 
            File indexFile = new File(dirPath); 
            if (!indexFile.exists()) { 
                return null; 
            } 
            Directory dir; 
            try { 
                dir = FSDirectory.open(indexFile); 
                indexSearcher = new IndexSearcher(dir); 
                indexSearcher.setSimilarity(new IKSimilarity()); 
     
                String[] fields = new String[] { "userInfo", "parameter1" }; 
                BooleanClause.Occur[] flags = new BooleanClause.Occur[] { 
                        BooleanClause.Occur.MUST, BooleanClause.Occur.SHOULD }; 
                Query query = IKQueryParser.parseMultiField(fields, keyword, flags); 
     
                // 多字段排序,设置在前面的会优先排序 
                SortField[] sortFields = new SortField[2]; 
                SortField sortField = new SortField("userId", SortField.INT, false);//false升序,true降序 
                SortField FIELD_SEX = new SortField("userAge", SortField.INT, true); 
                sortFields[0] = sortField; 
                sortFields[1] = FIELD_SEX; 
                Sort sort = new Sort(sortFields); 
     
                TopDocs topDocs = indexSearcher.search(query, null, 50, sort); 
     
                if (topDocs.totalHits != 0) { 
                    // for(ScoreDoc sd : topDocs.scoreDocs) { 
                    //                   
                    // } 
                    // 高亮设置 
                    SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>"); 
                    Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query)); 
     
                    for (int i = (pageNo - 1) * pageSize; i < pageSize * pageNo; i++) { 
                        ScoreDoc scdoc = topDocs.scoreDocs[i]; 
                        User u = new User(); 
                        Document doc = indexSearcher.doc(scdoc.doc); 
                        u.setUserId(Integer.parseInt(doc.get("userId"))); 
                        u.setUserName(doc.get("userName")); 
                        u.setUserAge(Integer.parseInt(doc.get("userAge"))); 
                        TokenStream tokenStream = analyzer.tokenStream("text",new StringReader(doc.get("userInfo"))); 
                        String userInfo = highlighter.getBestFragment(tokenStream,doc.get("userInfo")); 
                        if (userInfo != null) { 
                            u.setUserInfo(userInfo); 
                        } else { 
                            u.setUserInfo(doc.get("userInfo")); 
                        } 
     
                        TokenStream tokenStream1 = analyzer.tokenStream("text1",new StringReader(doc.get("parameter1"))); 
                        String p1 = highlighter.getBestFragment(tokenStream1, doc.get("parameter1")); 
                        if (p1 != null) { 
                            u.setParameter1(p1); 
                        } else { 
                            u.setParameter1(doc.get("parameter1")); 
                        } 
     
                        u.setParameter2(doc.get("parameter2")); 
                        u.setParameter3(doc.get("parameter3")); 
                        u.setParameter4(doc.get("parameter4")); 
                        result.add(u); 
     
                    } 
                    PageBean pb = new PageBean(); 
                    pb.setCurrentPage(pageNo);// 当前页 
                    pb.setPageSize(pageSize); 
                    pb.setAllRow(topDocs.totalHits);// hit中的记录数目 
                    pb.setList(result); 
                    return pb; 
     
                } 
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } catch (InvalidTokenOffsetsException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
     
            return null; 
        } 
         
        /**
         * 删除索引
         * @param userId
         */ 
        public void deleIndex(String userId){ 
             
            try { 
                File f = new File(dirPath); 
                directory = FSDirectory.open(f); 
                IndexReader reader = IndexReader.open(directory,false);  
                Term term = new Term("userId", userId);  
                reader.deleteDocuments(term); 
                reader.close();  
            } catch (IOException e) { 
                // TODO Auto-generated catch block 
                e.printStackTrace(); 
            } 
             
             
        } 
     
    } 

猜你喜欢

转载自itfafa.iteye.com/blog/1596325