lucene分组查询的简单使用

网上介绍的Lucene分组查询的过程大多比较复杂，这里提供一个较为简单的实现，可以满足基本的分组查询需求。

1.首先引入依赖

    <!-- Lucene grouping module: needed for grouped (group-by) search -->
    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-grouping -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-grouping</artifactId>
      <version>7.2.1</version>
    </dependency>

2.创建索引

 /**
     * Adds seven sample documents to the index for the grouping demo, then
     * commits and closes the writer.
     *
     * <p>Documents 0-5 receive a grouping value under {@code groupField}
     * (three for "author1", one for "author2", two for "author3"). Document 6
     * is deliberately indexed without the grouping field, so it has no group
     * value at search time.
     *
     * @param groupField name of the field the documents are grouped on
     * @param writer     index writer receiving the documents; it is committed
     *                   and closed before this method returns, so the caller
     *                   must not use it afterwards
     * @throws IOException if adding a document, committing, or closing fails
     */
    public static void addDocuments(String groupField, IndexWriter writer)
            throws IOException {
        // 0
        addSampleDocument(writer, groupField, "author1", "author1",
                "random text", "1");
        // 1
        addSampleDocument(writer, groupField, "author1", "author1",
                "some more random text", "2");
        // 2
        addSampleDocument(writer, groupField, "author1", "author1",
                "some more random textual data", "3");
        // 3
        addSampleDocument(writer, groupField, "author2", "author2",
                "some random text", "4");
        // 4
        addSampleDocument(writer, groupField, "author3", "author3",
                "some more random text", "5");
        // 5
        addSampleDocument(writer, groupField, "author3", "author3",
                "random", "6");
        // 6 -- has a stored "author" field but NO grouping field.
        // The id is "7": the original reused "6", which duplicated document 5's id.
        addSampleDocument(writer, groupField, null, "author4",
                "random word stuck in alot of other text", "7");

        writer.commit();
        writer.close();
    }

    /**
     * Builds one sample document and adds it to the writer.
     *
     * @param writer     index writer receiving the document
     * @param groupField name of the grouping field
     * @param groupValue grouping value, or {@code null} to omit the grouping field
     * @param author     value of the stored, un-tokenized "author" field
     * @param content    value of the stored, analyzed "content" field
     * @param id         value of the stored, un-tokenized "id" field
     * @throws IOException if the writer fails to add the document
     */
    private static void addSampleDocument(IndexWriter writer, String groupField,
                                          String groupValue, String author,
                                          String content, String id)
            throws IOException {
        Document doc = new Document();
        if (groupValue != null) {
            addGroupField(doc, groupField, groupValue);
        }
        doc.add(new StringField("author", author, Field.Store.YES));
        doc.add(new TextField("content", content, Field.Store.YES));
        doc.add(new StringField("id", id, Field.Store.YES));
        writer.addDocument(doc);
    }

/**
     * Attaches a grouping value to an index document.
     *
     * @param doc
     *            the index document to extend
     * @param groupField
     *            name of the field to group on
     * @param value
     *            the group value for this document
     */
    private static void addGroupField(Document doc, String groupField,
                                      String value) {
        // The field used for grouping must be indexed as a SortedDocValuesField.
        BytesRef groupBytes = new BytesRef(value);
        SortedDocValuesField groupDocValues =
                new SortedDocValuesField(groupField, groupBytes);
        doc.add(groupDocValues);
    }

3.对查询结果进行分组，其中的一些坑以及要点已经在代码注释中说明得很清楚了

 /**
     * Exercises grouped search on Lucene 7: runs a query against the
     * "content" field, groups the hits by {@code groupField}, and prints each
     * group with its documents.
     *
     * <p>The analyzer, directory and reader are opened in try-with-resources
     * so they are released even if parsing or searching throws.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be
     *                   opened or searched
     */
    @Test
    public void lucene7GroupBy() throws Exception{
        GroupingSearch groupingSearch = new GroupingSearch(groupField); // field to group on
        groupingSearch.setGroupSort(new Sort(SortField.FIELD_SCORE)); // order groups by score
        groupingSearch.setFillSortFields(true); // populate sort values on the returned groups
        groupingSearch.setCachingInMB(4.0, true); // cache up to 4 MB (scores included) for the second pass
        groupingSearch.setAllGroups(true); // also compute the total number of matching groups
        //groupingSearch.setAllGroupHeads(true);
        groupingSearch.setGroupDocsLimit(10); // max documents returned PER GROUP (not the number of groups)

        String queryExpression = "some content";

        try (Analyzer analyzer = new StandardAnalyzer();
             Directory directory = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(directory)) {
            QueryParser parser = new QueryParser("content", analyzer);
            Query query = parser.parse(queryExpression);
            IndexSearcher searcher = new IndexSearcher(reader);

            // Search the "content" field. With the parser's default OR operator this
            // matches documents containing "some" OR "content". Hits are grouped by
            // groupField; group offset is 0 and at most 1000 groups are returned.
            TopGroups<BytesRef> result = groupingSearch.search(searcher, query, 0, 1000);

            // Total number of matching documents.
            System.out.println("总命中数:"+result.totalHitCount);
            // Number of groups returned.
            System.out.println("分组数:"+result.groups.length);
            // Print the hits group by group.
            for (GroupDocs<BytesRef> groupDocs : result.groups){
                if (groupDocs != null) {
                    if (groupDocs.groupValue != null) {
                        System.out.println("分组:" + groupDocs.groupValue.utf8ToString());
                    }else{
                        // One indexed document has no SortedDocValuesField for the
                        // grouping field, so its group has a null groupValue.
                        System.out.println("分组:" + "unknow");
                    }
                    System.out.println("组内数据条数:" + groupDocs.totalHits);

                    for(ScoreDoc scoreDoc : groupDocs.scoreDocs){
                        System.out.println("author:" + searcher.doc(scoreDoc.doc).get("author"));
                        System.out.println("content:" + searcher.doc(scoreDoc.doc).get("content"));
                        System.out.println();
                    }

                    System.out.println("=====================================");
                }
            }
        }
    }

完整代码可以参考我的github:github

lucene分组查询的简单使用

猜你喜欢