网上介绍的Lucene分组查询的过程大多比较复杂,这里提供一个较为简单的实现,可以满足基本的分组查询需求。
1.首先引入依赖
<!-- Grouped (group-by) query support -->
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-grouping -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-grouping</artifactId>
<version>7.2.1</version>
</dependency>
2.创建索引
/**
 * Writes seven sample documents to the index, then commits and closes the writer.
 *
 * <p>Documents 0-5 carry a grouping doc-values field (added through
 * {@code addGroupField}) whose value equals their author. Document 6 deliberately
 * has no grouping field so that it ends up in the group whose value is {@code null}.
 *
 * @param groupField name of the field the documents are grouped on
 * @param writer     index writer receiving the documents; it is committed and
 *                   closed before this method returns
 * @throws IOException if adding a document, committing or closing the writer fails
 */
public static void addDocuments(String groupField, IndexWriter writer)
        throws IOException {
    // 0
    addSampleDocument(writer, groupField, "author1", "author1", "random text", "1");
    // 1
    addSampleDocument(writer, groupField, "author1", "author1", "some more random text", "2");
    // 2
    addSampleDocument(writer, groupField, "author1", "author1", "some more random textual data", "3");
    // 3
    addSampleDocument(writer, groupField, "author2", "author2", "some random text", "4");
    // 4
    addSampleDocument(writer, groupField, "author3", "author3", "some more random text", "5");
    // 5
    addSampleDocument(writer, groupField, "author3", "author3", "random", "6");
    // 6 -- no group field (the stored "author" field is still present);
    // the id is "7" so that it no longer collides with document 5
    addSampleDocument(writer, groupField, null, "author4",
            "random word stuck in alot of other text", "7");

    writer.commit();
    writer.close();
}

/**
 * Builds one sample document and adds it to the writer.
 *
 * @param writer     index writer receiving the document
 * @param groupField name of the grouping field
 * @param groupValue value of the grouping field, or {@code null} to omit the field
 * @param author     value of the stored {@code author} field
 * @param content    value of the stored, analyzed {@code content} field
 * @param id         value of the stored {@code id} field
 * @throws IOException if the writer fails to add the document
 */
private static void addSampleDocument(IndexWriter writer, String groupField,
        String groupValue, String author, String content, String id)
        throws IOException {
    Document doc = new Document();
    if (groupValue != null) {
        addGroupField(doc, groupField, groupValue);
    }
    doc.add(new StringField("author", author, Field.Store.YES));
    doc.add(new TextField("content", content, Field.Store.YES));
    doc.add(new StringField("id", id, Field.Store.YES));
    writer.addDocument(doc);
}
/**
 * Attaches the grouping field to an index document.
 *
 * @param doc        document that receives the field
 * @param groupField name of the field to group on
 * @param value      value of the grouping field for this document
 */
private static void addGroupField(Document doc, String groupField,
        String value) {
    // The field used for grouping must be indexed as a SortedDocValuesField.
    BytesRef groupBytes = new BytesRef(value);
    SortedDocValuesField groupingField = new SortedDocValuesField(groupField, groupBytes);
    doc.add(groupingField);
}
3.执行分组查询,需要注意的坑以及要点已在代码注释中说明
/**
 * Exercises grouped search on Lucene 7: runs a query against the {@code content}
 * field and prints the hits grouped by {@code groupField}.
 *
 * <p>The analyzer, directory and index reader are opened in a try-with-resources
 * block so that they are released even when the search throws.
 *
 * @throws Exception if the query cannot be parsed or the index cannot be read
 */
@Test
public void lucene7GroupBy() throws Exception {
    GroupingSearch groupingSearch = new GroupingSearch(groupField); // field to group on
    groupingSearch.setGroupSort(new Sort(SortField.FIELD_SCORE)); // order the groups by score
    groupingSearch.setFillSortFields(true); // fill in the sort values of each group
    groupingSearch.setCachingInMB(4.0, true);
    groupingSearch.setAllGroups(true);
    groupingSearch.setGroupDocsLimit(10); // maximum number of documents returned per group

    try (Analyzer analyzer = new StandardAnalyzer();
         Directory directory = FSDirectory.open(Paths.get(indexDir));
         IndexReader reader = DirectoryReader.open(directory)) {
        QueryParser parser = new QueryParser("content", analyzer);
        String queryExpression = "some content";
        // QueryParser's default operator is OR, so a document whose content
        // contains either "some" or "content" is a hit.
        Query query = parser.parse(queryExpression);
        IndexSearcher searcher = new IndexSearcher(reader);

        // Search the content field and group the hits by the grouping field.
        TopGroups<BytesRef> result = groupingSearch.search(searcher, query, 0, 1000);
        // Total number of hits
        System.out.println("总命中数:" + result.totalHitCount);
        // Number of groups returned
        System.out.println("分组数:" + result.groups.length);

        // Print the hits group by group.
        for (GroupDocs<BytesRef> groupDocs : result.groups) {
            if (groupDocs == null) {
                continue;
            }
            if (groupDocs.groupValue != null) {
                System.out.println("分组:" + groupDocs.groupValue.utf8ToString());
            } else {
                // One document was indexed without a SortedDocValuesField on the
                // grouping field, so its group has a null groupValue.
                System.out.println("分组:" + "unknow");
            }
            System.out.println("组内数据条数:" + groupDocs.totalHits);
            for (ScoreDoc scoreDoc : groupDocs.scoreDocs) {
                // Load the stored document once and read both fields from it.
                Document hit = searcher.doc(scoreDoc.doc);
                System.out.println("author:" + hit.get("author"));
                System.out.println("content:" + hit.get("content"));
                System.out.println();
            }
            System.out.println("=====================================");
        }
    }
}
完整代码可以参考我的github:github