ElasticSearch Scroll分页

假设取的页数较大时(深分页),如请求第20页,Elasticsearch不得不取出所有分片上第1页到第20页的所有文档并做排序,最终再取出from之后的size条结果作为最终的返回值

所以,当索引非常非常大(千万或亿),是无法使用from + size 做深分页的,分页越深则越容易OOM,即便不OOM,也很消耗CPU和内存资源

ES为了避免深分页,默认(index.max_result_window = 10000)不允许使用分页(from&size)查询第10000条以后的数据,即from + size不能超过该值,因此如果要查询第10000条以后的数据,要使用ES提供的 scroll(游标) 来查询

示例:

Controller

/**
 * Handles POST requests to "/page": the JSON request body is bound to a
 * {@code QueryEntity} and forwarded to the search service.
 *
 * @param appQuery paging and keyword parameters read from the request body
 * @return whatever {@code jestService.page} returns, written to the response body
 */
@ResponseBody
@RequestMapping(value = "/page", method = RequestMethod.POST)
public Map<String, Object> page(@RequestBody QueryEntity appQuery){
	Map<String, Object> pageResult = jestService.page(appQuery);
	return pageResult;
}

Service

/**
 * Runs a scroll-based search and returns the requested page together with
 * paging metadata.
 *
 * <p>The first page comes from the initial search request; every further page
 * is fetched with one scroll request, so reaching page N costs N-1 sequential
 * scroll calls.
 *
 * @param queryEntity keyword, page number and page size of the request; its
 *                    total and page count are updated via {@code init(total)}
 * @return a map with the keys {@code pageNum}, {@code pageSize}, {@code total},
 *         {@code pageCount} and {@code list}, or {@code null} when the search
 *         fails for any reason
 */
public Map<String, Object> page(QueryEntity queryEntity) {
	SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
	// Full-text search, only when a keyword was supplied
	if (StringUtils.isNotBlank(queryEntity.getKeyword())) {
		sourceBuilder.query(QueryBuilders.queryStringQuery(queryEntity.getKeyword()).analyzer("ik_max_word"));
	}
	sourceBuilder.size(queryEntity.getPageSize());
	// Relevance score descending: higher score means a better match
	sourceBuilder.sort("_score", SortOrder.DESC);
	// Then most recently updated first
	sourceBuilder.sort("updateTime", SortOrder.DESC);
	Search search = new Search.Builder(sourceBuilder.toString())
			// Index name
			.addIndex(AppInfo.INDEX)
			// Scroll context is kept alive for 1 minute
			.setParameter(Parameters.SCROLL, "1m")
			// Restrict the response to the fields that are read below
			.setParameter("filter_path", "_scroll_id,hits.total.value,hits.hits")
			.build();
	try {
		JestResult jestResult = jestClient.execute(search);
		if (!jestResult.isSucceeded()) {
			// Fail with the server's message instead of an NPE while parsing the body
			throw new IllegalStateException("Scroll search failed: " + jestResult.getErrorMessage());
		}

		String scrollId = jestResult.getJsonObject().get("_scroll_id").getAsString();
		long total = jestResult.getJsonObject().get("hits")
				.getAsJsonObject().get("total")
				.getAsJsonObject().get("value").getAsLong();

		// The initial search already delivered the first page
		List<AppInfo> appList = jestResult.getSourceAsObjectList(AppInfo.class);
		// Advance the cursor from the second page on; stop early once a page
		// comes back empty because the result set is exhausted
		for (int page = 1; page < queryEntity.getPageNum()
				&& appList != null && !appList.isEmpty(); page++) {
			SearchScroll scroll = new SearchScroll.Builder(scrollId, "1m").build();
			JestResult scrollResult = jestClient.execute(scroll);
			if (!scrollResult.isSucceeded()) {
				throw new IllegalStateException("Scroll request failed: " + scrollResult.getErrorMessage());
			}
			// Always continue with the most recently returned scroll id
			if (scrollResult.getJsonObject().has("_scroll_id")) {
				scrollId = scrollResult.getJsonObject().get("_scroll_id").getAsString();
			}
			appList = scrollResult.getSourceAsObjectList(AppInfo.class);
		}

		// Assemble the paging data
		Map<String, Object> results = new HashMap<>();
		queryEntity.init(total);	// computes the total page count
		results.put("pageNum", queryEntity.getPageNum());
		results.put("pageSize", queryEntity.getPageSize());
		results.put("total", total);
		results.put("pageCount", queryEntity.getPageCount());
		results.put("list", appList);
		return results;
	} catch (Exception e) {
		// Existing contract: any failure is reported to the caller as null
		e.printStackTrace();
		return null;
	}
}

/**
 * Fetches the next batch of an open scroll and maps the hits to {@code type}.
 *
 * @param type     class the hit sources are converted to
 * @param scrollId scroll id returned by the previous search or scroll request
 * @param <T>      element type of the returned list
 * @return the documents of the next scroll batch
 * @throws java.io.UncheckedIOException if the scroll request cannot be executed
 */
public <T> List<T> searchEventHistogramByScroll(Class<T> type, String scrollId) {
	// Keep the scroll context alive for another minute
	SearchScroll scroll = new SearchScroll.Builder(scrollId, "1m").build();
	try {
		JestResult result = jestClient.execute(scroll);
		return result.getSourceAsObjectList(type);
	} catch (IOException e) {
		// Previously the exception was swallowed and the null result was then
		// dereferenced, hiding the real cause behind a NullPointerException
		throw new java.io.UncheckedIOException("Scroll request failed for scroll id " + scrollId, e);
	}
}

Entity

/**
 * Query and paging parameters of a search request, plus the paging totals
 * derived by {@link #init(long)}.
 */
public class QueryEntity {

	private String keyword; 		// search keyword
	private String startTime;		// start date
	private String endTime; 		// end date
	
	// current page number, defaults to 1
	private int pageNum = 1;
	// total number of pages
	private long pageCount;
	// number of records shown per page, defaults to 10
	private int pageSize = 10;
	// total number of records
	private long total;
		
	/**
	 * Stores the total record count and derives the page count from it,
	 * rounding up when the last page is only partially filled.
	 *
	 * @param total total number of matching records
	 */
	public void init(long total) {
		this.setTotal(total);
		int size = this.getPageSize();
		if (size <= 0) {
			// A non-positive page size cannot be paged; report zero pages
			// instead of failing with a division by zero
			this.setPageCount(0L);
			return;
		}
		long pages = total / size;
		if (total % size != 0) {
			// the remainder occupies one extra page
			pages++;
		}
		this.setPageCount(pages);
	}
	

	// getters and setters omitted
}

/**
 * Application record as stored in the "appinfo" index; search hits are
 * converted into instances of this class.
 */
public class AppInfo implements Serializable {

	private static final long serialVersionUID = -3449729051364769628L;

	// index name
	public static final String INDEX = "appinfo";
	// mapping type; Elasticsearch 7 defaults to _doc
	public static final String TYPE = "_doc";
	
    private String uuid; // UUID (exposed externally)
    private String appId; // application ID (for developer use)
    private String appSecretKey; // application secret key
    private String appName; // application name
    private String appIntro; // application introduction
    private String appClassify; // application category
    private String buildUnit;// organization that built the application
    private String respName; // name of the responsible person
    private String respCode; // code of the responsible person
    private String respTelPhone; // contact number of the responsible person
    private String maintainName;// name of the maintenance engineer
    private String maintainTel;// phone number of the maintenance engineer
    private String createUser; // creator
    private String supplierName; // supplier (company name)
    private String createTime; // application creation time
    private String updateTime; // time of the last modification
	
	// getters and setters omitted
    
	
}
发布了39 篇原创文章 · 获赞 44 · 访问量 27万+

猜你喜欢

转载自blog.csdn.net/u011974797/article/details/105071419