在研究 MongoDB 分页查询的过程中,我发现数据量大了之后查询会特别慢。进一步研究发现,MongoDB 处理数据主要依赖内存,在 CPU、内存占用率过高的情况下,MongoDB 的效率会直线下降。所以在操作 MongoDB 的过程中要注意内存的消耗,不要做任何占用大量内存的事情。
分页查询中,skip 的数据量大了之后,效率就会直线下降,因为 skip 需要从头扫描并丢弃所有被跳过的文档。
我们可以通过多次少量查询来解决这个问题,你会发现虽然查询多次,效率依然可观;
做法是把上一次查询的结果作为下一次查询的条件。
因为我是按时间排序的,所以以时间作为查询条件:
/**
 * Returns one page of documents ordered by {@code sortField} without issuing a single large
 * {@code skip}.
 *
 * <p>When {@code skip <= 1600} this is a plain sort/skip/limit query. For larger offsets the
 * offset is consumed in chunks of 1600 documents: each round reads only the last document of
 * the chunk and replaces the range condition on {@code sortField} with "strictly after that
 * document", so the final query only has to skip {@code skip % 1600} documents.
 *
 * <p>Limitation: a chunk boundary is identified by its {@code sortField} value alone, so other
 * documents that share the boundary document's value are excluded by the strict comparison.
 * The scheme is only exact when {@code sortField} values are unique.
 * NOTE(review): assumes every matching document carries a non-null {@code sortField}.
 *
 * @param mongoCollection collection to query; {@code null} makes the method return {@code null}
 * @param basicDBObject   query filter; the chunked path works on a copy, so the caller's
 *                        instance is left untouched. While chunking, any condition on
 *                        {@code sortField} is replaced by one rebuilt from the chunk boundary
 *                        and {@code startTime}/{@code endTime}
 * @param sort            sort codes keyed by field name; the order is ascending only when the
 *                        entry for {@code sortField} equals {@code MongoEnums.Sort.ASC.getValue()},
 *                        otherwise descending
 * @param sortField       field to order by and to chunk on
 * @param skip            number of documents to skip; values {@code <= 0} skip nothing
 * @param limit           value passed to {@code FindIterable.limit}
 * @param startTime       exclusive lower bound re-applied to {@code sortField} while chunking in
 *                        descending order; may be {@code null}
 * @param endTime         exclusive upper bound re-applied to {@code sortField} while chunking in
 *                        ascending order; may be {@code null}
 * @return the (not yet executed) query for the requested page, or {@code null} when
 *         {@code mongoCollection} is {@code null}
 * @throws Exception kept for source compatibility with existing callers
 */
public static FindIterable<Document> findFindIterable(MongoCollection<Document> mongoCollection, BasicDBObject basicDBObject, Map<String, Integer> sort,String sortField, int skip, int limit, Date startTime, Date endTime) throws Exception{
    if (mongoCollection == null) {
        return null;
    }
    int direction = -1;
    if (sort != null && StringUtils.isNotEmpty(sortField) && sort.get(sortField) != null
            && sort.get(sortField).intValue() == MongoEnums.Sort.ASC.getValue()) {
        direction = 1;
    }
    BasicDBObject order = new BasicDBObject(sortField, direction);

    final int chunkSize = 1600; // documents consumed per round, and the threshold for chunking
    if (skip <= chunkSize) {
        // Small offset: a direct skip is cheap enough.
        FindIterable<Document> page = mongoCollection.find(basicDBObject).sort(order);
        if (isSignlessInteger(skip)) {
            page.skip(skip);
        }
        return page.limit(limit);
    }

    // Work on a shallow copy so the caller can keep using its filter (e.g. for a count).
    BasicDBObject query = new BasicDBObject(basicDBObject);
    int fullChunks = skip / chunkSize;
    for (int round = 0; round < fullChunks; round++) {
        // Fetch only the last document of the current chunk.
        Document boundary = mongoCollection.find(query).sort(order).skip(chunkSize - 1).limit(1).first();
        if (boundary == null) {
            // Fewer than chunkSize documents remain, so the requested page lies past the end;
            // skipping chunkSize of them yields the empty page.
            return mongoCollection.find(query).sort(order).skip(chunkSize).limit(limit);
        }
        // Use the stored value as-is: formatting it to text and parsing it back would drop
        // the milliseconds and shift the boundary.
        Object boundaryValue = boundary.get(sortField);
        BasicDBObject range = new BasicDBObject();
        if (direction == -1) {
            if (startTime != null) {
                range.put("$gt", startTime);
            }
            if (boundaryValue != null) {
                range.put("$lt", boundaryValue);
            }
        } else {
            if (boundaryValue != null) {
                range.put("$gt", boundaryValue);
            }
            if (endTime != null) {
                range.put("$lt", endTime);
            }
        }
        query.remove(sortField);
        if (!range.isEmpty()) {
            query.put(sortField, range);
        }
    }

    // Only the part of the offset that does not fill a whole chunk is skipped for real.
    int remainder = skip % chunkSize;
    FindIterable<Document> page = mongoCollection.find(query).sort(order);
    if (isSignlessInteger(remainder)) {
        page.skip(remainder);
    }
    return page.limit(limit);
}
/**
 * Tells whether {@code num} is a positive integer.
 *
 * @param num value to check
 * @return {@code true} when {@code num} is greater than zero, otherwise {@code false}
 */
private static boolean isSignlessInteger(int num) {
    // An int always satisfies num % 1 == 0, so the sign test alone decides the result.
    return num > 0;
}
/**
 * Holder for the DESC / ASC sort-direction enum.
 */
public class MongoEnums {

    /**
     * Sort direction. Each constant carries a numeric code ({@link #getValue()}) and a
     * lowercase label ({@link #getName()}).
     */
    public enum Sort {
        DESC(2, "desc"),
        ASC(1, "asc");

        private final int value;
        private final String name;

        Sort(int code, String label) {
            value = code;
            name = label;
        }

        /** @return the numeric code of this direction (2 for DESC, 1 for ASC) */
        public int getValue() {
            return value;
        }

        /** @return the lowercase label of this direction ("desc" or "asc") */
        public String getName() {
            return name;
        }
    }
}
注意:一定要注意内存的使用,能释放的内存就释放;注意变量的作用域;尽量不要定义FindIterable<Document>的变量,一般定义一次就好,mongo 已经有很好的处理,没必要画蛇添足,不然特别占用内存;
在使用过程中,发现有些数据的时间是一样的,这时上面的思路就有问题了:只用时间作为边界,会漏掉或重复时间相同的数据。如何处理呢?我们可以通过 time + _id 来排序,优化代码如下:
/**
 * Returns one page of documents ordered by {@code sortField} and then by {@code _id}, without
 * issuing a single large {@code skip}.
 *
 * <p>Ordering: documents are sorted by {@code sortField} first; documents with an equal
 * {@code sortField} are ordered by {@code _id}. The same compound order is used for every
 * offset, so pages below and above the chunking threshold line up.
 *
 * <p>When {@code skip <= 1600} this is a plain sort/skip/limit query. For larger offsets the
 * offset is consumed in chunks (1600 documents, 6000 from an offset of 40000, 20000 from an
 * offset of 1000000). Each round reads only the last document of the chunk and continues with
 * the documents that come after it in the compound order:
 * {@code sortField} beyond the boundary value, or {@code sortField} equal to it and {@code _id}
 * beyond the boundary id. The caller's filter is AND-ed with that condition and is never
 * modified.
 * NOTE(review): assumes every matching document carries a non-null {@code sortField} of one
 * BSON type; range operators do not compare across types.
 *
 * @param mongoCollection collection to query; {@code null} makes the method return {@code null}
 * @param basicDBObject   query filter; used as-is and left unmodified
 * @param sort            sort codes keyed by field name; the order is ascending only when the
 *                        entry for {@code sortField} equals {@code MongoEnums.Sort.ASC.getValue()},
 *                        otherwise descending
 * @param sortField       primary field to order by
 * @param skip            number of documents to skip; values {@code <= 0} skip nothing
 * @param limit           value passed to {@code FindIterable.limit}
 * @param startTime       exclusive lower bound added on {@code sortField} after the first chunk
 *                        when sorting descending; may be {@code null}
 * @param endTime         exclusive upper bound added on {@code sortField} after the first chunk
 *                        when sorting ascending; may be {@code null}
 * @return the (not yet executed) query for the requested page, or {@code null} when
 *         {@code mongoCollection} is {@code null}
 * @throws Exception kept for source compatibility with existing callers
 */
public static FindIterable<Document> findIterable(MongoCollection<Document> mongoCollection, BasicDBObject basicDBObject, Map<String, Integer> sort,String sortField, int skip, int limit, Date startTime, Date endTime) throws Exception{
    if (mongoCollection == null) {
        return null;
    }
    int direction = -1;
    if (sort != null && StringUtils.isNotEmpty(sortField) && sort.get(sortField) != null
            && sort.get(sortField).intValue() == MongoEnums.Sort.ASC.getValue()) {
        direction = 1;
    }
    // Sort by sortField first, then by _id to break ties deterministically.
    Document order = new Document(sortField, direction).append("_id", direction);

    if (skip <= 1600) {
        // Small offset: a direct skip is cheap enough.
        FindIterable<Document> page = mongoCollection.find(basicDBObject).sort(order);
        if (isSignlessInteger(skip)) {
            page.skip(skip);
        }
        return page.limit(limit);
    }

    int chunkSize = 1600;
    if (skip >= 40000) {
        chunkSize = 6000;
    }
    if (skip >= 1000000) {
        chunkSize = 20000;
    }

    // Operator meaning "later in the sort order", and the fixed bound on the far side.
    String afterOp = direction == 1 ? "$gt" : "$lt";
    String outerOp = direction == 1 ? "$lt" : "$gt";
    Date outerBound = direction == 1 ? endTime : startTime;

    BasicDBObject query = basicDBObject;
    int fullChunks = skip / chunkSize;
    for (int round = 0; round < fullChunks; round++) {
        // Fetch only the last document of the current chunk.
        Document boundary = mongoCollection.find(query).sort(order).skip(chunkSize - 1).limit(1).first();
        if (boundary == null) {
            // Fewer than chunkSize documents remain, so the requested page lies past the end;
            // skipping chunkSize of them yields the empty page.
            return mongoCollection.find(query).sort(order).skip(chunkSize).limit(limit);
        }
        Object boundaryValue = boundary.get(sortField);
        Object boundaryId = boundary.get("_id");

        // "After the boundary" in (sortField, _id) order:
        //   sortField beyond the boundary value, OR same value and _id beyond the boundary id.
        // A plain "sortField <= value AND _id < id" would drop documents whose sortField is
        // beyond the boundary but whose _id is not.
        BasicDBObject strictlyAfter = new BasicDBObject(sortField, new BasicDBObject(afterOp, boundaryValue));
        BasicDBObject tieAfter = new BasicDBObject(sortField, boundaryValue)
                .append("_id", new BasicDBObject(afterOp, boundaryId));

        java.util.List<BasicDBObject> clauses = new java.util.ArrayList<BasicDBObject>();
        clauses.add(basicDBObject);
        if (outerBound != null) {
            clauses.add(new BasicDBObject(sortField, new BasicDBObject(outerOp, outerBound)));
        }
        clauses.add(new BasicDBObject("$or", java.util.Arrays.asList(strictlyAfter, tieAfter)));
        // Wrapping in $and keeps the caller's filter intact, whatever keys it already uses.
        query = new BasicDBObject("$and", clauses);
    }

    // Only the part of the offset that does not fill a whole chunk is skipped for real.
    int remainder = skip % chunkSize;
    FindIterable<Document> page = mongoCollection.find(query).sort(order);
    if (isSignlessInteger(remainder)) {
        page.skip(remainder);
    }
    return page.limit(limit);
}
通过测试,最后整合代码,如下:
/**
 * Loads one page of beans, ordered by the requested sort field and then by {@code _id}, without
 * issuing a single large {@code skip}.
 *
 * <p>Inputs are read from {@code queryTerms}:
 * <ul>
 *   <li>{@code "sort"} - ascending only when its string form equals
 *       {@code MongoEnums.Sort.ASC.getValue()} and {@code "sortField"} is present;
 *       otherwise descending</li>
 *   <li>{@code "sortField"} - primary field to order by</li>
 *   <li>{@code "skip"}, {@code "limit"} - parsed as integers, 0 when absent</li>
 *   <li>{@code "startTime"}, {@code "endTime"} - optional {@code Date} bounds re-applied to the
 *       sort field while chunking (exclusive lower bound when descending, exclusive upper
 *       bound when ascending)</li>
 * </ul>
 * The filter itself comes from {@code getBasicDBObject(queryTerms)} and the collection from
 * {@code getCollection("collectionName")}.
 *
 * <p>When {@code skip <= 1600} a plain sort/skip/limit query is used. For larger offsets the
 * offset is consumed in chunks (1600 documents, 6000 from an offset of 40000, 20000 from an
 * offset of 1000000): each round reads only the last document of the chunk and continues with
 * the documents after it in (sort field, {@code _id}) order. Documents sharing the boundary's
 * sort value are therefore neither lost nor repeated.
 * NOTE(review): assumes every matching document carries a non-null sort field of one BSON type.
 *
 * <p>Each resulting document's {@code "content"} string is parsed into a {@code Bean}.
 *
 * @param operatorEnum not used by this method
 * @param queryTerms   query parameters as described above
 * @param <T>          element type expected by the caller; elements are {@code Bean} instances
 *                     added through an unchecked cast
 * @return the beans of the requested page; empty when the collection is unavailable or the
 *         offset lies past the last matching document
 */
public <T> List<T> selectPage(OperatorEnum operatorEnum, Map<String, Object> queryTerms) {
    Object sort = queryTerms.get("sort");
    Object sortFieldTerm = queryTerms.get("sortField");
    String sortField = sortFieldTerm == null ? null : sortFieldTerm.toString();
    // Build the query filter from the request.
    BasicDBObject basicDBObject = getBasicDBObject(queryTerms);
    Object skipTerm = queryTerms.get("skip");
    int skip = skipTerm == null ? 0 : Integer.parseInt(skipTerm.toString());
    Object limitTerm = queryTerms.get("limit");
    int limit = limitTerm == null ? 0 : Integer.parseInt(limitTerm.toString());
    Date startTime = (Date) queryTerms.get("startTime");
    Date endTime = (Date) queryTerms.get("endTime");
    // Obtain the MongoDB collection.
    final MongoCollection<Document> mongoCollection = getCollection("collectionName");
    List<T> list = new ArrayList<>();
    if (mongoCollection == null) {
        return list;
    }

    // Compare text with text: String.equals(Integer) is always false, which used to force
    // descending order no matter what was requested.
    boolean ascending = sort != null && sortField != null
            && String.valueOf(MongoEnums.Sort.ASC.getValue()).equals(sort.toString());
    int direction = ascending ? 1 : -1;
    // Sort by the requested field first, then by _id to break ties; used for every offset so
    // that small and large offsets page through the same sequence.
    Document order = new Document(sortField, direction).append("_id", direction);

    BasicDBObject query = basicDBObject;
    int remaining = skip;
    if (skip > 1600) {
        int chunkSize = 1600;
        if (skip >= 40000) {
            chunkSize = 6000;
        }
        if (skip >= 1000000) {
            chunkSize = 20000;
        }
        // Operator meaning "later in the sort order", and the fixed bound on the far side.
        String afterOp = ascending ? "$gt" : "$lt";
        String outerOp = ascending ? "$lt" : "$gt";
        Date outerBound = ascending ? endTime : startTime;

        int fullChunks = skip / chunkSize;
        for (int round = 0; round < fullChunks; round++) {
            // Fetch only the last document of the current chunk.
            Document boundary = mongoCollection.find(query).sort(order).skip(chunkSize - 1).limit(1).first();
            if (boundary == null) {
                // Fewer than chunkSize documents remain: the requested page is past the end.
                return list;
            }
            Object boundaryValue = boundary.get(sortField);
            Object boundaryId = boundary.get("_id");

            // "After the boundary" in (sortField, _id) order:
            //   sortField beyond the boundary value, OR same value and _id beyond the boundary id.
            List<BasicDBObject> afterBoundary = new ArrayList<>();
            afterBoundary.add(new BasicDBObject(sortField, new BasicDBObject(afterOp, boundaryValue)));
            afterBoundary.add(new BasicDBObject(sortField, boundaryValue)
                    .append("_id", new BasicDBObject(afterOp, boundaryId)));

            List<BasicDBObject> clauses = new ArrayList<>();
            clauses.add(basicDBObject);
            if (outerBound != null) {
                clauses.add(new BasicDBObject(sortField, new BasicDBObject(outerOp, outerBound)));
            }
            clauses.add(new BasicDBObject("$or", afterBoundary));
            // Wrapping in $and keeps the original filter intact, whatever keys it already uses.
            query = new BasicDBObject("$and", clauses);
        }
        // Only the part of the offset that does not fill a whole chunk is skipped for real.
        remaining = skip % chunkSize;
    }

    FindIterable<Document> page = mongoCollection.find(query).sort(order);
    if (isSignlessInteger(remaining)) {
        page.skip(remaining);
    }
    page.limit(limit);
    // Close the cursor even if parsing a document fails.
    try (MongoCursor<Document> cursor = page.iterator()) {
        while (cursor.hasNext()) {
            Bean bean = JSONObject.parseObject(cursor.next().getString("content"), Bean.class);
            list.add((T) bean);
        }
    }
    return list;
}
mongo 官网:
https://docs.mongodb.com/manual/reference/operator/aggregation/skip/
查看命令执行时长:在查询语句后追加 explain,例如:
db.collection.find(...).explain("executionStats")