http://www.cnblogs.com/shanyou/p/3494854.html
在这个网址学习了一下mongo聚合查询
两个用pymongo查询的代码示例
def get_realtime_users(self): rt = [] try: #start_time_obj = datetime.strptime(startTime, '%Y-%m-%d') #end_time_obj = datetime.strptime(endTime, '%Y-%m-%d') #post = {'userId':userId, 'time':{'$gte':start_time_obj, '$lte':end_time_obj}} #post['time'] = datetime.strptime(time, '%Y-%m-%d %H:%M:%S') now = datetime.now() delta = timedelta(days=7) time_from = now - delta post = {'time': {'$gte': time_from}} doc = {'_id':0} pageType_dict = dict() for item in self._pageTypeMapping: pageType_dict[item['id']] = item['pageType'] cursor = self.mdbBb[g_tableOperationRecord].find(post, doc).sort([('time',pymongo.DESCENDING)]) for item in cursor.batch_size(100): #print(item['time'], type(item['time'])) #print(item) item['pageType'] = pageType_dict.get(item['pageType']) rt.append(item) except Exception as e: traceback.print_exc() logging.error(e.__str__()) return rt def get_pagetype_rank(self): rt = [] try: now = datetime.now() delta = timedelta(days=7) time_from = now - delta doc = {'_id':0, 'pageType':1, 'time':1} match = { 'time': {'$gte': time_from}, } group = { '_id': '$pageType', 'count': {'$sum': 1} } sort = {'count': -1} pageType_dict = dict() for item in self._pageTypeMapping: pageType_dict[item['id']] = item['pageType'] #db.department.aggregate({'$group':{'_id':'$name','total_area':{'$sum':'$area'}}}) #cursor = self.mdbBb[g_tableOperationRecord].find(post, doc) cursor = self.mdbBb[g_tableOperationRecord].aggregate([ {'$project': doc}, {'$match': match}, {'$group': group}, {'$sort': sort}, #{'$limit': 3}, ]) #print(pageType_dict) for item in cursor['result']: #print(item) item['_id'] = pageType_dict.get(item['_id']) rt.append(item) #print('rt',rt) except Exception as e: traceback.print_exc() logging.error(e.__str__()) return rtaggregate中 $group复合键写法
'$group':{ _id: {projid: '$remarks.projId', pageid: '$remarks.pageid'}, count: {$sum: 1} }
def get_projectpage_rank(self): '''项目页面访问排行''' rt = [] try: now = datetime.now() delta = timedelta(days=7) time_from = now - delta #doc = {'_id':0, 'pageType':1, 'time':1} match = { #'time': {'$gte': time_from}, 'remarks.projId': {'$exists': True}, 'remarks.pageid': {'$exists': True} } group = { '_id': {'projid': '$remarks.projId', 'pageid': '$remarks.pageid'}, 'count': {'$sum': 1} } sort = {'count': -1} cursor = self.mdbBb[g_tableOperationRecord].aggregate([ #{'$project': doc}, {'$match': match}, {'$group': group}, {'$sort': sort}, {'$limit': 50}, ]) for item in cursor['result']: print(item) rt.append(item) print('rt',rt) except Exception as e: traceback.print_exc() logging.error(e.__str__()) return rt注意, $match是对 $project里列出的字段来匹配的,如果project里没有,则match不到 例如以下是错的
project= {'_id':0, 'pageType':1, 'time':1} match = { 'time':{'$gte':start_time, '$lte':end_time}, 'userId': userId,#projectl里没有userId字段 }这是对的
doc = {'_id':0, 'pageType':1, 'time':1, 'userId':1} match = { 'time':{'$gte':start_time, '$lte':end_time}, 'userId': userId, }