# 版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_34069180/article/details/84981512
# -*- coding: UTF-8 -*-
_author_ = 'zy'
_date_ = '2018/12/13 0013 0:01'
import pymongo
def savedb(dbname, data):
    """Insert ``data`` into collection ``dbname`` of the ``weibo`` database.

    A connection to the MongoDB server at 127.0.0.1:27017 is opened for the
    duration of the call and closed again before returning.

    Args:
        dbname: Name of the target collection inside the ``weibo`` database.
        data: A single document, or a list of documents to insert in bulk.
    """
    # TODO: an attribute-cleaning step is still missing -- nothing here
    # checks that the expected fields are actually present in ``data``.
    with pymongo.MongoClient('127.0.0.1', 27017) as client:
        collection = client.weibo[dbname]
        # ``Collection.insert`` was removed in PyMongo 4; dispatch to its
        # documented replacements while still accepting a list of documents.
        if isinstance(data, list):
            collection.insert_many(data)
        else:
            collection.insert_one(data)
def dealwith(dbname, dbnew):
    """Copy documents from ``dbname`` into ``dbnew``, one per ``weibo_id``.

    Both collections live in the ``weibo`` database on 127.0.0.1:27017. The
    first document seen for each ``weibo_id`` is reduced to a fixed set of
    fields and inserted into ``dbnew``; later documents carrying the same id
    are skipped. Finally the number of documents read and the number of
    distinct ids kept are printed.

    Args:
        dbname: Name of the source collection.
        dbnew: Name of the destination collection.

    Raises:
        KeyError: If a source document lacks ``weibo_id``, or a kept document
            lacks one of the copied fields.
    """
    # TODO: an attribute-cleaning step is still missing -- nothing here
    # checks that the expected fields are actually present in each document.
    with pymongo.MongoClient('127.0.0.1', 27017) as client:
        db = client.weibo
        target = db[dbnew]
        # A set gives O(1) membership tests; assumes weibo_id values are
        # hashable (e.g. str or int) -- TODO confirm against the data.
        seen_ids = set()
        # Counted while iterating instead of calling ``Collection.count()``,
        # which was removed in PyMongo 4.
        total = 0
        for doc in db[dbname].find():
            total += 1
            weibo_id = doc['weibo_id']
            if weibo_id in seen_ids:
                continue
            seen_ids.add(weibo_id)
            # One shared client is used for every insert rather than opening
            # a new connection per record.
            target.insert_one({
                'weibo_id': weibo_id,
                'created': doc['created'],
                'uid_name': doc['uid_name'],
                'uid': doc['uid'],
                'level': doc['level'],
                'area': doc['area'],
                # NOTE(review): 'url' is filled from the 'area' field, which
                # looks like a copy-paste slip. Kept as-is because the source
                # schema is not visible here -- confirm whether the source
                # documents carry a 'url' field and switch to it if so.
                'url': doc['area'],
                'text': doc['comment'],
            })
    print('去重前' + str(total) + '去重后' + str(len(seen_ids)))
#4132
if __name__ == '__main__':
    # Script entry point: de-duplicate collection 'all' into 'new_comment'.
    dealwith('all', 'new_comment')