1. Original table structures
1) imsi table
MongoDB Enterprise > db.trs_action_dzwl_zm.findOne()
{
"_id" : {
"imsi" : "460029380018855",
"start_time" : "2019-03-13 15:37:07"
},
"site_address" : "织里-大港路与G318交叉口",
"xnetbar_wacode" : "EG-MIX-WL-4C-006",
"imei" : "000000052052052",
"device_longitude" : "120.275424",
"device_latitude" : "30.838656",
"tmsi" : "1552462627",
"rssi" : "140",
"band" : "40",
"plmn" : "46000",
"vendor_name" : "Nanjing Schengen",
"device_name" : "weaving - Dagang Road and the intersection G318 - 4G",
"tel_number" : "1595028",
"province" : "Jiangsu Province",
"city" : "Yancheng"
}
2, car table
MongoDB Enterprise > db.trs_action_car_info.findOne()
{
"_id" : {
"license_number" : "苏A39NX7",
"start_time" : "2019-05-16 23:03:13"
},
"site_address" : "湖织大道-香圩桥东侧",
"site_location_id" : "",
"unlawful_act" : "",
"driving_direct" : "其它",
"lane_id" : "001",
"netbar_wacode" : "904",
"license_color" : "002",
"photo_cnt" : "",
"monitor_type" : "卡口式监控",
"photo_path" : "/pic?did=12ffaa00-78a3-1037-921c-54c4150760be&bid=486472&pid=4294966623&ptime=1558018994",
"speed" : "0",
"stat" : "0",
"vehicle_brand1" : "0",
"vehicle_brand2" : "0",
"car_length" : "",
"car_color" : "其它颜色",
"shade" : "000",
"car_type" : "轿车",
"license_type" : "92式民用车",
"vehicle_feature_path" : "",
"device_name" : "湖织大道-香圩桥东侧",
"monitor_direct" : "未知",
"lane" : "001",
"device_longitude" : "120.308512",
"device_latitude" : "30.881026",
"site_name" : "Weaving Lake Avenue - Hong Wei east side of the bridge",
"site_latitude" : "30.881026",
"site_longitude" : "120.308512",
"road_segment_direct" : "unknown"
}
3, face table
MongoDB Enterprise > db.trs_action_face_info.findOne()
{
"_id" : {
"pid" : "0120_1561570383884_d61beb5b9e644ed081f4ffc5e362ece7",
"start_time" : "2019-06-13 12:32:59"
},
"site_address" : "融泰宾馆",
"img_mode" : "",
"obj_img_url" : "/pic?=d4=i778z096as091-706105m6ep=t1i5i*d1=*ipd7=*9s8=42b8i2d05*717540c14-a563e27-1579*d-d0i806d8e42",
"quality_score" : "0.883593",
"netbar_wacode" : "33052802001310942740",
"device_name" : "融泰宾馆",
"device_longitude" : "120.262211",
"device_latitude" : "30.841749",
"age" : "",
"gender" : "1",
"race" : "",
"beard" : "",
"eye_open" : "",
"eye_glass" : "",
"sun_glass" : "1",
"mask" : "",
"mouth_open" : "",
"smile" : "1",
"similarity" : "0.97059",
"image_id" : "0120_1561570383884_d61beb5b9e644ed081f4ffc5e362ece7",
"bkg_url" : "/pic?=d4=i778z096as091-706105m6ep=t1i5i*d1=*ipd7=*9s8=42b8i2d05*717540c14-a563e27-1579*d-d0i806d8e42"
}
4, MAC table
2. Combined collection structure: collecsites
Requirement: from the four source tables — imsi, car, face and MAC (MAC is deferred for now) — extract the key fields and group them:
1) by site
2) into two-minute intervals
3) with each document storing at most 200 key records per two-minute window
MongoDB Enterprise > db.collecsites.findOne()
{
"_id" : ObjectId("5e159ef831d840f9482b2adc"),
"timeline" : "2019-03-13 15:34:00",
"site" : "织里-大港路与G318交叉口",
"face" : [ ],
"lpn" : [ ],
"mac" : [ ],
"nsamples" : 200,
"imsi" : [
{
"start_time" : "2019-03-13 15:35:56",
"imsi" : "460078995442766"
},
{
"start_time" : "2019-03-13 15:35:56",
"imsi" : "460006254007976"
}
]
}
3. Script development
1) Python modules used
from multiprocessing import Pool  (process pool)
from pymongo import MongoClient   (Python MongoDB driver connection)
import pandas as pd               (splits the overall time span into slices; 2 minutes per slice in this example)
2, script
1) the script connected mongodb
#coding=utf-8
import os, time, random
import json
from datetime import datetime
from pymongo import MongoClient
import sys
import datetime
class Database:
    """Thin convenience wrapper around a pymongo `MongoClient`.

    Reconstructed from a mangled paste: the `class` header and most `def`
    lines were lost in the original text; method names and signatures are
    taken from the surviving bodies and from the call sites in the rest of
    the file (`insert_one`, `updateOne`, `find`, `close_connect`, ...).
    """

    def __init__(self, address, port, database):
        """Connect to the MongoDB server and select *database*."""
        self.conn = MongoClient(host=address, port=port)
        self.db = self.conn[database]

    def get_state(self):
        """Return True when both the client and the database handle exist."""
        return self.conn is not None and self.db is not None

    def insert_one(self, collection, data):
        """Insert one document; return its _id, or "" when not connected."""
        if self.get_state():
            ret = self.db[collection].insert_one(data)
            return ret.inserted_id
        return ""

    def insert_many(self, collection, data):
        """Insert many documents; return their _ids, or "" when not connected.

        Bug fix: `InsertManyResult` exposes `inserted_ids` (plural); the
        original read the non-existent `inserted_id` attribute.
        """
        if self.get_state():
            ret = self.db[collection].insert_many(data)
            return ret.inserted_ids
        return ""

    def update_many(self, collection, data):
        """Update documents in bulk.

        *data* format: {key: [old_value, new_value]} — the old values form
        the filter, the new values form the $set payload.
        Returns the modified count, or 0 when not connected.
        """
        data_filter = {}
        data_revised = {}
        for key in data.keys():
            data_filter[key] = data[key][0]
            data_revised[key] = data[key][1]
        if self.get_state():
            return self.db[collection].update_many(data_filter, {"$set": data_revised}).modified_count
        return 0

    def updateOne(self, collection, data_filter, data_revised):
        """Upsert one document matching *data_filter* with *data_revised*.

        Uses the legacy `update(filter, update, upsert=True)` API, as the
        original code did.
        """
        if self.get_state():
            return self.db[collection].update(data_filter, data_revised, True)
        return 0

    def find(self, col, condition, column=None):
        """Return a cursor over documents matching *condition*.

        *column* is an optional projection document; None when not connected.
        """
        if self.get_state():
            if column is None:
                return self.db[col].find(condition)
            else:
                return self.db[col].find(condition, column)
        else:
            return None

    def aggregate(self, col, condition):
        """Run an aggregation pipeline with allowDiskUse enabled."""
        if self.get_state():
            options = {'allowDiskUse': True}
            result = self.db[col].aggregate(condition, **options)
            return result
        else:
            return None

    def delete(self, col, condition):
        """Delete all documents matching *condition*; return the count."""
        if self.get_state():
            return self.db[col].delete_many(filter=condition).deleted_count
        return 0

    def close_connect(self):
        """Close the underlying MongoClient connection."""
        self.conn.close()
#coding:utf-8
import os, time, random
import json
from datetime import datetime
from pymongo import MongoClient
import sys
import datetime
import mongodbclient
import pandas as pd
def get_max(num1, num2, num3):
    """Return the largest of the three numbers.

    Reconstructed: the original `def` line was lost; the body assigned
    `max(num1, num2, num3)` and returned it.
    """
    return max(num1, num2, num3)
def site_cursor_to_list(myresult, colum):
    """Collect the value of field *colum* from every document in *myresult*.

    *myresult* is any iterable of mapping-like documents (a pymongo cursor
    or a plain list of dicts). Returns a plain list of the extracted values.
    The function name comes from the call sites (get_site / data_exec).
    """
    sitelist = []
    for item in myresult:
        sitelist.append(item[colum])
    return sitelist
def list_Duplicate_removal(inlist):
    """Return *inlist* with duplicates removed (order is NOT preserved).

    Reconstructed from the garbled `OUTLIST List = (SET (InList))` — the
    classic `list(set(inlist))` de-duplication idiom.
    """
    return list(set(inlist))
def get_time_interval(str_start_time, str_end_time):
    """Split [start, end] into 2-minute timestamps via pandas.date_range.

    Returns a DatetimeIndex whose consecutive entries bound each 2-minute
    slice. The garbled `FREQ = 'Min 2'` in the source is the documented
    "2min" frequency alias (the prose above states "a period of 2 min").
    """
    return pd.date_range(str_start_time, str_end_time, freq='2min')
def get_site(collection_name, str_start_time, str_end_time):
    """Return the site_address of every record in [start, end) of *collection_name*.

    Bug fix: the original closed the connection *before* consuming the
    cursor; pymongo cursors fetch lazily, so iteration after close fails.
    The cursor is now materialized first, then the connection is closed.
    """
    db = mongodbclient.Database("172.16.102.15", 27017, "idpad_zl")
    myresult = db.find(collection_name, {"_id.start_time": {"$gte": str_start_time, "$lt": str_end_time}})
    sites = site_cursor_to_list(myresult, "site_address")
    db.close_connect()
    return sites
def get_site_data(collection_name, str_start_time, str_end_time, site, colums):
    """Return the documents for *site* in [start, end), projected by *colums*.

    Fixes in the reconstruction:
    - garbled operator `"$ GTE"` restored to `"$gte"`;
    - the cursor is materialized into a list before the connection is
      closed (pymongo cursors fetch lazily, so returning a live cursor
      after close would fail at the caller).
    """
    db = mongodbclient.Database("172.16.102.15", 27017, "idpad_zl")
    cursor = db.find(collection_name,
                     {"_id.start_time": {"$gte": str_start_time, "$lt": str_end_time},
                      "site_address": site},
                     colums)
    result = list(cursor) if cursor is not None else []
    db.close_connect()
    return result
def sitetime_insert(collection_name,site,str_start_time,imsi_sitetime,face_sitetime,car_sitetime,mac_sitetime):
    """Insert one placeholder document for a (site, timeline) pair.

    Each document caps out at 200 samples per category (nsamples), matching
    the collecsites schema shown earlier in this write-up.
    """
    conn = mongodbclient.Database("172.16.102.15", 27017, "idpad_zl")
    document = {
        "site": site,
        "timeline": str_start_time,
        "nsamples": 200,
        "imsi": imsi_sitetime,
        "face": face_sitetime,
        "lpn": car_sitetime,
        "mac": mac_sitetime,
    }
    conn.insert_one(collection_name, document)
    conn.close_connect()
def sitetime_updateOne(collection_name, site, str_start_time, key, value):
    """Fill one still-empty *key* array of a (site, timeline) document with *value*.

    The filter matches a placeholder document whose *key* field is still []
    so each chunk of data lands in its own document. Reconstructed: the
    original `def` line was lost; the signature comes from the call sites
    in update_exec (collection, site, start_time, type key, data list).
    """
    db = mongodbclient.Database("172.16.102.15", 27017, "idpad_zl")
    db.updateOne(collection_name,
                 {"site": site, "timeline": str_start_time, "nsamples": 200, key: []},
                 {"$set": {key: value}})
    db.close_connect()
#coding:utf-8
import os, time, random
import json
from datetime import datetime
from pymongo import MongoClient
import sys
import datetime
import mongodbclient
import pandas as pd
import collection_curd as curd
from multiprocessing import Pool
def update_exec(type_outlen_flo,collectionname,site,str_start_time,typelist,datalist,type_outlen_int,type_max_len):
    """Write *datalist* into pre-inserted placeholder documents in chunks of <= 200.

    Parameters
    ----------
    type_outlen_flo : len(datalist) / 200.0 — <= 1.0 means one document suffices.
    typelist        : the collecsites field to fill ('imsi', 'lpn' or 'face').
    type_outlen_int : len(datalist) // 200 (kept for interface compatibility;
                      no longer needed by the chunked loop below).
    type_max_len    : len(datalist).

    Bug fix: when len(datalist) was an exact multiple of 200 the original
    loop's final iteration issued a no-op update with an empty slice,
    wasting one placeholder document. Chunking with range(0, n, 200)
    produces exactly ceil(n / 200) non-empty updates.
    """
    if type_outlen_flo <= 1.0:
        # Everything (including the empty-list case) fits in one document.
        curd.sitetime_updateOne(collectionname, site, str_start_time, typelist, datalist)
    else:
        # One update per 200-element slice; the last slice holds the remainder.
        for start in range(0, type_max_len, 200):
            curd.sitetime_updateOne(collectionname, site, str_start_time,
                                    typelist, datalist[start:start + 200])
def data_exec(nums, time_interval):
    """Process one 2-minute slice: collect imsi/car/face keys per site and
    write them into `collecsites` documents holding at most 200 entries each.

    Reconstructed from heavily garbled text: the `def` line, several
    assignments (imsidata, car_max_len, imsi_outlen_int, ...) and the
    if/else around the placeholder-insert loop were lost. *nums* indexes
    the slice inside *time_interval* (see the __main__ dispatch loop).
    """
    # Bounds of this slice, formatted to match the string timestamps
    # stored in the source collections' _id.start_time.
    str_start_time = datetime.datetime.strftime(time_interval[nums], '%Y-%m-%d %H:%M:%S')
    str_end_time = datetime.datetime.strftime(time_interval[nums + 1], '%Y-%m-%d %H:%M:%S')

    # Sites that produced imsi / car / face data inside this slice.
    myresult_imsi_sit = curd.get_site("trs_action_dzwl_zm", str_start_time, str_end_time)
    myresult_car_sit = curd.get_site("trs_action_car_info", str_start_time, str_end_time)
    myresult_face_sit = curd.get_site("trs_action_face_info", str_start_time, str_end_time)
    myresult = curd.list_Duplicate_removal(myresult_imsi_sit + myresult_car_sit + myresult_face_sit)
    if not myresult:
        return

    for site in myresult:
        # Key fields (_id sub-documents) for this site within the slice.
        my_imsi_site_data = curd.get_site_data("trs_action_dzwl_zm", str_start_time, str_end_time, site, {"_id"})
        my_car_site_data = curd.get_site_data("trs_action_car_info", str_start_time, str_end_time, site, {"_id"})
        my_face_site_data = curd.get_site_data("trs_action_face_info", str_start_time, str_end_time, site, {"_id"})
        imsidata = curd.site_cursor_to_list(my_imsi_site_data, "_id")
        cardata = curd.site_cursor_to_list(my_car_site_data, "_id")
        facedata = curd.site_cursor_to_list(my_face_site_data, "_id")

        imsi_max_len = len(imsidata)
        car_max_len = len(cardata)
        face_max_len = len(facedata)
        # Py2-style integer division restored explicitly with // so the
        # counts stay ints under Python 3 (range() rejects floats).
        imsi_outlen_int = imsi_max_len // 200
        car_outlen_int = car_max_len // 200
        face_outlen_int = face_max_len // 200
        imsi_outlen_flo = imsi_max_len / 200.0
        car_outlen_flo = car_max_len / 200.0
        face_outlen_flo = face_max_len / 200.0

        # Placeholder documents needed = ceil(largest category / 200), at
        # least 1 so every active site gets a document for this slice.
        docs_needed = max(1, -(-max(imsi_max_len, car_max_len, face_max_len) // 200))
        for _ in range(docs_needed):
            curd.sitetime_insert("collecsites", site, str_start_time, [], [], [], [])

        # Fill the placeholders category by category.
        update_exec(imsi_outlen_flo, "collecsites", site, str_start_time, 'imsi', imsidata, imsi_outlen_int, imsi_max_len)
        update_exec(car_outlen_flo, "collecsites", site, str_start_time, 'lpn', cardata, car_outlen_int, car_max_len)
        update_exec(face_outlen_flo, "collecsites", site, str_start_time, 'face', facedata, face_outlen_int, face_max_len)
if __name__ == '__main__':
    start = time.time()
    p = Pool(30)
    # One slice boundary every 2 minutes across the whole date span.
    time_interval = curd.get_time_interval('20190310', '20191230')
    # Submit one async task per slice; the pool fans them out over 30 workers.
    for i in range(len(time_interval) - 1):
        p.apply_async(data_exec, args=(i, time_interval))
    # close()/join() must run AFTER the whole submission loop (the pasted
    # source lost its indentation, which risks closing the pool after the
    # first task); close() stops new submissions, join() waits for workers.
    p.close()
    p.join()
    end = time.time()
    print("end_time : ", end)
    print('ALL Insert Task runs %s(ms).' % ((end - start) * 1000))