Scenario:
There are two files, A (parsed_pvlog) and B (pdetail).
When A.uuid == B.uuid, one field of file A (cpssrc) is added to the matching row of file B.
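Method one:
Read every row of A and of B into a dictionary keyed by uu, with the whole row (all fields, parsed into a dict) as the value. This gives two nested dicts of the form {uu value: {row parsed into a dict}}.
Traverse the k/v pairs of A: if db.get(k), then db[k]['cpssrc'] = v.get('cpssrc', '').
Loading both A and B into dicts uses a relatively large amount of memory, and a final pass over the k/v pairs is still needed to write the result.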
Method two:
Convert each line of file A into a dictionary, kvs = parse_logstr_to_dict(line); if kvs.get('cpssrc', '') is non-empty, store it in the dictionary userCps with the uuid value as the key and the cpssrc value as the value.
Traverse each line of file B, converting it into a dictionary, kvs = parse_logstr_to_dict(line); take that row's uuid, look up the cpssrc value for that uuid in userCps, and put it into the row's dictionary kvs under the key cpssrc.
Only the single dictionary userCps ({uuid value: cpssrc value}) is held in memory; file B is read directly, and the field is added to each line's dict as the line is converted.
#coding=utf-8
import os
import sys
from datetime import datetime, timedelta
# Python 2 idiom: re-import sys so that setdefaultencoding() is reachable
# again, then make UTF-8 the default codec for implicit str/unicode conversion.
# NOTE(review): reload() is not a builtin and sys.setdefaultencoding() does
# not exist on Python 3 -- these two lines only work on Python 2.
reload(sys)
sys.setdefaultencoding('utf8')
# Parse a log line into a dict. Input format: k1=v1 \t k2=v2 \t k3=v3 \t ...
def parse_logstr_to_dict(logstr, rdsp='\t'):
    """Parse one ``k1=v1<rdsp>k2=v2<rdsp>...`` log line into a dict.

    Args:
        logstr: A single log line; a trailing line terminator is allowed.
        rdsp: Delimiter between ``key=value`` fields (tab by default).

    Returns:
        A dict mapping each key to its value, both as strings. Fields
        without an ``=`` are skipped, only the first ``=`` of a field
        separates the key from the value, and when a key repeats the last
        occurrence wins.
    """
    log_dict = {}
    # Strip '\r' as well as '\n' so that a CRLF-terminated line does not
    # leave a stray carriage return glued to the last value.
    for kv in logstr.strip('\r\n').split(rdsp):
        k, sep, v = kv.partition('=')
        if not sep:
            # No '=' in this field: nothing to record.
            continue
        log_dict[k] = v
    return log_dict
# Convert a dict back into a log string of key=value fields joined by the separator
def trans_dict_2_logstr(log_dict, rdsp='\t'):
    """Serialize a dict into a single ``key=value`` log string.

    Args:
        log_dict: Mapping of field names to values; each value is rendered
            with ``%s`` formatting.
        rdsp: Delimiter placed between the ``key=value`` fields (tab by
            default).

    Returns:
        The fields joined by ``rdsp`` in the dict's iteration order, with no
        trailing line terminator; an empty dict yields an empty string.
    """
    fields = ['%s=%s' % (key, value) for key, value in log_dict.items()]
    return rdsp.join(fields)
def add_cpssrc_to_pdetail(log1, log2, outFile):
    """Append the ``cpssrc`` field of file A to the matching rows of file B.

    Args:
        log1: Path of file A; rows that carry both ``uu`` and ``cpssrc``
            feed the lookup table.
        log2: Path of file B, whose rows are copied to the output.
        outFile: Path of the output file; it is overwritten.

    Every row of ``log2`` is written exactly once. When its ``uu`` field
    equals the ``uu`` of a row in ``log1``, a ``cpssrc=<value>`` field is
    appended to the row; if several rows of ``log1`` share that ``uu`` the
    last one wins. Rows of ``log2`` without a match are written unchanged.
    """
    # Read file A once into {uu: cpssrc} instead of rescanning it for every
    # row of file B.
    cps_by_uu = {}
    with open(log1) as src:
        for line1 in src:
            line1Dict = parse_logstr_to_dict(line1)
            # Rows missing either field cannot contribute a value.
            if 'uu' in line1Dict and 'cpssrc' in line1Dict:
                cps_by_uu[line1Dict['uu']] = line1Dict['cpssrc']

    with open(log2) as rows:
        with open(outFile, "w") as f:
            for line2 in rows:
                # Match on the parsed uu field rather than on a substring of
                # the whole line, which could hit unrelated fields.
                uu = parse_logstr_to_dict(line2).get('uu')
                line2 = line2.strip("\n")
                if uu in cps_by_uu:
                    line2 = line2 + "\t" + "cpssrc=" + cps_by_uu[uu]
                f.write(line2 + "\n")
# add_cpssrc_to_pdetail("c://parsed_pvlog.log","c://pdetail.log","c://hello.log")
# Method one
def load(log1, log2, outFile):
    """Method one: merge ``cpssrc`` from file A into file B via dicts keyed by ``uu``.

    Args:
        log1: Path of file A (the source of ``cpssrc``).
        log2: Path of file B (the rows to enrich).
        outFile: Path of the output file; it is overwritten.

    All of file B is held in memory as ``{uu: row dict}``, so when several
    rows of B share a ``uu`` only the last one is written. For every ``uu``
    that also occurs in file A, the row's ``cpssrc`` is set to the value from
    the last row of A with that ``uu`` (the empty string when that row has no
    ``cpssrc``). Rows without a ``uu`` field, such as blank lines, are
    skipped in both files. Output rows are re-serialized with
    ``trans_dict_2_logstr`` and terminated with CR LF.
    """
    # File A: only cpssrc is ever read from it, so keep just that field
    # per uu instead of the whole parsed row.
    da = {}
    with open(log1) as f1:
        for s in f1:
            myd = parse_logstr_to_dict(s)
            if 'uu' in myd:
                da[myd['uu']] = myd.get('cpssrc', '')

    # File B: the full parsed row per uu, because every field is written back.
    db = {}
    with open(log2) as f2:
        for s in f2:
            myd = parse_logstr_to_dict(s)
            if 'uu' in myd:
                db[myd['uu']] = myd

    for k, v in da.items():
        if k in db:
            db[k]['cpssrc'] = v

    # Open the output only after both inputs were read successfully, so a
    # missing input does not leave a truncated output file behind.
    with open(outFile, "w") as f:
        for v in db.values():
            f.write(trans_dict_2_logstr(v) + "\r\n")
# Run method one on the hard-coded sample logs; this executes whenever the
# module is run or imported.
load(
    log1="c://parsed_pvlog.log",
    log2="c://pdetail.log",
    outFile="c://hello4.log",
)
# Method two
# uuid --> cpssrc
pvlog = "c://parsed_pvlog.log"
oldPdetailLog = "c://pdetail.log"
newPdetailLog = "c://my.log"

# Pass 1: build the only in-memory structure, {uu: cpssrc}, from file A.
# Rows whose cpssrc is missing or empty are ignored; for a repeated uu the
# last non-empty cpssrc wins.
userCps = {}
with open(pvlog) as f:
    for line in f:
        kvs = parse_logstr_to_dict(line)
        if kvs.get('cpssrc', ''):
            userCps[kvs.get('uu')] = kvs.get('cpssrc')

# Pass 2: stream file B row by row, set cpssrc from the lookup ('' when the
# uu is unknown, replacing any cpssrc the row already had) and write the row
# out immediately. The output is opened only here, after file A was read, and
# both handles are closed even if a row fails to process.
with open(oldPdetailLog) as f:
    with open(newPdetailLog, "w") as fw:
        for line in f:
            kvs = parse_logstr_to_dict(line)
            uuid = kvs.get('uu', '')
            kvs['cpssrc'] = userCps.get(uuid, '')
            fw.write(trans_dict_2_logstr(kvs))
            fw.write('\r\n')