'''
# Method 1 (disabled: the enclosing triple quotes turn it into a string).
# Reads base.log in text mode, up to 8*1024*1024 characters per read() call,
# and adds up the newline characters found in each chunk.
# NOTE: the total is the number of newline characters, so a final line that
# does not end with a newline is not included in it.
import time
time_start=time.time()
count=0
fp=open("base.log","r")
while 1:
    buffer=fp.read(8*1024*1024)
    # read() returns an empty string at end of file
    if not buffer:
        break
    count+=buffer.count('\n')
    #print("count:"+str(count))
print("all count:"+str(count))
time_end=time.time()
print("use time:", time_end - time_start)
print("over")
fp.close()
'''
import time


def count_lines(path):
    """Return the number of lines in the text file at *path*.

    The file is opened in text mode and consumed one line at a time, so
    memory use does not grow with the size of the file. A final line that
    has no trailing newline still counts as a line; an empty file yields 0.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # `with` closes the handle even when reading raises part-way through.
    with open(path, "r") as fr:
        # Iterating a file object yields the same lines that repeated
        # readline() calls would, without the hand-written loop.
        return sum(1 for _ in fr)


def main():
    """Count the lines of base.log and print the count and elapsed time."""
    time_start2 = time.time()
    count2 = count_lines("base.log")
    print("all count2:" + str(count2))
    time_end2 = time.time()
    print("use time:", time_end2 - time_start2)
    print("over")


if __name__ == "__main__":
    main()
分为两种方法,前面一种是网上找的比较高大上的方法,发现只有在文件小的时候效率才比较好
后面一种方法是最朴实的一行一行算,效率还可以。统计的是一个1.5G大小的文件,千万级别。两种方法的结果相差1,是因为第一种方法统计的是换行符的个数,而第二种方法统计的是行数,文件最后一行末尾没有换行符时行数会多1。结果如下所示:
>>>
========= RESTART: E:/02-公司事务资料/2-ORBI/log analysis/count_number.py =========
all count:12303830
use time: 21.817627429962158
over
all count2:12303831
use time: 12.469038963317871
over
>>>
========= RESTART: E:/02-公司事务资料/2-ORBI/log analysis/count_number.py =========
all count:12303830
use time: 21.56630301475525
over
>>>
========= RESTART: E:/02-公司事务资料/2-ORBI/log analysis/count_number.py =========
all count2:12303831
use time: 13.03462815284729
over
>>>