# Parse nginx access logs with Python and insert the aggregated results into a database via Django.
#coding=utf-8
import os
import sys
import time
from kafka import KafkaConsumer
import traceback
class nginx_tracker(object):
    """
    Entry-point monitor for nginx access logs.

    Consumes raw access-log lines from Kafka, aggregates them into
    one-minute buckets held in ``self.data`` (keyed by the minute's epoch
    timestamp) and writes each finished bucket through the ``nginx_log``
    model.

    ``gettime`` and ``nginx_log`` are used but are not defined in this
    class; they are expected to be provided elsewhere in the module.
    """

    # (exclusive upper bound in ms, counter key), checked in ascending order.
    # Anything at or above the last bound is counted as
    # 'response_time_ge_10000'.
    _LATENCY_BUCKETS = (
        (300, 'response_time_300'),
        (500, 'response_time_500'),
        (1000, 'response_time_1000'),
        (3000, 'response_time_3000'),
        (5000, 'response_time_5000'),
        (10000, 'response_time_10000'),
    )

    def __init__(self):
        # minute timestamp -> aggregated counters (see _new_bucket)
        self.data = {}
        self.applog_kafka_prd = [
            '192.168.1.100:9092',
        ]
        self.applog_zookeeper_prd = [
            '192.168.1.100:2181',
        ]
        # messages consumed since the last flush
        self.msg_num = 0
        # minute currently being aggregated (epoch seconds, minute-aligned)
        self.now = 0
        # last wall-clock minute for which the progress line was printed
        self.count_time = 0
        self.pretime = 0

    @staticmethod
    def _new_bucket():
        """Return an empty per-minute aggregation record."""
        return {
            'pv': 0,
            # Kept under the historical key name; a set makes the
            # "seen this IP already?" check O(1).
            'ip_list': set(),
            '2xx': 0,
            '3xx': 0,
            '4xx': 0,
            '5xx': 0,
            'other': 0,
            'request_time': 0,
            'response_time_300': 0,
            'response_time_500': 0,
            'response_time_1000': 0,
            'response_time_3000': 0,
            'response_time_5000': 0,
            'response_time_10000': 0,
            'response_time_ge_10000': 0,
            'detail_5xx': [],
            'detail_request_time': [],
        }

    def consumer(self):
        """
        Read messages from Kafka forever and feed each one to process_log.

        Once per wall-clock minute a progress line with the current
        message counter is printed.
        """
        broker_list = self.applog_kafka_prd
        topic = "nginx-log-detector"
        kafka = KafkaConsumer(
            topic,
            metadata_broker_list=broker_list,
            group_id='nginx_monitor2',
            auto_commit_enable=True,
            auto_commit_interval_ms=5000,
            auto_offset_reset='smallest',
        )
        # Expected log line format:
        # 118.249.144.105 - - [05/Apr/2016:09:06:29 +0800] "GET /ywr/http/common/Login?hostSessionId=1401cbdebe7a42db99e66319f782cea8 HTTP/1.1" "200" 73 "-" "Dalvik/1.6.0 (Linux; U; Android 4.4.2; SM-N9002 Build/KOT49H)" "-" "" "0.79" 172.29.4.183:30074 730 465 "0.380"
        while True:
            for m in kafka:
                msg = m.value
                self.msg_num += 1
                self.process_log(msg)
                kafka.task_done(m)
                # NOTE(review): the original indentation was lost; running
                # this check once per message is the assumed placement.
                now_time = time.time()
                now_time -= now_time % 60
                if self.count_time < now_time:
                    self.count_time = now_time
                    print(gettime(time.time()) + " " + str(self.msg_num))

    def process_log(self, msg):
        """
        Parse one access-log line and fold it into its minute bucket.

        When the line belongs to a later minute than ``self.now``, the
        buckets collected so far are flushed with write_db and the
        in-memory state is reset before the line is counted.

        Any exception raised while handling the line is printed together
        with the line itself and then dropped, so one malformed line does
        not stop the consumer.
        """
        try:
            # Convert the log time to a minute-aligned timestamp so that
            # lines can be compared and grouped.
            msg_time = msg.split(' ', 4)[3].replace("[", "")
            msg_time = int(time.mktime(time.strptime(msg_time, "%d/%b/%Y:%H:%M:%S")))
            msg_time -= msg_time % 60
            # Aggregate per minute; flush to the database when a new
            # minute starts.
            if msg_time > self.now:
                print("%s %s write_db %d" % (gettime(time.time()), gettime(msg_time), self.msg_num))
                self.write_db()
                self.now = msg_time
                self.data = {}
                self.msg_num = 0
            # Client IP: first space-separated field.
            remote_ip = msg.split(" ", 1)[0]
            # Status code: second double-quoted field.
            code = msg.split('"', 4)[3]
            # Request time: last field, seconds in quotes -> milliseconds.
            request_time = float(msg.split()[-1].replace('"', '')) * 1000

            if msg_time not in self.data:
                self.data[msg_time] = self._new_bucket()
            bucket = self.data[msg_time]

            bucket['pv'] += 1
            bucket['ip_list'].add(remote_ip)

            status_class = code[0]
            if status_class in ('2', '3', '4', '5'):
                bucket[status_class + "xx"] += 1
            else:
                bucket['other'] += 1

            # Total time, used later to compute the average.
            bucket['request_time'] += request_time
            for upper_bound, key in self._LATENCY_BUCKETS:
                if request_time < upper_bound:
                    bucket[key] += 1
                    break
            else:
                # Everything >= 10000 ms, including exactly 10000, which
                # previously fell through without being counted.
                bucket['response_time_ge_10000'] += 1

            if status_class == '5':
                bucket['detail_5xx'].append(msg)
            if request_time > 3000:
                bucket['detail_request_time'].append(msg)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still stop the process.
            traceback.print_exc()
            print(msg)

    def write_db(self):
        """
        Persist every bucket in ``self.data`` through the nginx_log model.

        If a row for the minute already exists (late-arriving data), the
        new counters are added to the stored ones and the average request
        time is recomputed as a pv-weighted mean; otherwise a new row is
        created. Only the first 10 detail lines are stored per field.
        """
        for each_time in self.data:
            info = self.data[each_time]
            # Late data for a minute that was already written: merge it.
            if nginx_log.objects.filter(time=each_time):
                now_result = nginx_log.objects.get(time=each_time)
                total_pv = info['pv'] + now_result.pv
                request_time = (info['request_time'] + now_result.pv * now_result.request_time) / total_pv
                # SECURITY(review): eval() runs on text read back from the
                # database, which was built from raw log lines. Consider
                # ast.literal_eval or a JSON column instead.
                merged_5xx = (eval(now_result.detail_5xx) + info['detail_5xx'])[0:10]
                merged_slow = (eval(now_result.detail_request_time) + info['detail_request_time'])[0:10]
                # NOTE(review): uv is not updated here; only the count is
                # stored, so the unique-IP sets cannot be merged.
                nginx_log.objects.filter(time=each_time).update(
                    pv=total_pv,
                    code_2xx=info['2xx'] + now_result.code_2xx,
                    code_3xx=info['3xx'] + now_result.code_3xx,
                    code_4xx=info['4xx'] + now_result.code_4xx,
                    code_5xx=info['5xx'] + now_result.code_5xx,
                    code_other=info['other'] + now_result.code_other,
                    request_time=request_time,
                    response_time_300=info['response_time_300'] + now_result.response_time_300,
                    response_time_500=info['response_time_500'] + now_result.response_time_500,
                    response_time_1000=info['response_time_1000'] + now_result.response_time_1000,
                    response_time_3000=info['response_time_3000'] + now_result.response_time_3000,
                    response_time_5000=info['response_time_5000'] + now_result.response_time_5000,
                    response_time_10000=info['response_time_10000'] + now_result.response_time_10000,
                    response_time_ge_10000=info['response_time_ge_10000'] + now_result.response_time_ge_10000,
                    detail_5xx=str(merged_5xx),
                    detail_request_time=str(merged_slow),
                )
            else:
                # First write for this minute.
                nginx_log.objects.create(
                    time=each_time,
                    pv=info['pv'],
                    uv=len(info['ip_list']),
                    code_2xx=info['2xx'],
                    code_3xx=info['3xx'],
                    code_4xx=info['4xx'],
                    code_5xx=info['5xx'],
                    code_other=info['other'],
                    request_time=info['request_time'] / info['pv'],
                    response_time_300=info['response_time_300'],
                    response_time_500=info['response_time_500'],
                    response_time_1000=info['response_time_1000'],
                    response_time_3000=info['response_time_3000'],
                    response_time_5000=info['response_time_5000'],
                    response_time_10000=info['response_time_10000'],
                    response_time_ge_10000=info['response_time_ge_10000'],
                    detail_5xx=str(info['detail_5xx'][0:10]),
                    detail_request_time=str(info['detail_request_time'][0:10]),
                )
# Script entry point: build the tracker and start consuming from Kafka.
if __name__ == "__main__":
    nginx_tracker().consumer()