Python 分析 nginx 日志

闲着的时候写了一个分析 nginx 日志的 Python 脚本练练手,可以统计 API、IP 等的访问次数信息。

#!/usr/bin/python
# -*- coding: UTF-8 -*-
'''
Author: yufei
Date: 2016-03-02
Analyze an nginx log and report access counts per API, IP, etc.
'''

#!/usr/bin/python

import os
import fileinput
import re
import json
import sys

# Path of the nginx log to analyze, taken from the first command-line
# argument (raises IndexError when the script is run without one).
dir_log = sys.argv[1]
# Echo the path. The parenthesized single-argument form prints the same text
# as the Python 2 print statement and is also valid Python 3 syntax.
print(dir_log)

# Named-group bodies, one per field of a log line. Each fragment leaves out
# its enclosing parentheses; they are added when the full pattern is built.
ipP = r"?P<ip>[\d.]*"
uidP = r"?P<uid>[\d.-]*"
timeP = r"?P<time>\[[^\[\]]*\]"
servernaemeP = r'?P<servernaeme>[\w.]*'
requestP = r'?P<request>\"[^\"]*\"'
statusP = r"?P<status>\d+"
bodyBytesSentP = r"?P<bodyBytesSent>\d+"
referP = r'?P<refer>\"[^\"]*\"'
userAgentP = r'?P<userAgent>\"[^\"]*\"'
phpP = r"?P<php>[\d.:]*"
qP = r"?P<q>[\d.]*"
hP = r"?P<h>[\d.]*"

# Full line pattern: the ip group, a literal " - ", then the remaining eleven
# groups separated by single spaces. Spaces are backslash-escaped because
# re.VERBOSE would otherwise ignore them.
nginxLogPattern = re.compile(
    r"(%s)\ -\ " % ipP
    + r"\ ".join(
        "(%s)" % fragment
        for fragment in (
            uidP,
            timeP,
            servernaemeP,
            requestP,
            phpP,
            statusP,
            bodyBytesSentP,
            qP,
            hP,
            referP,
            userAgentP,
        )
    ),
    re.VERBOSE,
)

# Hit counters filled in by processLog(); each maps an observed value to the
# number of log lines it appeared in.
StatusDict = dict()  # keyed by response status
UidDict = dict()     # keyed by uid (only uids shorter than 10 characters)
sname = dict()       # keyed by server name
ipdict = dict()      # keyed by client ip
Apidict = dict()     # keyed by the api path extracted from the request
# Container the __main__ section fills with the counters above before
# serializing it to JSON.
datas = dict()

def processLog(dir_log):
    """Tally access counts for every parseable line of an nginx log.

    Each line read from ``dir_log`` is matched against ``nginxLogPattern``.
    For matching lines the module-level counters are incremented:
    ``StatusDict`` by status, ``sname`` by server name, ``ipdict`` by ip,
    ``UidDict`` by uid (only when the uid is shorter than 10 characters) and
    ``Apidict`` by the api path (only when the request contains one, see
    below). Lines that do not match the pattern are skipped silently.

    Args:
        dir_log: Path (or paths) of the log file, passed to
            ``fileinput.input``.

    Returns:
        None. Results are accumulated in the module-level dictionaries.
    """
    # Compiled once instead of on every line. Captures the text between
    # "/<run of the characters a, p, i, 2, />/" and the last "?" of the
    # request, so requests without a query string are never counted.
    # NOTE(review): "[api2/]" is a character class, not the literal words
    # "api"/"api2" -- presumably the latter was intended; confirm before
    # tightening it.
    apiPattern = re.compile(r'.*\/[api2/]*\/(.*)\?.*')

    lines = fileinput.input(dir_log)
    try:
        for line in lines:
            matchs = nginxLogPattern.match(line)
            if matchs is None:
                # Not in the expected log format: ignore the line.
                continue

            api = apiPattern.match(matchs.group("request"))
            if api:
                GetResponseStatusCount(Apidict, api.group(1))

            GetResponseStatusCount(StatusDict, matchs.group("status"))
            GetResponseStatusCount(sname, matchs.group("servernaeme"))
            GetResponseStatusCount(ipdict, matchs.group("ip"))

            uid = matchs.group("uid")
            if len(uid) < 10:
                GetResponseStatusCount(UidDict, uid)
    finally:
        # Always reset the global fileinput state, even when reading fails;
        # otherwise the next fileinput.input() call raises
        # "input() already active".
        fileinput.close()

def GetResponseStatusCount(dit, status):
    """Increment the occurrence counter stored under ``status`` in ``dit``.

    Despite the name, this is used as a generic counter for any key
    (status codes, ips, uids, server names, api paths).

    Args:
        dit: Dictionary mapping keys to integer counts; modified in place.
        status: Key whose count is increased by one. A key seen for the
            first time starts at 1.

    Returns:
        None.
    """
    # dict.get works on both Python 2 and 3; dict.has_key was removed in
    # Python 3.
    dit[status] = dit.get(status, 0) + 1

if __name__ == "__main__":
    # Fill the module-level counters from the log given on the command line.
    processLog(dir_log)

    # Bundle every counter under a short key and serialize the bundle.
    datas["ip"] = ipdict
    datas["stus"] = StatusDict
    datas["sname"] = sname
    datas["uid"] = UidDict
    datas["api"] = Apidict
    # From here on `datas` is the JSON text, no longer the dict.
    datas = json.dumps(datas)

    # The report is JSON text even though the file name ends in .html.
    logpath = '/data/logs/py_nginx.html'
    # The context manager closes the file even if the write fails.
    with open(logpath, 'w') as f:
        f.write(datas)

猜你喜欢

转载自blog.csdn.net/yufei6808/article/details/80662793