Python: Storing Log Data into ElasticSearch at a Specified Interval

Main working code:

import json
import os
import re
import time

import requests
import yaml

# host_ip = ""

def get_log_path_dict():
    # Map each module directory directly under the log root to its log file path.
    # Returning inside the os.walk loop means only the top-level directory is inspected.
    avira_log_path = "/home/xxx/logs/xxx"
    for root, dirs, files in os.walk(avira_log_path):
        log_path_dict = dict()
        for dir_name in dirs:
            dir_path = os.path.join(root, dir_name)
            log_path = os.path.join(dir_path, "xxx.log")
            log_path_dict[dir_name] = log_path
        return log_path_dict


def time_msg2timestamp(time_msg):
    # Convert a "YYYY-MM-DD HH:MM:SS" string into a Unix timestamp (seconds).
    time_list = time.strptime(time_msg, "%Y-%m-%d %H:%M:%S")
    timestamp = int(time.mktime(time_list))
    return timestamp


def scan_log_path(dir_name, log_path):
    # Compare the log file's current mtime with the value recorded in the yaml;
    # only re-index the module when the file has actually changed.
    with open(log_path, "r", encoding="utf-8") as file_object:
        log_list = re.split(r'\[INFO\]', file_object.read())[1:]
        log_modify_time_pre = yaml_obj[dir_name]
        log_modify_time_now = os.path.getmtime(log_path)
        if log_modify_time_pre != log_modify_time_now:
            # Drop and recreate the module's index, then re-post every entry.
            delete_module_index(dir_name)
            create_module_index(dir_name)
            yaml_obj[dir_name] = log_modify_time_now
            # Compile the timestamp pattern once instead of per entry.
            time_msg_pattern = re.compile(r'\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}')
            for log in log_list:
                log = '[INFO]' + log
                time_msg_match = time_msg_pattern.search(log)
                if time_msg_match is None:
                    # Skip entries without a parsable timestamp.
                    continue
                log_dict = dict()
                log_dict["@timestamp"] = time_msg2timestamp(time_msg_match.group())
                log_dict["log_msg"] = log
                print(log_dict)
                put_log_into_elasticsearch(dir_name, log_dict)


def put_log_into_elasticsearch(module_name, log_dict):
    # POST one log document into the module's index (type "log", pre-7.x style).
    url = "http://127.0.0.1:9200/{}/log/".format(module_name.lower())
    rep = session.post(url, json=log_dict)
    print(rep.status_code)
    print(rep.text)


def create_module_index(module_name):
    # Create the module index with the mapping stored in create_index.json (shown below).
    url = "http://127.0.0.1:9200/{}".format(module_name.lower())
    with open("./create_index.json", "r", encoding="utf-8") as file_object:
        json_obj = json.load(file_object)
        rep = session.put(url, json=json_obj)
        print(rep.status_code)
        print(rep.text)


def delete_module_index(module_name):
    # Delete the module's index (the wildcard also removes any suffixed variants).
    url = "http://127.0.0.1:9200/{}*".format(module_name.lower())
    rep = session.delete(url)
    print(rep.status_code)
    print(rep.text)


if __name__ == "__main__":
    session = requests.session()

    while True:
        # Read the yaml file that records each log file's last modification time
        with open("./last_query_log_time.yaml", "r", encoding="utf-8") as yaml_file:
            yaml_obj = yaml.safe_load(yaml_file)

        # Iterate over the log file in each module directory
        for dir_name, log_path in get_log_path_dict().items():
            # Scan the log file for changes; if there are new log records, post them to ElasticSearch
            scan_log_path(dir_name, log_path)

        # Write the updated modification times back to the yaml file
        with open("./last_query_log_time.yaml", "w", encoding="utf-8") as yaml_file:
            yaml.dump(yaml_obj, yaml_file)

        time.sleep(10)
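
To check that documents are actually arriving, the sketch below queries one module's index through the standard _search API. It assumes the same local single-node cluster on 127.0.0.1:9200; show_recent_logs is a hypothetical helper, not part of the original script.

import requests

def show_recent_logs(module_name, size=5):
    # match_all query sorted by @timestamp, newest documents first
    url = "http://127.0.0.1:9200/{}/_search".format(module_name.lower())
    body = {
        "size": size,
        "sort": [{"@timestamp": {"order": "desc"}}],
        "query": {"match_all": {}}
    }
    rep = requests.get(url, json=body)
    print(rep.status_code)
    print(rep.json())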

The format of the data POSTed to ES, defined by the index mapping in create_index.json:

{
  "mappings": {
    "log": {
      "properties": {
        "@timestamp": {
          "type": "date",
          "format": "epoch_second"
        },
        "log_msg": {
          "type": "text",
          "index": true
        }
      }
    }
  }
}
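
For reference, a hypothetical example of a single document as built by scan_log_path and accepted by this mapping; the timestamp and log text below are made up for illustration, and the real value depends on the local timezone because time.mktime is used:

example_doc = {
    "@timestamp": 1527681600,  # epoch seconds, matching the epoch_second date format
    "log_msg": "[INFO] 2018-05-30 12:00:00 xxx module finished scanning"
}
# put_log_into_elasticsearch("axxxx", example_doc) would POST this document
# to http://127.0.0.1:9200/axxxx/log/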

last_query_log_time.yaml stores the last modification time of each module's log file.
On every scan it is compared with the current modification time; if the file has not been modified, nothing is done.

{axxxx: 0, bxxxx: 0, bxxx: 0,
  cxxxxxx: 0, dxxxxxr: 0, mxxxxxxx: 0}
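
If last_query_log_time.yaml does not exist yet, one way to bootstrap it is to seed every module with 0 so that the first scan re-indexes everything. A minimal sketch, assuming the module names come from get_log_path_dict above; init_last_query_log_time is a hypothetical helper, not part of the original script:

import os
import yaml

def init_last_query_log_time(path="./last_query_log_time.yaml"):
    # Seed the yaml with 0 for each module directory, forcing a full index on the first scan.
    if not os.path.exists(path):
        with open(path, "w", encoding="utf-8") as yaml_file:
            yaml.dump({name: 0 for name in get_log_path_dict()}, yaml_file)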


Reposted from blog.csdn.net/HeatDeath/article/details/80547725