Filebeat + Logstash configuration 2

  1. Start Filebeat and Logstash

Start Logstash first; otherwise Filebeat will not be able to reach Logstash's Beats port (5044 by default, 5022 in this setup). If you modify the configuration file, there is no need to restart: run bin/logstash -f config/logstash_filebeat2es.conf -r to reload the pipeline automatically and check whether parsing is correct. When using grok in Logstash, pay close attention to the literal spaces in the match pattern; they must correspond exactly to the log format and are easy to miss.

bin/logstash -f config/logstash_filebeat2es.conf


./filebeat -e -c filebeat_csdn.yml
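
Both configurations can be sanity-checked before starting. A quick sketch, assuming the file names used in this article:

bin/logstash -f config/logstash_filebeat2es.conf --config.test_and_exit   # parse the pipeline config and exit
./filebeat test config -c filebeat_csdn.yml                               # validate filebeat_csdn.yml
./filebeat test output -c filebeat_csdn.yml                               # check the connection to the Logstash output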

If an "address already in use" error is reported at startup:

1. ps -ef | grep logstash

Check whether an old Logstash instance is still running and kill it if so.

2. Under the directory set by path.data in logstash.yml there is a hidden file named .lock. It is only a cache/lock file and can simply be deleted.

If path.data was not set manually, go to the default path: cd /opt/ELK/logstash/data, then ls -alh and delete .lock. (A combined sketch of both steps follows below.)
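
The two recovery steps above as one shell sketch (the data directory is the default mentioned above; adjust it if path.data points elsewhere, and replace <pid> with the PID reported by ps):

ps -ef | grep logstash          # is an old Logstash instance still running?
kill <pid>                      # stop it if so
cd /opt/ELK/logstash/data       # default path.data
ls -alh                         # the hidden .lock file shows up here
rm .lock                        # remove the stale lock file, then restart Logstash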

Configuration:

Sample log line picked up by Filebeat:

[2020-10-29 00:01:05,203][dataQuality.py:142][get_message_error][ERROR] {"checkResult": {"dbid": "282", "error_data": "''", "check_total_num": "2999", "sql": "INSERT INTO t_check_result (id, check_rule_id, error_level, error_num, check_total_num, error_data, create_time, update_time, create_userid, update_userid, del_flag, version_consumer) VALUES ('c8c1fa0c-1936-11eb-91a9-a0369f32a65c', 1977478, 12, 0, 2999, '', '2020-10-29 00:01:05', '2020-10-29 00:01:05', '', '', 0, '')", "msg": "(1406, u\"Data too long for column 'id' at row 1\")", "error_num": "0"}, "ruleId": "1977478", "dbId": "205", "dbInfo": [{"username": "root", "status": 0, "hostName": "192.168.100.132", "dbName": "yss_datamiddle_quality", "connType": "db-mysql", "msg": "", "port": 3306}, {"username": "root", "status": 0, "hostName": "192.168.100.132", "dbName": "yss_datamiddle_standard", "connType": "db-mysql", "msg": "", "port": 3306}], "startTime": "2020-10-29 00:01:04", "sqlInfo": [{"msg": "", "dbid": "282", "result": "2999", "sql": "select count(1) as num from t_biz_norm"}, {"msg": "", "dbid": "282", "result": "0", "sql": "select count(1) from yss_datamiddle_standard.t_biz_norm where (1=1) and (chinese_name is null)"}, {"msg": "", "dbid": "282", "result": "", "sql": "select * from yss_datamiddle_standard.t_biz_norm where (1=1) and (chinese_name is null)"}], "endTime": "2020-10-29 00:01:05", "checkdbId": "282"}

filebeat.yml

filebeat.inputs:


#dataQuality-app
- type: log
  enabled: true
  paths:
    - /opt/pythonScript/dataQuality/logs/dataQuality-app*.log
  # Lines that do not start with a [yyyy-MM-dd HH:mm:ss,SSS] timestamp are
  # appended to the previous event, so multi-line entries stay together.
  multiline.pattern: '^\[\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}]'
  multiline.negate: true
  multiline.match: after
  document_type: "dataquality-app"
  fields:
    tag: dataquality-app   # referenced in Logstash as [fields][tag]
#dataQuality-data
- type: log
  enabled: true
  paths:
    - /opt/pythonScript/dataQuality/logs/dataQuality-data*.log
  multiline.pattern: '^\[\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}]'
  multiline.negate: true
  multiline.match: after
  document_type: "dataquality-data"
  fields:
     tag: dataquality-data

  
#lifeCycle
- type: log
  enabled: true
  paths:
    - /opt/pythonScript/lifeCycle/logs/*.log
  multiline.pattern: '^\[\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}]'
  multiline.negate: true
  multiline.match: after
  document_type: "lifecycle"
  fields: 
     tag: lifecycle

filebeat.config.modules:
  path: /opt/filebeat/modules.d/*.yml
  reload.enabled: false


setup.template.settings:
  index.number_of_shards: 3
 

setup.kibana:

  host: "192.168.100.154:5601"

output.logstash:
  # The Logstash hosts
  hosts: ["192.168.100.154:5022"]


processors:
  - add_host_metadata: ~
  - add_cloud_metadata: ~
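
For local debugging it can help to print events to the console instead of shipping them; a minimal sketch (temporarily replace the output.logstash section above with this, since Filebeat allows only one output at a time):

output.console:
  pretty: true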

 Log formats

dataQuality-data.log format:
format = '[%(asctime)s][%(filename)s:%(lineno)d][%(funcName)s][%(levelname)s][%(message)s]'

dataQuality-app.log format:
format = '[%(asctime)s][%(filename)s:%(lineno)d][%(funcName)s][%(levelname)s] %(message)s'
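
For reference, a minimal sketch of how the dataQuality-app.log format could be wired up with Python's standard logging module; the logger and file names here are illustrative assumptions, only the format string itself comes from the configuration above:

import logging

# Format string from above; %(asctime)s defaults to "YYYY-MM-DD HH:MM:SS,mmm",
# which is what the Filebeat multiline pattern and the grok patterns expect.
formatter = logging.Formatter(
    '[%(asctime)s][%(filename)s:%(lineno)d][%(funcName)s][%(levelname)s] %(message)s'
)

handler = logging.FileHandler('dataQuality-app.log')  # assumed file name
handler.setFormatter(formatter)

logger = logging.getLogger('dataQuality')  # assumed logger name
logger.setLevel(logging.INFO)
logger.addHandler(handler)

# Produces a line shaped like the sample log entry shown earlier.
logger.error('{"checkResult": {"dbid": "282"}}')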

 Then configure Logstash (logstash_filebeat2es.conf):

input {
	beats {
	    host => "192.168.34.14"
	    port => 5022
	    client_inactivity_timeout => 36000
	}
}

filter {
    if [fields][tag] == "dataquality-app" {

        grok {
            match => {
                "message" => "\[%{EXIM_DATE:datetime}\]\[%{GREEDYDATA:method_name}\]\[%{GREEDYDATA:def_name}\]\[%{GREEDYDATA:class_name}\]\s%{GREEDYDATA:msg}"
            }
        }
        json {
            source => "msg"            # parse the msg field as JSON
            remove_field => [ "msg" ]  # the raw msg field is no longer needed (optional)
        }
        mutate {
            # The nested checkResult object cannot be handed to the json filter directly,
            # so render it to a string in a new field first; the field name @gg is arbitrary.
            add_field => { "@gg" => "%{checkResult}" }
        }
        json {
            source => "@gg"            # parse @gg as JSON
            remove_field => [ "@gg", "checkResult" ]  # drop the now-redundant fields
        }

    }

    if [fields][tag] == "dataquality-data" {
        grok {
            match => {
                "message" => "\[%{EXIM_DATE:datetime}\]\[%{GREEDYDATA:method_name}\]\[%{GREEDYDATA:def_name}\]\[%{GREEDYDATA:class_name}\]\[%{LOGLEVEL:log_level}\]\[%{GREEDYDATA:msg}\]"
            }
        }
        date {
            match => ["datetime", "yyyy-MM-dd HH:mm:ss,SSS"]
            target => "@timestamp"
            remove_field => ["datetime"]
        }
    }


    if [fields][tag] == "lifecycle" {
        grok {
            match => {
                "message" => "\[%{EXIM_DATE:datetime}\]\[%{LOGLEVEL:log_level}\]\[%{GREEDYDATA:method_name}\]\[%{GREEDYDATA:def_name}\]\[%{GREEDYDATA:class_name}\]\[%{GREEDYDATA:msg}\]"
            }
        }
        date {
            match => ["datetime", "yyyy-MM-dd HH:mm:ss,SSS"]
            target => "@timestamp"
            remove_field => ["datetime"]
        }
    }
    
}

output {
    if [fields][tag] == "dataquality-data" {
        elasticsearch {
            hosts => "192.168.100.11:9200"
            index => "quality-data-%{+YYYY.MM.dd}"
        }
    }

    if [fields][tag] == "dataquality-app" {
        elasticsearch {
            hosts => "192.168.100.11:9200"
            index => "quality-app-%{+YYYY.MM.dd}"
        }
    }
}
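
Once events are flowing, a quick way to confirm that the daily indices are being created (assuming Elasticsearch is reachable at 192.168.100.11:9200, as configured above) is:

curl "http://192.168.100.11:9200/_cat/indices/quality-*?v"

The quality-data-* and quality-app-* indices can then be added as index patterns in Kibana (192.168.100.154:5601, per setup.kibana above).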

 

Origin: blog.csdn.net/Baron_ND/article/details/109364941