- Start Filebeat and Logstash
Start Logstash first; otherwise Filebeat will not be able to connect to Logstash's Beats port (5022 in this setup). To reload the configuration without restarting, run `bin/logstash -f config/logstash_filebeat2es.conf -r` and check whether parsing works correctly. When using grok in Logstash, pay close attention to the literal spaces in the match pattern — they must correspond exactly to the log format and are easy to overlook.
bin/logstash -f config/logstash_filebeat2es.conf
./filebeat -e -c filebeat_csdn.yml
If an "address already in use" error is reported at startup:
1. Run `ps -ef | grep logstash` to check whether another Logstash instance is already running, and kill it if so.
2. Under the `data.path` directory configured in logstash.yml there is a hidden cache file named `.lock`; delete it.
If `data.path` was not set manually, use the default location: `cd /opt/ELK/logstash/data`, then `ls -alh` and delete the `.lock` file.
Configuration:
Filebeat log format (sample line):
[2020-10-29 00:01:05,203][dataQuality.py:142][get_message_error][ERROR] {"checkResult": {"dbid": "282", "error_data": "''", "check_total_num": "2999", "sql": "INSERT INTO t_check_result (id, check_rule_id, error_level, error_num, check_total_num, error_data, create_time, update_time, create_userid, update_userid, del_flag, version_consumer) VALUES ('c8c1fa0c-1936-11eb-91a9-a0369f32a65c', 1977478, 12, 0, 2999, '', '2020-10-29 00:01:05', '2020-10-29 00:01:05', '', '', 0, '')", "msg": "(1406, u\"Data too long for column 'id' at row 1\")", "error_num": "0"}, "ruleId": "1977478", "dbId": "205", "dbInfo": [{"username": "root", "status": 0, "hostName": "192.168.100.132", "dbName": "yss_datamiddle_quality", "connType": "db-mysql", "msg": "", "port": 3306}, {"username": "root", "status": 0, "hostName": "192.168.100.132", "dbName": "yss_datamiddle_standard", "connType": "db-mysql", "msg": "", "port": 3306}], "startTime": "2020-10-29 00:01:04", "sqlInfo": [{"msg": "", "dbid": "282", "result": "2999", "sql": "select count(1) as num from t_biz_norm"}, {"msg": "", "dbid": "282", "result": "0", "sql": "select count(1) from yss_datamiddle_standard.t_biz_norm where (1=1) and (chinese_name is null)"}, {"msg": "", "dbid": "282", "result": "", "sql": "select * from yss_datamiddle_standard.t_biz_norm where (1=1) and (chinese_name is null)"}], "endTime": "2020-10-29 00:01:05", "checkdbId": "282"}
filebeat_csdn.yml
filebeat.inputs:
  # dataQuality-app: application log of the data-quality Python scripts.
  - type: log
    enabled: true
    paths:
      - /opt/pythonScript/dataQuality/logs/dataQuality-app*.log
    # Multiline join: any line that does NOT start with a
    # "[YYYY-MM-DD HH:MM:SS,mmm]" timestamp is appended to the previous event
    # (stack traces, wrapped JSON payloads, etc.).
    multiline.pattern: '^\[\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}]'
    multiline.negate: true
    multiline.match: after
    # NOTE(review): document_type was removed in Filebeat 6.x; the Logstash
    # routing below relies on fields.tag instead — confirm this option is
    # still accepted by the Filebeat version in use.
    document_type: "dataquality-app"
    fields:
      # Routing tag consumed by the Logstash filter/output conditionals.
      tag: dataquality-app
  # dataQuality-data: data-check result log (same multiline convention).
  - type: log
    enabled: true
    paths:
      - /opt/pythonScript/dataQuality/logs/dataQuality-data*.log
    multiline.pattern: '^\[\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}]'
    multiline.negate: true
    multiline.match: after
    document_type: "dataquality-data"
    fields:
      tag: dataquality-data
  # lifeCycle: life-cycle script logs (same multiline convention).
  - type: log
    enabled: true
    paths:
      - /opt/pythonScript/lifeCycle/logs/*.log
    multiline.pattern: '^\[\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}]'
    multiline.negate: true
    multiline.match: after
    document_type: "lifecycle"
    fields:
      tag: lifecycle

filebeat.config.modules:
  path: /opt/filebeat/modules.d/*.yml
  reload.enabled: false

setup.template.settings:
  index.number_of_shards: 3

setup.kibana:
  host: "192.168.100.154:5601"

# Ship to Logstash (must be started first — see the note at the top).
output.logstash:
  # The Logstash hosts
  hosts: ["192.168.100.154:5022"]

processors:
  - add_host_metadata: ~
  - add_cloud_metadata: ~
Validation rules
dataQuality-data.log format:
format = '[%(asctime)s][%(filename)s:%(lineno)d][%(funcName)s][%(levelname)s][%(message)s]'
dataQuality-app.log format:
format='[%(asctime)s][%(filename)s:%(lineno)d][%(funcName)s][%(levelname)s]/s%(message)s'
Then configure logstash
# Beats input: receives events shipped by Filebeat.
# NOTE(review): filebeat_csdn.yml sends to 192.168.100.154:5022, but this
# listener binds 192.168.34.14 — confirm the bind address matches the host
# Logstash actually runs on, or bind 0.0.0.0.
input {
beats {
host => "192.168.34.14"
port => 5022
# Keep idle Filebeat connections open for up to 10 hours (default is 60 s).
client_inactivity_timeout => 36000
}
}
# Route each event by the fields.tag set in filebeat_csdn.yml and split the
# bracketed log prefix into structured fields.
filter {
  # dataquality-app: "[ts][file:line][func][LEVEL] {json payload}"
  if [fields][tag] == "dataquality-app" {
    grok {
      match => {
        # NOTE(review): judging by the sample log line, the 4th bracket holds
        # the log LEVEL, but the field has historically been named
        # "class_name" — kept unchanged for index compatibility.
        "message" => "\[%{EXIM_DATE:datetime}\]\[%{GREEDYDATA:method_name}\]\[%{GREEDYDATA:def_name}\]\[%{GREEDYDATA:class_name}\]\s%{GREEDYDATA:msg}"
      }
    }
    json {
      source => "msg"            # parse the JSON payload captured in msg
      remove_field => [ "msg" ]  # drop the raw string once parsed
    }
    mutate {
      # The nested checkResult object cannot be handed to the json filter
      # directly, so first serialize it into a temporary string field.
      add_field => {"@gg" => "%{checkResult}"}
    }
    json {
      source => "@gg"
      remove_field => [ "@gg","checkResult" ]  # clean up the temporaries
    }
    # Fix: use the log's own timestamp as @timestamp, consistent with the
    # other two branches; previously app events kept the ingestion time.
    date {
      match => ["datetime", "yyyy-MM-dd HH:mm:ss,SSS"]
      target => "@timestamp"
      remove_field => ["datetime"]
    }
  }
  # dataquality-data: "[ts][file:line][func][LEVEL][message]"
  if [fields][tag] == "dataquality-data" {
    grok {
      match => {
        # NOTE(review): the documented format has 5 bracket groups but this
        # pattern captures 6; GREEDYDATA makes the match ambiguous — verify
        # the resulting fields against real events.
        "message" => "\[%{EXIM_DATE:datetime}\]\[%{GREEDYDATA:method_name}\]\[%{GREEDYDATA:def_name}\]\[%{GREEDYDATA:class_name}\]\[%{LOGLEVEL:log_level}\]\[%{GREEDYDATA:msg}\]"
      }
    }
    date {
      match => ["datetime", "yyyy-MM-dd HH:mm:ss,SSS"]
      target => "@timestamp"
      remove_field => ["datetime"]
    }
  }
  # lifecycle: level appears in the second bracket for these logs.
  if [fields][tag] == "lifecycle" {
    grok {
      match => {
        "message" => "\[%{EXIM_DATE:datetime}\]\[%{LOGLEVEL:log_level}\]\[%{GREEDYDATA:method_name}\]\[%{GREEDYDATA:def_name}\]\[%{GREEDYDATA:class_name}\]\[%{GREEDYDATA:msg}\]"
      }
    }
    date {
      match => ["datetime", "yyyy-MM-dd HH:mm:ss,SSS"]
      target => "@timestamp"
      remove_field => ["datetime"]
    }
  }
}
# Route each tag to its own daily Elasticsearch index.
output {
  if [fields][tag] == "dataquality-data" {
    elasticsearch {
      hosts => "192.168.100.11:9200"
      index => "quality-data-%{+YYYY.MM.dd}"
    }
  }
  if [fields][tag] == "dataquality-app" {
    elasticsearch {
      hosts => "192.168.100.11:9200"
      index => "quality-app-%{+YYYY.MM.dd}"
    }
  }
  # Fix: lifecycle events were parsed in the filter stage but had no output
  # branch, so they were silently dropped. Route them to their own index.
  if [fields][tag] == "lifecycle" {
    elasticsearch {
      hosts => "192.168.100.11:9200"
      index => "lifecycle-%{+YYYY.MM.dd}"
    }
  }
}