- Start Filebeat and Logstash
Start Logstash first; otherwise Filebeat will not detect Logstash's Beats port (5044 by default; 5022 in the configuration below). To apply configuration changes without a restart, you can run bin/logstash -f config/logstash_filebeat2es.conf -r and watch whether events are being parsed correctly. When writing the grok patterns in Logstash, match the log format exactly, paying particular attention to whitespace: hard-to-spot spaces are a common cause of match failures (_grokparsefailure). A throwaway stdin pipeline for debugging this is sketched at the end of this section.
bin/logstash -f config/logstash_filebeat2es.conf
./filebeat -e -c filebeat_csdn.yml
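While iterating on the pipeline, Logstash's standard flags are handy. A minimal sketch (stock Logstash flags; the config path is the one used above):

# check the pipeline syntax and exit
bin/logstash -f config/logstash_filebeat2es.conf --config.test_and_exit
# run with automatic config reload (-r), so edits apply without a restart
bin/logstash -f config/logstash_filebeat2es.conf -r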
If an "address already in use" error is reported at startup:
1. Run ps -ef | grep logstash to check whether another Logstash instance is already running, and kill it.
2. Under the directory set by path.data in logstash.yml there is a hidden .lock file; it is only a lock/cache file and can simply be deleted.
If path.data was not set manually, it is under the default path: cd /opt/ELK/logstash/data, then ls -alh and delete .lock.
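Put together, the recovery steps look like this (a sketch; the data directory /opt/ELK/logstash/data is the one from this setup, and <pid> is whatever the grep shows):

# find and stop the stray Logstash process
ps -ef | grep logstash
kill -9 <pid>
# remove the stale lock file from Logstash's data directory
cd /opt/ELK/logstash/data
ls -alh
rm -f .lock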
Configuration:
Log format ingested by Filebeat:
[2020-10-29 00:01:05,203][dataQuality.py:142][get_message_error][ERROR] {"checkResult": {"dbid": "282", "error_data": "''", "check_total_num": "2999", "sql": "INSERT INTO t_check_result (id, check_rule_id, error_level, error_num, check_total_num, error_data, create_time, update_time, create_userid, update_userid, del_flag, version_consumer) VALUES ('c8c1fa0c-1936-11eb-91a9-a0369f32a65c', 1977478, 12, 0, 2999, '', '2020-10-29 00:01:05', '2020-10-29 00:01:05', '', '', 0, '')", "msg": "(1406, u\"Data too long for column 'id' at row 1\")", "error_num": "0"}, "ruleId": "1977478", "dbId": "205", "dbInfo": [{"username": "root", "status": 0, "hostName": "192.168.100.132", "dbName": "yss_datamiddle_quality", "connType": "db-mysql", "msg": "", "port": 3306}, {"username": "root", "status": 0, "hostName": "192.168.100.132", "dbName": "yss_datamiddle_standard", "connType": "db-mysql", "msg": "", "port": 3306}], "startTime": "2020-10-29 00:01:04", "sqlInfo": [{"msg": "", "dbid": "282", "result": "2999", "sql": "select count(1) as num from t_biz_norm"}, {"msg": "", "dbid": "282", "result": "0", "sql": "select count(1) from yss_datamiddle_standard.t_biz_norm where (1=1) and (chinese_name is null)"}, {"msg": "", "dbid": "282", "result": "", "sql": "select * from yss_datamiddle_standard.t_biz_norm where (1=1) and (chinese_name is null)"}], "endTime": "2020-10-29 00:01:05", "checkdbId": "282"}
filebeat.yml
filebeat.inputs:
# dataQuality-app
- type: log
  enabled: true
  paths:
    - /opt/pythonScript/dataQuality/logs/dataQuality-app*.log
  multiline.pattern: '^\[\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}]'
  multiline.negate: true
  multiline.match: after
  document_type: "dataquality-app"
  fields:
    tag: dataquality-app
# dataQuality-data
- type: log
  enabled: true
  paths:
    - /opt/pythonScript/dataQuality/logs/dataQuality-data*.log
  multiline.pattern: '^\[\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}]'
  multiline.negate: true
  multiline.match: after
  document_type: "dataquality-data"
  fields:
    tag: dataquality-data
# lifeCycle
- type: log
  enabled: true
  paths:
    - /opt/pythonScript/lifeCycle/logs/*.log
  multiline.pattern: '^\[\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3}]'
  multiline.negate: true
  multiline.match: after
  document_type: "lifecycle"
  fields:
    tag: lifecycle

filebeat.config.modules:
  path: /opt/filebeat/modules.d/*.yml
  reload.enabled: false

setup.template.settings:
  index.number_of_shards: 3

setup.kibana:
  host: "192.168.100.154:5601"

output.logstash:
  # The Logstash hosts
  hosts: ["192.168.100.154:5022"]

processors:
  - add_host_metadata: ~
  - add_cloud_metadata: ~
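Before starting Filebeat, its built-in self-tests can confirm that the file parses and that the Logstash output is reachable (standard filebeat subcommands; filebeat_csdn.yml is the config used above):

# validate the YAML and settings
./filebeat test config -c filebeat_csdn.yml
# check connectivity to output.logstash (192.168.100.154:5022)
./filebeat test output -c filebeat_csdn.yml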
Matching rules (the Python logging formats the grok patterns must reproduce)
dataQuality-data.log format:
format = '[%(asctime)s][%(filename)s:%(lineno)d][%(funcName)s][%(levelname)s][%(message)s]'
dataQuality-app.log format:
format = '[%(asctime)s][%(filename)s:%(lineno)d][%(funcName)s][%(levelname)s] %(message)s'
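As a sanity check, the app format above can be reproduced with Python's standard logging module (a minimal sketch; the message is illustrative):

import logging

# Same format string as dataQuality-app.log; logging's default asctime
# already renders as 'YYYY-MM-DD HH:MM:SS,mmm', which is what the
# multiline.pattern in filebeat.yml anchors on.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s][%(filename)s:%(lineno)d][%(funcName)s][%(levelname)s] %(message)s',
)
logging.error('{"checkResult": {"dbid": "282"}}')  # emits a line shaped like the sample above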
Next, configure Logstash:
input {
  beats {
    host => "192.168.34.14"
    port => 5022
    client_inactivity_timeout => 36000
  }
}
filter {
  if [fields][tag] == "dataquality-app" {
    grok {
      match => {
        "message" => "\[%{EXIM_DATE:datetime}\]\[%{GREEDYDATA:method_name}\]\[%{GREEDYDATA:def_name}\]\[%{GREEDYDATA:class_name}\]\s%{GREEDYDATA:msg}"
      }
    }
    json {
      source => "msg"            # parse the msg field as JSON
      remove_field => [ "msg" ]  # drop msg afterwards (optional)
    }
    mutate {
      # The nested checkResult object cannot be fed to the json filter again
      # directly, so first render it to a string in a new field; the field
      # name @gg is arbitrary.
      add_field => {"@gg" => "%{checkResult}"}
    }
    json {
      source => "@gg"                          # parse @gg as JSON
      remove_field => [ "@gg","checkResult" ]  # likewise drop the now-unneeded fields
    }
  }
  if [fields][tag] == "dataquality-data" {
    grok {
      match => {
        "message" => "\[%{EXIM_DATE:datetime}\]\[%{GREEDYDATA:method_name}\]\[%{GREEDYDATA:def_name}\]\[%{GREEDYDATA:class_name}\]\[%{LOGLEVEL:log_level}\]\[%{GREEDYDATA:msg}\]"
      }
    }
    date {
      match => ["datetime", "yyyy-MM-dd HH:mm:ss,SSS"]
      target => "@timestamp"
      remove_field => ["datetime"]
    }
  }
  if [fields][tag] == "lifecycle" {
    grok {
      match => {
        "message" => "\[%{EXIM_DATE:datetime}\]\[%{LOGLEVEL:log_level}\]\[%{GREEDYDATA:method_name}\]\[%{GREEDYDATA:def_name}\]\[%{GREEDYDATA:class_name}\]\[%{GREEDYDATA:msg}\]"
      }
    }
    date {
      match => ["datetime", "yyyy-MM-dd HH:mm:ss,SSS"]
      target => "@timestamp"
      remove_field => ["datetime"]
    }
  }
}
output {
  if [fields][tag] == "dataquality-data" {
    elasticsearch {
      hosts => "192.168.100.11:9200"
      index => "quality-data-%{+YYYY.MM.dd}"
    }
  }
  if [fields][tag] == "dataquality-app" {
    elasticsearch {
      hosts => "192.168.100.11:9200"
      index => "quality-app-%{+YYYY.MM.dd}"
    }
  }
}
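To debug the grok patterns in isolation (the whitespace pitfall mentioned at the top), a throwaway pipeline that reads from stdin and prints with rubydebug works well. A minimal sketch reusing the dataquality-data pattern from above; paste a raw log line and inspect the parsed fields:

input { stdin { } }
filter {
  grok {
    match => {
      "message" => "\[%{EXIM_DATE:datetime}\]\[%{GREEDYDATA:method_name}\]\[%{GREEDYDATA:def_name}\]\[%{GREEDYDATA:class_name}\]\[%{LOGLEVEL:log_level}\]\[%{GREEDYDATA:msg}\]"
    }
  }
}
output { stdout { codec => rubydebug } }

Once events flow end to end, the daily indices can be checked against Elasticsearch directly (standard _cat API; host as configured above):

curl 'http://192.168.100.11:9200/_cat/indices/quality-*?v'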