版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/wanglei_storage/article/details/82663706
input {
  # Read events from the nginx access log and stamp each one with a fixed
  # "type" value (the filter stage interpolates it via %{type}).
  file {
    type           => "dev-chuiyi-site-landing-1"
    path           => "/data/logs/nginx_logs/access.log"
    # "end" is requested explicitly here; per the file input documentation this
    # only governs where reading starts for files not seen before.
    start_position => "end"
    # Left disabled: lines are parsed by grok in the filter stage instead.
    # codec => json
  }
}
filter {
  # Parse the pipe-delimited nginx access line into named fields.
  grok {
    # Location of the pattern definitions that provide NGINX_LOGS.
    patterns_dir => "/usr/local/logstash-6.3.2/vendor/bundle/jruby/2.3.0/gems/logstash-patterns-core-4.1.2/patterns"
    match => { "message" => "%{NGINX_LOGS}" }
    # Disabled alternative: list several patterns to try against "message".
    #match => [
    #  "message","%{NGINX_LOGS}",
    #  "message","%{USERHOST:userhost} %{USERNAME:username}"
    #]
    # Demo fields. The field names are built from the event's "type" value
    # (name_<type>_0 .. name_<type>_2), plus a literal string "null".
    add_field => {
      "name_%{type}_0" => "hello world 0"
      "name_%{type}_1" => "hello world 1"
      "name_%{type}_2" => "hello world 2"
      "coerce_value"   => "null"
    }
    # Drop the raw line, and drop name_<type>_0 (which add_field above also
    # lists). Repeated array settings are concatenated by Logstash, so a single
    # array is equivalent to the two separate remove_field entries.
    remove_field => ["message", "name_%{type}_0"]
  }

  # Discard events whose request line is exactly this string so they are never
  # sent to the output.
  if [request] == "HEAD / HTTP/1.0" {
    drop {}
  }

  # GeoIP lookup on the client address, keeping only the listed fields.
  geoip {
    source   => "remote_addr"
    database => "/usr/local/src/GeoLite2-City_20180807/GeoLite2-City.mmdb"
    fields   => ["country_name","region_code", "city_name", "ip"]
  }

  # NOTE(review): a single mutate block applies its operations in the plugin's
  # fixed internal order, not in the order they are written. The settings below
  # are listed in the documented order (rename, update, replace, convert, gsub,
  # split, join, copy) - confirm against the installed mutate plugin version.
  mutate {
    # Rename http_referer to http_source.
    rename => {
      "http_referer" => "http_source"
    }
    # Overwrite the value of an existing field; does nothing when the field is
    # absent. The value may use %{} references.
    update => {
      "upstream_response_time" => "%{remote_addr}"
    }
    # Overwrite the value of a field, creating the field when it is absent.
    # The value may use %{} references.
    replace => {
      "new_replace" => "%{remote_addr}"
    }
    # Change the type of a field's value (integer/float/string/boolean).
    convert => {
      "status"       => "integer"
      "request_time" => "float"
    }
    # String substitution in triples: field name, regex (or literal) to match,
    # replacement text. String fields only.
    gsub => [
      "time_local", "\d{2}/[A-Za-z]{3}/[\d:]+", "this is time",
      "request", "HTTP", "http"
    ]
    # Split a string field on the separator, producing an array.
    split => {
      "remote_addr" => "."
    }
    # Join an array field back into one string using the separator.
    join => {
      "remote_addr" => "-"
    }
    # Copy a field under a new name.
    # NOTE(review): given the order above, client_addr presumably receives the
    # already split/joined remote_addr value rather than the original - verify.
    copy => {
      "remote_addr" => "client_addr"
    }
  }
}
output {
  # Push every event that leaves the filter stage onto a Redis list.
  redis {
    host      => "10.168.216.1"
    # port is a numeric setting; written as a number rather than a string.
    port      => 6379
    data_type => "list"
    # Name of the Redis list the events are appended to.
    key       => "redis_auth"
    # The credential is no longer stored in plaintext in this file. It is
    # resolved at startup from the Logstash keystore or from the environment:
    #   bin/logstash-keystore add REDIS_PASSWORD     (preferred)
    #   export REDIS_PASSWORD=...                    (alternative)
    # Logstash reports a configuration error if the variable cannot be resolved.
    password  => "${REDIS_PASSWORD}"
  }
}
关于 grok 解析 nginx 日志所用的自定义正则(pattern 文件内容如下)
cat /usr/local/logstash-6.3.2/vendor/bundle/jruby/2.3.0/gems/logstash-patterns-core-4.1.2/patterns
# Custom grok patterns for the pipe-delimited nginx access log.
# One definition per line: NAME, a space, then the regular expression.
# Each field pattern below corresponds to one nginx variable in the log line.
REMOTE_ADDR ^([\d\.]+)
# $time_local ends with a UTC offset that may be negative (e.g. -0500).
TIME_LOCAL [^\s]+\s[+-]\d{4}
REQUEST [A-Z]+\s.*\sHTTP/\d\.\d
STATUS \d{3}
BODY_BYTES_SENT \d+
HTTP_REFERER [^\|]*
HTTP_USER_AGENT [^\|]*
# X-Forwarded-For is a client-supplied header: it can hold a comma-separated
# chain of addresses, IPv6 addresses, or arbitrary text, so accept anything up
# to the next field delimiter (same approach as the other header fields).
HTTP_X_FORWARDED_FOR [^\|]*
UPSTREAM_ADDR [^\|]*
# When several upstream servers are contacted for one request, nginx writes
# one value per server separated by commas and colons (e.g. "502, 200").
UPSTREAM_STATUS (?:\d{3}|-)(?:\s*[,:]\s*(?:\d{3}|-))*
REQUEST_TIME [0-9]*\.[0-9]+
# Same multi-value form as UPSTREAM_STATUS (e.g. "0.001, 0.042"); still
# anchored to end of line because it is the last field.
UPSTREAM_RESPONSE_TIME ((?:[0-9]*\.[0-9]+|-)(?:\s*[,:]\s*(?:[0-9]*\.[0-9]+|-))*)$
# Full line: the twelve fields in log_format order, separated by literal "|".
NGINX_LOGS %{REMOTE_ADDR:remote_addr}\|%{TIME_LOCAL:time_local}\|%{REQUEST:request}\|%{STATUS:status}\|%{BODY_BYTES_SENT:body_bytes_sent}\|%{HTTP_REFERER:http_referer}\|%{HTTP_USER_AGENT:http_user_agent}\|%{HTTP_X_FORWARDED_FOR:http_x_forwarded_for}\|%{UPSTREAM_ADDR:upstream_addr}\|%{UPSTREAM_STATUS:upstream_status}\|%{REQUEST_TIME:request_time}\|%{UPSTREAM_RESPONSE_TIME:upstream_response_time}
USERHOST [\d\.]+
# NOTE(review): logstash-patterns-core also defines USERNAME (used by its USER
# pattern). Redefining it in the same patterns directory may change core
# patterns that depend on it - consider a unique name or a separate directory.
USERNAME [a-zA-Z]+
关于 nginx 日志格式(各字段以 "|" 分隔,顺序与上面的 NGINX_LOGS 正则逐一对应)
# Pipe-delimited access log format "main". Adjacent quoted strings are
# concatenated into a single format string of twelve fields.
log_format main
    # client address, timestamp, request line, response status and size
    '$remote_addr|$time_local|$request|$status|$body_bytes_sent|'
    # request headers
    '$http_referer|$http_user_agent|$http_x_forwarded_for|'
    # upstream server address and status
    '$upstream_addr|$upstream_status|'
    # request time and upstream response time
    '$request_time|$upstream_response_time';