Prometheus Alertmanager 邮件报警

Prometheus Alertmanager 邮件报警:

下载网址:
https://prometheus.io/download

tar zxfv alertmanager-0.15.2.linux-amd64.tar.gz -C /space/

mv /space/alertmanager-0.15.2.linux-amd64 /space/altermanager

vi /space/altermanager/altermanager.yml

# Alertmanager configuration: SMTP settings shared by all email receivers,
# a single catch-all route, and the receiver that route points to.
global:
  # How long to wait before declaring an alert resolved when it stops firing.
  resolve_timeout: 5m
  smtp_smarthost: 'smtp.ming.com:25'
  # NOTE(review): this looks like a redacted address placeholder -- set a
  # real sender address before use.
  smtp_from: '[email protected]'

route:
  # NOTE(review): groups by a label named 'down'; the rules in this guide
  # only set the 'user' label -- confirm this is intended ('alertname' is
  # the usual choice).
  group_by: ['down']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'shprom'

# The receiver named by the route must be defined here, otherwise
# Alertmanager refuses to load the configuration.
receivers:
  - name: 'shprom'
    email_configs:
      # TODO: placeholder recipient -- replace with the real mailbox.
      - to: 'ops@example.com'

:wq

/space/altermanager/alertmanager --config.file=/space/altermanager/altermanager.yml

vi /space/altermanager/down.yml

# Prometheus alerting rules: fire InstanceDown when a scrape target has
# reported up == 0 continuously for the duration given in 'for'.
groups:
  - name: down
    rules:
      - alert: InstanceDown
        expr: up == 0
        for: 30s
        labels:
          user: shprom
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          # Wording kept in step with the 'for: 30s' threshold above.
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds."

:wq

vi /space/altermanager/mem.yml

# Prometheus alerting rules: fire NodeMemoryUsage when used memory
# (total minus cached, buffers and free) exceeds 80% for one minute.
groups:
  - name: mem
    rules:
      - alert: NodeMemoryUsage
        # Evaluated per series rather than through a label-less sum(), so
        # the 'instance' label used by the annotations below is preserved.
        expr: >-
          100 - ((node_memory_Cached_bytes + node_memory_Buffers_bytes
          + node_memory_MemFree_bytes) / node_memory_MemTotal_bytes * 100) > 80
        for: 1m
        labels:
          user: shprom
        annotations:
          summary: "{{ $labels.instance }} High Memory usage detected"
          description: "{{ $labels.instance }}: Memory usage is above 80% (current value is:{{ $value }})"

:wq

vi /space/prometheus/prometheus.yml

# Fragment of prometheus.yml: where to send alerts and which rule files
# to load.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'localhost:9093'
            # - alertmanager:9093

# Relative rule paths resolve against the directory of prometheus.yml;
# the rule files in this guide are created under /space/altermanager/,
# so they are referenced by absolute path.
rule_files:
  - "/space/altermanager/down.yml"
  - "/space/altermanager/mem.yml"

:wq

/space/prometheus/prometheus --config.file=/space/prometheus/prometheus.yml --storage.tsdb.path=/space/prometheus/data

猜你喜欢

转载自blog.51cto.com/yangzhiming/2309012