alertmanager 配置文件 alertmanager.yml
# Global defaults inherited by every receiver below.
global:
  resolve_timeout: 1m
  # SMTP relay used for e-mail notifications. The original note describes this
  # as the QQ Mail SMTP service (official address smtp.qq.com, port 465 or
  # 587); the POP3/SMTP service must be enabled on the mailbox.
  smtp_smarthost: 'smtp.exmail.qq.com:465'
  # Sender address.
  smtp_from: '[email protected]'
  # Mailbox account name.
  smtp_auth_username: '[email protected]'
  # Mailbox password or authorization code.
  # NOTE(review): plaintext credential; keep this file out of version control
  # and rotate the code if it has been published.
  smtp_auth_password: 'lojdeopbholobgah'
  smtp_require_tls: false
# Notification template files; the receivers below call templates by name.
templates:
  - '/etc/alertmanager/alert.tmpl'
# Routing tree. The root route sends everything to WeChat; the child route
# diverts filesystem-usage alerts to e-mail with slower timings.
route:
  group_by: ['alertname']
  receiver: 'wechat'
  group_wait: 10s
  group_interval: 20s
  repeat_interval: 2m
  routes:
    # NOTE(review): current Alertmanager documentation marks `match_re` as
    # deprecated in favour of `matchers`; it is kept here so the file keeps
    # working on older releases.
    - match_re:
        alertname: 'filesystem_usage_over.*'
      receiver: 'email'
      group_wait: 1m
      group_interval: 5m
      repeat_interval: 6h
# Notification targets referenced by name from the routing tree.
receivers:
  # Default receiver used by the root route.
  - name: 'wechat'
    wechat_configs:
      # Enterprise ID (WeChat Work console: "My Enterprise" -> "CorpID", at
      # the bottom of the page).
      - corp_id: 'wwd76d598b5fad5097'
        # WeChat Work user IDs to notify; '@all' addresses everyone.
        to_user: '@all'
        # to_party: ''  # ID of the receiving department
        # WeChat Work console: "Enterprise Apps" -> custom app [Prometheus]
        # -> "AgentId". Quoted so it is always read as a string.
        agent_id: '1000004'
        # WeChat Work console: "Enterprise Apps" -> custom app [Prometheus]
        # -> "Secret".
        # NOTE(review): plaintext secret; keep this file out of version
        # control and rotate the secret if it has been published.
        api_secret: 'DY9IlG0Bdwawb_ku0NblxKFrrmMwbLIZ7YxMa5rCg8g'
        # Message body, rendered from the named template. The action must be
        # written as an unbroken "{{ ... }}" or it is sent as literal text.
        message: '{{ template "wechat.default.message" . }}'
        # Also notify when the alert recovers.
        send_resolved: true
  - name: 'email'
    email_configs:
      - to: '[email protected]'
        send_resolved: true
        # HTML body, rendered from the e-mail template defined in alert.tmpl.
        html: '{{ template "email-monitor.html" . }}'
告警模板文件 alert.tmpl
{{- /*
  wechat.default.message: plain-text body for WeChat Work notifications.

  Renders at most one "firing" section and one "resolved" section. Each
  section prints only the first alert of its kind in the group (index 0).
  Timestamps are shifted by 28800e9 ns (8 hours) before formatting, i.e.
  they are displayed as UTC+8.
*/ -}}
{{ define "wechat.default.message" }}
{{- /* Firing section: iterate the firing subset only, so index 0 is always a firing alert. */ -}}
{{- if gt (len .Alerts.Firing) 0 -}}
{{- range $index, $alert := .Alerts.Firing -}}
{{- if eq $index 0 }}
=========监控报警:{{ $alert.Labels.job }} =========
告警状态:{{ $alert.Status }}
告警级别:{{ $alert.Labels.severity }}
告警类型:{{ $alert.Labels.alertname }}
故障主机: {{ $alert.Labels.instance }} {{ $alert.Labels.pod }}
告警主题: {{ $alert.Annotations.summary }}
告警详情: {{ $alert.Annotations.message }}{{ $alert.Annotations.description }};
触发阀值:{{ $alert.Annotations.value }}
故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
========= = end = =========
{{- end }}
{{- end }}
{{- end }}
{{- /* Resolved section: iterate the resolved subset only, so index 0 is always a resolved alert. */ -}}
{{- if gt (len .Alerts.Resolved) 0 -}}
{{- range $index, $alert := .Alerts.Resolved -}}
{{- if eq $index 0 }}
=========异常恢复:{{ $alert.Labels.job }} =========
告警类型:{{ $alert.Labels.alertname }}
告警状态:{{ $alert.Status }}
告警主题: {{ $alert.Annotations.summary }}
告警详情: {{ $alert.Annotations.message }}{{ $alert.Annotations.description }};
故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
恢复时间: {{ ($alert.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
{{- /* Print the instance line only when the alert carries a non-empty instance label. */ -}}
{{- if gt (len $alert.Labels.instance) 0 }}
实例信息: {{ $alert.Labels.instance }}
{{- end }}
========= = end = =========
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- /*
  email-monitor.html: HTML body for e-mail notifications.

  Renders one table of firing alerts and one table of resolved alerts; each
  table is emitted only when the group has at least one alert of that kind
  and lists only alerts of that kind. Start times are shifted by 28800e9 ns
  (8 hours) before formatting, matching the WeChat template's UTC+8 display.
*/ -}}
{{ define "email-monitor.html" }}
{{- if gt (len .Alerts.Firing) 0 -}}
<h1>告警</h1>
<table border="5">
  <tr>
    <td>报警项</td>
    <td>实例</td>
    <td>报警详情</td>
    <td>报警阀值</td>
    <td>开始时间</td>
  </tr>
  {{ range $i, $alert := .Alerts.Firing }}
  <tr>
    <td>{{ index $alert.Labels "alertname" }}</td>
    <td>{{ index $alert.Labels "instance" }}</td>
    <td>{{ index $alert.Annotations "description" }}</td>
    <td>{{ index $alert.Annotations "value" }}</td>
    <td>{{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}</td>
  </tr>
  {{ end }}
</table>
{{ end }}
{{- if gt (len .Alerts.Resolved) 0 -}}
<h1>恢复</h1>
<table border="5">
  <tr>
    <td>报警项</td>
    <td>实例</td>
    <td>报警详情</td>
    <td>报警阀值</td>
    <td>开始时间</td>
  </tr>
  {{ range $i, $alert := .Alerts.Resolved }}
  <tr>
    <td>{{ index $alert.Labels "alertname" }}</td>
    <td>{{ index $alert.Labels "instance" }}</td>
    <td>{{ index $alert.Annotations "description" }}</td>
    <td>{{ index $alert.Annotations "value" }}</td>
    <td>{{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}</td>
  </tr>
  {{ end }}
</table>
{{ end }}
{{- end }}
参考链接:
https://prometheus.io/docs/alerting/latest/configuration/
https://blog.51cto.com/starsliao/5763175
https://blog.csdn.net/weixin_45880055/article/details/120585024