1.環境を整える
ホスト名 | IPアドレス | ハードウェア要件 | サービスの展開 |
---|---|---|---|
マスター | 10.0.0.12 | 1コア1G | プロメテウス+グラファナ |
ノード1 | 10.0.0.13 | 1コア1G | ノードエクスポート |
ノード2 | 10.0.0.14 | 1コア1G | ノードエクスポート |
2.プロメテウスをインストールする
#安装prometheus
prometheus官网:https://prometheus.io/download/
我们下载prometheus-2.23.0.linux-amd64.tar.gz即可
#解压软件包
tar xf prometheus-2.23.0.linux-amd64.tar.gz
#打开解压软件包的目录
cd prometheus-2.23.0.linux-amd64/
#修改配置文件
vim prometheus.yml
修改最后
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: 'node-exporter'
static_configs:
- targets: ['10.0.0.13:9100','10.0.0.14:9100']
#启动软件 加& 等于后台运行
./prometheus --config.file="prometheus.yml" &
#浏览器输入10.0.0.12:9090即可
3. node_exporter をインストールします
#安装node_exporter(node1,node2)
prometheus官网:https://prometheus.io/download/
我们下载node_exporter-1.1.1.linux-amd64.tar.gz即可
https://prometheus.io/download/#node_exporter
#解压软件包
tar xf node_exporter-1.1.1.linux-amd64.tar.gz
#打开解压软件包的目录
cd node_exporter-1.1.1.linux-amd64
#启动软件 加& 等于后台运行
./node_exporter &
4.プロメテウスの自動発見
#部署cadvisor prometheus自动发现
修改配置文件
vim prometheus.yml
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: 'node-exporter'
static_configs:
- targets: ['10.0.0.13:9100','10.0.0.14:9100']
- job_name: 'cadvisor'
file_sd_configs:
- files:
- job/server.json
refresh_interval: 10s
#创建job文件和server.json文件
mkdir job
cd job
vim server.json
[
{
"targets": ["10.0.0.15:9100","10.0.0.17:9100"]
}
]
#先结束再重启
ps -ef|grep prom
kill <PID> #<PID>为上一条命令查到的prometheus进程号
./prometheus --config.file="prometheus.yml" &
##切记第一次做自动发现的时候是需要重新启动prometheus(以重新加载prometheus.yml文件)的,之后只需要修改json文件即可.
#下面是自动发现后的效果.
5.Alertmanagerメールアラーム
#Alertmanager邮件报警
先下载软件包到/opt并且解压出来
https://prometheus.io/download/#alertmanager
tar xf alertmanager-0.21.0.linux-amd64.tar.gz
cd alertmanager-0.21.0.linux-amd64
cp alertmanager.yml alertmanager.yml.bak
#编写配置文件
vim alertmanager.yml
global:
resolve_timeout: 5m
smtp_from: '[email protected]'
smtp_smarthost: 'smtp.qq.com:465'
smtp_auth_username: '[email protected]'
smtp_auth_password: 'xxxxxxxxxxxxxxxx' #邮箱授权码
smtp_require_tls: false
smtp_hello: 'qq.com'
route:
group_by: ['alertname']
group_wait: 5s
group_interval: 5s
repeat_interval: 5m
receiver: 'email'
receivers:
- name: 'email'
email_configs:
- to: '[email protected]'
send_resolved: true
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'dev', 'instance']
#启动
./alertmanager --config.file="alertmanager.yml" &
#然后去prometheus文件目录下 修改prometheus的配置文件
vim prometheus.yml
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
- 10.0.0.13:9093 #此ip地址为本机ip地址
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
- "node_down.yml" #设置报警规则文件
# - "second_rules.yml"
#编写报警规则
[root@docker03 prometheus-2.23.0.linux-amd64]# vim node_down.yml
groups:
- name: node-up
rules:
- alert: node-up
expr: up{job="node-exporter"} == 0
for: 15s
labels:
severity: 1
team: node
annotations:
summary: "{{ $labels.instance }} 已停止运行超过 15s!"
#结束prometheus并且重新启动
ps -ef|grep prom
kill <PID> #<PID>为上一条命令查到的prometheus进程号
./prometheus --config.file="prometheus.yml" &
6.グラファナの描画
#grafana出图 grafana:插件/数据源(设置prometheus)
#下载grafana的包
https://grafana.com/grafana/download #进入官网选择性的下载
#yum安装grafana
yum localinstall grafana-7.3.4-1.x86_64.rpm -y
#启动并且设置为开机自启
systemctl enable grafana-server.service
systemctl start grafana-server.service
7. ダッシュボード テンプレートをインポートする
#进入官网下载模板
官网:www.grafana.com
模板页面:https://grafana.com/grafana/dashboards
8.Alertmanager DingTalk アラーム
#先进入钉钉创建报警群组--添加报警机器人--创建webhook--保存自动生成的秘钥
#访问github下载最新的插件(prometheus-webhook-dingtalk)
https://github.com/timonwong/prometheus-webhook-dingtalk/releases/tag/v1.4.0
#解压并且修改配置文件
tar xf prometheus-webhook-dingtalk-1.4.0.linux-amd64.tar.gz
#修改配置文件
cp config.example.yml config.yml #启动时加载的是config.yml,先从示例文件复制一份
vim config.yml
targets:
webhook1:
url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxx #Webhook地址
# secret for signature
secret: SExxxxxxxxxxxxxxxxxx #秘钥
# webhook2:
# url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxx
# webhook_legacy:
# url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxx
# # Customize template content
# message:
# # Use legacy template
# title: '{{ template "legacy.title" . }}'
# text: '{{ template "legacy.content" . }}'
# webhook_mention_all:
# url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxx
# mention:
# all: true
webhook_mention_users:
url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxx #Webhook地址
mention:
mobiles: ['账户/手机号码']
#再配置alertmanager的配置文件
vim alertmanager.yml
global:
resolve_timeout: 5m
route:
group_by: ['alertname']
group_wait: 5s
group_interval: 5s
repeat_interval: 5m
receiver: 'web.hook'
receivers:
- name: 'web.hook'
webhook_configs:
- url: 'http://localhost:8060/dingtalk/webhook1/send' #多次翻车:URL中的webhook1必须与dingtalk配置文件targets下的名称一致
send_resolved: true
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'dev', 'instance']
#然后启动dingtalk,alertmanager,Prometheus就行了
./alertmanager --config.file="alertmanager.yml" &
./prometheus-webhook-dingtalk --config.file="config.yml" &
./prometheus --config.file="prometheus.yml" &
##下面就是报警信息以及解决信息