1. エクスポーター(exporter)の紹介

エクスポーター(exporter)は、メトリクスデータの収集を担う、Prometheus 監視の重要な構成要素です。前回の記事では Prometheus サーバーの導入を紹介しました。この記事では、データ収集用のプラグイン(エクスポーター)を紹介します。公式のプラグインには node_exporter、blackbox_exporter、mysqld_exporter、snmp_exporter などがあり、サードパーティ製のプラグインには redis_exporter や cadvisor などがあります。
以下では、実際の業務で使用しているものを取り上げ、それぞれのプラグインを紹介します。

2. node_exporter

node_exporter は主に、CPU、メモリ、ディスク、IO などの基本情報を含むマシンの性能データを収集するために使用します。node_exporter の詳細は前回の Prometheus サーバーの記事で紹介済みのため、ここでは繰り返しません。

3. mysqld_exporter

mysqld_exporter は主に、MySQL データベースサーバーの関連メトリクスを収集・監視するために使用します。
#バイナリファイルをダウンロード

root@db-100-51:~# cd /data/
root@db-100-51:/data# wget https://github.com/prometheus/mysqld_exporter/releases/download/v0.10.0/mysqld_exporter-0.10.0.linux-amd64.tar.gz
root@db-100-51:/data# tar -xf mysqld_exporter-0.10.0.linux-amd64.tar.gz
root@db-100-51:/data# cd mysqld_exporter-0.10.0/
root@db-100-51:/data/mysqld_exporter-0.10.0# ls
LICENSE mysqld_exporter NOTICE
root@db-100-51:/data/mysqld_exporter-0.10.0# mkdir log

#mysqld_exporter はデータベースに接続する必要があるため、ログイン用のデータベースユーザーを作成します

mysql> GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'exporter'@'localhost' identified by 'Cds20180808!';
mysql> flush privileges;

#データベース接続のための設定ファイルを作成します。

root@db-100-51:/data/mysqld_exporter-0.10.0# vim .my.cnf
[client]
user=exporter
password=Cds20180808!

#mysqld_exporterを開始するスーパーバイザを作成します。

root@db-100-51:/data/mysqld_exporter-0.10.0# vim /etc/supervisor/conf.d/mysqld_exporter.conf
[program:mysqld_exporter]
command = /data/mysqld_exporter-0.10.0/mysqld_exporter -config.my-cnf="/data/mysqld_exporter-0.10.0/.my.cnf"
autostart = true
autorestart = true
startsecs = 5
startretries = 3
redirect_stderr = true
stdout_logfile=/data/mysqld_exporter-0.10.0/log/out-mysqld_exporter.log
stderr_logfile=/data/mysqld_exporter-0.10.0/log/err-mysqld_exporter.log
stdout_logfile_maxbytes = 20MB
stdout_logfile_backups = 20

#スタートmysqld_exporter

root@db-100-51:/data/mysqld_exporter-0.10.0# supervisorctl update mysqld_exporter
root@db-100-51:/data/mysqld_exporter-0.10.0# supervisorctl status mysqld_exporter

4. redis_exporter

redis_exporter は主に、Redis データベースサーバーのメトリクスを収集・監視するために使用します。
#バイナリファイルをダウンロード

root@redis-node1-slave:~# cd /data/
root@redis-node1-slave:/data# wget https://github.com/oliver006/redis_exporter/releases/download/v0.24.0/redis_exporter-v0.24.0.linux-amd64.tar.gz
root@redis-node1-slave:/data# tar -xf redis_exporter-v0.24.0.linux-amd64.tar.gz
root@redis-node1-slave:/data# mkdir /data/redis_exporter-v0.24.0
root@redis-node1-slave:/data# mv redis_exporter redis_exporter-v0.24.0
root@redis-node1-slave:/data# cd redis_exporter-v0.24.0/
root@redis-node1-slave:/data/redis_exporter-v0.24.0# mkdir log

#redis_exporterを開始するスーパーバイザを設定します。

root@redis-node1-slave:/data/redis_exporter-v0.24.0# vim /etc/supervisor/conf.d/redis_exporter.conf
[program:redis_exporter]
command = /data/redis_exporter-v0.24.0/redis_exporter -redis.addr 10.13.225.112:6379 -redis.password cds-china
autostart = true
startsecs = 5
startretries = 3
redirect_stderr = true
stdout_logfile = /data/redis_exporter-v0.24.0/log/out-redis_exporter.log
stderr_logfile = /data/redis_exporter-v0.24.0/log/err-redis_exporter.log
stdout_logfile_maxbytes = 20MB
stdout_logfile_backups = 20

#スタートredis_exporter

root@redis-node1-slave:/data/redis_exporter-v0.24.0# supervisorctl update redis_exporter
root@redis-node1-slave:/data/redis_exporter-v0.24.0# supervisorctl status redis_exporter

5. blackbox_exporter

blackbox_exporter は、Prometheus コミュニティが提供する公式のブラックボックス監視ソリューションで、HTTP、HTTPS、DNS、TCP、ICMP によるネットワークのプローブ(探査)が可能です。ここでは、TCP ポートの検査でサービスが稼働しているかどうかを、HTTP/HTTPS でページへのアクセスが正常かどうかを、ICMP プローブでネットワークが疎通しているかどうかを確認します。
#バイナリファイルをダウンロード

root@prometheus:~# cd /data/
root@prometheus:/data# wget https://github.com/prometheus/blackbox_exporter/releases/download/v0.13.0/blackbox_exporter-0.13.0.linux-amd64.tar.gz
root@prometheus:/data# tar -xf blackbox_exporter-0.13.0.linux-amd64.tar.gz 

#Blackbox_exporter構成ファイルを変更

root@prometheus:/data# cd blackbox_exporter-0.13.0/
root@prometheus:/data/blackbox_exporter-0.13.0# vim blackbox.yml
modules:
  http_2xx:
    prober: http
    timeout: 5s
    http:
      preferred_ip_protocol: "ip4"
      no_follow_redirects: true
      valid_http_versions: ["HTTP/1.1", "HTTP/2"]
      valid_status_codes: [200,302]  # Defaults to 2xx
      method: GET
  http_post_2xx:
    prober: http
    http:
      method: POST
  tcp_connect:
    prober: tcp
    timeout: 5s
  pop3s_banner:
    prober: tcp
    tcp:
      query_response:
      - expect: "^+OK"
      tls: true
      tls_config:
        insecure_skip_verify: false
  ssh_banner:
    prober: tcp
    timeout: 5s
    tcp:
      query_response:
      - expect: "^SSH-2.0-"
  irc_banner:
    prober: tcp
    tcp:
      query_response:
      - send: "NICK prober"
      - send: "USER prober prober prober :prober"
      - expect: "PING :([^ ]+)"
        send: "PONG ${1}"
      - expect: "^:[^ ]+ 001"
  icmp:
    prober: icmp
    timeout: 5s

#systemd で blackbox_exporter を起動するよう設定します

root@prometheus:/data/blackbox_exporter-0.13.0# vim /usr/lib/systemd/system/blackbox_exporter.service 

[Unit]
Description=Prometheus blackbox exporter
After=local-fs.target network-online.target network.target
Wants=local-fs.target network-online.target network.target

[Service]
User=root
Type=simple
WorkingDirectory=/data/blackbox_exporter-0.13.0/
ExecStart=/data/blackbox_exporter-0.13.0/blackbox_exporter --config.file=/data/blackbox_exporter-0.13.0/blackbox.yml
Restart=on-failure

[Install]
WantedBy=multi-user.target

root@prometheus:/data/blackbox_exporter-0.13.0# systemctl start blackbox_exporter
root@prometheus:/data/blackbox_exporter-0.13.0# systemctl enable blackbox_exporter
root@prometheus:/data/blackbox_exporter-0.13.0# systemctl status blackbox_exporter

6. cadvisor

cadvisor は、コンテナの稼働状況を監視するための Google 製オープンソースツールです。
#バイナリファイルをダウンロード

root@GZH-vSPC02:~# cd /data/
root@GZH-vSPC02:/data# wget https://github.com/google/cadvisor/releases/download/v0.33.0/cadvisor
root@GZH-vSPC02:/data# mkdir cadvisor-v0.33.0
root@GZH-vSPC02:/data# mv cadvisor cadvisor-v0.33.0/
root@GZH-vSPC02:/data# cd cadvisor-v0.33.0/
root@GZH-vSPC02:/data/cadvisor-v0.33.0# mkdir log

#cadvisorを開始するスーパーバイザを設定します。

root@GZH-vSPC02:/data/cadvisor-v0.33.0# vim /etc/supervisor/conf.d/cadvisor-server.conf 
[program:cadvisor-server]
command = /data/cadvisor-v0.33.0/cadvisor
autostart = true
autorestart = true
startsecs = 5
startretries = 3
redirect_stderr = true
stdout_logfile=/data/cadvisor-v0.33.0/log/out-cadvisor.log
stderr_logfile=/data/cadvisor-v0.33.0/log/err-cadvisor.log
stdout_logfile_maxbytes = 20MB
stdout_logfile_backups = 20
root@GZH-vSPC02:/data/cadvisor-v0.33.0# supervisorctl update cadvisor-server
root@GZH-vSPC02:/data/cadvisor-v0.33.0# supervisorctl status cadvisor-server

7.サーバープロメテウスの設定

上記のエクスポーターの設定が終わったら、Prometheus サーバー側で各エクスポーターからデータを取得(スクレイプ)するよう設定する必要があります。
#Prometheus の設定ファイルを変更します(設定ファイルの内容は以前の記事で紹介済みのため、ここでは説明しません)

root@prometheus:~# cd /data/prometheus-2.4.3/
root@prometheus:/data/prometheus-2.4.3# vim prometheus.yml
#my global config
global:
  scrape_interval: 30s # Set the scrape interval to every 30 seconds. Default is every 1 minute.
  evaluation_interval: 25s # Evaluate rules every 25 seconds. The default is every 1 minute.
  scrape_timeout: 25s # The global default is 10s.

#Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 10.13.103.152:9093
      - 10.128.120.218:9093

#Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "/data/prometheus-2.4.3/rules/node_down.yml"
  - "/data/prometheus-2.4.3/rules/memory_over.yml"
  - "/data/prometheus-2.4.3/rules/disk_over.yml"
  - "/data/prometheus-2.4.3/rules/cpu_over.yml"
  - "/data/prometheus-2.4.3/rules/http_check.yml"
  - "/data/prometheus-2.4.3/rules/tcp_check.yml"
  - "/data/prometheus-2.4.3/rules/mysql_check.yml"
  - "/data/prometheus-2.4.3/rules/redis_down_check.yml"
  - "/data/prometheus-2.4.3/rules/redis_rule_check.yml"
  - "/data/prometheus-2.4.3/rules/container_down.yml"
  - "/data/prometheus-2.4.3/rules/ping_check.yml"

#A scrape configuration containing exactly one endpoint to scrape:
#Here it's Prometheus itself.
scrape_configs:
  #The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    #metrics_path defaults to '/metrics'
    #scheme defaults to 'http'.
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'GICHOST'
    file_sd_configs:
      - files: ['./node_exporter/host.json']

  - job_name: 'federate'
    scrape_interval: 30s
    scrape_timeout: 25s
    honor_labels: true
    metrics_path: '/federate'
    params:
      'match[]':
        - '{job=~"kubernetes-.*"}'
    static_configs:
      - targets:
        - '10.13.103.12:9090'

  - job_name: 'blackbox-http'
    scrape_interval: 5s
    metrics_path: /probe
    params:
      module: [http_2xx] # Look for a HTTP 200 response.
    file_sd_configs:
      - files: ['./blackbox_exporter/http.json']
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 10.13.103.151:9115

  - job_name: 'blackbox-tcp'
    scrape_interval: 5s
    metrics_path: /probe
    params:
      module: [tcp_connect]
    file_sd_configs:
      - files: ['./blackbox_exporter/tcp.json']
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 10.13.103.151:9115

  - job_name: 'mysqld-exporter'
    file_sd_configs:
      - files: ['./mysqld_exporter/mysqld.json']

  - job_name: 'blackbox-ping'
    scrape_interval: 5s
    metrics_path: /probe
    params:
      module: [icmp]
    file_sd_configs:
      - files: ['./blackbox_exporter/ping.json']
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 10.13.103.151:9115

  - job_name: 'redis-exporter'
    file_sd_configs:
      - files: ['./redis_exporter/redis.json']

  - job_name: 'container-exporter'
    file_sd_configs:
      - files: ['./container_exporter/container.json']

8. Prometheus の監視対象ホストファイルとアラートルールの設定

8.1ホスト監視ファイル

#mysqld_exporterホスト監視ファイル

root@prometheus:/data/prometheus-2.4.3# mkdir mysqld_exporter
root@prometheus:/data/prometheus-2.4.3# cd mysqld_exporter/
root@prometheus:/data/prometheus-2.4.3/mysqld_exporter# vim mysqld.json
[
  {
    "targets": [
      "10.13.100.51:9104"
    ],
    "labels": {
      "dbinstance": "db100.51"
    }
  }
]

#redis_exporterホスト監視ファイル

root@prometheus:/data/prometheus-2.4.3# mkdir redis_exporter
root@prometheus:/data/prometheus-2.4.3# cd redis_exporter/
root@prometheus:/data/prometheus-2.4.3/redis_exporter# vim redis.json
[
  {
    "targets": [
      "10.13.0.235:9121",
      "10.13.0.236:9121"
    ],
    "labels": {
      "service": "redis"
    }
  }
]

#blackbox_exporterホスト監視ファイル

root@prometheus:/data/prometheus-2.4.3# mkdir blackbox_exporter
root@prometheus:/data/prometheus-2.4.3# cd blackbox_exporter
root@prometheus:/data/prometheus-2.4.3/blackbox_exporter# vim http.json
[
  {
    "targets": [
      "http://10.13.227.134:6011/health",
      "http://10.13.102.134:6011/health"
    ],
    "labels": {
      "service": "WSREP"
    }
  }
]
root@prometheus:/data/prometheus-2.4.3/blackbox_exporter# vim tcp.json
[
  {
    "targets": [
      "10.128.107.53:13371",
      "10.128.107.57:13371"
    ],
    "labels": {
      "service": "vspc"
    }
  }
]
root@prometheus:/data/prometheus-2.4.3/blackbox_exporter# vim ping.json
[
  {
    "targets": [
      "10.13.101.131",
      "10.13.101.132",
      "10.13.101.141"
    ],
    "labels": {
      "service": "mysql"
    }
  }
]

#cadvisor の監視対象ホストファイルを設定します

root@prometheus:/data/prometheus-2.4.3# mkdir container_exporter
root@prometheus:/data/prometheus-2.4.3# cd container_exporter/
root@prometheus:/data/prometheus-2.4.3/container_exporter# vim container.json
[
  {
    "targets": [
      "10.13.103.153:8080",
      "10.128.87.5:8080"
    ],
    "labels": {
      "service": "docker-monitor"
    }
  }
]

8.2アラームルール

node_exporter で収集する CPU、ディスク、死活監視のルール例は前回の記事で紹介済みのため、ここでは説明しません。

root@prometheus:/data/prometheus-2.4.3# cd rules
root@prometheus:/data/prometheus-2.4.3/rules# ls
container_down.yml  disk_over.yml  mysql_check.yml  node_down.yml  redis_down_check.yml  tcp_check.yml
cpu_over.yml  http_check.yml  memory_over.yml  ping_check.yml  redis_rule_check.yml

#mysqld_exporter MySQLのデータ監視アラームルール

root@prometheus:/data/prometheus-2.4.3/rules# vim mysql_check.yml
groups:
- name: MySQLStatsAlert
  rules:
  - alert: MySQL is down
    expr: mysql_up == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "Instance {{$labels.instance}} MySQL is down"
      description: "MySQL database is down. This requires immediate action!(current value is: {{$value}})"
  - alert: Mysql_High_QPS
    expr: rate(mysql_global_status_questions[5m]) > 8000
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "{{$labels.instance}}: Mysql_High_QPS detected"
      description: "{{$labels.instance}}: Mysql operation is more than 5000 per second ,(current value is: {{$value}})"
  - alert: Mysql_Too_Many_Slow_Query
    expr: rate(mysql_global_status_slow_queries[30m]) > 3
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "{{$labels.instance}}: Mysql_Too_Many_Slow_Query detected"
      description: "{{$labels.instance}}: Mysql current Slow_Query Sql is more than 3 ,(current value is: {{$value}})"
  - alert: Mysql_Deadlock
    expr: mysql_global_status_innodb_deadlocks > 300
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "{{$labels.instance}}: Mysql_Deadlock detected"
      description: "{{$labels.instance}}: Mysql Deadlock was found ,(current value is: {{$value}})"
  - alert: open files high
    expr: mysql_global_status_innodb_num_open_files > (mysql_global_variables_open_files_limit) * 0.75
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{$labels.instance}} open files high"
      description: "Open files is high. Please consider increasing open_files_limit.(current value is: {{$value}})"
  - alert: Used more than 80% of max connections limited
    expr: mysql_global_status_max_used_connections > mysql_global_variables_max_connections * 0.8
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{$labels.instance}} Used more than 80% of max connections limited"
      description: "Used more than 80% of max connections limited.(current value is: {{$value}})"
  - alert: InnoDB Log File size is too small
    expr: mysql_global_variables_innodb_log_file_size < 16777216
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{$labels.instance}} InnoDB Log File size is too small"
      description: "The InnoDB Log File size is possibly too small. Choosing a small InnoDB Log File size can have significant performance impacts.(current value is: {{$value}})"
  - alert: Binary Log is disabled
    expr: mysql_global_variables_log_bin != 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{$labels.instance}} Binary Log is disabled"
      description: "Binary Log is disabled. This prohibits you to do Point in Time Recovery (PiTR).(current value is: {{$value}})"
  - alert: IO thread stopped
    expr: mysql_slave_status_slave_io_running != 1
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "Instance {{$labels.instance}} IO thread stopped"
      description: "IO thread has stopped. This is usually because it cannot connect to the Master any more.(current value is: {{$value}})"
  - alert: SQL thread stopped
    expr: mysql_slave_status_slave_sql_running != 1
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "Instance {{$labels.instance}} SQL thread stopped"
      description: "SQL thread has stopped. This is usually because it cannot apply a SQL statement received from the master.(current value is: {{$value}})"
  - alert: Slave lagging behind Master
    expr: rate(mysql_slave_status_seconds_behind_master[1m]) >30
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{$labels.instance}} Slave lagging behind Master"
      description: "Slave is lagging behind Master. Please check if Slave threads are running and if there are some performance issues!(current value is: {{$value}})"

#redis_exporterアラーム監視指標のRedisのルール

root@prometheus:/data/prometheus-2.4.3/rules# vim redis_down_check.yml
groups:
- name: redis检测规则
  rules:
  - alert: redis存活检测
    expr: redis_up{job="redis-exporter"} == 0
    for: 1m
    annotations:
      description: "机器:{{ $labels.instance }} 所属 job:{{ $labels.job }} redis宕机,请检查!"
      summary: "redis服务"
root@prometheus:/data/prometheus-2.4.3/rules# vim redis_rule_check.yml
groups:
- name: RedisStatsAlert
  rules:
  - alert: last create rdb failed
    expr: redis_rdb_last_bgsave_status != 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: " Instance {{ $labels.instance }} rdb_last_bgsave_status "
      description: "last create rdb failed"
  - alert: Redis linked too many clients
    expr: redis_connected_clients / redis_config_maxclients * 100 > 80
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} Redis linked clients too many"
      description: "Redis linked clients too many. This requires immediate action!"
  - alert: master link status failed
    expr: redis_master_link_up == 0
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} link failed"
      description: "redis_master_link=0 link failed"
  - alert: last AOF failed
    expr: redis_aof_last_bgrewrite_status != 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} redis aof last rewrite duration sec"
      description: "last AOF failed"
  - alert: Redis Cluster State Wrong
    expr: redis_cluster_state != 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} redis cluster status wrong"
      description: "Redis Cluster State Wrong"

#blackbox_exporterアラーム監視・インデックス・ルール

root@prometheus:/data/prometheus-2.4.3/rules# vim ping_check.yml
groups:
- name: 机器网络存活检测
  rules:
  - alert: 网络检测
    expr: probe_success{job="blackbox-ping"} == 0
    for: 1m
    annotations:
      description: "机器:{{ $labels.instance }} 所属 job:{{ $labels.job }} 网络不通或者宕机超过1分钟,请检查!"
      summary: "网络检测"

root@prometheus:/data/prometheus-2.4.3/rules# vim http_check.yml
groups:
- name: 服务检测规则
  rules:
  - alert: http服务检测
    expr: probe_success{job="blackbox-http"} == 0
    for: 1m
    annotations:
      description: "机器:{{ $labels.instance }} 所属 job:{{ $labels.job }} http状态码: {{ printf `probe_http_status_code{instance='%s'}` $labels.instance | query | first | value }} http检测失败,请检查!"
      summary: "http检测"

root@prometheus:/data/prometheus-2.4.3/rules# vim tcp_check.yml
groups:
- name: 服务检测规则
  rules:
  - alert: tcp服务检测
    expr: probe_success{job="blackbox-tcp"} == 0
    for: 1m
    annotations:
      description: "机器:{{ $labels.instance }} 所属 job:{{ $labels.job }} tcp检测失败,请检查!"
      summary: "tcp检测"

#cadvisorアラーム監視・インデックス・ルール

root@prometheus:/data/prometheus-2.4.3/rules# vim container_down.yml 

groups:
- name: 容器存活报警规则
  rules:
  - alert: DockerInstanceDown
    expr: absent(container_last_seen{name="core_vspc"}) == 1
    for: 1m
    annotations:
      description: "vspc client容器:{{ $labels.name }} (所属主机{{ $labels.instance }}) 已经异常退出超过1分钟,请检查!"
      summary: "容器:Instance {{ $labels.name }} 存活检测"

参照文献:

https://github.com/prometheus
https://github.com/oliver006/redis_exporter
https://github.com/google/cadvisor