资源监控

README.MD

依序执行即可:

manifests-ns.yaml
manifests-secret.yaml
manifests-configmap.yaml
manifests-rbac.yaml
manifests-svc.yaml

manifests-deploy.yaml
manifests-ds.yaml
manifests-job.yaml

manifests-ns.yaml

---
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring

manifests-secret.yaml

---
apiVersion: v1
kind: Secret
data:
  admin-password: YWRtaW4=
  admin-username: YWRtaW4=
metadata:
  name: grafana
  namespace: monitoring
type: Opaque

manifests-configmap.yaml

---
apiVersion: v1
data:
  default.tmpl: |
    {{ define "__alertmanager" }}AlertManager{{ end }}
    {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }}

    {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
    {{ define "__description" }}{{ end }}

    {{ define "__text_alert_list" }}{{ range . }}Labels:
    {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }}
    {{ end }}Annotations:
    {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }}
    {{ end }}Source: {{ .GeneratorURL }}
    {{ end }}{{ end }}


    {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }}
    {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }}
    {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }}
    {{ define "slack.default.pretext" }}{{ end }}
    {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }}
    {{ define "slack.default.iconemoji" }}{{ end }}
    {{ define "slack.default.iconurl" }}{{ end }}
    {{ define "slack.default.text" }}{{ end }}


    {{ define "hipchat.default.from" }}{{ template "__alertmanager" . }}{{ end }}
    {{ define "hipchat.default.message" }}{{ template "__subject" . }}{{ end }}


    {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }}
    {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }}
    {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }}
    {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }}


    {{ define "opsgenie.default.message" }}{{ template "__subject" . }}{{ end }}
    {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }}
    {{ if gt (len .Alerts.Firing) 0 -}}
    Alerts Firing:
    {{ template "__text_alert_list" .Alerts.Firing }}
    {{- end }}
    {{ if gt (len .Alerts.Resolved) 0 -}}
    Alerts Resolved:
    {{ template "__text_alert_list" .Alerts.Resolved }}
    {{- end }}
    {{- end }}
    {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }}


    {{ define "victorops.default.message" }}{{ template "__subject" . }} | {{ template "__alertmanagerURL" . }}{{ end }}
    {{ define "victorops.default.from" }}{{ template "__alertmanager" . }}{{ end }}


    {{ define "email.default.subject" }}{{ template "__subject" . }}{{ end }}
    {{ define "email.default.html" }}
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <!--
    Style and HTML derived from https://github.com/mailgun/transactional-email-templates


    The MIT License (MIT)

    Copyright (c) 2014 Mailgun

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in all
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    SOFTWARE.
    -->
    <html xmlns="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <head style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
    <meta name="viewport" content="width=device-width" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
    <title style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">{{ template "__subject" . }}</title>

    </head>

    <body itemscope="" itemtype="http://schema.org/EmailMessage" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; -webkit-font-smoothing: antialiased; -webkit-text-size-adjust: none; height: 100%; line-height: 1.6em; width: 100% !important; background-color: #f6f6f6; margin: 0; padding: 0;" bgcolor="#f6f6f6">

    <table style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; background-color: #f6f6f6; margin: 0;" bgcolor="#f6f6f6">
      <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td>
        <td width="600" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; display: block !important; max-width: 600px !important; clear: both !important; width: 100% !important; margin: 0 auto; padding: 0;" valign="top">
          <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; max-width: 600px; display: block; margin: 0 auto; padding: 0;">
            <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; border-radius: 3px; background-color: #fff; margin: 0; border: 1px solid #e9e9e9;" bgcolor="#fff">
              <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
                <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 16px; vertical-align: top; color: #fff; font-weight: 500; text-align: center; border-radius: 3px 3px 0 0; background-color: #E6522C; margin: 0; padding: 20px;" align="center" bgcolor="#E6522C" valign="top">
                  {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }}
                    {{ .Name }}={{ .Value }}
                  {{ end }}
                </td>
              </tr>
              <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
                <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 10px;" valign="top">
                  <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
                    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
                      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
                        <a href="{{ template "__alertmanagerURL" . }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #FFF; text-decoration: none; line-height: 2em; font-weight: bold; text-align: center; cursor: pointer; display: inline-block; border-radius: 5px; text-transform: capitalize; background-color: #348eda; margin: 0; border-color: #348eda; border-style: solid; border-width: 10px 20px;">View in {{ template "__alertmanager" . }}</a>
                      </td>
                    </tr>
                    {{ if gt (len .Alerts.Firing) 0 }}
                    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
                      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
                        <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Firing | len }}] Firing</strong>
                      </td>
                    </tr>
                    {{ end }}
                    {{ range .Alerts.Firing }}
                    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
                      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
                        <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
                        {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
                        {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
                        {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
                        <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
                      </td>
                    </tr>
                    {{ end }}

                    {{ if gt (len .Alerts.Resolved) 0 }}
                      {{ if gt (len .Alerts.Firing) 0 }}
                    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
                      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
                        <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
                        <hr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
                        <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
                      </td>
                    </tr>
                      {{ end }}
                    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
                      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
                        <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Resolved | len }}] Resolved</strong>
                      </td>
                    </tr>
                    {{ end }}
                    {{ range .Alerts.Resolved }}
                    <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
                      <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top">
                        <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
                        {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
                        {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
                        {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }}
                        <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />
                      </td>
                    </tr>
                    {{ end }}
                  </table>
                </td>
              </tr>
            </table>

            <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; clear: both; color: #999; margin: 0; padding: 20px;">
              <table width="100%" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
                <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">
                  <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; vertical-align: top; text-align: center; color: #999; margin: 0; padding: 0 0 20px;" align="center" valign="top"><a href="{{ .ExternalURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; color: #999; text-decoration: underline; margin: 0;">Sent by {{ template "__alertmanager" . }}</a></td>
                </tr>
              </table>
            </div></div>
        </td>
        <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td>
      </tr>
    </table>

    </body>
    </html>

    {{ end }}

    {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }}
    {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }}
    {{ if gt (len .Alerts.Firing) 0 }}
    Alerts Firing:
    {{ template "__text_alert_list" .Alerts.Firing }}
    {{ end }}
    {{ if gt (len .Alerts.Resolved) 0 }}
    Alerts Resolved:
    {{ template "__text_alert_list" .Alerts.Resolved }}
    {{ end }}
    {{ end }}
    {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . }}{{ end }}
  slack.tmpl: |
    {{ define "slack.devops.text" }}
    {{range .Alerts}}{{.Annotations.DESCRIPTION}}
    {{end}}
    {{ end }}
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: alertmanager-templates
  namespace: monitoring
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: alertmanager
  namespace: monitoring
data:
  config.yml: |-
    global:
      # ResolveTimeout is the time after which an alert is declared resolved
      # if it has not been updated.
      resolve_timeout: 5m

      # The smarthost and SMTP sender used for mail notifications.
      smtp_smarthost: 'smtp.gmail.com:587'
      smtp_from: '[email protected]'
      smtp_auth_username: '[email protected]'
      smtp_auth_password: 'barfoo'

      # The API URL to use for Slack notifications.
      slack_api_url: 'https://hooks.slack.com/services/some/api/token'

    # # The directory from which notification templates are read.
    templates:
    - '/etc/alertmanager-templates/*.tmpl'

    # The root route on which each incoming alert enters.
    route:

      # The labels by which incoming alerts are grouped together. For example,
      # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
      # be batched into a single group.

      group_by: ['alertname', 'cluster', 'service']

      # When a new group of alerts is created by an incoming alert, wait at
      # least 'group_wait' to send the initial notification.
      # This way ensures that you get multiple alerts for the same group that start
      # firing shortly after another are batched together on the first
      # notification.

      group_wait: 30s

      # When the first notification was sent, wait 'group_interval' to send a batch
      # of new alerts that started firing for that group.

      group_interval: 5m

      # If an alert has successfully been sent, wait 'repeat_interval' to
      # resend them.

      #repeat_interval: 1m
      repeat_interval: 15m

      # A default receiver

      # If an alert isn't caught by a route, send it to default.
      receiver: default

      # All the above attributes are inherited by all child routes and can
      # overwritten on each.

      # The child route trees.
      routes:
      # Send severity=slack alerts to slack.
      - match:
          severity: slack
        receiver: slack_alert
      # - match:
      #     severity: email
      #   receiver: email_alert

    receivers:
    - name: 'default'
      slack_configs:
      - channel: '#alertmanager-test'
        text: '<!channel>{{ template "slack.devops.text" . }}'
        send_resolved: true

    - name: 'slack_alert'
      slack_configs:
      - channel: '#alertmanager-test'
        send_resolved: true
---
apiVersion: v1
data:
  grafana-net-2-dashboard.json: |
    {
      "__inputs": [{
        "name": "DS_PROMETHEUS",
        "label": "Prometheus",
        "description": "",
        "type": "datasource",
        "pluginId": "prometheus",
        "pluginName": "Prometheus"
      }],
      "__requires": [{
        "type": "panel",
        "id": "singlestat",
        "name": "Singlestat",
        "version": ""
      }, {
        "type": "panel",
        "id": "text",
        "name": "Text",
        "version": ""
      }, {
        "type": "panel",
        "id": "graph",
        "name": "Graph",
        "version": ""
      }, {
        "type": "grafana",
        "id": "grafana",
        "name": "Grafana",
        "version": "3.1.0"
      }, {
        "type": "datasource",
        "id": "prometheus",
        "name": "Prometheus",
        "version": "1.0.0"
      }],
      "id": null,
      "title": "Prometheus Stats",
      "tags": [],
      "style": "dark",
      "timezone": "browser",
      "editable": true,
      "hideControls": true,
      "sharedCrosshair": false,
      "rows": [{
        "collapse": false,
        "editable": true,
        "height": 178,
        "panels": [{
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": false,
          "colors": ["rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)"],
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 1,
          "editable": true,
          "error": false,
          "format": "s",
          "id": 5,
          "interval": null,
          "links": [],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": "",
          "postfixFontSize": "50%",
          "prefix": "",
          "prefixFontSize": "50%",
          "span": 3,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": false
          },
          "targets": [{
            "expr": "(time() - container_start_time_seconds{container_name=\"kube-apiserver\"})",
            "intervalFactor": 2,
            "refId": "A",
            "step": 4
          }],
          "thresholds": "",
          "title": "Uptime",
          "type": "singlestat",
          "valueFontSize": "80%",
          "valueMaps": [{
            "op": "=",
            "text": "N/A",
            "value": "null"
          }],
          "valueName": "current",
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "rangeMaps": [{
            "from": "null",
            "to": "null",
            "text": "N/A"
          }],
          "mappingType": 1,
          "gauge": {
            "show": false,
            "minValue": 0,
            "maxValue": 100,
            "thresholdMarkers": true,
            "thresholdLabels": false
          }
        }, {
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": false,
          "colors": ["rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)"],
          "datasource": "${DS_PROMETHEUS}",
          "editable": true,
          "error": false,
          "format": "none",
          "id": 6,
          "interval": null,
          "links": [],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": "",
          "postfixFontSize": "50%",
          "prefix": "",
          "prefixFontSize": "50%",
          "span": 3,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": true
          },
          "targets": [{
            "expr": "prometheus_local_storage_memory_series",
            "intervalFactor": 2,
            "refId": "A",
            "step": 4
          }],
          "thresholds": "1,5",
          "title": "Local Storage Memory Series",
          "type": "singlestat",
          "valueFontSize": "70%",
          "valueMaps": [],
          "valueName": "current",
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "rangeMaps": [{
            "from": "null",
            "to": "null",
            "text": "N/A"
          }],
          "mappingType": 1,
          "gauge": {
            "show": false,
            "minValue": 0,
            "maxValue": 100,
            "thresholdMarkers": true,
            "thresholdLabels": false
          }
        }, {
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": true,
          "colors": ["rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)"],
          "datasource": "${DS_PROMETHEUS}",
          "editable": true,
          "error": false,
          "format": "none",
          "id": 7,
          "interval": null,
          "links": [],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": "",
          "postfixFontSize": "50%",
          "prefix": "",
          "prefixFontSize": "50%",
          "span": 3,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": true
          },
          "targets": [{
            "expr": "prometheus_local_storage_indexing_queue_length",
            "intervalFactor": 2,
            "refId": "A",
            "step": 4
          }],
          "thresholds": "500,4000",
          "title": "Internal Storage Queue Length",
          "type": "singlestat",
          "valueFontSize": "70%",
          "valueMaps": [{
            "op": "=",
            "text": "Empty",
            "value": "0"
          }],
          "valueName": "current",
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "rangeMaps": [{
            "from": "null",
            "to": "null",
            "text": "N/A"
          }],
          "mappingType": 1,
          "gauge": {
            "show": false,
            "minValue": 0,
            "maxValue": 100,
            "thresholdMarkers": true,
            "thresholdLabels": false
          }
        }, {
          "content": "<img src=\"http://prometheus.io/assets/prometheus_logo_grey.svg\" alt=\"Prometheus logo\" style=\"height: 40px;\">\n<span style=\"font-family: 'Open Sans', 'Helvetica Neue', Helvetica; font-size: 25px;vertical-align: text-top;color: #bbbfc2;margin-left: 10px;\">Prometheus</span>\n\n<p style=\"margin-top: 10px;\">You're using Prometheus, an open-source systems monitoring and alerting toolkit originally built at SoundCloud. For more information, check out the <a href=\"http://www.grafana.org/\">Grafana</a> and <a href=\"http://prometheus.io/\">Prometheus</a> projects.</p>",
          "editable": true,
          "error": false,
          "id": 9,
          "links": [],
          "mode": "html",
          "span": 3,
          "style": {},
          "title": "",
          "transparent": true,
          "type": "text"
        }],
        "title": "New row"
      }, {
        "collapse": false,
        "editable": true,
        "height": 227,
        "panels": [{
          "aliasColors": {
            "prometheus": "#C15C17",
            "{instance=\"localhost:9090\",job=\"prometheus\"}": "#C15C17"
          },
          "bars": false,
          "datasource": "${DS_PROMETHEUS}",
          "editable": true,
          "error": false,
          "fill": 1,
          "grid": {
            "threshold1": null,
            "threshold1Color": "rgba(216, 200, 27, 0.27)",
            "threshold2": null,
            "threshold2Color": "rgba(234, 112, 112, 0.22)"
          },
          "id": 3,
          "legend": {
            "avg": false,
            "current": false,
            "max": false,
            "min": false,
            "show": true,
            "total": false,
            "values": false
          },
          "lines": true,
          "linewidth": 2,
          "links": [],
          "nullPointMode": "connected",
          "percentage": false,
          "pointradius": 2,
          "points": false,
          "renderer": "flot",
          "seriesOverrides": [],
          "span": 9,
          "stack": false,
          "steppedLine": false,
          "targets": [{
            "expr": "rate(prometheus_local_storage_ingested_samples_total[5m])",
            "interval": "",
            "intervalFactor": 2,
            "legendFormat": "{{job}}",
            "metric": "",
            "refId": "A",
            "step": 2
          }],
          "timeFrom": null,
          "timeShift": null,
          "title": "Samples ingested (rate-5m)",
          "tooltip": {
            "shared": true,
            "value_type": "cumulative",
            "ordering": "alphabetical",
            "msResolution": false
          },
          "type": "graph",
          "yaxes": [{
            "show": true,
            "min": null,
            "max": null,
            "logBase": 1,
            "format": "short"
          }, {
            "show": true,
            "min": null,
            "max": null,
            "logBase": 1,
            "format": "short"
          }],
          "xaxis": {
            "show": true
          }
        }, {
          "content": "#### Samples Ingested\nThis graph displays the count of samples ingested by the Prometheus server, as measured over the last 5 minutes, per time series in the range vector. When troubleshooting an issue on IRC or Github, this is often the first stat requested by the Prometheus team. ",
          "editable": true,
          "error": false,
          "id": 8,
          "links": [],
          "mode": "markdown",
          "span": 2.995914043583536,
          "style": {},
          "title": "",
          "transparent": true,
          "type": "text"
        }],
        "title": "New row"
      }, {
        "collapse": false,
        "editable": true,
        "height": "250px",
        "panels": [{
          "aliasColors": {
            "prometheus": "#F9BA8F",
            "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F"
          },
          "bars": false,
          "datasource": "${DS_PROMETHEUS}",
          "editable": true,
          "error": false,
          "fill": 1,
          "grid": {
            "threshold1": null,
            "threshold1Color": "rgba(216, 200, 27, 0.27)",
            "threshold2": null,
            "threshold2Color": "rgba(234, 112, 112, 0.22)"
          },
          "id": 2,
          "legend": {
            "avg": false,
            "current": false,
            "max": false,
            "min": false,
            "show": true,
            "total": false,
            "values": false
          },
          "lines": true,
          "linewidth": 2,
          "links": [],
          "nullPointMode": "connected",
          "percentage": false,
          "pointradius": 5,
          "points": false,
          "renderer": "flot",
          "seriesOverrides": [],
          "span": 5,
          "stack": false,
          "steppedLine": false,
          "targets": [{
            "expr": "rate(prometheus_target_interval_length_seconds_count[5m])",
            "intervalFactor": 2,
            "legendFormat": "{{job}}",
            "refId": "A",
            "step": 2
          }],
          "timeFrom": null,
          "timeShift": null,
          "title": "Target Scrapes (last 5m)",
          "tooltip": {
            "shared": true,
            "value_type": "cumulative",
            "ordering": "alphabetical",
            "msResolution": false
          },
          "type": "graph",
          "yaxes": [{
            "show": true,
            "min": null,
            "max": null,
            "logBase": 1,
            "format": "short"
          }, {
            "show": true,
            "min": null,
            "max": null,
            "logBase": 1,
            "format": "short"
          }],
          "xaxis": {
            "show": true
          }
        }, {
          "aliasColors": {},
          "bars": false,
          "datasource": "${DS_PROMETHEUS}",
          "editable": true,
          "error": false,
          "fill": 1,
          "grid": {
            "threshold1": null,
            "threshold1Color": "rgba(216, 200, 27, 0.27)",
            "threshold2": null,
            "threshold2Color": "rgba(234, 112, 112, 0.22)"
          },
          "id": 14,
          "legend": {
            "avg": false,
            "current": false,
            "max": false,
            "min": false,
            "show": true,
            "total": false,
            "values": false
          },
          "lines": true,
          "linewidth": 2,
          "links": [],
          "nullPointMode": "connected",
          "percentage": false,
          "pointradius": 5,
          "points": false,
          "renderer": "flot",
          "seriesOverrides": [],
          "span": 4,
          "stack": false,
          "steppedLine": false,
          "targets": [{
            "expr": "prometheus_target_interval_length_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}",
            "interval": "",
            "intervalFactor": 2,
            "legendFormat": "{{quantile}} ({{interval}})",
            "metric": "",
            "refId": "A",
            "step": 2
          }],
          "timeFrom": null,
          "timeShift": null,
          "title": "Scrape Duration",
          "tooltip": {
            "shared": true,
            "value_type": "cumulative",
            "ordering": "alphabetical",
            "msResolution": false
          },
          "type": "graph",
          "yaxes": [{
            "show": true,
            "min": null,
            "max": null,
            "logBase": 1,
            "format": "short"
          }, {
            "show": true,
            "min": null,
            "max": null,
            "logBase": 1,
            "format": "short"
          }],
          "xaxis": {
            "show": true
          }
        }, {
          "content": "#### Scrapes\nPrometheus scrapes metrics from instrumented jobs, either directly or via an intermediary push gateway for short-lived jobs. Target scrapes will show how frequently targets are scraped, as measured over the last 5 minutes, per time series in the range vector. Scrape Duration will show how long the scrapes are taking, with percentiles available as series. ",
          "editable": true,
          "error": false,
          "id": 11,
          "links": [],
          "mode": "markdown",
          "span": 3,
          "style": {},
          "title": "",
          "transparent": true,
          "type": "text"
        }],
        "title": "New row"
      }, {
        "collapse": false,
        "editable": true,
        "height": "250px",
        "panels": [{
          "aliasColors": {},
          "bars": false,
          "datasource": "${DS_PROMETHEUS}",
          "decimals": null,
          "editable": true,
          "error": false,
          "fill": 1,
          "grid": {
            "threshold1": null,
            "threshold1Color": "rgba(216, 200, 27, 0.27)",
            "threshold2": null,
            "threshold2Color": "rgba(234, 112, 112, 0.22)"
          },
          "id": 12,
          "legend": {
            "alignAsTable": false,
            "avg": false,
            "current": false,
            "hideEmpty": true,
            "max": false,
            "min": false,
            "show": true,
            "total": false,
            "values": false
          },
          "lines": true,
          "linewidth": 2,
          "links": [],
          "nullPointMode": "connected",
          "percentage": false,
          "pointradius": 5,
          "points": false,
          "renderer": "flot",
          "seriesOverrides": [],
          "span": 9,
          "stack": false,
          "steppedLine": false,
          "targets": [{
            "expr": "prometheus_evaluator_duration_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}",
            "interval": "",
            "intervalFactor": 2,
            "legendFormat": "{{quantile}}",
            "refId": "A",
            "step": 2
          }],
          "timeFrom": null,
          "timeShift": null,
          "title": "Rule Eval Duration",
          "tooltip": {
            "shared": true,
            "value_type": "cumulative",
            "ordering": "alphabetical",
            "msResolution": false
          },
          "type": "graph",
          "yaxes": [{
            "show": true,
            "min": null,
            "max": null,
            "logBase": 1,
            "format": "percentunit",
            "label": ""
          }, {
            "show": true,
            "min": null,
            "max": null,
            "logBase": 1,
            "format": "short"
          }],
          "xaxis": {
            "show": true
          }
        }, {
          "content": "#### Rule Evaluation Duration\nThis graph panel plots the duration for all evaluations to execute. The 50th percentile, 90th percentile and 99th percentile are shown as three separate series to help identify outliers that may be skewing the data.",
          "editable": true,
          "error": false,
          "id": 15,
          "links": [],
          "mode": "markdown",
          "span": 3,
          "style": {},
          "title": "",
          "transparent": true,
          "type": "text"
        }],
        "title": "New row"
      }],
      "time": {
        "from": "now-5m",
        "to": "now"
      },
      "timepicker": {
        "now": true,
        "refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"],
        "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"]
      },
      "templating": {
        "list": []
      },
      "annotations": {
        "list": []
      },
      "refresh": false,
      "schemaVersion": 12,
      "version": 0,
      "links": [{
        "icon": "info",
        "tags": [],
        "targetBlank": true,
        "title": "Grafana Docs",
        "tooltip": "",
        "type": "link",
        "url": "http://www.grafana.org/docs"
      }, {
        "icon": "info",
        "tags": [],
        "targetBlank": true,
        "title": "Prometheus Docs",
        "type": "link",
        "url": "http://prometheus.io/docs/introduction/overview/"
      }],
      "gnetId": 2,
      "description": "The  official, pre-built Prometheus Stats Dashboard."
    }
  grafana-net-737-dashboard.json: |
    {
      "__inputs": [{
        "name": "DS_PROMETHEUS",
        "label": "prometheus",
        "description": "",
        "type": "datasource",
        "pluginId": "prometheus",
        "pluginName": "Prometheus"
      }],
      "__requires": [{
        "type": "panel",
        "id": "singlestat",
        "name": "Singlestat",
        "version": ""
      }, {
        "type": "panel",
        "id": "graph",
        "name": "Graph",
        "version": ""
      }, {
        "type": "grafana",
        "id": "grafana",
        "name": "Grafana",
        "version": "3.1.0"
      }, {
        "type": "datasource",
        "id": "prometheus",
        "name": "Prometheus",
        "version": "1.0.0"
      }],
      "id": null,
      "title": "Kubernetes Pod Resources",
      "description": "Shows resource usage of Kubernetes pods.",
      "tags": [
        "kubernetes"
      ],
      "style": "dark",
      "timezone": "browser",
      "editable": true,
      "hideControls": false,
      "sharedCrosshair": false,
      "rows": [{
        "collapse": false,
        "editable": true,
        "height": "250px",
        "panels": [{
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": true,
          "colors": [
            "rgba(50, 172, 45, 0.97)",
            "rgba(237, 129, 40, 0.89)",
            "rgba(245, 54, 54, 0.9)"
          ],
          "datasource": "${DS_PROMETHEUS}",
          "editable": true,
          "error": false,
          "format": "percent",
          "gauge": {
            "maxValue": 100,
            "minValue": 0,
            "show": true,
            "thresholdLabels": false,
            "thresholdMarkers": true
          },
          "height": "180px",
          "id": 4,
          "interval": null,
          "isNew": true,
          "links": [],
          "mappingType": 1,
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": "",
          "postfixFontSize": "50%",
          "prefix": "",
          "prefixFontSize": "50%",
          "rangeMaps": [{
            "from": "null",
            "text": "N/A",
            "to": "null"
          }],
          "span": 4,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": false
          },
          "targets": [{
            "expr": "sum (container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum (machine_memory_bytes{instance=~\"^$instance$\"}) * 100",
            "interval": "",
            "intervalFactor": 2,
            "legendFormat": "",
            "refId": "A",
            "step": 2
          }],
          "thresholds": "65, 90",
          "timeFrom": "1m",
          "timeShift": null,
          "title": "Memory Working Set",
          "transparent": false,
          "type": "singlestat",
          "valueFontSize": "80%",
          "valueMaps": [{
            "op": "=",
            "text": "N/A",
            "value": "null"
          }],
          "valueName": "current"
        }, {
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": true,
          "colors": [
            "rgba(50, 172, 45, 0.97)",
            "rgba(237, 129, 40, 0.89)",
            "rgba(245, 54, 54, 0.9)"
          ],
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "format": "percent",
          "gauge": {
            "maxValue": 100,
            "minValue": 0,
            "show": true,
            "thresholdLabels": false,
            "thresholdMarkers": true
          },
          "height": "180px",
          "id": 6,
          "interval": null,
          "isNew": true,
          "links": [],
          "mappingType": 1,
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": "",
          "postfixFontSize": "50%",
          "prefix": "",
          "prefixFontSize": "50%",
          "rangeMaps": [{
            "from": "null",
            "text": "N/A",
            "to": "null"
          }],
          "span": 4,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": false
          },
          "targets": [{
            "expr": "sum(rate(container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m])) / sum (machine_cpu_cores{instance=~\"^$instance$\"}) * 100",
            "interval": "10s",
            "intervalFactor": 1,
            "refId": "A",
            "step": 10
          }],
          "thresholds": "65, 90",
          "timeFrom": "1m",
          "timeShift": null,
          "title": "Cpu Usage",
          "type": "singlestat",
          "valueFontSize": "80%",
          "valueMaps": [{
            "op": "=",
            "text": "N/A",
            "value": "null"
          }],
          "valueName": "current"
        }, {
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": true,
          "colors": [
            "rgba(50, 172, 45, 0.97)",
            "rgba(237, 129, 40, 0.89)",
            "rgba(245, 54, 54, 0.9)"
          ],
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "format": "percent",
          "gauge": {
            "maxValue": 100,
            "minValue": 0,
            "show": true,
            "thresholdLabels": false,
            "thresholdMarkers": true
          },
          "height": "180px",
          "id": 7,
          "interval": null,
          "isNew": true,
          "links": [],
          "mappingType": 1,
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": "",
          "postfixFontSize": "50%",
          "prefix": "",
          "prefixFontSize": "50%",
          "rangeMaps": [{
            "from": "null",
            "text": "N/A",
            "to": "null"
          }],
          "span": 4,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": false
          },
          "targets": [{
            "expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum(container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"}) * 100",
            "interval": "10s",
            "intervalFactor": 1,
            "legendFormat": "",
            "metric": "",
            "refId": "A",
            "step": 10
          }],
          "thresholds": "65, 90",
          "timeFrom": "1m",
          "timeShift": null,
          "title": "Filesystem Usage",
          "type": "singlestat",
          "valueFontSize": "80%",
          "valueMaps": [{
            "op": "=",
            "text": "N/A",
            "value": "null"
          }],
          "valueName": "current"
        }, {
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": false,
          "colors": [
            "rgba(50, 172, 45, 0.97)",
            "rgba(237, 129, 40, 0.89)",
            "rgba(245, 54, 54, 0.9)"
          ],
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "format": "bytes",
          "gauge": {
            "maxValue": 100,
            "minValue": 0,
            "show": false,
            "thresholdLabels": false,
            "thresholdMarkers": true
          },
          "height": "1px",
          "hideTimeOverride": true,
          "id": 9,
          "interval": null,
          "isNew": true,
          "links": [],
          "mappingType": 1,
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": "",
          "postfixFontSize": "20%",
          "prefix": "",
          "prefixFontSize": "20%",
          "rangeMaps": [{
            "from": "null",
            "text": "N/A",
            "to": "null"
          }],
          "span": 2,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": false
          },
          "targets": [{
            "expr": "sum(container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"})",
            "interval": "10s",
            "intervalFactor": 1,
            "refId": "A",
            "step": 10
          }],
          "thresholds": "",
          "timeFrom": "1m",
          "title": "Used",
          "type": "singlestat",
          "valueFontSize": "50%",
          "valueMaps": [{
            "op": "=",
            "text": "N/A",
            "value": "null"
          }],
          "valueName": "current"
        }, {
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": false,
          "colors": [
            "rgba(50, 172, 45, 0.97)",
            "rgba(237, 129, 40, 0.89)",
            "rgba(245, 54, 54, 0.9)"
          ],
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "format": "bytes",
          "gauge": {
            "maxValue": 100,
            "minValue": 0,
            "show": false,
            "thresholdLabels": false,
            "thresholdMarkers": true
          },
          "height": "1px",
          "hideTimeOverride": true,
          "id": 10,
          "interval": null,
          "isNew": true,
          "links": [],
          "mappingType": 1,
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": "",
          "postfixFontSize": "50%",
          "prefix": "",
          "prefixFontSize": "50%",
          "rangeMaps": [{
            "from": "null",
            "text": "N/A",
            "to": "null"
          }],
          "span": 2,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": false
          },
          "targets": [{
            "expr": "sum (machine_memory_bytes{instance=~\"^$instance$\"})",
            "interval": "10s",
            "intervalFactor": 1,
            "refId": "A",
            "step": 10
          }],
          "thresholds": "",
          "timeFrom": "1m",
          "title": "Total",
          "type": "singlestat",
          "valueFontSize": "50%",
          "valueMaps": [{
            "op": "=",
            "text": "N/A",
            "value": "null"
          }],
          "valueName": "current"
        }, {
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": false,
          "colors": [
            "rgba(50, 172, 45, 0.97)",
            "rgba(237, 129, 40, 0.89)",
            "rgba(245, 54, 54, 0.9)"
          ],
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "format": "none",
          "gauge": {
            "maxValue": 100,
            "minValue": 0,
            "show": false,
            "thresholdLabels": false,
            "thresholdMarkers": true
          },
          "height": "1px",
          "hideTimeOverride": true,
          "id": 11,
          "interval": null,
          "isNew": true,
          "links": [],
          "mappingType": 1,
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": " cores",
          "postfixFontSize": "30%",
          "prefix": "",
          "prefixFontSize": "50%",
          "rangeMaps": [{
            "from": "null",
            "text": "N/A",
            "to": "null"
          }],
          "span": 2,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": false
          },
          "targets": [{
            "expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m]))",
            "interval": "10s",
            "intervalFactor": 1,
            "refId": "A",
            "step": 10
          }],
          "thresholds": "",
          "timeFrom": "1m",
          "timeShift": null,
          "title": "Used",
          "type": "singlestat",
          "valueFontSize": "50%",
          "valueMaps": [{
            "op": "=",
            "text": "N/A",
            "value": "null"
          }],
          "valueName": "current"
        }, {
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": false,
          "colors": [
            "rgba(50, 172, 45, 0.97)",
            "rgba(237, 129, 40, 0.89)",
            "rgba(245, 54, 54, 0.9)"
          ],
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "format": "none",
          "gauge": {
            "maxValue": 100,
            "minValue": 0,
            "show": false,
            "thresholdLabels": false,
            "thresholdMarkers": true
          },
          "height": "1px",
          "hideTimeOverride": true,
          "id": 12,
          "interval": null,
          "isNew": true,
          "links": [],
          "mappingType": 1,
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": " cores",
          "postfixFontSize": "30%",
          "prefix": "",
          "prefixFontSize": "50%",
          "rangeMaps": [{
            "from": "null",
            "text": "N/A",
            "to": "null"
          }],
          "span": 2,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": false
          },
          "targets": [{
            "expr": "sum (machine_cpu_cores{instance=~\"^$instance$\"})",
            "interval": "10s",
            "intervalFactor": 1,
            "refId": "A",
            "step": 10
          }],
          "thresholds": "",
          "timeFrom": "1m",
          "title": "Total",
          "type": "singlestat",
          "valueFontSize": "50%",
          "valueMaps": [{
            "op": "=",
            "text": "N/A",
            "value": "null"
          }],
          "valueName": "current"
        }, {
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": false,
          "colors": [
            "rgba(50, 172, 45, 0.97)",
            "rgba(237, 129, 40, 0.89)",
            "rgba(245, 54, 54, 0.9)"
          ],
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "format": "bytes",
          "gauge": {
            "maxValue": 100,
            "minValue": 0,
            "show": false,
            "thresholdLabels": false,
            "thresholdMarkers": true
          },
          "height": "1px",
          "hideTimeOverride": true,
          "id": 13,
          "interval": null,
          "isNew": true,
          "links": [],
          "mappingType": 1,
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": "",
          "postfixFontSize": "50%",
          "prefix": "",
          "prefixFontSize": "50%",
          "rangeMaps": [{
            "from": "null",
            "text": "N/A",
            "to": "null"
          }],
          "span": 2,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": false
          },
          "targets": [{
            "expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"})",
            "interval": "10s",
            "intervalFactor": 1,
            "refId": "A",
            "step": 10
          }],
          "thresholds": "",
          "timeFrom": "1m",
          "title": "Used",
          "type": "singlestat",
          "valueFontSize": "50%",
          "valueMaps": [{
            "op": "=",
            "text": "N/A",
            "value": "null"
          }],
          "valueName": "current"
        }, {
          "cacheTimeout": null,
          "colorBackground": false,
          "colorValue": false,
          "colors": [
            "rgba(50, 172, 45, 0.97)",
            "rgba(237, 129, 40, 0.89)",
            "rgba(245, 54, 54, 0.9)"
          ],
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "format": "bytes",
          "gauge": {
            "maxValue": 100,
            "minValue": 0,
            "show": false,
            "thresholdLabels": false,
            "thresholdMarkers": true
          },
          "height": "1px",
          "hideTimeOverride": true,
          "id": 14,
          "interval": null,
          "isNew": true,
          "links": [],
          "mappingType": 1,
          "mappingTypes": [{
            "name": "value to text",
            "value": 1
          }, {
            "name": "range to text",
            "value": 2
          }],
          "maxDataPoints": 100,
          "nullPointMode": "connected",
          "nullText": null,
          "postfix": "",
          "postfixFontSize": "50%",
          "prefix": "",
          "prefixFontSize": "50%",
          "rangeMaps": [{
            "from": "null",
            "text": "N/A",
            "to": "null"
          }],
          "span": 2,
          "sparkline": {
            "fillColor": "rgba(31, 118, 189, 0.18)",
            "full": false,
            "lineColor": "rgb(31, 120, 193)",
            "show": false
          },
          "targets": [{
            "expr": "sum (container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"})",
            "interval": "10s",
            "intervalFactor": 1,
            "refId": "A",
            "step": 10
          }],
          "thresholds": "",
          "timeFrom": "1m",
          "title": "Total",
          "type": "singlestat",
          "valueFontSize": "50%",
          "valueMaps": [{
            "op": "=",
            "text": "N/A",
            "value": "null"
          }],
          "valueName": "current"
        }, {
          "aliasColors": {},
          "bars": false,
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "fill": 1,
          "grid": {
            "threshold1": null,
            "threshold1Color": "rgba(216, 200, 27, 0.27)",
            "threshold2": null,
            "threshold2Color": "rgba(234, 112, 112, 0.22)",
            "thresholdLine": false
          },
          "height": "200px",
          "id": 32,
          "isNew": true,
          "legend": {
            "alignAsTable": true,
            "avg": true,
            "current": true,
            "max": false,
            "min": false,
            "rightSide": true,
            "show": true,
            "sideWidth": 200,
            "sort": "current",
            "sortDesc": true,
            "total": false,
            "values": true
          },
          "lines": true,
          "linewidth": 2,
          "links": [],
          "nullPointMode": "connected",
          "percentage": false,
          "pointradius": 5,
          "points": false,
          "renderer": "flot",
          "seriesOverrides": [],
          "span": 12,
          "stack": false,
          "steppedLine": false,
          "targets": [{
            "expr": "sum(rate(container_network_receive_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))",
            "interval": "",
            "intervalFactor": 2,
            "legendFormat": "receive",
            "metric": "network",
            "refId": "A",
            "step": 240
          }, {
            "expr": "- sum(rate(container_network_transmit_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))",
            "interval": "",
            "intervalFactor": 2,
            "legendFormat": "transmit",
            "metric": "network",
            "refId": "B",
            "step": 240
          }],
          "timeFrom": null,
          "timeShift": null,
          "title": "Network",
          "tooltip": {
            "msResolution": false,
            "shared": true,
            "sort": 0,
            "value_type": "cumulative"
          },
          "transparent": false,
          "type": "graph",
          "xaxis": {
            "show": true
          },
          "yaxes": [{
            "format": "Bps",
            "label": "transmit / receive",
            "logBase": 1,
            "max": null,
            "min": null,
            "show": true
          }, {
            "format": "Bps",
            "label": null,
            "logBase": 1,
            "max": null,
            "min": null,
            "show": false
          }]
        }],
        "showTitle": true,
        "title": "all pods"
      }, {
        "collapse": false,
        "editable": true,
        "height": "250px",
        "panels": [{
          "aliasColors": {},
          "bars": false,
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 3,
          "editable": true,
          "error": false,
          "fill": 0,
          "grid": {
            "threshold1": null,
            "threshold1Color": "rgba(216, 200, 27, 0.27)",
            "threshold2": null,
            "threshold2Color": "rgba(234, 112, 112, 0.22)"
          },
          "height": "",
          "id": 17,
          "isNew": true,
          "legend": {
            "alignAsTable": true,
            "avg": true,
            "current": true,
            "hideEmpty": true,
            "hideZero": true,
            "max": false,
            "min": false,
            "rightSide": true,
            "show": true,
            "sideWidth": null,
            "sort": "current",
            "sortDesc": true,
            "total": false,
            "values": true
          },
          "lines": true,
          "linewidth": 2,
          "links": [],
          "nullPointMode": "connected",
          "percentage": false,
          "pointradius": 5,
          "points": false,
          "renderer": "flot",
          "seriesOverrides": [],
          "span": 12,
          "stack": false,
          "steppedLine": false,
          "targets": [{
            "expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)",
            "interval": "",
            "intervalFactor": 2,
            "legendFormat": "{{ pod_name }}",
            "metric": "container_cpu",
            "refId": "A",
            "step": 240
          }],
          "timeFrom": null,
          "timeShift": null,
          "title": "Cpu Usage",
          "tooltip": {
            "msResolution": true,
            "shared": false,
            "sort": 2,
            "value_type": "cumulative"
          },
          "transparent": false,
          "type": "graph",
          "xaxis": {
            "show": true
          },
          "yaxes": [{
            "format": "none",
            "label": "cores",
            "logBase": 1,
            "max": null,
            "min": null,
            "show": true
          }, {
            "format": "short",
            "label": null,
            "logBase": 1,
            "max": null,
            "min": null,
            "show": false
          }]
        }, {
          "aliasColors": {},
          "bars": false,
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "fill": 0,
          "grid": {
            "threshold1": null,
            "threshold1Color": "rgba(216, 200, 27, 0.27)",
            "threshold2": null,
            "threshold2Color": "rgba(234, 112, 112, 0.22)"
          },
          "id": 33,
          "isNew": true,
          "legend": {
            "alignAsTable": true,
            "avg": true,
            "current": true,
            "hideEmpty": true,
            "hideZero": true,
            "max": false,
            "min": false,
            "rightSide": true,
            "show": true,
            "sideWidth": null,
            "sort": "current",
            "sortDesc": true,
            "total": false,
            "values": true
          },
          "lines": true,
          "linewidth": 2,
          "links": [],
          "nullPointMode": "null",
          "percentage": false,
          "pointradius": 5,
          "points": false,
          "renderer": "flot",
          "seriesOverrides": [],
          "span": 12,
          "stack": false,
          "steppedLine": false,
          "targets": [{
            "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod_name)",
            "interval": "",
            "intervalFactor": 2,
            "legendFormat": "{{ pod_name }}",
            "metric": "",
            "refId": "A",
            "step": 240
          }],
          "timeFrom": null,
          "timeShift": null,
          "title": "Memory Working Set",
          "tooltip": {
            "msResolution": false,
            "shared": false,
            "sort": 2,
            "value_type": "cumulative"
          },
          "type": "graph",
          "xaxis": {
            "show": true
          },
          "yaxes": [{
            "format": "bytes",
            "label": "used",
            "logBase": 1,
            "max": null,
            "min": null,
            "show": true
          }, {
            "format": "short",
            "label": null,
            "logBase": 1,
            "max": null,
            "min": null,
            "show": false
          }]
        }, {
          "aliasColors": {},
          "bars": false,
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "fill": 1,
          "grid": {
            "threshold1": null,
            "threshold1Color": "rgba(216, 200, 27, 0.27)",
            "threshold2": null,
            "threshold2Color": "rgba(234, 112, 112, 0.22)"
          },
          "id": 16,
          "isNew": true,
          "legend": {
            "alignAsTable": true,
            "avg": true,
            "current": true,
            "hideEmpty": true,
            "hideZero": true,
            "max": false,
            "min": false,
            "rightSide": true,
            "show": true,
            "sideWidth": 200,
            "sort": "avg",
            "sortDesc": true,
            "total": false,
            "values": true
          },
          "lines": true,
          "linewidth": 2,
          "links": [],
          "nullPointMode": "null",
          "percentage": false,
          "pointradius": 5,
          "points": false,
          "renderer": "flot",
          "seriesOverrides": [],
          "span": 12,
          "stack": false,
          "steppedLine": false,
          "targets": [{
            "expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)",
            "interval": "",
            "intervalFactor": 2,
            "legendFormat": "{{ pod_name }} < in",
            "metric": "network",
            "refId": "A",
            "step": 240
          }, {
            "expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)",
            "interval": "",
            "intervalFactor": 2,
            "legendFormat": "{{ pod_name }} > out",
            "metric": "network",
            "refId": "B",
            "step": 240
          }],
          "timeFrom": null,
          "timeShift": null,
          "title": "Network",
          "tooltip": {
            "msResolution": false,
            "shared": false,
            "sort": 2,
            "value_type": "cumulative"
          },
          "type": "graph",
          "xaxis": {
            "show": true
          },
          "yaxes": [{
            "format": "Bps",
            "label": "transmit / receive",
            "logBase": 1,
            "max": null,
            "min": null,
            "show": true
          }, {
            "format": "short",
            "label": null,
            "logBase": 1,
            "max": null,
            "min": null,
            "show": false
          }]
        }, {
          "aliasColors": {},
          "bars": false,
          "datasource": "${DS_PROMETHEUS}",
          "decimals": 2,
          "editable": true,
          "error": false,
          "fill": 1,
          "grid": {
            "threshold1": null,
            "threshold1Color": "rgba(216, 200, 27, 0.27)",
            "threshold2": null,
            "threshold2Color": "rgba(234, 112, 112, 0.22)"
          },
          "id": 34,
          "isNew": true,
          "legend": {
            "alignAsTable": true,
            "avg": true,
            "current": true,
            "hideEmpty": true,
            "hideZero": true,
            "max": false,
            "min": false,
            "rightSide": true,
            "show": true,
            "sideWidth": 200,
            "sort": "current",
            "sortDesc": true,
            "total": false,
            "values": true
          },
          "lines": true,
          "linewidth": 2,
          "links": [],
          "nullPointMode": "null",
          "percentage": false,
          "pointradius": 5,
          "points": false,
          "renderer": "flot",
          "seriesOverrides": [],
          "span": 12,
          "stack": false,
          "steppedLine": false,
          "targets": [{
            "expr": "sum(container_fs_usage_bytes{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod_name)",
            "interval": "",
            "intervalFactor": 2,
            "legendFormat": "{{ pod_name }}",
            "metric": "network",
            "refId": "A",
            "step": 240
          }],
          "timeFrom": null,
          "timeShift": null,
          "title": "Filesystem",
          "tooltip": {
            "msResolution": false,
            "shared": false,
            "sort": 2,
            "value_type": "cumulative"
          },
          "type": "graph",
          "xaxis": {
            "show": true
          },
          "yaxes": [{
            "format": "bytes",
            "label": "used",
            "logBase": 1,
            "max": null,
            "min": null,
            "show": true
          }, {
            "format": "short",
            "label": null,
            "logBase": 1,
            "max": null,
            "min": null,
            "show": false
          }]
        }],
        "showTitle": true,
        "title": "each pod"
      }],
      "time": {
        "from": "now-3d",
        "to": "now"
      },
      "timepicker": {
        "refresh_intervals": [
          "5s",
          "10s",
          "30s",
          "1m",
          "5m",
          "15m",
          "30m",
          "1h",
          "2h",
          "1d"
        ],
        "time_options": [
          "5m",
          "15m",
          "1h",
          "6h",
          "12h",
          "24h",
          "2d",
          "7d",
          "30d"
        ]
      },
      "templating": {
        "list": [{
          "allValue": ".*",
          "current": {},
          "datasource": "${DS_PROMETHEUS}",
          "hide": 0,
          "includeAll": true,
          "label": "Instance",
          "multi": false,
          "name": "instance",
          "options": [],
          "query": "label_values(instance)",
          "refresh": 1,
          "regex": "",
          "type": "query"
        }, {
          "current": {},
          "datasource": "${DS_PROMETHEUS}",
          "hide": 0,
          "includeAll": true,
          "label": "Namespace",
          "multi": true,
          "name": "namespace",
          "options": [],
          "query": "label_values(namespace)",
          "refresh": 1,
          "regex": "",
          "type": "query"
        }]
      },
      "annotations": {
        "list": []
      },
      "refresh": false,
      "schemaVersion": 12,
      "version": 8,
      "links": [],
      "gnetId": 737
    }
  prometheus-datasource.json: |
    {
      "name": "prometheus",
      "type": "prometheus",
      "url": "http://prometheus:9090",
      "access": "proxy",
      "basicAuth": false
    }
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: grafana-import-dashboards
  namespace: monitoring

---
apiVersion: v1
data:
  prometheus.yaml: |
    global:
      scrape_interval: 10s
      scrape_timeout: 10s
      evaluation_interval: 10s
    rule_files:
      - "/etc/prometheus-rules/*.rules"
    scrape_configs:

      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L37
      - job_name: 'kubernetes-nodes'
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - source_labels: [__address__]
            regex: '(.*):10250'
            replacement: '${1}:10255'
            target_label: __address__

      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L79
      - job_name: 'kubernetes-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: (.+)(?::\d+);(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name

      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L119
      - job_name: 'kubernetes-services'
        metrics_path: /probe
        params:
          module: [http_2xx]
        kubernetes_sd_configs:
          - role: service
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: true
          - source_labels: [__address__]
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            target_label: kubernetes_name

      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L156
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: (.+):(?:\d+);(\d+)
            replacement: ${1}:${2}
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
          - source_labels: [__meta_kubernetes_pod_container_port_number]
            action: keep
            regex: 9\d{3}

      - job_name: 'kubernetes-cadvisor'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

kind: ConfigMap
metadata:
  creationTimestamp: null
  name: prometheus-core
  namespace: monitoring

---
apiVersion: v1
data:
  cpu-usage.rules: |
    ALERT NodeCPUUsage
      IF (100 - (avg by (instance) (irate(node_cpu{name="node-exporter",mode="idle"}[5m])) * 100)) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: High CPU usage detected",
        DESCRIPTION = "{{$labels.instance}}: CPU usage is above 75% (current value is: {{ $value }})"
      }
  instance-availability.rules: |
    ALERT InstanceDown
      IF up == 0
      FOR 1m
      LABELS { severity = "page" }
      ANNOTATIONS {
        summary = "Instance {{ $labels.instance }} down",
        description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
      }
  low-disk-space.rules: |
    ALERT NodeLowRootDisk
      IF ((node_filesystem_size{mountpoint="/root-disk"} - node_filesystem_free{mountpoint="/root-disk"} ) / node_filesystem_size{mountpoint="/root-disk"} * 100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Low root disk space",
        DESCRIPTION = "{{$labels.instance}}: Root disk usage is above 75% (current value is: {{ $value }})"
      }

    ALERT NodeLowDataDisk
      IF ((node_filesystem_size{mountpoint="/data-disk"} - node_filesystem_free{mountpoint="/data-disk"} ) / node_filesystem_size{mountpoint="/data-disk"} * 100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Low data disk space",
        DESCRIPTION = "{{$labels.instance}}: Data disk usage is above 75% (current value is: {{ $value }})"
      }
  mem-usage.rules: |
    ALERT NodeSwapUsage
      IF (((node_memory_SwapTotal-node_memory_SwapFree)/node_memory_SwapTotal)*100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Swap usage detected",
        DESCRIPTION = "{{$labels.instance}}: Swap usage usage is above 75% (current value is: {{ $value }})"
      }

    ALERT NodeMemoryUsage
      IF (((node_memory_MemTotal-node_memory_MemFree-node_memory_Cached)/(node_memory_MemTotal)*100)) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: High memory usage detected",
        DESCRIPTION = "{{$labels.instance}}: Memory usage is above 75% (current value is: {{ $value }})"
      }
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: prometheus-rules
  namespace: monitoring

manifests-rbac.yaml

---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus-k8s
    namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources:
      - configmaps
    verbs: ["get"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-k8s
  namespace: monitoring
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-state-metrics
  namespace: monitoring

manifests-svc.yaml

---
apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/path: '/metrics'
  labels:
    name: alertmanager
  name: alertmanager
  namespace: monitoring
spec:
  selector:
    app: alertmanager
  type: NodePort
  ports:
    - name: alertmanager
      protocol: TCP
      port: 9093
      targetPort: 9093
---
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: monitoring
  labels:
    app: grafana
    component: core
spec:
  type: NodePort
  ports:
    - port: 3000
  selector:
    app: grafana
    component: core
---
apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/scrape: 'true'
  name: kube-state-metrics
  namespace: monitoring
  labels:
    app: kube-state-metrics
spec:
  ports:
    - name: kube-state-metrics
      port: 8080
      protocol: TCP
  selector:
    app: kube-state-metrics

---
apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/scrape: 'true'
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app: prometheus
    component: node-exporter
spec:
  clusterIP: None
  ports:
    - name: prometheus-node-exporter
      port: 9100
      protocol: TCP
  selector:
    app: prometheus
    component: node-exporter
  type: ClusterIP
---
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    app: prometheus
    component: core
  annotations:
    prometheus.io/scrape: 'true'
spec:
  type: NodePort
  ports:
    - port: 9090
      protocol: TCP
      name: webui
  selector:
    app: prometheus
    component: core

manifests-deploy.yaml

---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: alertmanager
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: alertmanager
  template:
    metadata:
      name: alertmanager
      labels:
        app: alertmanager
    spec:
      containers:
        - name: alertmanager
          image: quay.io/prometheus/alertmanager:v0.7.1
          args:
            - '-config.file=/etc/alertmanager/config.yml'
            - '-storage.path=/alertmanager'
          ports:
            - name: alertmanager
              containerPort: 9093
          volumeMounts:
            - name: config-volume
              mountPath: /etc/alertmanager
            - name: templates-volume
              mountPath: /etc/alertmanager-templates
            - name: alertmanager
              mountPath: /alertmanager
      volumes:
        - name: config-volume
          configMap:
            name: alertmanager
        - name: templates-volume
          configMap:
            name: alertmanager-templates
        - name: alertmanager
          emptyDir: {}

---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: grafana-core
  namespace: monitoring
  labels:
    app: grafana
    component: core
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: grafana
        component: core
    spec:
      nodeSelector:
        type: outer
      containers:
        - image: grafana/grafana:4.2.0
          name: grafana-core
          imagePullPolicy: IfNotPresent
          # env:
          resources:
            # keep request = limit to keep this container in guaranteed class
            limits:
              cpu: 100m
              memory: 100Mi
            requests:
              cpu: 100m
              memory: 100Mi
          ports:
            - containerPort: 3000
              hostPort: 3000
          env:
            # The following env variables set up basic auth twith the default admin user and admin password.
            - name: GF_AUTH_BASIC_ENABLED
              value: "true"
            - name: GF_SECURITY_ADMIN_USER
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-username
            - name: GF_SECURITY_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-password
            - name: GF_AUTH_ANONYMOUS_ENABLED
              value: "false"
            # - name: GF_AUTH_ANONYMOUS_ORG_ROLE
            #   value: Admin
            # does not really work, because of template variables in exported dashboards:
            # - name: GF_DASHBOARDS_JSON_ENABLED
            #   value: "true"
          readinessProbe:
            httpGet:
              path: /login
              port: 3000
            # initialDelaySeconds: 30
            # timeoutSeconds: 1
          volumeMounts:
            - name: grafana-persistent-storage
              mountPath: /var/lib/grafana
      volumes:
        - name: grafana-persistent-storage
          emptyDir: {}

---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: prometheus-core
  namespace: monitoring
  labels:
    app: prometheus
    component: core
spec:
  replicas: 1
  template:
    metadata:
      name: prometheus-main
      labels:
        app: prometheus
        component: core
    spec:
      serviceAccountName: prometheus-k8s
      containers:
        - name: prometheus
          image: prom/prometheus:v1.7.0
          args:
            - '-storage.local.retention=12h'
            - '-storage.local.memory-chunks=500000'
            - '-config.file=/etc/prometheus/prometheus.yaml'
            - '-alertmanager.url=http://alertmanager:9093/'
          ports:
            - name: webui
              containerPort: 9090
          resources:
            requests:
              cpu: 500m
              memory: 500M
            limits:
              cpu: 500m
              memory: 500M
          volumeMounts:
            - name: config-volume
              mountPath: /etc/prometheus
            - name: rules-volume
              mountPath: /etc/prometheus-rules
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-core
        - name: rules-volume
          configMap:
            name: prometheus-rules
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: monitoring
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
        - name: kube-state-metrics
          image: registry.cn-hangzhou.aliyuncs.com/google_containers/kube-state-metrics:v0.5.0
          ports:
            - containerPort: 8080

manifests-ds.yaml

---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: node-directory-size-metrics
  namespace: monitoring
  annotations:
    description: |
      This `DaemonSet` provides metrics in Prometheus format about disk usage on the nodes.
      The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
      The other container `caddy` just hands out the contents of that file on request via `http` on `/metrics` at port `9102` which are the defaults for Prometheus.
      These are scheduled on every node in the Kubernetes cluster.
      To choose directories from the node to check, just mount them on the `read-du` container below `/mnt`.
spec:
  template:
    metadata:
      labels:
        app: node-directory-size-metrics
      annotations:
        prometheus.io/scrape: 'true'
        prometheus.io/port: '9102'
        description: |
          This `Pod` provides metrics in Prometheus format about disk usage on the node.
          The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
          The other container `caddy` just hands out the contents of that file on request on `/metrics` at port `9102` which are the defaults for Prometheus.
          This `Pod` is scheduled on every node in the Kubernetes cluster.
          To choose directories from the node to check just mount them on `read-du` below `/mnt`.
    spec:
      containers:
        - name: read-du
          image: giantswarm/tiny-tools
          imagePullPolicy: Always
          # FIXME threshold via env var
          # The
          command:
            - fish
            - --command
            - |
              touch /tmp/metrics-temp
              while true
                for directory in (du --bytes --separate-dirs --threshold=100M /mnt)
                  echo $directory | read size path
                  echo "node_directory_size_bytes{path=\"$path\"} $size" \
                    >> /tmp/metrics-temp
                end
                mv /tmp/metrics-temp /tmp/metrics
                sleep 300
              end
          volumeMounts:
            - name: host-fs-var
              mountPath: /mnt/var
              readOnly: true
            - name: metrics
              mountPath: /tmp
        - name: caddy
          image: dockermuenster/caddy:0.9.3
          command:
            - "caddy"
            - "-port=9102"
            - "-root=/var/www"
          ports:
            - containerPort: 9102
          volumeMounts:
            - name: metrics
              mountPath: /var/www
      volumes:
        - name: host-fs-var
          hostPath:
            path: /var
        - name: metrics
          emptyDir:
            medium: Memory
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app: prometheus
    component: node-exporter
spec:
  template:
    metadata:
      name: prometheus-node-exporter
      labels:
        app: prometheus
        component: node-exporter
    spec:
      containers:
        - image: prom/node-exporter:v0.14.0
          name: prometheus-node-exporter
          ports:
            - name: prom-node-exp
              #^ must be an IANA_SVC_NAME (at most 15 characters, ..)
              containerPort: 9100
              hostPort: 9100
      hostNetwork: true
      hostPID: true

manifests-job.yaml

---
apiVersion: batch/v1
kind: Job
metadata:
  name: grafana-import-dashboards
  namespace: monitoring
  labels:
    app: grafana
    component: import-dashboards
spec:
  template:
    metadata:
      name: grafana-import-dashboards
      labels:
        app: grafana
        component: import-dashboards
    spec:
      serviceAccountName: prometheus-k8s
      initContainers:
        - name: wait-for-grafana
          image: giantswarm/tiny-tools
          args:
            - /bin/sh
            - -c
            - >
              set -x;
              while [ $(curl -Lsw '%{http_code}' "http://grafana:3000" -o /dev/null) -ne 200 ]; do
                echo '.'
                sleep 15;
              done
      containers:
        - name: grafana-import-dashboards
          image: giantswarm/tiny-tools
          command: ["/bin/sh", "-c"]
          workingDir: /opt/grafana-import-dashboards
          args:
            - >
              for file in *-datasource.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  curl --silent --fail --show-error \
                    --request POST http://${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}@grafana:3000/api/datasources \
                    --header "Content-Type: application/json" \
                    --data-binary "@$file" ;
                  echo "" ;
                fi
              done ;
              for file in *-dashboard.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  ( echo '{"dashboard":'; \
                    cat "$file"; \
                    echo ',"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}' ) \
                  | jq -c '.' \
                  | curl --silent --fail --show-error \
                    --request POST http://${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}@grafana:3000/api/dashboards/import \
                    --header "Content-Type: application/json" \
                    --data-binary "@-" ;
                  echo "" ;
                fi
              done

          env:
            - name: GF_ADMIN_USER
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-username
            - name: GF_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-password
          volumeMounts:
            - name: config-volume
              mountPath: /opt/grafana-import-dashboards
      restartPolicy: Never
      volumes:
        - name: config-volume
          configMap:
            name: grafana-import-dashboards

猜你喜欢

转载自blog.csdn.net/qq_35559756/article/details/89914546