Files
k3s-cluster/monitoring/grafana-dashboard-cluster-overview.yaml

332 lines
14 KiB
YAML
Raw Permalink Normal View History

# Kubernetes ConfigMap that carries the "Cluster Overview" Grafana dashboard as
# an embedded JSON document.
# NOTE(review): nesting indentation appears to have been lost in this copy
# (name/namespace/labels would normally sit under metadata) - confirm against
# the repository file.
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboard-cluster-overview
namespace: monitoring
labels:
app: grafana
# Presumably the marker a Grafana dashboard sidecar watches for; quoted so it
# stays the string "1" rather than an integer - TODO confirm sidecar config.
grafana_dashboard: "1"
data:
# Literal block scalar (|): the JSON below is stored verbatim, newlines kept.
cluster-overview.json: |
{
"id": null,
"editable": true,
"liveNow": false,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"annotations": {
  "list": []
},
"links": [],
"panels": [
{
  "id": 1,
  "type": "stat",
  "title": "Prometheus Uptime",
  "pluginVersion": "10.2.3",
  "datasource": {
    "type": "prometheus",
    "uid": "Prometheus"
  },
  "gridPos": {"x": 0, "y": 0, "w": 4, "h": 5},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "time() - max(process_start_time_seconds{job=\"prometheus\"})"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "s",
      "color": {"mode": "thresholds"},
      "thresholds": {
        "mode": "absolute",
        "steps": [{"color": "green", "value": null}]
      }
    },
    "overrides": []
  },
  "options": {
    "textMode": "auto",
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "auto",
    "orientation": "auto",
    "reduceOptions": {
      "calcs": ["lastNotNull"],
      "fields": "",
      "values": false
    }
  }
},
{
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "description": "Total pods running across all nodes: the per-kubelet kubelet_running_pods gauge summed over every reporting kubelet.",
  "fieldConfig": {
    "defaults": {
      "color": {"mode": "thresholds"},
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "red", "value": null},
          {"color": "green", "value": 1}
        ]
      }
    },
    "overrides": []
  },
  "gridPos": {"h": 5, "w": 4, "x": 4, "y": 0},
  "id": 2,
  "options": {
    "colorMode": "background",
    "graphMode": "none",
    "justifyMode": "center",
    "orientation": "horizontal",
    "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
    "textMode": "value_and_name"
  },
  "pluginVersion": "10.2.3",
  "targets": [
    {
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "sum(kubelet_running_pods)",
      "refId": "A"
    }
  ],
  "title": "Running Pods (total)",
  "type": "stat"
},
{
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "description": "Containers in the running state, summed over all kubelets. kubelet_running_containers is labelled by container_state, so the selector keeps created, exited and unknown containers out of the total.",
  "fieldConfig": {
    "defaults": {
      "color": {"mode": "thresholds"},
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      }
    },
    "overrides": []
  },
  "gridPos": {"h": 5, "w": 4, "x": 8, "y": 0},
  "id": 3,
  "options": {
    "colorMode": "background",
    "graphMode": "none",
    "justifyMode": "center",
    "orientation": "horizontal",
    "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
    "textMode": "value_and_name"
  },
  "pluginVersion": "10.2.3",
  "targets": [
    {
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "sum(kubelet_running_containers{container_state=\"running\"})",
      "refId": "A"
    }
  ],
  "title": "Running Containers",
  "type": "stat"
},
{
  "id": 4,
  "type": "stat",
  "title": "Control Plane & Node Exporters",
  "pluginVersion": "10.2.3",
  "datasource": {
    "type": "prometheus",
    "uid": "Prometheus"
  },
  "gridPos": {"x": 12, "y": 0, "w": 12, "h": 5},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "up{job=\"kubernetes-apiservers\"}"
    },
    {
      "refId": "B",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "up{job=\"kubernetes-nodes\"}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "color": {"mode": "thresholds"},
      "mappings": [
        {
          "type": "value",
          "options": {
            "0": {"text": "Down", "color": "red"},
            "1": {"text": "Up", "color": "green"}
          }
        }
      ],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "red", "value": null},
          {"color": "green", "value": 1}
        ]
      }
    },
    "overrides": []
  },
  "options": {
    "textMode": "value_and_name",
    "colorMode": "background",
    "graphMode": "none",
    "justifyMode": "center",
    "orientation": "horizontal",
    "reduceOptions": {
      "calcs": ["lastNotNull"],
      "fields": "",
      "values": false
    }
  }
},
{
  "id": 10,
  "type": "timeseries",
  "title": "Memory Usage by Namespace",
  "datasource": {
    "type": "prometheus",
    "uid": "Prometheus"
  },
  "gridPos": {"x": 0, "y": 5, "w": 12, "h": 9},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "sum(container_memory_working_set_bytes{container!=\"\",container!=\"POD\"}) by (namespace)",
      "legendFormat": "{{namespace}}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "bytes",
      "color": {"mode": "palette-classic"},
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      },
      "custom": {
        "drawStyle": "line",
        "lineInterpolation": "linear",
        "lineWidth": 1,
        "fillOpacity": 10,
        "gradientMode": "none",
        "pointSize": 5,
        "showPoints": "never",
        "spanNulls": true,
        "insertNulls": false,
        "barAlignment": 0,
        "axisCenteredZero": false,
        "axisColorMode": "text",
        "axisLabel": "",
        "axisPlacement": "auto",
        "scaleDistribution": {"type": "linear"},
        "stacking": {"group": "A", "mode": "none"},
        "thresholdsStyle": {"mode": "off"},
        "hideFrom": {
          "legend": false,
          "tooltip": false,
          "viz": false
        }
      }
    },
    "overrides": []
  },
  "options": {
    "legend": {
      "showLegend": true,
      "displayMode": "table",
      "placement": "right",
      "calcs": ["lastNotNull"]
    },
    "tooltip": {
      "mode": "multi",
      "sort": "desc"
    }
  }
},
{
  "id": 11,
  "type": "timeseries",
  "title": "CPU Usage by Namespace",
  "datasource": {
    "type": "prometheus",
    "uid": "Prometheus"
  },
  "gridPos": {"x": 12, "y": 5, "w": 12, "h": 9},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "sum(rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\"}[5m])) by (namespace)",
      "legendFormat": "{{namespace}}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "core",
      "color": {"mode": "palette-classic"},
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      },
      "custom": {
        "drawStyle": "line",
        "lineInterpolation": "linear",
        "lineWidth": 1,
        "fillOpacity": 10,
        "showPoints": "never",
        "spanNulls": true,
        "stacking": {"group": "A", "mode": "none"},
        "thresholdsStyle": {"mode": "off"}
      }
    },
    "overrides": []
  },
  "options": {
    "legend": {
      "showLegend": true,
      "displayMode": "table",
      "placement": "right",
      "calcs": ["lastNotNull"]
    },
    "tooltip": {
      "mode": "multi",
      "sort": "desc"
    }
  }
},
{
  "id": 12,
  "type": "timeseries",
  "title": "Network RX/TX by Namespace",
  "datasource": {
    "type": "prometheus",
    "uid": "Prometheus"
  },
  "gridPos": {"x": 0, "y": 14, "w": 12, "h": 9},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "sum(rate(container_network_receive_bytes_total[5m])) by (namespace)",
      "legendFormat": "RX {{namespace}}"
    },
    {
      "refId": "B",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "sum(rate(container_network_transmit_bytes_total[5m])) by (namespace)",
      "legendFormat": "TX {{namespace}}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "Bps",
      "color": {"mode": "palette-classic"},
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      },
      "custom": {
        "drawStyle": "line",
        "lineInterpolation": "linear",
        "lineWidth": 1,
        "fillOpacity": 10,
        "showPoints": "never",
        "spanNulls": true,
        "stacking": {"group": "A", "mode": "none"},
        "thresholdsStyle": {"mode": "off"}
      }
    },
    "overrides": []
  },
  "options": {
    "legend": {
      "showLegend": true,
      "displayMode": "table",
      "placement": "right",
      "calcs": ["lastNotNull"]
    },
    "tooltip": {
      "mode": "multi",
      "sort": "desc"
    }
  }
},
{
  "id": 13,
  "type": "timeseries",
  "title": "Filesystem Usage by Node",
  "datasource": {
    "type": "prometheus",
    "uid": "Prometheus"
  },
  "gridPos": {"x": 12, "y": 14, "w": 12, "h": 9},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "sum(container_fs_usage_bytes) by (instance)",
      "legendFormat": "{{instance}}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "decbytes",
      "color": {"mode": "palette-classic"},
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      },
      "custom": {
        "drawStyle": "line",
        "lineInterpolation": "linear",
        "lineWidth": 1,
        "fillOpacity": 10,
        "showPoints": "never",
        "spanNulls": true,
        "stacking": {"group": "A", "mode": "none"},
        "thresholdsStyle": {"mode": "off"}
      }
    },
    "overrides": []
  },
  "options": {
    "legend": {
      "showLegend": true,
      "displayMode": "table",
      "placement": "right",
      "calcs": ["lastNotNull"]
    },
    "tooltip": {
      "mode": "multi",
      "sort": "desc"
    }
  }
},
{
  "id": 20,
  "type": "table",
  "title": "Pods by Memory (live)",
  "pluginVersion": "10.2.3",
  "datasource": {
    "type": "prometheus",
    "uid": "Prometheus"
  },
  "gridPos": {"x": 0, "y": 23, "w": 24, "h": 9},
  "targets": [
    {
      "refId": "A",
      "datasource": {"type": "prometheus", "uid": "Prometheus"},
      "expr": "sort_desc(sum(container_memory_working_set_bytes{container!=\"\",container!=\"POD\"}) by (namespace,pod))",
      "format": "table",
      "instant": true
    }
  ],
  "transformations": [
    {
      "id": "organize",
      "options": {
        "excludeByName": {"Time": true},
        "renameByName": {"Value": "Memory (bytes)"}
      }
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "short",
      "color": {"mode": "thresholds"},
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null}
        ]
      }
    },
    "overrides": []
  },
  "options": {
    "showHeader": true,
    "cellHeight": "sm",
    "footer": {
      "show": false,
      "countRows": false,
      "reducer": ["sum"],
      "fields": ""
    }
  }
},
{
  "datasource": {"type": "prometheus", "uid": "Prometheus"},
  "description": "Per-namespace pod counts by phase (Running, Pending, Failed) and container restarts over the last hour, from kube-state-metrics. The four instant table queries are merged into one row per namespace.",
  "fieldConfig": {
    "defaults": {
      "color": {"mode": "thresholds"},
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {"color": "green", "value": null},
          {"color": "orange", "value": 1},
          {"color": "red", "value": 5}
        ]
      },
      "unit": "short"
    },
    "overrides": []
  },
  "gridPos": {"h": 9, "w": 24, "x": 0, "y": 32},
  "id": 30,
  "options": {
    "showHeader": true,
    "cellHeight": "sm",
    "footer": {"show": false, "reducer": ["sum"], "countRows": false, "fields": ""}
  },
  "pluginVersion": "10.2.3",
  "targets": [
    {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kube_pod_status_phase{phase=\"Running\"}) by (namespace)", "format": "table", "instant": true, "refId": "A"},
    {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kube_pod_status_phase{phase=\"Pending\"}) by (namespace)", "format": "table", "instant": true, "refId": "B"},
    {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kube_pod_status_phase{phase=\"Failed\"}) by (namespace)", "format": "table", "instant": true, "refId": "C"},
    {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(increase(kube_pod_container_status_restarts_total[1h])) by (namespace)", "format": "table", "instant": true, "refId": "D"}
  ],
  "title": "Pod Health by Namespace (KSM)",
  "type": "table",
  "transformations": [
    {"id": "merge", "options": {}},
    {
      "id": "groupBy",
      "options": {
        "fields": {
          "namespace": {"aggregations": [], "operation": "groupby"},
          "Value #A": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Value #B": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Value #C": {"aggregations": ["lastNotNull"], "operation": "aggregate"},
          "Value #D": {"aggregations": ["lastNotNull"], "operation": "aggregate"}
        }
      }
    },
    {
      "id": "organize",
      "options": {
        "excludeByName": {"Time": true},
        "renameByName": {
          "Value #A (lastNotNull)": "Running",
          "Value #B (lastNotNull)": "Pending",
          "Value #C (lastNotNull)": "Failed",
          "Value #D (lastNotNull)": "Restarts (1h)"
        }
      }
    }
  ]
}
],
"title": "Cluster Overview",
"uid": "k3s-cluster-overview",
"version": 2,
"schemaVersion": 38,
"tags": [
  "k3s",
  "overview"
],
"style": "dark",
"refresh": "30s",
"time": {
  "from": "now-6h",
  "to": "now"
},
"timepicker": {},
"timezone": "",
"weekStart": "",
"templating": {
  "list": []
}
}