monitoring: add Grafana dashboards + kube-state-metrics & node-exporter
Dashboards (provisioned via ConfigMaps into Grafana pod, 'K3s Cluster' folder):
- Cluster Overview: per-namespace CPU/mem/net/fs, pod counts, pod health (KSM)
- Pods & Services: per-pod CPU/mem/net/fs, throttling, pod status, restarts, PVCs
- Nodes: per-node CPU%/mem%, load average, disk usage, network (node-exporter)
- Control Plane & API Server: request rate, latency p95, 5xx, kubelet/PLEG
- Prometheus Self-Monitoring: ingestion, series, scrape duration, memory

Exporters (auto-scraped via existing kubernetes-service-endpoints job):
- kube-state-metrics: pod/deployment/PVC/replica state (kube_pod_status_phase, kube_pod_container_status_restarts_total, kube_persistentvolumeclaim_*)
- node-exporter (DaemonSet, hostNetwork): node_cpu_seconds_total, node_memory_*, node_filesystem_*, node_load*, node_network_*
This commit is contained in:
331
monitoring/grafana-dashboard-cluster-overview.yaml
Normal file
331
monitoring/grafana-dashboard-cluster-overview.yaml
Normal file
@@ -0,0 +1,331 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboard-cluster-overview
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: grafana
|
||||
grafana_dashboard: "1"
|
||||
data:
|
||||
cluster-overview.json: |
|
||||
{
|
||||
"annotations": {"list": []},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": null}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 5, "w": 4, "x": 0, "y": 0},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "time() - max(process_start_time_seconds{job=\"prometheus\"})", "refId": "A"}],
|
||||
"title": "Prometheus Uptime",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": null},
|
||||
{"color": "green", "value": 1}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 5, "w": 4, "x": 4, "y": 0},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kubelet_running_pods)", "refId": "A"}],
|
||||
"title": "Running Pods (total)",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": null}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 5, "w": 4, "x": 8, "y": 0},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kubelet_running_containers)", "refId": "A"}],
|
||||
"title": "Running Containers",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"mappings": [
|
||||
{"options": {"0": {"text": "Down", "color": "red"}, "1": {"text": "Up", "color": "green"}}, "type": "value"}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": null},
|
||||
{"color": "green", "value": 1}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 5, "w": 12, "x": 12, "y": 0},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "up{job=\"kubernetes-apiservers\"}", "refId": "A"}, {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "up{job=\"kubernetes-nodes\"}", "refId": "B"}],
|
||||
"title": "Control Plane & Node Exporters",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {"legend": false, "tooltip": false, "viz": false},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {"type": "linear"},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"},
|
||||
"thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 5},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(container_memory_working_set_bytes{container!=\"\",container!=\"POD\"}) by (namespace)", "legendFormat": "{{namespace}}", "refId": "A"}],
|
||||
"title": "Memory Usage by Namespace",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "core"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 5},
|
||||
"id": 11,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\"}[5m])) by (namespace)", "legendFormat": "{{namespace}}", "refId": "A"}],
|
||||
"title": "CPU Usage by Namespace",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 14},
|
||||
"id": 12,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_network_receive_bytes_total[5m])) by (namespace)", "legendFormat": "RX {{namespace}}", "refId": "A"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_network_transmit_bytes_total[5m])) by (namespace)", "legendFormat": "TX {{namespace}}", "refId": "B"}
|
||||
],
|
||||
"title": "Network RX/TX by Namespace",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 14},
|
||||
"id": 13,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(container_fs_usage_bytes) by (instance)", "legendFormat": "{{instance}}", "refId": "A"}],
|
||||
"title": "Filesystem Usage by Node",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 24, "x": 0, "y": 23},
|
||||
"id": 20,
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"cellHeight": "sm",
|
||||
"footer": {"show": false, "reducer": ["sum"], "countRows": false, "fields": ""}
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sort_desc(sum(container_memory_working_set_bytes{container!=\"\",container!=\"POD\"}) by (namespace,pod))", "format": "table", "instant": true, "refId": "A"}],
|
||||
"title": "Pods by Memory (live)",
|
||||
"type": "table",
|
||||
"transformations": [
|
||||
{"id": "organize", "options": {"excludeByName": {"Time": true}, "renameByName": {"Value": "Memory (bytes)"}}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "orange", "value": 1}, {"color": "red", "value": 5}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 24, "x": 0, "y": 32},
|
||||
"id": 30,
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"cellHeight": "sm",
|
||||
"footer": {"show": false, "reducer": ["sum"], "countRows": false, "fields": ""}
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kube_pod_status_phase{phase=\"Running\"}) by (namespace)", "format": "table", "instant": true, "refId": "A"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kube_pod_status_phase{phase=\"Pending\"}) by (namespace)", "format": "table", "instant": true, "refId": "B"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(kube_pod_status_phase{phase=\"Failed\"}) by (namespace)", "format": "table", "instant": true, "refId": "C"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(increase(kube_pod_container_status_restarts_total[1h])) by (namespace)", "format": "table", "instant": true, "refId": "D"}
|
||||
],
|
||||
"title": "Pod Health by Namespace (KSM)",
|
||||
"type": "table",
|
||||
"transformations": [
|
||||
{"id": "merge", "options": {}},
|
||||
{"id": "groupBy", "options": {"fields": {"Value #A": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #B": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #C": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #D": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "namespace": {"aggregations": [], "operation": "groupby"}}}},
|
||||
{"id": "organize", "options": {"excludeByName": {"Time": true}, "renameByName": {"Value #A (lastNotNull)": "Running", "Value #B (lastNotNull)": "Pending", "Value #C (lastNotNull)": "Failed", "Value #D (lastNotNull)": "Restarts (1h)"}}}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["k3s", "overview"],
|
||||
"templating": {"list": []},
|
||||
"time": {"from": "now-6h", "to": "now"},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Cluster Overview",
|
||||
"uid": "k3s-cluster-overview",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
209
monitoring/grafana-dashboard-control-plane.yaml
Normal file
209
monitoring/grafana-dashboard-control-plane.yaml
Normal file
@@ -0,0 +1,209 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboard-control-plane
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: grafana
|
||||
grafana_dashboard: "1"
|
||||
data:
|
||||
control-plane.json: |
|
||||
{
|
||||
"annotations": {"list": []},
|
||||
"editable": true,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 0},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(apiserver_request_total[5m])) by (verb)", "legendFormat": "{{verb}}", "refId": "A"}],
|
||||
"title": "API Server Requests by Verb",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 0},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "histogram_quantile(0.95, sum(rate(apiserver_request_duration_seconds_bucket[5m])) by (le, verb))", "legendFormat": "p95 {{verb}}", "refId": "A"}],
|
||||
"title": "API Server Request Latency p95",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 9},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(apiserver_request_total{code=~\"5..\"}[5m])) by (verb)", "legendFormat": "{{verb}}", "refId": "A"}],
|
||||
"title": "API Server 5xx Errors",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 9},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "histogram_quantile(0.50, sum(rate(kubelet_pod_start_duration_seconds_bucket[5m])) by (le))", "legendFormat": "pod start p50", "refId": "A"}, {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "histogram_quantile(0.95, sum(rate(kubelet_pod_start_duration_seconds_bucket[5m])) by (le))", "legendFormat": "pod start p95", "refId": "B"}],
|
||||
"title": "Kubelet Pod Start Latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 18},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "histogram_quantile(0.95, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket[5m])) by (le, instance))", "legendFormat": "{{instance}}", "refId": "A"}],
|
||||
"title": "Kubelet Cgroup Manager Duration p95",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 18},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "rate(kubelet_pleg_relist_duration_seconds_count[5m])", "legendFormat": "relists/s {{instance}}", "refId": "A"}],
|
||||
"title": "Kubelet PLEG Relist Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"mappings": [
|
||||
{"options": {"0": {"text": "Down", "color": "red"}, "1": {"text": "Up", "color": "green"}}, "type": "value"}
|
||||
],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 6, "w": 24, "x": 0, "y": 27},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "up", "refId": "A"}],
|
||||
"title": "All Scrape Targets Status",
|
||||
"type": "stat"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["k3s", "control-plane"],
|
||||
"templating": {"list": []},
|
||||
"time": {"from": "now-6h", "to": "now"},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Control Plane & API Server",
|
||||
"uid": "k3s-control-plane",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
279
monitoring/grafana-dashboard-nodes.yaml
Normal file
279
monitoring/grafana-dashboard-nodes.yaml
Normal file
@@ -0,0 +1,279 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboard-nodes
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: grafana
|
||||
grafana_dashboard: "1"
|
||||
data:
|
||||
nodes.json: |
|
||||
{
|
||||
"annotations": {"list": []},
|
||||
"editable": true,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 6, "w": 6, "x": 0, "y": 0},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "kubelet_running_pods", "refId": "A"}, {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "kubelet_running_containers", "refId": "B"}],
|
||||
"title": "Pods / Containers per Node",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "orange", "value": 70}, {"color": "red", "value": 90}]},
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 6, "w": 18, "x": 6, "y": 0},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "100 - (avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", "legendFormat": "{{instance}}", "refId": "A"}],
|
||||
"title": "Node CPU Usage %",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 6},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "100 - (avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", "legendFormat": "{{instance}}", "refId": "A"}],
|
||||
"title": "Node CPU Usage % (over time)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 6},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100", "legendFormat": "{{instance}}", "refId": "A"}],
|
||||
"title": "Node Memory Usage %",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 15},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes", "legendFormat": "used {{instance}}", "refId": "A"}, {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "node_memory_MemTotal_bytes", "legendFormat": "total {{instance}}", "refId": "B"}],
|
||||
"title": "Node Memory (used vs total)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 15},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (instance) (rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|br-.*|cni.*|flannel.*\"}[5m]))", "legendFormat": "RX {{instance}}", "refId": "A"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|veth.*|docker.*|br-.*|cni.*|flannel.*\"}[5m]))", "legendFormat": "TX {{instance}}", "refId": "B"}
|
||||
],
|
||||
"title": "Node Network Traffic",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 24},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "(1 - (node_filesystem_avail_bytes{fstype!~\"tmpfs|overlay|squashfs\"} / node_filesystem_size_bytes{fstype!~\"tmpfs|overlay|squashfs\"})) * 100", "legendFormat": "{{instance}} {{mountpoint}}", "refId": "A"}],
|
||||
"title": "Node Disk Usage %",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 24},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "node_load1", "legendFormat": "1m {{instance}}", "refId": "A"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "node_load5", "legendFormat": "5m {{instance}}", "refId": "B"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "node_load15", "legendFormat": "15m {{instance}}", "refId": "C"}
|
||||
],
|
||||
"title": "Node Load Average",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 24, "x": 0, "y": 33},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"cellHeight": "sm",
|
||||
"footer": {"show": false, "reducer": ["sum"], "countRows": false, "fields": ""}
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "kubelet_running_pods", "format": "table", "instant": true, "refId": "A"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "kubelet_running_containers", "format": "table", "instant": true, "refId": "B"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "100 - (avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", "format": "table", "instant": true, "refId": "C"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100", "format": "table", "instant": true, "refId": "D"}
|
||||
],
|
||||
"title": "Node Summary (live)",
|
||||
"type": "table",
|
||||
"transformations": [
|
||||
{"id": "merge", "options": {}},
|
||||
{"id": "groupBy", "options": {"fields": {"Value #A": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #B": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #C": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #D": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "instance": {"aggregations": [], "operation": "groupby"}}}},
|
||||
{"id": "organize", "options": {"excludeByName": {"Time": true}, "renameByName": {"Value #A (lastNotNull)": "Pods", "Value #B (lastNotNull)": "Containers", "Value #C (lastNotNull)": "CPU %", "Value #D (lastNotNull)": "Memory %"}}}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["k3s", "nodes"],
|
||||
"templating": {"list": []},
|
||||
"time": {"from": "now-6h", "to": "now"},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Nodes",
|
||||
"uid": "k3s-nodes",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
312
monitoring/grafana-dashboard-pods.yaml
Normal file
312
monitoring/grafana-dashboard-pods.yaml
Normal file
@@ -0,0 +1,312 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboard-pods
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: grafana
|
||||
grafana_dashboard: "1"
|
||||
data:
|
||||
pods.json: |
|
||||
{
|
||||
"annotations": {"list": []},
|
||||
"editable": true,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "normal"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "core"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 24, "x": 0, "y": 0},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"}[5m])) by (pod)", "legendFormat": "{{pod}}", "refId": "A"}],
|
||||
"title": "CPU Usage per Pod",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "normal"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 24, "x": 0, "y": 9},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(container_memory_working_set_bytes{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"}) by (pod)", "legendFormat": "{{pod}}", "refId": "A"}],
|
||||
"title": "Memory Usage per Pod",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 18},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[5m])) by (pod)", "legendFormat": "RX {{pod}}", "refId": "A"}
|
||||
],
|
||||
"title": "Network RX per Pod",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 18},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[5m])) by (pod)", "legendFormat": "TX {{pod}}", "refId": "A"}
|
||||
],
|
||||
"title": "Network TX per Pod",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 27},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(container_fs_usage_bytes{namespace=~\"$namespace\"}) by (pod)", "legendFormat": "{{pod}}", "refId": "A"}],
|
||||
"title": "Filesystem Usage per Pod",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 27},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum(rate(container_cpu_cfs_throttled_periods_total{namespace=~\"$namespace\"}[5m])) by (pod) / sum(rate(container_cpu_cfs_periods_total{namespace=~\"$namespace\"}[5m])) by (pod) * 100", "legendFormat": "{{pod}}", "refId": "A"}],
|
||||
"title": "CPU Throttling % per Pod",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 5}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 10, "w": 24, "x": 0, "y": 36},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"cellHeight": "sm",
|
||||
"footer": {"show": false, "reducer": ["sum"], "countRows": false, "fields": ""}
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace, pod) (container_memory_working_set_bytes{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"})", "format": "table", "instant": true, "refId": "A"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace, pod) (rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",namespace=~\"$namespace\"}[5m]))", "format": "table", "instant": true, "refId": "B"},
|
||||
{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace, pod) (rate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[5m]))", "format": "table", "instant": true, "refId": "C"}
|
||||
],
|
||||
"title": "Pod Resource Summary (live)",
|
||||
"type": "table",
|
||||
"transformations": [
|
||||
{"id": "merge", "options": {}},
|
||||
{"id": "groupBy", "options": {"fields": {"Value #A": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #B": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "Value #C": {"aggregations": ["lastNotNull"], "operation": "aggregate"}, "namespace": {"aggregations": [], "operation": "groupby"}, "pod": {"aggregations": [], "operation": "groupby"}}}},
|
||||
{"id": "organize", "options": {"excludeByName": {"Time": true}, "renameByName": {"Value #A (lastNotNull)": "Memory (bytes)", "Value #B (lastNotNull)": "CPU (cores)", "Value #C (lastNotNull)": "Network RX (Bps)"}}}
|
||||
]
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 46},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace, phase) (kube_pod_status_phase{phase=~\"Running|Pending|Failed\",namespace=~\"$namespace\"})", "legendFormat": "{{namespace}} {{phase}}", "refId": "A"}],
|
||||
"title": "Pod Status by Namespace (KSM)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 46},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "sum by (namespace) (increase(kube_pod_container_status_restarts_total{namespace=~\"$namespace\"}[1h]))", "legendFormat": "{{namespace}}", "refId": "A"}],
|
||||
"title": "Container Restarts (last 1h)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {
|
||||
"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true,
|
||||
"stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 24, "x": 0, "y": 55},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true},
|
||||
"tooltip": {"mode": "multi", "sort": "desc"}
|
||||
},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=~\"$namespace\"}", "legendFormat": "{{namespace}}/{{persistentvolumeclaim}}", "refId": "A"}],
|
||||
"title": "PVC Storage Requests by Claim (KSM)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["k3s", "pods"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {"selected": true, "text": "All", "value": "$__all"},
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"definition": "label_values(container_cpu_usage_seconds_total, namespace)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [],
|
||||
"query": "label_values(container_cpu_usage_seconds_total, namespace)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {"from": "now-6h", "to": "now"},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Pods & Services",
|
||||
"uid": "k3s-pods",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
218
monitoring/grafana-dashboard-prometheus.yaml
Normal file
218
monitoring/grafana-dashboard-prometheus.yaml
Normal file
@@ -0,0 +1,218 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboard-prometheus
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: grafana
|
||||
grafana_dashboard: "1"
|
||||
data:
|
||||
prometheus.json: |
|
||||
{
|
||||
"annotations": {"list": []},
|
||||
"editable": true,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]},
|
||||
"mappings": [{"options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}, "type": "value"}]
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 5, "w": 6, "x": 0, "y": 0},
|
||||
"id": 1,
|
||||
"options": {"colorMode": "background", "graphMode": "none", "justifyMode": "center", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "value"},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "up{job=\"prometheus\"}", "refId": "A"}],
|
||||
"title": "Prometheus Status",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 5, "w": 6, "x": 6, "y": 0},
|
||||
"id": 2,
|
||||
"options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "process_resident_memory_bytes{job=\"prometheus\"}", "refId": "A"}],
|
||||
"title": "Prometheus RSS Memory",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 5, "w": 6, "x": 12, "y": 0},
|
||||
"id": 3,
|
||||
"options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "prometheus_tsdb_head_series", "refId": "A"}],
|
||||
"title": "Active Series",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "thresholds"},
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 5, "w": 6, "x": 18, "y": 0},
|
||||
"id": 4,
|
||||
"options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}, "textMode": "auto"},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "count(up)", "refId": "A"}],
|
||||
"title": "Scrape Targets",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 5},
|
||||
"id": 10,
|
||||
"options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "process_resident_memory_bytes{job=\"prometheus\"}", "legendFormat": "RSS", "refId": "A"}, {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "go_memstats_heap_inuse_bytes{job=\"prometheus\"}", "legendFormat": "Go heap in use", "refId": "B"}],
|
||||
"title": "Prometheus Memory",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "core"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 5},
|
||||
"id": 11,
|
||||
"options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "rate(process_cpu_seconds_total{job=\"prometheus\"}[5m])", "legendFormat": "prometheus", "refId": "A"}],
|
||||
"title": "Prometheus CPU",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 14},
|
||||
"id": 12,
|
||||
"options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "rate(prometheus_tsdb_head_samples_appended_total[5m])", "legendFormat": "samples/s", "refId": "A"}],
|
||||
"title": "Ingestion Rate (samples/s)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 14},
|
||||
"id": 13,
|
||||
"options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "scrape_duration_seconds", "legendFormat": "{{job}} {{instance}}", "refId": "A"}],
|
||||
"title": "Scrape Duration by Job",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 0, "y": 23},
|
||||
"id": 14,
|
||||
"options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "prometheus_tsdb_head_series", "legendFormat": "head series", "refId": "A"}, {"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "prometheus_tsdb_head_chunks", "legendFormat": "head chunks", "refId": "B"}],
|
||||
"title": "TSDB Head Series & Chunks",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "prometheus", "uid": "Prometheus"},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {"mode": "palette-classic"},
|
||||
"custom": {"drawStyle": "line", "fillOpacity": 10, "lineInterpolation": "linear", "lineWidth": 1, "showPoints": "never", "spanNulls": true, "stacking": {"group": "A", "mode": "none"}, "thresholdsStyle": {"mode": "off"}},
|
||||
"mappings": [],
|
||||
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {"h": 9, "w": 12, "x": 12, "y": 23},
|
||||
"id": 15,
|
||||
"options": {"legend": {"calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true}, "tooltip": {"mode": "multi", "sort": "desc"}},
|
||||
"targets": [{"datasource": {"type": "prometheus", "uid": "Prometheus"}, "expr": "rate(prometheus_http_request_duration_seconds_sum[5m]) / rate(prometheus_http_request_duration_seconds_count[5m])", "legendFormat": "{{handler}}", "refId": "A"}],
|
||||
"title": "Prometheus HTTP Request Duration",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["k3s", "prometheus"],
|
||||
"templating": {"list": []},
|
||||
"time": {"from": "now-6h", "to": "now"},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Prometheus Self-Monitoring",
|
||||
"uid": "k3s-prometheus",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
20
monitoring/grafana-dashboard-provider.yaml
Normal file
20
monitoring/grafana-dashboard-provider.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboard-provider
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: grafana
|
||||
data:
|
||||
provider.yaml: |
|
||||
apiVersion: 1
|
||||
providers:
|
||||
- name: 'k3s-dashboards'
|
||||
orgId: 1
|
||||
folder: 'K3s Cluster'
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 30
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
@@ -33,6 +33,18 @@ spec:
|
||||
mountPath: /var/lib/grafana
|
||||
- name: grafana-datasources
|
||||
mountPath: /etc/grafana/provisioning/datasources
|
||||
- name: grafana-dashboard-provider
|
||||
mountPath: /etc/grafana/provisioning/dashboards
|
||||
- name: dashboards-cluster-overview
|
||||
mountPath: /var/lib/grafana/dashboards/cluster-overview
|
||||
- name: dashboards-pods
|
||||
mountPath: /var/lib/grafana/dashboards/pods
|
||||
- name: dashboards-nodes
|
||||
mountPath: /var/lib/grafana/dashboards/nodes
|
||||
- name: dashboards-control-plane
|
||||
mountPath: /var/lib/grafana/dashboards/control-plane
|
||||
- name: dashboards-prometheus
|
||||
mountPath: /var/lib/grafana/dashboards/prometheus
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi"
|
||||
@@ -47,3 +59,21 @@ spec:
|
||||
- name: grafana-datasources
|
||||
configMap:
|
||||
name: grafana-datasources
|
||||
- name: grafana-dashboard-provider
|
||||
configMap:
|
||||
name: grafana-dashboard-provider
|
||||
- name: dashboards-cluster-overview
|
||||
configMap:
|
||||
name: grafana-dashboard-cluster-overview
|
||||
- name: dashboards-pods
|
||||
configMap:
|
||||
name: grafana-dashboard-pods
|
||||
- name: dashboards-nodes
|
||||
configMap:
|
||||
name: grafana-dashboard-nodes
|
||||
- name: dashboards-control-plane
|
||||
configMap:
|
||||
name: grafana-dashboard-control-plane
|
||||
- name: dashboards-prometheus
|
||||
configMap:
|
||||
name: grafana-dashboard-prometheus
|
||||
|
||||
118
monitoring/kube-state-metrics.yaml
Normal file
118
monitoring/kube-state-metrics.yaml
Normal file
@@ -0,0 +1,118 @@
|
||||
# Identity used by the kube-state-metrics pod; the ClusterRoleBinding of the
# same name in this file lists it as its only subject.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: monitoring
  name: kube-state-metrics
|
||||
---
|
||||
# Cluster-wide, read-only (list/watch) access for kube-state-metrics.
#
# The Deployment in this file starts kube-state-metrics without a
# `--resources` flag, so every default collector is enabled. Each collector
# needs list/watch on its resource; a missing rule makes that collector's
# reflector fail with "forbidden" and its metrics never appear.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
  # Core API group.
  - apiGroups: [""]
    resources:
      - configmaps
      - secrets
      - nodes
      - pods
      - services
      - resourcequotas
      - replicationcontrollers
      - limitranges
      - persistentvolumeclaims
      - persistentvolumes
      - namespaces
      - endpoints
    verbs: ["list", "watch"]
  - apiGroups: ["apps"]
    resources: ["statefulsets", "daemonsets", "deployments", "replicasets"]
    verbs: ["list", "watch"]
  - apiGroups: ["batch"]
    resources: ["cronjobs", "jobs"]
    verbs: ["list", "watch"]
  - apiGroups: ["autoscaling"]
    resources: ["horizontalpodautoscalers"]
    verbs: ["list", "watch"]
  # networkpolicies added: collected by default alongside ingresses.
  - apiGroups: ["networking.k8s.io"]
    resources: ["ingresses", "networkpolicies"]
    verbs: ["list", "watch"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses", "volumeattachments"]
    verbs: ["list", "watch"]
  - apiGroups: ["certificates.k8s.io"]
    resources: ["certificatesigningrequests"]
    verbs: ["list", "watch"]
  # The rules below cover the remaining default collectors that were
  # previously missing from this role.
  - apiGroups: ["policy"]
    resources: ["poddisruptionbudgets"]
    verbs: ["list", "watch"]
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["list", "watch"]
  - apiGroups: ["admissionregistration.k8s.io"]
    resources:
      - mutatingwebhookconfigurations
      - validatingwebhookconfigurations
    verbs: ["list", "watch"]
|
||||
---
|
||||
# Grants the kube-state-metrics ClusterRole to the kube-state-metrics
# ServiceAccount in the monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    namespace: monitoring
    name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
|
||||
---
|
||||
# Single-replica kube-state-metrics exporter.
#
# Port 8080 (http-metrics) serves the Kubernetes object-state metrics;
# port 8081 (telemetry) serves the exporter's own metrics.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: monitoring
  labels:
    app: kube-state-metrics
spec:
  replicas: 1
  selector:
    matchLabels:
      app: kube-state-metrics
  template:
    metadata:
      labels:
        app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
        - name: kube-state-metrics
          image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.10.1
          ports:
            - containerPort: 8080
              name: http-metrics
            - containerPort: 8081
              name: telemetry
          # Restart the container if the metrics endpoint stops answering;
          # without a liveness probe a hung exporter would stay "Running".
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 5
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /
              port: 8081
            initialDelaySeconds: 5
            timeoutSeconds: 5
          # The exporter only talks to the API server and serves HTTP, so it
          # can run unprivileged with a read-only root filesystem (mirrors the
          # upstream standard manifest).
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 65534
            seccompProfile:
              type: RuntimeDefault
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "512Mi"
              cpu: "500m"
|
||||
---
|
||||
# Exposes both kube-state-metrics ports inside the cluster. The prometheus.io
# annotations mark the service for scraping on port 8080 (http-metrics).
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: kube-state-metrics
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "8080"
  labels:
    app: kube-state-metrics
spec:
  ports:
    # Object-state metrics.
    - name: http-metrics
      port: 8080
      targetPort: http-metrics
    # Exporter self-metrics.
    - name: telemetry
      port: 8081
      targetPort: telemetry
  selector:
    app: kube-state-metrics
|
||||
112
monitoring/node-exporter.yaml
Normal file
112
monitoring/node-exporter.yaml
Normal file
@@ -0,0 +1,112 @@
|
||||
# Identity used by the node-exporter DaemonSet pods; the ClusterRoleBinding of
# the same name in this file lists it as its only subject.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: monitoring
  name: node-exporter
|
||||
---
|
||||
# Selects every node-exporter pod and exposes its metrics port. The
# prometheus.io annotations mark the service for scraping on port 9100.
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: node-exporter
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "9100"
  labels:
    app: node-exporter
spec:
  ports:
    - name: metrics
      port: 9100
      targetPort: 9100
  selector:
    app: node-exporter
|
||||
---
|
||||
# Read-only access to Node objects for the node-exporter ServiceAccount.
# NOTE(review): the DaemonSet in this file only passes host procfs/sysfs/rootfs
# paths to the exporter; nothing visible here calls the Kubernetes API.
# Confirm this grant is actually required.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: node-exporter
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - list
      - watch
|
||||
---
|
||||
# Grants the node-exporter ClusterRole to the node-exporter ServiceAccount in
# the monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: node-exporter
subjects:
  - kind: ServiceAccount
    namespace: monitoring
    name: node-exporter
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: node-exporter
|
||||
---
|
||||
# Runs one node-exporter pod per node, including control-plane nodes (see
# tolerations). The pod shares the host PID and network namespaces and reads
# host state through read-only hostPath mounts of /proc, /sys and /.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitoring
  labels:
    app: node-exporter
spec:
  selector:
    matchLabels:
      app: node-exporter
  template:
    metadata:
      labels:
        app: node-exporter
    spec:
      serviceAccountName: node-exporter
      hostPID: true
      hostNetwork: true
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      containers:
        - name: node-exporter
          image: prom/node-exporter:v1.7.0
          args:
            # Point the collectors at the host mounts declared below.
            - --path.procfs=/host/proc
            - --path.sysfs=/host/sys
            - --path.rootfs=/host/root
            - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+)($|/)
            - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
          ports:
            # With hostNetwork the exporter listens directly on the node.
            - containerPort: 9100
              hostPort: 9100
              name: metrics
          volumeMounts:
            - name: proc
              mountPath: /host/proc
              readOnly: true
            - name: sys
              mountPath: /host/sys
              readOnly: true
            - name: root
              mountPath: /host/root
              # Propagate host mounts into the container so filesystems
              # mounted after the pod started are still visible to the
              # filesystem collector (equivalent to docker's `ro,rslave`).
              mountPropagation: HostToContainer
              readOnly: true
          resources:
            requests:
              memory: "64Mi"
              cpu: "50m"
            limits:
              memory: "128Mi"
              cpu: "200m"
      volumes:
        - name: proc
          hostPath:
            path: /proc
        - name: sys
          hostPath:
            path: /sys
        - name: root
          hostPath:
            path: /
|
||||
Reference in New Issue
Block a user