Files
k3s-cluster/validate-scheduling.sh

118 lines
3.6 KiB
Bash
Raw Permalink Normal View History

2026-02-02 20:47:09 +01:00
#!/bin/bash
# Raspberry Pi K3s Scheduling Validation Script
# Run this to check your cluster configuration and pod distribution
# ---------------------------------------------------------------------------
# Node analysis: list the nodes, show the "Allocatable:" section of each
# node description, then print node labels and taints.
# ---------------------------------------------------------------------------
printf '%s\n\n' "=== Kubernetes Node Analysis ==="

printf '%s\n' "1. Node Overview:"
kubectl get nodes -o wide
printf '\n'

printf '%s\n' "2. Node Resource Capacity:"
# Print every "Allocatable:" heading together with the five lines after it.
kubectl describe nodes \
  | grep -A 5 "Allocatable:"
printf '\n'

printf '%s\n' "3. Node Labels and Taints:"
kubectl get nodes --show-labels
printf '\n'
# Reduce the describe output to Name:/Taints: lines, then print each line
# containing "Name:" along with the line that follows it.
kubectl describe nodes \
  | grep -E "(Name:|Taints:)" \
  | grep -A 1 "Name:"
printf '\n'
# ---------------------------------------------------------------------------
# Pod distribution: show where the memory-hungry workloads are running.
# Each lookup prints its fallback line when the command (or the grep filter
# at the end of the pipeline) returns a non-zero status.
# ---------------------------------------------------------------------------
printf '%s\n\n' "=== Pod Distribution Analysis ==="
printf '%s\n' "4. High-Resource Pods Location:"
printf '%s\n\n' "Checking where memory-intensive applications are scheduled..."

printf '%s\n' "n8n PostgreSQL pods:"
if ! kubectl get pods -n n8n -o wide | grep postgres; then
  printf '%s\n' "No n8n postgres pods found"
fi
printf '\n'

printf '%s\n' "Minecraft server pods:"
if ! kubectl get pods -n minecraft -o wide; then
  printf '%s\n' "No minecraft pods found"
fi
printf '\n'

printf '%s\n' "OpenWebUI pods:"
# No -n flag here: the lookup runs against kubectl's current namespace.
if ! kubectl get pods -o wide | grep open-webui; then
  printf '%s\n' "No OpenWebUI pods found"
fi
printf '\n'

printf '%s\n' "Phoenix pods:"
if ! kubectl get pods -n phoenix -o wide; then
  printf '%s\n' "No Phoenix pods found"
fi
printf '\n'

printf '%s\n' "Jellyfin pods:"
if ! kubectl get pods -n jellyfin -o wide; then
  printf '%s\n' "No Jellyfin pods found"
fi
printf '\n'

printf '%s\n' "Prometheus pods:"
if ! kubectl get pods -n monitoring -o wide | grep prometheus; then
  printf '%s\n' "No Prometheus pods found"
fi
printf '\n'
#######################################
# Print the first ten lines of `kubectl top pods` (all namespaces, sorted by
# memory), or a fallback notice when kubectl itself fails.
# Arguments: none
# Outputs:   writes the listing or the notice to stdout
# Returns:   0
#######################################
show_top_memory_pods() {
  local listing
  # Capture the output first so the fallback depends on kubectl's exit status.
  # Piping straight into `head` made the `||` test head's status instead, so
  # the notice was never printed when the metrics API was missing.
  if listing=$(kubectl top pods --all-namespaces --sort-by=memory 2>/dev/null); then
    if [[ -n "$listing" ]]; then
      head -n 10 <<<"$listing"
    fi
  else
    echo "Metrics server not available"
  fi
}

echo "=== Resource Usage ==="
echo
echo "5. Current Node Resource Usage:"
# stderr is silenced on purpose: the fallback line replaces kubectl's own error.
kubectl top nodes 2>/dev/null || echo "Metrics server not available - install with: kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml"
echo
echo "6. Top Memory-Consuming Pods:"
show_top_memory_pods
echo
# ---------------------------------------------------------------------------
# Recent issues: the last ten event lines (ordered by lastTimestamp) that
# mention Failed, Error or Warning.
# ---------------------------------------------------------------------------
printf '%s\n\n' "=== Pod Events (Recent Issues) ==="
printf '%s\n' "7. Recent Pod Scheduling Events:"
kubectl get events --all-namespaces --sort-by='.lastTimestamp' \
  | grep -E "(Failed|Error|Warning)" \
  | tail -n 10
printf '\n'
#######################################
# Print one "<node> | <pod count>" row per node, counting pods across all
# namespaces.
# Arguments: none
# Outputs:   writes the table rows to stdout
#######################################
count_pods_per_node() {
  # Ask kubectl for the node name only. Slicing field 8 out of `-o wide`
  # picked the wrong column whenever RESTARTS was rendered with an age suffix
  # such as "3 (2d ago)", because that adds whitespace-separated fields in
  # front of NODE.
  kubectl get pods --all-namespaces --no-headers \
      -o custom-columns=NODE:.spec.nodeName \
    | awk '{print $1}' \
    | sort \
    | uniq -c \
    | awk '{printf "%-24s| %s\n", $2, $1}'
}

echo "=== Validation Summary ==="
echo
# Count pods per node
echo "8. Pod Distribution Per Node:"
echo "Node Pod Count"
echo "------------------------|---------"
count_pods_per_node
echo
#######################################
# Print the name of the first node whose reported architecture is arm64,
# or nothing when no such node exists.
# Arguments: none
# Outputs:   writes a single node name (no trailing newline) to stdout
#######################################
detect_rpi_node() {
  local names
  names=$(kubectl get nodes -o jsonpath='{.items[?(@.status.nodeInfo.architecture=="arm64")].metadata.name}')
  # jsonpath prints every match on ONE line separated by spaces, so `head -1`
  # cannot select a single node; keep only the text before the first
  # whitespace character instead.
  printf '%s' "${names%%[[:space:]]*}"
}

#######################################
# List pods on one node whose row matches a known high-resource workload.
# Arguments: $1 - node name
# Outputs:   writes the matching `kubectl get pods -o wide` rows to stdout
# Returns:   grep's status (non-zero when nothing matched)
#######################################
find_high_mem_pods_on_node() {
  local node=$1
  # Select by spec.nodeName on the API side rather than grepping the table
  # for the node name, which also matched pod names, namespaces and any
  # longer node name containing it.
  # "open-?webui" covers both the hyphenated and the unhyphenated spelling.
  kubectl get pods --all-namespaces -o wide --no-headers \
      --field-selector "spec.nodeName=${node}" \
    | grep -E "(postgres|minecraft|phoenix|jellyfin|prometheus|open-?webui)"
}

echo "=== Recommendations ==="
echo
# Check if any high-resource pods are on wrong nodes
echo "9. Checking for Potential Issues:"
# The Raspberry Pi node is taken to be the first arm64 node in the cluster.
RPI_NODE=$(detect_rpi_node)
if [[ -n "$RPI_NODE" ]]; then
  echo "Detected Raspberry Pi node: $RPI_NODE"
  # Check if high-resource pods are on RPi
  HIGH_MEM_PODS=$(find_high_mem_pods_on_node "$RPI_NODE")
  if [[ -n "$HIGH_MEM_PODS" ]]; then
    echo "⚠️ WARNING: High-resource pods found on Raspberry Pi node:"
    echo "$HIGH_MEM_PODS"
    echo
    echo "These pods should be moved to more powerful nodes."
  else
    echo "✅ Good: No high-resource pods detected on Raspberry Pi node."
  fi
else
  echo " Could not auto-detect Raspberry Pi node. Please check manually."
fi
echo
# Closing guidance: remediation steps for high-resource pods found on the
# Raspberry Pi node. One printf argument per output line.
printf '%s\n' \
  "=== Next Steps ===" \
  "" \
  "If you see high-resource pods on your Raspberry Pi node:" \
  "1. Apply the node labels: kubectl label nodes <powerful-node> hardware=high-memory" \
  "2. Apply the taint: kubectl taint nodes <rpi-node> node-type=raspberry-pi:NoSchedule" \
  "3. Apply updated manifests with nodeSelectors" \
  "4. Delete problematic pods to force rescheduling" \
  "" \
  "See RASPBERRY_PI_SCHEDULING_FIX.md for detailed instructions."