Skip to content

Commit

Permalink
add embedded cluster spec
Browse files Browse the repository at this point in the history
  • Loading branch information
diamonwiggins committed May 17, 2024
1 parent 8ea335c commit 1f9cd24
Showing 1 changed file with 316 additions and 0 deletions.
316 changes: 316 additions & 0 deletions host/embedded-cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
# Host support bundle spec for embedded-cluster (k0s-based) installations.
# `hostCollectors` gather data from the node; `hostAnalyzers` evaluate the
# collected data and report pass/warn/fail outcomes.
apiVersion: troubleshoot.sh/v1beta2
kind: SupportBundle
metadata:
  name: default
spec:
  hostCollectors:
    # --- Built-in host facts ---
    - cpu: {}
    - hostOS: {}
    - memory: {}
    - blockDevices: {}
    - hostServices: {}
    - ipv4Interfaces: {}
    - time: {}
    # Disk usage; the collectorName values are referenced by the diskUsage
    # analyzers further down.
    - diskUsage:
        collectorName: root-disk-usage
        path: /
    - diskUsage:
        collectorName: openebs-disk-usage
        path: /var/openebs/local

    # --- k0s / installer state ---
    # Output of this collector is read by the "Kubernetes API probing"
    # textAnalyze check.
    - run:
        collectorName: k0s-status
        command: k0s
        args: ["status"]
    - run:
        collectorName: k0s-issue-template
        command: sh
        args: ["-c", "uname -srvmo; cat /etc/os-release || lsb_release -a"]
    - run:
        collectorName: k0s-sysinfo
        command: k0s
        args: ["sysinfo"]
    - copy:
        collectorName: installer-logs
        path: /var/lib/embedded-cluster/logs/*.log

    # --- Kubernetes API health ---
    # Verbose healthz output captured as raw text for human inspection.
    - run:
        collectorName: k8s-api-healthz-6443
        command: "curl"
        args: ["-k", "https://localhost:6443/healthz?verbose"]
    # HTTP probe that backs the `curl-k8s-api-6443` http analyzer below; without
    # a collector of this name that analyzer has no data to evaluate.
    # TLS verification is skipped, mirroring the `curl -k` used above.
    - http:
        collectorName: curl-k8s-api-6443
        get:
          url: https://localhost:6443/healthz
          insecureSkipVerify: true

    # --- General system state ---
    - run:
        collectorName: "free"
        command: "free"
        args: ["-m"]
    - run:
        collectorName: "top"
        command: "top"
        args: ["-b", "-n", "1"]
    - run:
        collectorName: "uptime"
        command: "uptime"
        args: []
    - run:
        collectorName: "uname"
        command: "uname"
        args: ["-a"]
    - run:
        collectorName: "df"
        command: "df"
        args: ["-h"]
    - run:
        collectorName: "iostat"
        command: "iostat"
        args: ["-x"]

    # --- Mandatory access control status ---
    - run:
        collectorName: "sestatus"
        command: "sestatus"
        args: []
    - run:
        collectorName: "apparmor-status"
        command: "apparmor_status"
        args: []

    # --- Firewall / packet filtering ---
    - run:
        collectorName: "iptables"
        command: "iptables"
        args: ["-L", "-v"]
    - run:
        collectorName: "iptables-version"
        command: "iptables"
        args: ["-V"]
    - run:
        collectorName: "nftables-list"
        command: "nft"
        args: ["list", "table", "filter"]
    - run:
        collectorName: "ipvsadm"
        command: "ipvsadm"
        args: ["-l", "-n"]

    # --- Storage layout ---
    - run:
        collectorName: "lsblk"
        command: "lsblk"
        args: ["--fs"]

    # --- Networking, DNS and name resolution ---
    - run:
        collectorName: "netstat-ports"
        command: "netstat"
        args: ["-t", "-u", "-l", "-p", "-n"]
    - run:
        collectorName: "netstat-route-table"
        command: "netstat"
        args: ["-r", "-n"]
    - run:
        collectorName: "resolvectl-status"
        command: "resolvectl"
        args: ["status"]
    - run:
        collectorName: "resolv-conf"
        command: "cat"
        args: ["/etc/resolv.conf"]
    - run:
        collectorName: "systemd-resolved-conf"
        command: "cat"
        args: ["/etc/systemd/resolved.conf"]
    - run:
        collectorName: "nsswitch-conf"
        command: "cat"
        args: ["/etc/nsswitch.conf"]
    - run:
        collectorName: "hosts"
        command: "cat"
        args: ["/etc/hosts"]
    - run:
        collectorName: "ip-route-table"
        command: "ip"
        args: ["route"]
    - run:
        collectorName: "sysctl"
        command: "sysctl"
        args: ["-a"]
    # Gather the hostname from several sources to help troubleshoot hostname
    # mismatches.
    - run:
        collectorName: "hostnames"
        command: "sh"
        args:
          - "-c"
          - |
            echo "hostname = $(hostname)"
            echo "/proc/sys/kernel/hostname = $(cat /proc/sys/kernel/hostname)"
            echo "uname -n = $(uname -n)"

    # --- Outbound connectivity to Replicated endpoints ---
    # Each collectorName is matched by an http analyzer of the same name.
    - http:
        collectorName: curl-api-replicated-com
        get:
          url: https://api.replicated.com/healthz
    - http:
        collectorName: curl-get-replicated-com
        get:
          url: https://get.replicated.com/healthz
    - http:
        collectorName: curl-registry-replicated-com
        get:
          url: https://registry.replicated.com/healthz
    - http:
        collectorName: curl-proxy-replicated-com
        get:
          url: https://proxy.replicated.com/healthz
    - http:
        collectorName: curl-replicated-app
        get:
          url: https://replicated.app/healthz

    # --- Disk consumers, mounts and process load ---
    - run:
        collectorName: "du-root"
        command: "sh"
        args: ["-c", "du -Shax / --exclude /proc | sort -rh | head -20"]
    - run:
        collectorName: "mount"
        command: "mount"
        args: ["-l"]
    - run:
        collectorName: "vmstat"
        command: "vmstat"
        args: ["-w"]
    # Count processes whose state column starts with R or D. The grep pattern is
    # quoted so the shell cannot glob-expand `[RD]` against the working directory.
    - run:
        collectorName: "ps-high-load"
        command: "sh"
        args: ["-c", "ps -eo s,user,cmd | grep '^[RD]' | sort | uniq -c | sort -nbr | head -20"]
    - run:
        collectorName: "ps-detect-antivirus-and-security-tools"
        command: "sh"
        args: ["-c", "ps -ef | grep -E 'clamav|sophos|esets_daemon|fsav|symantec|mfend|ds_agent|kav|bdagent|s1agent|falcon|illumio|xagt' | grep -v grep"]

  hostAnalyzers:
    - memory:
        checkName: Amount of Memory
        outcomes:
          - warn:
              when: "< 2G"
              message: At least 2G of memory is recommended
          - pass:
              message: The system has at least 2G of memory
    # Evaluates the `root-disk-usage` diskUsage collector.
    - diskUsage:
        checkName: Root disk usage
        collectorName: root-disk-usage
        outcomes:
          - fail:
              when: "total < 40Gi"
              message: The disk containing directory / has less than 40Gi of total space
          - warn:
              when: "used/total > 80%"
              message: The disk containing directory / is more than 80% full
          - warn:
              when: "available < 10Gi"
              message: The disk containing directory / has less than 10Gi of disk space available
          - pass:
              message: The disk containing directory / has sufficient space
    # Evaluates the `openebs-disk-usage` diskUsage collector.
    - diskUsage:
        checkName: OpenEBS disk usage
        collectorName: openebs-disk-usage
        outcomes:
          - fail:
              when: "total < 40Gi"
              message: The disk containing OpenEBS volumes has less than 40Gi of space
          - warn:
              when: "used/total > 80%"
              message: The disk containing OpenEBS volumes is more than 80% full
          - warn:
              when: "available < 10Gi"
              message: The disk containing OpenEBS volumes has less than 10Gi of disk space available
          - pass:
              message: The disk containing OpenEBS volumes has sufficient space
    # Searches the output of the `k0s-status` run collector for the regex;
    # the "true"/"false" conditions select on whether it matched.
    - textAnalyze:
        checkName: Kubernetes API probing
        fileName: host-collectors/run-host/k0s-status.txt
        regex: 'Kube-api probing successful: true'
        outcomes:
          - fail:
              when: "false"
              message: Kubernetes API probing is reporting a failure
          - pass:
              when: "true"
              message: Kubernetes API probing is reporting success
    # NOTE(review): outcomes appear to be evaluated in order with the first match
    # winning. If so, the timezone outcomes are only reached when none of the
    # three NTP conditions match - confirm, and consider a separate `time`
    # analyzer for the timezone check.
    - time:
        checkName: "ntp-status"
        outcomes:
          - fail:
              when: "ntp == unsynchronized+inactive"
              message: "System clock is not synchronized"
          - warn:
              when: "ntp == unsynchronized+active"
              message: System clock not yet synchronized
          - pass:
              when: "ntp == synchronized+active"
              message: "System clock is synchronized"
          - warn:
              when: "timezone != UTC"
              message: "Non UTC timezone can interfere with system function"
          - pass:
              when: "timezone == UTC"
              message: "Timezone is set to UTC"
    # Evaluates the `curl-k8s-api-6443` http collector defined above.
    - http:
        checkName: curl-k8s-api-6443
        collectorName: curl-k8s-api-6443
        outcomes:
          - warn:
              when: "error"
              message: Unable to curl https://localhost:6443/healthz. Please, run `curl -k https://localhost:6443/healthz` to check further information.
          - pass:
              when: "statusCode == 200"
              message: curl -k https://localhost:6443/healthz returned HTTP CODE response 200.
          - warn:
              message: "Unexpected response. HTTP CODE response is not 200. Please, run `curl -ki https://localhost:6443/healthz` to check further information."
    # Connectivity checks: one analyzer per Replicated endpoint collector.
    - http:
        checkName: curl-api-replicated-com
        collectorName: curl-api-replicated-com
        outcomes:
          - warn:
              when: "error"
              message: Error connecting to https://api.replicated.com/healthz
          - pass:
              when: "statusCode == 200"
              message: Connected to https://api.replicated.com/healthz
          - warn:
              message: "Unexpected response"
    - http:
        checkName: curl-get-replicated-com
        collectorName: curl-get-replicated-com
        outcomes:
          - warn:
              when: "error"
              message: Error connecting to https://get.replicated.com/healthz
          - pass:
              when: "statusCode == 200"
              message: Connected to https://get.replicated.com/healthz
          - warn:
              message: "Unexpected response"
    - http:
        checkName: curl-registry-replicated-com
        collectorName: curl-registry-replicated-com
        outcomes:
          - warn:
              when: "error"
              message: Error connecting to https://registry.replicated.com/healthz
          - pass:
              when: "statusCode == 200"
              message: Connected to https://registry.replicated.com/healthz
          - warn:
              message: "Unexpected response"
    - http:
        checkName: curl-proxy-replicated-com
        collectorName: curl-proxy-replicated-com
        outcomes:
          - warn:
              when: "error"
              message: Error connecting to https://proxy.replicated.com/healthz
          - pass:
              when: "statusCode == 200"
              message: Connected to https://proxy.replicated.com/healthz
          - warn:
              message: "Unexpected response"
    - http:
        checkName: curl-replicated-app
        collectorName: curl-replicated-app
        outcomes:
          - warn:
              when: "error"
              message: Error connecting to https://replicated.app/healthz
          - pass:
              when: "statusCode == 200"
              message: Connected to https://replicated.app/healthz
          - warn:
              message: "Unexpected response"

0 comments on commit 1f9cd24

Please sign in to comment.