feat: update talos taskfiles - pull talos vars from SUC ks
Signed-off-by: Devin Buhl <[email protected]>
onedr0p committed May 29, 2024
1 parent 0db295c commit fc4af17
Showing 5 changed files with 39 additions and 125 deletions.
76 changes: 22 additions & 54 deletions .taskfiles/Talos/Taskfile.yaml
@@ -2,34 +2,32 @@
# yaml-language-server: $schema=https://taskfile.dev/schema.json
version: "3"

x-env-vars: &env-vars
TALOS_VERSION:
sh: yq 'select(document_index == 1).spec.postBuild.substitute.TALOS_VERSION' {{.KUBERNETES_DIR}}/{{.cluster}}/apps/system-upgrade/system-upgrade-controller/ks.yaml
TALOS_SCHEMATIC_ID:
sh: yq 'select(document_index == 1).spec.postBuild.substitute.TALOS_SCHEMATIC_ID' {{.KUBERNETES_DIR}}/{{.cluster}}/apps/system-upgrade/system-upgrade-controller/ks.yaml
KUBERNETES_VERSION:
sh: yq 'select(document_index == 1).spec.postBuild.substitute.KUBERNETES_VERSION' {{.KUBERNETES_DIR}}/{{.cluster}}/apps/system-upgrade/system-upgrade-controller/ks.yaml

vars:
# Ref: https://github.com/onedr0p/home-service
HOME_SERVICE_ADDR: voyager.internal
HOME_SERVICE_USER: devin
HOME_SERVICE_MATCHBOX_DIR: /var/opt/home-service/apps/matchbox/data/config
# renovate: datasource=docker depName=ghcr.io/siderolabs/installer
TALOS_VERSION: v1.7.3
TALOS_SCHEMATIC_ID: d715f723f882b1e1e8063f1b89f237dcc0e3bd000f9f970243af59c8baae0100
# renovate: datasource=docker depName=ghcr.io/siderolabs/kubelet
KUBERNETES_VERSION: v1.30.1
TALOS_SCRIPTS_DIR: "{{.ROOT_DIR}}/.taskfiles/Talos/scripts"

tasks:

bootstrap:
desc: Bootstrap Talos
summary: |
Args:
cluster: Cluster to run command against (required)
prompt: Bootstrap Talos on the '{{.cluster}}' cluster ... continue?
cmds:
- task: bootstrap-etcd
vars: &vars
cluster: "{{.cluster}}"
vars: { cluster: "{{.cluster}}" }
- task: fetch-kubeconfig
vars: *vars
vars: { cluster: "{{.cluster}}" }
- task: bootstrap-apps
vars: *vars
vars: { cluster: "{{.cluster}}" }
requires:
vars: ["cluster"]

@@ -74,14 +72,11 @@ tasks:

apply-config:
desc: Apply Talos configuration to a node
env:
TALOS_VERSION: "{{.TALOS_VERSION}}"
TALOS_SCHEMATIC_ID: "{{.TALOS_SCHEMATIC_ID}}"
KUBERNETES_VERSION: "{{.KUBERNETES_VERSION}}"
cmd: |
sops -d {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/assets/{{.hostname}}.secret.sops.yaml | \
envsubst | \
talosctl --context {{.cluster}} apply-config --mode={{.mode}} --nodes {{.node}} --file /dev/stdin
env: *env-vars
vars:
mode: '{{.mode | default "no-reboot"}}'
hostname:
@@ -95,42 +90,19 @@

upgrade:
desc: Upgrade Talos on a node
cmd: bash {{.TALOS_SCRIPTS_DIR}}/upgrade.sh "{{.cluster}}" "{{.node}}" "{{.TALOS_SCHEMATIC_ID}}:{{.TALOS_VERSION}}" "{{.rollout}}"
vars:
rollout: '{{.rollout | default "false"}}'
cmds:
- until kubectl --context {{.cluster}} wait --timeout=5m --for=condition=Complete jobs --all --all-namespaces; do sleep 10; done
- talosctl --context {{.cluster}} --nodes {{.node}} upgrade --image="factory.talos.dev/installer/{{.TALOS_SCHEMATIC_ID}}:{{.TALOS_VERSION}}" --wait=true --timeout=10m --preserve=true
- talosctl --context {{.cluster}} --nodes {{.node}} health --wait-timeout=10m --server=false
- until kubectl --context {{.cluster}} wait --timeout=5m --for=jsonpath=.status.ceph.health=HEALTH_OK cephcluster --all --all-namespaces; do sleep 10; done
vars: *env-vars
requires:
vars: ["cluster", "node"]
preconditions:
- test -f {{.KUBERNETES_DIR}}/{{.cluster}}/talosconfig
- talosctl --context {{.cluster}} config info >/dev/null 2>&1
- talosctl --context {{.cluster}} --nodes {{.node}} get machineconfig >/dev/null 2>&1

upgrade-rollout:
desc: Rollout Talos upgrade on all nodes
cmds:
- flux --context {{.cluster}} suspend kustomization --all
- kubectl cnpg --context {{.cluster}} maintenance set --reusePVC --all-namespaces
- for: { var: nodes, split: "," }
task: upgrade
vars:
cluster: "{{.cluster}}"
node: "{{.ITEM}}"
rollout: "true"
- kubectl cnpg --context {{.cluster}} maintenance unset --reusePVC --all-namespaces
- flux --context {{.cluster}} resume kustomization --all
- task: :kubernetes:delete-failed-pods
vars:
cluster: "{{.cluster}}"
vars:
nodes:
sh: talosctl --context {{.cluster}} config info --output json | jq --join-output '[.nodes[]] | join(",")'
requires:
vars: ["cluster"]
preconditions:
- test -f {{.KUBERNETES_DIR}}/{{.cluster}}/talosconfig
- talosctl --context {{.cluster}} config info >/dev/null 2>&1
- talosctl --context {{.cluster}} --nodes {{.nodes}} get machineconfig >/dev/null 2>&1

upgrade-k8s:
desc: Upgrade the clusters k8s version
cmd: talosctl --context {{.cluster}} --nodes {{.controller}} upgrade-k8s --to {{.KUBERNETES_VERSION}}
@@ -171,20 +143,16 @@ tasks:

bootstrap-matchbox:
desc: Bootstrap required Matchbox configuration to PXE Boot machine
dir: "{{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos"
cmds:
- for: ["kernel-amd64", "initramfs-amd64.xz"]
cmd: |
curl -skL https://factory.talos.dev/image/{{.TALOS_SCHEMATIC_ID}}/{{.TALOS_VERSION}}/{{.ITEM}} | \
curl -skT - -u "{{.HOME_SERVICE_USER}}:" \
sftp://{{.HOME_SERVICE_ADDR}}/{{.HOME_SERVICE_MATCHBOX_DIR}}/assets/{{.ITEM}}
- find ./assets -type f | xargs -I{} sh -c "sops -d {} | envsubst | curl -skT - -u "{{.HOME_SERVICE_USER}}:" sftp://{{.HOME_SERVICE_ADDR}}/{{.HOME_SERVICE_MATCHBOX_DIR}}/assets/\$(basename {} | sed 's/\.secret\.sops//')"
- find ./groups -type f | xargs -I{} curl -skT {} -u "{{.HOME_SERVICE_USER}}:" sftp://{{.HOME_SERVICE_ADDR}}/{{.HOME_SERVICE_MATCHBOX_DIR}}/groups/
- find ./profiles -type f | xargs -I{} curl -skT {} -u "{{.HOME_SERVICE_USER}}:" sftp://{{.HOME_SERVICE_ADDR}}/{{.HOME_SERVICE_MATCHBOX_DIR}}/profiles/
- find {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/assets -type f | xargs -I{} sh -c "sops -d {} | envsubst | curl -skT - -u "{{.HOME_SERVICE_USER}}:" sftp://{{.HOME_SERVICE_ADDR}}/{{.HOME_SERVICE_MATCHBOX_DIR}}/assets/\$(basename {} | sed 's/\.secret\.sops//')"
- find {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/groups -type f | xargs -I{} curl -skT {} -u "{{.HOME_SERVICE_USER}}:" sftp://{{.HOME_SERVICE_ADDR}}/{{.HOME_SERVICE_MATCHBOX_DIR}}/groups/
- find {{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/profiles -type f | xargs -I{} curl -skT {} -u "{{.HOME_SERVICE_USER}}:" sftp://{{.HOME_SERVICE_ADDR}}/{{.HOME_SERVICE_MATCHBOX_DIR}}/profiles/
- ssh -l {{.HOME_SERVICE_USER}} {{.HOME_SERVICE_ADDR}} "cd /var/opt/home-service ; go-task restart-matchbox"
env:
TALOS_VERSION: "{{.TALOS_VERSION}}"
TALOS_SCHEMATIC_ID: "{{.TALOS_SCHEMATIC_ID}}"
KUBERNETES_VERSION: "{{.KUBERNETES_VERSION}}"
vars: *env-vars
requires:
vars: ["cluster"]
55 changes: 0 additions & 55 deletions .taskfiles/Talos/scripts/upgrade.sh

This file was deleted.
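The deleted upgrade.sh wrapped the same flow that now lives inline in the upgrade task above: wait for in-flight Jobs to complete, run talosctl upgrade against the factory installer image, wait for node health, then wait for the CephCluster to report HEALTH_OK. The upgrade-rollout task handles suspending Flux and putting CloudNativePG into maintenance around the per-node loop. Assuming the Taskfile is included under a talos: namespace (the namespace, cluster name, and node address below are placeholders), an invocation would look roughly like:

# roll the upgrade across every node reported by talosctl config info
task talos:upgrade-rollout cluster=main

# or upgrade a single node
task talos:upgrade cluster=main node=10.0.0.10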

30 changes: 15 additions & 15 deletions .taskfiles/VolSync/Taskfile.yaml
@@ -8,7 +8,7 @@ version: "3"
# 3. Applications are deployed as either a Kubernetes Deployment or StatefulSet
# 4. Each application only has one PVC that is being replicated

x-env: &env
x-env-vars: &env-vars
app: "{{.app}}"
claim: "{{.claim}}"
controller: "{{.controller}}"
@@ -34,7 +34,7 @@ tasks:
- flux --context {{.cluster}} {{.state}} kustomization volsync
- flux --context {{.cluster}} -n {{.ns}} {{.state}} helmrelease volsync
- kubectl --context {{.cluster}} -n {{.ns}} scale deployment volsync --replicas {{if eq "suspend" .state}}0{{else}}1{{end}}
env: *env
env: *env-vars
vars:
ns: '{{.ns | default "volsync-system"}}'
state: '{{index .MATCH 0}}'
@@ -54,7 +54,7 @@ tasks:
- kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=1m
- kubectl --context {{.cluster}} -n {{.ns}} logs job/{{.job}} --container main
- kubectl --context {{.cluster}} -n {{.ns}} delete job {{.job}}
env: *env
env: *env-vars
requires:
vars: ["cluster", "app"]
vars:
@@ -79,7 +79,7 @@ tasks:
- kubectl --context {{.cluster}} -n {{.ns}} logs job/{{.job}} --container minio
- kubectl --context {{.cluster}} -n {{.ns}} logs job/{{.job}} --container r2
- kubectl --context {{.cluster}} -n {{.ns}} delete job {{.job}}
env: *env
env: *env-vars
requires:
vars: ["cluster", "app"]
vars:
@@ -103,7 +103,7 @@ tasks:
- kubectl --context {{.cluster}} -n {{.ns}} patch replicationsources {{.app}} --type merge -p '{"spec":{"trigger":{"manual":"{{.now}}"}}}'
- bash {{.VOLSYNC_SCRIPTS_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}}
- kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m
env: *env
env: *env-vars
requires:
vars: ["cluster", "app"]
vars:
@@ -128,11 +128,11 @@ tasks:
app: Application to restore (required)
previous: Previous number of snapshots to restore (default: 2)
cmds:
- { task: .suspend, vars: *env }
- { task: .wipe, vars: *env }
- { task: .restore, vars: *env }
- { task: .resume, vars: *env }
env: *env
- { task: .suspend, vars: *env-vars }
- { task: .wipe, vars: *env-vars }
- { task: .restore, vars: *env-vars }
- { task: .resume, vars: *env-vars }
env: *env-vars
requires:
vars: ["cluster", "app"]
vars:
@@ -170,7 +170,7 @@ tasks:
cmd: |
{{- $items := (split "/" .ITEM) }}
kubectl --context {{.cluster}} delete volumesnapshot -n {{ $items._0 }} {{ $items._1 }}
env: *env
env: *env-vars
requires:
vars: ["cluster"]
vars:
@@ -189,7 +189,7 @@ tasks:
- flux --context {{.cluster}} -n {{.ns}} suspend helmrelease {{.app}}
- kubectl --context {{.cluster}} -n {{.ns}} scale {{.controller}} --replicas 0
- kubectl --context {{.cluster}} -n {{.ns}} wait pod --for delete --selector="app.kubernetes.io/name={{.app}}" --timeout=2m
env: *env
env: *env-vars

# Wipe the PVC of all data
.wipe:
@@ -200,7 +200,7 @@ tasks:
- kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m
- kubectl --context {{.cluster}} -n {{.ns}} logs job/{{.job}} --container main
- kubectl --context {{.cluster}} -n {{.ns}} delete job {{.job}}
env: *env
env: *env-vars
vars:
job: volsync-wipe-{{.app}}

@@ -212,7 +212,7 @@ tasks:
- bash {{.VOLSYNC_SCRIPTS_DIR}}/wait-for-job.sh {{.job}} {{.ns}} {{.cluster}}
- kubectl --context {{.cluster}} -n {{.ns}} wait job/{{.job}} --for condition=complete --timeout=120m
- kubectl --context {{.cluster}} -n {{.ns}} delete replicationdestination {{.job}}
env: *env
env: *env-vars
vars:
job: volsync-dst-{{.app}}

@@ -222,4 +222,4 @@ tasks:
cmds:
- flux --context {{.cluster}} -n {{.ns}} resume helmrelease {{.app}}
- flux --context {{.cluster}} -n flux-system resume kustomization {{.app}}
env: *env
env: *env-vars
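Functionally the VolSync tasks are unchanged; the anchor is renamed from &env to &env-vars (and the key from x-env to x-env-vars) so both Taskfiles follow the same convention. As a reminder of the YAML mechanics involved (the task and variable names here are illustrative), the x- prefixed key exists only to hold the anchored mapping that the env: and vars: aliases reuse:

x-env-vars: &env-vars     # anchor defined on this mapping
  app: "{{.app}}"
  claim: "{{.claim}}"

tasks:
  example:
    env: *env-vars        # alias expands to the same mapping
    cmds:
      - echo "$app $claim"   # values arrive via the environment set above

Running task example app=demo claim=demo-pvc would print demo demo-pvc, since the CLI arguments populate the templated vars that the anchor passes through.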
1 change: 1 addition & 0 deletions
@@ -46,6 +46,7 @@ spec:
timeout: 5m
postBuild:
substitute:
TALOS_SCHEMATIC_ID: d715f723f882b1e1e8063f1b89f237dcc0e3bd000f9f970243af59c8baae0100
# renovate: datasource=docker depName=ghcr.io/siderolabs/installer
TALOS_VERSION: v1.7.3
# renovate: datasource=docker depName=ghcr.io/siderolabs/kubelet
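Adding TALOS_SCHEMATIC_ID to postBuild.substitute makes this ks.yaml the single source of truth: Flux substitutes the value into the system-upgrade plan below, and the Talos Taskfile now reads the same key with yq. A quick way to inspect what the Taskfile will pick up (the KUBERNETES_DIR and cluster path segments are placeholders matching the Taskfile vars):

yq 'select(document_index == 1).spec.postBuild.substitute' \
  <KUBERNETES_DIR>/<cluster>/apps/system-upgrade/system-upgrade-controller/ks.yaml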
2 changes: 1 addition & 1 deletion
@@ -43,6 +43,6 @@ spec:
args:
- --nodes=$(NODE_IP)
- upgrade
- --image=factory.talos.dev/installer/d715f723f882b1e1e8063f1b89f237dcc0e3bd000f9f970243af59c8baae0100:$(SYSTEM_UPGRADE_PLAN_LATEST_VERSION)
- --image=factory.talos.dev/installer/${TALOS_SCHEMATIC_ID}:$(SYSTEM_UPGRADE_PLAN_LATEST_VERSION)
- --preserve=true
- --wait=false
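With the hardcoded schematic replaced by ${TALOS_SCHEMATIC_ID}, Flux post-build substitution renders the argument back to the string it had before this commit, but the value is now maintained in one place. Given the substitute value added above, the rendered argument would be:

- --image=factory.talos.dev/installer/d715f723f882b1e1e8063f1b89f237dcc0e3bd000f9f970243af59c8baae0100:$(SYSTEM_UPGRADE_PLAN_LATEST_VERSION)

$(SYSTEM_UPGRADE_PLAN_LATEST_VERSION) is still resolved at runtime by the system-upgrade-controller, not by Flux.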
