Skip to content

Commit

Permalink
Add utility cluster (#2303)
Browse files Browse the repository at this point in the history
* feat: add utility cluster
* feat: update workflows
  • Loading branch information
joryirving authored May 9, 2024
1 parent b49d310 commit 9367f8b
Show file tree
Hide file tree
Showing 226 changed files with 5,864 additions and 11 deletions.
4 changes: 2 additions & 2 deletions .envrc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#shellcheck disable=SC2148,SC2155
export KUBECONFIG="$(expand_path ./kubernetes/main/kubeconfig):$(expand_path ./kubernetes/pi/kubeconfig)"
export KUBECONFIG="$(expand_path ./kubernetes/main/kubeconfig):$(expand_path ./kubernetes/pi/kubeconfig):$(expand_path ./kubernetes/utility/kubeconfig)"
export SOPS_AGE_KEY_FILE="$(expand_path ./age.key)"
# Venv
export VIRTUAL_ENV="$(expand_path ./.venv)"
Expand All @@ -10,4 +10,4 @@ export ANSIBLE_VARS_ENABLED="host_group_vars"
export ANSIBLE_INVENTORY=$(expand_path ./inventory/hosts.yaml)
PATH_add "$(expand_path ./.venv/bin)"
# Talos
export TALOSCONFIG="$(expand_path ./kubernetes/main/bootstrap/talos/clusterconfig/talosconfig)"
#export TALOSCONFIG="$(expand_path ./kubernetes/main/bootstrap/talos/clusterconfig/talosconfig):$(expand_path ./kubernetes/utility/bootstrap/talos/clusterconfig/talosconfig)"
5 changes: 5 additions & 0 deletions .github/labeler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,8 @@ cluster/pi:
- any-glob-to-any-file: ansible/pi/**/*
- any-glob-to-any-file: kubernetes/pi/**/*
- any-glob-to-any-file: terraform/pi/**/*
cluster/utility:
- changed-files:
- any-glob-to-any-file: ansible/utility/**/*
- any-glob-to-any-file: kubernetes/utility/**/*
- any-glob-to-any-file: terraform/utility/**/*
2 changes: 2 additions & 0 deletions .github/labels.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
color: "ffc300"
- name: cluster/pi
color: "ffc300"
- name: cluster/utility
color: "ffc300"
# Renovate Types
- name: renovate/ansible
color: "027fa0"
Expand Down
9 changes: 9 additions & 0 deletions .github/renovate/clusters.json5
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@
"**/terraform/pi/**"
],
"additionalBranchPrefix": "pi-"
},
{
"description": "Separate PRs for utility cluster",
"matchFileNames": [
"**/kubernetes/utility/**",
"**/ansible/utility/**",
"**/terraform/utility/**"
],
"additionalBranchPrefix": "utility-"
}
]
}
4 changes: 2 additions & 2 deletions .github/workflows/flux-ks-sync.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ name: "Flux Kustomization Sync"
on:
push:
branches: ["main"]
paths: ["kubernetes/pi/**"]
paths: ["kubernetes/pi/**", "kubernetes/utility/**"]

jobs:
sync:
name: Flux Kustomization Sync
runs-on: ["gha-runner-scale-set"]
strategy:
matrix:
cluster: ["pi"]
cluster: ["pi", "utility"]
steps:
- name: Get Secrets
uses: bitwarden/sm-action@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/nas-restart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
runs-on: ["gha-runner-scale-set"]
strategy:
matrix:
cluster: ["main", "pi"]
cluster: ["main", "pi", "utility"]
steps:
- name: Get Secrets
uses: bitwarden/sm-action@v2
Expand Down
9 changes: 9 additions & 0 deletions .taskfiles/Sops/Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,12 @@ tasks:
EDITOR="vim -es +'norm Go' +':wq'" sops --ignore-mac "${file}"
fi
done
.encrypt-file:
internal: true
cmd: sops --encrypt --in-place {{.file}}
requires:
vars: ["file"]
preconditions:
- { msg: "Missing Sops config file", sh: "test -f {{.SOPS_CONFIG_FILE}}" }
- { msg: "Missing Sops Age key file", sh: "test -f {{.AGE_FILE}}" }
4 changes: 2 additions & 2 deletions .taskfiles/Talos/Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ tasks:
vars: *vars
- task: bootstrap-apps
vars: *vars
- talosctl health --server=false
- talosctl health --server=false --context {{.cluster}}
requires:
vars: ["cluster"]

Expand Down Expand Up @@ -85,7 +85,7 @@ tasks:
dir: "{{.KUBERNETES_DIR}}/{{.cluster}}/bootstrap/talos/"
cmds:
- until kubectl --kubeconfig {{.KUBERNETES_DIR}}/{{.cluster}}/kubeconfig wait --for=condition=Ready=False nodes --all --timeout=600s; do sleep 10; done
- helmfile --file ./apps/helmfile.yaml apply --skip-diff-on-install --suppress-diff
- helmfile --file ./apps/helmfile.yaml apply --skip-diff-on-install --suppress-diff --kube-context {{.cluster}}
- until kubectl --kubeconfig {{.KUBERNETES_DIR}}/{{.cluster}}/kubeconfig wait --for=condition=Ready nodes --all --timeout=600s; do sleep 10; done
requires:
vars: ["cluster"]
Expand Down
5 changes: 1 addition & 4 deletions kubernetes/main/bootstrap/talos/talconfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,7 @@ patches:
- destination: /var/openebs/local
type: bind
source: /var/openebs/local
options:
- bind
- rshared
- rw
options: ["bind", "rshared", "rw"]
# # Disable predictable NIC naming
# - |-
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
---
# yaml-language-server: $schema=https://lds-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json
# Flux HelmRelease that deploys the cert-manager chart (pinned version) from
# the "jetstack" HelmRepository in the flux-system namespace.
apiVersion: helm.toolkit.fluxcd.io/v2beta2
kind: HelmRelease
metadata:
  name: cert-manager
spec:
  interval: 30m
  chart:
    spec:
      chart: cert-manager
      version: v1.14.5
      sourceRef:
        kind: HelmRepository
        name: jetstack
        namespace: flux-system
  # Retry a failed install up to three times.
  install:
    remediation:
      retries: 3
  # On a failed upgrade: clean up, retry up to three times, then roll back.
  upgrade:
    cleanupOnFail: true
    remediation:
      retries: 3
      strategy: rollback
  values:
    installCRDs: true
    # DNS-01 checks are sent only to the recursive nameservers listed here.
    extraArgs:
      - "--dns01-recursive-nameservers=1.1.1.1:53,9.9.9.9:53"
      - "--dns01-recursive-nameservers-only"
    # "None" is the literal DNS policy string, not a YAML null; the pod
    # resolves through the nameservers given in podDnsConfig.
    podDnsPolicy: "None"
    podDnsConfig:
      nameservers: ["1.1.1.1", "9.9.9.9"]
    webhook:
      replicaCount: 1
    cainjector:
      replicaCount: 1
    # Expose metrics through a ServiceMonitor tagged for the "observability"
    # Prometheus instance.
    prometheus:
      enabled: true
      servicemonitor:
        enabled: true
        prometheusInstance: observability
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Bundles the cert-manager HelmRelease together with its alerting rules.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - "./helmrelease.yaml"
  - "./prometheusrule.yaml"
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
---
# yaml-language-server: $schema=https://lds-schemas.pages.dev/monitoring.coreos.com/prometheusrule_v1.json
# Alerting rules for cert-manager. Every alert fires only after its condition
# has held for 15 minutes (`for: 15m`); alert texts must quote that same window.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: cert-manager.rules
spec:
  groups:
    # Health of the cert-manager deployment itself.
    - name: cert-manager
      rules:
        # No `up` series exists for the cert-manager job at all.
        - alert: CertManagerAbsent
          expr: |
            absent(up{job="cert-manager"})
          for: 15m
          labels:
            severity: critical
          annotations:
            description: >
              New certificates will not be able to be minted, and existing ones can't be renewed until cert-manager is back.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent
            summary: "Cert Manager has disappeared from Prometheus service discovery."
    # Health of the certificates managed by cert-manager.
    - name: certificates
      rules:
        # Average remaining lifetime is below 21 days (21 * 24 * 3600 seconds).
        - alert: CertManagerCertExpirySoon
          expr: |
            avg by (exported_namespace, namespace, name) (certmanager_certificate_expiration_timestamp_seconds - time()) < (21 * 24 * 3600)
          for: 15m
          labels:
            severity: warning
          annotations:
            description: >
              The domain that this cert covers will be unavailable after
              {{ $value | humanizeDuration }}. Clients using endpoints that this cert
              protects will start to fail in {{ $value | humanizeDuration }}.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertexpirysoon
            summary: |
              The cert {{ $labels.name }} is {{ $value | humanizeDuration }} from expiry, it should have renewed over a week ago.
        # A certificate reports a ready-status condition other than "True".
        - alert: CertManagerCertNotReady
          expr: |
            max by (name, exported_namespace, namespace, condition) (certmanager_certificate_ready_status{condition!="True"} == 1)
          for: 15m
          labels:
            severity: critical
          annotations:
            # The duration quoted below mirrors the `for: 15m` window above.
            description: >
              This certificate has not been ready to serve traffic for at least
              15m. If the cert is being renewed or there is another valid cert, the ingress
              controller _may_ be able to serve that instead.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertnotready
            summary: "The cert {{ $labels.name }} is not ready to serve traffic."
        # ACME client requests are being answered with HTTP 429.
        - alert: CertManagerHittingRateLimits
          expr: |
            sum by (host) (rate(certmanager_http_acme_client_request_count{status="429"}[5m])) > 0
          for: 15m
          labels:
            severity: critical
          annotations:
            description: >
              Depending on the rate limit, cert-manager may be unable to generate certificates for up to a week.
            runbook_url: https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerhittingratelimits
            summary: "Cert manager hitting LetsEncrypt rate limits."
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
# yaml-language-server: $schema=https://lds-schemas.pages.dev/cert-manager.io/clusterissuer_v1.json
# ACME issuer pointing at the Let's Encrypt production directory.
# Challenges are solved via DNS-01 through Cloudflare, using the API token
# stored under key "api-token" in the "cert-manager-secret" Secret.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-production
spec:
  acme:
    email: "${SECRET_ACME_EMAIL}"
    server: https://acme-v02.api.letsencrypt.org/directory
    privateKeySecretRef:
      name: letsencrypt-production
    solvers:
      - selector:
          dnsZones:
            - "${SECRET_DOMAIN}"
        dns01:
          cloudflare:
            apiTokenSecretRef:
              name: cert-manager-secret
              key: api-token
---
# yaml-language-server: $schema=https://lds-schemas.pages.dev/cert-manager.io/clusterissuer_v1.json
# Same configuration as the production issuer, but pointing at the
# Let's Encrypt staging directory.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-staging
spec:
  acme:
    email: "${SECRET_ACME_EMAIL}"
    server: https://acme-staging-v02.api.letsencrypt.org/directory
    privateKeySecretRef:
      name: letsencrypt-staging
    solvers:
      - selector:
          dnsZones:
            - "${SECRET_DOMAIN}"
        dns01:
          cloudflare:
            apiTokenSecretRef:
              name: cert-manager-secret
              key: api-token
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Bundles the SOPS-encrypted API token Secret with the ClusterIssuers that
# reference it.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - "./secret.sops.yaml"
  - "./issuers.yaml"
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
apiVersion: v1
kind: Secret
metadata:
name: cert-manager-secret
stringData:
api-token: ENC[AES256_GCM,data:BblSQWWP0PpyvVNEad00sMs2vQ1R5s07CwHEymi0ldS+f91z9wJtdA==,iv:rGNipefpWtKGRkQqSGuAMtFew4rlRBmphPxBoMSVw5w=,tag:0OnJoWDWqqx0vRJQVXZojg==,type:str]
sops:
kms: []
gcp_kms: []
azure_kv: []
hc_vault: []
age:
- recipient: age12v9uw8k6myrr49z9aq6jmcwa79aepu0p6p462nrv968qcae72pcspwldec
enc: |
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBLczJLTXlhL1d0TUw1RmIw
YkF2MGVFa00xQ3lTanI2cjkwR2dCc05WbW4wCkh4N1NhWmJjYm5BdVg1T1lvalU5
STdXSFFFc1dLaFpWbXRXaTY3SFNURncKLS0tIE91ZVc4S0gvTVNWRHpWaEZJdzZi
L1YrajFWSVR3ZmZRRktsNmhac3hnNTAKQ8Y9GqFsIZND7qXfDObTGzOKhn/tXn7i
s6HawJ1gRsYzUVer2zpIWF9u5DkQIuR4dvFFoC9XfGnmba09WFaFkw==
-----END AGE ENCRYPTED FILE-----
lastmodified: "2023-11-24T16:09:41Z"
mac: ENC[AES256_GCM,data:ytQ9X1aoU0Ns0DMCzwxGoIvsQLtzVSVsaBXAhprMn1Eni1zvQCn0qwHD/U2P4Wa/trT8B3dfafpUg8EvpvMDgrC82XOL4ZLnxVHctWxQmbGk2mfEqPU/p1k071z91Luryig0uuoodzWtFAuOfLfGTfW9+MK+ptC7gEuAIZgRgiI=,iv:iBhhsg2otQVxRLC/AQpbFX5IRWNgM13dVx78ZMqmPdc=,tag:s2L2gUUDCfkUdR0TOWtIdA==,type:str]
pgp: []
encrypted_regex: ^(data|stringData)$
version: 3.8.1
44 changes: 44 additions & 0 deletions kubernetes/utility/apps/cert-manager/cert-manager/ks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
---
# yaml-language-server: $schema=https://lds-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization that reconciles the cert-manager application manifests
# into the cert-manager namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  # The &app anchor is reused below as the app.kubernetes.io/name label value.
  name: &app cert-manager
  namespace: flux-system
spec:
  interval: 30m
  retryInterval: 1m
  timeout: 5m
  wait: true
  prune: true
  sourceRef:
    kind: GitRepository
    name: home-kubernetes
  path: ./kubernetes/utility/apps/cert-manager/cert-manager/app
  targetNamespace: cert-manager
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
---
# yaml-language-server: $schema=https://lds-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json
# Flux Kustomization for the ClusterIssuers; reconciled only after the
# cert-manager Kustomization above (see dependsOn).
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  # Anchors do not carry across documents, so &app is declared again here.
  name: &app cert-manager-issuers
  namespace: flux-system
spec:
  dependsOn:
    - name: cert-manager
  interval: 30m
  retryInterval: 1m
  timeout: 5m
  wait: true
  prune: true
  sourceRef:
    kind: GitRepository
    name: home-kubernetes
  path: ./kubernetes/utility/apps/cert-manager/cert-manager/issuers
  targetNamespace: cert-manager
  commonMetadata:
    labels:
      app.kubernetes.io/name: *app
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# yaml-language-server: $schema=https://lds-schemas.pages.dev/cert-manager.io/certificate_v1.json
# Certificate for the apex domain plus its wildcard, issued by the
# letsencrypt-production ClusterIssuer and stored in "<domain>-tls".
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: "${SECRET_DOMAIN}"
spec:
  issuerRef:
    kind: ClusterIssuer
    name: letsencrypt-production
  secretName: "${SECRET_DOMAIN}-tls"
  commonName: "${SECRET_DOMAIN}"
  dnsNames:
    - "${SECRET_DOMAIN}"
    - "*.${SECRET_DOMAIN}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://json.schemastore.org/kustomization
# Selects which Certificate manifests are applied.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # certificates.yaml is commented out, so only the staging certificate is
  # applied. NOTE(review): presumably a temporary state; confirm before enabling.
  # - ./certificates.yaml
  - "./staging.yaml"
13 changes: 13 additions & 0 deletions kubernetes/utility/apps/cert-manager/certificates/app/staging.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# yaml-language-server: $schema=https://lds-schemas.pages.dev/cert-manager.io/certificate_v1.json
# Certificate for the apex domain plus its wildcard, issued by the
# letsencrypt-staging ClusterIssuer.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  # ${SECRET_DOMAIN/./-} is a substitution expression applied to the domain
  # before it is used in the resource and secret names.
  # NOTE(review): presumably swaps "." for "-"; confirm whether only the first
  # dot is replaced if the domain contains more than one.
  name: "${SECRET_DOMAIN/./-}-staging"
spec:
  issuerRef:
    kind: ClusterIssuer
    name: letsencrypt-staging
  secretName: "${SECRET_DOMAIN/./-}-staging-tls"
  commonName: "${SECRET_DOMAIN}"
  dnsNames:
    - "${SECRET_DOMAIN}"
    - "*.${SECRET_DOMAIN}"
Loading

0 comments on commit 9367f8b

Please sign in to comment.