From a8235cec6f4bca61ad6945a8ab1ac67b57a364a3 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Tue, 9 Jul 2024 18:00:56 +0000 Subject: [PATCH 01/12] Bump k3s-root to v0.14.0 Also remove the wg-add script that has been unused since v1.26 dropped the legacy wireguard backend Signed-off-by: Brad Davidson (cherry picked from commit 047664b61025f6d3a0b6f03e4cf8698320968cae) Signed-off-by: Brad Davidson --- scripts/download | 3 +-- scripts/version.sh | 2 +- scripts/wg-add.sh | 34 ---------------------------------- 3 files changed, 2 insertions(+), 37 deletions(-) delete mode 100755 scripts/wg-add.sh diff --git a/scripts/download b/scripts/download index f75d9f925c67..1b70cb196d06 100755 --- a/scripts/download +++ b/scripts/download @@ -26,13 +26,12 @@ case ${OS} in linux) git clone --single-branch --branch=${VERSION_RUNC} --depth=1 https://github.com/k3s-io/runc ${RUNC_DIR} curl --compressed -sfL https://github.com/k3s-io/k3s-root/releases/download/${VERSION_ROOT}/k3s-root-${ARCH}.tar | tar xf - - cp scripts/wg-add.sh bin/aux ;; windows) git clone --single-branch --branch=${VERSION_HCSSHIM} --depth=1 https://github.com/microsoft/hcsshim ${HCSSHIM_DIR} ;; *) - echo "[ERROR] unrecognized opertaing system: ${OS}" + echo "[ERROR] unrecognized operating system: ${OS}" exit 1 ;; esac diff --git a/scripts/version.sh b/scripts/version.sh index 84957bf66ddc..5784e89e18cd 100755 --- a/scripts/version.sh +++ b/scripts/version.sh @@ -76,7 +76,7 @@ if [ -z "$VERSION_KUBE_ROUTER" ]; then VERSION_KUBE_ROUTER="v0.0.0" fi -VERSION_ROOT="v0.13.0" +VERSION_ROOT="v0.14.0" DEPENDENCIES_URL="https://raw.githubusercontent.com/kubernetes/kubernetes/${VERSION_K8S}/build/dependencies.yaml" VERSION_GOLANG="go"$(curl -sL "${DEPENDENCIES_URL}" | yq e '.dependencies[] | select(.name == "golang: upstream version").version' -) diff --git a/scripts/wg-add.sh b/scripts/wg-add.sh deleted file mode 100755 index dc2f4a81c57c..000000000000 --- a/scripts/wg-add.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash - -auto-mtu() { - local mtu=0 endpoint output - while read -r _ endpoint; do - [[ $endpoint =~ ^\[?([a-z0-9:.]+)\]?:[0-9]+$ ]] || continue - output="$(ip route get "${BASH_REMATCH[1]}" || true)" - [[ ( $output =~ mtu\ ([0-9]+) || ( $output =~ dev\ ([^ ]+) && $(ip link show dev "${BASH_REMATCH[1]}") =~ mtu\ ([0-9]+) ) ) && ${BASH_REMATCH[1]} -gt $mtu ]] && mtu="${BASH_REMATCH[1]}" - done < <(wg show "$1" endpoints) - if [[ $mtu -eq 0 ]]; then - read -r output < <(ip route show default || true) || true - [[ ( $output =~ mtu\ ([0-9]+) || ( $output =~ dev\ ([^ ]+) && $(ip link show dev "${BASH_REMATCH[1]}") =~ mtu\ ([0-9]+) ) ) && ${BASH_REMATCH[1]} -gt $mtu ]] && mtu="${BASH_REMATCH[1]}" - fi - [[ $mtu -gt 0 ]] || mtu=1500 - ip link set mtu $(( mtu - 80 )) up dev "$1" -} - -# probe for any modules that may be needed -modprobe wireguard -modprobe tun - -# try wireguard kernel module first -ip link add "$1" type wireguard && exit - -# try boringtun and let it drop privileges -boringtun "$1" && auto-mtu "$1" && exit - -# try boringtun w/o dropping privileges -WG_SUDO=1 boringtun "$1" && auto-mtu "$1" && exit - -# try wireguard-go - p.s. 
should not use wireguard-go, it leaks memory -WG_I_PREFER_BUGGY_USERSPACE_TO_POLISHED_KMOD=1 wireguard-go "$1" && auto-mtu "$1" && exit - -exit 1 From 30ad86f474bb4a7af3834dbe53617c3cea524517 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Jun 2024 22:11:38 +0000 Subject: [PATCH 02/12] Bump github.com/hashicorp/go-retryablehttp from 0.7.4 to 0.7.7 Bumps [github.com/hashicorp/go-retryablehttp](https://github.com/hashicorp/go-retryablehttp) from 0.7.4 to 0.7.7. - [Changelog](https://github.com/hashicorp/go-retryablehttp/blob/main/CHANGELOG.md) - [Commits](https://github.com/hashicorp/go-retryablehttp/compare/v0.7.4...v0.7.7) --- updated-dependencies: - dependency-name: github.com/hashicorp/go-retryablehttp dependency-type: indirect ... Signed-off-by: dependabot[bot] (cherry picked from commit f6942f3de420ce3d26cc16664bcfcdec494c8ff6) Signed-off-by: Brad Davidson --- go.mod | 2 +- go.sum | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 7dc60a428d3c..ede7bdaff2d3 100644 --- a/go.mod +++ b/go.mod @@ -290,7 +290,7 @@ require ( github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect - github.com/hashicorp/go-retryablehttp v0.7.4 // indirect + github.com/hashicorp/go-retryablehttp v0.7.7 // indirect github.com/hashicorp/go-version v1.6.0 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/hashicorp/golang-lru/arc/v2 v2.0.5 // indirect diff --git a/go.sum b/go.sum index 7266b151efa9..f5a9db205b04 100644 --- a/go.sum +++ b/go.sum @@ -549,6 +549,8 @@ github.com/fatih/camelcase v1.0.0 h1:hxNvNX/xYBp0ovncs8WyWZrOrpBNub/JfaMvbURyft8 github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= +github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= +github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= @@ -819,15 +821,17 @@ github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brv github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= -github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI= github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= +github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= +github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= github.com/hashicorp/go-multierror v1.1.1 
h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= -github.com/hashicorp/go-retryablehttp v0.7.4 h1:ZQgVdpTdAL7WpMIwLzCfbalOcSUdkDZnpUv3/+BxzFA= github.com/hashicorp/go-retryablehttp v0.7.4/go.mod h1:Jy/gPYAdjqffZ/yFGCFV2doI5wjtH1ewM9u8iYVjtX8= +github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= +github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= @@ -1116,6 +1120,7 @@ github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJ github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd h1:br0buuQ854V8u83wA0rVZ8ttrq5CpaPZdvrK0LP2lOk= github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd/go.mod h1:QuCEs1Nt24+FYQEqAAncTDPJIuGs+LxK1MCiFL25pMU= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= From 3e02c1d6fa28fa0da14ab6838361c3f0fb3f8dbe Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 10 Jul 2024 12:53:46 -0700 Subject: [PATCH 03/12] Bump Local Path Provisioner version (#10394) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: Bump Local Path Provisioner version Made with ❤️️ by updatecli --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> (cherry picked from commit a0b374508eb57e1da7b72b5b9dbeb09e6016cefe) Signed-off-by: Brad Davidson --- manifests/local-storage.yaml | 2 +- pkg/deploy/zz_generated_bindata.go | 2 +- scripts/airgap/image-list.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/manifests/local-storage.yaml b/manifests/local-storage.yaml index f6fcc0d7c9b6..fb72ffc07704 100644 --- a/manifests/local-storage.yaml +++ b/manifests/local-storage.yaml @@ -67,7 +67,7 @@ spec: effect: "NoSchedule" containers: - name: local-path-provisioner - image: "%{SYSTEM_DEFAULT_REGISTRY}%rancher/local-path-provisioner:v0.0.27" + image: "%{SYSTEM_DEFAULT_REGISTRY}%rancher/local-path-provisioner:v0.0.28" imagePullPolicy: IfNotPresent command: - local-path-provisioner diff --git a/pkg/deploy/zz_generated_bindata.go b/pkg/deploy/zz_generated_bindata.go index dc7984c584af..9eb4d80d19cd 100644 --- a/pkg/deploy/zz_generated_bindata.go +++ b/pkg/deploy/zz_generated_bindata.go @@ -132,7 +132,7 @@ func corednsYaml() (*asset, error) { return a, nil } -var _localStorageYaml = 
[]byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xb4\x56\x5f\x6f\xdb\x36\x10\x7f\xd7\xa7\xb8\x69\xcb\xcb\x50\xca\xc9\x06\x2c\x03\xdf\xbc\xd8\x69\x03\x38\xb6\x61\xbb\x1d\x8a\xa2\x30\x68\xea\x6c\xb3\xa1\x48\x82\xa4\xdc\x7a\x59\xbe\xfb\x40\x52\x76\xe4\x24\x4d\x1c\x6c\xd3\x8b\xa0\xe3\xdd\xef\x8e\xf7\xbb\x3f\x62\x46\x7c\x40\xeb\x84\x56\x14\x36\x67\xd9\x8d\x50\x25\x85\x29\xda\x8d\xe0\xd8\xe5\x5c\xd7\xca\x67\x15\x7a\x56\x32\xcf\x68\x06\xa0\x58\x85\x14\xa4\xe6\x4c\x12\xc3\xfc\x9a\x18\xab\x37\x22\xd8\xa3\x25\x2e\xd9\x11\xd6\x18\x26\x75\x67\x18\x47\x0a\x37\xf5\x02\x89\xdb\x3a\x8f\x55\x46\x08\xc9\xda\x9e\xed\x82\xf1\x82\xd5\x7e\xad\xad\xf8\x8b\x79\xa1\x55\x71\xf3\xbb\x2b\x84\xee\xec\x63\xba\x90\xb5\xf3\x68\x27\x5a\xe2\xf1\x01\xd9\xa0\x6d\x6b\x89\x8e\x66\x04\x98\x11\x6f\xad\xae\x8d\xa3\xf0\x29\xcf\x3f\x67\x00\x16\x9d\xae\x2d\xc7\x28\x51\xba\x44\x97\xbf\x81\xdc\x84\xb0\x9c\x47\xe5\x37\x5a\xd6\x15\x72\xc9\x44\x15\x4f\xb8\x56\x4b\xb1\xaa\x98\x49\x7a\xba\x74\x1d\xa9\x57\x11\x6a\x83\x76\x11\x61\x56\xe8\xc3\xa1\x14\x2e\xbe\xbf\x32\xcf\xd7\xf9\xe7\x97\xdd\xa3\x2a\x8d\x16\xca\x3f\x19\xc2\xde\xdf\xa1\xaf\x9f\x8f\x02\xde\x60\x40\x3d\x30\xe4\x16\x99\xc7\x08\xfa\x74\x7c\xce\x6b\xcb\x56\xd8\xd0\xf0\x18\xb4\x39\xe7\x92\x39\x87\xee\xb8\x0c\xfc\x2b\xd2\xff\x10\xaa\x14\x6a\x75\x3c\xf7\x0b\xa1\xca\x2c\x14\xc0\x04\x97\x41\x79\x77\xbd\x67\x1c\x67\x00\x8f\x8b\xed\x98\x12\x73\xf5\xe2\x0b\x72\x1f\xab\xec\xc9\x16\xfa\xbf\x1a\x87\x19\xe3\xee\xd3\xd5\x43\x23\xf5\xb6\xc2\x57\xf4\xec\xf7\x5d\x39\x83\x9c\x46\xda\x93\xee\x3b\x11\x38\xdf\x0e\x44\x25\x3c\x85\xd3\x0c\xc0\x79\xcb\x3c\xae\xb6\x41\x0b\xc0\x6f\x0d\x52\x98\x68\x29\x85\x5a\xbd\x37\x25\xf3\x18\xe5\xb6\x2d\x49\xaa\x00\x15\xfb\xf6\x5e\xb1\x0d\x13\x92\x2d\x24\x52\x38\x0b\x70\x28\x91\x7b\x6d\x93\x4e\x15\xaa\x66\xc0\x16\x28\xdd\xce\x88\x19\xf3\xcc\x35\x3c\x56\x46\xee\x5d\xb4\xef\x1f\x1e\x79\x80\xf4\x12\x16\xc0\xee\xf6\xe1\x31\x56\x68\x2b\xfc\xf6\x22\x14\xfb\x30\x26\x33\x4f\x49\x22\x61\x66\x10\x6e\x85\x17\x9c\xc9\xbc\xd1\x77\x07\xdc\x0f\x5f\x47\x7c\x4c\xa5\x96\x68\x63\x61\xb6\x22\x06\x20\x70\x83\x5b\x0a\xf9\x45\xe3\xaf\x5b\x96\x5a\xb9\x91\x92\xdb\xbc\xa5\x05\xa0\x4d\xb0\xd6\x96\x42\xde\xff\x26\x9c\x77\xf9\x13\x20\x31\xf2\x50\xbc\x45\x20\xdd\x2a\xf4\x18\x7b\x8f\x6b\xe5\xad\x96\xc4\x48\xa6\xf0\x15\xb8\x00\xb8\x5c\x22\xf7\x14\xf2\xa1\x9e\xf2\x35\x96\xb5\xc4\xd7\x38\xae\x58\x68\xb9\xff\xca\x63\xb8\x06\x13\x0a\xed\x3e\x83\xe4\xa5\x3e\x48\x8f\xa8\xd8\x2a\x10\x7c\x72\x3b\xfd\x38\x9d\xf5\xaf\xe7\xbd\xfe\x65\xf7\xfd\x60\x36\x9f\xf4\xdf\x5e\x4d\x67\x93\x8f\x77\x27\x96\x29\xbe\x46\xdb\x79\x1a\x89\x6e\x4e\x8b\xd3\xe2\x97\xf3\xfc\x10\x72\x5c\x4b\x39\xd6\x52\xf0\x2d\x85\xab\xe5\x50\xfb\xb1\x45\x87\x7b\xca\x43\xc4\x55\xc5\x54\x79\x4f\x38\x79\x29\x54\x02\xce\x33\xeb\x5b\xdf\x84\xa4\x0d\xd5\x12\x75\xd0\xf3\x4e\x92\x36\xaf\xe2\x8b\xd3\x6a\xaf\x91\xf6\xcb\x75\xa8\x3e\xd7\xf6\x9d\x92\x95\x2c\x48\x52\x6a\xe5\xbe\x0a\xfa\x63\xe6\xd7\xf4\xc0\xc1\x5e\x03\xd5\xe6\x31\xd8\x78\xd4\x9b\x0f\xbb\xd7\xfd\xe9\xb8\x7b\xd1\x6f\x81\x6d\x98\xac\xf1\xd2\xea\x8a\x1e\xb0\xbb\x14\x28\xcb\x66\x78\x3f\x92\x27\xdf\xbb\x2e\x2f\xf6\x33\x2c\x6b\xdf\xea\x15\x17\x4a\xf2\x6b\x66\x0e\xbd\x3d\x2a\x99\x26\xbf\x0f\xe7\xf0\xe1\xba\xbc\x9f\xc8\xd3\x24\x8f\x93\xe3\xd9\x99\x1c\x16\x94\x52\xda\xb7\xbb\xbe\xc4\x25\xab\xa5\xff\x10\x63\x9d\xc5\xf1\x9a\x47\x8b\x54\x5a\xed\x15\xfc\xa0\x97\x84\x23\x8d\x31\x89\xc7\x14\x72\x6f\x6b\xcc\xb3\x76\x9d\x42\x53\xc7\xc1\xa0\x15\x48\x4a\x4d\xb3\x6e\xaf\x75\x89\x14\xfe\x64\xc2\x5f\x6a\x7b\x29\xac\xf3\x17\x5a\xb9\xba\x42\x9b\xd9\xf4\x5f\xb4\xab\xe9\x1e\x4a\xf4\x18\x13\xd3\xec\xd0\x5d\x46\xb3\x07\xff\x98\xc
f\xae\xa6\x7d\xfd\x7e\x67\x2b\xed\x0c\x5b\xa5\x4c\xe1\x6f\x12\x13\x72\xdb\x50\x17\x47\x4c\x28\x90\x6b\x66\x72\xfa\xa9\x91\xde\xee\x89\x8d\xe7\x39\xcd\x77\x9d\x3d\xee\xce\xde\xcd\x2f\x47\x93\xf9\x70\x34\x9c\x0f\xae\xa6\xb3\x7e\x6f\x3e\x1c\xf5\xfa\xd3\xfc\xcd\xbd\x4d\x88\xce\xe5\xf4\x53\x7e\x72\xbb\xb3\x1b\x8c\x2e\xba\x83\xf9\x74\x36\x9a\x74\xdf\xf6\x23\xca\xdd\x49\xfc\x13\x0a\xcf\x5d\xf3\x4e\xdf\x77\x71\xbf\xf9\xf0\xf7\xd1\x04\xfb\xe3\x0f\x9d\x85\x50\x1d\xb7\x4e\x5c\xa2\x07\x82\x75\x5a\x5d\x37\xa5\xb0\x40\x2a\x38\x3d\x3f\x3f\x07\x62\x20\xff\xe9\xf6\xc3\x68\x30\xef\x5d\x4d\xee\x12\xf3\x7c\x5d\xe9\x12\xce\x4f\x4f\xdb\x47\x9d\xa2\xc8\xe3\x1a\x64\xb6\xd4\x5f\xd5\x11\x8e\x6c\x05\xc4\x2e\x1f\xc2\xaf\x51\x1a\xb4\x63\x5d\x16\x5b\x56\xc9\x3d\xcc\x03\x12\x83\x28\xf1\x3c\xd6\xe5\x93\x1b\x37\x51\x9b\xd0\x88\x69\x94\xda\x6b\xf5\xfb\x23\xfa\x81\x11\xbc\x6e\x2c\x57\xc2\x5a\x6d\xb1\x24\x52\x2c\x2c\xb3\x5b\xb2\xa8\xdd\x76\xa1\xbf\xd1\xb3\xe2\xd7\xdf\x8a\xb3\x63\xe7\xf2\x3f\x01\x00\x00\xff\xff\x68\x8a\xdf\xd2\x1a\x0d\x00\x00") +var _localStorageYaml = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xb4\x56\xdf\x6f\xdb\xb6\x13\x7f\xd7\x5f\x71\x5f\x7d\x97\x97\xa1\x94\x93\x0d\x58\x0a\xbe\x79\xb1\xd3\x06\x70\x6c\xc3\x76\x3b\x14\x45\x61\xd0\xd4\xd9\x66\x43\x91\x04\x49\xb9\xf5\xb2\xfc\xef\x03\x49\xd9\x91\x93\x34\x71\xb0\x4d\x2f\x82\x8e\x77\x9f\x3b\xde\xe7\x7e\x88\x19\xf1\x11\xad\x13\x5a\x51\xd8\x9c\x65\x37\x42\x95\x14\xa6\x68\x37\x82\x63\x97\x73\x5d\x2b\x9f\x55\xe8\x59\xc9\x3c\xa3\x19\x80\x62\x15\x52\x90\x9a\x33\x49\x0c\xf3\x6b\x62\xac\xde\x88\x60\x8f\x96\xb8\x64\x47\x58\x63\x98\xd4\x9d\x61\x1c\x29\xdc\xd4\x0b\x24\x6e\xeb\x3c\x56\x19\x21\x24\x6b\x7b\xb6\x0b\xc6\x0b\x56\xfb\xb5\xb6\xe2\x4f\xe6\x85\x56\xc5\xcd\x5b\x57\x08\xdd\xd9\xc7\x74\x21\x6b\xe7\xd1\x4e\xb4\xc4\xe3\x03\xb2\x41\xdb\xd6\x12\x1d\xcd\x08\x30\x23\xde\x59\x5d\x1b\x47\xe1\x73\x9e\x7f\xc9\x00\x2c\x3a\x5d\x5b\x8e\x51\xa2\x74\x89\x2e\x7f\x03\xb9\x09\x61\x39\x8f\xca\x6f\xb4\xac\x2b\xe4\x92\x89\x2a\x9e\x70\xad\x96\x62\x55\x31\x93\xf4\x74\xe9\x3a\x52\xaf\x22\xd4\x06\xed\x22\xc2\xac\xd0\x87\x43\x29\x5c\x7c\x7f\x63\x9e\xaf\xf3\x2f\x2f\xbb\x47\x55\x1a\x2d\x94\x7f\x32\x84\xbd\xbf\x43\x5f\x3f\x1f\x05\xbc\xc1\x80\x7a\x60\xc8\x2d\x32\x8f\x11\xf4\xe9\xf8\x9c\xd7\x96\xad\xb0\xa1\xe1\x31\x68\x73\xce\x25\x73\x0e\xdd\x71\x19\xf8\x47\xa4\xff\x2e\x54\x29\xd4\xea\x78\xee\x17\x42\x95\x59\x28\x80\x09\x2e\x83\xf2\xee\x7a\xcf\x38\xce\x00\x1e\x17\xdb\x31\x25\xe6\xea\xc5\x57\xe4\x3e\x56\xd9\x93\x2d\xf4\x5f\x35\x0e\x33\xc6\xdd\xa7\xab\x87\x46\xea\x6d\x85\xaf\xe8\xd9\x1f\xbb\x72\x06\x39\x8d\xb4\x27\xdd\xf7\x22\x70\xbe\x1d\x88\x4a\x78\x0a\xa7\x19\x80\xf3\x96\x79\x5c\x6d\x83\x16\x80\xdf\x1a\xa4\x30\xd1\x52\x0a\xb5\xfa\x60\x4a\xe6\x31\xca\x6d\x5b\x92\x54\x01\x2a\xf6\xfd\x83\x62\x1b\x26\x24\x5b\x48\xa4\x70\x16\xe0\x50\x22\xf7\xda\x26\x9d\x2a\x54\xcd\x80\x2d\x50\xba\x9d\x11\x33\xe6\x99\x6b\x78\xac\x8c\xdc\xbb\x68\xdf\x3f\x3c\xf2\x00\xe9\x25\x2c\x80\xdd\xed\xc3\x63\xac\xd0\x56\xf8\xed\x45\x28\xf6\x61\x4c\x66\x9e\x92\x44\xc2\xcc\x20\xdc\x0a\x2f\x38\x93\x79\xa3\xef\x0e\xb8\x1f\xbe\x8e\xf8\x98\x4a\x2d\xd1\xc6\xc2\x6c\x45\x0c\x40\xe0\x06\xb7\x14\xf2\x8b\xc6\x5f\xb7\x2c\xb5\x72\x23\x25\xb7\x79\x4b\x0b\x40\x9b\x60\xad\x2d\x85\xbc\xff\x5d\x38\xef\xf2\x27\x40\x62\xe4\xa1\x78\x8b\x40\xba\x55\xe8\x31\xf6\x1e\xd7\xca\x5b\x2d\x89\x91\x4c\xe1\x2b\x70\x01\x70\xb9\x44\xee\x29\xe4\x43\x3d\xe5\x6b\x2c\x6b\x89\xaf\x71\x5c\xb1\xd0\x72\xff\x96\xc7\x70\x0d\x26\x14\xda\x7d\x06\xc9\x4b\x7d\x90\x1e\x51\xb1\x55\x20\xf8\xe4\x76\xfa\x69\x3a\xeb\x5f\xcf\x7b\xfd\xcb\xee\x87\xc1\x6c\x3e\xe9\xbf\xbb\x9a\xce\x26\x9f\x
ee\x4e\x2c\x53\x7c\x8d\xb6\xf3\x34\x12\xdd\x9c\x16\xa7\xc5\x2f\x6f\xf3\x43\xc8\x71\x2d\xe5\x58\x4b\xc1\xb7\x14\xae\x96\x43\xed\xc7\x16\x1d\xee\x29\x0f\x11\x57\x15\x53\xe5\x3d\xe1\xe4\xa5\x50\x09\x38\xcf\xac\x6f\x7d\x13\x92\x36\x54\x4b\xd4\x41\xcf\x3b\x49\xda\xbc\x8a\xaf\x4e\xab\xbd\x46\xda\x2f\xd7\xa1\xfa\x5c\xdb\x77\x4a\x56\xb2\x20\x49\xa9\x95\xfb\x2a\xe8\x8f\x99\x5f\xd3\x03\x07\x7b\x0d\x54\x9b\xc7\x60\xe3\x51\x6f\x3e\xec\x5e\xf7\xa7\xe3\xee\x45\xbf\x05\xb6\x61\xb2\xc6\x4b\xab\x2b\x7a\xc0\xee\x52\xa0\x2c\x9b\xe1\xfd\x48\x9e\x7c\xef\xba\xbc\xd8\xcf\xb0\xac\x7d\xab\x57\x5c\x28\xc9\xaf\x99\x39\xf4\xf6\xa8\x64\x9a\xfc\x3e\x9c\xc3\x87\xeb\xf2\x7e\x22\x4f\x93\x3c\x4e\x8e\x67\x67\x72\x58\x50\x4a\x69\xdf\xee\xfa\x12\x97\xac\x96\xfe\x63\x8c\x75\x16\xc7\x6b\x1e\x2d\x52\x69\xb5\x57\xf0\x83\x5e\x12\x8e\x34\xc6\x24\x1e\x53\xc8\xbd\xad\x31\xcf\xda\x75\x0a\x4d\x1d\x07\x83\x56\x20\x29\x35\xcd\xba\xbd\xd6\x25\x52\xf8\x83\x09\x7f\xa9\xed\xa5\xb0\xce\x5f\x68\xe5\xea\x0a\x6d\x66\xd3\x7f\xd1\xae\xa6\x7b\x28\xd1\x63\x4c\x4c\xb3\x43\x77\x19\xcd\x1e\xfc\x63\x3e\xbb\x9a\xf6\xf5\xfb\x83\xad\xb4\x33\x6c\x95\x32\x85\xbf\x48\x4c\xc8\x6d\x43\x5d\x1c\x31\xa1\x40\xae\x99\xc9\xe9\xe7\x46\x7a\xbb\x27\x36\x9e\xe7\x34\xdf\x75\xf6\xb8\x3b\x7b\x3f\xbf\x1c\x4d\xe6\xc3\xd1\x70\x3e\xb8\x9a\xce\xfa\xbd\xf9\x70\xd4\xeb\x4f\xf3\x37\xf7\x36\x21\x3a\x97\xd3\xcf\xf9\xc9\xed\xce\x6e\x30\xba\xe8\x0e\xe6\xd3\xd9\x68\xd2\x7d\xd7\x8f\x28\x77\x27\xf1\x4f\x28\x3c\x77\xcd\x3b\x7d\xdf\xc5\xfd\xe6\xc3\xdf\x47\x13\xec\xff\xff\xd7\x59\x08\xd5\x71\xeb\xc4\x25\x7a\x20\x58\xa7\xd5\x75\x53\x0a\x0b\xa4\x82\xd3\xf3\xf3\x73\x20\x06\xf2\x9f\x6e\x3f\x8e\x06\xf3\xde\xd5\xe4\x2e\x31\xcf\xd7\x95\x2e\xe1\xfc\xf4\xb4\x7d\xd4\x29\x8a\x3c\xae\x41\x66\x4b\xfd\x4d\x1d\xe1\xc8\x56\x40\xec\xf2\x21\xfc\x1a\xa5\x41\x3b\xd6\x65\xb1\x65\x95\xdc\xc3\x3c\x20\x31\x88\x12\xcf\x63\x5d\x3e\xb9\x71\x13\xb5\x09\x8d\x98\x46\xa9\xbd\x56\x7f\x3c\xa2\x1f\x18\xc1\xeb\xc6\x72\x25\xac\xd5\x16\x4b\x22\xc5\xc2\x32\xbb\x25\x8b\xda\x6d\x17\xfa\x3b\x3d\x2b\x7e\xfd\xad\x38\x3b\x76\x2e\xff\x1d\x00\x00\xff\xff\xf6\x4c\xc2\x69\x1a\x0d\x00\x00") func localStorageYamlBytes() ([]byte, error) { return bindataRead( diff --git a/scripts/airgap/image-list.txt b/scripts/airgap/image-list.txt index e0f3fd59419a..407d1b55a5e3 100644 --- a/scripts/airgap/image-list.txt +++ b/scripts/airgap/image-list.txt @@ -1,6 +1,6 @@ docker.io/rancher/klipper-helm:v0.8.4-build20240523 docker.io/rancher/klipper-lb:v0.4.7 -docker.io/rancher/local-path-provisioner:v0.0.27 +docker.io/rancher/local-path-provisioner:v0.0.28 docker.io/rancher/mirrored-coredns-coredns:1.10.1 docker.io/rancher/mirrored-library-busybox:1.36.1 docker.io/rancher/mirrored-library-traefik:2.10.7 From 3f50d7767621be2c197c99f32d939792e3be2930 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Tue, 9 Jul 2024 23:36:29 +0000 Subject: [PATCH 04/12] Ensure remotedialer kubelet connections use kubelet bind address Signed-off-by: Brad Davidson (cherry picked from commit eb8bd158897c7582327762e55dbb2feb6f400963) Signed-off-by: Brad Davidson --- go.mod | 2 +- go.sum | 8 ++++---- pkg/agent/tunnel/tunnel.go | 27 ++++++++++++++++++++++----- pkg/daemons/control/tunnel.go | 15 ++++++++------- 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/go.mod b/go.mod index ede7bdaff2d3..0d2632c2f491 100644 --- a/go.mod +++ b/go.mod @@ -125,7 +125,7 @@ require ( github.com/rancher/dynamiclistener v0.3.6 github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29 github.com/rancher/permissions v0.0.0-20240523180510-4001d3d637f7 - github.com/rancher/remotedialer 
v0.3.0 + github.com/rancher/remotedialer v0.4.1 github.com/rancher/wharfie v0.6.4 github.com/rancher/wrangler v1.1.1 github.com/robfig/cron/v3 v3.0.1 diff --git a/go.sum b/go.sum index f5a9db205b04..12a0bf03c26a 100644 --- a/go.sum +++ b/go.sum @@ -1434,8 +1434,8 @@ github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29 h1:+kige/h8/LnzWgPjB github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29/go.mod h1:kgk9kJVMj9FIrrXU0iyM6u/9Je4bEjPImqswkTVaKsQ= github.com/rancher/permissions v0.0.0-20240523180510-4001d3d637f7 h1:0Kg2SGoMeU1ll4xPi4DE0+qNHLFO/U5MwtK0WrIdK+o= github.com/rancher/permissions v0.0.0-20240523180510-4001d3d637f7/go.mod h1:fsbs0YOsGn1ofPD5p+BuI4qDhbMbSJtTegKt6Ucna+c= -github.com/rancher/remotedialer v0.3.0 h1:y1EO8JCsgZo0RcqTUp6U8FXcBAv27R+TLnWRcpvX1sM= -github.com/rancher/remotedialer v0.3.0/go.mod h1:BwwztuvViX2JrLLUwDlsYt5DiyUwHLlzynRwkZLAY0Q= +github.com/rancher/remotedialer v0.4.1 h1:jwOf2kPRjBBpSFofv1OuZHWaYHeC9Eb6/XgDvbkoTgc= +github.com/rancher/remotedialer v0.4.1/go.mod h1:Ys004RpJuTLSm+k4aYUCoFiOOad37ubYev3TkOFg/5w= github.com/rancher/wharfie v0.6.4 h1:JwYB+q661n8ut/ysgsjKe0P0z6bHCCFoC+29995ME90= github.com/rancher/wharfie v0.6.4/go.mod h1:kWv97z0sMAbnVNT/oe+JFZJVKn4xkas7ZdFf6UifWis= github.com/rancher/wrangler v1.1.1-0.20230818201331-3604a6be798d h1:RQBqHXyAN5gWqUazV637kqmYcy8M8K5bdvXszNciLcY= @@ -1452,8 +1452,8 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/rootless-containers/rootlesskit v1.0.1 h1:jepqW1txFSowKSMAEkVhWH3Oa1TCY9S400MVYe/6Iro= github.com/rootless-containers/rootlesskit v1.0.1/go.mod h1:t2UAiYagxrJ+wmpFAUIZPcqsm4k2B7ve6g7lILKbloc= github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= diff --git a/pkg/agent/tunnel/tunnel.go b/pkg/agent/tunnel/tunnel.go index 23c6dac404b8..79122c6b1f16 100644 --- a/pkg/agent/tunnel/tunnel.go +++ b/pkg/agent/tunnel/tunnel.go @@ -38,6 +38,7 @@ import ( var ( endpointDebounceDelay = time.Second + defaultDialer = net.Dialer{} ) type agentTunnel struct { @@ -45,6 +46,7 @@ type agentTunnel struct { cidrs cidranger.Ranger ports map[string]bool mode string + kubeletAddr string kubeletPort string startTime time.Time } @@ -82,6 +84,7 @@ func Setup(ctx context.Context, config *daemonconfig.Node, proxy proxy.Proxy) er cidrs: cidranger.NewPCTrieRanger(), ports: map[string]bool{}, mode: config.EgressSelectorMode, + kubeletAddr: config.AgentConfig.ListenAddress, kubeletPort: fmt.Sprint(ports.KubeletPort), startTime: time.Now().Truncate(time.Second), } @@ -186,7 +189,7 @@ func (a *agentTunnel) setKubeletPort(ctx context.Context, apiServerReady <-chan return false, nil } a.kubeletPort = kubeletPort - logrus.Infof("Tunnel authorizer set Kubelet Port %s", a.kubeletPort) + logrus.Infof("Tunnel authorizer set Kubelet Port %s", net.JoinHostPort(a.kubeletAddr, a.kubeletPort)) return true, nil }) } @@ -390,7 +393,7 @@ func (a 
*agentTunnel) authorized(ctx context.Context, proto, address string) boo logrus.Debugf("Tunnel authorizer checking dial request for %s", address) host, port, err := net.SplitHostPort(address) if err == nil { - if a.isKubeletPort(proto, host, port) { + if a.isKubeletOrStreamPort(proto, host, port) { return true } if ip := net.ParseIP(host); ip != nil { @@ -448,7 +451,7 @@ func (a *agentTunnel) connect(rootCtx context.Context, waitGroup *sync.WaitGroup go func() { for { // ConnectToProxy blocks until error or context cancellation - err := remotedialer.ConnectToProxy(ctx, wsURL, nil, auth, ws, onConnect) + err := remotedialer.ConnectToProxyWithDialer(ctx, wsURL, nil, auth, ws, a.dialContext, onConnect) connected = false if err != nil && !errors.Is(err, context.Canceled) { logrus.WithField("url", wsURL).WithError(err).Error("Remotedialer proxy error; reconnecting...") @@ -471,7 +474,21 @@ func (a *agentTunnel) connect(rootCtx context.Context, waitGroup *sync.WaitGroup } } -// isKubeletPort returns true if the connection is to a reserved TCP port on a loopback address. -func (a *agentTunnel) isKubeletPort(proto, host, port string) bool { +// isKubeletOrStreamPort returns true if the connection is to a reserved TCP port on a loopback address. +func (a *agentTunnel) isKubeletOrStreamPort(proto, host, port string) bool { return proto == "tcp" && (host == "127.0.0.1" || host == "::1") && (port == a.kubeletPort || port == daemonconfig.StreamServerPort) } + +// dialContext dials a local connection on behalf of the remote server. If the +// connection is to the kubelet port on the loopback address, the kubelet is dialed +// at its configured bind address. Otherwise, the connection is dialed normally. +func (a *agentTunnel) dialContext(ctx context.Context, network, address string) (net.Conn, error) { + host, port, err := net.SplitHostPort(address) + if err != nil { + return nil, err + } + if a.isKubeletOrStreamPort(network, host, port) && port == a.kubeletPort { + address = net.JoinHostPort(a.kubeletAddr, port) + } + return defaultDialer.DialContext(ctx, network, address) +} diff --git a/pkg/daemons/control/tunnel.go b/pkg/daemons/control/tunnel.go index 86c685318b3f..fba58aa4f77b 100644 --- a/pkg/daemons/control/tunnel.go +++ b/pkg/daemons/control/tunnel.go @@ -3,7 +3,6 @@ package control import ( "bufio" "context" - "fmt" "io" "net" "net/http" @@ -197,7 +196,6 @@ func (t *TunnelServer) dialBackend(ctx context.Context, addr string) (net.Conn, if err != nil { return nil, err } - loopback := t.config.Loopback(true) var nodeName string var toKubelet, useTunnel bool @@ -224,14 +222,17 @@ func (t *TunnelServer) dialBackend(ctx context.Context, addr string) (net.Conn, useTunnel = true } - // Always dial kubelet via the loopback address. - if toKubelet { - addr = fmt.Sprintf("%s:%s", loopback, port) - } - // If connecting to something hosted by the local node, don't tunnel if nodeName == t.config.ServerNodeName { useTunnel = false + if toKubelet { + // Dial local kubelet at the configured bind address + addr = net.JoinHostPort(t.config.BindAddress, port) + } + } else if toKubelet { + // Dial remote kubelet via the loopback address, the remotedialer client + // will ensure that it hits the right local address. 
+ addr = net.JoinHostPort(t.config.Loopback(false), port) } if useTunnel { From d652e310ce54f1dad7dd1e0c17b3ae5425344600 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 7 Jul 2024 18:12:35 +0000 Subject: [PATCH 05/12] chore: Bump Trivy version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Made with ❤️️ by updatecli (cherry picked from commit 5508589faeda13ab867111060d05a786f51817a3) Signed-off-by: Brad Davidson --- Dockerfile.dapper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.dapper b/Dockerfile.dapper index 2654f138757a..cd3a518acc27 100644 --- a/Dockerfile.dapper +++ b/Dockerfile.dapper @@ -22,7 +22,7 @@ RUN apk -U --no-cache add \ RUN PIPX_BIN_DIR=/usr/local/bin pipx install awscli # Install Trivy -ENV TRIVY_VERSION="0.51.4" +ENV TRIVY_VERSION="0.53.0" RUN case "$(go env GOARCH)" in \ arm64) TRIVY_ARCH="ARM64" ;; \ amd64) TRIVY_ARCH="64bit" ;; \ From 3b02a074a7d73e7bab1916f01c755abc1f6e5976 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Tue, 11 Jun 2024 00:29:17 +0000 Subject: [PATCH 06/12] Add etcd s3 config secret implementation * Move snapshot structs and functions into pkg/etcd/snapshot * Move s3 client code and functions into pkg/etcd/s3 * Refactor pkg/etcd to track snapshot and s3 moves * Add support for reading s3 client config from secret * Add minio client cache, since S3 client configuration can now be changed at runtime by modifying the secret, and don't want to have to create a new minio client every time we read config. * Add tests for pkg/etcd/s3 Signed-off-by: Brad Davidson (cherry picked from commit c36db53e54ab1263839d3618674133481edd5b0f) Signed-off-by: Brad Davidson --- docs/adrs/etcd-s3-secret.md | 83 ++ go.mod | 6 +- pkg/cli/cmds/etcd_snapshot.go | 10 + pkg/cli/cmds/server.go | 12 + pkg/cli/etcdsnapshot/etcd_snapshot.go | 26 +- pkg/cli/server/server.go | 28 +- pkg/daemons/config/types.go | 49 +- pkg/etcd/etcd.go | 77 +- pkg/etcd/etcd_test.go | 18 +- pkg/etcd/s3.go | 494 ------- pkg/etcd/s3/config_secret.go | 119 ++ pkg/etcd/s3/s3.go | 567 ++++++++ pkg/etcd/s3/s3_test.go | 1743 +++++++++++++++++++++++++ pkg/etcd/snapshot.go | 469 ++----- pkg/etcd/snapshot/types.go | 270 ++++ pkg/etcd/snapshot_controller.go | 21 +- pkg/etcd/snapshot_handler.go | 63 +- tests/e2e/s3/Vagrantfile | 7 +- tests/e2e/s3/s3_test.go | 26 +- 19 files changed, 3104 insertions(+), 984 deletions(-) create mode 100644 docs/adrs/etcd-s3-secret.md delete mode 100644 pkg/etcd/s3.go create mode 100644 pkg/etcd/s3/config_secret.go create mode 100644 pkg/etcd/s3/s3.go create mode 100644 pkg/etcd/s3/s3_test.go create mode 100644 pkg/etcd/snapshot/types.go diff --git a/docs/adrs/etcd-s3-secret.md b/docs/adrs/etcd-s3-secret.md new file mode 100644 index 000000000000..bd728cdd3960 --- /dev/null +++ b/docs/adrs/etcd-s3-secret.md @@ -0,0 +1,83 @@ +# Support etcd Snapshot Configuration via Kubernetes Secret + +Date: 2024-02-06 +Revised: 2024-06-10 + +## Status + +Accepted + +## Context + +### Current State + +K3s currently reads configuration for S3 storage of etcd snapshots from CLI flags and/or configuration files. + +Security-conscious users have raised issue with the current state. They want to store snapshots on S3, but do not want +to have credentials visible in config files or systemd units. 
Users operating in highly secure environments have also +asked for the ability to configure a proxy server to be used when creating/restoring snapshots stored on S3, without +managing complicated `NO_PROXY` settings or affecting the rest of the K3s process environment. + +### Security Considerations + +Storing credentials on-disk is generally considered a bad idea, and is not allowed by security practices in many +organizations. Use of static credentials in the config file also makes them difficult to rotate, as K3s only reloads the +configuration on startup. + +### Existing Work + +Cloud-providers and other tools that need to auth to external systems frequently can be configured to retrieve secrets +from an existing credential secret that is provisioned via an external process, such as a secrets management tool. This +avoids embedding the credentials directly in the system configuration, chart values, and so on. + +## Decision + +* We will add a `--etcd-s3-proxy` flag that can be used to set the proxy used by the S3 client. This will override the + settings that golang's default HTTP client reads from the `HTTP_PROXY/HTTPS_PROXY/NO_PROXY` environment varibles. +* We will add support for reading etcd snapshot S3 configuration from a Secret. The secret name will be specified via a new + `--etcd-s3-config-secret` flag, which accepts the name of the Secret in the `kube-system` namespace. +* Presence of the `--etcd-s3-config-secret` flag does not imply `--etcd-s3`. If S3 is not enabled by use of the `--etcd-s3` flag, + the Secret will not be used. +* The Secret does not need to exist when K3s starts; it will be checked for every time a snapshot operation is performed. +* Secret and CLI/config values will NOT be merged. The Secret will provide values to be used in absence of other + configuration; if S3 configuration is passed via CLI flags or configuration file, ALL fields set by the Secret + will be ignored. +* The Secret will ONLY be used for on-demand and scheduled snapshot save operations; it will not be used by snapshot restore. + Snapshot restore operations that want to retrieve a snapshot from S3 will need to pass the appropriate configuration + via environment variables or CLI flags, as the Secret is not available during the restore process. + +Fields within the Secret will match `k3s server` CLI flags / config file keys. For the `etcd-s3-endpoint-ca`, which +normally contains the path of a file on disk, the `etcd-s3-endpoint-ca` field can specify an inline PEM-encoded CA +bundle, or the `etcd-s3-endpoint-ca-name` can be used to specify the name of a ConfigMap in the `kube-system` namespace +containing one or more CA bundles. All valid CA bundles found in either field are loaded. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: k3s-etcd-snapshot-s3-config + namespace: kube-system +stringData: + etcd-s3-endpoint: "" + etcd-s3-endpoint-ca: "" + etcd-s3-endpoint-ca-name: "" + etcd-s3-skip-ssl-verify: "false" + etcd-s3-access-key: "AWS_ACCESS_KEY_ID" + etcd-s3-secret-key: "AWS_SECRET_ACCESS_KEY" + etcd-s3-bucket: "bucket" + etcd-s3-folder: "folder" + etcd-s3-region: "us-east-1" + etcd-s3-insecure: "false" + etcd-s3-timeout: "5m" + etcd-s3-proxy: "" +``` + +## Consequences + +This will require additional documentation, tests, and QA work to validate use of secrets for s3 snapshot configuration. + +## Revisions + +#### 2024-06-10: +* Changed flag to `etcd-s3-config-secret` to avoid confusion with `etcd-s3-secret-key`. +* Added `etcd-s3-folder` to example Secret. 
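The ADR above gives the Secret form of the configuration; as a minimal companion sketch (not part of the patch — the ConfigMap name and key below are illustrative assumptions), the `etcd-s3-endpoint-ca-name` variant would point at a ConfigMap in `kube-system` whose values are PEM-encoded CA bundles:

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  # Hypothetical name; whatever value is set in the Secret's
  # etcd-s3-endpoint-ca-name field must match this.
  name: k3s-etcd-snapshot-s3-ca
  namespace: kube-system
data:
  # Per the ADR, all valid PEM CA bundles found here are loaded.
  ca-bundle.pem: |
    -----BEGIN CERTIFICATE-----
    (placeholder for a real PEM-encoded CA certificate)
    -----END CERTIFICATE-----
```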
diff --git a/go.mod b/go.mod index 0d2632c2f491..e4b3ab7331d3 100644 --- a/go.mod +++ b/go.mod @@ -138,11 +138,9 @@ require ( github.com/vishvananda/netlink v1.2.1-beta.2 github.com/yl2chen/cidranger v1.0.2 go.etcd.io/etcd/api/v3 v3.5.13 - go.etcd.io/etcd/client/pkg/v3 v3.5.13 go.etcd.io/etcd/client/v3 v3.5.13 - go.etcd.io/etcd/etcdutl/v3 v3.5.9 + go.etcd.io/etcd/etcdutl/v3 v3.5.13 go.etcd.io/etcd/server/v3 v3.5.13 - go.uber.org/zap v1.27.0 golang.org/x/crypto v0.23.0 golang.org/x/net v0.25.0 golang.org/x/sync v0.7.0 @@ -431,6 +429,7 @@ require ( github.com/xlab/treeprint v1.2.0 // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect go.etcd.io/bbolt v1.3.9 // indirect + go.etcd.io/etcd/client/pkg/v3 v3.5.13 // indirect go.etcd.io/etcd/client/v2 v2.305.13 // indirect go.etcd.io/etcd/pkg/v3 v3.5.13 // indirect go.etcd.io/etcd/raft/v3 v3.5.13 // indirect @@ -451,6 +450,7 @@ require ( go.uber.org/fx v1.20.1 // indirect go.uber.org/mock v0.4.0 // indirect go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.0 // indirect golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect golang.org/x/mod v0.15.0 // indirect golang.org/x/oauth2 v0.17.0 // indirect diff --git a/pkg/cli/cmds/etcd_snapshot.go b/pkg/cli/cmds/etcd_snapshot.go index c885031b3a42..e97228d1e21b 100644 --- a/pkg/cli/cmds/etcd_snapshot.go +++ b/pkg/cli/cmds/etcd_snapshot.go @@ -100,6 +100,16 @@ var EtcdSnapshotFlags = []cli.Flag{ Usage: "(db) S3 folder", Destination: &ServerConfig.EtcdS3Folder, }, + &cli.StringFlag{ + Name: "s3-proxy,etcd-s3-proxy", + Usage: "(db) Proxy server to use when connecting to S3, overriding any proxy-releated environment variables", + Destination: &ServerConfig.EtcdS3Proxy, + }, + &cli.StringFlag{ + Name: "s3-config-secret,etcd-s3-config-secret", + Usage: "(db) Name of secret in the kube-system namespace used to configure S3, if etcd-s3 is enabled and no other etcd-s3 options are set", + Destination: &ServerConfig.EtcdS3ConfigSecret, + }, &cli.BoolFlag{ Name: "s3-insecure,etcd-s3-insecure", Usage: "(db) Disables S3 over HTTPS", diff --git a/pkg/cli/cmds/server.go b/pkg/cli/cmds/server.go index e179f5237de3..c7fec4f54139 100644 --- a/pkg/cli/cmds/server.go +++ b/pkg/cli/cmds/server.go @@ -104,6 +104,8 @@ type Server struct { EtcdS3BucketName string EtcdS3Region string EtcdS3Folder string + EtcdS3Proxy string + EtcdS3ConfigSecret string EtcdS3Timeout time.Duration EtcdS3Insecure bool ServiceLBNamespace string @@ -430,6 +432,16 @@ var ServerFlags = []cli.Flag{ Usage: "(db) S3 folder", Destination: &ServerConfig.EtcdS3Folder, }, + &cli.StringFlag{ + Name: "etcd-s3-proxy", + Usage: "(db) Proxy server to use when connecting to S3, overriding any proxy-releated environment variables", + Destination: &ServerConfig.EtcdS3Proxy, + }, + &cli.StringFlag{ + Name: "etcd-s3-config-secret", + Usage: "(db) Name of secret in the kube-system namespace used to configure S3, if etcd-s3 is enabled and no other etcd-s3 options are set", + Destination: &ServerConfig.EtcdS3ConfigSecret, + }, &cli.BoolFlag{ Name: "etcd-s3-insecure", Usage: "(db) Disables S3 over HTTPS", diff --git a/pkg/cli/etcdsnapshot/etcd_snapshot.go b/pkg/cli/etcdsnapshot/etcd_snapshot.go index b6e774affec8..876b0ea7dec5 100644 --- a/pkg/cli/etcdsnapshot/etcd_snapshot.go +++ b/pkg/cli/etcdsnapshot/etcd_snapshot.go @@ -16,6 +16,7 @@ import ( "github.com/k3s-io/k3s/pkg/cli/cmds" "github.com/k3s-io/k3s/pkg/clientaccess" "github.com/k3s-io/k3s/pkg/cluster/managed" + "github.com/k3s-io/k3s/pkg/daemons/config" 
"github.com/k3s-io/k3s/pkg/etcd" "github.com/k3s-io/k3s/pkg/proctitle" "github.com/k3s-io/k3s/pkg/server" @@ -50,17 +51,20 @@ func commandSetup(app *cli.Context, cfg *cmds.Server) (*etcd.SnapshotRequest, *c } if cfg.EtcdS3 { - sr.S3 = &etcd.SnapshotRequestS3{} - sr.S3.AccessKey = cfg.EtcdS3AccessKey - sr.S3.Bucket = cfg.EtcdS3BucketName - sr.S3.Endpoint = cfg.EtcdS3Endpoint - sr.S3.EndpointCA = cfg.EtcdS3EndpointCA - sr.S3.Folder = cfg.EtcdS3Folder - sr.S3.Insecure = cfg.EtcdS3Insecure - sr.S3.Region = cfg.EtcdS3Region - sr.S3.SecretKey = cfg.EtcdS3SecretKey - sr.S3.SkipSSLVerify = cfg.EtcdS3SkipSSLVerify - sr.S3.Timeout = metav1.Duration{Duration: cfg.EtcdS3Timeout} + sr.S3 = &config.EtcdS3{ + AccessKey: cfg.EtcdS3AccessKey, + Bucket: cfg.EtcdS3BucketName, + ConfigSecret: cfg.EtcdS3ConfigSecret, + Endpoint: cfg.EtcdS3Endpoint, + EndpointCA: cfg.EtcdS3EndpointCA, + Folder: cfg.EtcdS3Folder, + Insecure: cfg.EtcdS3Insecure, + Proxy: cfg.EtcdS3Proxy, + Region: cfg.EtcdS3Region, + SecretKey: cfg.EtcdS3SecretKey, + SkipSSLVerify: cfg.EtcdS3SkipSSLVerify, + Timeout: metav1.Duration{Duration: cfg.EtcdS3Timeout}, + } // extend request timeout to allow the S3 operation to complete timeout += cfg.EtcdS3Timeout } diff --git a/pkg/cli/server/server.go b/pkg/cli/server/server.go index 7fd735bba495..f3a26700a8a0 100644 --- a/pkg/cli/server/server.go +++ b/pkg/cli/server/server.go @@ -32,6 +32,7 @@ import ( "github.com/rancher/wrangler/pkg/signals" "github.com/sirupsen/logrus" "github.com/urfave/cli" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilnet "k8s.io/apimachinery/pkg/util/net" kubeapiserverflag "k8s.io/component-base/cli/flag" "k8s.io/kubernetes/pkg/controlplane/apiserver/options" @@ -186,17 +187,22 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont serverConfig.ControlConfig.EtcdSnapshotCron = cfg.EtcdSnapshotCron serverConfig.ControlConfig.EtcdSnapshotDir = cfg.EtcdSnapshotDir serverConfig.ControlConfig.EtcdSnapshotRetention = cfg.EtcdSnapshotRetention - serverConfig.ControlConfig.EtcdS3 = cfg.EtcdS3 - serverConfig.ControlConfig.EtcdS3Endpoint = cfg.EtcdS3Endpoint - serverConfig.ControlConfig.EtcdS3EndpointCA = cfg.EtcdS3EndpointCA - serverConfig.ControlConfig.EtcdS3SkipSSLVerify = cfg.EtcdS3SkipSSLVerify - serverConfig.ControlConfig.EtcdS3AccessKey = cfg.EtcdS3AccessKey - serverConfig.ControlConfig.EtcdS3SecretKey = cfg.EtcdS3SecretKey - serverConfig.ControlConfig.EtcdS3BucketName = cfg.EtcdS3BucketName - serverConfig.ControlConfig.EtcdS3Region = cfg.EtcdS3Region - serverConfig.ControlConfig.EtcdS3Folder = cfg.EtcdS3Folder - serverConfig.ControlConfig.EtcdS3Insecure = cfg.EtcdS3Insecure - serverConfig.ControlConfig.EtcdS3Timeout = cfg.EtcdS3Timeout + if cfg.EtcdS3 { + serverConfig.ControlConfig.EtcdS3 = &config.EtcdS3{ + AccessKey: cfg.EtcdS3AccessKey, + Bucket: cfg.EtcdS3BucketName, + ConfigSecret: cfg.EtcdS3ConfigSecret, + Endpoint: cfg.EtcdS3Endpoint, + EndpointCA: cfg.EtcdS3EndpointCA, + Folder: cfg.EtcdS3Folder, + Insecure: cfg.EtcdS3Insecure, + Proxy: cfg.EtcdS3Proxy, + Region: cfg.EtcdS3Region, + SecretKey: cfg.EtcdS3SecretKey, + SkipSSLVerify: cfg.EtcdS3SkipSSLVerify, + Timeout: metav1.Duration{Duration: cfg.EtcdS3Timeout}, + } + } } else { logrus.Info("ETCD snapshots are disabled") } diff --git a/pkg/daemons/config/types.go b/pkg/daemons/config/types.go index b6175b63c3a0..dc1d7221f2f0 100644 --- a/pkg/daemons/config/types.go +++ b/pkg/daemons/config/types.go @@ -8,13 +8,13 @@ import ( "sort" "strings" "sync" - "time" 
"github.com/k3s-io/k3s/pkg/generated/controllers/k3s.cattle.io" "github.com/k3s-io/kine/pkg/endpoint" "github.com/rancher/wharfie/pkg/registries" "github.com/rancher/wrangler/pkg/generated/controllers/core" "github.com/rancher/wrangler/pkg/leader" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilnet "k8s.io/apimachinery/pkg/util/net" "k8s.io/apiserver/pkg/authentication/authenticator" "k8s.io/client-go/tools/record" @@ -62,6 +62,21 @@ type Node struct { DefaultRuntime string } +type EtcdS3 struct { + AccessKey string `json:"accessKey,omitempty"` + Bucket string `json:"bucket,omitempty"` + ConfigSecret string `json:"configSecret,omitempty"` + Endpoint string `json:"endpoint,omitempty"` + EndpointCA string `json:"endpointCA,omitempty"` + Folder string `json:"folder,omitempty"` + Proxy string `json:"proxy,omitempty"` + Region string `json:"region,omitempty"` + SecretKey string `json:"secretKey,omitempty"` + Insecure bool `json:"insecure,omitempty"` + SkipSSLVerify bool `json:"skipSSLVerify,omitempty"` + Timeout metav1.Duration `json:"timeout,omitempty"` +} + type Containerd struct { Address string Log string @@ -216,27 +231,17 @@ type Control struct { EncryptSkip bool MinTLSVersion string CipherSuites []string - TLSMinVersion uint16 `json:"-"` - TLSCipherSuites []uint16 `json:"-"` - EtcdSnapshotName string `json:"-"` - EtcdDisableSnapshots bool `json:"-"` - EtcdExposeMetrics bool `json:"-"` - EtcdSnapshotDir string `json:"-"` - EtcdSnapshotCron string `json:"-"` - EtcdSnapshotRetention int `json:"-"` - EtcdSnapshotCompress bool `json:"-"` - EtcdListFormat string `json:"-"` - EtcdS3 bool `json:"-"` - EtcdS3Endpoint string `json:"-"` - EtcdS3EndpointCA string `json:"-"` - EtcdS3SkipSSLVerify bool `json:"-"` - EtcdS3AccessKey string `json:"-"` - EtcdS3SecretKey string `json:"-"` - EtcdS3BucketName string `json:"-"` - EtcdS3Region string `json:"-"` - EtcdS3Folder string `json:"-"` - EtcdS3Timeout time.Duration `json:"-"` - EtcdS3Insecure bool `json:"-"` + TLSMinVersion uint16 `json:"-"` + TLSCipherSuites []uint16 `json:"-"` + EtcdSnapshotName string `json:"-"` + EtcdDisableSnapshots bool `json:"-"` + EtcdExposeMetrics bool `json:"-"` + EtcdSnapshotDir string `json:"-"` + EtcdSnapshotCron string `json:"-"` + EtcdSnapshotRetention int `json:"-"` + EtcdSnapshotCompress bool `json:"-"` + EtcdListFormat string `json:"-"` + EtcdS3 *EtcdS3 `json:"-"` ServerNodeName string VLevel int VModule string diff --git a/pkg/etcd/etcd.go b/pkg/etcd/etcd.go index e3907e696e1f..b4b58ec7a3fa 100644 --- a/pkg/etcd/etcd.go +++ b/pkg/etcd/etcd.go @@ -12,7 +12,6 @@ import ( "net/url" "os" "path/filepath" - "regexp" "sort" "strconv" "strings" @@ -26,6 +25,8 @@ import ( "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/daemons/control/deps" "github.com/k3s-io/k3s/pkg/daemons/executor" + "github.com/k3s-io/k3s/pkg/etcd/s3" + "github.com/k3s-io/k3s/pkg/etcd/snapshot" "github.com/k3s-io/k3s/pkg/server/auth" "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" @@ -40,10 +41,8 @@ import ( "github.com/sirupsen/logrus" "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" - "go.etcd.io/etcd/client/pkg/v3/logutil" clientv3 "go.etcd.io/etcd/client/v3" - "go.etcd.io/etcd/etcdutl/v3/snapshot" - "go.uber.org/zap" + snapshotv3 "go.etcd.io/etcd/etcdutl/v3/snapshot" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -93,8 +92,6 @@ var ( ErrAddressNotSet = errors.New("apiserver addresses not yet set") ErrNotMember = 
errNotMember() ErrMemberListFailed = errMemberListFailed() - - invalidKeyChars = regexp.MustCompile(`[^-._a-zA-Z0-9]`) ) type NodeControllerGetter func() controllerv1.NodeController @@ -110,8 +107,8 @@ type ETCD struct { name string address string cron *cron.Cron - s3 *S3 cancel context.CancelFunc + s3 *s3.Controller snapshotMu *sync.Mutex } @@ -128,16 +125,16 @@ type Members struct { Members []*etcdserverpb.Member `json:"members"` } -type MembershipError struct { - Self string - Members []string +type membershipError struct { + self string + members []string } -func (e *MembershipError) Error() string { - return fmt.Sprintf("this server is a not a member of the etcd cluster. Found %v, expect: %s", e.Members, e.Self) +func (e *membershipError) Error() string { + return fmt.Sprintf("this server is a not a member of the etcd cluster. Found %v, expect: %s", e.members, e.self) } -func (e *MembershipError) Is(target error) bool { +func (e *membershipError) Is(target error) bool { switch target { case ErrNotMember: return true @@ -145,17 +142,17 @@ func (e *MembershipError) Is(target error) bool { return false } -func errNotMember() error { return &MembershipError{} } +func errNotMember() error { return &membershipError{} } -type MemberListError struct { - Err error +type memberListError struct { + err error } -func (e *MemberListError) Error() string { - return fmt.Sprintf("failed to get MemberList from server: %v", e.Err) +func (e *memberListError) Error() string { + return fmt.Sprintf("failed to get MemberList from server: %v", e.err) } -func (e *MemberListError) Is(target error) bool { +func (e *memberListError) Is(target error) bool { switch target { case ErrMemberListFailed: return true @@ -163,7 +160,7 @@ func (e *MemberListError) Is(target error) bool { return false } -func errMemberListFailed() error { return &MemberListError{} } +func errMemberListFailed() error { return &memberListError{} } // NewETCD creates a new value of type // ETCD with initialized cron and snapshot mutex values. 
@@ -256,7 +253,7 @@ func (e *ETCD) Test(ctx context.Context) error { memberNameUrls = append(memberNameUrls, member.Name+"="+member.PeerURLs[0]) } } - return &MembershipError{Members: memberNameUrls, Self: e.name + "=" + e.peerURL()} + return &membershipError{members: memberNameUrls, self: e.name + "=" + e.peerURL()} } // dbDir returns the path to dataDir/db/etcd @@ -391,14 +388,25 @@ func (e *ETCD) Reset(ctx context.Context, rebootstrap func() error) error { // If asked to restore from a snapshot, do so if e.config.ClusterResetRestorePath != "" { - if e.config.EtcdS3 { + if e.config.EtcdS3 != nil { logrus.Infof("Retrieving etcd snapshot %s from S3", e.config.ClusterResetRestorePath) - if err := e.initS3IfNil(ctx); err != nil { - return err + s3client, err := e.getS3Client(ctx) + if err != nil { + if errors.Is(err, s3.ErrNoConfigSecret) { + return errors.New("cannot use S3 config secret when restoring snapshot; configuration must be set in CLI or config file") + } else { + return errors.Wrap(err, "failed to initialize S3 client") + } } - if err := e.s3.Download(ctx); err != nil { - return err + dir, err := snapshotDir(e.config, true) + if err != nil { + return errors.Wrap(err, "failed to get the snapshot dir") } + path, err := s3client.Download(ctx, e.config.ClusterResetRestorePath, dir) + if err != nil { + return errors.Wrap(err, "failed to download snapshot from S3") + } + e.config.ClusterResetRestorePath = path logrus.Infof("S3 download complete for %s", e.config.ClusterResetRestorePath) } @@ -442,6 +450,7 @@ func (e *ETCD) Start(ctx context.Context, clientAccessInfo *clientaccess.Info) e } go e.manageLearners(ctx) + go e.getS3Client(ctx) if isInitialized { // check etcd dir permission @@ -1416,7 +1425,7 @@ func ClientURLs(ctx context.Context, clientAccessInfo *clientaccess.Info, selfIP // get the full list from the server we're joining resp, err := clientAccessInfo.Get("/db/info") if err != nil { - return nil, memberList, &MemberListError{Err: err} + return nil, memberList, &memberListError{err: err} } if err := json.Unmarshal(resp, &memberList); err != nil { return nil, memberList, err @@ -1463,13 +1472,13 @@ func (e *ETCD) Restore(ctx context.Context) error { } var restorePath string - if strings.HasSuffix(e.config.ClusterResetRestorePath, compressedExtension) { - snapshotDir, err := snapshotDir(e.config, true) + if strings.HasSuffix(e.config.ClusterResetRestorePath, snapshot.CompressedExtension) { + dir, err := snapshotDir(e.config, true) if err != nil { return errors.Wrap(err, "failed to get the snapshot dir") } - decompressSnapshot, err := e.decompressSnapshot(snapshotDir, e.config.ClusterResetRestorePath) + decompressSnapshot, err := e.decompressSnapshot(dir, e.config.ClusterResetRestorePath) if err != nil { return err } @@ -1485,13 +1494,7 @@ func (e *ETCD) Restore(ctx context.Context) error { } logrus.Infof("Pre-restore etcd database moved to %s", oldDataDir) - - lg, err := logutil.CreateDefaultZapLogger(zap.InfoLevel) - if err != nil { - return err - } - - return snapshot.NewV3(lg).Restore(snapshot.RestoreConfig{ + return snapshotv3.NewV3(e.client.GetLogger()).Restore(snapshotv3.RestoreConfig{ SnapshotPath: restorePath, Name: e.name, OutputDataDir: dbDir(e.config), diff --git a/pkg/etcd/etcd_test.go b/pkg/etcd/etcd_test.go index a28dee46e113..5a519bdcffe4 100644 --- a/pkg/etcd/etcd_test.go +++ b/pkg/etcd/etcd_test.go @@ -11,8 +11,10 @@ import ( "github.com/k3s-io/k3s/pkg/clientaccess" "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/etcd/s3" testutil 
"github.com/k3s-io/k3s/tests" "github.com/robfig/cron/v3" + "github.com/sirupsen/logrus" clientv3 "go.etcd.io/etcd/client/v3" "go.etcd.io/etcd/server/v3/etcdserver" utilnet "k8s.io/apimachinery/pkg/util/net" @@ -47,10 +49,12 @@ func generateTestConfig() *config.Control { EtcdSnapshotName: "etcd-snapshot", EtcdSnapshotCron: "0 */12 * * *", EtcdSnapshotRetention: 5, - EtcdS3Endpoint: "s3.amazonaws.com", - EtcdS3Region: "us-east-1", - SANs: []string{"127.0.0.1", mustGetAddress()}, - CriticalControlArgs: criticalControlArgs, + EtcdS3: &config.EtcdS3{ + Endpoint: "s3.amazonaws.com", + Region: "us-east-1", + }, + SANs: []string{"127.0.0.1", mustGetAddress()}, + CriticalControlArgs: criticalControlArgs, } } @@ -112,6 +116,10 @@ func Test_UnitETCD_IsInitialized(t *testing.T) { want: false, }, } + + // enable logging + logrus.SetLevel(logrus.DebugLevel) + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { e := NewETCD() @@ -227,7 +235,7 @@ func Test_UnitETCD_Start(t *testing.T) { name string address string cron *cron.Cron - s3 *S3 + s3 *s3.Controller } type args struct { clientAccessInfo *clientaccess.Info diff --git a/pkg/etcd/s3.go b/pkg/etcd/s3.go deleted file mode 100644 index 52671e5967d4..000000000000 --- a/pkg/etcd/s3.go +++ /dev/null @@ -1,494 +0,0 @@ -package etcd - -import ( - "context" - "crypto/tls" - "crypto/x509" - "encoding/base64" - "encoding/pem" - "fmt" - "io/ioutil" - "net/http" - "net/textproto" - "os" - "path" - "path/filepath" - "sort" - "strconv" - "strings" - "time" - - "github.com/k3s-io/k3s/pkg/daemons/config" - "github.com/k3s-io/k3s/pkg/util" - "github.com/k3s-io/k3s/pkg/version" - "github.com/minio/minio-go/v7" - "github.com/minio/minio-go/v7/pkg/credentials" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/wait" -) - -var ( - clusterIDKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-cluster-id") - tokenHashKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-token-hash") - nodeNameKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-node-name") -) - -// S3 maintains state for S3 functionality. -type S3 struct { - config *config.Control - client *minio.Client - clusterID string - tokenHash string - nodeName string -} - -// newS3 creates a new value of type s3 pointer with a -// copy of the config.Control pointer and initializes -// a new Minio client. 
-func NewS3(ctx context.Context, config *config.Control) (*S3, error) { - if config.EtcdS3BucketName == "" { - return nil, errors.New("s3 bucket name was not set") - } - tr := http.DefaultTransport - - switch { - case config.EtcdS3EndpointCA != "": - trCA, err := setTransportCA(tr, config.EtcdS3EndpointCA, config.EtcdS3SkipSSLVerify) - if err != nil { - return nil, err - } - tr = trCA - case config.EtcdS3 && config.EtcdS3SkipSSLVerify: - tr.(*http.Transport).TLSClientConfig = &tls.Config{ - InsecureSkipVerify: config.EtcdS3SkipSSLVerify, - } - } - - var creds *credentials.Credentials - if len(config.EtcdS3AccessKey) == 0 && len(config.EtcdS3SecretKey) == 0 { - creds = credentials.NewIAM("") // for running on ec2 instance - } else { - creds = credentials.NewStaticV4(config.EtcdS3AccessKey, config.EtcdS3SecretKey, "") - } - - opt := minio.Options{ - Creds: creds, - Secure: !config.EtcdS3Insecure, - Region: config.EtcdS3Region, - Transport: tr, - BucketLookup: bucketLookupType(config.EtcdS3Endpoint), - } - c, err := minio.New(config.EtcdS3Endpoint, &opt) - if err != nil { - return nil, err - } - - logrus.Infof("Checking if S3 bucket %s exists", config.EtcdS3BucketName) - - ctx, cancel := context.WithTimeout(ctx, config.EtcdS3Timeout) - defer cancel() - - exists, err := c.BucketExists(ctx, config.EtcdS3BucketName) - if err != nil { - return nil, errors.Wrapf(err, "failed to test for existence of bucket %s", config.EtcdS3BucketName) - } - if !exists { - return nil, fmt.Errorf("bucket %s does not exist", config.EtcdS3BucketName) - } - logrus.Infof("S3 bucket %s exists", config.EtcdS3BucketName) - - s3 := &S3{ - config: config, - client: c, - nodeName: os.Getenv("NODE_NAME"), - } - - if config.ClusterReset { - logrus.Debug("Skip setting S3 snapshot cluster ID and token during cluster-reset") - } else { - if err := wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) { - if config.Runtime.Core == nil { - return false, nil - } - - // cluster id hack: see https://groups.google.com/forum/#!msg/kubernetes-sig-architecture/mVGobfD4TpY/nkdbkX1iBwAJ - ns, err := config.Runtime.Core.Core().V1().Namespace().Get(metav1.NamespaceSystem, metav1.GetOptions{}) - if err != nil { - return false, errors.Wrap(err, "failed to set S3 snapshot cluster ID") - } - s3.clusterID = string(ns.UID) - - tokenHash, err := util.GetTokenHash(config) - if err != nil { - return false, errors.Wrap(err, "failed to set S3 snapshot server token hash") - } - s3.tokenHash = tokenHash - - return true, nil - }); err != nil { - return nil, err - } - } - - return s3, nil -} - -// upload uploads the given snapshot to the configured S3 -// compatible backend. 
-func (s *S3) upload(ctx context.Context, snapshot string, extraMetadata *v1.ConfigMap, now time.Time) (*snapshotFile, error) { - basename := filepath.Base(snapshot) - metadata := filepath.Join(filepath.Dir(snapshot), "..", metadataDir, basename) - snapshotKey := path.Join(s.config.EtcdS3Folder, basename) - metadataKey := path.Join(s.config.EtcdS3Folder, metadataDir, basename) - - sf := &snapshotFile{ - Name: basename, - Location: fmt.Sprintf("s3://%s/%s", s.config.EtcdS3BucketName, snapshotKey), - NodeName: "s3", - CreatedAt: &metav1.Time{ - Time: now, - }, - S3: &s3Config{ - Endpoint: s.config.EtcdS3Endpoint, - EndpointCA: s.config.EtcdS3EndpointCA, - SkipSSLVerify: s.config.EtcdS3SkipSSLVerify, - Bucket: s.config.EtcdS3BucketName, - Region: s.config.EtcdS3Region, - Folder: s.config.EtcdS3Folder, - Insecure: s.config.EtcdS3Insecure, - }, - Compressed: strings.HasSuffix(snapshot, compressedExtension), - metadataSource: extraMetadata, - nodeSource: s.nodeName, - } - - logrus.Infof("Uploading snapshot to s3://%s/%s", s.config.EtcdS3BucketName, snapshotKey) - uploadInfo, err := s.uploadSnapshot(ctx, snapshotKey, snapshot) - if err != nil { - sf.Status = failedSnapshotStatus - sf.Message = base64.StdEncoding.EncodeToString([]byte(err.Error())) - } else { - sf.Status = successfulSnapshotStatus - sf.Size = uploadInfo.Size - sf.tokenHash = s.tokenHash - } - if _, err := s.uploadSnapshotMetadata(ctx, metadataKey, metadata); err != nil { - logrus.Warnf("Failed to upload snapshot metadata to S3: %v", err) - } else { - logrus.Infof("Uploaded snapshot metadata s3://%s/%s", s.config.EtcdS3BucketName, metadataKey) - } - return sf, err -} - -// uploadSnapshot uploads the snapshot file to S3 using the minio API. -func (s *S3) uploadSnapshot(ctx context.Context, key, path string) (info minio.UploadInfo, err error) { - opts := minio.PutObjectOptions{ - NumThreads: 2, - UserMetadata: map[string]string{ - clusterIDKey: s.clusterID, - nodeNameKey: s.nodeName, - tokenHashKey: s.tokenHash, - }, - } - if strings.HasSuffix(key, compressedExtension) { - opts.ContentType = "application/zip" - } else { - opts.ContentType = "application/octet-stream" - } - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - - return s.client.FPutObject(ctx, s.config.EtcdS3BucketName, key, path, opts) -} - -// uploadSnapshotMetadata marshals and uploads the snapshot metadata to S3 using the minio API. -// The upload is silently skipped if no extra metadata is provided. -func (s *S3) uploadSnapshotMetadata(ctx context.Context, key, path string) (info minio.UploadInfo, err error) { - if _, err := os.Stat(path); err != nil { - if os.IsNotExist(err) { - return minio.UploadInfo{}, nil - } - return minio.UploadInfo{}, err - } - - opts := minio.PutObjectOptions{ - NumThreads: 2, - ContentType: "application/json", - UserMetadata: map[string]string{ - clusterIDKey: s.clusterID, - nodeNameKey: s.nodeName, - }, - } - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - return s.client.FPutObject(ctx, s.config.EtcdS3BucketName, key, path, opts) -} - -// Download downloads the given snapshot from the configured S3 -// compatible backend. 
-func (s *S3) Download(ctx context.Context) error { - snapshotKey := path.Join(s.config.EtcdS3Folder, s.config.ClusterResetRestorePath) - metadataKey := path.Join(s.config.EtcdS3Folder, metadataDir, s.config.ClusterResetRestorePath) - snapshotDir, err := snapshotDir(s.config, true) - if err != nil { - return errors.Wrap(err, "failed to get the snapshot dir") - } - snapshotFile := filepath.Join(snapshotDir, s.config.ClusterResetRestorePath) - metadataFile := filepath.Join(snapshotDir, "..", metadataDir, s.config.ClusterResetRestorePath) - - if err := s.downloadSnapshot(ctx, snapshotKey, snapshotFile); err != nil { - return err - } - if err := s.downloadSnapshotMetadata(ctx, metadataKey, metadataFile); err != nil { - return err - } - - s.config.ClusterResetRestorePath = snapshotFile - return nil -} - -// downloadSnapshot downloads the snapshot file from S3 using the minio API. -func (s *S3) downloadSnapshot(ctx context.Context, key, file string) error { - logrus.Debugf("Downloading snapshot from s3://%s/%s", s.config.EtcdS3BucketName, key) - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - defer os.Chmod(file, 0600) - return s.client.FGetObject(ctx, s.config.EtcdS3BucketName, key, file, minio.GetObjectOptions{}) -} - -// downloadSnapshotMetadata downloads the snapshot metadata file from S3 using the minio API. -// No error is returned if the metadata file does not exist, as it is optional. -func (s *S3) downloadSnapshotMetadata(ctx context.Context, key, file string) error { - logrus.Debugf("Downloading snapshot metadata from s3://%s/%s", s.config.EtcdS3BucketName, key) - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - defer os.Chmod(file, 0600) - err := s.client.FGetObject(ctx, s.config.EtcdS3BucketName, key, file, minio.GetObjectOptions{}) - if resp := minio.ToErrorResponse(err); resp.StatusCode == http.StatusNotFound { - return nil - } - return err -} - -// snapshotPrefix returns the prefix used in the -// naming of the snapshots. -func (s *S3) snapshotPrefix() string { - return path.Join(s.config.EtcdS3Folder, s.config.EtcdSnapshotName) -} - -// snapshotRetention prunes snapshots in the configured S3 compatible backend for this specific node. -// Returns a list of pruned snapshot names. 
-func (s *S3) snapshotRetention(ctx context.Context) ([]string, error) { - if s.config.EtcdSnapshotRetention < 1 { - return nil, nil - } - logrus.Infof("Applying snapshot retention=%d to snapshots stored in s3://%s/%s", s.config.EtcdSnapshotRetention, s.config.EtcdS3BucketName, s.snapshotPrefix()) - - var snapshotFiles []minio.ObjectInfo - - toCtx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - - opts := minio.ListObjectsOptions{ - Prefix: s.snapshotPrefix(), - Recursive: true, - } - for info := range s.client.ListObjects(toCtx, s.config.EtcdS3BucketName, opts) { - if info.Err != nil { - return nil, info.Err - } - - // skip metadata - if path.Base(path.Dir(info.Key)) == metadataDir { - continue - } - - snapshotFiles = append(snapshotFiles, info) - } - - if len(snapshotFiles) <= s.config.EtcdSnapshotRetention { - return nil, nil - } - - // sort newest-first so we can prune entries past the retention count - sort.Slice(snapshotFiles, func(i, j int) bool { - return snapshotFiles[j].LastModified.Before(snapshotFiles[i].LastModified) - }) - - deleted := []string{} - for _, df := range snapshotFiles[s.config.EtcdSnapshotRetention:] { - logrus.Infof("Removing S3 snapshot: s3://%s/%s", s.config.EtcdS3BucketName, df.Key) - - key := path.Base(df.Key) - if err := s.deleteSnapshot(ctx, key); err != nil { - return deleted, err - } - deleted = append(deleted, key) - } - - return deleted, nil -} - -func (s *S3) deleteSnapshot(ctx context.Context, key string) error { - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - - key = path.Join(s.config.EtcdS3Folder, key) - err := s.client.RemoveObject(ctx, s.config.EtcdS3BucketName, key, minio.RemoveObjectOptions{}) - if err == nil || isNotExist(err) { - metadataKey := path.Join(path.Dir(key), metadataDir, path.Base(key)) - if merr := s.client.RemoveObject(ctx, s.config.EtcdS3BucketName, metadataKey, minio.RemoveObjectOptions{}); merr != nil && !isNotExist(merr) { - err = merr - } - } - - return err -} - -// listSnapshots provides a list of currently stored -// snapshots in S3 along with their relevant -// metadata. 
-func (s *S3) listSnapshots(ctx context.Context) (map[string]snapshotFile, error) { - snapshots := map[string]snapshotFile{} - metadatas := []string{} - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - - opts := minio.ListObjectsOptions{ - Prefix: s.config.EtcdS3Folder, - Recursive: true, - } - - objects := s.client.ListObjects(ctx, s.config.EtcdS3BucketName, opts) - - for obj := range objects { - if obj.Err != nil { - return nil, obj.Err - } - if obj.Size == 0 { - continue - } - - if o, err := s.client.StatObject(ctx, s.config.EtcdS3BucketName, obj.Key, minio.StatObjectOptions{}); err != nil { - logrus.Warnf("Failed to get object metadata: %v", err) - } else { - obj = o - } - - filename := path.Base(obj.Key) - if path.Base(path.Dir(obj.Key)) == metadataDir { - metadatas = append(metadatas, obj.Key) - continue - } - - basename, compressed := strings.CutSuffix(filename, compressedExtension) - ts, err := strconv.ParseInt(basename[strings.LastIndexByte(basename, '-')+1:], 10, 64) - if err != nil { - ts = obj.LastModified.Unix() - } - - sf := snapshotFile{ - Name: filename, - Location: fmt.Sprintf("s3://%s/%s", s.config.EtcdS3BucketName, obj.Key), - NodeName: "s3", - CreatedAt: &metav1.Time{ - Time: time.Unix(ts, 0), - }, - Size: obj.Size, - S3: &s3Config{ - Endpoint: s.config.EtcdS3Endpoint, - EndpointCA: s.config.EtcdS3EndpointCA, - SkipSSLVerify: s.config.EtcdS3SkipSSLVerify, - Bucket: s.config.EtcdS3BucketName, - Region: s.config.EtcdS3Region, - Folder: s.config.EtcdS3Folder, - Insecure: s.config.EtcdS3Insecure, - }, - Status: successfulSnapshotStatus, - Compressed: compressed, - nodeSource: obj.UserMetadata[nodeNameKey], - tokenHash: obj.UserMetadata[tokenHashKey], - } - sfKey := generateSnapshotConfigMapKey(sf) - snapshots[sfKey] = sf - } - - for _, metadataKey := range metadatas { - filename := path.Base(metadataKey) - sfKey := generateSnapshotConfigMapKey(snapshotFile{Name: filename, NodeName: "s3"}) - if sf, ok := snapshots[sfKey]; ok { - logrus.Debugf("Loading snapshot metadata from s3://%s/%s", s.config.EtcdS3BucketName, metadataKey) - if obj, err := s.client.GetObject(ctx, s.config.EtcdS3BucketName, metadataKey, minio.GetObjectOptions{}); err != nil { - if isNotExist(err) { - logrus.Debugf("Failed to get snapshot metadata: %v", err) - } else { - logrus.Warnf("Failed to get snapshot metadata for %s: %v", filename, err) - } - } else { - if m, err := ioutil.ReadAll(obj); err != nil { - if isNotExist(err) { - logrus.Debugf("Failed to read snapshot metadata: %v", err) - } else { - logrus.Warnf("Failed to read snapshot metadata for %s: %v", filename, err) - } - } else { - sf.Metadata = base64.StdEncoding.EncodeToString(m) - snapshots[sfKey] = sf - } - } - } - } - - return snapshots, nil -} - -func readS3EndpointCA(endpointCA string) ([]byte, error) { - ca, err := base64.StdEncoding.DecodeString(endpointCA) - if err != nil { - return os.ReadFile(endpointCA) - } - return ca, nil -} - -func setTransportCA(tr http.RoundTripper, endpointCA string, insecureSkipVerify bool) (http.RoundTripper, error) { - ca, err := readS3EndpointCA(endpointCA) - if err != nil { - return tr, err - } - if !isValidCertificate(ca) { - return tr, errors.New("endpoint-ca is not a valid x509 certificate") - } - - certPool := x509.NewCertPool() - certPool.AppendCertsFromPEM(ca) - - tr.(*http.Transport).TLSClientConfig = &tls.Config{ - RootCAs: certPool, - InsecureSkipVerify: insecureSkipVerify, - } - - return tr, nil -} - -// isValidCertificate checks to see if the given -// byte slice 
is a valid x509 certificate. -func isValidCertificate(c []byte) bool { - p, _ := pem.Decode(c) - if p == nil { - return false - } - if _, err := x509.ParseCertificates(p.Bytes); err != nil { - return false - } - return true -} - -func bucketLookupType(endpoint string) minio.BucketLookupType { - if strings.Contains(endpoint, "aliyun") { // backwards compt with RKE1 - return minio.BucketLookupDNS - } - return minio.BucketLookupAuto -} diff --git a/pkg/etcd/s3/config_secret.go b/pkg/etcd/s3/config_secret.go new file mode 100644 index 000000000000..0b81e94b41f8 --- /dev/null +++ b/pkg/etcd/s3/config_secret.go @@ -0,0 +1,119 @@ +package s3 + +import ( + "encoding/base64" + "fmt" + "strconv" + "strings" + "time" + + "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/util" + "github.com/sirupsen/logrus" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var ErrNoConfigSecret = errNoConfigSecret() + +type secretError struct { + err error +} + +func (e *secretError) Error() string { + return fmt.Sprintf("failed to get etcd S3 config secret: %v", e.err) +} + +func (e *secretError) Is(target error) bool { + switch target { + case ErrNoConfigSecret: + return true + } + return false +} + +func errNoConfigSecret() error { return &secretError{} } + +func (c *Controller) getConfigFromSecret(secretName string) (*config.EtcdS3, error) { + if c.core == nil { + return nil, &secretError{err: util.ErrCoreNotReady} + } + + secret, err := c.core.V1().Secret().Get(metav1.NamespaceSystem, secretName, metav1.GetOptions{}) + if err != nil { + return nil, &secretError{err: err} + } + + etcdS3 := &config.EtcdS3{ + AccessKey: string(secret.Data["etcd-s3-access-key"]), + Bucket: string(secret.Data["etcd-s3-bucket"]), + Endpoint: defaultEtcdS3.Endpoint, + Folder: string(secret.Data["etcd-s3-folder"]), + Proxy: string(secret.Data["etcd-s3-proxy"]), + Region: defaultEtcdS3.Region, + SecretKey: string(secret.Data["etcd-s3-secret-key"]), + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + } + + // Set endpoint from secret if set + if v, ok := secret.Data["etcd-s3-endpoint"]; ok { + etcdS3.Endpoint = string(v) + } + + // Set region from secret if set + if v, ok := secret.Data["etcd-s3-region"]; ok { + etcdS3.Region = string(v) + } + + // Set timeout from secret if set + if v, ok := secret.Data["etcd-s3-timeout"]; ok { + if duration, err := time.ParseDuration(string(v)); err != nil { + logrus.Warnf("Failed to parse etcd-s3-timeout value from S3 config secret %s: %v", secretName, err) + } else { + etcdS3.Timeout.Duration = duration + } + } + + // configure ssl verification, if value can be parsed + if v, ok := secret.Data["etcd-s3-skip-ssl-verify"]; ok { + if b, err := strconv.ParseBool(string(v)); err != nil { + logrus.Warnf("Failed to parse etcd-s3-skip-ssl-verify value from S3 config secret %s: %v", secretName, err) + } else { + etcdS3.SkipSSLVerify = b + } + } + + // configure insecure http, if value can be parsed + if v, ok := secret.Data["etcd-s3-insecure"]; ok { + if b, err := strconv.ParseBool(string(v)); err != nil { + logrus.Warnf("Failed to parse etcd-s3-insecure value from S3 config secret %s: %v", secretName, err) + } else { + etcdS3.Insecure = b + } + } + + // encode CA bundles from value, and keys in configmap if one is named + caBundles := []string{} + // Add inline CA bundle if set + if len(secret.Data["etcd-s3-endpoint-ca"]) > 0 { + caBundles = append(caBundles, base64.StdEncoding.EncodeToString(secret.Data["etcd-s3-endpoint-ca"])) + } + + // Add CA bundles from named configmap if set + if 
caConfigMapName := string(secret.Data["etcd-s3-endpoint-ca-name"]); caConfigMapName != "" { + configMap, err := c.core.V1().ConfigMap().Get(metav1.NamespaceSystem, caConfigMapName, metav1.GetOptions{}) + if err != nil { + logrus.Warnf("Failed to get ConfigMap %s for etcd-s3-endpoint-ca-name value from S3 config secret %s: %v", caConfigMapName, secretName, err) + } else { + for _, v := range configMap.Data { + caBundles = append(caBundles, base64.StdEncoding.EncodeToString([]byte(v))) + } + for _, v := range configMap.BinaryData { + caBundles = append(caBundles, base64.StdEncoding.EncodeToString(v)) + } + } + } + + // Concatenate all requested CA bundle strings into config var + etcdS3.EndpointCA = strings.Join(caBundles, " ") + return etcdS3, nil +} diff --git a/pkg/etcd/s3/s3.go b/pkg/etcd/s3/s3.go new file mode 100644 index 000000000000..9e88dac42bb4 --- /dev/null +++ b/pkg/etcd/s3/s3.go @@ -0,0 +1,567 @@ +package s3 + +import ( + "context" + "crypto/tls" + "crypto/x509" + "encoding/base64" + "fmt" + "io/ioutil" + "net/http" + "net/textproto" + "net/url" + "os" + "path" + "path/filepath" + "reflect" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/etcd/snapshot" + "github.com/k3s-io/k3s/pkg/util" + "github.com/k3s-io/k3s/pkg/version" + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" + "github.com/pkg/errors" + "github.com/rancher/wrangler/pkg/generated/controllers/core" + "github.com/sirupsen/logrus" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/utils/lru" +) + +var ( + clusterIDKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-cluster-id") + tokenHashKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-token-hash") + nodeNameKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-node-name") +) + +var defaultEtcdS3 = &config.EtcdS3{ + Endpoint: "s3.amazonaws.com", + Region: "us-east-1", + Timeout: metav1.Duration{ + Duration: 5 * time.Minute, + }, +} + +var ( + controller *Controller + cErr error + once sync.Once +) + +// Controller maintains state for S3 functionality, +// and can be used to get clients for interacting with +// an S3 service, given specific client configuration. +type Controller struct { + clusterID string + tokenHash string + nodeName string + core core.Interface + clientCache *lru.Cache +} + +// Client holds state for a given configuration - a preconfigured minio client, +// and reference to the config it was created for. +type Client struct { + mc *minio.Client + etcdS3 *config.EtcdS3 + controller *Controller +} + +// Start initializes the cache and sets the cluster id and token hash, +// returning a reference to the the initialized controller. Initialization is +// locked by a sync.Once to prevent races, and multiple calls to start will +// return the same controller or error. 
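+//
+// For illustration only (the variable names below are placeholders, not part
+// of this package), a typical call sequence might look like:
+//
+//	controller, err := s3.Start(ctx, controlConfig)        // controlConfig is a *config.Control
+//	client, err := controller.GetClient(ctx, etcdS3Config) // etcdS3Config is a *config.EtcdS3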
+func Start(ctx context.Context, config *config.Control) (*Controller, error) { + once.Do(func() { + c := &Controller{ + clientCache: lru.New(5), + nodeName: os.Getenv("NODE_NAME"), + } + + if config.ClusterReset { + logrus.Debug("Skip setting S3 snapshot cluster ID and server token hash during cluster-reset") + controller = c + } else { + logrus.Debug("Getting S3 snapshot cluster ID and server token hash") + if err := wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) { + if config.Runtime.Core == nil { + return false, nil + } + c.core = config.Runtime.Core.Core() + + // cluster id hack: see https://groups.google.com/forum/#!msg/kubernetes-sig-architecture/mVGobfD4TpY/nkdbkX1iBwAJ + ns, err := c.core.V1().Namespace().Get(metav1.NamespaceSystem, metav1.GetOptions{}) + if err != nil { + return false, errors.Wrap(err, "failed to set S3 snapshot cluster ID") + } + c.clusterID = string(ns.UID) + + tokenHash, err := util.GetTokenHash(config) + if err != nil { + return false, errors.Wrap(err, "failed to set S3 snapshot server token hash") + } + c.tokenHash = tokenHash + + return true, nil + }); err != nil { + cErr = err + } else { + controller = c + } + } + }) + + return controller, cErr +} + +func (c *Controller) GetClient(ctx context.Context, etcdS3 *config.EtcdS3) (*Client, error) { + if etcdS3 == nil { + return nil, errors.New("nil s3 configuration") + } + + // update ConfigSecret in defaults so that comparisons between current and default config + // ignore ConfigSecret when deciding if CLI configuration is present. + defaultEtcdS3.ConfigSecret = etcdS3.ConfigSecret + + // If config is default, try to load config from secret, and fail if it cannot be retrieved or if the secret name is not set. + // If config is not default, and secret name is set, warn that the secret is being ignored + isDefault := reflect.DeepEqual(defaultEtcdS3, etcdS3) + if etcdS3.ConfigSecret != "" { + if isDefault { + e, err := c.getConfigFromSecret(etcdS3.ConfigSecret) + if err != nil { + return nil, errors.Wrapf(err, "failed to get config from etcd-s3-config-secret %q", etcdS3.ConfigSecret) + } + logrus.Infof("Using etcd s3 configuration from etcd-s3-config-secret %q", etcdS3.ConfigSecret) + etcdS3 = e + } else { + logrus.Warnf("Ignoring s3 configuration from etcd-s3-config-secret %q due to existing configuration from CLI or config file", etcdS3.ConfigSecret) + } + } else if isDefault { + return nil, errors.New("s3 configuration was not set") + } + + // used just for logging + scheme := "https://" + if etcdS3.Insecure { + scheme = "http://" + } + + // Try to get an existing client from cache. The entire EtcdS3 struct + // (including the key id and secret) is used as the cache key, but we only + // print the endpoint and bucket name to avoid leaking creds into the logs. + if client, ok := c.clientCache.Get(*etcdS3); ok { + logrus.Infof("Reusing cached S3 client for endpoint=%q bucket=%q folder=%q", scheme+etcdS3.Endpoint, etcdS3.Bucket, etcdS3.Folder) + return client.(*Client), nil + } + logrus.Infof("Attempting to create new S3 client for endpoint=%q bucket=%q folder=%q", scheme+etcdS3.Endpoint, etcdS3.Bucket, etcdS3.Folder) + + if etcdS3.Bucket == "" { + return nil, errors.New("s3 bucket name was not set") + } + tr := http.DefaultTransport.(*http.Transport).Clone() + + // You can either disable SSL verification or use a custom CA bundle, + // it doesn't make sense to do both - if verification is disabled, + // the CA is not checked! 
+ if etcdS3.SkipSSLVerify { + tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} + } else if etcdS3.EndpointCA != "" { + tlsConfig, err := loadEndpointCAs(etcdS3.EndpointCA) + if err != nil { + return nil, err + } + tr.TLSClientConfig = tlsConfig + } + + // Set a fixed proxy URL, if requested by the user. This replaces the default, + // which calls ProxyFromEnvironment to read proxy settings from the environment. + if etcdS3.Proxy != "" { + var u *url.URL + var err error + // proxy address of literal "none" disables all use of a proxy by S3 + if etcdS3.Proxy != "none" { + u, err = url.Parse(etcdS3.Proxy) + if err != nil { + return nil, errors.Wrap(err, "failed to parse etcd-s3-proxy value as URL") + } + if u.Scheme == "" || u.Host == "" { + return nil, fmt.Errorf("proxy URL must include scheme and host") + } + } + tr.Proxy = http.ProxyURL(u) + } + + var creds *credentials.Credentials + if len(etcdS3.AccessKey) == 0 && len(etcdS3.SecretKey) == 0 { + creds = credentials.NewIAM("") // for running on ec2 instance + if _, err := creds.Get(); err != nil { + return nil, errors.Wrap(err, "failed to get IAM credentials") + } + } else { + creds = credentials.NewStaticV4(etcdS3.AccessKey, etcdS3.SecretKey, "") + } + + opt := minio.Options{ + Creds: creds, + Secure: !etcdS3.Insecure, + Region: etcdS3.Region, + Transport: tr, + BucketLookup: bucketLookupType(etcdS3.Endpoint), + } + mc, err := minio.New(etcdS3.Endpoint, &opt) + if err != nil { + return nil, err + } + + logrus.Infof("Checking if S3 bucket %s exists", etcdS3.Bucket) + + ctx, cancel := context.WithTimeout(ctx, etcdS3.Timeout.Duration) + defer cancel() + + exists, err := mc.BucketExists(ctx, etcdS3.Bucket) + if err != nil { + return nil, errors.Wrapf(err, "failed to test for existence of bucket %s", etcdS3.Bucket) + } + if !exists { + return nil, fmt.Errorf("bucket %s does not exist", etcdS3.Bucket) + } + logrus.Infof("S3 bucket %s exists", etcdS3.Bucket) + + client := &Client{ + mc: mc, + etcdS3: etcdS3, + controller: c, + } + logrus.Infof("Adding S3 client to cache") + c.clientCache.Add(*etcdS3, client) + return client, nil +} + +// upload uploads the given snapshot to the configured S3 +// compatible backend. 
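+// The returned snapshot.File records the outcome: on failure its Status and
+// Message fields are populated, on success its Size and token hash are set.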
+func (c *Client) Upload(ctx context.Context, snapshotPath string, extraMetadata *v1.ConfigMap, now time.Time) (*snapshot.File, error) { + basename := filepath.Base(snapshotPath) + metadata := filepath.Join(filepath.Dir(snapshotPath), "..", snapshot.MetadataDir, basename) + snapshotKey := path.Join(c.etcdS3.Folder, basename) + metadataKey := path.Join(c.etcdS3.Folder, snapshot.MetadataDir, basename) + + sf := &snapshot.File{ + Name: basename, + Location: fmt.Sprintf("s3://%s/%s", c.etcdS3.Bucket, snapshotKey), + NodeName: "s3", + CreatedAt: &metav1.Time{ + Time: now, + }, + S3: &snapshot.S3Config{EtcdS3: *c.etcdS3}, + Compressed: strings.HasSuffix(snapshotPath, snapshot.CompressedExtension), + MetadataSource: extraMetadata, + NodeSource: c.controller.nodeName, + } + + logrus.Infof("Uploading snapshot to s3://%s/%s", c.etcdS3.Bucket, snapshotKey) + uploadInfo, err := c.uploadSnapshot(ctx, snapshotKey, snapshotPath) + if err != nil { + sf.Status = snapshot.FailedStatus + sf.Message = base64.StdEncoding.EncodeToString([]byte(err.Error())) + } else { + sf.Status = snapshot.SuccessfulStatus + sf.Size = uploadInfo.Size + sf.TokenHash = c.controller.tokenHash + } + if uploadInfo, err := c.uploadSnapshotMetadata(ctx, metadataKey, metadata); err != nil { + logrus.Warnf("Failed to upload snapshot metadata to S3: %v", err) + } else if uploadInfo.Size != 0 { + logrus.Infof("Uploaded snapshot metadata s3://%s/%s", c.etcdS3.Bucket, metadataKey) + } + return sf, err +} + +// uploadSnapshot uploads the snapshot file to S3 using the minio API. +func (c *Client) uploadSnapshot(ctx context.Context, key, path string) (info minio.UploadInfo, err error) { + opts := minio.PutObjectOptions{ + NumThreads: 2, + UserMetadata: map[string]string{ + clusterIDKey: c.controller.clusterID, + nodeNameKey: c.controller.nodeName, + tokenHashKey: c.controller.tokenHash, + }, + } + if strings.HasSuffix(key, snapshot.CompressedExtension) { + opts.ContentType = "application/zip" + } else { + opts.ContentType = "application/octet-stream" + } + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + return c.mc.FPutObject(ctx, c.etcdS3.Bucket, key, path, opts) +} + +// uploadSnapshotMetadata marshals and uploads the snapshot metadata to S3 using the minio API. +// The upload is silently skipped if no extra metadata is provided. +func (c *Client) uploadSnapshotMetadata(ctx context.Context, key, path string) (info minio.UploadInfo, err error) { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + return minio.UploadInfo{}, nil + } + return minio.UploadInfo{}, err + } + + opts := minio.PutObjectOptions{ + NumThreads: 2, + ContentType: "application/json", + UserMetadata: map[string]string{ + clusterIDKey: c.controller.clusterID, + nodeNameKey: c.controller.nodeName, + tokenHashKey: c.controller.tokenHash, + }, + } + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + return c.mc.FPutObject(ctx, c.etcdS3.Bucket, key, path, opts) +} + +// Download downloads the given snapshot from the configured S3 +// compatible backend. If the file is successfully downloaded, it returns +// the path the file was downloaded to. 
+func (c *Client) Download(ctx context.Context, snapshotName, snapshotDir string) (string, error) { + snapshotKey := path.Join(c.etcdS3.Folder, snapshotName) + metadataKey := path.Join(c.etcdS3.Folder, snapshot.MetadataDir, snapshotName) + snapshotFile := filepath.Join(snapshotDir, snapshotName) + metadataFile := filepath.Join(snapshotDir, "..", snapshot.MetadataDir, snapshotName) + + if err := c.downloadSnapshot(ctx, snapshotKey, snapshotFile); err != nil { + return "", err + } + if err := c.downloadSnapshotMetadata(ctx, metadataKey, metadataFile); err != nil { + return "", err + } + + return snapshotFile, nil +} + +// downloadSnapshot downloads the snapshot file from S3 using the minio API. +func (c *Client) downloadSnapshot(ctx context.Context, key, file string) error { + logrus.Debugf("Downloading snapshot from s3://%s/%s", c.etcdS3.Bucket, key) + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + defer os.Chmod(file, 0600) + return c.mc.FGetObject(ctx, c.etcdS3.Bucket, key, file, minio.GetObjectOptions{}) +} + +// downloadSnapshotMetadata downloads the snapshot metadata file from S3 using the minio API. +// No error is returned if the metadata file does not exist, as it is optional. +func (c *Client) downloadSnapshotMetadata(ctx context.Context, key, file string) error { + logrus.Debugf("Downloading snapshot metadata from s3://%s/%s", c.etcdS3.Bucket, key) + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + defer os.Chmod(file, 0600) + err := c.mc.FGetObject(ctx, c.etcdS3.Bucket, key, file, minio.GetObjectOptions{}) + if resp := minio.ToErrorResponse(err); resp.StatusCode == http.StatusNotFound { + return nil + } + return err +} + +// SnapshotRetention prunes snapshots in the configured S3 compatible backend for this specific node. +// Returns a list of pruned snapshot names. 
+func (c *Client) SnapshotRetention(ctx context.Context, retention int, prefix string) ([]string, error) { + if retention < 1 { + return nil, nil + } + + prefix = path.Join(c.etcdS3.Folder, prefix) + logrus.Infof("Applying snapshot retention=%d to snapshots stored in s3://%s/%s", retention, c.etcdS3.Bucket, prefix) + + var snapshotFiles []minio.ObjectInfo + + toCtx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + + opts := minio.ListObjectsOptions{ + Prefix: prefix, + Recursive: true, + } + for info := range c.mc.ListObjects(toCtx, c.etcdS3.Bucket, opts) { + if info.Err != nil { + return nil, info.Err + } + + // skip metadata + if path.Base(path.Dir(info.Key)) == snapshot.MetadataDir { + continue + } + + snapshotFiles = append(snapshotFiles, info) + } + + if len(snapshotFiles) <= retention { + return nil, nil + } + + // sort newest-first so we can prune entries past the retention count + sort.Slice(snapshotFiles, func(i, j int) bool { + return snapshotFiles[j].LastModified.Before(snapshotFiles[i].LastModified) + }) + + deleted := []string{} + for _, df := range snapshotFiles[retention:] { + logrus.Infof("Removing S3 snapshot: s3://%s/%s", c.etcdS3.Bucket, df.Key) + + key := path.Base(df.Key) + if err := c.DeleteSnapshot(ctx, key); err != nil { + return deleted, err + } + deleted = append(deleted, key) + } + + return deleted, nil +} + +// DeleteSnapshot deletes the selected snapshot (and its metadata) from S3 +func (c *Client) DeleteSnapshot(ctx context.Context, key string) error { + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + + key = path.Join(c.etcdS3.Folder, key) + err := c.mc.RemoveObject(ctx, c.etcdS3.Bucket, key, minio.RemoveObjectOptions{}) + if err == nil || snapshot.IsNotExist(err) { + metadataKey := path.Join(path.Dir(key), snapshot.MetadataDir, path.Base(key)) + if merr := c.mc.RemoveObject(ctx, c.etcdS3.Bucket, metadataKey, minio.RemoveObjectOptions{}); merr != nil && !snapshot.IsNotExist(merr) { + err = merr + } + } + + return err +} + +// listSnapshots provides a list of currently stored +// snapshots in S3 along with their relevant +// metadata. 
+func (c *Client) ListSnapshots(ctx context.Context) (map[string]snapshot.File, error) { + snapshots := map[string]snapshot.File{} + metadatas := []string{} + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + + opts := minio.ListObjectsOptions{ + Prefix: c.etcdS3.Folder, + Recursive: true, + } + + objects := c.mc.ListObjects(ctx, c.etcdS3.Bucket, opts) + + for obj := range objects { + if obj.Err != nil { + return nil, obj.Err + } + if obj.Size == 0 { + continue + } + + if o, err := c.mc.StatObject(ctx, c.etcdS3.Bucket, obj.Key, minio.StatObjectOptions{}); err != nil { + logrus.Warnf("Failed to get object metadata: %v", err) + } else { + obj = o + } + + filename := path.Base(obj.Key) + if path.Base(path.Dir(obj.Key)) == snapshot.MetadataDir { + metadatas = append(metadatas, obj.Key) + continue + } + + basename, compressed := strings.CutSuffix(filename, snapshot.CompressedExtension) + ts, err := strconv.ParseInt(basename[strings.LastIndexByte(basename, '-')+1:], 10, 64) + if err != nil { + ts = obj.LastModified.Unix() + } + + sf := snapshot.File{ + Name: filename, + Location: fmt.Sprintf("s3://%s/%s", c.etcdS3.Bucket, obj.Key), + NodeName: "s3", + CreatedAt: &metav1.Time{ + Time: time.Unix(ts, 0), + }, + Size: obj.Size, + S3: &snapshot.S3Config{EtcdS3: *c.etcdS3}, + Status: snapshot.SuccessfulStatus, + Compressed: compressed, + NodeSource: obj.UserMetadata[nodeNameKey], + TokenHash: obj.UserMetadata[tokenHashKey], + } + sfKey := sf.GenerateConfigMapKey() + snapshots[sfKey] = sf + } + + for _, metadataKey := range metadatas { + filename := path.Base(metadataKey) + dsf := &snapshot.File{Name: filename, NodeName: "s3"} + sfKey := dsf.GenerateConfigMapKey() + if sf, ok := snapshots[sfKey]; ok { + logrus.Debugf("Loading snapshot metadata from s3://%s/%s", c.etcdS3.Bucket, metadataKey) + if obj, err := c.mc.GetObject(ctx, c.etcdS3.Bucket, metadataKey, minio.GetObjectOptions{}); err != nil { + if snapshot.IsNotExist(err) { + logrus.Debugf("Failed to get snapshot metadata: %v", err) + } else { + logrus.Warnf("Failed to get snapshot metadata for %s: %v", filename, err) + } + } else { + if m, err := ioutil.ReadAll(obj); err != nil { + if snapshot.IsNotExist(err) { + logrus.Debugf("Failed to read snapshot metadata: %v", err) + } else { + logrus.Warnf("Failed to read snapshot metadata for %s: %v", filename, err) + } + } else { + sf.Metadata = base64.StdEncoding.EncodeToString(m) + snapshots[sfKey] = sf + } + } + } + } + + return snapshots, nil +} + +func loadEndpointCAs(etcdS3EndpointCA string) (*tls.Config, error) { + var loaded bool + certPool := x509.NewCertPool() + + for _, ca := range strings.Split(etcdS3EndpointCA, " ") { + // Try to decode the value as base64-encoded data - yes, a base64 string that itself + // contains multiline, ascii-armored, base64-encoded certificate data - as would be produced + // by `base64 --wrap=0 /path/to/cert.pem`. If this fails, assume the value is the path to a + // file on disk, and try to read that. This is backwards compatible with RKE1. 
+ caData, err := base64.StdEncoding.DecodeString(ca) + if err != nil { + caData, err = os.ReadFile(ca) + } + if err != nil { + return nil, err + } + if certPool.AppendCertsFromPEM(caData) { + loaded = true + } + } + + if loaded { + return &tls.Config{RootCAs: certPool}, nil + } + return nil, errors.New("no certificates loaded from etcd-s3-endpoint-ca") +} + +func bucketLookupType(endpoint string) minio.BucketLookupType { + if strings.Contains(endpoint, "aliyun") { // backwards compatible with RKE1 + return minio.BucketLookupDNS + } + return minio.BucketLookupAuto +} diff --git a/pkg/etcd/s3/s3_test.go b/pkg/etcd/s3/s3_test.go new file mode 100644 index 000000000000..1778d21ccc5c --- /dev/null +++ b/pkg/etcd/s3/s3_test.go @@ -0,0 +1,1743 @@ +package s3 + +import ( + "context" + "fmt" + "net" + "net/http" + "os" + "path" + "path/filepath" + "reflect" + "strings" + "testing" + "text/template" + "time" + + "github.com/golang/mock/gomock" + "github.com/gorilla/mux" + "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/etcd/snapshot" + "github.com/rancher/dynamiclistener/cert" + "github.com/rancher/wrangler/pkg/generated/controllers/core" + corev1 "github.com/rancher/wrangler/pkg/generated/controllers/core/v1" + "github.com/rancher/wrangler/pkg/generic/fake" + "github.com/sirupsen/logrus" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/utils/lru" +) + +var gmt = time.FixedZone("GMT", 0) + +func Test_UnitControllerGetClient(t *testing.T) { + logrus.SetLevel(logrus.DebugLevel) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Dummy server with http and https listeners as a simple S3 mock + server := &http.Server{Handler: s3Router(t)} + + // Create temp cert/key + cert, key, _ := cert.GenerateSelfSignedCertKey("localhost", []net.IP{net.ParseIP("::1"), net.ParseIP("127.0.0.1")}, nil) + tempDir := t.TempDir() + certFile := filepath.Join(tempDir, "test.crt") + keyFile := filepath.Join(tempDir, "test.key") + os.WriteFile(certFile, cert, 0600) + os.WriteFile(keyFile, key, 0600) + + listener, _ := net.Listen("tcp", ":0") + listenerTLS, _ := net.Listen("tcp", ":0") + + _, port, _ := net.SplitHostPort(listener.Addr().String()) + listenerAddr := net.JoinHostPort("localhost", port) + _, port, _ = net.SplitHostPort(listenerTLS.Addr().String()) + listenerTLSAddr := net.JoinHostPort("localhost", port) + + go server.Serve(listener) + go server.ServeTLS(listenerTLS, certFile, keyFile) + go func() { + <-ctx.Done() + server.Close() + }() + + type fields struct { + clusterID string + tokenHash string + nodeName string + clientCache *lru.Cache + } + type args struct { + ctx context.Context + etcdS3 *config.EtcdS3 + } + tests := []struct { + name string + fields fields + args args + setup func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) + want *Client + wantErr bool + }{ + { + name: "Fail to get client with nil config", + args: args{ + ctx: ctx, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to get client when bucket not set", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Endpoint: listenerAddr, + Insecure: true, + 
Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to get client when bucket does not exist", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "badbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to get client with missing Secret", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + Endpoint: defaultEtcdS3.Endpoint, + Region: defaultEtcdS3.Region, + ConfigSecret: "my-etcd-s3-config-secret", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + coreMock.v1.secret.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-config-secret", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.Secret, error) { + return nil, errorNotFound("secret", name) + }) + return coreMock, nil + }, + }, + { + name: "Create client for config from secret", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + Endpoint: defaultEtcdS3.Endpoint, + Region: defaultEtcdS3.Region, + ConfigSecret: "my-etcd-s3-config-secret", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + coreMock.v1.secret.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-config-secret", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.Secret, error) { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Type: v1.SecretTypeOpaque, + Data: map[string][]byte{ + "etcd-s3-access-key": []byte("test"), + "etcd-s3-bucket": []byte("testbucket"), + "etcd-s3-endpoint": []byte(listenerTLSAddr), + "etcd-s3-region": []byte("us-west-2"), + "etcd-s3-timeout": []byte("1m"), + "etcd-s3-endpoint-ca": cert, + }, + }, nil + }) + return coreMock, nil + }, + }, + { + name: "Create client for config from secret with CA in configmap", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + Endpoint: defaultEtcdS3.Endpoint, + Region: defaultEtcdS3.Region, + ConfigSecret: "my-etcd-s3-config-secret", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + coreMock.v1.secret.EXPECT().Get(metav1.NamespaceSystem, 
"my-etcd-s3-config-secret", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.Secret, error) { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Type: v1.SecretTypeOpaque, + Data: map[string][]byte{ + "etcd-s3-access-key": []byte("test"), + "etcd-s3-bucket": []byte("testbucket"), + "etcd-s3-endpoint": []byte(listenerTLSAddr), + "etcd-s3-region": []byte("us-west-2"), + "etcd-s3-timeout": []byte("1m"), + "etcd-s3-endpoint-ca-name": []byte("my-etcd-s3-ca"), + "etcd-s3-skip-ssl-verify": []byte("false"), + }, + }, nil + }) + coreMock.v1.configMap.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-ca", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.ConfigMap, error) { + return &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Data: map[string]string{ + "dummy-ca": string(cert), + }, + BinaryData: map[string][]byte{ + "dummy-ca-binary": cert, + }, + }, nil + }) + return coreMock, nil + }, + }, + { + name: "Fail to create client for config from secret with CA in missing configmap", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + Endpoint: defaultEtcdS3.Endpoint, + Region: defaultEtcdS3.Region, + ConfigSecret: "my-etcd-s3-config-secret", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + coreMock.v1.secret.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-config-secret", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.Secret, error) { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Type: v1.SecretTypeOpaque, + Data: map[string][]byte{ + "etcd-s3-access-key": []byte("test"), + "etcd-s3-bucket": []byte("testbucket"), + "etcd-s3-endpoint": []byte(listenerTLSAddr), + "etcd-s3-region": []byte("us-west-2"), + "etcd-s3-timeout": []byte("invalid"), + "etcd-s3-endpoint-ca": []byte("invalid"), + "etcd-s3-endpoint-ca-name": []byte("my-etcd-s3-ca"), + "etcd-s3-skip-ssl-verify": []byte("invalid"), + "etcd-s3-insecure": []byte("invalid"), + }, + }, nil + }) + coreMock.v1.configMap.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-ca", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.ConfigMap, error) { + return nil, errorNotFound("configmap", name) + }) + return coreMock, nil + }, + }, + { + name: "Create insecure client for config from cli when secret is also set", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Create skip-ssl-verify client for config from cli when secret is also set", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: 
"my-etcd-s3-config-secret", + Endpoint: listenerTLSAddr, + SkipSSLVerify: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Create client for config from cli when secret is not set", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Get cached client for config from secret", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + Endpoint: defaultEtcdS3.Endpoint, + Region: defaultEtcdS3.Region, + ConfigSecret: "my-etcd-s3-config-secret", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + want: &Client{}, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + c.etcdS3 = &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + } + f.clientCache.Add(*c.etcdS3, c) + coreMock := newCoreMock(gomock.NewController(t)) + coreMock.v1.secret.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-config-secret", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.Secret, error) { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Type: v1.SecretTypeOpaque, + Data: map[string][]byte{ + "etcd-s3-access-key": []byte("test"), + "etcd-s3-bucket": []byte("testbucket"), + "etcd-s3-endpoint": []byte(listenerAddr), + "etcd-s3-insecure": []byte("true"), + }, + }, nil + }) + return coreMock, nil + }, + }, + { + name: "Get cached client for config from cli", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + want: &Client{}, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + c.etcdS3 = a.etcdS3 + f.clientCache.Add(*c.etcdS3, c) + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Create client for config from cli with proxy", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + Proxy: "http://" + listenerAddr, + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) 
(core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to create client for config from cli with invalid proxy", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + Proxy: "http://%invalid", + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to create client for config from cli with no proxy scheme", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + Proxy: "/proxy", + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Create client for config from cli with CA path", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerTLSAddr, + EndpointCA: certFile, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to create client for config from cli with invalid CA path", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerTLSAddr, + EndpointCA: "/does/not/exist", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + core, err := tt.setup(t, tt.args, tt.fields, tt.want) + if err != nil { + t.Errorf("Setup for Controller.GetClient() failed = %v", err) + return + } + c := &Controller{ + clusterID: tt.fields.clusterID, + tokenHash: tt.fields.tokenHash, + nodeName: tt.fields.nodeName, + clientCache: tt.fields.clientCache, + core: core, + } + got, err := c.GetClient(tt.args.ctx, tt.args.etcdS3) + t.Logf("Got client=%#v err=%v", got, err) + if (err != nil) != tt.wantErr { + t.Errorf("Controller.GetClient() error = %v, wantErr %v", err, tt.wantErr) + return + } + if tt.want != nil && !reflect.DeepEqual(got, tt.want) { + t.Errorf("Controller.GetClient() = %+v\nWant = %+v", got, tt.want) + } + }) + } +} + +func Test_UnitClientUpload(t *testing.T) { + 
logrus.SetLevel(logrus.DebugLevel) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Dummy server with http listener as a simple S3 mock + server := &http.Server{Handler: s3Router(t)} + + listener, _ := net.Listen("tcp", ":0") + + _, port, _ := net.SplitHostPort(listener.Addr().String()) + listenerAddr := net.JoinHostPort("localhost", port) + + go server.Serve(listener) + go func() { + <-ctx.Done() + server.Close() + }() + + controller, err := Start(ctx, &config.Control{ClusterReset: true}) + if err != nil { + t.Errorf("Start() for Client.Upload() failed = %v", err) + return + } + + tempDir := t.TempDir() + metadataDir := filepath.Join(tempDir, ".metadata") + snapshotDir := filepath.Join(tempDir, "snapshots") + snapshotPath := filepath.Join(snapshotDir, "snapshot-01") + metadataPath := filepath.Join(metadataDir, "snapshot-01") + if err := os.Mkdir(snapshotDir, 0700); err != nil { + t.Errorf("Mkdir() failed = %v", err) + return + } + if err := os.Mkdir(metadataDir, 0700); err != nil { + t.Errorf("Mkdir() failed = %v", err) + return + } + if err := os.WriteFile(snapshotPath, []byte("test snapshot file\n"), 0600); err != nil { + t.Errorf("WriteFile() failed = %v", err) + return + } + if err := os.WriteFile(metadataPath, []byte("test snapshot metadata\n"), 0600); err != nil { + t.Errorf("WriteFile() failed = %v", err) + return + } + + t.Logf("Using snapshot = %s, metadata = %s", snapshotPath, metadataPath) + + type fields struct { + controller *Controller + etcdS3 *config.EtcdS3 + } + type args struct { + ctx context.Context + snapshotPath string + extraMetadata *v1.ConfigMap + now time.Time + } + tests := []struct { + name string + fields fields + args args + want *snapshot.File + wantErr bool + }{ + { + name: "Successful Upload", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotPath: snapshotPath, + extraMetadata: &v1.ConfigMap{Data: map[string]string{"foo": "bar"}}, + now: time.Now(), + }, + }, + { + name: "Successful Upload with Prefix", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Folder: "testfolder", + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotPath: snapshotPath, + extraMetadata: &v1.ConfigMap{Data: map[string]string{"foo": "bar"}}, + now: time.Now(), + }, + }, + { + name: "Fails Upload to Nonexistent Bucket", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "badbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotPath: snapshotPath, + extraMetadata: &v1.ConfigMap{Data: map[string]string{"foo": "bar"}}, + now: time.Now(), + }, + wantErr: true, + }, + { + name: "Fails Upload to Unauthorized Bucket", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "authbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotPath: snapshotPath, + extraMetadata: &v1.ConfigMap{Data: map[string]string{"foo": 
"bar"}}, + now: time.Now(), + }, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c, err := tt.fields.controller.GetClient(tt.args.ctx, tt.fields.etcdS3) + if err != nil { + if !tt.wantErr { + t.Errorf("GetClient for Client.Upload() error = %v, wantErr %v", err, tt.wantErr) + } + return + } + got, err := c.Upload(tt.args.ctx, tt.args.snapshotPath, tt.args.extraMetadata, tt.args.now) + t.Logf("Got File=%#v err=%v", got, err) + if (err != nil) != tt.wantErr { + t.Errorf("Client.Upload() error = %v, wantErr %v", err, tt.wantErr) + return + } + if tt.want != nil && !reflect.DeepEqual(got, tt.want) { + t.Errorf("Client.Upload() = %+v\nWant = %+v", got, tt.want) + } + }) + } +} + +func Test_UnitClientDownload(t *testing.T) { + logrus.SetLevel(logrus.DebugLevel) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Dummy server with http listener as a simple S3 mock + server := &http.Server{Handler: s3Router(t)} + + listener, _ := net.Listen("tcp", ":0") + + _, port, _ := net.SplitHostPort(listener.Addr().String()) + listenerAddr := net.JoinHostPort("localhost", port) + + go server.Serve(listener) + go func() { + <-ctx.Done() + server.Close() + }() + + controller, err := Start(ctx, &config.Control{ClusterReset: true}) + if err != nil { + t.Errorf("Start() for Client.Download() failed = %v", err) + return + } + + snapshotName := "snapshot-01" + tempDir := t.TempDir() + metadataDir := filepath.Join(tempDir, ".metadata") + snapshotDir := filepath.Join(tempDir, "snapshots") + if err := os.Mkdir(snapshotDir, 0700); err != nil { + t.Errorf("Mkdir() failed = %v", err) + return + } + if err := os.Mkdir(metadataDir, 0700); err != nil { + t.Errorf("Mkdir() failed = %v", err) + return + } + + type fields struct { + etcdS3 *config.EtcdS3 + controller *Controller + } + type args struct { + ctx context.Context + snapshotName string + snapshotDir string + } + tests := []struct { + name string + fields fields + args args + want string + wantErr bool + }{ + { + name: "Successful Download", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotName: snapshotName, + snapshotDir: snapshotDir, + }, + }, + { + name: "Unauthorizied Download", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "authbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotName: snapshotName, + snapshotDir: snapshotDir, + }, + wantErr: true, + }, + { + name: "Nonexistent Bucket", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "badbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotName: snapshotName, + snapshotDir: snapshotDir, + }, + wantErr: true, + }, + { + name: "Nonexistent Snapshot", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotName: "badfile-1", + 
snapshotDir: snapshotDir, + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c, err := tt.fields.controller.GetClient(tt.args.ctx, tt.fields.etcdS3) + if err != nil { + if !tt.wantErr { + t.Errorf("GetClient for Client.Upload() error = %v, wantErr %v", err, tt.wantErr) + } + return + } + got, err := c.Download(tt.args.ctx, tt.args.snapshotName, tt.args.snapshotDir) + t.Logf("Got snapshotPath=%#v err=%v", got, err) + if (err != nil) != tt.wantErr { + t.Errorf("Client.Download() error = %v, wantErr %v", err, tt.wantErr) + return + } + if tt.want != "" && got != tt.want { + t.Errorf("Client.Download() = %+v\nWant = %+v", got, tt.want) + } + }) + } +} + +func Test_UnitClientListSnapshots(t *testing.T) { + logrus.SetLevel(logrus.DebugLevel) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Dummy server with http listener as a simple S3 mock + server := &http.Server{Handler: s3Router(t)} + + listener, _ := net.Listen("tcp", ":0") + + _, port, _ := net.SplitHostPort(listener.Addr().String()) + listenerAddr := net.JoinHostPort("localhost", port) + + go server.Serve(listener) + go func() { + <-ctx.Done() + server.Close() + }() + + controller, err := Start(ctx, &config.Control{ClusterReset: true}) + if err != nil { + t.Errorf("Start() for Client.Download() failed = %v", err) + return + } + + type fields struct { + etcdS3 *config.EtcdS3 + controller *Controller + } + type args struct { + ctx context.Context + } + tests := []struct { + name string + fields fields + args args + want map[string]snapshot.File + wantErr bool + }{ + { + name: "List Snapshots", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + }, + }, + { + name: "List Snapshots with Prefix", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Folder: "testfolder", + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + }, + }, + { + name: "Fail to List Snapshots from Nonexistent Bucket", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "badbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + }, + wantErr: true, + }, + { + name: "Fail to List Snapshots from Unauthorized Bucket", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "authbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c, err := tt.fields.controller.GetClient(tt.args.ctx, tt.fields.etcdS3) + if err != nil { + if !tt.wantErr { + t.Errorf("GetClient for Client.Upload() error = %v, wantErr %v", err, tt.wantErr) + } + return + } + got, err := c.ListSnapshots(tt.args.ctx) + t.Logf("Got snapshots=%#v err=%v", got, err) + if (err != nil) != tt.wantErr { + t.Errorf("Client.ListSnapshots() error = %v, wantErr %v", err, tt.wantErr) + return + } + if tt.want != nil && !reflect.DeepEqual(got, 
tt.want) { + t.Errorf("Client.ListSnapshots() = %+v\nWant = %+v", got, tt.want) + } + }) + } +} + +func Test_UnitClientDeleteSnapshot(t *testing.T) { + logrus.SetLevel(logrus.DebugLevel) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Dummy server with http listener as a simple S3 mock + server := &http.Server{Handler: s3Router(t)} + + listener, _ := net.Listen("tcp", ":0") + + _, port, _ := net.SplitHostPort(listener.Addr().String()) + listenerAddr := net.JoinHostPort("localhost", port) + + go server.Serve(listener) + go func() { + <-ctx.Done() + server.Close() + }() + + controller, err := Start(ctx, &config.Control{ClusterReset: true}) + if err != nil { + t.Errorf("Start() for Client.Download() failed = %v", err) + return + } + + type fields struct { + etcdS3 *config.EtcdS3 + controller *Controller + } + type args struct { + ctx context.Context + key string + } + tests := []struct { + name string + fields fields + args args + wantErr bool + }{ + { + name: "Delete Snapshot", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + key: "snapshot-01", + }, + }, + { + name: "Fails to Delete from Nonexistent Bucket", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "badbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + key: "snapshot-01", + }, + wantErr: true, + }, + { + name: "Fails to Delete from Unauthorized Bucket", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "authbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + key: "snapshot-01", + }, + wantErr: true, + }, + { + name: "Fails to Delete Nonexistent Snapshot", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + key: "badfile-1", + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c, err := tt.fields.controller.GetClient(tt.args.ctx, tt.fields.etcdS3) + if err != nil { + if !tt.wantErr { + t.Errorf("GetClient for Client.DeleteSnapshot() error = %v, wantErr %v", err, tt.wantErr) + } + return + } + err = c.DeleteSnapshot(tt.args.ctx, tt.args.key) + t.Logf("DeleteSnapshot got error=%v", err) + if (err != nil) != tt.wantErr { + t.Errorf("Client.DeleteSnapshot() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +func Test_UnitClientSnapshotRetention(t *testing.T) { + logrus.SetLevel(logrus.DebugLevel) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Dummy server with http listener as a simple S3 mock + server := &http.Server{Handler: s3Router(t)} + + listener, _ := net.Listen("tcp", ":0") + + _, port, _ := net.SplitHostPort(listener.Addr().String()) + listenerAddr := net.JoinHostPort("localhost", port) + + go server.Serve(listener) + go func() { + <-ctx.Done() + server.Close() + }() + + controller, err := Start(ctx, &config.Control{ClusterReset: 
true}) + if err != nil { + t.Errorf("Start() for Client.Download() failed = %v", err) + return + } + + type fields struct { + etcdS3 *config.EtcdS3 + controller *Controller + } + type args struct { + ctx context.Context + retention int + prefix string + } + tests := []struct { + name string + fields fields + args args + want []string + wantErr bool + }{ + { + name: "Prune Snapshots - keep all, no folder", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + retention: 10, + prefix: "snapshot-", + }, + }, + { + name: "Prune Snapshots keep 2 of 3, no folder", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + retention: 2, + prefix: "snapshot-", + }, + want: []string{"snapshot-03"}, + }, + { + name: "Prune Snapshots - keep 1 of 3, no folder", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + retention: 1, + prefix: "snapshot-", + }, + want: []string{"snapshot-02", "snapshot-03"}, + }, + { + name: "Prune Snapshots - keep all, with folder", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Folder: "testfolder", + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + retention: 10, + prefix: "snapshot-", + }, + }, + { + name: "Prune Snapshots keep 2 of 3, with folder", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Folder: "testfolder", + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + retention: 2, + prefix: "snapshot-", + }, + want: []string{"snapshot-06"}, + }, + { + name: "Prune Snapshots - keep 1 of 3, with folder", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Folder: "testfolder", + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + retention: 1, + prefix: "snapshot-", + }, + want: []string{"snapshot-05", "snapshot-06"}, + }, + { + name: "Fail to Prune from Unauthorized Bucket", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "authbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + args: args{ + ctx: ctx, + retention: 1, + prefix: "snapshot-", + }, + wantErr: true, + }, + { + name: "Fail to Prune from Nonexistent Bucket", + fields: fields{ + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "badbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + controller: controller, + }, + 
args: args{ + ctx: ctx, + retention: 1, + prefix: "snapshot-", + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c, err := tt.fields.controller.GetClient(tt.args.ctx, tt.fields.etcdS3) + if err != nil { + if !tt.wantErr { + t.Errorf("GetClient for Client.SnapshotRetention() error = %v, wantErr %v", err, tt.wantErr) + } + return + } + got, err := c.SnapshotRetention(tt.args.ctx, tt.args.retention, tt.args.prefix) + t.Logf("Got snapshots=%#v err=%v", got, err) + if (err != nil) != tt.wantErr { + t.Errorf("Client.SnapshotRetention() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("Client.SnapshotRetention() = %+v\nWant = %+v", got, tt.want) + } + }) + } +} + +// +// Mocks so that we can call Runtime.Core.Core().V1() without a functioning apiserver +// + +// explicit interface check for core mock +var _ core.Interface = &coreMock{} + +type coreMock struct { + v1 *v1Mock +} + +func newCoreMock(c *gomock.Controller) *coreMock { + return &coreMock{ + v1: newV1Mock(c), + } +} + +func (m *coreMock) V1() corev1.Interface { + return m.v1 +} + +// explicit interface check for core v1 mock +var _ corev1.Interface = &v1Mock{} + +type v1Mock struct { + configMap *fake.MockControllerInterface[*v1.ConfigMap, *v1.ConfigMapList] + endpoints *fake.MockControllerInterface[*v1.Endpoints, *v1.EndpointsList] + event *fake.MockControllerInterface[*v1.Event, *v1.EventList] + namespace *fake.MockNonNamespacedControllerInterface[*v1.Namespace, *v1.NamespaceList] + node *fake.MockNonNamespacedControllerInterface[*v1.Node, *v1.NodeList] + persistentVolume *fake.MockNonNamespacedControllerInterface[*v1.PersistentVolume, *v1.PersistentVolumeList] + persistentVolumeClaim *fake.MockControllerInterface[*v1.PersistentVolumeClaim, *v1.PersistentVolumeClaimList] + pod *fake.MockControllerInterface[*v1.Pod, *v1.PodList] + secret *fake.MockControllerInterface[*v1.Secret, *v1.SecretList] + service *fake.MockControllerInterface[*v1.Service, *v1.ServiceList] + serviceAccount *fake.MockControllerInterface[*v1.ServiceAccount, *v1.ServiceAccountList] +} + +func newV1Mock(c *gomock.Controller) *v1Mock { + return &v1Mock{ + configMap: fake.NewMockControllerInterface[*v1.ConfigMap, *v1.ConfigMapList](c), + endpoints: fake.NewMockControllerInterface[*v1.Endpoints, *v1.EndpointsList](c), + event: fake.NewMockControllerInterface[*v1.Event, *v1.EventList](c), + namespace: fake.NewMockNonNamespacedControllerInterface[*v1.Namespace, *v1.NamespaceList](c), + node: fake.NewMockNonNamespacedControllerInterface[*v1.Node, *v1.NodeList](c), + persistentVolume: fake.NewMockNonNamespacedControllerInterface[*v1.PersistentVolume, *v1.PersistentVolumeList](c), + persistentVolumeClaim: fake.NewMockControllerInterface[*v1.PersistentVolumeClaim, *v1.PersistentVolumeClaimList](c), + pod: fake.NewMockControllerInterface[*v1.Pod, *v1.PodList](c), + secret: fake.NewMockControllerInterface[*v1.Secret, *v1.SecretList](c), + service: fake.NewMockControllerInterface[*v1.Service, *v1.ServiceList](c), + serviceAccount: fake.NewMockControllerInterface[*v1.ServiceAccount, *v1.ServiceAccountList](c), + } +} + +func (m *v1Mock) ConfigMap() corev1.ConfigMapController { + return m.configMap +} + +func (m *v1Mock) Endpoints() corev1.EndpointsController { + return m.endpoints +} + +func (m *v1Mock) Event() corev1.EventController { + return m.event +} + +func (m *v1Mock) Namespace() corev1.NamespaceController { + return m.namespace +} + +func (m *v1Mock) Node() 
corev1.NodeController { + return m.node +} + +func (m *v1Mock) PersistentVolume() corev1.PersistentVolumeController { + return m.persistentVolume +} + +func (m *v1Mock) PersistentVolumeClaim() corev1.PersistentVolumeClaimController { + return m.persistentVolumeClaim +} + +func (m *v1Mock) Pod() corev1.PodController { + return m.pod +} + +func (m *v1Mock) Secret() corev1.SecretController { + return m.secret +} + +func (m *v1Mock) Service() corev1.ServiceController { + return m.service +} + +func (m *v1Mock) ServiceAccount() corev1.ServiceAccountController { + return m.serviceAccount +} + +func errorNotFound(gv, name string) error { + return apierrors.NewNotFound(schema.ParseGroupResource(gv), name) +} + +// +// ListObjects response body template +// + +var listObjectsV2ResponseTemplate = ` +{{- /* */ -}} +{{ with $b := . -}} + + {{$b.Name}} + {{ if $b.Prefix }}{{$b.Prefix}}{{ else }}{{ end }} + {{ len $b.Objects }} + 1000 + + false + {{- range $o := $b.Objects }} + + {{ $o.Key }} + {{ $o.LastModified }} + {{ printf "%q" $o.ETag }} + {{ $o.Size }} + + 0 + test + + STANDARD + + {{- end }} + url + +{{- end }} +` + +func s3Router(t *testing.T) http.Handler { + var listResponse = template.Must(template.New("listObjectsV2").Parse(listObjectsV2ResponseTemplate)) + + type object struct { + Key string + LastModified string + ETag string + Size int + } + + type bucket struct { + Name string + Prefix string + Objects []object + } + + snapshotId := 0 + objects := []object{} + timestamp := time.Now().Format(time.RFC3339) + for _, prefix := range []string{"", "testfolder", "testfolder/netsted", "otherfolder"} { + for idx := range []int{0, 1, 2} { + snapshotId++ + objects = append(objects, object{ + Key: path.Join(prefix, fmt.Sprintf("snapshot-%02d", snapshotId)), + LastModified: timestamp, + ETag: "0000", + Size: 100, + }) + if idx != 0 { + objects = append(objects, object{ + Key: path.Join(prefix, fmt.Sprintf(".metadata/snapshot-%02d", snapshotId)), + LastModified: timestamp, + ETag: "0000", + Size: 10, + }) + } + } + } + + // badbucket returns 404 for all requests + // authbucket returns 200 for HeadBucket, 403 for all others + // others return 200 for objects with name prefix snapshot, 404 for all others + router := mux.NewRouter().SkipClean(true) + // HeadBucket + router.Path("/{bucket}/").Methods(http.MethodHead).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + } + }) + // ListObjectsV2 + router.Path("/{bucket}/").Methods(http.MethodGet).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case "authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + prefix := r.URL.Query().Get("prefix") + filtered := []object{} + for _, object := range objects { + if strings.HasPrefix(object.Key, prefix) { + filtered = append(filtered, object) + } + } + if err := listResponse.Execute(rw, bucket{Name: vars["bucket"], Prefix: prefix, Objects: filtered}); err != nil { + t.Errorf("Failed to generate ListObjectsV2 response, error = %v", err) + rw.WriteHeader(http.StatusInternalServerError) + } + } + }) + // HeadObject - snapshot + router.Path("/{bucket}/{prefix:.*}snapshot-{snapshot}").Methods(http.MethodHead).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case 
"authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + rw.Header().Add("last-modified", time.Now().In(gmt).Format(time.RFC1123)) + } + }) + // GetObject - snapshot + router.Path("/{bucket}/{prefix:.*}snapshot-{snapshot}").Methods(http.MethodGet).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case "authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + rw.Header().Add("last-modified", time.Now().In(gmt).Format(time.RFC1123)) + rw.Write([]byte("test snapshot file\n")) + } + }) + // PutObject/DeleteObject - snapshot + router.Path("/{bucket}/{prefix:.*}snapshot-{snapshot}").Methods(http.MethodPut, http.MethodDelete).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case "authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + if r.Method == http.MethodDelete { + rw.WriteHeader(http.StatusNoContent) + } + } + }) + // HeadObject - snapshot metadata + router.Path("/{bucket}/{prefix:.*}.metadata/snapshot-{snapshot}").Methods(http.MethodHead).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case "authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + rw.Header().Add("last-modified", time.Now().In(gmt).Format(time.RFC1123)) + } + }) + // GetObject - snapshot metadata + router.Path("/{bucket}/{prefix:.*}.metadata/snapshot-{snapshot}").Methods(http.MethodGet).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case "authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + rw.Header().Add("last-modified", time.Now().In(gmt).Format(time.RFC1123)) + rw.Write([]byte("test snapshot metadata\n")) + } + }) + // PutObject/DeleteObject - snapshot metadata + router.Path("/{bucket}/{prefix:.*}.metadata/snapshot-{snapshot}").Methods(http.MethodPut, http.MethodDelete).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case "authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + if r.Method == http.MethodDelete { + rw.WriteHeader(http.StatusNoContent) + } + } + }) + return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + scheme := "http" + if r.TLS != nil { + scheme = "https" + } + logrus.Infof("%s %s://%s %s", r.Method, scheme, r.Host, r.URL) + router.ServeHTTP(rw, r) + }) +} diff --git a/pkg/etcd/snapshot.go b/pkg/etcd/snapshot.go index 8669b8443ffa..3fccfe37e868 100644 --- a/pkg/etcd/snapshot.go +++ b/pkg/etcd/snapshot.go @@ -3,14 +3,11 @@ package etcd import ( "archive/zip" "context" - "crypto/sha256" "encoding/base64" - "encoding/hex" "encoding/json" "fmt" "io" "math/rand" - "net/http" "os" "path/filepath" "runtime" @@ -22,38 +19,31 @@ import ( k3s "github.com/k3s-io/k3s/pkg/apis/k3s.cattle.io/v1" "github.com/k3s-io/k3s/pkg/cluster/managed" "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/etcd/s3" + "github.com/k3s-io/k3s/pkg/etcd/snapshot" "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" - "github.com/minio/minio-go/v7" "github.com/pkg/errors" "github.com/robfig/cron/v3" "github.com/sirupsen/logrus" - 
"go.etcd.io/etcd/etcdutl/v3/snapshot" + snapshotv3 "go.etcd.io/etcd/etcdutl/v3/snapshot" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/validation" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/retry" - "k8s.io/utils/ptr" ) const ( - compressedExtension = ".zip" - metadataDir = ".metadata" - errorTTL = 24 * time.Hour + errorTTL = 24 * time.Hour ) var ( - snapshotExtraMetadataConfigMapName = version.Program + "-etcd-snapshot-extra-metadata" - labelStorageNode = "etcd." + version.Program + ".cattle.io/snapshot-storage-node" - annotationLocalReconciled = "etcd." + version.Program + ".cattle.io/local-snapshots-timestamp" - annotationS3Reconciled = "etcd." + version.Program + ".cattle.io/s3-snapshots-timestamp" - annotationTokenHash = "etcd." + version.Program + ".cattle.io/snapshot-token-hash" + annotationLocalReconciled = "etcd." + version.Program + ".cattle.io/local-snapshots-timestamp" + annotationS3Reconciled = "etcd." + version.Program + ".cattle.io/s3-snapshots-timestamp" // snapshotDataBackoff will retry at increasing steps for up to ~30 seconds. // If the ConfigMap update fails, the list won't be reconciled again until next time @@ -109,7 +99,7 @@ func snapshotDir(config *config.Control, create bool) (string, error) { func (e *ETCD) compressSnapshot(snapshotDir, snapshotName, snapshotPath string, now time.Time) (string, error) { logrus.Info("Compressing etcd snapshot file: " + snapshotName) - zippedSnapshotName := snapshotName + compressedExtension + zippedSnapshotName := snapshotName + snapshot.CompressedExtension zipPath := filepath.Join(snapshotDir, zippedSnapshotName) zf, err := os.Create(zipPath) @@ -168,7 +158,7 @@ func (e *ETCD) decompressSnapshot(snapshotDir, snapshotFile string) (string, err var decompressed *os.File for _, sf := range r.File { - decompressed, err = os.OpenFile(strings.Replace(sf.Name, compressedExtension, "", -1), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, sf.Mode()) + decompressed, err = os.OpenFile(strings.Replace(sf.Name, snapshot.CompressedExtension, "", -1), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, sf.Mode()) if err != nil { return "", err } @@ -203,13 +193,13 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { // make sure the core.Factory is initialized before attempting to add snapshot metadata var extraMetadata *v1.ConfigMap if e.config.Runtime.Core == nil { - logrus.Debugf("Cannot retrieve extra metadata from %s ConfigMap: runtime core not ready", snapshotExtraMetadataConfigMapName) + logrus.Debugf("Cannot retrieve extra metadata from %s ConfigMap: runtime core not ready", snapshot.ExtraMetadataConfigMapName) } else { - logrus.Debugf("Attempting to retrieve extra metadata from %s ConfigMap", snapshotExtraMetadataConfigMapName) - if snapshotExtraMetadataConfigMap, err := e.config.Runtime.Core.Core().V1().ConfigMap().Get(metav1.NamespaceSystem, snapshotExtraMetadataConfigMapName, metav1.GetOptions{}); err != nil { - logrus.Debugf("Error encountered attempting to retrieve extra metadata from %s ConfigMap, error: %v", snapshotExtraMetadataConfigMapName, err) + logrus.Debugf("Attempting to retrieve extra metadata from %s ConfigMap", snapshot.ExtraMetadataConfigMapName) + if snapshotExtraMetadataConfigMap, err := 
e.config.Runtime.Core.Core().V1().ConfigMap().Get(metav1.NamespaceSystem, snapshot.ExtraMetadataConfigMapName, metav1.GetOptions{}); err != nil { + logrus.Debugf("Error encountered attempting to retrieve extra metadata from %s ConfigMap, error: %v", snapshot.ExtraMetadataConfigMapName, err) } else { - logrus.Debugf("Setting extra metadata from %s ConfigMap", snapshotExtraMetadataConfigMapName) + logrus.Debugf("Setting extra metadata from %s ConfigMap", snapshot.ExtraMetadataConfigMapName) extraMetadata = snapshotExtraMetadataConfigMap } } @@ -246,20 +236,20 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { snapshotPath := filepath.Join(snapshotDir, snapshotName) logrus.Infof("Saving etcd snapshot to %s", snapshotPath) - var sf *snapshotFile + var sf *snapshot.File - if err := snapshot.NewV3(e.client.GetLogger()).Save(ctx, *cfg, snapshotPath); err != nil { - sf = &snapshotFile{ + if err := snapshotv3.NewV3(e.client.GetLogger()).Save(ctx, *cfg, snapshotPath); err != nil { + sf = &snapshot.File{ Name: snapshotName, Location: "", NodeName: nodeName, CreatedAt: &metav1.Time{ Time: now, }, - Status: failedSnapshotStatus, + Status: snapshot.FailedStatus, Message: base64.StdEncoding.EncodeToString([]byte(err.Error())), Size: 0, - metadataSource: extraMetadata, + MetadataSource: extraMetadata, } logrus.Errorf("Failed to take etcd snapshot: %v", err) if err := e.addSnapshotData(*sf); err != nil { @@ -290,18 +280,18 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { return nil, errors.Wrap(err, "unable to retrieve snapshot information from local snapshot") } - sf = &snapshotFile{ + sf = &snapshot.File{ Name: f.Name(), Location: "file://" + snapshotPath, NodeName: nodeName, CreatedAt: &metav1.Time{ Time: now, }, - Status: successfulSnapshotStatus, + Status: snapshot.SuccessfulStatus, Size: f.Size(), Compressed: e.config.EtcdSnapshotCompress, - metadataSource: extraMetadata, - tokenHash: tokenHash, + MetadataSource: extraMetadata, + TokenHash: tokenHash, } res.Created = append(res.Created, sf.Name) @@ -323,34 +313,29 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { } res.Deleted = append(res.Deleted, deleted...) 
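+ // Upload to S3 and apply the S3 retention policy if S3 is configured; a missing
+ // S3 config secret is tolerated here and only logged as a warning.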
- if e.config.EtcdS3 { - if err := e.initS3IfNil(ctx); err != nil { + if e.config.EtcdS3 != nil { + if s3client, err := e.getS3Client(ctx); err != nil { logrus.Warnf("Unable to initialize S3 client: %v", err) - sf = &snapshotFile{ - Name: f.Name(), - NodeName: "s3", - CreatedAt: &metav1.Time{ - Time: now, - }, - Message: base64.StdEncoding.EncodeToString([]byte(err.Error())), - Size: 0, - Status: failedSnapshotStatus, - S3: &s3Config{ - Endpoint: e.config.EtcdS3Endpoint, - EndpointCA: e.config.EtcdS3EndpointCA, - SkipSSLVerify: e.config.EtcdS3SkipSSLVerify, - Bucket: e.config.EtcdS3BucketName, - Region: e.config.EtcdS3Region, - Folder: e.config.EtcdS3Folder, - Insecure: e.config.EtcdS3Insecure, - }, - metadataSource: extraMetadata, + if !errors.Is(err, s3.ErrNoConfigSecret) { + err = errors.Wrap(err, "failed to initialize S3 client") + sf = &snapshot.File{ + Name: f.Name(), + NodeName: "s3", + CreatedAt: &metav1.Time{ + Time: now, + }, + Message: base64.StdEncoding.EncodeToString([]byte(err.Error())), + Size: 0, + Status: snapshot.FailedStatus, + S3: &snapshot.S3Config{EtcdS3: *e.config.EtcdS3}, + MetadataSource: extraMetadata, + } } } else { logrus.Infof("Saving etcd snapshot %s to S3", snapshotName) - // upload will return a snapshotFile even on error - if there was an + // upload will return a snapshot.File even on error - if there was an // error, it will be reflected in the status and message. - sf, err = e.s3.upload(ctx, snapshotPath, extraMetadata, now) + sf, err = s3client.Upload(ctx, snapshotPath, extraMetadata, now) if err != nil { logrus.Errorf("Error received during snapshot upload to S3: %s", err) } else { @@ -360,7 +345,7 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { // Attempt to apply retention even if the upload failed; failure may be due to bucket // being full or some other condition that retention policy would resolve. // Snapshot retention may prune some files before returning an error. Failing to prune is not fatal. - deleted, err := e.s3.snapshotRetention(ctx) + deleted, err := s3client.SnapshotRetention(ctx, e.config.EtcdSnapshotRetention, e.config.EtcdSnapshotName) res.Deleted = append(res.Deleted, deleted...) if err != nil { logrus.Warnf("Failed to apply s3 snapshot retention policy: %v", err) @@ -378,52 +363,12 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { return res, e.ReconcileSnapshotData(ctx) } -type s3Config struct { - Endpoint string `json:"endpoint,omitempty"` - EndpointCA string `json:"endpointCA,omitempty"` - SkipSSLVerify bool `json:"skipSSLVerify,omitempty"` - Bucket string `json:"bucket,omitempty"` - Region string `json:"region,omitempty"` - Folder string `json:"folder,omitempty"` - Insecure bool `json:"insecure,omitempty"` -} - -type snapshotStatus string - -const ( - successfulSnapshotStatus snapshotStatus = "successful" - failedSnapshotStatus snapshotStatus = "failed" -) - -// snapshotFile represents a single snapshot and it's -// metadata. -type snapshotFile struct { - Name string `json:"name"` - // Location contains the full path of the snapshot. For - // local paths, the location will be prefixed with "file://". 
- Location string `json:"location,omitempty"` - Metadata string `json:"metadata,omitempty"` - Message string `json:"message,omitempty"` - NodeName string `json:"nodeName,omitempty"` - CreatedAt *metav1.Time `json:"createdAt,omitempty"` - Size int64 `json:"size,omitempty"` - Status snapshotStatus `json:"status,omitempty"` - S3 *s3Config `json:"s3Config,omitempty"` - Compressed bool `json:"compressed"` - - // these fields are used for the internal representation of the snapshot - // to populate other fields before serialization to the legacy configmap. - metadataSource *v1.ConfigMap `json:"-"` - nodeSource string `json:"-"` - tokenHash string `json:"-"` -} - // listLocalSnapshots provides a list of the currently stored // snapshots on disk along with their relevant // metadata. -func (e *ETCD) listLocalSnapshots() (map[string]snapshotFile, error) { +func (e *ETCD) listLocalSnapshots() (map[string]snapshot.File, error) { nodeName := os.Getenv("NODE_NAME") - snapshots := make(map[string]snapshotFile) + snapshots := make(map[string]snapshot.File) snapshotDir, err := snapshotDir(e.config, true) if err != nil { return snapshots, errors.Wrap(err, "failed to get etcd-snapshot-dir") @@ -434,7 +379,7 @@ func (e *ETCD) listLocalSnapshots() (map[string]snapshotFile, error) { return err } - basename, compressed := strings.CutSuffix(file.Name(), compressedExtension) + basename, compressed := strings.CutSuffix(file.Name(), snapshot.CompressedExtension) ts, err := strconv.ParseInt(basename[strings.LastIndexByte(basename, '-')+1:], 10, 64) if err != nil { ts = file.ModTime().Unix() @@ -443,13 +388,13 @@ func (e *ETCD) listLocalSnapshots() (map[string]snapshotFile, error) { // try to read metadata from disk; don't warn if it is missing as it will not exist // for snapshot files from old releases or if there was no metadata provided. var metadata string - metadataFile := filepath.Join(filepath.Dir(path), "..", metadataDir, file.Name()) + metadataFile := filepath.Join(filepath.Dir(path), "..", snapshot.MetadataDir, file.Name()) if m, err := os.ReadFile(metadataFile); err == nil { logrus.Debugf("Loading snapshot metadata from %s", metadataFile) metadata = base64.StdEncoding.EncodeToString(m) } - sf := snapshotFile{ + sf := snapshot.File{ Name: file.Name(), Location: "file://" + filepath.Join(snapshotDir, file.Name()), NodeName: nodeName, @@ -458,10 +403,10 @@ func (e *ETCD) listLocalSnapshots() (map[string]snapshotFile, error) { Time: time.Unix(ts, 0), }, Size: file.Size(), - Status: successfulSnapshotStatus, + Status: snapshot.SuccessfulStatus, Compressed: compressed, } - sfKey := generateSnapshotConfigMapKey(sf) + sfKey := sf.GenerateConfigMapKey() snapshots[sfKey] = sf return nil }); err != nil { @@ -471,18 +416,21 @@ func (e *ETCD) listLocalSnapshots() (map[string]snapshotFile, error) { return snapshots, nil } -// initS3IfNil initializes the S3 client -// if it hasn't yet been initialized. -func (e *ETCD) initS3IfNil(ctx context.Context) error { - if e.config.EtcdS3 && e.s3 == nil { - s3, err := NewS3(ctx, e.config) +// getS3Client initializes the S3 controller if it hasn't yet been initialized. +// If S3 is or can be initialized successfully, and valid S3 configuration is +// present, a client for the current S3 configuration is returned. +// The context passed here is only used to validate the configuration, +// it does not need to continue to remain uncancelled after the call returns. 
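+// The S3 controller is started once and cached on the ETCD struct; a client for the
+// currently configured EtcdS3 settings is then requested from it on every call.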
+func (e *ETCD) getS3Client(ctx context.Context) (*s3.Client, error) { + if e.s3 == nil { + s3, err := s3.Start(ctx, e.config) if err != nil { - return err + return nil, err } e.s3 = s3 } - return nil + return e.s3.GetClient(ctx, e.config.EtcdS3) } // PruneSnapshots deleted old snapshots in excess of the configured retention count. @@ -502,11 +450,11 @@ func (e *ETCD) PruneSnapshots(ctx context.Context) (*managed.SnapshotResult, err logrus.Errorf("Error applying snapshot retention policy: %v", err) } - if e.config.EtcdS3 { - if err := e.initS3IfNil(ctx); err != nil { + if e.config.EtcdS3 != nil { + if s3client, err := e.getS3Client(ctx); err != nil { logrus.Warnf("Unable to initialize S3 client: %v", err) } else { - deleted, err := e.s3.snapshotRetention(ctx) + deleted, err := s3client.SnapshotRetention(ctx, e.config.EtcdSnapshotRetention, e.config.EtcdSnapshotName) if err != nil { logrus.Errorf("Error applying S3 snapshot retention policy: %v", err) } @@ -524,19 +472,23 @@ func (e *ETCD) ListSnapshots(ctx context.Context) (*k3s.ETCDSnapshotFileList, er snapshotFiles := &k3s.ETCDSnapshotFileList{ TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "List"}, } - if e.config.EtcdS3 { - if err := e.initS3IfNil(ctx); err != nil { + + if e.config.EtcdS3 != nil { + if s3client, err := e.getS3Client(ctx); err != nil { logrus.Warnf("Unable to initialize S3 client: %v", err) - return nil, err - } - sfs, err := e.s3.listSnapshots(ctx) - if err != nil { - return nil, err - } - for k, sf := range sfs { - esf := k3s.NewETCDSnapshotFile("", k, k3s.ETCDSnapshotFile{}) - sf.toETCDSnapshotFile(esf) - snapshotFiles.Items = append(snapshotFiles.Items, *esf) + if !errors.Is(err, s3.ErrNoConfigSecret) { + return nil, errors.Wrap(err, "failed to initialize S3 client") + } + } else { + sfs, err := s3client.ListSnapshots(ctx) + if err != nil { + return nil, err + } + for k, sf := range sfs { + esf := k3s.NewETCDSnapshotFile("", k, k3s.ETCDSnapshotFile{}) + sf.ToETCDSnapshotFile(esf) + snapshotFiles.Items = append(snapshotFiles.Items, *esf) + } } } @@ -546,7 +498,7 @@ func (e *ETCD) ListSnapshots(ctx context.Context) (*k3s.ETCDSnapshotFileList, er } for k, sf := range sfs { esf := k3s.NewETCDSnapshotFile("", k, k3s.ETCDSnapshotFile{}) - sf.toETCDSnapshotFile(esf) + sf.ToETCDSnapshotFile(esf) snapshotFiles.Items = append(snapshotFiles.Items, *esf) } @@ -561,17 +513,22 @@ func (e *ETCD) DeleteSnapshots(ctx context.Context, snapshots []string) (*manage if err != nil { return nil, errors.Wrap(err, "failed to get etcd-snapshot-dir") } - if e.config.EtcdS3 { - if err := e.initS3IfNil(ctx); err != nil { + + var s3client *s3.Client + if e.config.EtcdS3 != nil { + s3client, err = e.getS3Client(ctx) + if err != nil { logrus.Warnf("Unable to initialize S3 client: %v", err) - return nil, err + if !errors.Is(err, s3.ErrNoConfigSecret) { + return nil, errors.Wrap(err, "failed to initialize S3 client") + } } } res := &managed.SnapshotResult{} for _, s := range snapshots { if err := e.deleteSnapshot(filepath.Join(snapshotDir, s)); err != nil { - if isNotExist(err) { + if snapshot.IsNotExist(err) { logrus.Infof("Snapshot %s not found locally", s) } else { logrus.Errorf("Failed to delete local snapshot %s: %v", s, err) @@ -581,9 +538,9 @@ func (e *ETCD) DeleteSnapshots(ctx context.Context, snapshots []string) (*manage logrus.Infof("Snapshot %s deleted locally", s) } - if e.config.EtcdS3 { - if err := e.s3.deleteSnapshot(ctx, s); err != nil { - if isNotExist(err) { + if s3client != nil { + if err := s3client.DeleteSnapshot(ctx, s); err 
!= nil { + if snapshot.IsNotExist(err) { logrus.Infof("Snapshot %s not found in S3", s) } else { logrus.Errorf("Failed to delete S3 snapshot %s: %v", s, err) @@ -599,13 +556,13 @@ func (e *ETCD) DeleteSnapshots(ctx context.Context, snapshots []string) (*manage } func (e *ETCD) deleteSnapshot(snapshotPath string) error { - dir := filepath.Join(filepath.Dir(snapshotPath), "..", metadataDir) + dir := filepath.Join(filepath.Dir(snapshotPath), "..", snapshot.MetadataDir) filename := filepath.Base(snapshotPath) metadataPath := filepath.Join(dir, filename) err := os.Remove(snapshotPath) if err == nil || os.IsNotExist(err) { - if merr := os.Remove(metadataPath); err != nil && !isNotExist(err) { + if merr := os.Remove(metadataPath); err != nil && !snapshot.IsNotExist(err) { err = merr } } @@ -613,27 +570,16 @@ func (e *ETCD) deleteSnapshot(snapshotPath string) error { return err } -func marshalSnapshotFile(sf snapshotFile) ([]byte, error) { - if sf.metadataSource != nil { - if m, err := json.Marshal(sf.metadataSource.Data); err != nil { - logrus.Debugf("Error attempting to marshal extra metadata contained in %s ConfigMap, error: %v", snapshotExtraMetadataConfigMapName, err) - } else { - sf.Metadata = base64.StdEncoding.EncodeToString(m) - } - } - return json.Marshal(sf) -} - // addSnapshotData syncs an internal snapshotFile representation to an ETCDSnapshotFile resource // of the same name. Resources will be created or updated as necessary. -func (e *ETCD) addSnapshotData(sf snapshotFile) error { +func (e *ETCD) addSnapshotData(sf snapshot.File) error { // make sure the K3s factory is initialized. for e.config.Runtime.K3s == nil { runtime.Gosched() } snapshots := e.config.Runtime.K3s.K3s().V1().ETCDSnapshotFile() - esfName := generateSnapshotName(sf) + esfName := sf.GenerateName() var esf *k3s.ETCDSnapshotFile return retry.OnError(snapshotDataBackoff, func(err error) bool { @@ -654,7 +600,7 @@ func (e *ETCD) addSnapshotData(sf snapshotFile) error { // mutate object existing := esf.DeepCopyObject() - sf.toETCDSnapshotFile(esf) + sf.ToETCDSnapshotFile(esf) // create or update as necessary if esf.CreationTimestamp.IsZero() { @@ -671,48 +617,10 @@ func (e *ETCD) addSnapshotData(sf snapshotFile) error { }) } -// generateSnapshotConfigMapKey generates a derived name for the snapshot that is safe for use -// as a configmap key. -func generateSnapshotConfigMapKey(sf snapshotFile) string { - name := invalidKeyChars.ReplaceAllString(sf.Name, "_") - if sf.NodeName == "s3" { - return "s3-" + name - } - return "local-" + name -} - -// generateSnapshotName generates a derived name for the snapshot that is safe for use -// as a resource name. -func generateSnapshotName(sf snapshotFile) string { - name := strings.ToLower(sf.Name) - nodename := sf.nodeSource - if nodename == "" { - nodename = sf.NodeName - } - // Include a digest of the hostname and location to ensure unique resource - // names. Snapshots should already include the hostname, but this ensures we - // don't accidentally hide records if a snapshot with the same name somehow - // exists on multiple nodes. - digest := sha256.Sum256([]byte(nodename + sf.Location)) - // If the lowercase filename isn't usable as a resource name, and short enough that we can include a prefix and suffix, - // generate a safe name derived from the hostname and timestamp. 
- if errs := validation.IsDNS1123Subdomain(name); len(errs) != 0 || len(name)+13 > validation.DNS1123SubdomainMaxLength { - nodename, _, _ := strings.Cut(nodename, ".") - name = fmt.Sprintf("etcd-snapshot-%s-%d", nodename, sf.CreatedAt.Unix()) - if sf.Compressed { - name += compressedExtension - } - } - if sf.NodeName == "s3" { - return "s3-" + name + "-" + hex.EncodeToString(digest[0:])[0:6] - } - return "local-" + name + "-" + hex.EncodeToString(digest[0:])[0:6] -} - // generateETCDSnapshotFileConfigMapKey generates a key that the corresponding // snapshotFile would be stored under in the legacy configmap func generateETCDSnapshotFileConfigMapKey(esf k3s.ETCDSnapshotFile) string { - name := invalidKeyChars.ReplaceAllString(esf.Spec.SnapshotName, "_") + name := snapshot.InvalidKeyChars.ReplaceAllString(esf.Spec.SnapshotName, "_") if esf.Spec.S3 != nil { return "s3-" + name } @@ -757,19 +665,21 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { nodeNames := []string{os.Getenv("NODE_NAME")} // Get snapshots from S3 - if e.config.EtcdS3 { - if err := e.initS3IfNil(ctx); err != nil { + if e.config.EtcdS3 != nil { + if s3client, err := e.getS3Client(ctx); err != nil { logrus.Warnf("Unable to initialize S3 client: %v", err) - return err - } - - if s3Snapshots, err := e.s3.listSnapshots(ctx); err != nil { - logrus.Errorf("Error retrieving S3 snapshots for reconciliation: %v", err) + if !errors.Is(err, s3.ErrNoConfigSecret) { + return errors.Wrap(err, "failed to initialize S3 client") + } } else { - for k, v := range s3Snapshots { - snapshotFiles[k] = v + if s3Snapshots, err := s3client.ListSnapshots(ctx); err != nil { + logrus.Errorf("Error retrieving S3 snapshots for reconciliation: %v", err) + } else { + for k, v := range s3Snapshots { + snapshotFiles[k] = v + } + nodeNames = append(nodeNames, "s3") } - nodeNames = append(nodeNames, "s3") } } @@ -784,9 +694,9 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { for sfKey, sf := range snapshotFiles { logrus.Debugf("Found snapshotFile for %s with key %s", sf.Name, sfKey) // if the configmap has data for this snapshot, and local metadata is empty, - // deserialize the value from the configmap and attempt to load it. - if cmSnapshotValue := snapshotConfigMap.Data[sfKey]; cmSnapshotValue != "" && sf.Metadata == "" && sf.metadataSource == nil { - sfTemp := &snapshotFile{} + // deserialize the value from the configmap and attempt to load iM. 
+ if cmSnapshotValue := snapshotConfigMap.Data[sfKey]; cmSnapshotValue != "" && sf.Metadata == "" && sf.MetadataSource == nil { + sfTemp := &snapshot.File{} if err := json.Unmarshal([]byte(cmSnapshotValue), sfTemp); err != nil { logrus.Warnf("Failed to unmarshal configmap data for snapshot %s: %v", sfKey, err) continue @@ -799,7 +709,7 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { labelSelector := &metav1.LabelSelector{ MatchExpressions: []metav1.LabelSelectorRequirement{{ - Key: labelStorageNode, + Key: snapshot.LabelStorageNode, Operator: metav1.LabelSelectorOpIn, Values: nodeNames, }}, @@ -823,7 +733,7 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { for _, esf := range esfList.Items { sfKey := generateETCDSnapshotFileConfigMapKey(esf) logrus.Debugf("Found ETCDSnapshotFile for %s with key %s", esf.Spec.SnapshotName, sfKey) - if sf, ok := snapshotFiles[sfKey]; ok && generateSnapshotName(sf) == esf.Name { + if sf, ok := snapshotFiles[sfKey]; ok && sf.GenerateName() == esf.Name { // exists in both and names match, don't need to sync delete(snapshotFiles, sfKey) } else { @@ -835,7 +745,7 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { } } if ok { - logrus.Debugf("Name of ETCDSnapshotFile for snapshotFile with key %s does not match: %s vs %s", sfKey, generateSnapshotName(sf), esf.Name) + logrus.Debugf("Name of ETCDSnapshotFile for snapshotFile with key %s does not match: %s vs %s", sfKey, sf.GenerateName(), esf.Name) } else { logrus.Debugf("Key %s not found in snapshotFile list", sfKey) } @@ -904,7 +814,7 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { "path": "/metadata/annotations/" + strings.ReplaceAll(annotationLocalReconciled, "/", "~1"), }, } - if e.config.EtcdS3 { + if e.config.EtcdS3 != nil { patch = append(patch, map[string]string{ "op": "add", "value": now, @@ -942,18 +852,18 @@ func snapshotRetention(retention int, snapshotPrefix string, snapshotDir string) logrus.Infof("Applying snapshot retention=%d to local snapshots with prefix %s in %s", retention, snapshotPrefix, snapshotDir) - var snapshotFiles []snapshotFile + var snapshotFiles []snapshot.File if err := filepath.Walk(snapshotDir, func(path string, info os.FileInfo, err error) error { if info.IsDir() || err != nil { return err } if strings.HasPrefix(info.Name(), snapshotPrefix) { - basename, compressed := strings.CutSuffix(info.Name(), compressedExtension) + basename, compressed := strings.CutSuffix(info.Name(), snapshot.CompressedExtension) ts, err := strconv.ParseInt(basename[strings.LastIndexByte(basename, '-')+1:], 10, 64) if err != nil { ts = info.ModTime().Unix() } - snapshotFiles = append(snapshotFiles, snapshotFile{Name: info.Name(), CreatedAt: &metav1.Time{Time: time.Unix(ts, 0)}, Compressed: compressed}) + snapshotFiles = append(snapshotFiles, snapshot.File{Name: info.Name(), CreatedAt: &metav1.Time{Time: time.Unix(ts, 0)}, Compressed: compressed}) } return nil }); err != nil { @@ -971,7 +881,7 @@ func snapshotRetention(retention int, snapshotPrefix string, snapshotDir string) deleted := []string{} for _, df := range snapshotFiles[retention:] { snapshotPath := filepath.Join(snapshotDir, df.Name) - metadataPath := filepath.Join(snapshotDir, "..", metadataDir, df.Name) + metadataPath := filepath.Join(snapshotDir, "..", snapshot.MetadataDir, df.Name) logrus.Infof("Removing local snapshot %s", snapshotPath) if err := os.Remove(snapshotPath); err != nil { return deleted, err @@ -985,13 +895,6 @@ func snapshotRetention(retention int, 
snapshotPrefix string, snapshotDir string) return deleted, nil } -func isNotExist(err error) bool { - if resp := minio.ToErrorResponse(err); resp.StatusCode == http.StatusNotFound || os.IsNotExist(err) { - return true - } - return false -} - // saveSnapshotMetadata writes extra metadata to disk. // The upload is silently skipped if no extra metadata is provided. func saveSnapshotMetadata(snapshotPath string, extraMetadata *v1.ConfigMap) error { @@ -999,7 +902,7 @@ func saveSnapshotMetadata(snapshotPath string, extraMetadata *v1.ConfigMap) erro return nil } - dir := filepath.Join(filepath.Dir(snapshotPath), "..", metadataDir) + dir := filepath.Join(filepath.Dir(snapshotPath), "..", snapshot.MetadataDir) filename := filepath.Base(snapshotPath) metadataPath := filepath.Join(dir, filename) logrus.Infof("Saving snapshot metadata to %s", metadataPath) @@ -1012,135 +915,3 @@ func saveSnapshotMetadata(snapshotPath string, extraMetadata *v1.ConfigMap) erro } return os.WriteFile(metadataPath, m, 0700) } - -func (sf *snapshotFile) fromETCDSnapshotFile(esf *k3s.ETCDSnapshotFile) { - if esf == nil { - panic("cannot convert from nil ETCDSnapshotFile") - } - - sf.Name = esf.Spec.SnapshotName - sf.Location = esf.Spec.Location - sf.CreatedAt = esf.Status.CreationTime - sf.nodeSource = esf.Spec.NodeName - sf.Compressed = strings.HasSuffix(esf.Spec.SnapshotName, compressedExtension) - - if esf.Status.ReadyToUse != nil && *esf.Status.ReadyToUse { - sf.Status = successfulSnapshotStatus - } else { - sf.Status = failedSnapshotStatus - } - - if esf.Status.Size != nil { - sf.Size = esf.Status.Size.Value() - } - - if esf.Status.Error != nil { - if esf.Status.Error.Time != nil { - sf.CreatedAt = esf.Status.Error.Time - } - message := "etcd snapshot failed" - if esf.Status.Error.Message != nil { - message = *esf.Status.Error.Message - } - sf.Message = base64.StdEncoding.EncodeToString([]byte(message)) - } - - if len(esf.Spec.Metadata) > 0 { - if b, err := json.Marshal(esf.Spec.Metadata); err != nil { - logrus.Warnf("Failed to marshal metadata for %s: %v", esf.Name, err) - } else { - sf.Metadata = base64.StdEncoding.EncodeToString(b) - } - } - - if tokenHash := esf.Annotations[annotationTokenHash]; tokenHash != "" { - sf.tokenHash = tokenHash - } - - if esf.Spec.S3 == nil { - sf.NodeName = esf.Spec.NodeName - } else { - sf.NodeName = "s3" - sf.S3 = &s3Config{ - Endpoint: esf.Spec.S3.Endpoint, - EndpointCA: esf.Spec.S3.EndpointCA, - SkipSSLVerify: esf.Spec.S3.SkipSSLVerify, - Bucket: esf.Spec.S3.Bucket, - Region: esf.Spec.S3.Region, - Folder: esf.Spec.S3.Prefix, - Insecure: esf.Spec.S3.Insecure, - } - } -} - -func (sf *snapshotFile) toETCDSnapshotFile(esf *k3s.ETCDSnapshotFile) { - if esf == nil { - panic("cannot convert to nil ETCDSnapshotFile") - } - esf.Spec.SnapshotName = sf.Name - esf.Spec.Location = sf.Location - esf.Status.CreationTime = sf.CreatedAt - esf.Status.ReadyToUse = ptr.To(sf.Status == successfulSnapshotStatus) - esf.Status.Size = resource.NewQuantity(sf.Size, resource.DecimalSI) - - if sf.nodeSource != "" { - esf.Spec.NodeName = sf.nodeSource - } else { - esf.Spec.NodeName = sf.NodeName - } - - if sf.Message != "" { - var message string - b, err := base64.StdEncoding.DecodeString(sf.Message) - if err != nil { - logrus.Warnf("Failed to decode error message for %s: %v", sf.Name, err) - message = "etcd snapshot failed" - } else { - message = string(b) - } - esf.Status.Error = &k3s.ETCDSnapshotError{ - Time: sf.CreatedAt, - Message: &message, - } - } - - if sf.metadataSource != nil { - esf.Spec.Metadata = 
sf.metadataSource.Data - } else if sf.Metadata != "" { - metadata, err := base64.StdEncoding.DecodeString(sf.Metadata) - if err != nil { - logrus.Warnf("Failed to decode metadata for %s: %v", sf.Name, err) - } else { - if err := json.Unmarshal(metadata, &esf.Spec.Metadata); err != nil { - logrus.Warnf("Failed to unmarshal metadata for %s: %v", sf.Name, err) - } - } - } - - if esf.ObjectMeta.Labels == nil { - esf.ObjectMeta.Labels = map[string]string{} - } - - if esf.ObjectMeta.Annotations == nil { - esf.ObjectMeta.Annotations = map[string]string{} - } - - if sf.tokenHash != "" { - esf.ObjectMeta.Annotations[annotationTokenHash] = sf.tokenHash - } - - if sf.S3 == nil { - esf.ObjectMeta.Labels[labelStorageNode] = esf.Spec.NodeName - } else { - esf.ObjectMeta.Labels[labelStorageNode] = "s3" - esf.Spec.S3 = &k3s.ETCDSnapshotS3{ - Endpoint: sf.S3.Endpoint, - EndpointCA: sf.S3.EndpointCA, - SkipSSLVerify: sf.S3.SkipSSLVerify, - Bucket: sf.S3.Bucket, - Region: sf.S3.Region, - Prefix: sf.S3.Folder, - Insecure: sf.S3.Insecure, - } - } -} diff --git a/pkg/etcd/snapshot/types.go b/pkg/etcd/snapshot/types.go new file mode 100644 index 000000000000..00e93cc6d8ca --- /dev/null +++ b/pkg/etcd/snapshot/types.go @@ -0,0 +1,270 @@ +package snapshot + +import ( + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "os" + "regexp" + "strings" + + k3s "github.com/k3s-io/k3s/pkg/apis/k3s.cattle.io/v1" + "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/version" + "github.com/minio/minio-go/v7" + "github.com/sirupsen/logrus" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/validation" + "k8s.io/utils/ptr" +) + +type SnapshotStatus string + +const ( + SuccessfulStatus SnapshotStatus = "successful" + FailedStatus SnapshotStatus = "failed" + + CompressedExtension = ".zip" + MetadataDir = ".metadata" +) + +var ( + InvalidKeyChars = regexp.MustCompile(`[^-._a-zA-Z0-9]`) + + LabelStorageNode = "etcd." + version.Program + ".cattle.io/snapshot-storage-node" + AnnotationTokenHash = "etcd." + version.Program + ".cattle.io/snapshot-token-hash" + + ExtraMetadataConfigMapName = version.Program + "-etcd-snapshot-extra-metadata" +) + +type S3Config struct { + config.EtcdS3 + // Mask these fields in the embedded struct to avoid serializing their values in the snapshotFile record + AccessKey string `json:"accessKey,omitempty"` + ConfigSecret string `json:"configSecret,omitempty"` + Proxy string `json:"proxy,omitempty"` + SecretKey string `json:"secretKey,omitempty"` + Timeout metav1.Duration `json:"timeout,omitempty"` +} + +// File represents a single snapshot and it's +// metadata. +type File struct { + Name string `json:"name"` + // Location contains the full path of the snapshot. For + // local paths, the location will be prefixed with "file://". + Location string `json:"location,omitempty"` + Metadata string `json:"metadata,omitempty"` + Message string `json:"message,omitempty"` + NodeName string `json:"nodeName,omitempty"` + CreatedAt *metav1.Time `json:"createdAt,omitempty"` + Size int64 `json:"size,omitempty"` + Status SnapshotStatus `json:"status,omitempty"` + S3 *S3Config `json:"s3Config,omitempty"` + Compressed bool `json:"compressed"` + + // these fields are used for the internal representation of the snapshot + // to populate other fields before serialization to the legacy configmap. 
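+ // They are exported so that they can be set from other packages, but are excluded
+ // from serialization by the json:"-" struct tags.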
+ MetadataSource *v1.ConfigMap `json:"-"` + NodeSource string `json:"-"` + TokenHash string `json:"-"` +} + +// GenerateConfigMapKey generates a derived name for the snapshot that is safe for use +// as a configmap key. +func (sf *File) GenerateConfigMapKey() string { + name := InvalidKeyChars.ReplaceAllString(sf.Name, "_") + if sf.NodeName == "s3" { + return "s3-" + name + } + return "local-" + name +} + +// GenerateName generates a derived name for the snapshot that is safe for use +// as a resource name. +func (sf *File) GenerateName() string { + name := strings.ToLower(sf.Name) + nodename := sf.NodeSource + if nodename == "" { + nodename = sf.NodeName + } + // Include a digest of the hostname and location to ensure unique resource + // names. Snapshots should already include the hostname, but this ensures we + // don't accidentally hide records if a snapshot with the same name somehow + // exists on multiple nodes. + digest := sha256.Sum256([]byte(nodename + sf.Location)) + // If the lowercase filename isn't usable as a resource name, and short enough that we can include a prefix and suffix, + // generate a safe name derived from the hostname and timestamp. + if errs := validation.IsDNS1123Subdomain(name); len(errs) != 0 || len(name)+13 > validation.DNS1123SubdomainMaxLength { + nodename, _, _ := strings.Cut(nodename, ".") + name = fmt.Sprintf("etcd-snapshot-%s-%d", nodename, sf.CreatedAt.Unix()) + if sf.Compressed { + name += CompressedExtension + } + } + if sf.NodeName == "s3" { + return "s3-" + name + "-" + hex.EncodeToString(digest[0:])[0:6] + } + return "local-" + name + "-" + hex.EncodeToString(digest[0:])[0:6] +} + +// FromETCDSnapshotFile translates fields to the File from the ETCDSnapshotFile +func (sf *File) FromETCDSnapshotFile(esf *k3s.ETCDSnapshotFile) { + if esf == nil { + panic("cannot convert from nil ETCDSnapshotFile") + } + + sf.Name = esf.Spec.SnapshotName + sf.Location = esf.Spec.Location + sf.CreatedAt = esf.Status.CreationTime + sf.NodeSource = esf.Spec.NodeName + sf.Compressed = strings.HasSuffix(esf.Spec.SnapshotName, CompressedExtension) + + if esf.Status.ReadyToUse != nil && *esf.Status.ReadyToUse { + sf.Status = SuccessfulStatus + } else { + sf.Status = FailedStatus + } + + if esf.Status.Size != nil { + sf.Size = esf.Status.Size.Value() + } + + if esf.Status.Error != nil { + if esf.Status.Error.Time != nil { + sf.CreatedAt = esf.Status.Error.Time + } + message := "etcd snapshot failed" + if esf.Status.Error.Message != nil { + message = *esf.Status.Error.Message + } + sf.Message = base64.StdEncoding.EncodeToString([]byte(message)) + } + + if len(esf.Spec.Metadata) > 0 { + if b, err := json.Marshal(esf.Spec.Metadata); err != nil { + logrus.Warnf("Failed to marshal metadata for %s: %v", esf.Name, err) + } else { + sf.Metadata = base64.StdEncoding.EncodeToString(b) + } + } + + if tokenHash := esf.Annotations[AnnotationTokenHash]; tokenHash != "" { + sf.TokenHash = tokenHash + } + + if esf.Spec.S3 == nil { + sf.NodeName = esf.Spec.NodeName + } else { + sf.NodeName = "s3" + sf.S3 = &S3Config{ + EtcdS3: config.EtcdS3{ + Endpoint: esf.Spec.S3.Endpoint, + EndpointCA: esf.Spec.S3.EndpointCA, + SkipSSLVerify: esf.Spec.S3.SkipSSLVerify, + Bucket: esf.Spec.S3.Bucket, + Region: esf.Spec.S3.Region, + Folder: esf.Spec.S3.Prefix, + Insecure: esf.Spec.S3.Insecure, + }, + } + } +} + +// ToETCDSnapshotFile translates fields from the File to the ETCDSnapshotFile +func (sf *File) ToETCDSnapshotFile(esf *k3s.ETCDSnapshotFile) { + if esf == nil { + panic("cannot convert to nil 
ETCDSnapshotFile") + } + esf.Spec.SnapshotName = sf.Name + esf.Spec.Location = sf.Location + esf.Status.CreationTime = sf.CreatedAt + esf.Status.ReadyToUse = ptr.To(sf.Status == SuccessfulStatus) + esf.Status.Size = resource.NewQuantity(sf.Size, resource.DecimalSI) + + if sf.NodeSource != "" { + esf.Spec.NodeName = sf.NodeSource + } else { + esf.Spec.NodeName = sf.NodeName + } + + if sf.Message != "" { + var message string + b, err := base64.StdEncoding.DecodeString(sf.Message) + if err != nil { + logrus.Warnf("Failed to decode error message for %s: %v", sf.Name, err) + message = "etcd snapshot failed" + } else { + message = string(b) + } + esf.Status.Error = &k3s.ETCDSnapshotError{ + Time: sf.CreatedAt, + Message: &message, + } + } + + if sf.MetadataSource != nil { + esf.Spec.Metadata = sf.MetadataSource.Data + } else if sf.Metadata != "" { + metadata, err := base64.StdEncoding.DecodeString(sf.Metadata) + if err != nil { + logrus.Warnf("Failed to decode metadata for %s: %v", sf.Name, err) + } else { + if err := json.Unmarshal(metadata, &esf.Spec.Metadata); err != nil { + logrus.Warnf("Failed to unmarshal metadata for %s: %v", sf.Name, err) + } + } + } + + if esf.ObjectMeta.Labels == nil { + esf.ObjectMeta.Labels = map[string]string{} + } + + if esf.ObjectMeta.Annotations == nil { + esf.ObjectMeta.Annotations = map[string]string{} + } + + if sf.TokenHash != "" { + esf.ObjectMeta.Annotations[AnnotationTokenHash] = sf.TokenHash + } + + if sf.S3 == nil { + esf.ObjectMeta.Labels[LabelStorageNode] = esf.Spec.NodeName + } else { + esf.ObjectMeta.Labels[LabelStorageNode] = "s3" + esf.Spec.S3 = &k3s.ETCDSnapshotS3{ + Endpoint: sf.S3.Endpoint, + EndpointCA: sf.S3.EndpointCA, + SkipSSLVerify: sf.S3.SkipSSLVerify, + Bucket: sf.S3.Bucket, + Region: sf.S3.Region, + Prefix: sf.S3.Folder, + Insecure: sf.S3.Insecure, + } + } +} + +// Marshal returns the JSON encoding of the snapshot File, with metadata inlined as base64. 
+func (sf *File) Marshal() ([]byte, error) { + if sf.MetadataSource != nil { + if m, err := json.Marshal(sf.MetadataSource.Data); err != nil { + logrus.Debugf("Error attempting to marshal extra metadata contained in %s ConfigMap, error: %v", ExtraMetadataConfigMapName, err) + } else { + sf.Metadata = base64.StdEncoding.EncodeToString(m) + } + } + return json.Marshal(sf) +} + +// IsNotExist returns true if the error is from http.StatusNotFound or os.IsNotExist +func IsNotExist(err error) bool { + if resp := minio.ToErrorResponse(err); resp.StatusCode == http.StatusNotFound || os.IsNotExist(err) { + return true + } + return false +} diff --git a/pkg/etcd/snapshot_controller.go b/pkg/etcd/snapshot_controller.go index 5d43de645791..9c62cc9c5022 100644 --- a/pkg/etcd/snapshot_controller.go +++ b/pkg/etcd/snapshot_controller.go @@ -9,6 +9,7 @@ import ( "time" apisv1 "github.com/k3s-io/k3s/pkg/apis/k3s.cattle.io/v1" + "github.com/k3s-io/k3s/pkg/etcd/snapshot" controllersv1 "github.com/k3s-io/k3s/pkg/generated/controllers/k3s.cattle.io/v1" "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" @@ -81,10 +82,10 @@ func (e *etcdSnapshotHandler) sync(key string, esf *apisv1.ETCDSnapshotFile) (*a return nil, nil } - sf := snapshotFile{} - sf.fromETCDSnapshotFile(esf) - sfKey := generateSnapshotConfigMapKey(sf) - m, err := marshalSnapshotFile(sf) + sf := &snapshot.File{} + sf.FromETCDSnapshotFile(esf) + sfKey := sf.GenerateConfigMapKey() + m, err := sf.Marshal() if err != nil { return nil, errors.Wrap(err, "failed to marshal snapshot ConfigMap data") } @@ -283,9 +284,9 @@ func (e *etcdSnapshotHandler) reconcile() error { // Ensure keys for existing snapshots for sfKey, esf := range snapshots { - sf := snapshotFile{} - sf.fromETCDSnapshotFile(esf) - m, err := marshalSnapshotFile(sf) + sf := &snapshot.File{} + sf.FromETCDSnapshotFile(esf) + m, err := sf.Marshal() if err != nil { logrus.Warnf("Failed to marshal snapshot ConfigMap data for %s", sfKey) continue @@ -327,12 +328,12 @@ func pruneConfigMap(snapshotConfigMap *v1.ConfigMap, pruneCount int) error { return errors.New("unable to reduce snapshot ConfigMap size by eliding old snapshots") } - var snapshotFiles []snapshotFile + var snapshotFiles []snapshot.File retention := len(snapshotConfigMap.Data) - pruneCount for name := range snapshotConfigMap.Data { - basename, compressed := strings.CutSuffix(name, compressedExtension) + basename, compressed := strings.CutSuffix(name, snapshot.CompressedExtension) ts, _ := strconv.ParseInt(basename[strings.LastIndexByte(basename, '-')+1:], 10, 64) - snapshotFiles = append(snapshotFiles, snapshotFile{Name: name, CreatedAt: &metav1.Time{Time: time.Unix(ts, 0)}, Compressed: compressed}) + snapshotFiles = append(snapshotFiles, snapshot.File{Name: name, CreatedAt: &metav1.Time{Time: time.Unix(ts, 0)}, Compressed: compressed}) } // sort newest-first so we can prune entries past the retention count diff --git a/pkg/etcd/snapshot_handler.go b/pkg/etcd/snapshot_handler.go index 0bae2e0401b7..23eefbc4c45b 100644 --- a/pkg/etcd/snapshot_handler.go +++ b/pkg/etcd/snapshot_handler.go @@ -11,8 +11,8 @@ import ( "github.com/k3s-io/k3s/pkg/cluster/managed" "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/util" + "github.com/pkg/errors" "github.com/sirupsen/logrus" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) type SnapshotOperation string @@ -24,21 +24,13 @@ const ( SnapshotOperationDelete SnapshotOperation = "delete" ) -type SnapshotRequestS3 struct { - s3Config - Timeout metav1.Duration 
`json:"timeout"` - AccessKey string `json:"accessKey"` - SecretKey string `json:"secretKey"` -} - type SnapshotRequest struct { Operation SnapshotOperation `json:"operation"` Name []string `json:"name,omitempty"` Dir *string `json:"dir,omitempty"` Compress *bool `json:"compress,omitempty"` Retention *int `json:"retention,omitempty"` - - S3 *SnapshotRequestS3 `json:"s3,omitempty"` + S3 *config.EtcdS3 `json:"s3,omitempty"` ctx context.Context } @@ -76,9 +68,12 @@ func (e *ETCD) snapshotHandler() http.Handler { } func (e *ETCD) handleList(rw http.ResponseWriter, req *http.Request) error { - if err := e.initS3IfNil(req.Context()); err != nil { - util.SendError(err, rw, req, http.StatusBadRequest) - return nil + if e.config.EtcdS3 != nil { + if _, err := e.getS3Client(req.Context()); err != nil { + err = errors.Wrap(err, "failed to initialize S3 client") + util.SendError(err, rw, req, http.StatusBadRequest) + return nil + } } sf, err := e.ListSnapshots(req.Context()) if sf == nil { @@ -90,9 +85,12 @@ func (e *ETCD) handleList(rw http.ResponseWriter, req *http.Request) error { } func (e *ETCD) handleSave(rw http.ResponseWriter, req *http.Request) error { - if err := e.initS3IfNil(req.Context()); err != nil { - util.SendError(err, rw, req, http.StatusBadRequest) - return nil + if e.config.EtcdS3 != nil { + if _, err := e.getS3Client(req.Context()); err != nil { + err = errors.Wrap(err, "failed to initialize S3 client") + util.SendError(err, rw, req, http.StatusBadRequest) + return nil + } } sr, err := e.Snapshot(req.Context()) if sr == nil { @@ -104,9 +102,12 @@ func (e *ETCD) handleSave(rw http.ResponseWriter, req *http.Request) error { } func (e *ETCD) handlePrune(rw http.ResponseWriter, req *http.Request) error { - if err := e.initS3IfNil(req.Context()); err != nil { - util.SendError(err, rw, req, http.StatusBadRequest) - return nil + if e.config.EtcdS3 != nil { + if _, err := e.getS3Client(req.Context()); err != nil { + err = errors.Wrap(err, "failed to initialize S3 client") + util.SendError(err, rw, req, http.StatusBadRequest) + return nil + } } sr, err := e.PruneSnapshots(req.Context()) if sr == nil { @@ -118,9 +119,12 @@ func (e *ETCD) handlePrune(rw http.ResponseWriter, req *http.Request) error { } func (e *ETCD) handleDelete(rw http.ResponseWriter, req *http.Request, snapshots []string) error { - if err := e.initS3IfNil(req.Context()); err != nil { - util.SendError(err, rw, req, http.StatusBadRequest) - return nil + if e.config.EtcdS3 != nil { + if _, err := e.getS3Client(req.Context()); err != nil { + err = errors.Wrap(err, "failed to initialize S3 client") + util.SendError(err, rw, req, http.StatusBadRequest) + return nil + } } sr, err := e.DeleteSnapshots(req.Context(), snapshots) if sr == nil { @@ -149,7 +153,9 @@ func (e *ETCD) withRequest(sr *SnapshotRequest) *ETCD { EtcdSnapshotCompress: e.config.EtcdSnapshotCompress, EtcdSnapshotName: e.config.EtcdSnapshotName, EtcdSnapshotRetention: e.config.EtcdSnapshotRetention, + EtcdS3: sr.S3, }, + s3: e.s3, name: e.name, address: e.address, cron: e.cron, @@ -168,19 +174,6 @@ func (e *ETCD) withRequest(sr *SnapshotRequest) *ETCD { if sr.Retention != nil { re.config.EtcdSnapshotRetention = *sr.Retention } - if sr.S3 != nil { - re.config.EtcdS3 = true - re.config.EtcdS3AccessKey = sr.S3.AccessKey - re.config.EtcdS3BucketName = sr.S3.Bucket - re.config.EtcdS3Endpoint = sr.S3.Endpoint - re.config.EtcdS3EndpointCA = sr.S3.EndpointCA - re.config.EtcdS3Folder = sr.S3.Folder - re.config.EtcdS3Insecure = sr.S3.Insecure - re.config.EtcdS3Region = 
sr.S3.Region - re.config.EtcdS3SecretKey = sr.S3.SecretKey - re.config.EtcdS3SkipSSLVerify = sr.S3.SkipSSLVerify - re.config.EtcdS3Timeout = sr.S3.Timeout.Duration - } return re } diff --git a/tests/e2e/s3/Vagrantfile b/tests/e2e/s3/Vagrantfile index 652a990c12c5..75c44426607f 100644 --- a/tests/e2e/s3/Vagrantfile +++ b/tests/e2e/s3/Vagrantfile @@ -46,13 +46,8 @@ def provision(vm, role, role_num, node_num) cluster-init: true etcd-snapshot-schedule-cron: '*/1 * * * *' etcd-snapshot-retention: 2 - etcd-s3-insecure: true - etcd-s3-bucket: test-bucket - etcd-s3-folder: test-folder etcd-s3: true - etcd-s3-endpoint: localhost:9090 - etcd-s3-skip-ssl-verify: true - etcd-s3-access-key: test + etcd-s3-config-secret: k3s-etcd-s3-config YAML k3s.env = %W[K3S_KUBECONFIG_MODE=0644 #{install_type}] k3s.config_mode = '0644' # side-step https://github.com/k3s-io/k3s/issues/4321 diff --git a/tests/e2e/s3/s3_test.go b/tests/e2e/s3/s3_test.go index f1aee914a21a..ac203f63d4e0 100644 --- a/tests/e2e/s3/s3_test.go +++ b/tests/e2e/s3/s3_test.go @@ -87,7 +87,31 @@ var _ = Describe("Verify Create", Ordered, func() { fmt.Println(res) Expect(err).NotTo(HaveOccurred()) }) - It("save s3 snapshot", func() { + It("save s3 snapshot using CLI", func() { + res, err := e2e.RunCmdOnNode("k3s etcd-snapshot save "+ + "--etcd-s3-insecure=true "+ + "--etcd-s3-bucket=test-bucket "+ + "--etcd-s3-folder=test-folder "+ + "--etcd-s3-endpoint=localhost:9090 "+ + "--etcd-s3-skip-ssl-verify=true "+ + "--etcd-s3-access-key=test ", + serverNodeNames[0]) + Expect(err).NotTo(HaveOccurred()) + Expect(res).To(ContainSubstring("Snapshot on-demand-server-0")) + }) + It("creates s3 config secret", func() { + res, err := e2e.RunCmdOnNode("k3s kubectl create secret generic k3s-etcd-s3-config --namespace=kube-system "+ + "--from-literal=etcd-s3-insecure=true "+ + "--from-literal=etcd-s3-bucket=test-bucket "+ + "--from-literal=etcd-s3-folder=test-folder "+ + "--from-literal=etcd-s3-endpoint=localhost:9090 "+ + "--from-literal=etcd-s3-skip-ssl-verify=true "+ + "--from-literal=etcd-s3-access-key=test ", + serverNodeNames[0]) + Expect(err).NotTo(HaveOccurred()) + Expect(res).To(ContainSubstring("secret/k3s-etcd-s3-config created")) + }) + It("save s3 snapshot using secret", func() { res, err := e2e.RunCmdOnNode("k3s etcd-snapshot save", serverNodeNames[0]) Expect(err).NotTo(HaveOccurred()) Expect(res).To(ContainSubstring("Snapshot on-demand-server-0")) From 4ff3422f0d0e3f2f1cb3e29b3539461fd5e99864 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Thu, 11 Jul 2024 20:37:56 +0000 Subject: [PATCH 07/12] Bump kine to v0.11.11 https://github.com/k3s-io/kine/compare/v0.11.9...v0.11.11 Signed-off-by: Brad Davidson --- go.mod | 8 ++++---- go.sum | 15 ++++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index e4b3ab7331d3..88e45a929e76 100644 --- a/go.mod +++ b/go.mod @@ -105,7 +105,7 @@ require ( github.com/joho/godotenv v1.5.1 github.com/json-iterator/go v1.1.12 github.com/k3s-io/helm-controller v0.15.10 - github.com/k3s-io/kine v0.11.9 + github.com/k3s-io/kine v0.11.11 github.com/klauspost/compress v1.17.7 github.com/kubernetes-sigs/cri-tools v0.0.0-00010101000000-000000000000 github.com/lib/pq v1.10.2 @@ -223,7 +223,7 @@ require ( github.com/containers/ocicrypt v1.1.10 // indirect github.com/coreos/go-oidc v2.2.1+incompatible // indirect github.com/coreos/go-semver v0.3.1 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect 
github.com/cyphar/filepath-securejoin v0.2.4 // indirect github.com/danwinship/knftables v0.0.13 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect @@ -420,14 +420,14 @@ require ( github.com/tchap/go-patricia/v2 v2.3.1 // indirect github.com/tidwall/btree v1.6.0 // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 // indirect - github.com/urfave/cli/v2 v2.26.0 // indirect + github.com/urfave/cli/v2 v2.27.2 // indirect github.com/vbatts/tar-split v0.11.5 // indirect github.com/vishvananda/netns v0.0.4 // indirect github.com/vmware/govmomi v0.30.6 // indirect github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 // indirect github.com/xlab/treeprint v1.2.0 // indirect - github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect + github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect go.etcd.io/bbolt v1.3.9 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.13 // indirect go.etcd.io/etcd/client/v2 v2.305.13 // indirect diff --git a/go.sum b/go.sum index 12a0bf03c26a..24723a496349 100644 --- a/go.sum +++ b/go.sum @@ -455,8 +455,9 @@ github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwc github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= @@ -956,8 +957,8 @@ github.com/k3s-io/etcd/server/v3 v3.5.13-k3s1 h1:Pqcxkg7V60c26ZpHoekP9QoUdLuduxF github.com/k3s-io/etcd/server/v3 v3.5.13-k3s1/go.mod h1:K/8nbsGupHqmr5MkgaZpLlH1QdX1pcNQLAkODy44XcQ= github.com/k3s-io/helm-controller v0.15.10 h1:TIfbbCbv8mJ1AquPzSxH3vMqIcqfgZ9Pr/Pq/jka/zc= github.com/k3s-io/helm-controller v0.15.10/go.mod h1:AYitg40howLjKloL/zdjDDOPL1jg/K5R4af0tQcyPR8= -github.com/k3s-io/kine v0.11.9 h1:7HfWSwtOowb7GuV6nECnNlFKShgRgVBLdWXj0/4t0sE= -github.com/k3s-io/kine v0.11.9/go.mod h1:N8rc1GDmEvvYRuTxhKTZfSc4fm/vyI6GbDxwBjccAjs= +github.com/k3s-io/kine v0.11.11 h1:f1DhpGNjCDVd1HFWPbeA824YP7MtsrKgstoJ5M0SRgs= +github.com/k3s-io/kine v0.11.11/go.mod h1:L4x3qotFebVh1ZVzYwFVL5PPfqw2sRJTjDTIeViO70Y= github.com/k3s-io/klog/v2 v2.100.1-k3s1 h1:xb/Ta8dpQuIZueQEw2YTZUYrKoILdBmPiITVkNmYPa0= github.com/k3s-io/klog/v2 v2.100.1-k3s1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= github.com/k3s-io/kube-router/v2 v2.1.2 h1:/eLfIsELLsqqRW1skIJ2qe7bWL6IZZ9Hg3IniIgObXo= @@ -1594,8 +1595,8 @@ github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60Nt github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8= github.com/urfave/cli v1.22.14 h1:ebbhrRiGK2i4naQJr+1Xj92HXZCrK7MsyTS/ob3HnAk= github.com/urfave/cli v1.22.14/go.mod 
h1:X0eDS6pD6Exaclxm99NJ3FiCDRED7vIHpx2mDOHLvkA= -github.com/urfave/cli/v2 v2.26.0 h1:3f3AMg3HpThFNT4I++TKOejZO8yU55t3JnnSr4S4QEI= -github.com/urfave/cli/v2 v2.26.0/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= +github.com/urfave/cli/v2 v2.27.2 h1:6e0H+AkS+zDckwPCUrZkKX38mRaau4nL2uipkJpbkcI= +github.com/urfave/cli/v2 v2.27.2/go.mod h1:g0+79LmHHATl7DAcHO99smiR/T7uGLw84w8Y42x+4eM= github.com/vbatts/tar-split v0.11.2/go.mod h1:vV3ZuO2yWSVsz+pfFzDG/upWH1JhjOiEaWq6kXyQ3VI= github.com/vbatts/tar-split v0.11.5 h1:3bHCTIheBm1qFTcgh9oPu+nNBtX+XJIupG/vacinCts= github.com/vbatts/tar-split v0.11.5/go.mod h1:yZbwRsSeGjusneWgA781EKej9HF8vme8okylkAeNKLk= @@ -1633,8 +1634,8 @@ github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510/go.mod h1:UETIi67q github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= +github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 h1:+qGGcbkzsfDQNPPe9UDgpxAWQrhbbBXOYJFQDq/dtJw= +github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913/go.mod h1:4aEEwZQutDLsQv2Deui4iYQ6DWTxR14g6m8Wv88+Xqk= github.com/yashtewari/glob-intersection v0.1.0/go.mod h1:LK7pIC3piUjovexikBbJ26Yml7g8xa5bsjfx2v1fwok= github.com/yashtewari/glob-intersection v0.2.0/go.mod h1:LK7pIC3piUjovexikBbJ26Yml7g8xa5bsjfx2v1fwok= github.com/yl2chen/cidranger v1.0.2 h1:lbOWZVCG1tCRX4u24kuM1Tb4nHqWkDxwLdoS+SevawU= From 2d8a18a0ac41e47f579c654ce21255151f599895 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 12 Jul 2024 16:03:15 +0000 Subject: [PATCH 08/12] Fix reentrant rlock in loadbalancer.dialContext Signed-off-by: Brad Davidson --- pkg/agent/loadbalancer/servers.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pkg/agent/loadbalancer/servers.go b/pkg/agent/loadbalancer/servers.go index 3564a6a4ee03..6b7f25606064 100644 --- a/pkg/agent/loadbalancer/servers.go +++ b/pkg/agent/loadbalancer/servers.go @@ -111,10 +111,12 @@ func (lb *LoadBalancer) setServers(serverAddresses []string) bool { return true } +// nextServer attempts to get the next server in the loadbalancer server list. +// If another goroutine has already updated the current server address to point at +// a different address than just failed, nothing is changed. Otherwise, a new server address +// is stored to the currentServerAddress field, and returned for use. +// This function must always be called by a goroutine that holds a read lock on the loadbalancer mutex. func (lb *LoadBalancer) nextServer(failedServer string) (string, error) { - lb.mutex.RLock() - defer lb.mutex.RUnlock() - // note: these fields are not protected by the mutex, so we clamp the index value and update // the index/current address using local variables, to avoid time-of-check vs time-of-use // race conditions caused by goroutine A incrementing it in between the time goroutine B From ec4288f7279e0cab4efb45d5ec41be71e6f5c586 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 12 Jul 2024 17:24:52 +0000 Subject: [PATCH 09/12] Fix agents removing configured supervisor address We shouldn't be replacing the configured server address on agents. 
Doing so breaks the agent's ability to fall back to the fixed registration endpoint when all servers are down, since we replaced it with the first discovered apiserver address. The fixed registration endpoint will be restored as default when the service is restarted, but this is not the correct behavior. This should have only been done on etcd-only nodes that start up using their local supervisor, but need to switch to a control-plane node as soon as one is available. Signed-off-by: Brad Davidson --- pkg/agent/config/config.go | 2 +- pkg/agent/run.go | 19 +++++++++++++++---- pkg/agent/tunnel/tunnel.go | 21 ++++++++++++++++++--- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/pkg/agent/config/config.go b/pkg/agent/config/config.go index 9318bc6e4677..e12cb3821232 100644 --- a/pkg/agent/config/config.go +++ b/pkg/agent/config/config.go @@ -103,7 +103,7 @@ func APIServers(ctx context.Context, node *config.Node, proxy proxy.Proxy) []str return false, err } if len(addresses) == 0 { - logrus.Infof("Waiting for apiserver addresses") + logrus.Infof("Waiting for supervisor to provide apiserver addresses") return false, nil } return true, nil diff --git a/pkg/agent/run.go b/pkg/agent/run.go index f3342767ad29..aa9f5a5ce5d7 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -530,20 +530,31 @@ func setupTunnelAndRunAgent(ctx context.Context, nodeConfig *daemonconfig.Node, } func waitForAPIServerAddresses(ctx context.Context, nodeConfig *daemonconfig.Node, cfg cmds.Agent, proxy proxy.Proxy) error { + var localSupervisorDefault bool + if addresses := proxy.SupervisorAddresses(); len(addresses) > 0 { + host, _, _ := net.SplitHostPort(addresses[0]) + if host == "127.0.0.1" || host == "::1" { + localSupervisorDefault = true + } + } + for { select { case <-time.After(5 * time.Second): - logrus.Info("Waiting for apiserver addresses") + logrus.Info("Waiting for control-plane node to register apiserver addresses in etcd") case addresses := <-cfg.APIAddressCh: for i, a := range addresses { host, _, err := net.SplitHostPort(a) if err == nil { addresses[i] = net.JoinHostPort(host, strconv.Itoa(nodeConfig.ServerHTTPSPort)) - if i == 0 { - proxy.SetSupervisorDefault(addresses[i]) - } } } + // If this is an etcd-only node that started up using its local supervisor, + // switch to using a control-plane node as the supervisor. Otherwise, leave the + // configured server address as the default. + if localSupervisorDefault && len(addresses) > 0 { + proxy.SetSupervisorDefault(addresses[0]) + } proxy.Update(addresses) return nil case <-ctx.Done(): diff --git a/pkg/agent/tunnel/tunnel.go b/pkg/agent/tunnel/tunnel.go index 79122c6b1f16..479288e0fb28 100644 --- a/pkg/agent/tunnel/tunnel.go +++ b/pkg/agent/tunnel/tunnel.go @@ -124,18 +124,33 @@ func Setup(ctx context.Context, config *daemonconfig.Node, proxy proxy.Proxy) er // The loadbalancer is only disabled when there is a local apiserver. Servers without a local // apiserver load-balance to themselves initially, then switch over to an apiserver node as soon // as we get some addresses from the code below. 
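+	// Remember whether the agent was started against a local (loopback) supervisor; only in that
+	// case is the default supervisor address switched over to a discovered apiserver address below.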
+ var localSupervisorDefault bool + if addresses := proxy.SupervisorAddresses(); len(addresses) > 0 { + host, _, _ := net.SplitHostPort(addresses[0]) + if host == "127.0.0.1" || host == "::1" { + localSupervisorDefault = true + } + } + if proxy.IsSupervisorLBEnabled() && proxy.SupervisorURL() != "" { logrus.Info("Getting list of apiserver endpoints from server") // If not running an apiserver locally, try to get a list of apiservers from the server we're // connecting to. If that fails, fall back to querying the endpoints list from Kubernetes. This // fallback requires that the server we're joining be running an apiserver, but is the only safe // thing to do if its supervisor is down-level and can't provide us with an endpoint list. - if addresses := agentconfig.APIServers(ctx, config, proxy); len(addresses) > 0 { - proxy.SetSupervisorDefault(addresses[0]) + addresses := agentconfig.APIServers(ctx, config, proxy) + logrus.Infof("Got apiserver addresses from supervisor: %v", addresses) + + if len(addresses) > 0 { + if localSupervisorDefault { + proxy.SetSupervisorDefault(addresses[0]) + } proxy.Update(addresses) } else { if endpoint, _ := client.CoreV1().Endpoints(metav1.NamespaceDefault).Get(ctx, "kubernetes", metav1.GetOptions{}); endpoint != nil { - if addresses := util.GetAddresses(endpoint); len(addresses) > 0 { + addresses = util.GetAddresses(endpoint) + logrus.Infof("Got apiserver addresses from kubernetes endpoints: %v", addresses) + if len(addresses) > 0 { proxy.Update(addresses) } } From 935b96ec012037c2dd687f22b46c6fa00d4ec5cf Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Sat, 13 Jul 2024 22:57:53 +0000 Subject: [PATCH 10/12] Fix IPv6 primary node-ip handling I should have caught `[]string{cfg.NodeIP}[0]` and `[]string{envInfo.NodeIP.String()}[0]` in code review... Signed-off-by: Brad Davidson --- pkg/agent/config/config.go | 3 +-- pkg/agent/run.go | 2 +- pkg/daemons/agent/agent_linux.go | 6 ++---- pkg/daemons/agent/agent_windows.go | 25 +++++++++++++++++++------ 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/pkg/agent/config/config.go b/pkg/agent/config/config.go index e12cb3821232..3f691d492a5e 100644 --- a/pkg/agent/config/config.go +++ b/pkg/agent/config/config.go @@ -370,10 +370,9 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N if err != nil { return nil, errors.Wrap(err, "failed to retrieve configuration from server") } - // If the supervisor and externally-facing apiserver are not on the same port, tell the proxy where to find the apiserver. 
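+	// The first valid node-ip is used to pick the IP family for the apiserver proxy.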
if controlConfig.SupervisorPort != controlConfig.HTTPSPort { - isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{envInfo.NodeIP.String()}[0])) + isIPv6 := utilsnet.IsIPv6(net.ParseIP(util.GetFirstValidIPString(envInfo.NodeIP))) if err := proxy.SetAPIServerPort(controlConfig.HTTPSPort, isIPv6); err != nil { return nil, errors.Wrapf(err, "failed to set apiserver port to %d", controlConfig.HTTPSPort) } diff --git a/pkg/agent/run.go b/pkg/agent/run.go index aa9f5a5ce5d7..93b4e27b6230 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -322,7 +322,7 @@ func createProxyAndValidateToken(ctx context.Context, cfg *cmds.Agent) (proxy.Pr if err := os.MkdirAll(agentDir, 0700); err != nil { return nil, err } - isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{cfg.NodeIP.String()}[0])) + isIPv6 := utilsnet.IsIPv6(net.ParseIP(util.GetFirstValidIPString(cfg.NodeIP))) proxy, err := proxy.NewSupervisorProxy(ctx, !cfg.DisableLoadBalancer, agentDir, cfg.ServerURL, cfg.LBServerPort, isIPv6) if err != nil { diff --git a/pkg/daemons/agent/agent_linux.go b/pkg/daemons/agent/agent_linux.go index 23f7b46a6438..c0af31f78fa0 100644 --- a/pkg/daemons/agent/agent_linux.go +++ b/pkg/daemons/agent/agent_linux.go @@ -34,8 +34,7 @@ func createRootlessConfig(argsMap map[string]string, controllers map[string]bool func kubeProxyArgs(cfg *config.Agent) map[string]string { bindAddress := "127.0.0.1" - isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{cfg.NodeIP}[0])) - if isIPv6 { + if utilsnet.IsIPv6(net.ParseIP(cfg.NodeIP)) { bindAddress = "::1" } argsMap := map[string]string{ @@ -67,8 +66,7 @@ func kubeProxyArgs(cfg *config.Agent) map[string]string { func kubeletArgs(cfg *config.Agent) map[string]string { bindAddress := "127.0.0.1" - isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{cfg.NodeIP}[0])) - if isIPv6 { + if utilsnet.IsIPv6(net.ParseIP(cfg.NodeIP)) { bindAddress = "::1" } argsMap := map[string]string{ diff --git a/pkg/daemons/agent/agent_windows.go b/pkg/daemons/agent/agent_windows.go index eb020afdb8e5..11d6605b241f 100644 --- a/pkg/daemons/agent/agent_windows.go +++ b/pkg/daemons/agent/agent_windows.go @@ -4,6 +4,7 @@ package agent import ( + "net" "os" "path/filepath" "strings" @@ -11,8 +12,8 @@ import ( "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/util" "github.com/sirupsen/logrus" - "k8s.io/apimachinery/pkg/util/net" "k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes" + utilsnet "k8s.io/utils/net" ) const ( @@ -21,8 +22,7 @@ const ( func kubeProxyArgs(cfg *config.Agent) map[string]string { bindAddress := "127.0.0.1" - _, IPv6only, _ := util.GetFirstString([]string{cfg.NodeIP}) - if IPv6only { + if utilsnet.IsIPv6(net.ParseIP(cfg.NodeIP)) { bindAddress = "::1" } argsMap := map[string]string{ @@ -98,9 +98,22 @@ func kubeletArgs(cfg *config.Agent) map[string]string { if cfg.NodeName != "" { argsMap["hostname-override"] = cfg.NodeName } - defaultIP, err := net.ChooseHostInterface() - if err != nil || defaultIP.String() != cfg.NodeIP { - argsMap["node-ip"] = cfg.NodeIP + + // If the embedded CCM is disabled, don't assume that dual-stack node IPs are safe. + // When using an external CCM, the user wants dual-stack node IPs, they will need to set the node-ip kubelet arg directly. + // This should be fine since most cloud providers have their own way of finding node IPs that doesn't depend on the kubelet + // setting them. 
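+	// When the embedded CCM is disabled, a single node IP is only set if the configured IPs are not dual-stack;
+	// with the embedded CCM enabled, the CloudDualStackNodeIPs feature-gate is turned on and the full
+	// node-ip list is passed to the kubelet.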
+ if cfg.DisableCCM { + dualStack, err := utilsnet.IsDualStackIPs(cfg.NodeIPs) + if err == nil && !dualStack { + argsMap["node-ip"] = cfg.NodeIP + } + } else { + // Cluster is using the embedded CCM, we know that the feature-gate will be enabled there as well. + argsMap["feature-gates"] = util.AddFeatureGate(argsMap["feature-gates"], "CloudDualStackNodeIPs=true") + if nodeIPs := util.JoinIPs(cfg.NodeIPs); nodeIPs != "" { + argsMap["node-ip"] = util.JoinIPs(cfg.NodeIPs) + } } argsMap["node-labels"] = strings.Join(cfg.NodeLabels, ",") From 92906b06ea6088746de89097c3344449b2f6b148 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Sun, 14 Jul 2024 00:04:08 +0000 Subject: [PATCH 11/12] Add dial duration to debug error message This should give us more detail on how long dials take before failing, so that we can perhaps better tune the retry loop in the future. Signed-off-by: Brad Davidson --- pkg/agent/loadbalancer/loadbalancer.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/agent/loadbalancer/loadbalancer.go b/pkg/agent/loadbalancer/loadbalancer.go index 36019470c8d2..567d825a2bb7 100644 --- a/pkg/agent/loadbalancer/loadbalancer.go +++ b/pkg/agent/loadbalancer/loadbalancer.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "sync" + "time" "github.com/k3s-io/k3s/pkg/version" "github.com/sirupsen/logrus" @@ -167,11 +168,12 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net if server == nil || targetServer == "" { logrus.Debugf("Nil server for load balancer %s: %s", lb.serviceName, targetServer) } else if allChecksFailed || server.healthCheck() { + dialTime := time.Now() conn, err := server.dialContext(ctx, network, targetServer) if err == nil { return conn, nil } - logrus.Debugf("Dial error from load balancer %s: %s", lb.serviceName, err) + logrus.Debugf("Dial error from load balancer %s after %s: %s", lb.serviceName, time.Now().Sub(dialTime), err) // Don't close connections to the failed server if we're retrying with health checks ignored. // We don't want to disrupt active connections if it is unlikely they will have anywhere to go. if !allChecksFailed { From a159e4566e27db6a46edf19910f93ed43cc55da5 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Sun, 14 Jul 2024 00:14:43 +0000 Subject: [PATCH 12/12] Don't use server and token values from config file for etcd-snapshot commands Fixes an issue where running etcd-snapshot commands on a node that has a server address set in the config will manage snapshots on that server, instead of on the local node as intended. 
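
As a hypothetical illustration (values invented for this example), a node with a config file like

    server: https://some-other-server:6443
    token: SECRET
    etcd-s3: true
    etcd-s3-bucket: my-backup

previously had `k3s etcd-snapshot save` pick up the server and token entries and manage snapshots
on some-other-server. With this change the snapshot subcommands target the local node
(https://127.0.0.1:6443) unless --etcd-server and --etcd-token are passed explicitly, while the
etcd-s3 options from the config file continue to apply.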
Signed-off-by: Brad Davidson --- pkg/cli/cmds/etcd_snapshot.go | 11 +++++++---- pkg/configfilearg/defaultparser_test.go | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pkg/cli/cmds/etcd_snapshot.go b/pkg/cli/cmds/etcd_snapshot.go index e97228d1e21b..378b394f0a83 100644 --- a/pkg/cli/cmds/etcd_snapshot.go +++ b/pkg/cli/cmds/etcd_snapshot.go @@ -21,11 +21,14 @@ var EtcdSnapshotFlags = []cli.Flag{ Destination: &AgentConfig.NodeName, }, DataDirFlag, - ServerToken, &cli.StringFlag{ - Name: "server, s", - Usage: "(cluster) Server to connect to", - EnvVar: version.ProgramUpper + "_URL", + Name: "etcd-token,t", + Usage: "(cluster) Shared secret used to authenticate to etcd server", + Destination: &ServerConfig.Token, + }, + &cli.StringFlag{ + Name: "etcd-server, s", + Usage: "(cluster) Server with etcd role to connect to for snapshot management operations", Value: "https://127.0.0.1:6443", Destination: &ServerConfig.ServerURL, }, diff --git a/pkg/configfilearg/defaultparser_test.go b/pkg/configfilearg/defaultparser_test.go index e43a0c154598..8ae8decc26fa 100644 --- a/pkg/configfilearg/defaultparser_test.go +++ b/pkg/configfilearg/defaultparser_test.go @@ -48,7 +48,7 @@ func Test_UnitMustParse(t *testing.T) { name: "Etcd-snapshot with config with known and unknown flags", args: []string{"k3s", "etcd-snapshot", "save"}, config: "./testdata/defaultdata.yaml", - want: []string{"k3s", "etcd-snapshot", "save", "--token=12345", "--etcd-s3=true", "--etcd-s3-bucket=my-backup"}, + want: []string{"k3s", "etcd-snapshot", "save", "--etcd-s3=true", "--etcd-s3-bucket=my-backup"}, }, { name: "Agent with known flags",