From 3c519f5d0988a2e8d602415789c3a0f4c2dd3297 Mon Sep 17 00:00:00 2001 From: Nashwan Azhari Date: Tue, 17 Dec 2024 14:58:44 +0200 Subject: [PATCH 1/3] cleanup: fix cleanup of non-symlink containerd paths on failed `bootstrap/join-cluster`. Signed-off-by: Nashwan Azhari --- src/k8s/pkg/k8sd/app/hooks_remove.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/k8s/pkg/k8sd/app/hooks_remove.go b/src/k8s/pkg/k8sd/app/hooks_remove.go index 4e90e6c05..567d6d350 100644 --- a/src/k8s/pkg/k8sd/app/hooks_remove.go +++ b/src/k8s/pkg/k8sd/app/hooks_remove.go @@ -5,6 +5,7 @@ import ( "database/sql" "errors" "fmt" + "io/fs" "net" "os" @@ -177,17 +178,20 @@ func tryCleanupContainerdPaths(log log.Logger, s snap.Snap) { } // Check directory exists before attempting to remove: - if _, err := os.Stat(dirpath); os.IsNotExist(err) { + if stat, err := os.Stat(dirpath); os.IsNotExist(err) { log.Info("Containerd directory doesn't exist; skipping cleanup", "directory", dirpath) } else { - // NOTE(aznashwan): because of the convoluted interfaces-based way the snap - // composes and creates the original lockfiles (see k8sd/setup/containerd.go) - // this check is meant to defend against accidental code/configuration errors which - // might lead to the root FS being deleted: - realPath, err := os.Readlink(dirpath) - if err != nil { - log.Error(err, fmt.Sprintf("Failed to os.Readlink the directory path for lockfile %q pointing to %q. Skipping cleanup", lockpath, dirpath)) - continue + realPath := dirpath + if stat.Mode()&fs.ModeSymlink != 0 { + // NOTE(aznashwan): because of the convoluted interfaces-based way the snap + // composes and creates the original lockfiles (see k8sd/setup/containerd.go) + // this check is meant to defend against accidental code/configuration errors which + // might lead to the root FS being deleted: + realPath, err = os.Readlink(dirpath) + if err != nil { + log.Error(err, fmt.Sprintf("Failed to os.Readlink the directory path for lockfile %q pointing to %q. Skipping cleanup", lockpath, dirpath)) + continue + } } if realPath == "/" { From 39531c1c6a8c129c08945b29a3d8a599353b25a6 Mon Sep 17 00:00:00 2001 From: Nashwan Azhari Date: Tue, 17 Dec 2024 17:39:54 +0200 Subject: [PATCH 2/3] integration: fix test_containerd_path_cleanup_on_failed_init. Previously, the test was inducing the `bootstrap` failure by attempting to pre-allocate the port of the kube-controller-manager. This methodology wasn't applicable on any non-LocalHarness testing setups, so the test was updated to induce the failure by pre-creating the containerd socket directory (`/run/containerd`), which should lead to the same cleanup logic being triggered. Signed-off-by: Nashwan Azhari --- tests/integration/tests/test_cleanup.py | 57 ++++++++++++----------- tests/integration/tests/test_util/util.py | 34 -------------- 2 files changed, 31 insertions(+), 60 deletions(-) diff --git a/tests/integration/tests/test_cleanup.py b/tests/integration/tests/test_cleanup.py index be32d4719..71f0853ab 100644 --- a/tests/integration/tests/test_cleanup.py +++ b/tests/integration/tests/test_cleanup.py @@ -11,11 +11,11 @@ LOG = logging.getLogger(__name__) -KUBE_CONTROLLER_MANAGER_SNAP_PORT = 10257 +CONTAINERD_SOCKET_DIRECTORY_CLASSIC = "/run/containerd" CONTAINERD_PATHS = [ "/etc/containerd", - "/run/containerd", + CONTAINERD_SOCKET_DIRECTORY_CLASSIC, "/var/lib/containerd", ] CNI_PATH = "/opt/cni/bin" @@ -114,7 +114,6 @@ def test_node_cleanup_new_containerd_path(instances: List[harness.Instance]): @pytest.mark.node_count(1) @pytest.mark.no_setup() @pytest.mark.tags(tags.NIGHTLY) -@pytest.mark.skip(reason="the test fails when using a harness other than 'local'") def test_containerd_path_cleanup_on_failed_init( instances: List[harness.Instance], tmp_path ): @@ -122,38 +121,44 @@ def test_containerd_path_cleanup_on_failed_init( containerd-related paths it may have created as part of the failed `bootstrap`. - It induces a bootstrap failure by pre-binding a required k8s service - port (10257 for the kube-controller-manager) before running `k8s bootstrap`. + It introduces a bootstrap failure by pre-creating the containerd socket + path before running `k8s-bootstrap`. + + The bootstrap/join-cluster aborting behavior was added in this PR: + https://github.com/canonical/k8s-snap/pull/772 NOTE: a failed `join-cluster` will trigger the exact same cleanup hook, so the test implicitly applies to it as well. """ instance = instances[0] expected_code = 1 - expected_message = ( - "Encountered error(s) while verifying port availability for Kubernetes " - "services: Port 10257 (needed by: kube-controller-manager) is already in use." - ) + expected_message = "containerd socket already exists" + + util.setup_k8s_snap(instance, tmp_path, config.SNAP, connect_interfaces=False) - with util.open_port(KUBE_CONTROLLER_MANAGER_SNAP_PORT) as _: - util.setup_k8s_snap(instance, tmp_path, config.SNAP, connect_interfaces=False) + # Pre-create the containerd socket directory in the test instance: + instance.exec(["mkdir", "-p", CONTAINERD_SOCKET_DIRECTORY_CLASSIC], check=True) - proc = instance.exec( - ["k8s", "bootstrap"], capture_output=True, text=True, check=False + proc = instance.exec( + ["k8s", "bootstrap"], capture_output=True, text=True, check=False + ) + + if proc.returncode != expected_code: + raise AssertionError( + f"Expected `k8s bootstrap` to exit with code {expected_code}, " + f"but it exited with {proc.returncode}.\n" + f"Stdout was: \n{proc.stdout}.\nStderr was: \n{proc.stderr}" ) - if proc.returncode != expected_code: - raise AssertionError( - f"Expected `k8s bootstrap` to exit with code {expected_code}, " - f"but it exited with {proc.returncode}.\n" - f"Stdout was: \n{proc.stdout}.\nStderr was: \n{proc.stderr}" - ) + if expected_message not in proc.stderr: + raise AssertionError( + f"Expected to find socket-related warning '{expected_message}' in " + "stderr of the `k8s bootstrap` command.\n" + f"Stdout was: \n{proc.stdout}.\nStderr was: \n{proc.stderr}" + ) - if expected_message not in proc.stderr: - raise AssertionError( - f"Expected to find port-related warning '{expected_message}' in " - "stderr of the `k8s bootstrap` command.\n" - f"Stdout was: \n{proc.stdout}.\nStderr was: \n{proc.stderr}" - ) + _assert_paths_not_exist(instance, CONTAINERD_PATHS) - _assert_paths_not_exist(instance, CONTAINERD_PATHS) + # Remove the directory and ensure the bootstrap succeeds: + instance.exec(["rmdir", CONTAINERD_SOCKET_DIRECTORY_CLASSIC], check=True) + instance.exec(["k8s", "bootstrap"]) diff --git a/tests/integration/tests/test_util/util.py b/tests/integration/tests/test_util/util.py index f1d0d31b2..99fa3dfce 100644 --- a/tests/integration/tests/test_util/util.py +++ b/tests/integration/tests/test_util/util.py @@ -1,13 +1,11 @@ # # Copyright 2024 Canonical, Ltd. # -import contextlib import ipaddress import json import logging import re import shlex -import socket import subprocess import urllib.request from datetime import datetime @@ -518,38 +516,6 @@ def find_suitable_cidr(parent_cidr: str, excluded_ips: List[str]): raise RuntimeError("Could not find a suitable CIDR for LoadBalancer services") -@contextlib.contextmanager -def open_port( - port: int, - host: str = "", - address_family: socket.AddressFamily = socket.AF_INET, - socket_kind: socket.SocketKind = socket.SOCK_STREAM, - max_backlogged_connections: int = 0, -): - """Context manager which opens a socket with the given properties - and binds it to the given port. - - Yields the already setup and listening socket object for use. - - By default, it will only allow one single active connection - and instantly refuse any new ones. Use the `max_backlogged_connections` - argument if you'd like it to accept more connections as `pending`. - """ - sock = socket.socket(family=address_family, type=socket_kind) - if not host: - host = socket.gethostname() - sock.bind((host, port)) - LOG.info(f"Successfully bound new socket on '{host}:{port}'") - - try: - sock.listen(max_backlogged_connections) - LOG.info(f"Successfully started listening on '{host}:{port}'") - yield sock - finally: - sock.close() - LOG.info(f"Closed socket on '{host}:{port}'") - - def check_file_paths_exist( instance: harness.Instance, paths: List[str] ) -> Mapping[str, bool]: From 685c9eeb7de2563dfb3d30efeb8e8de7523608db Mon Sep 17 00:00:00 2001 From: Berkay Tekin Oz Date: Fri, 20 Dec 2024 10:08:30 +0000 Subject: [PATCH 3/3] Move containerd folder creation, update cleanup test --- src/k8s/pkg/k8sd/setup/containerd.go | 15 +++++++++ src/k8s/pkg/k8sd/setup/directories.go | 3 -- .../integration/templates/bootstrap-fail.yaml | 18 +++++++++++ tests/integration/tests/test_cleanup.py | 31 +++++-------------- 4 files changed, 40 insertions(+), 27 deletions(-) create mode 100644 tests/integration/templates/bootstrap-fail.yaml diff --git a/src/k8s/pkg/k8sd/setup/containerd.go b/src/k8s/pkg/k8sd/setup/containerd.go index 2aba6b70c..662999870 100644 --- a/src/k8s/pkg/k8sd/setup/containerd.go +++ b/src/k8s/pkg/k8sd/setup/containerd.go @@ -91,6 +91,21 @@ func defaultContainerdConfig( // Containerd configures configuration and arguments for containerd on the local node. // Optionally, a number of registry mirrors and auths can be configured. func Containerd(snap snap.Snap, extraContainerdConfig map[string]any, extraArgs map[string]*string) error { + // We create the directories here since PreInitCheck is called before this + // This ensures we only create the directories if we are going to configure containerd + for _, dir := range []string{ + snap.ContainerdConfigDir(), + snap.ContainerdExtraConfigDir(), + snap.ContainerdRegistryConfigDir(), + } { + if dir == "" { + continue + } + if err := os.MkdirAll(dir, 0o700); err != nil { + return fmt.Errorf("failed to create required directory: %w", err) + } + } + configToml := defaultContainerdConfig( snap.CNIConfDir(), snap.CNIBinDir(), diff --git a/src/k8s/pkg/k8sd/setup/directories.go b/src/k8s/pkg/k8sd/setup/directories.go index 73a06e0cf..41b289eb3 100644 --- a/src/k8s/pkg/k8sd/setup/directories.go +++ b/src/k8s/pkg/k8sd/setup/directories.go @@ -19,9 +19,6 @@ func EnsureAllDirectories(snap snap.Snap) error { for _, dir := range []string{ snap.CNIConfDir(), - snap.ContainerdConfigDir(), - snap.ContainerdExtraConfigDir(), - snap.ContainerdRegistryConfigDir(), snap.K8sDqliteStateDir(), snap.KubernetesConfigDir(), snap.KubernetesPKIDir(), diff --git a/tests/integration/templates/bootstrap-fail.yaml b/tests/integration/templates/bootstrap-fail.yaml new file mode 100644 index 000000000..62abb6aa5 --- /dev/null +++ b/tests/integration/templates/bootstrap-fail.yaml @@ -0,0 +1,18 @@ +# Contains the bootstrap configuration for the smoke test. +cluster-config: + network: + enabled: true + dns: + enabled: true + ingress: + enabled: true + load-balancer: + enabled: true + local-storage: + enabled: true + gateway: + enabled: true + metrics-server: + enabled: true +extra-node-kube-apiserver-args: + --foo: bar diff --git a/tests/integration/tests/test_cleanup.py b/tests/integration/tests/test_cleanup.py index 71f0853ab..f82dae1f5 100644 --- a/tests/integration/tests/test_cleanup.py +++ b/tests/integration/tests/test_cleanup.py @@ -112,17 +112,14 @@ def test_node_cleanup_new_containerd_path(instances: List[harness.Instance]): @pytest.mark.node_count(1) -@pytest.mark.no_setup() +@pytest.mark.disable_k8s_bootstrapping() @pytest.mark.tags(tags.NIGHTLY) -def test_containerd_path_cleanup_on_failed_init( - instances: List[harness.Instance], tmp_path -): +def test_containerd_path_cleanup_on_failed_init(instances: List[harness.Instance]): """Tests that a failed `bootstrap` properly cleans up any containerd-related paths it may have created as part of the failed `bootstrap`. - It introduces a bootstrap failure by pre-creating the containerd socket - path before running `k8s-bootstrap`. + It introduces a bootstrap failure by supplying an incorrect argument to the kube-apiserver. The bootstrap/join-cluster aborting behavior was added in this PR: https://github.com/canonical/k8s-snap/pull/772 @@ -132,33 +129,19 @@ def test_containerd_path_cleanup_on_failed_init( """ instance = instances[0] expected_code = 1 - expected_message = "containerd socket already exists" - - util.setup_k8s_snap(instance, tmp_path, config.SNAP, connect_interfaces=False) - # Pre-create the containerd socket directory in the test instance: - instance.exec(["mkdir", "-p", CONTAINERD_SOCKET_DIRECTORY_CLASSIC], check=True) + fail_bootstrap_config = (config.MANIFESTS_DIR / "bootstrap-fail.yaml").read_text() proc = instance.exec( - ["k8s", "bootstrap"], capture_output=True, text=True, check=False + ["k8s", "bootstrap", "--file", "-"], + input=str.encode(fail_bootstrap_config), + check=False, ) if proc.returncode != expected_code: raise AssertionError( f"Expected `k8s bootstrap` to exit with code {expected_code}, " f"but it exited with {proc.returncode}.\n" - f"Stdout was: \n{proc.stdout}.\nStderr was: \n{proc.stderr}" - ) - - if expected_message not in proc.stderr: - raise AssertionError( - f"Expected to find socket-related warning '{expected_message}' in " - "stderr of the `k8s bootstrap` command.\n" - f"Stdout was: \n{proc.stdout}.\nStderr was: \n{proc.stderr}" ) _assert_paths_not_exist(instance, CONTAINERD_PATHS) - - # Remove the directory and ensure the bootstrap succeeds: - instance.exec(["rmdir", CONTAINERD_SOCKET_DIRECTORY_CLASSIC], check=True) - instance.exec(["k8s", "bootstrap"])