diff --git a/pkg/rke2/spw.go b/pkg/rke2/spw.go index 083c653c7b4..1876f514cab 100644 --- a/pkg/rke2/spw.go +++ b/pkg/rke2/spw.go @@ -6,6 +6,7 @@ import ( "context" "os" "path/filepath" + "sort" "sync" "time" @@ -66,8 +67,10 @@ func reconcileStaticPods(containerRuntimeEndpoint, dataDir string) cmds.StartupH } } -// checkManifestDeployed returns an error if the static pod's manifest cannot be decoded and verified as present -// and exclusively running with the current pod uid. If old pods are found, they will be terminated and an error returned. +// checkManifestDeployed verifies that a pod for this manifest is exclusively running with the current pod uid. +// Pods with a different uid are removed and an error returned indicating that cleanup is in progress. +// The state of the most recently created pod with a matching uid is used to determine state. In the case of pod +// restarts with the same uid, pods from old attempts will be ignored as long as the most recent pod is ready. func checkManifestDeployed(ctx context.Context, cRuntime runtimeapi.RuntimeServiceClient, manifestFile string) error { f, err := os.Open(manifestFile) if err != nil { @@ -94,6 +97,9 @@ func checkManifestDeployed(ctx context.Context, cRuntime runtimeapi.RuntimeServi return errors.Wrap(err, "failed to list pods") } + // Sort pods for this component in ascending order of creation time, so that we process newer pods last. + sort.Slice(resp.Items, func(i, j int) bool { return resp.Items[i].CreatedAt < resp.Items[j].CreatedAt }) + var currentPod, stalePod bool for _, pod := range resp.Items { if pod.Annotations["kubernetes.io/config.source"] != "file" {