diff --git a/bundle/bin/rke2-uninstall.ps1 b/bundle/bin/rke2-uninstall.ps1
index 50b8b83818f..61bd905509d 100644
--- a/bundle/bin/rke2-uninstall.ps1
+++ b/bundle/bin/rke2-uninstall.ps1
@@ -272,6 +272,15 @@ function Remove-Containerd () {
     }

     if (ctr) {
+        # We create a lockfile to prevent rke2 service from starting kubelet again
+        Create-Lockfile
+        # Get-Process reports an error and returns nothing once kubelet is gone, so poll for the
+        # process itself instead of reading HasExited from an object that may no longer exist
+        Get-Process -Name "kubelet" -ErrorAction SilentlyContinue | Stop-Process
+        while (Get-Process -Name "kubelet" -ErrorAction SilentlyContinue) {
+            Write-LogInfo "Waiting for kubelet process to stop"
+            Start-Sleep -s 5
+        }
         $namespaces = $(Find-Namespaces)
         if (-Not($namespaces)) {
             $ErrorActionPreference = 'SilentlyContinue'
@@ -292,10 +301,23 @@ function Remove-Containerd () {
             foreach ($image in $images) {
                 Remove-Image $ns $image
             }
-            Remove-Namespace $ns
-            # TODO
-            # clean pods with crictl
-            # $CONTAINER_RUNTIME_ENDPOINT = "npipe:\\.\\pipe\\containerd-containerd"
+        }
+
+        # Some resources in the namespace take a while to disappear. Retry the removal every 5s and only report a failure if namespaces are still left after 30s
+        $endTime = (Get-Date).AddSeconds(30)
+        while ($true) {
+            $namespaces = $(Find-Namespaces)
+            if (-Not($namespaces)) {
+                break
+            }
+            if ((Get-Date) -ge $endTime) {
+                Write-Output "Unable to remove all namespaces"
+                break
+            }
+            foreach ($ns in $namespaces) {
+                Remove-Namespace $ns
+            }
+            Start-Sleep -Seconds 5
         }
     }
     else {
@@ -345,6 +367,24 @@ function Remove-Namespace() {
     Invoke-Ctr -cmd "namespace remove $namespace"
 }

+# Creates rke2-uninstall.lock in the directory that holds kubelet.kubeconfig (the rke2 agent directory).
+# rke2 checks for this file after kubelet exits and does not restart kubelet while it exists.
+function Create-Lockfile() {
+    $file = "kubelet.kubeconfig"
+    $fullPath = Join-Path -Path "C:\var\lib\rancher\rke2\agent\" -ChildPath $file
+    if (-not (Test-Path -Path $fullPath)) {
+        # For cases where Datadir is not the default location (can take a minute to find)
+        $fullPath = Get-ChildItem -Path C:\ -Filter $file -Recurse -File -ErrorAction SilentlyContinue | Select-Object -First 1 -ExpandProperty FullName
+    }
+    if (-not ($fullPath)) {
+        Write-LogInfo "Unable to find $file, rke2-uninstall.lock was not created"
+        return
+    }
+    $directory = Split-Path -Parent $fullPath
+    $lockFilePath = Join-Path -Path $directory -ChildPath "rke2-uninstall.lock"
+    New-Item -ItemType File -Path $lockFilePath -Force
+}
+
 function Invoke-Rke2Uninstall () {
     $env:PATH += ";$env:CATTLE_AGENT_BIN_PREFIX/bin/;c:\var\lib\rancher\rke2\bin"
     Remove-Containerd
diff --git a/pkg/pebinaryexecutor/pebinary.go b/pkg/pebinaryexecutor/pebinary.go
index d45ed81a469..6d9b9a9e0f1 100644
--- a/pkg/pebinaryexecutor/pebinary.go
+++ b/pkg/pebinaryexecutor/pebinary.go
@@ -165,6 +165,15 @@ func (p *PEBinaryConfig) Kubelet(ctx context.Context, args []string) error {
 		cleanArgs = append(cleanArgs, arg)
 	}

+	// It should never happen but just in case, we make sure the rke2-uninstall.lock does not exist before starting kubelet
+	lockFile := filepath.Join(p.DataDir, "agent", "rke2-uninstall.lock")
+	if _, err := os.Stat(lockFile); err == nil {
+		// If the file exists, delete it
+		if err := os.Remove(lockFile); err != nil {
+			logrus.Errorf("Failed to remove the %s file: %v", lockFile, err)
+		}
+	}
+
 	win.ProcessWaitGroup.StartWithContext(ctx, func(ctx context.Context) {
 		for {
 			logrus.Infof("Running RKE2 kubelet %v", cleanArgs)
@@ -185,6 +194,12 @@ func (p *PEBinaryConfig) Kubelet(ctx context.Context, args []string) error {
 			}
 			cancel()

+			// If the rke2-uninstall.ps1 script created the lock file, we are removing rke2 and thus we don't restart kubelet
+			if _, err := os.Stat(lockFile); err == nil {
+				logrus.Infof("rke2-uninstall.lock exists. kubelet is not restarted")
+				return
+			}
+
 			select {
 			case <-ctx.Done():
 				return