From 357d7754fd739e9e875d17e0f8e63c333553090e Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 5 Jun 2024 15:34:34 +0400 Subject: [PATCH] fix: clean up VM runners on cluster destroy This never worked properly, as `Wait()` only works for child processes, and the processes created by `talosctl cluster create` are not children of `talosctl cluster destroy`. Signed-off-by: Andrey Smirnov --- pkg/provision/providers/vm/process.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pkg/provision/providers/vm/process.go b/pkg/provision/providers/vm/process.go index f9d06a1ad9..1bfa49db74 100644 --- a/pkg/provision/providers/vm/process.go +++ b/pkg/provision/providers/vm/process.go @@ -5,10 +5,12 @@ package vm import ( - "errors" "fmt" "os" "syscall" + "time" + + "github.com/siderolabs/go-retry/retry" ) // StopProcessByPidfile stops a process by reading its PID from a file. @@ -43,13 +45,13 @@ func StopProcessByPidfile(pidPath string) error { return fmt.Errorf("error sending SIGTERM to %d (path %q): %w", pid, pidPath, err) } - if _, err = proc.Wait(); err != nil { - if errors.Is(err, syscall.ECHILD) { - return nil + // wait for the process to exit, this is using (unreliable and slow) polling + return retry.Constant(30*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(func() error { + err = proc.Signal(syscall.Signal(0)) + if err == nil { + return retry.ExpectedErrorf("process %d still running", pid) } - return fmt.Errorf("error waiting for %d to exit (path %q): %w", pid, pidPath, err) - } - - return nil + return nil + }) }