From 8e712e49fbdb99d39b2687dbf69b8f0ca220a91b Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Thu, 12 Dec 2024 00:10:40 -0500 Subject: [PATCH] jobs: don't retry revert if permanent job error occurs Release note: wip --- pkg/jobs/registry.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/jobs/registry.go b/pkg/jobs/registry.go index fffa4f734822..796b47f9487e 100644 --- a/pkg/jobs/registry.go +++ b/pkg/jobs/registry.go @@ -1758,6 +1758,10 @@ func (r *Registry) stepThroughStateMachine( // mark the job as failed because it can be resumed by another node. return errors.Errorf("job %d: node liveness error: restarting in background", job.ID()) } + if IsPermanentJobError(err) { + // If there was a permanent error while reverting, then give up on reverting. + return r.stepThroughStateMachine(ctx, execCtx, resumer, job, StatusRevertFailed, err) + } return onExecutionFailed(err) case StatusFailed: if jobErr == nil { @@ -1775,9 +1779,6 @@ func (r *Registry) stepThroughStateMachine( r.removeFromWaitingSets(job.ID()) return jobErr case StatusRevertFailed: - // TODO(sajjad): Remove StatusRevertFailed and related code in other places in v22.1. - // v21.2 modified all reverting jobs to retry instead of go to revert-failed. Therefore, - // revert-failed state is not reachable after 21.2. if jobErr == nil { return errors.AssertionFailedf("job %d: has StatusRevertFailed but no error was provided", job.ID())