From f791928df14e8c8560c77a7b7d08e3d669eb0462 Mon Sep 17 00:00:00 2001 From: Michael Butler Date: Wed, 4 Dec 2024 15:59:20 +0000 Subject: [PATCH] roachtest: properly surface workload error in c2c tests Previously, if the workload failed in the c2c roachtests and that failure in turn caused the latency verifier to fail, only the latency error would be returned to the client. This patch adds yet another channel to the roachtest driver to properly surface the workload error. A more elegant solution would have involved ordering Monitor.Wait() calls, but moving those around would likely cause fallout. Informs: #136091 Release note: none --- pkg/cmd/roachtest/tests/cluster_to_cluster.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/cmd/roachtest/tests/cluster_to_cluster.go b/pkg/cmd/roachtest/tests/cluster_to_cluster.go index f51e2548e4ab..58b457f1d08a 100644 --- a/pkg/cmd/roachtest/tests/cluster_to_cluster.go +++ b/pkg/cmd/roachtest/tests/cluster_to_cluster.go @@ -932,6 +932,7 @@ func (rd *replicationDriver) main(ctx context.Context) { }() workloadDoneCh := make(chan struct{}) + workloadErrCh := make(chan error, 1) workloadMonitor.Go(func(ctx context.Context) error { defer close(workloadDoneCh) err := rd.runWorkload(ctx) @@ -940,8 +941,11 @@ func (rd *replicationDriver) main(ctx context.Context) { if err != nil && ctx.Err() == nil { // Implies the workload context was not cancelled and the workload cmd returned a // different error. + rd.t.L().Printf("Workload context was not cancelled. Error returned by workload cmd: %s", err) + workloadErrCh <- err return errors.Wrapf(err, `Workload context was not cancelled. 
Error returned by workload cmd`) } + workloadErrCh <- nil rd.t.L().Printf("workload successfully finished") return nil }) @@ -988,6 +992,9 @@ func (rd *replicationDriver) main(ctx context.Context) { select { case <-workloadDoneCh: rd.t.L().Printf("workload finished on its own") + if err := <-workloadErrCh; err != nil { + rd.t.Fatal(err) + } case <-time.After(rd.getWorkloadTimeout()): workloadCancel() rd.t.L().Printf("workload was cancelled after %s", rd.rs.additionalDuration)