From f0f8f94d135cd42e04bc0caebe7374b18d458bfb Mon Sep 17 00:00:00 2001 From: Stan Rosenberg Date: Tue, 21 May 2024 10:45:12 -0400 Subject: [PATCH] ci/roachtest: fix ssh key gen in private roachtest nightly Public roachtest nightlies have been generating an ssh keypair on startup for some time [1]. It was a soft requirement until [2]. In [2], we streamlined the management and distribution of ssh keypairs, thus making it a hard requirement. This change synchronizes the above aspect of the CI config with that of the public roachtest nightly. We also update the (roachtest) code to fail fast with `TransientError` instead of spamming logs with infra/connectivity issues. [1] https://github.com/cockroachdb/cockroach/blob/d146ecff6f687e438706cf63591cafca60cc116d/build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh#L9-L10 [2] https://github.com/cockroachdb/cockroach/pull/119106 Epic: none Release note: None Fixes: https://github.com/cockroachdb/cockroach/issues/107776 --- .../nightlies/private_roachtest_impl.sh | 3 ++ .../roachtest/tests/query_comparison_util.go | 21 +++++++++----- pkg/internal/workloadreplay/workloadreplay.go | 28 +++++++++++-------- 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/build/teamcity/internal/cockroach/nightlies/private_roachtest_impl.sh b/build/teamcity/internal/cockroach/nightlies/private_roachtest_impl.sh index c9d8bb0a3442..27309f497536 100755 --- a/build/teamcity/internal/cockroach/nightlies/private_roachtest_impl.sh +++ b/build/teamcity/internal/cockroach/nightlies/private_roachtest_impl.sh @@ -5,6 +5,9 @@ set -exuo pipefail dir="$(dirname $(dirname $(dirname $(dirname $(dirname "${0}")))))" source "$dir/teamcity-support.sh" +if [[ ! 
-f ~/.ssh/id_rsa.pub ]]; then + ssh-keygen -q -C "private-roachtest-nightly-bazel $(date)" -N "" -f ~/.ssh/id_rsa +fi $root/build/teamcity/cockroach/nightlies/roachtest_compile_bits.sh amd64 diff --git a/pkg/cmd/roachtest/tests/query_comparison_util.go b/pkg/cmd/roachtest/tests/query_comparison_util.go index cb62b006ecb5..bf57a2b36574 100644 --- a/pkg/cmd/roachtest/tests/query_comparison_util.go +++ b/pkg/cmd/roachtest/tests/query_comparison_util.go @@ -30,6 +30,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/internal/sqlsmith" "github.com/cockroachdb/cockroach/pkg/internal/workloadreplay" + rperrors "github.com/cockroachdb/cockroach/pkg/roachprod/errors" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/testutils/floatcmp" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" @@ -201,21 +202,27 @@ func runOneRoundQueryComparison( var signatures map[string][]string for { - done := ctx.Done() - select { - case <-done: + case <-ctx.Done(): return default: } t.L().Printf("Choosing Random Query") - finalStmt, signatures = workloadreplay.ChooseRandomQuery(ctx, log) + finalStmt, signatures, err = workloadreplay.ChooseRandomQuery(ctx, log) + if err != nil { + // An error here likely denotes an infrastructure issue; i.e., unable to connect to snowflake. Wrapping + // it as a transient failure will route the test failure to TestEng, bypassing the (issue) owner. 
+ t.Fatal(rperrors.TransientFailure(err, "snowflake connectivity issue")) + } if finalStmt == "" { continue } t.L().Printf("Generating Random Data in Snowflake") - schemaMap := workloadreplay.CreateRandomDataSnowflake(ctx, signatures, log) + schemaMap, err := workloadreplay.CreateRandomDataSnowflake(ctx, signatures, log) + if err != nil { + t.Fatal(rperrors.TransientFailure(err, "snowflake connectivity issue")) + } if schemaMap == nil { continue } @@ -237,7 +244,7 @@ func runOneRoundQueryComparison( credKey, ok := os.LookupEnv(keyTag) if !ok { t.L().Printf("%s not set\n", keyTag) - return + t.Fatal(rperrors.TransientFailure(err, "GCE credentials issue")) } encodedKey := b64.StdEncoding.EncodeToString([]byte(credKey)) importStr = "IMPORT INTO " + tableName + " (" + schemaInfo[1] + ")\n" @@ -246,7 +253,7 @@ func runOneRoundQueryComparison( logTest(queryStr, "QUERY_SNOWFLAKE_TO_GCS:") if _, err := conn.Exec(queryStr); err != nil { t.L().Printf("error while inserting rows: %v", err) - return + t.Fatal(rperrors.TransientFailure(err, "snowflake connectivity issue")) } } diff --git a/pkg/internal/workloadreplay/workloadreplay.go b/pkg/internal/workloadreplay/workloadreplay.go index d8fffffcf00e..01988d17f403 100644 --- a/pkg/internal/workloadreplay/workloadreplay.go +++ b/pkg/internal/workloadreplay/workloadreplay.go @@ -29,7 +29,7 @@ import ( ) const ( - accountName = "qy03275.us-east-1" + accountName = "lt53838.us-central1.gcp" bucketName = "roachtest-snowflake-costfuzz" keyTag = "COCKROACH_GOOGLE_EPHEMERAL_CREDENTIALS" sfUser = "COCKROACH_SFUSER" @@ -118,7 +118,7 @@ func writeRowsGCS(ctx context.Context, filename string, rows *gosql.Rows) (strin return cName, nil } -func ChooseRandomQuery(ctx context.Context, log *os.File) (string, map[string][]string) { +func ChooseRandomQuery(ctx context.Context, log *os.File) (string, map[string][]string, error) { rndNumber := rand.Float64() var query string // Randomly select between a non join or join query. 
@@ -137,14 +137,16 @@ func ChooseRandomQuery(ctx context.Context, log *os.File) (string, map[string][] if err != nil { fmt.Fprint(log, err) fmt.Fprint(log, "\n") - return "", nil + return "", nil, err } rows, err := db.QueryContext(ctx, query) if err != nil { fmt.Fprint(log, "Failure in choosing random query:") fmt.Fprint(log, err) fmt.Fprint(log, "\n") - return "", nil + // Connection is either invalid or we hit a transient error, so let's close it. + db.Close() + return "", nil, err } signatures := make(map[string][]string) @@ -175,7 +177,7 @@ func ChooseRandomQuery(ctx context.Context, log *os.File) (string, map[string][] signatures[tablename] = []string{tablesignature, tableschema} } - return finalStmt, signatures + return finalStmt, signatures, nil } // CreateRandomDataSnowflake creates random data in snowflake @@ -184,11 +186,11 @@ func ChooseRandomQuery(ctx context.Context, log *os.File) (string, map[string][] // a mapping of table name to gcs filepath and schema. func CreateRandomDataSnowflake( ctx context.Context, signatures map[string][]string, log *os.File, -) map[string][]string { +) (map[string][]string, error) { db, err := getConnect("workload_data") if err != nil { fmt.Fprint(log, "COULD NOT CONNECT TO SNOWFLAKE: "+fmt.Sprint(err)) - return nil + return nil, err } schemaMap := make(map[string][]string) @@ -198,7 +200,9 @@ func CreateRandomDataSnowflake( _, err := db.QueryContext(ctx, signatureSchema[0]) if err != nil { fmt.Fprint(log, "COULD NOT GENERATE DATASET - signature execution failed:"+signatureSchema[0]) - return nil + // Connection is either invalid or we hit a transient error, so let's close it. + db.Close() + return nil, err } // Get data from snowflake and write to GCS. @@ -208,18 +212,20 @@ func CreateRandomDataSnowflake( fmt.Fprint(log, "COULD NOT RETRIEVE DATA FROM TABLE:"+tableName+"\n") fmt.Fprint(log, err) fmt.Fprint(log, "\n") - return nil + // Connection is either invalid or we hit a transient error, so let's close it. 
+ db.Close() + return nil, err } filename := "roachtest/" + fmt.Sprint(timeutil.Now().Unix()) + "_" + tableName cName, err = writeRowsGCS(ctx, filename, rows) if err != nil { fmt.Fprint(log, err) fmt.Fprint(log, "\n") - return nil + return nil, err } // Map "table name" to file path of gcs data, column names of data and schema of table. schemaMap[tableName] = []string{filename, cName, signatureSchema[1]} } - return schemaMap + return schemaMap, nil }