From addd225ecc370c3c83bd5a81c6e854b4951814a0 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 9 Oct 2024 10:36:39 -0400 Subject: [PATCH 1/4] Improve AggregationFuzzer error reporting --- .../core/tests/fuzz_cases/aggregate_fuzz.rs | 33 ++++++-- .../fuzz_cases/aggregation_fuzzer/fuzzer.rs | 80 +++++++++++-------- 2 files changed, 72 insertions(+), 41 deletions(-) diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 5cc5157c3af9..6fcfeb1f49ca 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -83,7 +83,8 @@ async fn test_basic_prim_aggr_no_group() { .table_name("fuzz_table") .build(); - fuzzer.run().await; + let res = fuzzer.run().await; + unwrap_and_report(res) } /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by single int64` @@ -121,7 +122,8 @@ async fn test_basic_prim_aggr_group_by_single_int64() { .table_name("fuzz_table") .build(); - fuzzer.run().await; + let res = fuzzer.run().await; + unwrap_and_report(res); } /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by single string` @@ -159,7 +161,8 @@ async fn test_basic_prim_aggr_group_by_single_string() { .table_name("fuzz_table") .build(); - fuzzer.run().await; + let res = fuzzer.run().await; + unwrap_and_report(res); } /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by string + int64` @@ -198,7 +201,8 @@ async fn test_basic_prim_aggr_group_by_mixed_string_int64() { .table_name("fuzz_table") .build(); - fuzzer.run().await; + let res = fuzzer.run().await; + unwrap_and_report(res); } /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `no group by` @@ -226,7 +230,8 @@ async fn test_basic_string_aggr_no_group() { .table_name("fuzz_table") .build(); - fuzzer.run().await; + let res = fuzzer.run().await; + unwrap_and_report(res); } /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by single int64` @@ -263,7 +268,8 @@ async fn test_basic_string_aggr_group_by_single_int64() { .table_name("fuzz_table") .build(); - fuzzer.run().await; + let res = fuzzer.run().await; + unwrap_and_report(res); } /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by single string` @@ -300,7 +306,8 @@ async fn test_basic_string_aggr_group_by_single_string() { .table_name("fuzz_table") .build(); - fuzzer.run().await; + let res = fuzzer.run().await; + unwrap_and_report(res); } /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by string + int64` @@ -338,7 +345,8 @@ async fn test_basic_string_aggr_group_by_mixed_string_int64() { .table_name("fuzz_table") .build(); - fuzzer.run().await; + let res = fuzzer.run().await; + unwrap_and_report(res); } // ======================================================================== @@ -706,3 +714,12 @@ fn extract_result_counts(results: Vec) -> HashMap, i } output } + +/// if res is an error, prints the erorr message neatly (e.g. with newlines +/// expanded) before panic'ing +fn unwrap_and_report(res: Result<()>) { + if let Err(e) = res { + println!("{e}"); + panic!("Error!"); + } +} diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index abb34048284d..a608f02d8506 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use arrow::util::pretty::pretty_format_batches; use arrow_array::RecordBatch; +use datafusion_common::{DataFusionError, Result}; use rand::{thread_rng, Rng}; use tokio::task::JoinSet; @@ -132,7 +133,8 @@ struct QueryGroup { } impl AggregationFuzzer { - pub async fn run(&self) { + /// Run the fuzzer, returning error if any inconsistency found + pub async fn run(&self) -> Result<()> { let mut join_set = JoinSet::new(); let mut rng = thread_rng(); @@ -157,16 +159,20 @@ impl AggregationFuzzer { let tasks = self.generate_fuzz_tasks(query_groups).await; for task in tasks { - join_set.spawn(async move { - task.run().await; - }); + join_set.spawn(async move { task.run().await }); } } while let Some(join_handle) = join_set.join_next().await { // propagate errors - join_handle.unwrap(); + join_handle.map_err(|e| { + DataFusionError::Internal(format!( + "AggregationFuzzer task error: {:?}", + e + )) + })??; } + Ok(()) } async fn generate_fuzz_tasks( @@ -237,45 +243,53 @@ struct AggregationFuzzTestTask { } impl AggregationFuzzTestTask { - async fn run(&self) { + async fn run(&self) -> Result<()> { let task_result = run_sql(&self.sql, &self.ctx_with_params.ctx) .await - .expect("should success to run sql"); - self.check_result(&task_result, &self.expected_result); + .map_err(|e| e.context(self.context_error_report()))?; + self.check_result(&task_result, &self.expected_result) } - // TODO: maybe we should persist the `expected_result` and `task_result`, - // because the readability is not so good if we just print it. - fn check_result(&self, task_result: &[RecordBatch], expected_result: &[RecordBatch]) { - let result = check_equality_of_batches(task_result, expected_result); - if let Err(e) = result { + fn check_result( + &self, + task_result: &[RecordBatch], + expected_result: &[RecordBatch], + ) -> Result<()> { + check_equality_of_batches(task_result, expected_result).map_err(|e| { // If we found inconsistent result, we print the test details for reproducing at first - println!( - "##### AggregationFuzzer error report ##### - ### Sql:\n{}\n\ - ### Schema:\n{}\n\ - ### Session context params:\n{:?}\n\ - ### Inconsistent row:\n\ - - row_idx:{}\n\ - - task_row:{}\n\ - - expected_row:{}\n\ - ### Task total result:\n{}\n\ - ### Expected total result:\n{}\n\ - ### Input:\n{}\n\ - ", - self.sql, - self.dataset_ref.batches[0].schema_ref(), - self.ctx_with_params.params, + let message = format!( + "{}\n\ + ### Inconsistent row:\n\ + - row_idx:{}\n\ + - task_row:{}\n\ + - expected_row:{}\n\ + ### Task total result:\n{}\n\ + ### Expected total result:\n{}\n\ + ", + self.context_error_report(), e.row_idx, e.lhs_row, e.rhs_row, pretty_format_batches(task_result).unwrap(), pretty_format_batches(expected_result).unwrap(), - pretty_format_batches(&self.dataset_ref.batches).unwrap(), ); + DataFusionError::Internal(message) + }) + } - // Then we just panic - panic!(); - } + /// Returns a formatted error message + fn context_error_report(&self) -> String { + format!( + "##### AggregationFuzzer error report #####\n\ + ### Sql:\n{}\n\ + ### Schema:\n{}\n\ + ### Session context params:\n{:?}\n\ + ### Input:\n{}\n\ + ", + self.sql, + self.dataset_ref.batches[0].schema_ref(), + self.ctx_with_params.params, + pretty_format_batches(&self.dataset_ref.batches).unwrap(), + ) } } From 61ab262bab828348d122de758075857c8a484145 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 9 Oct 2024 10:43:15 -0400 Subject: [PATCH 2/4] simplify --- .../core/tests/fuzz_cases/aggregate_fuzz.rs | 33 +++++-------------- .../fuzz_cases/aggregation_fuzzer/fuzzer.rs | 13 ++++++-- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 6fcfeb1f49ca..ee390e8b5f55 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -83,8 +83,7 @@ async fn test_basic_prim_aggr_no_group() { .table_name("fuzz_table") .build(); - let res = fuzzer.run().await; - unwrap_and_report(res) + fuzzer.run().await } /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by single int64` @@ -122,8 +121,7 @@ async fn test_basic_prim_aggr_group_by_single_int64() { .table_name("fuzz_table") .build(); - let res = fuzzer.run().await; - unwrap_and_report(res); + fuzzer.run().await; } /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by single string` @@ -161,8 +159,7 @@ async fn test_basic_prim_aggr_group_by_single_string() { .table_name("fuzz_table") .build(); - let res = fuzzer.run().await; - unwrap_and_report(res); + fuzzer.run().await; } /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by string + int64` @@ -201,8 +198,7 @@ async fn test_basic_prim_aggr_group_by_mixed_string_int64() { .table_name("fuzz_table") .build(); - let res = fuzzer.run().await; - unwrap_and_report(res); + fuzzer.run().await; } /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `no group by` @@ -230,8 +226,7 @@ async fn test_basic_string_aggr_no_group() { .table_name("fuzz_table") .build(); - let res = fuzzer.run().await; - unwrap_and_report(res); + fuzzer.run().await; } /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by single int64` @@ -268,8 +263,7 @@ async fn test_basic_string_aggr_group_by_single_int64() { .table_name("fuzz_table") .build(); - let res = fuzzer.run().await; - unwrap_and_report(res); + fuzzer.run().await; } /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by single string` @@ -306,8 +300,7 @@ async fn test_basic_string_aggr_group_by_single_string() { .table_name("fuzz_table") .build(); - let res = fuzzer.run().await; - unwrap_and_report(res); + fuzzer.run().await; } /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by string + int64` @@ -345,8 +338,7 @@ async fn test_basic_string_aggr_group_by_mixed_string_int64() { .table_name("fuzz_table") .build(); - let res = fuzzer.run().await; - unwrap_and_report(res); + fuzzer.run().await; } // ======================================================================== @@ -714,12 +706,3 @@ fn extract_result_counts(results: Vec) -> HashMap, i } output } - -/// if res is an error, prints the erorr message neatly (e.g. with newlines -/// expanded) before panic'ing -fn unwrap_and_report(res: Result<()>) { - if let Err(e) = res { - println!("{e}"); - panic!("Error!"); - } -} diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index a608f02d8506..fcbb50d4fe28 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -133,8 +133,17 @@ struct QueryGroup { } impl AggregationFuzzer { - /// Run the fuzzer, returning error if any inconsistency found - pub async fn run(&self) -> Result<()> { + /// Run the fuzzer, printing an error and panicking if any of the tasks fail + pub async fn run(&self) { + let res = self.run_inner().await; + + if let Err(e) = res { + println!("{e}"); + panic!("Error!"); + } + } + + async fn run_inner(&self) -> Result<()> { let mut join_set = JoinSet::new(); let mut rng = thread_rng(); From 50c068c6ee68596fcec4864e56e6403ff4a08c63 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 9 Oct 2024 13:08:29 -0400 Subject: [PATCH 3/4] Update datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs --- datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index fcbb50d4fe28..8baaed39e3d6 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -138,6 +138,9 @@ impl AggregationFuzzer { let res = self.run_inner().await; if let Err(e) = res { + // Print the error via `Display` so that it displays nicely (the default `unwrap()` + // prints using `Debug` which escapes newlines, and makes multi-line messages + // hard to read println!("{e}"); panic!("Error!"); } From 601d8b1e32f031867dc6040d0c66e502b8aac207 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 9 Oct 2024 14:04:24 -0400 Subject: [PATCH 4/4] fmt --- datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index 8baaed39e3d6..6daebc894272 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -138,7 +138,7 @@ impl AggregationFuzzer { let res = self.run_inner().await; if let Err(e) = res { - // Print the error via `Display` so that it displays nicely (the default `unwrap()` + // Print the error via `Display` so that it displays nicely (the default `unwrap()` // prints using `Debug` which escapes newlines, and makes multi-line messages // hard to read println!("{e}");