From e109bbaf32da8f61a376366a66b4feef99c06a89 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 5 Aug 2024 07:26:26 -0400 Subject: [PATCH 1/3] Improve comments in row_hash.rs for skipping aggregation --- .../physical-plan/src/aggregates/row_hash.rs | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 62ed79dad4aa..1036c89df4e6 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -100,22 +100,24 @@ struct SpillState { /// /// See "partial aggregation" discussion on [`GroupedHashAggregateStream`] struct SkipAggregationProbe { - /// Number of processed input rows + /// Number of processed input rows (updated during probing) input_rows: usize, - /// Number of total group values for `input_rows` + /// Number of total group values for `input_rows` (updated during probing) num_groups: usize, - /// Aggregation ratio check should be performed only when the - /// number of input rows exceeds this threshold + /// Aggregation ratio check performed when the number of input rows exceeds + /// this threshold (from `SessionConfig`) probe_rows_threshold: usize, - /// Maximum allowed value of `input_rows` / `num_groups` to - /// continue aggregation + /// Maximum ratio of `input_rows` / `num_groups` to continue aggregation + /// (from `SessionConfig`). If the ratio exceeds this value, aggregation + /// is skipped and input rows are directly converted to output probe_ratio_threshold: f64, - /// Flag indicating that further data aggregation mey be skipped + /// Flag indicating further data aggregation mey be skipped (decision made + /// when probing complete) should_skip: bool, - /// Flag indicating that further updates of `SkipAggregationProbe` - /// state won't make any effect + /// Flag indicating further updates of `SkipAggregationProbe` state won't + /// make any effect (decision made when probing complete) is_locked: bool, } From 3baa4cc13af72b8f1c9da052795cb290a3ded9cb Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 9 Aug 2024 13:06:42 -0400 Subject: [PATCH 2/3] Update datafusion/physical-plan/src/aggregates/row_hash.rs Co-authored-by: Andy Grove --- datafusion/physical-plan/src/aggregates/row_hash.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 1036c89df4e6..0bfc833c397f 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -108,7 +108,7 @@ struct SkipAggregationProbe { /// Aggregation ratio check performed when the number of input rows exceeds /// this threshold (from `SessionConfig`) probe_rows_threshold: usize, - /// Maximum ratio of `input_rows` / `num_groups` to continue aggregation + /// Maximum ratio of `num_groups` to `input_rows` for continuing aggregation /// (from `SessionConfig`). If the ratio exceeds this value, aggregation /// is skipped and input rows are directly converted to output probe_ratio_threshold: f64, From 49046c57bc263ce2e0126e175081d36dbbe7da5e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 9 Aug 2024 13:14:05 -0400 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Eduard Karacharov --- datafusion/physical-plan/src/aggregates/row_hash.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 0bfc833c397f..5eec33c4f5a3 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -113,11 +113,11 @@ struct SkipAggregationProbe { /// is skipped and input rows are directly converted to output probe_ratio_threshold: f64, - /// Flag indicating further data aggregation mey be skipped (decision made + /// Flag indicating further data aggregation may be skipped (decision made /// when probing complete) should_skip: bool, /// Flag indicating further updates of `SkipAggregationProbe` state won't - /// make any effect (decision made when probing complete) + /// make any effect (set either while probing or on probing completion) is_locked: bool, }