diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs index 6085fca8761f..8f02fb30b013 100644 --- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs +++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs @@ -526,7 +526,6 @@ impl Accumulator for TimeSum { let arr = arr.as_primitive::(); for v in arr.values().iter() { - println!("Adding {v}"); self.sum += v; } Ok(()) @@ -538,7 +537,6 @@ impl Accumulator for TimeSum { } fn evaluate(&mut self) -> Result { - println!("Evaluating to {}", self.sum); Ok(ScalarValue::TimestampNanosecond(Some(self.sum), None)) } @@ -558,7 +556,6 @@ impl Accumulator for TimeSum { let arr = arr.as_primitive::(); for v in arr.values().iter() { - println!("Retracting {v}"); self.sum -= v; } Ok(()) diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 7598c805adf6..7a92a50ae15d 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -38,18 +38,33 @@ pub type ScalarFunctionImplementation = pub type ReturnTypeFunction = Arc Result> + Send + Sync>; -/// Arguments passed to create an accumulator +/// [`AccumulatorArgs`] contains information about how an aggregate +/// function was called, including the types of its arguments and any optional +/// ordering expressions. pub struct AccumulatorArgs<'a> { - // default arguments - /// the return type of the function + /// The return type of the aggregate function. pub data_type: &'a DataType, - /// the schema of the input arguments + /// The schema of the input arguments pub schema: &'a Schema, - /// whether to ignore nulls + /// Whether to ignore nulls. + /// + /// SQL allows the user to specify `IGNORE NULLS`, for example: + /// + /// ```sql + /// SELECT FIRST_VALUE(column1) IGNORE NULLS FROM t; + /// ``` pub ignore_nulls: bool, - // ordering arguments - /// the expressions of `order by`, if no ordering is required, this will be an empty slice + /// The expressions in the `ORDER BY` clause passed to this aggregator. + /// + /// SQL allows the user to specify the ordering of arguments to the + /// aggregate using an `ORDER BY`. For example: + /// + /// ```sql + /// SELECT FIRST_VALUE(column1 ORDER BY column2) FROM t; + /// ``` + /// + /// If no `ORDER BY` is specified, `sort_exprs`` will be empty. pub sort_exprs: &'a [Expr], } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 14e5195116b1..3cf1845aacd6 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -213,8 +213,8 @@ where /// See [`advanced_udaf.rs`] for a full example with complete implementation and /// [`AggregateUDF`] for other available options. /// -/// /// [`advanced_udaf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udaf.rs +/// /// # Basic Example /// ``` /// # use std::any::Any; @@ -282,7 +282,8 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// Return a new [`Accumulator`] that aggregates values for a specific /// group during query execution. /// - /// `acc_args`: the arguments to the accumulator. See [`AccumulatorArgs`] for more details. + /// acc_args: [`AccumulatorArgs`] contains information about how the + /// aggregate function was called. fn accumulator(&self, acc_args: AccumulatorArgs) -> Result>; /// Return the fields used to store the intermediate state of this accumulator. @@ -325,6 +326,13 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// If the aggregate expression has a specialized /// [`GroupsAccumulator`] implementation. If this returns true, /// `[Self::create_groups_accumulator]` will be called. + /// + /// # Notes + /// + /// Even if this function returns true, DataFusion will still use + /// `Self::accumulator` for certain queries, such as when this aggregate is + /// used as a window function or when there no GROUP BY columns in the + /// query. fn groups_accumulator_supported(&self) -> bool { false }