diff --git a/api/py/ai/chronon/group_by.py b/api/py/ai/chronon/group_by.py index 88f458d23..6af29ecd7 100644 --- a/api/py/ai/chronon/group_by.py +++ b/api/py/ai/chronon/group_by.py @@ -148,6 +148,22 @@ def Window(length: int, timeUnit: ttypes.TimeUnit) -> ttypes.Window: return ttypes.Window(length, timeUnit) +def Derivation(name: str, expression: str) -> ttypes.Derivation: + """ + Derivation allows arbitrary SQL select clauses to be computed using columns from the output of group by backfill + output schema. It is supported for offline computations for now. + + If both name and expression are set to "*", then every raw column will be included along with the derived columns. + + :param name: output column name of the SQL expression + :param expression: any valid Spark SQL select clause based on joinPart or externalPart columns + :return: a Derivation object representing a single derived column or a wildcard ("*") selection. + """ + return ttypes.Derivation( + name=name, expression=expression + ) + + def contains_windowed_aggregation(aggregations: Optional[List[ttypes.Aggregation]]): if not aggregations: return False @@ -295,6 +311,7 @@ def GroupBy(sources: Union[List[_ANY_SOURCE_TYPE], _ANY_SOURCE_TYPE], offline_schedule: str = '@daily', name: str = None, tags: Dict[str, str] = None, + derivations: List[ttypes.Derivation] = None, **kwargs) -> ttypes.GroupBy: """ @@ -388,13 +405,17 @@ def GroupBy(sources: Union[List[_ANY_SOURCE_TYPE], _ANY_SOURCE_TYPE], '@monthly': '0 0 1 * *', '@yearly': '0 0 1 1 *', :type offline_schedule: str - :param kwargs: - Additional properties that would be passed to run.py if specified under additional_args property. - And provides an option to pass custom values to the processing logic. - :type kwargs: Dict[str, str] :param tags: Additional metadata that does not directly affect feature computation, but is useful to track for management purposes. + :type tags: Dict[str, str] + :param derivations: + Derivation allows arbitrary SQL select clauses to be computed using columns from the output of group by backfill + output schema. It is supported for offline computations for now. + :type derivations: List[ai.chronon.api.ttypes.Drivation] + :param kwargs: + Additional properties that would be passed to run.py if specified under additional_args property. + And provides an option to pass custom values to the processing logic. :type kwargs: Dict[str, str] :return: A GroupBy object containing specified aggregations. @@ -476,7 +497,8 @@ def _normalize_source(source): aggregations=aggregations, metaData=metadata, backfillStartDate=backfill_start_date, - accuracy=accuracy + accuracy=accuracy, + derivations=derivations ) validate_group_by(group_by) return group_by diff --git a/api/py/test/sample/group_bys/sample_team/sample_group_by.py b/api/py/test/sample/group_bys/sample_team/sample_group_by.py index b0bfd6c1b..0155e51d7 100644 --- a/api/py/test/sample/group_bys/sample_team/sample_group_by.py +++ b/api/py/test/sample/group_bys/sample_team/sample_group_by.py @@ -3,6 +3,7 @@ GroupBy, Aggregation, Operation, + Derivation ) @@ -30,5 +31,15 @@ ], production=False, output_namespace="sample_namespace", - backfill_start_date="2023-01-01" + backfill_start_date="2023-01-01", + derivations=[ + Derivation( + name="derived_field", + expression="" + ), + Derivation( + name="*", + expression="*" + ) + ] )