From 75eed239e5e16eb49e76bc46ff8b8889fe2efbf2 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Fri, 1 Nov 2024 08:54:47 -0500 Subject: [PATCH 01/17] wip --- .../express/preprocess/FreqPreprocessor.py | 12 ++-- .../express/preprocess/HistPreprocessor.py | 44 ++++++++---- .../preprocess/UnivariateAwarePreprocessor.py | 70 +++++++++++++++++++ .../preprocess/UnivariatePreprocessor.py | 31 -------- 4 files changed, 108 insertions(+), 49 deletions(-) create mode 100644 plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py delete mode 100644 plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariatePreprocessor.py diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py index 847d1360e..e0ba0f5a9 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py @@ -4,11 +4,11 @@ from deephaven.table import Table -from .UnivariatePreprocessor import UnivariatePreprocessor +from .UnivariateAwarePreprocessor import UnivariateAwarePreprocessor from ..shared import get_unique_names -class FreqPreprocessor(UnivariatePreprocessor): +class FreqPreprocessor(UnivariateAwarePreprocessor): """ A type of univariate preprocessor for frequency bar plots @@ -33,14 +33,14 @@ def preprocess_partitioned_tables( A tuple containing (the new table, an update to make to the args) """ - column = self.col_val if not column else column + column = self.value_col if not column else column names = get_unique_names(self.table, ["count"]) - self.args[self.other_var] = names["count"] + self.args[self.value_var] = names["count"] for table in tables: yield table.view([column]).count_by(names["count"], by=column), { - self.var: column, - self.other_var: names["count"], + self.axis_var: column, + self.value_var: names["count"], } diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index 197b0b73d..0a9067cbd 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -5,7 +5,7 @@ from deephaven import agg, empty_table, new_table from deephaven.table import PartitionedTable, Table -from .UnivariatePreprocessor import UnivariatePreprocessor +from .UnivariateAwarePreprocessor import UnivariateAwarePreprocessor from ..shared import get_unique_names from deephaven.column import long_col from deephaven.updateby import cum_sum @@ -36,7 +36,7 @@ def get_aggs( ) -class HistPreprocessor(UnivariatePreprocessor): +class HistPreprocessor(UnivariateAwarePreprocessor): """ Preprocessor for histograms. @@ -73,7 +73,7 @@ def prepare_preprocess(self) -> None: ) self.range_table = create_range_table( self.args["table"], - self.cols, + self.axis_cols, self.range_bins, self.nbins, self.names["range"], @@ -99,17 +99,29 @@ def create_count_tables( raise ValueError("Range table not created") for i, table in enumerate(tables): # the column needs to be temporarily renamed to avoid collisions - tmp_name = f"tmp{i}" - tmp_col = get_unique_names(table, [tmp_name])[tmp_name] + tmp_axis_col_base = f"tmpaxis{i}" + tmp_value_col_base = f"tmpvalue{i}" + tmp_col_names = get_unique_names( + table, [tmp_axis_col_base, tmp_value_col_base] + ) + tmp_axis_col, tmp_value_col = ( + tmp_col_names[tmp_axis_col_base], + tmp_col_names[tmp_value_col_base], + ) count_table = ( - table.view(f"{tmp_col} = {column}") + table.view( + [ + f"{tmp_value_col} = {column}", + f"{tmp_axis_col} = {self.axis_cols[0]}", + ] + ) .join(self.range_table) - .update_view(f"{range_index} = {range_}.index({tmp_col})") + .update_view(f"{range_index} = {range_}.index({tmp_axis_col})") .where(f"!isNull({range_index})") .drop_columns(range_) - .agg_by([agg_func(tmp_col)], range_index) + .agg_by([agg_func(tmp_value_col)], range_index) ) - yield count_table, tmp_col + yield count_table, tmp_value_col def preprocess_partitioned_tables( self, tables: list[Table], column: str | None = None @@ -126,7 +138,7 @@ def preprocess_partitioned_tables( """ # column will only be set if there's a pivot var, which means the table has been restructured - column = self.col_val if not column else column + column = self.value_col if not column else column range_index, range_, bin_min, bin_max, total = ( self.names["range_index"], @@ -206,8 +218,16 @@ def preprocess_partitioned_tables( + [f"{col}={col} * {mult_factor} / {total}" for col in count_cols] ) + var_axis_displayed = var_axis_name + + if self.axis_cols[0] != column: + # if a different column is aggregated on, the axis name should reflect that + # todo: plumb this through the args + var_axis_displayed = f"{var_axis_name} of {column}" + for count_col in count_cols: + # todo: better way to handle this rather that flipping axis_var and value_var later???? yield bin_counts.view([var_axis_name, f"{column} = {count_col}"]), { - self.var: var_axis_name, - self.other_var: column, + self.axis_var: var_axis_name, + self.value_var: column, } diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py new file mode 100644 index 000000000..3403505b9 --- /dev/null +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +from typing import Any + + +class UnivariateAwarePreprocessor: + """ + A preprocessor that stores useful args for plots where possibly one of x or y or both can be specified, + which impacts the orientation of the plot in ways that affect the preprocessing. + Should be inherited from. + + Args: + args: Figure creation args + pivot_vars: Pivot vars that have the new column names + + Attributes: + args: dict[str, str]: Figure creation args + table: Table: The table to use + axis_var: str: The main var. The list of vars was passed to this arg. + value_var: The other var. + col_val: str: The value column, which is the value in pivot_var if + there is a list, otherwise the arg passed to var + cols: list[str]: The columns that are being used + """ + + def __init__(self, args: dict[str, Any], pivot_vars: dict[str, str] | None = None): + self.args = args + self.table = args["table"] + self.orientation = self.calculate_orientation() + self.args["orientation"] = self.orientation + self.axis_var = "x" if args.get("x") else "y" + self.value_var = "y" if self.axis_var == "x" else "x" + self.axis_col: str = ( + pivot_vars["variable"] if pivot_vars else args[self.axis_var] + ) + self.axis_cols = ( + self.axis_col if isinstance(self.axis_col, list) else [self.axis_col] + ) + + # if value_var is not set, the value column is the same as the axis column because both the axis bins and value + # are computed from the same inputs + if self.args.get(self.value_var): + self.value_col: str = ( + pivot_vars["value"] if pivot_vars else args[self.value_var] + ) + self.value_cols = ( + self.value_col if isinstance(self.value_col, list) else [self.value_col] + ) + else: + self.value_col = self.axis_col + self.value_cols = self.axis_cols + + def calculate_orientation(self): + """ + Calculate the orientation of the plot + """ + orientation = self.args.get("orientation") + x = self.args.get("x") + y = self.args.get("y") + if orientation: + return orientation + elif x and y: + # TODO: more sophisticated orientation calculation like plotly express, which calculates based on data types + return "v" + elif x: + return "v" + elif y: + return "y" + + raise ValueError("Could not determine orientation") diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariatePreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariatePreprocessor.py deleted file mode 100644 index 4c13574b3..000000000 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariatePreprocessor.py +++ /dev/null @@ -1,31 +0,0 @@ -from __future__ import annotations - -from typing import Any - - -class UnivariatePreprocessor: - """ - A univariate preprocessor that stores useful args. Should be inherited from. - - Args: - args: Figure creation args - pivot_vars: Pivot vars that have the new column names - - Attributes: - args: dict[str, str]: Figure creation args - table: Table: The table to use - var: str: The main var. The list of vars was passed to this arg. - other_var: The other var. - col_val: str: The value column, which is the value in pivot_var if - there is a list, otherwise the arg passed to var - cols: list[str]: The columns that are being used - """ - - def __init__(self, args: dict[str, Any], pivot_vars: dict[str, str] | None = None): - self.args = args - self.table = args["table"] - self.var = "x" if args.get("x") else "y" - self.other_var = "y" if self.var == "x" else "x" - self.args["orientation"] = "h" if self.var == "y" else "v" - self.col_val: str = pivot_vars["value"] if pivot_vars else args[self.var] - self.cols = self.col_val if isinstance(self.col_val, list) else [self.col_val] From e1e5a591e3d0f0d699f8e6579484137ae92948ee Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Fri, 1 Nov 2024 12:49:00 -0500 Subject: [PATCH 02/17] wip --- .../plot/express/plots/PartitionManager.py | 2 +- .../express/preprocess/FreqPreprocessor.py | 6 +-- .../express/preprocess/HistPreprocessor.py | 49 +++++++++++++------ .../plot/express/preprocess/Preprocessor.py | 7 ++- .../preprocess/UnivariateAwarePreprocessor.py | 19 +++---- 5 files changed, 53 insertions(+), 30 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/plots/PartitionManager.py b/plugins/plotly-express/src/deephaven/plot/express/plots/PartitionManager.py index 84f94dce5..cc0b33cce 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/plots/PartitionManager.py +++ b/plugins/plotly-express/src/deephaven/plot/express/plots/PartitionManager.py @@ -418,7 +418,7 @@ def process_partitions(self) -> Table | PartitionedTable: # preprocessor needs to be initialized after the always attached arguments are found self.preprocessor = Preprocessor( - args, self.groups, self.always_attached, self.pivot_vars + args, self.groups, self.always_attached, self.pivot_vars, self.list_var ) if partition_cols: diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py index e0ba0f5a9..bf5cf3dfb 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py @@ -33,14 +33,14 @@ def preprocess_partitioned_tables( A tuple containing (the new table, an update to make to the args) """ - column = self.value_col if not column else column + column = self.bar_col if not column else column names = get_unique_names(self.table, ["count"]) - self.args[self.value_var] = names["count"] + self.args[self.bar_var] = names["count"] for table in tables: yield table.view([column]).count_by(names["count"], by=column), { self.axis_var: column, - self.value_var: names["count"], + self.bar_var: names["count"], } diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index 0a9067cbd..0eb9c4d4f 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -51,8 +51,14 @@ class HistPreprocessor(UnivariateAwarePreprocessor): cumulative: If True, the bins are cumulative """ - def __init__(self, args: dict[str, Any], pivot_vars: dict[str, str]): + def __init__( + self, + args: dict[str, Any], + pivot_vars: dict[str, str], + list_var: str | None = None, + ): super().__init__(args, pivot_vars) + self.list_var = list_var self.range_table = None self.names = {} self.nbins = args.pop("nbins", 10) @@ -80,14 +86,15 @@ def prepare_preprocess(self) -> None: ) def create_count_tables( - self, tables: list[Table], column: str | None = None + self, tables: list[Table], axis_col: str, bar_col: str ) -> Generator[tuple[Table, str], None, None]: """ Create count tables that aggregate up values. Args: tables: List of tables to create counts for - column: the column used + axis_col: The column to compute indices for + bar_col: The column to compute an aggregation over Yields: A tuple containing the table and a temporary column @@ -100,28 +107,28 @@ def create_count_tables( for i, table in enumerate(tables): # the column needs to be temporarily renamed to avoid collisions tmp_axis_col_base = f"tmpaxis{i}" - tmp_value_col_base = f"tmpvalue{i}" + tmp_bar_col_base = f"tmpbar{i}" tmp_col_names = get_unique_names( - table, [tmp_axis_col_base, tmp_value_col_base] + table, [tmp_axis_col_base, tmp_bar_col_base] ) - tmp_axis_col, tmp_value_col = ( + tmp_axis_col, tmp_bar_col = ( tmp_col_names[tmp_axis_col_base], - tmp_col_names[tmp_value_col_base], + tmp_col_names[tmp_bar_col_base], ) count_table = ( table.view( [ - f"{tmp_value_col} = {column}", - f"{tmp_axis_col} = {self.axis_cols[0]}", + f"{tmp_bar_col} = {bar_col}", + f"{tmp_axis_col} = {axis_col}", ] ) .join(self.range_table) .update_view(f"{range_index} = {range_}.index({tmp_axis_col})") .where(f"!isNull({range_index})") .drop_columns(range_) - .agg_by([agg_func(tmp_value_col)], range_index) + .agg_by([agg_func(tmp_bar_col)], range_index) ) - yield count_table, tmp_value_col + yield count_table, tmp_bar_col def preprocess_partitioned_tables( self, tables: list[Table], column: str | None = None @@ -137,8 +144,18 @@ def preprocess_partitioned_tables( A tuple containing the table and a mapping of metadata """ + + axis_col = self.axis_col + bar_col = self.bar_col + # column will only be set if there's a pivot var, which means the table has been restructured - column = self.value_col if not column else column + # the column passed will be associated with whatever the list_var was, so the list_var needs to + # be matched to the axis_var or bar_var, which determines how the aggregations are calculated + if self.list_var: + if self.list_var == self.axis_var: + axis_col = column if column else self.axis_col + elif self.list_var == self.bar_var: + bar_col = column if column else self.bar_col range_index, range_, bin_min, bin_max, total = ( self.names["range_index"], @@ -153,7 +170,9 @@ def preprocess_partitioned_tables( ) count_cols = [] - for count_table, count_col in self.create_count_tables(tables, column): + for count_table, count_col in self.create_count_tables( + tables, axis_col, bar_col + ): bin_counts = bin_counts.natural_join( count_table, on=[range_index], joins=[count_col] ) @@ -220,7 +239,7 @@ def preprocess_partitioned_tables( var_axis_displayed = var_axis_name - if self.axis_cols[0] != column: + if self.axis_col != column: # if a different column is aggregated on, the axis name should reflect that # todo: plumb this through the args var_axis_displayed = f"{var_axis_name} of {column}" @@ -229,5 +248,5 @@ def preprocess_partitioned_tables( # todo: better way to handle this rather that flipping axis_var and value_var later???? yield bin_counts.view([var_axis_name, f"{column} = {count_col}"]), { self.axis_var: var_axis_name, - self.value_var: column, + self.bar_var: column, } diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/Preprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/Preprocessor.py index 1cba930a0..596c71de3 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/Preprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/Preprocessor.py @@ -34,21 +34,24 @@ def __init__( groups: set[str], always_attached: dict[tuple[str, str], tuple[dict[str, str], list[str], str]], pivot_vars: dict[str, str], + list_var: str | None, ): self.args = args self.groups = groups self.preprocesser = None self.always_attached = always_attached self.pivot_vars = pivot_vars + self.list_var = list_var self.prepare_preprocess() - pass def prepare_preprocess(self) -> None: """ Prepare for preprocessing by capturing information needed """ if "preprocess_hist" in self.groups: - self.preprocesser = HistPreprocessor(self.args, self.pivot_vars) + self.preprocesser = HistPreprocessor( + self.args, self.pivot_vars, self.list_var + ) elif "preprocess_freq" in self.groups: self.preprocesser = FreqPreprocessor(self.args) elif "always_attached" in self.groups and self.always_attached: diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py index 3403505b9..1446be886 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py @@ -17,7 +17,7 @@ class UnivariateAwarePreprocessor: args: dict[str, str]: Figure creation args table: Table: The table to use axis_var: str: The main var. The list of vars was passed to this arg. - value_var: The other var. + bar_var: The other var. col_val: str: The value column, which is the value in pivot_var if there is a list, otherwise the arg passed to var cols: list[str]: The columns that are being used @@ -29,7 +29,7 @@ def __init__(self, args: dict[str, Any], pivot_vars: dict[str, str] | None = Non self.orientation = self.calculate_orientation() self.args["orientation"] = self.orientation self.axis_var = "x" if args.get("x") else "y" - self.value_var = "y" if self.axis_var == "x" else "x" + self.bar_var = "y" if self.axis_var == "x" else "x" self.axis_col: str = ( pivot_vars["variable"] if pivot_vars else args[self.axis_var] ) @@ -39,16 +39,17 @@ def __init__(self, args: dict[str, Any], pivot_vars: dict[str, str] | None = Non # if value_var is not set, the value column is the same as the axis column because both the axis bins and value # are computed from the same inputs - if self.args.get(self.value_var): - self.value_col: str = ( - pivot_vars["value"] if pivot_vars else args[self.value_var] + if self.args.get(self.bar_var): + self.bar_col: str = ( + pivot_vars["value"] if pivot_vars else args[self.bar_var] ) - self.value_cols = ( - self.value_col if isinstance(self.value_col, list) else [self.value_col] + self.bar_cols = ( + self.bar_col if isinstance(self.bar_col, list) else [self.bar_col] ) else: - self.value_col = self.axis_col - self.value_cols = self.axis_cols + self.bar_col = self.axis_col + self.bar_cols = self.axis_cols + print(self.axis_col, self.axis_cols, self.bar_col, self.bar_cols, self.table) def calculate_orientation(self): """ From f495a108a3913473f156cb8a0cd5ce8b6ce5c8d3 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Fri, 1 Nov 2024 15:00:50 -0500 Subject: [PATCH 03/17] wip --- .../express/preprocess/HistPreprocessor.py | 18 +++++++++++++----- .../preprocess/UnivariateAwarePreprocessor.py | 16 ++++++++++++---- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index 0eb9c4d4f..5cad27c84 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -57,7 +57,7 @@ def __init__( pivot_vars: dict[str, str], list_var: str | None = None, ): - super().__init__(args, pivot_vars) + super().__init__(args, pivot_vars, list_var) self.list_var = list_var self.range_table = None self.names = {} @@ -77,6 +77,7 @@ def prepare_preprocess(self) -> None: self.args["table"], ["range_index", "range", "bin_min", "bin_max", self.histfunc, "total"], ) + print(self.axis_cols) self.range_table = create_range_table( self.args["table"], self.axis_cols, @@ -151,12 +152,12 @@ def preprocess_partitioned_tables( # column will only be set if there's a pivot var, which means the table has been restructured # the column passed will be associated with whatever the list_var was, so the list_var needs to # be matched to the axis_var or bar_var, which determines how the aggregations are calculated - if self.list_var: + """if self.list_var: if self.list_var == self.axis_var: axis_col = column if column else self.axis_col elif self.list_var == self.bar_var: bar_col = column if column else self.bar_col - + """ range_index, range_, bin_min, bin_max, total = ( self.names["range_index"], self.names["range"], @@ -244,9 +245,16 @@ def preprocess_partitioned_tables( # todo: plumb this through the args var_axis_displayed = f"{var_axis_name} of {column}" + print( + { + self.axis_var: var_axis_name, + self.bar_var: column, + } + ) + for count_col in count_cols: # todo: better way to handle this rather that flipping axis_var and value_var later???? - yield bin_counts.view([var_axis_name, f"{column} = {count_col}"]), { + yield bin_counts.view([var_axis_name, f"{bar_col} = {count_col}"]), { self.axis_var: var_axis_name, - self.bar_var: column, + self.bar_var: self.bar_col, } diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py index 1446be886..aa15bf3ac 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py @@ -23,7 +23,12 @@ class UnivariateAwarePreprocessor: cols: list[str]: The columns that are being used """ - def __init__(self, args: dict[str, Any], pivot_vars: dict[str, str] | None = None): + def __init__( + self, + args: dict[str, Any], + pivot_vars: dict[str, str] | None = None, + list_var: str | None = None, + ): self.args = args self.table = args["table"] self.orientation = self.calculate_orientation() @@ -31,7 +36,9 @@ def __init__(self, args: dict[str, Any], pivot_vars: dict[str, str] | None = Non self.axis_var = "x" if args.get("x") else "y" self.bar_var = "y" if self.axis_var == "x" else "x" self.axis_col: str = ( - pivot_vars["variable"] if pivot_vars else args[self.axis_var] + pivot_vars["value"] + if pivot_vars and list_var and list_var == self.axis_var + else args[self.axis_var] ) self.axis_cols = ( self.axis_col if isinstance(self.axis_col, list) else [self.axis_col] @@ -41,7 +48,9 @@ def __init__(self, args: dict[str, Any], pivot_vars: dict[str, str] | None = Non # are computed from the same inputs if self.args.get(self.bar_var): self.bar_col: str = ( - pivot_vars["value"] if pivot_vars else args[self.bar_var] + pivot_vars["value"] + if pivot_vars and list_var and list_var == self.bar_var + else args[self.bar_var] ) self.bar_cols = ( self.bar_col if isinstance(self.bar_col, list) else [self.bar_col] @@ -49,7 +58,6 @@ def __init__(self, args: dict[str, Any], pivot_vars: dict[str, str] | None = Non else: self.bar_col = self.axis_col self.bar_cols = self.axis_cols - print(self.axis_col, self.axis_cols, self.bar_col, self.bar_cols, self.table) def calculate_orientation(self): """ From 07588a500e13e9228943fdeda07c2aea66055386 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Fri, 1 Nov 2024 15:35:00 -0500 Subject: [PATCH 04/17] wip --- .../deephaven/plot/express/preprocess/HistPreprocessor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index 5cad27c84..4372d5add 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -63,7 +63,9 @@ def __init__( self.names = {} self.nbins = args.pop("nbins", 10) self.range_bins = args.pop("range_bins", None) - self.histfunc = args.pop("histfunc", "count") + self.histfunc = args.pop( + "histfunc", "count" + ) # should be sum if both x and y are set self.barnorm = args.pop("barnorm", None) self.histnorm = args.pop("histnorm", None) self.cumulative = args.pop("cumulative", False) @@ -254,6 +256,7 @@ def preprocess_partitioned_tables( for count_col in count_cols: # todo: better way to handle this rather that flipping axis_var and value_var later???? + # todo: this seems to be flipped the wrong way too yield bin_counts.view([var_axis_name, f"{bar_col} = {count_col}"]), { self.axis_var: var_axis_name, self.bar_var: self.bar_col, From c50d052f14eab4571b006b235096945f8c5debc4 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Mon, 4 Nov 2024 12:31:00 -0600 Subject: [PATCH 05/17] wip --- .../plot/express/plots/_private_utils.py | 2 ++ .../src/deephaven/plot/express/plots/bar.py | 9 ++++++++- .../plot/express/plots/distribution.py | 9 ++++++++- .../preprocess/UnivariateAwarePreprocessor.py | 17 ++++++++++++++--- .../plot/express/preprocess/utilities.py | 19 +++++++++++++++++++ .../deephaven/plot/express/types/__init__.py | 2 +- .../src/deephaven/plot/express/types/plots.py | 4 +++- 7 files changed, 55 insertions(+), 7 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py b/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py index 33cd43513..ec3f6ff53 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py +++ b/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py @@ -25,6 +25,8 @@ ) from ..types import PartitionableTableLike +NUMERIC_TYPES = {"short", "int", "long", "float", "double"} + def validate_common_args(args: dict) -> None: """Validate common args amongst plots diff --git a/plugins/plotly-express/src/deephaven/plot/express/plots/bar.py b/plugins/plotly-express/src/deephaven/plot/express/plots/bar.py index 41762e9b8..28c91411a 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/plots/bar.py +++ b/plugins/plotly-express/src/deephaven/plot/express/plots/bar.py @@ -10,7 +10,7 @@ from ._private_utils import validate_common_args, process_args from ..shared import default_callback from ..deephaven_figure import generate_figure, DeephavenFigure -from ..types import PartitionableTableLike +from ..types import PartitionableTableLike, Orientation def bar( @@ -42,6 +42,7 @@ def bar( range_color: list[float] | None = None, color_continuous_midpoint: float | None = None, opacity: float | None = None, + orientation: Orientation | None = None, barmode: str = "relative", log_x: bool = False, log_y: bool = False, @@ -114,6 +115,12 @@ def bar( color_continuous_midpoint: A number that is the midpoint of the color axis opacity: Opacity to apply to all markers. 0 is completely transparent and 1 is completely opaque. + orientation: The orientation of the bars. + If 'v', the bars are vertical. + If 'h', the bars are horizontal. + Defaults to 'v' if only `x` is specified. + Defaults to 'h' if only `y` is specified. + Defaults to 'v' if both `x` and `y` are specified unless `x` is passed only numeric columns and `y` is not. barmode: If 'relative', bars are stacked. If 'overlay', bars are drawn on top of each other. If 'group', bars are drawn next to each other. log_x: A boolean or list of booleans that specify if diff --git a/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py b/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py index 47063a9a4..5965999c2 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py +++ b/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py @@ -18,7 +18,7 @@ HISTOGRAM_DEFAULTS, default_callback, ) -from ..types import PartitionableTableLike +from ..types import PartitionableTableLike, Orientation def violin( @@ -321,6 +321,7 @@ def histogram( pattern_shape_map: dict[str | tuple[str], str] | None = None, marginal: str | None = None, opacity: float | None = None, + orientation: Orientation | None = None, barmode: str = HISTOGRAM_DEFAULTS["barmode"], barnorm: str = HISTOGRAM_DEFAULTS["barnorm"], histnorm: str = HISTOGRAM_DEFAULTS["histnorm"], @@ -375,6 +376,12 @@ def histogram( marginal: The type of marginal; histogram, violin, rug, box opacity: Opacity to apply to all markers. 0 is completely transparent and 1 is completely opaque. + orientation: The orientation of the bars. + If 'v', the bars are vertical. + If 'h', the bars are horizontal. + Defaults to 'v' if only `x` is specified. + Defaults to 'h' if only `y` is specified. + Defaults to 'v' if both `x` and `y` are specified unless `x` is passed only numeric columns and `y` is not. barmode: If 'relative', bars are stacked. If 'overlay', bars are drawn on top of each other. If 'group', bars are drawn next to each other. diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py index aa15bf3ac..5e99afe49 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py @@ -1,6 +1,7 @@ from __future__ import annotations from typing import Any +from .utilities import has_all_numeric_columns class UnivariateAwarePreprocessor: @@ -31,7 +32,7 @@ def __init__( ): self.args = args self.table = args["table"] - self.orientation = self.calculate_orientation() + self.orientation = self.calculate_bar_orientation() self.args["orientation"] = self.orientation self.axis_var = "x" if args.get("x") else "y" self.bar_var = "y" if self.axis_var == "x" else "x" @@ -59,17 +60,27 @@ def __init__( self.bar_col = self.axis_col self.bar_cols = self.axis_cols - def calculate_orientation(self): + def calculate_bar_orientation(self): """ Calculate the orientation of the plot """ orientation = self.args.get("orientation") x = self.args.get("x") y = self.args.get("y") + if orientation: return orientation elif x and y: - # TODO: more sophisticated orientation calculation like plotly express, which calculates based on data types + numeric_x = has_all_numeric_columns( + self.table, x if isinstance(x, list) else [x] + ) + numeric_y = has_all_numeric_columns( + self.table, y if isinstance(y, list) else [y] + ) + if numeric_x and not numeric_y: + # if both x and y are specified, the only case plotly sets orientation to 'h' by default is when x is + # numeric and y is not + return "h" return "v" elif x: return "v" diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py index 0c97823be..b1afe8be2 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py @@ -20,6 +20,8 @@ "var": agg.var, } +NUMERIC_TYPES = {"short", "int", "long", "float", "double"} + def get_aggs( base: str, @@ -282,3 +284,20 @@ def calculate_bin_locations( f"{histfunc_col} = {agg_col}", ] ) + + +def has_all_numeric_columns(table: Table, columns: list[str]) -> bool: + """ + Check if the columns are numeric in the table + + Args: + table: The table to check + columns: The columns to check + + Returns: + True if all columns are numeric, False otherwise + """ + for col in table.columns: + if col.name in columns and col.data_type.j_name not in NUMERIC_TYPES: + return False + return True diff --git a/plugins/plotly-express/src/deephaven/plot/express/types/__init__.py b/plugins/plotly-express/src/deephaven/plot/express/types/__init__.py index 0417cd135..de8059bb5 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/types/__init__.py +++ b/plugins/plotly-express/src/deephaven/plot/express/types/__init__.py @@ -1 +1 @@ -from .plots import PartitionableTableLike, TableLike +from .plots import PartitionableTableLike, TableLike, Orientation diff --git a/plugins/plotly-express/src/deephaven/plot/express/types/plots.py b/plugins/plotly-express/src/deephaven/plot/express/types/plots.py index 25cc51aa9..6f8ab4c54 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/types/plots.py +++ b/plugins/plotly-express/src/deephaven/plot/express/types/plots.py @@ -1,6 +1,8 @@ -from typing import Union +from typing import Union, Literal from pandas import DataFrame from deephaven.table import Table, PartitionedTable TableLike = Union[Table, DataFrame] PartitionableTableLike = Union[PartitionedTable, TableLike] + +Orientation = Literal["v", "h"] From 1f8e9ae5c484dfaabc6f37f368013473451b31a5 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Mon, 4 Nov 2024 13:46:07 -0600 Subject: [PATCH 06/17] wip --- .../express/preprocess/HistPreprocessor.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index 4372d5add..3e6816f5e 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -181,7 +181,7 @@ def preprocess_partitioned_tables( ) count_cols.append(count_col) - var_axis_name = self.names[self.histfunc] + bar_var_name = self.names[self.histfunc] if not self.range_table: raise ValueError("Range table not created") @@ -190,7 +190,7 @@ def preprocess_partitioned_tables( [ f"{bin_min} = {range_}.binMin({range_index})", f"{bin_max} = {range_}.binMax({range_index})", - f"{var_axis_name}=0.5*({bin_min}+{bin_max})", + f"{bar_var_name}=0.5*({bin_min}+{bin_max})", ] ) @@ -204,13 +204,13 @@ def preprocess_partitioned_tables( ] # range_ and bin cols need to be kept for probability density - # var_axis_name needs to be kept for plotting + # bar_var_name needs to be kept for plotting bin_counts = ( bin_counts.agg_by( [ agg.sum_(sums), agg.group( - count_cols + [var_axis_name, range_, bin_min, bin_max] + count_cols + [bar_var_name, range_, bin_min, bin_max] ), ] ) @@ -240,24 +240,23 @@ def preprocess_partitioned_tables( + [f"{col}={col} * {mult_factor} / {total}" for col in count_cols] ) - var_axis_displayed = var_axis_name + bar_var_displayed = bar_var_name if self.axis_col != column: # if a different column is aggregated on, the axis name should reflect that # todo: plumb this through the args - var_axis_displayed = f"{var_axis_name} of {column}" + bar_var_displayed = f"{bar_var_name} of {column}" print( { - self.axis_var: var_axis_name, + self.axis_var: bar_var_name, self.bar_var: column, } ) for count_col in count_cols: # todo: better way to handle this rather that flipping axis_var and value_var later???? - # todo: this seems to be flipped the wrong way too - yield bin_counts.view([var_axis_name, f"{bar_col} = {count_col}"]), { - self.axis_var: var_axis_name, - self.bar_var: self.bar_col, + yield bin_counts.view([bar_var_name, f"{bar_col} = {count_col}"]), { + self.bar_var: bar_var_name, + self.axis_var: self.bar_col, } From ae85f062c4490bc92ffe857c0d763729331ee0df Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Mon, 4 Nov 2024 16:37:31 -0600 Subject: [PATCH 07/17] wip --- .../plot/express/deephaven_figure/generate.py | 46 +++++++++----- .../plot/express/plots/_private_utils.py | 1 - .../express/preprocess/HistPreprocessor.py | 63 +++++++------------ .../preprocess/UnivariateAwarePreprocessor.py | 2 +- .../plot/express/preprocess/utilities.py | 10 ++- 5 files changed, 63 insertions(+), 59 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py b/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py index 63d20ad55..0ec5215f8 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py +++ b/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py @@ -111,7 +111,8 @@ "current_col", "current_var", "labels", - "hist_val_name", + "bar_col_displayed_h", + "bar_col_displayed_v", "pivot_vars", "current_partition", "colors", @@ -824,7 +825,8 @@ def hover_text_generator( def compute_labels( hover_mapping: list[dict[str, str]], - hist_val_name: str | None, + bar_col_displayed_h: str | None, + bar_col_displayed_v: str | None, heatmap_agg_label: str | None, # hover_data - todo, dependent on arrays supported in data mappings types: set[str], @@ -847,7 +849,9 @@ def compute_labels( the renamed current_col """ - calculate_hist_labels(hist_val_name, hover_mapping[0]) + calculate_hist_labels( + bar_col_displayed_h, bar_col_displayed_v, hover_mapping[0], labels + ) calculate_density_heatmap_labels(heatmap_agg_label, hover_mapping[0], labels) @@ -880,7 +884,10 @@ def calculate_density_heatmap_labels( def calculate_hist_labels( - hist_val_name: str | None, current_mapping: dict[str, str] + bar_col_displayed_h: str | None, + bar_col_displayed_v: str | None, + hover_mapping: dict[str, str], + labels: dict[str, str] | None, ) -> None: """Calculate the histogram labels @@ -889,18 +896,19 @@ def calculate_hist_labels( current_mapping: The mapping of variables to columns """ - if hist_val_name: - # swap the names - current_mapping["x"], current_mapping["y"] = ( - current_mapping["y"], - current_mapping["x"], - ) + # only one should be set + if bar_col_displayed_h: + # a bar chart oriented horizontally has the histfunc on the x-axis + hover_mapping["x"] = bar_col_displayed_h + elif bar_col_displayed_v: + hover_mapping["y"] = bar_col_displayed_v def add_axis_titles( custom_call_args: dict[str, Any], hover_mapping: list[dict[str, str]], - hist_val_name: str | None, + bar_col_displayed_h: str | None, + bar_col_displayed_v: str | None, heatmap_agg_label: str | None, ) -> None: """Add axis titles. Generally, this only applies when there is a list variable @@ -919,7 +927,7 @@ def add_axis_titles( new_xaxis_titles = None new_yaxis_titles = None - if hist_val_name: + if bar_col_displayed_h or bar_col_displayed_v: # hist names are already set up in the mapping new_xaxis_titles = [hover_mapping[0].get("x", None)] new_yaxis_titles = [hover_mapping[0].get("y", None)] @@ -978,14 +986,16 @@ def create_hover_and_axis_titles( types = get_list_var_info(data_cols) labels = custom_call_args.get("labels", None) - hist_val_name = custom_call_args.get("hist_val_name", None) + bar_col_displayed_h = custom_call_args.get("bar_col_displayed_h", None) + bar_col_displayed_v = custom_call_args.get("bar_col_displayed_v", None) heatmap_agg_label = custom_call_args.get("heatmap_agg_label", None) current_partition = custom_call_args.get("current_partition", {}) compute_labels( hover_mapping, - hist_val_name, + bar_col_displayed_h, + bar_col_displayed_v, heatmap_agg_label, types, labels, @@ -998,7 +1008,13 @@ def create_hover_and_axis_titles( # it's possible that heatmap_agg_label was relabeled, so grab the new label heatmap_agg_label = hover_mapping[0]["z"] - add_axis_titles(custom_call_args, hover_mapping, hist_val_name, heatmap_agg_label) + add_axis_titles( + custom_call_args, + hover_mapping, + bar_col_displayed_h, + bar_col_displayed_v, + heatmap_agg_label, + ) return hover_text diff --git a/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py b/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py index ec3f6ff53..8fd0f7da0 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py +++ b/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py @@ -491,7 +491,6 @@ def shared_histogram(is_marginal: bool = True, **args: Any) -> DeephavenFigure: set_all(args, HISTOGRAM_DEFAULTS) args["bargap"] = 0 - args["hist_val_name"] = args.get("histfunc", "count") func = px.bar groups = {"bar", "preprocess_hist", "supports_lists"} diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index 3e6816f5e..4ba4e2bc5 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -77,7 +77,7 @@ def prepare_preprocess(self) -> None: """ self.names = get_unique_names( self.args["table"], - ["range_index", "range", "bin_min", "bin_max", self.histfunc, "total"], + ["range_index", "range", "bin_min", "bin_max", "bin_mid", "total"], ) print(self.axis_cols) self.range_table = create_range_table( @@ -151,15 +151,6 @@ def preprocess_partitioned_tables( axis_col = self.axis_col bar_col = self.bar_col - # column will only be set if there's a pivot var, which means the table has been restructured - # the column passed will be associated with whatever the list_var was, so the list_var needs to - # be matched to the axis_var or bar_var, which determines how the aggregations are calculated - """if self.list_var: - if self.list_var == self.axis_var: - axis_col = column if column else self.axis_col - elif self.list_var == self.bar_var: - bar_col = column if column else self.bar_col - """ range_index, range_, bin_min, bin_max, total = ( self.names["range_index"], self.names["range"], @@ -172,16 +163,18 @@ def preprocess_partitioned_tables( [long_col(self.names["range_index"], [i for i in range(self.nbins)])] ) - count_cols = [] + agg_cols = [] for count_table, count_col in self.create_count_tables( tables, axis_col, bar_col ): bin_counts = bin_counts.natural_join( count_table, on=[range_index], joins=[count_col] ) - count_cols.append(count_col) + agg_cols.append(count_col) + + print(agg_cols) - bar_var_name = self.names[self.histfunc] + bin_mid = self.names["bin_mid"] if not self.range_table: raise ValueError("Range table not created") @@ -190,18 +183,16 @@ def preprocess_partitioned_tables( [ f"{bin_min} = {range_}.binMin({range_index})", f"{bin_max} = {range_}.binMax({range_index})", - f"{bar_var_name}=0.5*({bin_min}+{bin_max})", + f"{bin_mid}=0.5*({bin_min}+{bin_max})", ] ) if self.histnorm in {"percent", "probability", "probability density"}: mult_factor = 100 if self.histnorm == "percent" else 1 - sums = [f"{col}_sum = {col}" for col in count_cols] + sums = [f"{col}_sum = {col}" for col in agg_cols] - normed = [ - f"{col} = {col} * {mult_factor} / {col}_sum" for col in count_cols - ] + normed = [f"{col} = {col} * {mult_factor} / {col}_sum" for col in agg_cols] # range_ and bin cols need to be kept for probability density # bar_var_name needs to be kept for plotting @@ -209,9 +200,7 @@ def preprocess_partitioned_tables( bin_counts.agg_by( [ agg.sum_(sums), - agg.group( - count_cols + [bar_var_name, range_, bin_min, bin_max] - ), + agg.group(agg_cols + [bin_mid, range_, bin_min, bin_max]), ] ) .update_view(normed) @@ -219,7 +208,7 @@ def preprocess_partitioned_tables( ) if self.cumulative: - bin_counts = bin_counts.update_by(cum_sum(count_cols)) + bin_counts = bin_counts.update_by(cum_sum(agg_cols)) # with plotly express, cumulative=True will ignore density (including # the density part of probability density, but not the probability @@ -229,34 +218,28 @@ def preprocess_partitioned_tables( if self.histnorm in {"density", "probability density"}: bin_counts = bin_counts.update_view( - [f"{col} = {col} / ({bin_max} - {bin_min})" for col in count_cols] + [f"{col} = {col} / ({bin_max} - {bin_min})" for col in agg_cols] ) if self.barnorm: mult_factor = 100 if self.barnorm == "percent" else 1 - sum_form = f"sum({','.join(count_cols)})" + sum_form = f"sum({','.join(agg_cols)})" bin_counts = bin_counts.update_view( [f"{total}={sum_form}"] - + [f"{col}={col} * {mult_factor} / {total}" for col in count_cols] + + [f"{col}={col} * {mult_factor} / {total}" for col in agg_cols] ) - bar_var_displayed = bar_var_name + # bar_var_displayed = bar_var_name if self.axis_col != column: # if a different column is aggregated on, the axis name should reflect that # todo: plumb this through the args - bar_var_displayed = f"{bar_var_name} of {column}" - - print( - { - self.axis_var: bar_var_name, - self.bar_var: column, - } - ) - - for count_col in count_cols: - # todo: better way to handle this rather that flipping axis_var and value_var later???? - yield bin_counts.view([bar_var_name, f"{bar_col} = {count_col}"]), { - self.bar_var: bar_var_name, - self.axis_var: self.bar_col, + # bar_var_displayed = f"{bar_var_name} of {column}" + pass + + for agg_col in agg_cols: + yield bin_counts.view([f"{axis_col} = {bin_mid}", agg_col]), { + self.bar_var: agg_col, + self.axis_var: axis_col, + f"bar_col_displayed_{self.orientation}": "test", } diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py index 5e99afe49..3ccc43f46 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py @@ -85,6 +85,6 @@ def calculate_bar_orientation(self): elif x: return "v" elif y: - return "y" + return "h" raise ValueError("Could not determine orientation") diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py index b1afe8be2..8e3a3a3ef 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py @@ -286,7 +286,9 @@ def calculate_bin_locations( ) -def has_all_numeric_columns(table: Table, columns: list[str]) -> bool: +def has_all_numeric_columns( + table: Table | PartitionedTable, columns: list[str] +) -> bool: """ Check if the columns are numeric in the table @@ -297,7 +299,11 @@ def has_all_numeric_columns(table: Table, columns: list[str]) -> bool: Returns: True if all columns are numeric, False otherwise """ - for col in table.columns: + if isinstance(table, PartitionedTable): + cols = table.constituent_table_columns + else: + cols = table.columns + for col in cols: if col.name in columns and col.data_type.j_name not in NUMERIC_TYPES: return False return True From cdbed39de25fd1431e25309269ad4fcd2c2e7823 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Tue, 5 Nov 2024 12:14:37 -0600 Subject: [PATCH 08/17] wip --- .../plot/express/plots/distribution.py | 1 + .../express/preprocess/HistPreprocessor.py | 80 ++++++++++++++++--- .../plot/express/shared/distribution_args.py | 2 +- 3 files changed, 71 insertions(+), 12 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py b/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py index 5965999c2..3eb9b2102 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py +++ b/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py @@ -403,6 +403,7 @@ def histogram( histfunc: The function to use when aggregating within bins. One of 'abs_sum', 'avg', 'count', 'count_distinct', 'max', 'median', 'min', 'std', 'sum', or 'var' + Defaults to 'count' if only one of x or y is specified and 'sum' if both are. cumulative: If True, values are cumulative. nbins: The number of bins to use. text_auto: If True, display the value at each bar. diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index 4ba4e2bc5..626adc5b0 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -63,8 +63,9 @@ def __init__( self.names = {} self.nbins = args.pop("nbins", 10) self.range_bins = args.pop("range_bins", None) + # plotly express defaults to sum if both x and y are set, count if only one is set self.histfunc = args.pop( - "histfunc", "count" + "histfunc", "count" if self.bar_col == self.axis_col else "sum" ) # should be sum if both x and y are set self.barnorm = args.pop("barnorm", None) self.histnorm = args.pop("histnorm", None) @@ -79,7 +80,6 @@ def prepare_preprocess(self) -> None: self.args["table"], ["range_index", "range", "bin_min", "bin_max", "bin_mid", "total"], ) - print(self.axis_cols) self.range_table = create_range_table( self.args["table"], self.axis_cols, @@ -133,6 +133,46 @@ def create_count_tables( ) yield count_table, tmp_bar_col + def create_bar_col_displayed(self) -> str: + """ + Create the bar column name displayed. + This mirrors the logic in plotly express. + + Returns: + The bar column name displayed + """ + # in the case where only one column is aggregated on, the axis name should reflect the histfunc used + bar_col_displayed = self.histfunc + + if self.histfunc != "count" and self.axis_col != self.bar_col: + # if a different column is aggregated on, the axis name should reflect that + # plotly express will not do this in case of count because the value of count is the same + # whether aggregating on the same column or a different one + # note that plotly express also does not allow histfunc to be anything other than count + # if only one column is aggregated on but we do, hence our extra check for column names + bar_col_displayed = f"{self.histfunc} of {self.bar_col}" + + if self.histnorm: + if self.histfunc == "sum": + if self.histnorm == "probability": + bar_col_displayed = f"fraction of {bar_col_displayed}" + elif self.histnorm == "percent": + bar_col_displayed = f"percent of {bar_col_displayed}" + else: + # in this case, plotly express uses the original column name + bar_col_displayed = f"{self.histnorm} weighted by {self.bar_col}" + elif self.histnorm == "probability": + bar_col_displayed = f"fraction of sum of {bar_col_displayed}" + elif self.histnorm == "percent": + bar_col_displayed = f"percent of sum of {bar_col_displayed}" + else: + bar_col_displayed = f"{self.histnorm} of {bar_col_displayed}" + + if self.barnorm: + bar_col_displayed = f"{bar_col_displayed} (normalized as {self.barnorm})" + + return bar_col_displayed + def preprocess_partitioned_tables( self, tables: list[Table], column: str | None = None ) -> Generator[tuple[Table, dict[str, str | None]], None, None]: @@ -172,10 +212,34 @@ def preprocess_partitioned_tables( ) agg_cols.append(count_col) - print(agg_cols) - bin_mid = self.names["bin_mid"] + # in the case where only one column is aggregated on, the axis name should reflect the histfunc used + bar_col_displayed = self.histfunc + + if axis_col != bar_col: + # if a different column is aggregated on, the axis name should reflect that + bar_col_displayed = f"{self.histfunc} of {bar_col}" + + if self.histnorm: + if self.histfunc == "sum": + if self.histnorm == "probability": + bar_col_displayed = f"fraction of {bar_col_displayed}" + elif self.histnorm == "percent": + bar_col_displayed = f"percent of {bar_col_displayed}" + else: + # in this case, plotly express uses the original column name + bar_col_displayed = f"{self.histnorm} weighted by {bar_col}" + elif self.histnorm == "probability": + bar_col_displayed = f"fraction of sum of {bar_col_displayed}" + elif self.histnorm == "percent": + bar_col_displayed = f"percent of sum of {bar_col_displayed}" + else: + bar_col_displayed = f"{self.histnorm} of {bar_col_displayed}" + + if self.barnorm: + bar_col_displayed = f"{bar_col_displayed} (normalizes as {self.barnorm})" + if not self.range_table: raise ValueError("Range table not created") @@ -231,15 +295,9 @@ def preprocess_partitioned_tables( # bar_var_displayed = bar_var_name - if self.axis_col != column: - # if a different column is aggregated on, the axis name should reflect that - # todo: plumb this through the args - # bar_var_displayed = f"{bar_var_name} of {column}" - pass - for agg_col in agg_cols: yield bin_counts.view([f"{axis_col} = {bin_mid}", agg_col]), { self.bar_var: agg_col, self.axis_var: axis_col, - f"bar_col_displayed_{self.orientation}": "test", + f"bar_col_displayed_{self.orientation}": bar_col_displayed, } diff --git a/plugins/plotly-express/src/deephaven/plot/express/shared/distribution_args.py b/plugins/plotly-express/src/deephaven/plot/express/shared/distribution_args.py index 057b7459a..b33342628 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/shared/distribution_args.py +++ b/plugins/plotly-express/src/deephaven/plot/express/shared/distribution_args.py @@ -31,7 +31,7 @@ HISTOGRAM_DEFAULTS = { "barmode": "group", "nbins": 10, - "histfunc": "count", + "histfunc": None, "histnorm": None, "cumulative": False, "range_bins": None, From d0110e7e7106e3f90426c8395782ec8c9fd72cc6 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Tue, 5 Nov 2024 15:52:43 -0600 Subject: [PATCH 09/17] wip --- .../express/preprocess/HistPreprocessor.py | 6 +- .../plot/express/plots/test_distribution.py | 357 +++++++++++++++++- .../preprocess/test_HistPreprocessor.py | 2 + 3 files changed, 360 insertions(+), 5 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index 626adc5b0..f50844f36 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -2,8 +2,8 @@ from typing import Any, Generator -from deephaven import agg, empty_table, new_table -from deephaven.table import PartitionedTable, Table +from deephaven import agg, new_table +from deephaven.table import Table from .UnivariateAwarePreprocessor import UnivariateAwarePreprocessor from ..shared import get_unique_names @@ -82,7 +82,7 @@ def prepare_preprocess(self) -> None: ) self.range_table = create_range_table( self.args["table"], - self.axis_cols, + self.axis_col, self.range_bins, self.nbins, self.names["range"], diff --git a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py index d452e5139..1e97153ba 100644 --- a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py +++ b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py @@ -6,7 +6,7 @@ class DistributionTestCase(BaseTestCase): def setUp(self) -> None: from deephaven import new_table - from deephaven.column import int_col + from deephaven.column import int_col, string_col self.source = new_table( [ @@ -14,10 +14,14 @@ def setUp(self) -> None: int_col("X2", [1, 2, 2, 3, 3, 3, 4, 4, 5]), int_col("Y", [1, 2, 2, 3, 3, 3, 4, 4, 5]), int_col("Y2", [1, 2, 2, 3, 3, 3, 4, 4, 5]), - int_col("size", [1, 2, 2, 3, 3, 3, 4, 4, 5]), int_col("text", [1, 2, 2, 3, 3, 3, 4, 4, 5]), int_col("hover_name", [1, 2, 2, 3, 3, 3, 4, 4, 5]), int_col("category", [1, 2, 1, 2, 1, 2, 1, 2, 1]), + string_col( + "color", + ["red", "blue", "red", "blue", "red", "blue", "red", "blue", "red"], + ), + string_col("size", ["1", "2", "1", "2", "1", "2", "1", "2", "1"]), ] ) @@ -368,6 +372,355 @@ def test_marginal_histogram_y(self): self.assertEqual(deephaven["is_user_set_template"], False) self.assertEqual(deephaven["is_user_set_color"], False) + def test_basic_histogram_x_y(self): + import src.deephaven.plot.express as dx + from deephaven.constants import NULL_LONG, NULL_DOUBLE + + chart = dx.histogram(self.source, x="X", y="Y").to_dict(self.exporter) + plotly, deephaven = chart["plotly"], chart["deephaven"] + + # pop template as we currently do not modify it + plotly["layout"].pop("template") + + expected_data = [ + { + "alignmentgroup": "True", + "hovertemplate": "X=%{x}
count=%{y}", + "legendgroup": "", + "marker": {"color": "#636efa", "pattern": {"shape": ""}}, + "name": "", + "offsetgroup": "", + "orientation": "v", + "showlegend": False, + "textposition": "auto", + "x": [NULL_DOUBLE], + "xaxis": "x", + "y": [NULL_LONG], + "yaxis": "y", + "type": "bar", + } + ] + + self.assertEqual(plotly["data"], expected_data) + + expected_layout = { + "bargap": 0, + "barmode": "group", + "legend": {"tracegroupgap": 0}, + "margin": {"t": 60}, + "showlegend": False, + "xaxis": { + "anchor": "y", + "domain": [0.0, 1.0], + "side": "bottom", + "title": {"text": "X"}, + }, + "yaxis": { + "anchor": "x", + "domain": [0.0, 1.0], + "side": "left", + "title": {"text": "count"}, + }, + } + + self.assertEqual(plotly["layout"], expected_layout) + + expected_mappings = [ + { + "table": 0, + "data_columns": { + "count": ["/plotly/data/0/x"], + "X": ["/plotly/data/0/y"], + }, + } + ] + + self.assertEqual(deephaven["mappings"], expected_mappings) + + self.assertEqual(deephaven["is_user_set_template"], False) + self.assertEqual(deephaven["is_user_set_color"], False) + + def test_basic_histogram_x_y_h(self): + import src.deephaven.plot.express as dx + from deephaven.constants import NULL_LONG, NULL_DOUBLE + + chart = dx.histogram(self.source, x="X", y="Y", orientation="h").to_dict( + self.exporter + ) + plotly, deephaven = chart["plotly"], chart["deephaven"] + + # pop template as we currently do not modify it + plotly["layout"].pop("template") + + expected_data = [ + { + "alignmentgroup": "True", + "hovertemplate": "X=%{x}
count=%{y}", + "legendgroup": "", + "marker": {"color": "#636efa", "pattern": {"shape": ""}}, + "name": "", + "offsetgroup": "", + "orientation": "v", + "showlegend": False, + "textposition": "auto", + "x": [NULL_DOUBLE], + "xaxis": "x", + "y": [NULL_LONG], + "yaxis": "y", + "type": "bar", + } + ] + + self.assertEqual(plotly["data"], expected_data) + + expected_layout = { + "bargap": 0, + "barmode": "group", + "legend": {"tracegroupgap": 0}, + "margin": {"t": 60}, + "showlegend": False, + "xaxis": { + "anchor": "y", + "domain": [0.0, 1.0], + "side": "bottom", + "title": {"text": "X"}, + }, + "yaxis": { + "anchor": "x", + "domain": [0.0, 1.0], + "side": "left", + "title": {"text": "count"}, + }, + } + + self.assertEqual(plotly["layout"], expected_layout) + + expected_mappings = [ + { + "table": 0, + "data_columns": { + "count": ["/plotly/data/0/x"], + "X": ["/plotly/data/0/y"], + }, + } + ] + + self.assertEqual(deephaven["mappings"], expected_mappings) + + self.assertEqual(deephaven["is_user_set_template"], False) + self.assertEqual(deephaven["is_user_set_color"], False) + + def test_basic_histogram_x_cat_y(self): + import src.deephaven.plot.express as dx + from deephaven.constants import NULL_LONG, NULL_DOUBLE + + # since y is a category, the orientation should be set to h + chart = dx.histogram(self.source, x="X", y="color").to_dict(self.exporter) + plotly, deephaven = chart["plotly"], chart["deephaven"] + + # pop template as we currently do not modify it + plotly["layout"].pop("template") + + expected_data = [ + { + "alignmentgroup": "True", + "hovertemplate": "X=%{x}
count=%{y}", + "legendgroup": "", + "marker": {"color": "#636efa", "pattern": {"shape": ""}}, + "name": "", + "offsetgroup": "", + "orientation": "v", + "showlegend": False, + "textposition": "auto", + "x": [NULL_DOUBLE], + "xaxis": "x", + "y": [NULL_LONG], + "yaxis": "y", + "type": "bar", + } + ] + + self.assertEqual(plotly["data"], expected_data) + + expected_layout = { + "bargap": 0, + "barmode": "group", + "legend": {"tracegroupgap": 0}, + "margin": {"t": 60}, + "showlegend": False, + "xaxis": { + "anchor": "y", + "domain": [0.0, 1.0], + "side": "bottom", + "title": {"text": "X"}, + }, + "yaxis": { + "anchor": "x", + "domain": [0.0, 1.0], + "side": "left", + "title": {"text": "count"}, + }, + } + + self.assertEqual(plotly["layout"], expected_layout) + + expected_mappings = [ + { + "table": 0, + "data_columns": { + "count": ["/plotly/data/0/x"], + "X": ["/plotly/data/0/y"], + }, + } + ] + + self.assertEqual(deephaven["mappings"], expected_mappings) + + self.assertEqual(deephaven["is_user_set_template"], False) + self.assertEqual(deephaven["is_user_set_color"], False) + + def test_basic_histogram_cat_x_cat_y(self): + import src.deephaven.plot.express as dx + from deephaven.constants import NULL_LONG, NULL_DOUBLE + + # since both x and y are categories, the orientation should be set to v + chart = dx.histogram(self.source, x="size", y="color").to_dict(self.exporter) + plotly, deephaven = chart["plotly"], chart["deephaven"] + + # pop template as we currently do not modify it + plotly["layout"].pop("template") + + expected_data = [ + { + "alignmentgroup": "True", + "hovertemplate": "X=%{x}
count=%{y}", + "legendgroup": "", + "marker": {"color": "#636efa", "pattern": {"shape": ""}}, + "name": "", + "offsetgroup": "", + "orientation": "v", + "showlegend": False, + "textposition": "auto", + "x": [NULL_DOUBLE], + "xaxis": "x", + "y": [NULL_LONG], + "yaxis": "y", + "type": "bar", + } + ] + + self.assertEqual(plotly["data"], expected_data) + + expected_layout = { + "bargap": 0, + "barmode": "group", + "legend": {"tracegroupgap": 0}, + "margin": {"t": 60}, + "showlegend": False, + "xaxis": { + "anchor": "y", + "domain": [0.0, 1.0], + "side": "bottom", + "title": {"text": "X"}, + }, + "yaxis": { + "anchor": "x", + "domain": [0.0, 1.0], + "side": "left", + "title": {"text": "count"}, + }, + } + + self.assertEqual(plotly["layout"], expected_layout) + + expected_mappings = [ + { + "table": 0, + "data_columns": { + "count": ["/plotly/data/0/x"], + "X": ["/plotly/data/0/y"], + }, + } + ] + + self.assertEqual(deephaven["mappings"], expected_mappings) + + self.assertEqual(deephaven["is_user_set_template"], False) + self.assertEqual(deephaven["is_user_set_color"], False) + + def test_basic_histogram_x_cat_y_v(self): + import src.deephaven.plot.express as dx + from deephaven.constants import NULL_LONG, NULL_DOUBLE + + # setting a specific orientation overrides any other logic + chart = dx.histogram(self.source, x="X", y="color", orientation="v").to_dict( + self.exporter + ) + plotly, deephaven = chart["plotly"], chart["deephaven"] + + # pop template as we currently do not modify it + plotly["layout"].pop("template") + + expected_data = [ + { + "alignmentgroup": "True", + "hovertemplate": "X=%{x}
count=%{y}", + "legendgroup": "", + "marker": {"color": "#636efa", "pattern": {"shape": ""}}, + "name": "", + "offsetgroup": "", + "orientation": "v", + "showlegend": False, + "textposition": "auto", + "x": [NULL_DOUBLE], + "xaxis": "x", + "y": [NULL_LONG], + "yaxis": "y", + "type": "bar", + } + ] + + self.assertEqual(plotly["data"], expected_data) + + expected_layout = { + "bargap": 0, + "barmode": "group", + "legend": {"tracegroupgap": 0}, + "margin": {"t": 60}, + "showlegend": False, + "xaxis": { + "anchor": "y", + "domain": [0.0, 1.0], + "side": "bottom", + "title": {"text": "X"}, + }, + "yaxis": { + "anchor": "x", + "domain": [0.0, 1.0], + "side": "left", + "title": {"text": "count"}, + }, + } + + self.assertEqual(plotly["layout"], expected_layout) + + expected_mappings = [ + { + "table": 0, + "data_columns": { + "count": ["/plotly/data/0/x"], + "X": ["/plotly/data/0/y"], + }, + } + ] + + self.assertEqual(deephaven["mappings"], expected_mappings) + + self.assertEqual(deephaven["is_user_set_template"], False) + self.assertEqual(deephaven["is_user_set_color"], False) + + # histnorm, barnorm, histfunc + def test_basic_violin_x(self): import src.deephaven.plot.express as dx from deephaven.constants import NULL_INT diff --git a/plugins/plotly-express/test/deephaven/plot/express/preprocess/test_HistPreprocessor.py b/plugins/plotly-express/test/deephaven/plot/express/preprocess/test_HistPreprocessor.py index 77765a628..012fbc8bb 100644 --- a/plugins/plotly-express/test/deephaven/plot/express/preprocess/test_HistPreprocessor.py +++ b/plugins/plotly-express/test/deephaven/plot/express/preprocess/test_HistPreprocessor.py @@ -60,6 +60,8 @@ def test_basic_hist(self): self.tables_equal(args, expected_df) + # TODO add more tests + def test_partitioned_hist(self): args = { "x": "X", From 1034629030a48981b29175511ba7db88b2ecbf2f Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Wed, 6 Nov 2024 14:37:46 -0600 Subject: [PATCH 10/17] wip --- .../express/preprocess/HistPreprocessor.py | 46 ++- .../preprocess/UnivariateAwarePreprocessor.py | 17 +- .../plot/express/preprocess/utilities.py | 25 +- .../plot/express/plots/test_distribution.py | 302 ++++++------------ .../plot/express/plots/test_scatter.py | 4 +- .../preprocess/test_HistPreprocessor.py | 8 +- 6 files changed, 127 insertions(+), 275 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index f50844f36..20d13449d 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -64,14 +64,28 @@ def __init__( self.nbins = args.pop("nbins", 10) self.range_bins = args.pop("range_bins", None) # plotly express defaults to sum if both x and y are set, count if only one is set - self.histfunc = args.pop( - "histfunc", "count" if self.bar_col == self.axis_col else "sum" - ) # should be sum if both x and y are set + self.histfunc = self.determine_histfunc() self.barnorm = args.pop("barnorm", None) self.histnorm = args.pop("histnorm", None) self.cumulative = args.pop("cumulative", False) self.prepare_preprocess() + def determine_histfunc(self) -> str: + """ + Determine the histfunc to use based on the arguments passed in + + Returns: + The histfunc to use + """ + histfunc = self.args.pop("histfunc", None) + if histfunc is None: + histfunc = ( + "count" + if self.args.get("x") is None or self.args.get("y") is None + else "sum" + ) + return histfunc + def prepare_preprocess(self) -> None: """ Prepare for preprocessing by creating a range table over all values @@ -215,30 +229,7 @@ def preprocess_partitioned_tables( bin_mid = self.names["bin_mid"] # in the case where only one column is aggregated on, the axis name should reflect the histfunc used - bar_col_displayed = self.histfunc - - if axis_col != bar_col: - # if a different column is aggregated on, the axis name should reflect that - bar_col_displayed = f"{self.histfunc} of {bar_col}" - - if self.histnorm: - if self.histfunc == "sum": - if self.histnorm == "probability": - bar_col_displayed = f"fraction of {bar_col_displayed}" - elif self.histnorm == "percent": - bar_col_displayed = f"percent of {bar_col_displayed}" - else: - # in this case, plotly express uses the original column name - bar_col_displayed = f"{self.histnorm} weighted by {bar_col}" - elif self.histnorm == "probability": - bar_col_displayed = f"fraction of sum of {bar_col_displayed}" - elif self.histnorm == "percent": - bar_col_displayed = f"percent of sum of {bar_col_displayed}" - else: - bar_col_displayed = f"{self.histnorm} of {bar_col_displayed}" - - if self.barnorm: - bar_col_displayed = f"{bar_col_displayed} (normalizes as {self.barnorm})" + bar_col_displayed = self.create_bar_col_displayed() if not self.range_table: raise ValueError("Range table not created") @@ -299,5 +290,6 @@ def preprocess_partitioned_tables( yield bin_counts.view([f"{axis_col} = {bin_mid}", agg_col]), { self.bar_var: agg_col, self.axis_var: axis_col, + # hist col f"bar_col_displayed_{self.orientation}": bar_col_displayed, } diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py index 3ccc43f46..1d1e78c3c 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py @@ -1,7 +1,6 @@ from __future__ import annotations from typing import Any -from .utilities import has_all_numeric_columns class UnivariateAwarePreprocessor: @@ -70,19 +69,11 @@ def calculate_bar_orientation(self): if orientation: return orientation - elif x and y: - numeric_x = has_all_numeric_columns( - self.table, x if isinstance(x, list) else [x] - ) - numeric_y = has_all_numeric_columns( - self.table, y if isinstance(y, list) else [y] - ) - if numeric_x and not numeric_y: - # if both x and y are specified, the only case plotly sets orientation to 'h' by default is when x is - # numeric and y is not - return "h" - return "v" elif x: + # Note that this will also be the default if both are specified + # plotly express does some more sophisticated checking for data types + # when both are specified but categorical data will fail due to the + # engine preprocessing in our implementation so just assume vertical return "v" elif y: return "h" diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py index 8e3a3a3ef..79e8e9575 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py @@ -19,7 +19,7 @@ "sum": agg.sum_, "var": agg.var, } - +# todo: more types here? NUMERIC_TYPES = {"short", "int", "long", "float", "double"} @@ -284,26 +284,3 @@ def calculate_bin_locations( f"{histfunc_col} = {agg_col}", ] ) - - -def has_all_numeric_columns( - table: Table | PartitionedTable, columns: list[str] -) -> bool: - """ - Check if the columns are numeric in the table - - Args: - table: The table to check - columns: The columns to check - - Returns: - True if all columns are numeric, False otherwise - """ - if isinstance(table, PartitionedTable): - cols = table.constituent_table_columns - else: - cols = table.columns - for col in cols: - if col.name in columns and col.data_type.j_name not in NUMERIC_TYPES: - return False - return True diff --git a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py index 1e97153ba..115237218 100644 --- a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py +++ b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py @@ -25,6 +25,24 @@ def setUp(self) -> None: ] ) + def assert_bar_col_displayed_equal(self, chart, x, y): + """ + Assert that the x and y titles are set correctly + + Args: + chart: The chart to check + x: The x title + y: The y title + """ + + plotly = chart.to_dict(self.exporter)["plotly"] + self.assertEqual( + plotly["data"][0]["hovertemplate"], + f"{x}=%{{x}}
{y}=%{{y}}", + ) + self.assertEqual(plotly["layout"]["xaxis"]["title"]["text"], x) + self.assertEqual(plotly["layout"]["yaxis"]["title"]["text"], y) + def test_basic_histogram_x(self): import src.deephaven.plot.express as dx from deephaven.constants import NULL_LONG, NULL_DOUBLE @@ -82,8 +100,8 @@ def test_basic_histogram_x(self): { "table": 0, "data_columns": { - "count": ["/plotly/data/0/x"], - "X": ["/plotly/data/0/y"], + "X": ["/plotly/data/0/x"], + "tmpbar0": ["/plotly/data/0/y"], }, } ] @@ -97,7 +115,7 @@ def test_basic_histogram_y(self): import src.deephaven.plot.express as dx from deephaven.constants import NULL_LONG, NULL_DOUBLE - chart = dx.histogram(self.source, y="X").to_dict(self.exporter) + chart = dx.histogram(self.source, y="Y").to_dict(self.exporter) plotly, deephaven = chart["plotly"], chart["deephaven"] # pop template as we currently do not modify it @@ -106,7 +124,7 @@ def test_basic_histogram_y(self): expected_data = [ { "alignmentgroup": "True", - "hovertemplate": "count=%{x}
X=%{y}", + "hovertemplate": "count=%{x}
Y=%{y}", "legendgroup": "", "marker": {"color": "#636efa", "pattern": {"shape": ""}}, "name": "", @@ -140,7 +158,7 @@ def test_basic_histogram_y(self): "anchor": "x", "domain": [0.0, 1.0], "side": "left", - "title": {"text": "X"}, + "title": {"text": "Y"}, }, } @@ -150,8 +168,8 @@ def test_basic_histogram_y(self): { "table": 0, "data_columns": { - "X": ["/plotly/data/0/x"], - "count": ["/plotly/data/0/y"], + "tmpbar0": ["/plotly/data/0/x"], + "Y": ["/plotly/data/0/y"], }, } ] @@ -256,8 +274,8 @@ def test_marginal_histogram_x(self): { "table": 0, "data_columns": { - "count": ["/plotly/data/0/x"], - "X": ["/plotly/data/0/y"], + "X": ["/plotly/data/0/x"], + "tmpbar0": ["/plotly/data/0/y"], }, }, {"table": 0, "data_columns": {"X": ["/plotly/data/1/x"]}}, @@ -360,8 +378,8 @@ def test_marginal_histogram_y(self): { "table": 0, "data_columns": { - "count": ["/plotly/data/0/x"], - "X": ["/plotly/data/0/y"], + "X": ["/plotly/data/0/x"], + "tmpbar0": ["/plotly/data/0/y"], }, }, {"table": 0, "data_columns": {"X": ["/plotly/data/1/x"]}}, @@ -385,7 +403,7 @@ def test_basic_histogram_x_y(self): expected_data = [ { "alignmentgroup": "True", - "hovertemplate": "X=%{x}
count=%{y}", + "hovertemplate": "X=%{x}
sum of Y=%{y}", "legendgroup": "", "marker": {"color": "#636efa", "pattern": {"shape": ""}}, "name": "", @@ -419,7 +437,7 @@ def test_basic_histogram_x_y(self): "anchor": "x", "domain": [0.0, 1.0], "side": "left", - "title": {"text": "count"}, + "title": {"text": "sum of Y"}, }, } @@ -429,8 +447,8 @@ def test_basic_histogram_x_y(self): { "table": 0, "data_columns": { - "count": ["/plotly/data/0/x"], - "X": ["/plotly/data/0/y"], + "X": ["/plotly/data/0/x"], + "tmpbar0": ["/plotly/data/0/y"], }, } ] @@ -455,12 +473,12 @@ def test_basic_histogram_x_y_h(self): expected_data = [ { "alignmentgroup": "True", - "hovertemplate": "X=%{x}
count=%{y}", + "hovertemplate": "X=%{x}
sum of Y=%{y}", "legendgroup": "", "marker": {"color": "#636efa", "pattern": {"shape": ""}}, "name": "", "offsetgroup": "", - "orientation": "v", + "orientation": "h", "showlegend": False, "textposition": "auto", "x": [NULL_DOUBLE], @@ -489,7 +507,7 @@ def test_basic_histogram_x_y_h(self): "anchor": "x", "domain": [0.0, 1.0], "side": "left", - "title": {"text": "count"}, + "title": {"text": "sum of Y"}, }, } @@ -499,7 +517,7 @@ def test_basic_histogram_x_y_h(self): { "table": 0, "data_columns": { - "count": ["/plotly/data/0/x"], + "tmpbar0": ["/plotly/data/0/x"], "X": ["/plotly/data/0/y"], }, } @@ -510,216 +528,92 @@ def test_basic_histogram_x_y_h(self): self.assertEqual(deephaven["is_user_set_template"], False) self.assertEqual(deephaven["is_user_set_color"], False) - def test_basic_histogram_x_cat_y(self): + def test_basic_histogram_bar_col_displayed(self): import src.deephaven.plot.express as dx - from deephaven.constants import NULL_LONG, NULL_DOUBLE - # since y is a category, the orientation should be set to h - chart = dx.histogram(self.source, x="X", y="color").to_dict(self.exporter) - plotly, deephaven = chart["plotly"], chart["deephaven"] + chart = dx.histogram(self.source, x="X") + self.assert_bar_col_displayed_equal(chart, "X", "count") - # pop template as we currently do not modify it - plotly["layout"].pop("template") + chart = dx.histogram(self.source, y="Y") + self.assert_bar_col_displayed_equal(chart, "count", "Y") - expected_data = [ - { - "alignmentgroup": "True", - "hovertemplate": "X=%{x}
count=%{y}", - "legendgroup": "", - "marker": {"color": "#636efa", "pattern": {"shape": ""}}, - "name": "", - "offsetgroup": "", - "orientation": "v", - "showlegend": False, - "textposition": "auto", - "x": [NULL_DOUBLE], - "xaxis": "x", - "y": [NULL_LONG], - "yaxis": "y", - "type": "bar", - } - ] + # default histfunc is sum when both x and y are provided + chart = dx.histogram(self.source, x="X", y="Y", histfunc="count") + self.assert_bar_col_displayed_equal(chart, "X", "count") - self.assertEqual(plotly["data"], expected_data) + chart = dx.histogram(self.source, x="X", histfunc="sum") + self.assert_bar_col_displayed_equal(chart, "X", "sum") - expected_layout = { - "bargap": 0, - "barmode": "group", - "legend": {"tracegroupgap": 0}, - "margin": {"t": 60}, - "showlegend": False, - "xaxis": { - "anchor": "y", - "domain": [0.0, 1.0], - "side": "bottom", - "title": {"text": "X"}, - }, - "yaxis": { - "anchor": "x", - "domain": [0.0, 1.0], - "side": "left", - "title": {"text": "count"}, - }, - } + chart = dx.histogram(self.source, y="Y", histfunc="sum") + self.assert_bar_col_displayed_equal(chart, "sum", "Y") - self.assertEqual(plotly["layout"], expected_layout) + chart = dx.histogram(self.source, x="X", y="Y") + self.assert_bar_col_displayed_equal(chart, "X", "sum of Y") - expected_mappings = [ - { - "table": 0, - "data_columns": { - "count": ["/plotly/data/0/x"], - "X": ["/plotly/data/0/y"], - }, - } - ] + chart = dx.histogram(self.source, x="X", histfunc="abs_sum") + self.assert_bar_col_displayed_equal(chart, "X", "abs_sum") - self.assertEqual(deephaven["mappings"], expected_mappings) + chart = dx.histogram(self.source, x="X", histfunc="avg") + self.assert_bar_col_displayed_equal(chart, "X", "avg") - self.assertEqual(deephaven["is_user_set_template"], False) - self.assertEqual(deephaven["is_user_set_color"], False) + chart = dx.histogram(self.source, x="X", histfunc="count_distinct") + self.assert_bar_col_displayed_equal(chart, "X", "count_distinct") - def test_basic_histogram_cat_x_cat_y(self): - import src.deephaven.plot.express as dx - from deephaven.constants import NULL_LONG, NULL_DOUBLE + chart = dx.histogram(self.source, x="X", histfunc="max") + self.assert_bar_col_displayed_equal(chart, "X", "max") - # since both x and y are categories, the orientation should be set to v - chart = dx.histogram(self.source, x="size", y="color").to_dict(self.exporter) - plotly, deephaven = chart["plotly"], chart["deephaven"] + chart = dx.histogram(self.source, x="X", histfunc="median") + self.assert_bar_col_displayed_equal(chart, "X", "median") - # pop template as we currently do not modify it - plotly["layout"].pop("template") + chart = dx.histogram(self.source, x="X", histfunc="min") + self.assert_bar_col_displayed_equal(chart, "X", "min") - expected_data = [ - { - "alignmentgroup": "True", - "hovertemplate": "X=%{x}
count=%{y}", - "legendgroup": "", - "marker": {"color": "#636efa", "pattern": {"shape": ""}}, - "name": "", - "offsetgroup": "", - "orientation": "v", - "showlegend": False, - "textposition": "auto", - "x": [NULL_DOUBLE], - "xaxis": "x", - "y": [NULL_LONG], - "yaxis": "y", - "type": "bar", - } - ] + chart = dx.histogram(self.source, x="X", histfunc="std") + self.assert_bar_col_displayed_equal(chart, "X", "std") - self.assertEqual(plotly["data"], expected_data) + chart = dx.histogram(self.source, x="X", histfunc="var") + self.assert_bar_col_displayed_equal(chart, "X", "var") - expected_layout = { - "bargap": 0, - "barmode": "group", - "legend": {"tracegroupgap": 0}, - "margin": {"t": 60}, - "showlegend": False, - "xaxis": { - "anchor": "y", - "domain": [0.0, 1.0], - "side": "bottom", - "title": {"text": "X"}, - }, - "yaxis": { - "anchor": "x", - "domain": [0.0, 1.0], - "side": "left", - "title": {"text": "count"}, - }, - } + chart = dx.histogram(self.source, x="X", histnorm="probability") + self.assert_bar_col_displayed_equal(chart, "X", "fraction of sum of count") - self.assertEqual(plotly["layout"], expected_layout) + chart = dx.histogram(self.source, x="X", histnorm="percent") + self.assert_bar_col_displayed_equal(chart, "X", "percent of sum of count") - expected_mappings = [ - { - "table": 0, - "data_columns": { - "count": ["/plotly/data/0/x"], - "X": ["/plotly/data/0/y"], - }, - } - ] + chart = dx.histogram(self.source, x="X", histnorm="density") + self.assert_bar_col_displayed_equal(chart, "X", "density of count") - self.assertEqual(deephaven["mappings"], expected_mappings) - - self.assertEqual(deephaven["is_user_set_template"], False) - self.assertEqual(deephaven["is_user_set_color"], False) + chart = dx.histogram(self.source, x="X", histnorm="probability density") + self.assert_bar_col_displayed_equal(chart, "X", "probability density of count") - def test_basic_histogram_x_cat_y_v(self): - import src.deephaven.plot.express as dx - from deephaven.constants import NULL_LONG, NULL_DOUBLE + chart = dx.histogram(self.source, x="X", y="Y", histnorm="probability") + self.assert_bar_col_displayed_equal(chart, "X", "fraction of sum of Y") - # setting a specific orientation overrides any other logic - chart = dx.histogram(self.source, x="X", y="color", orientation="v").to_dict( - self.exporter - ) - plotly, deephaven = chart["plotly"], chart["deephaven"] + chart = dx.histogram(self.source, x="X", y="Y", histnorm="percent") + self.assert_bar_col_displayed_equal(chart, "X", "percent of sum of Y") - # pop template as we currently do not modify it - plotly["layout"].pop("template") + chart = dx.histogram(self.source, x="X", y="Y", histnorm="density") + self.assert_bar_col_displayed_equal(chart, "X", "density weighted by Y") - expected_data = [ - { - "alignmentgroup": "True", - "hovertemplate": "X=%{x}
count=%{y}", - "legendgroup": "", - "marker": {"color": "#636efa", "pattern": {"shape": ""}}, - "name": "", - "offsetgroup": "", - "orientation": "v", - "showlegend": False, - "textposition": "auto", - "x": [NULL_DOUBLE], - "xaxis": "x", - "y": [NULL_LONG], - "yaxis": "y", - "type": "bar", - } - ] - - self.assertEqual(plotly["data"], expected_data) - - expected_layout = { - "bargap": 0, - "barmode": "group", - "legend": {"tracegroupgap": 0}, - "margin": {"t": 60}, - "showlegend": False, - "xaxis": { - "anchor": "y", - "domain": [0.0, 1.0], - "side": "bottom", - "title": {"text": "X"}, - }, - "yaxis": { - "anchor": "x", - "domain": [0.0, 1.0], - "side": "left", - "title": {"text": "count"}, - }, - } - - self.assertEqual(plotly["layout"], expected_layout) - - expected_mappings = [ - { - "table": 0, - "data_columns": { - "count": ["/plotly/data/0/x"], - "X": ["/plotly/data/0/y"], - }, - } - ] - - self.assertEqual(deephaven["mappings"], expected_mappings) + chart = dx.histogram(self.source, x="X", barnorm="fraction") + self.assert_bar_col_displayed_equal( + chart, "X", "count (normalized as fraction)" + ) - self.assertEqual(deephaven["is_user_set_template"], False) - self.assertEqual(deephaven["is_user_set_color"], False) + chart = dx.histogram(self.source, x="X", barnorm="percent") + self.assert_bar_col_displayed_equal(chart, "X", "count (normalized as percent)") - # histnorm, barnorm, histfunc + chart = dx.histogram( + self.source, + x="X", + y="Y", + histfunc="avg", + barnorm="percent", + histnorm="density", + ) + self.assert_bar_col_displayed_equal( + chart, "X", "density of avg of Y (normalized as percent)" + ) def test_basic_violin_x(self): import src.deephaven.plot.express as dx diff --git a/plugins/plotly-express/test/deephaven/plot/express/plots/test_scatter.py b/plugins/plotly-express/test/deephaven/plot/express/plots/test_scatter.py index 3f728acc0..c4231f69d 100644 --- a/plugins/plotly-express/test/deephaven/plot/express/plots/test_scatter.py +++ b/plugins/plotly-express/test/deephaven/plot/express/plots/test_scatter.py @@ -748,8 +748,8 @@ def test_marginal_scatter(self): { "table": 0, "data_columns": { - "count": ["/plotly/data/2/y"], - "Y": ["/plotly/data/2/x"], + "Y": ["/plotly/data/2/y"], + "tmpbar0": ["/plotly/data/2/x"], }, }, ] diff --git a/plugins/plotly-express/test/deephaven/plot/express/preprocess/test_HistPreprocessor.py b/plugins/plotly-express/test/deephaven/plot/express/preprocess/test_HistPreprocessor.py index 012fbc8bb..36a78fbfc 100644 --- a/plugins/plotly-express/test/deephaven/plot/express/preprocess/test_HistPreprocessor.py +++ b/plugins/plotly-express/test/deephaven/plot/express/preprocess/test_HistPreprocessor.py @@ -55,13 +55,11 @@ def test_basic_hist(self): "nbins": 2, } - expected_df = pd.DataFrame({"count": [1.0, 3.0], "X": [2, 2]}) + expected_df = pd.DataFrame({"X": [1.0, 3.0], "tmpbar0": [2, 2]}) remap_types(expected_df) self.tables_equal(args, expected_df) - # TODO add more tests - def test_partitioned_hist(self): args = { "x": "X", @@ -69,12 +67,12 @@ def test_partitioned_hist(self): "nbins": 2, } - expected_df = pd.DataFrame({"count": [1.0, 3.0], "X": [1, 1]}) + expected_df = pd.DataFrame({"X": [1.0, 3.0], "tmpbar0": [1, 1]}) remap_types(expected_df) self.tables_equal(args, expected_df, t=self.partitioned.constituent_tables[0]) - expected_df = pd.DataFrame({"count": [1.0, 3.0], "X": [1, 1]}) + expected_df = pd.DataFrame({"X": [1.0, 3.0], "tmpbar0": [1, 1]}) remap_types(expected_df) self.tables_equal(args, expected_df, t=self.partitioned.constituent_tables[1]) From fdf86e70c4701af9576ca0aa739290c37f62e8d0 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Wed, 6 Nov 2024 15:51:06 -0600 Subject: [PATCH 11/17] wip --- .../plot/express/deephaven_figure/generate.py | 1 + .../plot/express/preprocess/HistPreprocessor.py | 2 +- .../preprocess/UnivariateAwarePreprocessor.py | 2 +- .../plot/express/plots/test_distribution.py | 12 ++++++------ 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py b/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py index 0ec5215f8..87393c6b0 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py +++ b/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py @@ -897,6 +897,7 @@ def calculate_hist_labels( """ # only one should be set + # TODO: USE labels if bar_col_displayed_h: # a bar chart oriented horizontally has the histfunc on the x-axis hover_mapping["x"] = bar_col_displayed_h diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index 20d13449d..3df7b6d81 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -290,6 +290,6 @@ def preprocess_partitioned_tables( yield bin_counts.view([f"{axis_col} = {bin_mid}", agg_col]), { self.bar_var: agg_col, self.axis_var: axis_col, - # hist col + # todo: rename to hist col f"bar_col_displayed_{self.orientation}": bar_col_displayed, } diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py index 1d1e78c3c..f697699eb 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py @@ -33,7 +33,7 @@ def __init__( self.table = args["table"] self.orientation = self.calculate_bar_orientation() self.args["orientation"] = self.orientation - self.axis_var = "x" if args.get("x") else "y" + self.axis_var = "x" if self.orientation == "v" else "y" self.bar_var = "y" if self.axis_var == "x" else "x" self.axis_col: str = ( pivot_vars["value"] diff --git a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py index 115237218..01486d0b4 100644 --- a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py +++ b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py @@ -473,7 +473,7 @@ def test_basic_histogram_x_y_h(self): expected_data = [ { "alignmentgroup": "True", - "hovertemplate": "X=%{x}
sum of Y=%{y}", + "hovertemplate": "sum of X=%{x}
Y=%{y}", "legendgroup": "", "marker": {"color": "#636efa", "pattern": {"shape": ""}}, "name": "", @@ -481,9 +481,9 @@ def test_basic_histogram_x_y_h(self): "orientation": "h", "showlegend": False, "textposition": "auto", - "x": [NULL_DOUBLE], + "x": [NULL_LONG], "xaxis": "x", - "y": [NULL_LONG], + "y": [NULL_DOUBLE], "yaxis": "y", "type": "bar", } @@ -501,13 +501,13 @@ def test_basic_histogram_x_y_h(self): "anchor": "y", "domain": [0.0, 1.0], "side": "bottom", - "title": {"text": "X"}, + "title": {"text": "sum of X"}, }, "yaxis": { "anchor": "x", "domain": [0.0, 1.0], "side": "left", - "title": {"text": "sum of Y"}, + "title": {"text": "Y"}, }, } @@ -518,7 +518,7 @@ def test_basic_histogram_x_y_h(self): "table": 0, "data_columns": { "tmpbar0": ["/plotly/data/0/x"], - "X": ["/plotly/data/0/y"], + "Y": ["/plotly/data/0/y"], }, } ] From cb7f42f27171b54de828bcda2ce7b2f68510b902 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Thu, 7 Nov 2024 14:28:17 -0600 Subject: [PATCH 12/17] wip --- .../plot/express/deephaven_figure/generate.py | 57 +- .../express/preprocess/FreqPreprocessor.py | 8 +- .../express/preprocess/HistPreprocessor.py | 113 ++-- .../preprocess/UnivariateAwarePreprocessor.py | 50 +- .../plot/express/preprocess/utilities.py | 2 - .../plot/express/plots/test_distribution.py | 488 ++++++++++++++++-- 6 files changed, 551 insertions(+), 167 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py b/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py index 87393c6b0..4e4ec36a2 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py +++ b/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py @@ -111,8 +111,8 @@ "current_col", "current_var", "labels", - "bar_col_displayed_h", - "bar_col_displayed_v", + "hist_agg_label_h", + "hist_agg_label_v", "pivot_vars", "current_partition", "colors", @@ -825,8 +825,8 @@ def hover_text_generator( def compute_labels( hover_mapping: list[dict[str, str]], - bar_col_displayed_h: str | None, - bar_col_displayed_v: str | None, + hist_agg_label_h: str | None, + hist_agg_label_v: str | None, heatmap_agg_label: str | None, # hover_data - todo, dependent on arrays supported in data mappings types: set[str], @@ -839,7 +839,8 @@ def compute_labels( Args: hover_mapping: The mapping of variables to columns - hist_val_name: The histogram name for the value axis, generally histfunc + hist_agg_label_h: The histogram agg label when oriented horizontally + hist_agg_label_v: The histogram agg label when oriented vertically heatmap_agg_label: The aggregate density heatmap column title types: Any types of this chart that require special processing labels: A dictionary of old column name to new column name mappings @@ -849,9 +850,7 @@ def compute_labels( the renamed current_col """ - calculate_hist_labels( - bar_col_displayed_h, bar_col_displayed_v, hover_mapping[0], labels - ) + calculate_hist_labels(hist_agg_label_h, hist_agg_label_v, hover_mapping[0]) calculate_density_heatmap_labels(heatmap_agg_label, hover_mapping[0], labels) @@ -884,32 +883,31 @@ def calculate_density_heatmap_labels( def calculate_hist_labels( - bar_col_displayed_h: str | None, - bar_col_displayed_v: str | None, + hist_agg_label_h: str | None, + hist_agg_label_v: str | None, hover_mapping: dict[str, str], - labels: dict[str, str] | None, ) -> None: """Calculate the histogram labels Args: - hist_val_name: The histogram name for the value axis, generally histfunc - current_mapping: The mapping of variables to columns + hist_agg_label_h: The histogram agg label when oriented horizontally + hist_agg_label_v: The histogram agg label when oriented vertically + hover_mapping: The mapping of variables to columns """ # only one should be set - # TODO: USE labels - if bar_col_displayed_h: + if hist_agg_label_h: # a bar chart oriented horizontally has the histfunc on the x-axis - hover_mapping["x"] = bar_col_displayed_h - elif bar_col_displayed_v: - hover_mapping["y"] = bar_col_displayed_v + hover_mapping["x"] = hist_agg_label_h + elif hist_agg_label_v: + hover_mapping["y"] = hist_agg_label_v def add_axis_titles( custom_call_args: dict[str, Any], hover_mapping: list[dict[str, str]], - bar_col_displayed_h: str | None, - bar_col_displayed_v: str | None, + hist_agg_label_h: str | None, + hist_agg_label_v: str | None, heatmap_agg_label: str | None, ) -> None: """Add axis titles. Generally, this only applies when there is a list variable @@ -918,7 +916,8 @@ def add_axis_titles( custom_call_args: The custom_call_args that are used to create hover and axis titles hover_mapping: The mapping of variables to columns - hist_val_name: The histogram name for the value axis, generally histfunc + hist_agg_label_h: The histogram agg label when oriented horizontally + hist_agg_label_v: The histogram agg label when oriented vertically heatmap_agg_label: The aggregate density heatmap column title """ @@ -928,8 +927,8 @@ def add_axis_titles( new_xaxis_titles = None new_yaxis_titles = None - if bar_col_displayed_h or bar_col_displayed_v: - # hist names are already set up in the mapping + if hist_agg_label_h or hist_agg_label_v: + # hist labels are already set up in the mapping new_xaxis_titles = [hover_mapping[0].get("x", None)] new_yaxis_titles = [hover_mapping[0].get("y", None)] @@ -987,16 +986,16 @@ def create_hover_and_axis_titles( types = get_list_var_info(data_cols) labels = custom_call_args.get("labels", None) - bar_col_displayed_h = custom_call_args.get("bar_col_displayed_h", None) - bar_col_displayed_v = custom_call_args.get("bar_col_displayed_v", None) + hist_agg_label_h = custom_call_args.get("hist_agg_label_h", None) + hist_agg_label_v = custom_call_args.get("hist_agg_label_v", None) heatmap_agg_label = custom_call_args.get("heatmap_agg_label", None) current_partition = custom_call_args.get("current_partition", {}) compute_labels( hover_mapping, - bar_col_displayed_h, - bar_col_displayed_v, + hist_agg_label_h, + hist_agg_label_v, heatmap_agg_label, types, labels, @@ -1012,8 +1011,8 @@ def create_hover_and_axis_titles( add_axis_titles( custom_call_args, hover_mapping, - bar_col_displayed_h, - bar_col_displayed_v, + hist_agg_label_h, + hist_agg_label_v, heatmap_agg_label, ) diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py index bf5cf3dfb..a6b467cff 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/FreqPreprocessor.py @@ -33,14 +33,14 @@ def preprocess_partitioned_tables( A tuple containing (the new table, an update to make to the args) """ - column = self.bar_col if not column else column + column = self.agg_col if not column else column names = get_unique_names(self.table, ["count"]) - self.args[self.bar_var] = names["count"] + self.args[self.agg_var] = names["count"] for table in tables: yield table.view([column]).count_by(names["count"], by=column), { - self.axis_var: column, - self.bar_var: names["count"], + self.bin_var: column, + self.agg_var: names["count"], } diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index 3df7b6d81..ebfd889ad 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -58,7 +58,6 @@ def __init__( list_var: str | None = None, ): super().__init__(args, pivot_vars, list_var) - self.list_var = list_var self.range_table = None self.names = {} self.nbins = args.pop("nbins", 10) @@ -96,22 +95,22 @@ def prepare_preprocess(self) -> None: ) self.range_table = create_range_table( self.args["table"], - self.axis_col, + self.bin_col, self.range_bins, self.nbins, self.names["range"], ) def create_count_tables( - self, tables: list[Table], axis_col: str, bar_col: str + self, tables: list[Table], bin_col: str, agg_col: str ) -> Generator[tuple[Table, str], None, None]: """ Create count tables that aggregate up values. Args: tables: List of tables to create counts for - axis_col: The column to compute indices for - bar_col: The column to compute an aggregation over + bin_col: The column to compute indices for + agg_col: The column to compute an aggregation over Yields: A tuple containing the table and a temporary column @@ -123,69 +122,75 @@ def create_count_tables( raise ValueError("Range table not created") for i, table in enumerate(tables): # the column needs to be temporarily renamed to avoid collisions - tmp_axis_col_base = f"tmpaxis{i}" - tmp_bar_col_base = f"tmpbar{i}" + tmp_bin_col_base = f"tmpbin{i}" + tmp_agg_col_base = f"tmpbar{i}" tmp_col_names = get_unique_names( - table, [tmp_axis_col_base, tmp_bar_col_base] + table, [tmp_bin_col_base, tmp_agg_col_base] ) - tmp_axis_col, tmp_bar_col = ( - tmp_col_names[tmp_axis_col_base], - tmp_col_names[tmp_bar_col_base], + tmp_bin_col, tmp_agg_col = ( + tmp_col_names[tmp_bin_col_base], + tmp_col_names[tmp_agg_col_base], ) count_table = ( table.view( [ - f"{tmp_bar_col} = {bar_col}", - f"{tmp_axis_col} = {axis_col}", + f"{tmp_agg_col} = {agg_col}", + f"{tmp_bin_col} = {bin_col}", ] ) .join(self.range_table) - .update_view(f"{range_index} = {range_}.index({tmp_axis_col})") + .update_view(f"{range_index} = {range_}.index({tmp_bin_col})") .where(f"!isNull({range_index})") .drop_columns(range_) - .agg_by([agg_func(tmp_bar_col)], range_index) + .agg_by([agg_func(tmp_agg_col)], range_index) ) - yield count_table, tmp_bar_col + yield count_table, tmp_agg_col - def create_bar_col_displayed(self) -> str: + def create_hist_agg_label(self) -> str: """ - Create the bar column name displayed. + Create the agg column name displayed. This mirrors the logic in plotly express. Returns: - The bar column name displayed + The agg column name displayed """ - # in the case where only one column is aggregated on, the axis name should reflect the histfunc used - bar_col_displayed = self.histfunc + # in the case where only one column is aggregated on, the label should reflect the histfunc used + hist_agg_label = self.histfunc - if self.histfunc != "count" and self.axis_col != self.bar_col: - # if a different column is aggregated on, the axis name should reflect that + # it's possible that the user has relabeled the columns, and it's difficult to do it later + labels = self.args.get("labels", {}) + relabeled_agg_col = ( + labels.get(self.agg_col, self.agg_col) if labels else self.agg_col + ) + + if self.histfunc != "count" and self.bin_col != self.agg_col: + # if a different column is aggregated on, the label name should reflect that # plotly express will not do this in case of count because the value of count is the same # whether aggregating on the same column or a different one # note that plotly express also does not allow histfunc to be anything other than count # if only one column is aggregated on but we do, hence our extra check for column names - bar_col_displayed = f"{self.histfunc} of {self.bar_col}" + hist_agg_label = f"{self.histfunc} of {relabeled_agg_col}" if self.histnorm: if self.histfunc == "sum": if self.histnorm == "probability": - bar_col_displayed = f"fraction of {bar_col_displayed}" + hist_agg_label = f"fraction of {hist_agg_label}" elif self.histnorm == "percent": - bar_col_displayed = f"percent of {bar_col_displayed}" + hist_agg_label = f"percent of {hist_agg_label}" else: # in this case, plotly express uses the original column name - bar_col_displayed = f"{self.histnorm} weighted by {self.bar_col}" + hist_agg_label = f"{self.histnorm} weighted by {relabeled_agg_col}" elif self.histnorm == "probability": - bar_col_displayed = f"fraction of sum of {bar_col_displayed}" + hist_agg_label = f"fraction of sum of {hist_agg_label}" elif self.histnorm == "percent": - bar_col_displayed = f"percent of sum of {bar_col_displayed}" + hist_agg_label = f"percent of sum of {hist_agg_label}" else: - bar_col_displayed = f"{self.histnorm} of {bar_col_displayed}" + hist_agg_label = f"{self.histnorm} of {hist_agg_label}" if self.barnorm: - bar_col_displayed = f"{bar_col_displayed} (normalized as {self.barnorm})" + hist_agg_label = f"{hist_agg_label} (normalized as {self.barnorm})" - return bar_col_displayed + return hist_agg_label def preprocess_partitioned_tables( self, tables: list[Table], column: str | None = None @@ -202,8 +207,8 @@ def preprocess_partitioned_tables( """ - axis_col = self.axis_col - bar_col = self.bar_col + bin_col = self.bin_col + agg_col = self.agg_col range_index, range_, bin_min, bin_max, total = ( self.names["range_index"], @@ -217,19 +222,18 @@ def preprocess_partitioned_tables( [long_col(self.names["range_index"], [i for i in range(self.nbins)])] ) - agg_cols = [] + new_agg_cols = [] for count_table, count_col in self.create_count_tables( - tables, axis_col, bar_col + tables, bin_col, agg_col ): bin_counts = bin_counts.natural_join( count_table, on=[range_index], joins=[count_col] ) - agg_cols.append(count_col) + new_agg_cols.append(count_col) bin_mid = self.names["bin_mid"] - # in the case where only one column is aggregated on, the axis name should reflect the histfunc used - bar_col_displayed = self.create_bar_col_displayed() + hist_agg_label = self.create_hist_agg_label() if not self.range_table: raise ValueError("Range table not created") @@ -245,17 +249,19 @@ def preprocess_partitioned_tables( if self.histnorm in {"percent", "probability", "probability density"}: mult_factor = 100 if self.histnorm == "percent" else 1 - sums = [f"{col}_sum = {col}" for col in agg_cols] + sums = [f"{col}_sum = {col}" for col in new_agg_cols] - normed = [f"{col} = {col} * {mult_factor} / {col}_sum" for col in agg_cols] + normed = [ + f"{col} = {col} * {mult_factor} / {col}_sum" for col in new_agg_cols + ] # range_ and bin cols need to be kept for probability density - # bar_var_name needs to be kept for plotting + # agg_var_name needs to be kept for plotting bin_counts = ( bin_counts.agg_by( [ agg.sum_(sums), - agg.group(agg_cols + [bin_mid, range_, bin_min, bin_max]), + agg.group(new_agg_cols + [bin_mid, range_, bin_min, bin_max]), ] ) .update_view(normed) @@ -263,7 +269,7 @@ def preprocess_partitioned_tables( ) if self.cumulative: - bin_counts = bin_counts.update_by(cum_sum(agg_cols)) + bin_counts = bin_counts.update_by(cum_sum(new_agg_cols)) # with plotly express, cumulative=True will ignore density (including # the density part of probability density, but not the probability @@ -273,23 +279,20 @@ def preprocess_partitioned_tables( if self.histnorm in {"density", "probability density"}: bin_counts = bin_counts.update_view( - [f"{col} = {col} / ({bin_max} - {bin_min})" for col in agg_cols] + [f"{col} = {col} / ({bin_max} - {bin_min})" for col in new_agg_cols] ) if self.barnorm: mult_factor = 100 if self.barnorm == "percent" else 1 - sum_form = f"sum({','.join(agg_cols)})" + sum_form = f"sum({','.join(new_agg_cols)})" bin_counts = bin_counts.update_view( [f"{total}={sum_form}"] - + [f"{col}={col} * {mult_factor} / {total}" for col in agg_cols] + + [f"{col}={col} * {mult_factor} / {total}" for col in new_agg_cols] ) - # bar_var_displayed = bar_var_name - - for agg_col in agg_cols: - yield bin_counts.view([f"{axis_col} = {bin_mid}", agg_col]), { - self.bar_var: agg_col, - self.axis_var: axis_col, - # todo: rename to hist col - f"bar_col_displayed_{self.orientation}": bar_col_displayed, + for new_agg_col in new_agg_cols: + yield bin_counts.view([f"{bin_col} = {bin_mid}", new_agg_col]), { + self.agg_var: new_agg_col, + self.bin_var: bin_col, + f"hist_agg_label_{self.orientation}": hist_agg_label, } diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py index f697699eb..b551859b4 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py @@ -11,16 +11,21 @@ class UnivariateAwarePreprocessor: Args: args: Figure creation args - pivot_vars: Pivot vars that have the new column names + pivot_vars: The vars with new column names if a list was passed in + list_var: The var that was passed in as a list Attributes: args: dict[str, str]: Figure creation args table: Table: The table to use - axis_var: str: The main var. The list of vars was passed to this arg. - bar_var: The other var. - col_val: str: The value column, which is the value in pivot_var if - there is a list, otherwise the arg passed to var - cols: list[str]: The columns that are being used + bin_var: str: The arg that the bins are calculated on. Should be x or y. + agg_var: str: The arg that the values are aggregated on. Should be y or x. + bin_col: str: The column that the bins are calculated on. + Generally will be whatever column is specified by bin_var, + but can be different if a list was passed in. + agg_col: str: The column that the values are aggregated on. + Generally will be whatever column is specified by agg_var, + but can be different if a list was passed in. + orientation: str: The orientation of the plot. Should be 'v' or 'h'. """ def __init__( @@ -33,35 +38,28 @@ def __init__( self.table = args["table"] self.orientation = self.calculate_bar_orientation() self.args["orientation"] = self.orientation - self.axis_var = "x" if self.orientation == "v" else "y" - self.bar_var = "y" if self.axis_var == "x" else "x" - self.axis_col: str = ( + self.bin_var = "x" if self.orientation == "v" else "y" + self.agg_var = "y" if self.bin_var == "x" else "x" + self.bin_col: str = ( pivot_vars["value"] - if pivot_vars and list_var and list_var == self.axis_var - else args[self.axis_var] - ) - self.axis_cols = ( - self.axis_col if isinstance(self.axis_col, list) else [self.axis_col] + if pivot_vars and list_var and list_var == self.bin_var + else args[self.bin_var] ) - # if value_var is not set, the value column is the same as the axis column because both the axis bins and value - # are computed from the same inputs - if self.args.get(self.bar_var): - self.bar_col: str = ( + if self.args.get(self.agg_var): + self.agg_col: str = ( pivot_vars["value"] - if pivot_vars and list_var and list_var == self.bar_var - else args[self.bar_var] - ) - self.bar_cols = ( - self.bar_col if isinstance(self.bar_col, list) else [self.bar_col] + if pivot_vars and list_var and list_var == self.agg_var + else args[self.agg_var] ) else: - self.bar_col = self.axis_col - self.bar_cols = self.axis_cols + # if bar_var is not set, the value column is the same as the axis column + # because both the axis bins and value are computed from the same inputs + self.agg_col = self.bin_col def calculate_bar_orientation(self): """ - Calculate the orientation of the plot + Calculate the orientation of the plot. """ orientation = self.args.get("orientation") x = self.args.get("x") diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py index 79e8e9575..0c97823be 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/utilities.py @@ -19,8 +19,6 @@ "sum": agg.sum_, "var": agg.var, } -# todo: more types here? -NUMERIC_TYPES = {"short", "int", "long", "float", "double"} def get_aggs( diff --git a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py index 01486d0b4..eae0c266d 100644 --- a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py +++ b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py @@ -25,7 +25,7 @@ def setUp(self) -> None: ] ) - def assert_bar_col_displayed_equal(self, chart, x, y): + def assert_hist_agg_label_equal(self, chart, x, y): """ Assert that the x and y titles are set correctly @@ -395,10 +395,6 @@ def test_basic_histogram_x_y(self): from deephaven.constants import NULL_LONG, NULL_DOUBLE chart = dx.histogram(self.source, x="X", y="Y").to_dict(self.exporter) - plotly, deephaven = chart["plotly"], chart["deephaven"] - - # pop template as we currently do not modify it - plotly["layout"].pop("template") expected_data = [ { @@ -419,8 +415,6 @@ def test_basic_histogram_x_y(self): } ] - self.assertEqual(plotly["data"], expected_data) - expected_layout = { "bargap": 0, "barmode": "group", @@ -441,8 +435,6 @@ def test_basic_histogram_x_y(self): }, } - self.assertEqual(plotly["layout"], expected_layout) - expected_mappings = [ { "table": 0, @@ -453,10 +445,14 @@ def test_basic_histogram_x_y(self): } ] - self.assertEqual(deephaven["mappings"], expected_mappings) - - self.assertEqual(deephaven["is_user_set_template"], False) - self.assertEqual(deephaven["is_user_set_color"], False) + self.assert_chart_equals( + chart, + expected_data=expected_data, + expected_layout=expected_layout, + expected_mappings=expected_mappings, + expected_is_user_set_color=False, + expected_is_user_set_template=False, + ) def test_basic_histogram_x_y_h(self): import src.deephaven.plot.express as dx @@ -465,10 +461,6 @@ def test_basic_histogram_x_y_h(self): chart = dx.histogram(self.source, x="X", y="Y", orientation="h").to_dict( self.exporter ) - plotly, deephaven = chart["plotly"], chart["deephaven"] - - # pop template as we currently do not modify it - plotly["layout"].pop("template") expected_data = [ { @@ -489,8 +481,6 @@ def test_basic_histogram_x_y_h(self): } ] - self.assertEqual(plotly["data"], expected_data) - expected_layout = { "bargap": 0, "barmode": "group", @@ -511,8 +501,6 @@ def test_basic_histogram_x_y_h(self): }, } - self.assertEqual(plotly["layout"], expected_layout) - expected_mappings = [ { "table": 0, @@ -523,85 +511,443 @@ def test_basic_histogram_x_y_h(self): } ] - self.assertEqual(deephaven["mappings"], expected_mappings) + self.assert_chart_equals( + chart, + expected_data=expected_data, + expected_layout=expected_layout, + expected_mappings=expected_mappings, + expected_is_user_set_color=False, + expected_is_user_set_template=False, + ) - self.assertEqual(deephaven["is_user_set_template"], False) - self.assertEqual(deephaven["is_user_set_color"], False) + def test_basic_histogram_x_list(self): + import src.deephaven.plot.express as dx + from deephaven.constants import NULL_LONG, NULL_DOUBLE + + chart = dx.histogram(self.source, x=["X", "X2"]).to_dict(self.exporter) + + expected_data = [ + { + "alignmentgroup": "True", + "hovertemplate": "variable=X
value=%{x}
count=%{y}", + "legendgroup": "X", + "marker": {"color": "#636EFA", "pattern": {"shape": ""}}, + "name": "X", + "offsetgroup": "variable0", + "orientation": "v", + "showlegend": True, + "textposition": "auto", + "x": [NULL_DOUBLE], + "xaxis": "x", + "y": [NULL_LONG], + "yaxis": "y", + "type": "bar", + }, + { + "alignmentgroup": "True", + "hovertemplate": "variable=X2
value=%{x}
count=%{y}", + "legendgroup": "X2", + "marker": {"color": "#EF553B", "pattern": {"shape": ""}}, + "name": "X2", + "offsetgroup": "variable1", + "orientation": "v", + "showlegend": True, + "textposition": "auto", + "x": [NULL_DOUBLE], + "xaxis": "x", + "y": [NULL_LONG], + "yaxis": "y", + "type": "bar", + }, + ] + + expected_layout = { + "bargap": 0, + "barmode": "group", + "legend": {"tracegroupgap": 0}, + "margin": {"t": 60}, + "xaxis": { + "anchor": "y", + "domain": [0.0, 1.0], + "side": "bottom", + "title": {"text": "value"}, + }, + "yaxis": { + "anchor": "x", + "domain": [0.0, 1.0], + "side": "left", + "title": {"text": "count"}, + }, + } + + expected_mappings = [ + { + "data_columns": { + "tmpbar0": ["/plotly/data/0/y"], + "value": ["/plotly/data/0/x"], + }, + "table": 0, + }, + { + "data_columns": { + "tmpbar1": ["/plotly/data/1/y"], + "value": ["/plotly/data/1/x"], + }, + "table": 0, + }, + ] + + self.assert_chart_equals( + chart, + expected_data=expected_data, + expected_layout=expected_layout, + expected_mappings=expected_mappings, + expected_is_user_set_color=True, + expected_is_user_set_template=False, + ) + + def test_basic_histogram_y_list(self): + import src.deephaven.plot.express as dx + from deephaven.constants import NULL_LONG, NULL_DOUBLE + + chart = dx.histogram(self.source, y=["Y", "Y2"]).to_dict(self.exporter) + + expected_data = [ + { + "alignmentgroup": "True", + "hovertemplate": "variable=Y
count=%{x}
value=%{y}", + "legendgroup": "Y", + "marker": {"color": "#636EFA", "pattern": {"shape": ""}}, + "name": "Y", + "offsetgroup": "variable0", + "orientation": "h", + "showlegend": True, + "textposition": "auto", + "type": "bar", + "x": [NULL_LONG], + "xaxis": "x", + "y": [NULL_DOUBLE], + "yaxis": "y", + }, + { + "alignmentgroup": "True", + "hovertemplate": "variable=Y2
count=%{x}
value=%{y}", + "legendgroup": "Y2", + "marker": {"color": "#EF553B", "pattern": {"shape": ""}}, + "name": "Y2", + "offsetgroup": "variable1", + "orientation": "h", + "showlegend": True, + "textposition": "auto", + "type": "bar", + "x": [NULL_LONG], + "xaxis": "x", + "y": [NULL_DOUBLE], + "yaxis": "y", + }, + ] + + expected_layout = { + "bargap": 0, + "barmode": "group", + "legend": {"tracegroupgap": 0}, + "margin": {"t": 60}, + "xaxis": { + "anchor": "y", + "domain": [0.0, 1.0], + "side": "bottom", + "title": {"text": "count"}, + }, + "yaxis": { + "anchor": "x", + "domain": [0.0, 1.0], + "side": "left", + "title": {"text": "value"}, + }, + } + + expected_mappings = [ + { + "data_columns": { + "tmpbar0": ["/plotly/data/0/x"], + "value": ["/plotly/data/0/y"], + }, + "table": 0, + }, + { + "data_columns": { + "tmpbar1": ["/plotly/data/1/x"], + "value": ["/plotly/data/1/y"], + }, + "table": 0, + }, + ] + + self.assert_chart_equals( + chart, + expected_data=expected_data, + expected_layout=expected_layout, + expected_mappings=expected_mappings, + expected_is_user_set_color=True, + expected_is_user_set_template=False, + ) + + def test_basic_histogram_x_y_list(self): + import src.deephaven.plot.express as dx + from deephaven.constants import NULL_LONG, NULL_DOUBLE + + chart = dx.histogram(self.source, x="X", y=["Y", "Y2"]).to_dict(self.exporter) + + expected_data = [ + { + "alignmentgroup": "True", + "hovertemplate": "variable=Y
X=%{x}
sum of value=%{y}", + "legendgroup": "Y", + "marker": {"color": "#636EFA", "pattern": {"shape": ""}}, + "name": "Y", + "offsetgroup": "variable0", + "orientation": "v", + "showlegend": True, + "textposition": "auto", + "type": "bar", + "x": [NULL_DOUBLE], + "xaxis": "x", + "y": [NULL_LONG], + "yaxis": "y", + }, + { + "alignmentgroup": "True", + "hovertemplate": "variable=Y2
X=%{x}
sum of value=%{y}", + "legendgroup": "Y2", + "marker": {"color": "#EF553B", "pattern": {"shape": ""}}, + "name": "Y2", + "offsetgroup": "variable1", + "orientation": "v", + "showlegend": True, + "textposition": "auto", + "type": "bar", + "x": [NULL_DOUBLE], + "xaxis": "x", + "y": [NULL_LONG], + "yaxis": "y", + }, + ] + + expected_layout = { + "bargap": 0, + "barmode": "group", + "legend": {"tracegroupgap": 0}, + "margin": {"t": 60}, + "xaxis": { + "anchor": "y", + "domain": [0.0, 1.0], + "side": "bottom", + "title": {"text": "X"}, + }, + "yaxis": { + "anchor": "x", + "domain": [0.0, 1.0], + "side": "left", + "title": {"text": "sum of value"}, + }, + } + + expected_mappings = [ + { + "data_columns": { + "X": ["/plotly/data/0/x"], + "tmpbar0": ["/plotly/data/0/y"], + }, + "table": 0, + }, + { + "data_columns": { + "X": ["/plotly/data/1/x"], + "tmpbar1": ["/plotly/data/1/y"], + }, + "table": 0, + }, + ] + + self.assert_chart_equals( + chart, + expected_data=expected_data, + expected_layout=expected_layout, + expected_mappings=expected_mappings, + expected_is_user_set_color=True, + expected_is_user_set_template=False, + ) + + def test_basic_histogram_x_list_y(self): + import src.deephaven.plot.express as dx + from deephaven.constants import NULL_LONG, NULL_DOUBLE + + chart = dx.histogram(self.source, x=["X", "X2"], y="Y").to_dict(self.exporter) + + expected_data = [ + { + "alignmentgroup": "True", + "hovertemplate": "variable=X
value=%{x}
sum of Y=%{y}", + "legendgroup": "X", + "marker": {"color": "#636EFA", "pattern": {"shape": ""}}, + "name": "X", + "offsetgroup": "variable0", + "orientation": "v", + "showlegend": True, + "textposition": "auto", + "type": "bar", + "x": [NULL_DOUBLE], + "xaxis": "x", + "y": [NULL_LONG], + "yaxis": "y", + }, + { + "alignmentgroup": "True", + "hovertemplate": "variable=X2
value=%{x}
sum of Y=%{y}", + "legendgroup": "X2", + "marker": {"color": "#EF553B", "pattern": {"shape": ""}}, + "name": "X2", + "offsetgroup": "variable1", + "orientation": "v", + "showlegend": True, + "textposition": "auto", + "type": "bar", + "x": [NULL_DOUBLE], + "xaxis": "x", + "y": [NULL_LONG], + "yaxis": "y", + }, + ] + + expected_layout = { + "bargap": 0, + "barmode": "group", + "legend": {"tracegroupgap": 0}, + "margin": {"t": 60}, + "xaxis": { + "anchor": "y", + "domain": [0.0, 1.0], + "side": "bottom", + "title": {"text": "value"}, + }, + "yaxis": { + "anchor": "x", + "domain": [0.0, 1.0], + "side": "left", + "title": {"text": "sum of Y"}, + }, + } + + expected_mappings = [ + { + "data_columns": { + "tmpbar0": ["/plotly/data/0/y"], + "value": ["/plotly/data/0/x"], + }, + "table": 0, + }, + { + "data_columns": { + "tmpbar1": ["/plotly/data/1/y"], + "value": ["/plotly/data/1/x"], + }, + "table": 0, + }, + ] + + self.assert_chart_equals( + chart, + expected_data=expected_data, + expected_layout=expected_layout, + expected_mappings=expected_mappings, + expected_is_user_set_color=True, + expected_is_user_set_template=False, + ) - def test_basic_histogram_bar_col_displayed(self): + def test_basic_histogram_hist_agg_label(self): import src.deephaven.plot.express as dx chart = dx.histogram(self.source, x="X") - self.assert_bar_col_displayed_equal(chart, "X", "count") + self.assert_hist_agg_label_equal(chart, "X", "count") chart = dx.histogram(self.source, y="Y") - self.assert_bar_col_displayed_equal(chart, "count", "Y") + self.assert_hist_agg_label_equal(chart, "count", "Y") # default histfunc is sum when both x and y are provided chart = dx.histogram(self.source, x="X", y="Y", histfunc="count") - self.assert_bar_col_displayed_equal(chart, "X", "count") + self.assert_hist_agg_label_equal(chart, "X", "count") chart = dx.histogram(self.source, x="X", histfunc="sum") - self.assert_bar_col_displayed_equal(chart, "X", "sum") + self.assert_hist_agg_label_equal(chart, "X", "sum") chart = dx.histogram(self.source, y="Y", histfunc="sum") - self.assert_bar_col_displayed_equal(chart, "sum", "Y") + self.assert_hist_agg_label_equal(chart, "sum", "Y") chart = dx.histogram(self.source, x="X", y="Y") - self.assert_bar_col_displayed_equal(chart, "X", "sum of Y") + self.assert_hist_agg_label_equal(chart, "X", "sum of Y") chart = dx.histogram(self.source, x="X", histfunc="abs_sum") - self.assert_bar_col_displayed_equal(chart, "X", "abs_sum") + self.assert_hist_agg_label_equal(chart, "X", "abs_sum") chart = dx.histogram(self.source, x="X", histfunc="avg") - self.assert_bar_col_displayed_equal(chart, "X", "avg") + self.assert_hist_agg_label_equal(chart, "X", "avg") chart = dx.histogram(self.source, x="X", histfunc="count_distinct") - self.assert_bar_col_displayed_equal(chart, "X", "count_distinct") + self.assert_hist_agg_label_equal(chart, "X", "count_distinct") chart = dx.histogram(self.source, x="X", histfunc="max") - self.assert_bar_col_displayed_equal(chart, "X", "max") + self.assert_hist_agg_label_equal(chart, "X", "max") chart = dx.histogram(self.source, x="X", histfunc="median") - self.assert_bar_col_displayed_equal(chart, "X", "median") + self.assert_hist_agg_label_equal(chart, "X", "median") chart = dx.histogram(self.source, x="X", histfunc="min") - self.assert_bar_col_displayed_equal(chart, "X", "min") + self.assert_hist_agg_label_equal(chart, "X", "min") chart = dx.histogram(self.source, x="X", histfunc="std") - self.assert_bar_col_displayed_equal(chart, "X", "std") + self.assert_hist_agg_label_equal(chart, "X", "std") chart = dx.histogram(self.source, x="X", histfunc="var") - self.assert_bar_col_displayed_equal(chart, "X", "var") + self.assert_hist_agg_label_equal(chart, "X", "var") chart = dx.histogram(self.source, x="X", histnorm="probability") - self.assert_bar_col_displayed_equal(chart, "X", "fraction of sum of count") + self.assert_hist_agg_label_equal(chart, "X", "fraction of sum of count") chart = dx.histogram(self.source, x="X", histnorm="percent") - self.assert_bar_col_displayed_equal(chart, "X", "percent of sum of count") + self.assert_hist_agg_label_equal(chart, "X", "percent of sum of count") chart = dx.histogram(self.source, x="X", histnorm="density") - self.assert_bar_col_displayed_equal(chart, "X", "density of count") + self.assert_hist_agg_label_equal(chart, "X", "density of count") chart = dx.histogram(self.source, x="X", histnorm="probability density") - self.assert_bar_col_displayed_equal(chart, "X", "probability density of count") + self.assert_hist_agg_label_equal(chart, "X", "probability density of count") chart = dx.histogram(self.source, x="X", y="Y", histnorm="probability") - self.assert_bar_col_displayed_equal(chart, "X", "fraction of sum of Y") + self.assert_hist_agg_label_equal(chart, "X", "fraction of sum of Y") chart = dx.histogram(self.source, x="X", y="Y", histnorm="percent") - self.assert_bar_col_displayed_equal(chart, "X", "percent of sum of Y") + self.assert_hist_agg_label_equal(chart, "X", "percent of sum of Y") chart = dx.histogram(self.source, x="X", y="Y", histnorm="density") - self.assert_bar_col_displayed_equal(chart, "X", "density weighted by Y") + self.assert_hist_agg_label_equal(chart, "X", "density weighted by Y") chart = dx.histogram(self.source, x="X", barnorm="fraction") - self.assert_bar_col_displayed_equal( - chart, "X", "count (normalized as fraction)" - ) + self.assert_hist_agg_label_equal(chart, "X", "count (normalized as fraction)") chart = dx.histogram(self.source, x="X", barnorm="percent") - self.assert_bar_col_displayed_equal(chart, "X", "count (normalized as percent)") + self.assert_hist_agg_label_equal(chart, "X", "count (normalized as percent)") + + chart = dx.histogram( + self.source, + x="X", + y="Y", + histfunc="avg", + barnorm="percent", + histnorm="density", + ) + self.assert_hist_agg_label_equal( + chart, "X", "density of avg of Y (normalized as percent)" + ) chart = dx.histogram( self.source, @@ -610,11 +956,51 @@ def test_basic_histogram_bar_col_displayed(self): histfunc="avg", barnorm="percent", histnorm="density", + labels={}, ) - self.assert_bar_col_displayed_equal( + self.assert_hist_agg_label_equal( chart, "X", "density of avg of Y (normalized as percent)" ) + chart = dx.histogram( + self.source, + x="X", + y="Y", + histfunc="avg", + barnorm="percent", + histnorm="density", + labels={"Y": "Labeled"}, + ) + self.assert_hist_agg_label_equal( + chart, "X", "density of avg of Labeled (normalized as percent)" + ) + + # we allow relabeling both the variables and the agg label + chart = dx.histogram( + self.source, + x="X", + y="Y", + histfunc="avg", + barnorm="percent", + histnorm="density", + labels={"density of avg of Y (normalized as percent)": "Y"}, + ) + self.assert_hist_agg_label_equal(chart, "X", "Y") + + chart = dx.histogram( + self.source, + x="X", + y="Y", + histfunc="avg", + barnorm="percent", + histnorm="density", + labels={ + "Y": "Labeled", + "density of avg of Labeled (normalized as percent)": "Y", + }, + ) + self.assert_hist_agg_label_equal(chart, "X", "Y") + def test_basic_violin_x(self): import src.deephaven.plot.express as dx from deephaven.constants import NULL_INT From 4c7dca72a47a1a09db4654a8c87d26160abc7d68 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Thu, 7 Nov 2024 15:52:12 -0600 Subject: [PATCH 13/17] wip --- .../express/preprocess/HistPreprocessor.py | 4 ++- .../plot/express/plots/test_distribution.py | 28 ++++++++++++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index ebfd889ad..5a369bd3d 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -172,7 +172,9 @@ def create_hist_agg_label(self) -> str: hist_agg_label = f"{self.histfunc} of {relabeled_agg_col}" if self.histnorm: - if self.histfunc == "sum": + if self.histfunc == "count": + hist_agg_label = self.histnorm + elif self.histfunc == "sum": if self.histnorm == "probability": hist_agg_label = f"fraction of {hist_agg_label}" elif self.histnorm == "percent": diff --git a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py index eae0c266d..de8f34798 100644 --- a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py +++ b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py @@ -911,16 +911,16 @@ def test_basic_histogram_hist_agg_label(self): self.assert_hist_agg_label_equal(chart, "X", "var") chart = dx.histogram(self.source, x="X", histnorm="probability") - self.assert_hist_agg_label_equal(chart, "X", "fraction of sum of count") + self.assert_hist_agg_label_equal(chart, "X", "probability") chart = dx.histogram(self.source, x="X", histnorm="percent") - self.assert_hist_agg_label_equal(chart, "X", "percent of sum of count") + self.assert_hist_agg_label_equal(chart, "X", "percent") chart = dx.histogram(self.source, x="X", histnorm="density") - self.assert_hist_agg_label_equal(chart, "X", "density of count") + self.assert_hist_agg_label_equal(chart, "X", "density") chart = dx.histogram(self.source, x="X", histnorm="probability density") - self.assert_hist_agg_label_equal(chart, "X", "probability density of count") + self.assert_hist_agg_label_equal(chart, "X", "probability density") chart = dx.histogram(self.source, x="X", y="Y", histnorm="probability") self.assert_hist_agg_label_equal(chart, "X", "fraction of sum of Y") @@ -931,6 +931,26 @@ def test_basic_histogram_hist_agg_label(self): chart = dx.histogram(self.source, x="X", y="Y", histnorm="density") self.assert_hist_agg_label_equal(chart, "X", "density weighted by Y") + chart = dx.histogram( + self.source, x="X", y="Y", histnorm="probability", histfunc="avg" + ) + self.assert_hist_agg_label_equal(chart, "X", "fraction of sum of avg of Y") + + chart = dx.histogram( + self.source, x="X", y="Y", histnorm="percent", histfunc="avg" + ) + self.assert_hist_agg_label_equal(chart, "X", "percent of sum of avg of Y") + + chart = dx.histogram( + self.source, x="X", y="Y", histnorm="density", histfunc="avg" + ) + self.assert_hist_agg_label_equal(chart, "X", "density of avg of Y") + + chart = dx.histogram( + self.source, x="X", y="Y", histnorm="probability density", histfunc="avg" + ) + self.assert_hist_agg_label_equal(chart, "X", "probability density of avg of Y") + chart = dx.histogram(self.source, x="X", barnorm="fraction") self.assert_hist_agg_label_equal(chart, "X", "count (normalized as fraction)") From 42a346af7b376f915d41578361a00fd6daa7e10a Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Thu, 7 Nov 2024 16:01:19 -0600 Subject: [PATCH 14/17] wip --- .../deephaven/plot/express/plots/_private_utils.py | 2 -- .../src/deephaven/plot/express/plots/distribution.py | 11 +++++------ .../deephaven/plot/express/plots/test_distribution.py | 8 ++------ 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py b/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py index 8fd0f7da0..cd287c0df 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py +++ b/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py @@ -25,8 +25,6 @@ ) from ..types import PartitionableTableLike -NUMERIC_TYPES = {"short", "int", "long", "float", "double"} - def validate_common_args(args: dict) -> None: """Validate common args amongst plots diff --git a/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py b/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py index 3eb9b2102..0dcf23119 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py +++ b/plugins/plotly-express/src/deephaven/plot/express/plots/distribution.py @@ -343,11 +343,11 @@ def histogram( Args: table: A table to pull data from. x: A column name or list of columns that contain x-axis values. - Only one of x or y can be specified. If x is specified, - the bars are drawn horizontally. + Column values must be numeric. If x is specified, + the bars are drawn vertically by default. y: A column name or list of columns that contain y-axis values. - Only one of x or y can be specified. If y is specified, the - bars are drawn vertically. + Column values must be numeric. If only y is specified, + the bars are drawn horizontally by default. by: A column or list of columns that contain values to plot the figure traces by. All values or combination of values map to a unique design. The variable by_vars specifies which design elements are used. @@ -379,9 +379,8 @@ def histogram( orientation: The orientation of the bars. If 'v', the bars are vertical. If 'h', the bars are horizontal. - Defaults to 'v' if only `x` is specified. + Defaults to 'v' if `x` is specified. Defaults to 'h' if only `y` is specified. - Defaults to 'v' if both `x` and `y` are specified unless `x` is passed only numeric columns and `y` is not. barmode: If 'relative', bars are stacked. If 'overlay', bars are drawn on top of each other. If 'group', bars are drawn next to each other. diff --git a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py index de8f34798..cdac42cb3 100644 --- a/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py +++ b/plugins/plotly-express/test/deephaven/plot/express/plots/test_distribution.py @@ -6,7 +6,7 @@ class DistributionTestCase(BaseTestCase): def setUp(self) -> None: from deephaven import new_table - from deephaven.column import int_col, string_col + from deephaven.column import int_col self.source = new_table( [ @@ -14,14 +14,10 @@ def setUp(self) -> None: int_col("X2", [1, 2, 2, 3, 3, 3, 4, 4, 5]), int_col("Y", [1, 2, 2, 3, 3, 3, 4, 4, 5]), int_col("Y2", [1, 2, 2, 3, 3, 3, 4, 4, 5]), + int_col("size", [1, 2, 2, 3, 3, 3, 4, 4, 5]), int_col("text", [1, 2, 2, 3, 3, 3, 4, 4, 5]), int_col("hover_name", [1, 2, 2, 3, 3, 3, 4, 4, 5]), int_col("category", [1, 2, 1, 2, 1, 2, 1, 2, 1]), - string_col( - "color", - ["red", "blue", "red", "blue", "red", "blue", "red", "blue", "red"], - ), - string_col("size", ["1", "2", "1", "2", "1", "2", "1", "2", "1"]), ] ) From ba4ccc097073d9c3da12089feddbef4739d93280 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Fri, 8 Nov 2024 13:21:59 -0600 Subject: [PATCH 15/17] wip --- plugins/plotly-express/docs/histogram.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/plugins/plotly-express/docs/histogram.md b/plugins/plotly-express/docs/histogram.md index 83f60eb18..e44ab40d5 100644 --- a/plugins/plotly-express/docs/histogram.md +++ b/plugins/plotly-express/docs/histogram.md @@ -47,6 +47,27 @@ hist_3_bins = dx.histogram(setosa, x="SepalLength", nbins=3) hist_8_bins = dx.histogram(setosa, x="SepalLength", nbins=8) ``` +### Bin and aggregate on different columns + +If both `x` and `y` are specified, the histogram will be binned across one column and aggregated on the other. + +```python order=hist_v,hist_h,hist_avg,iris +import deephaven.plot.express as dx +iris = dx.data.iris() + +# subset to get specific species +setosa = iris.where("Species == `setosa`") + +# The default orientation is "v" (vertical) and the default aggregation function is "sum" +hist_v = dx.histogram(setosa, x="SepalLength", y="SepalWidth") + +# Control the plot orientation using orientation +hist_h = dx.histogram(setosa, x="SepalLength", y="SepalWidth", orientation="h") + +# Control the aggregation function using histfunc +hist_avg = dx.histogram(setosa, x="SepalLength", y="SepalWidth", histfunc="avg") +``` + ### Distributions of several groups Histograms can also be used to compare the distributional properties of different groups of data, though they may be a little harder to read than [box plots](box.md) or [violin plots](violin.md). Pass the name of the grouping column(s) to the `by` argument. From b8a512a9f4cb1df97e669950f8d4821a0172377e Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Wed, 18 Dec 2024 13:41:31 -0600 Subject: [PATCH 16/17] wip --- .../plot/express/deephaven_figure/generate.py | 57 +++++++++---------- .../plot/express/plots/PartitionManager.py | 54 +++++++++++------- .../plot/express/plots/_private_utils.py | 12 ++-- .../express/preprocess/HistPreprocessor.py | 9 +-- .../plot/express/preprocess/Preprocessor.py | 14 ++--- .../preprocess/UnivariateAwarePreprocessor.py | 45 +++++++++++---- 6 files changed, 113 insertions(+), 78 deletions(-) diff --git a/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py b/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py index 4e4ec36a2..8e8ab45d7 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py +++ b/plugins/plotly-express/src/deephaven/plot/express/deephaven_figure/generate.py @@ -111,9 +111,9 @@ "current_col", "current_var", "labels", - "hist_agg_label_h", - "hist_agg_label_v", - "pivot_vars", + "hist_agg_label", + "hist_orientation", + "stacked_column_names", "current_partition", "colors", "unsafe_update_figure", @@ -685,7 +685,7 @@ def handle_custom_args( return trace_generator -def get_list_var_info(data_cols: Mapping[str, str | list[str]]) -> set[str]: +def get_list_param_info(data_cols: Mapping[str, str | list[str]]) -> set[str]: """Extract the variable that is a list. Args: @@ -825,8 +825,8 @@ def hover_text_generator( def compute_labels( hover_mapping: list[dict[str, str]], - hist_agg_label_h: str | None, - hist_agg_label_v: str | None, + hist_agg_label: str | None, + hist_orientation: str | None, heatmap_agg_label: str | None, # hover_data - todo, dependent on arrays supported in data mappings types: set[str], @@ -839,8 +839,8 @@ def compute_labels( Args: hover_mapping: The mapping of variables to columns - hist_agg_label_h: The histogram agg label when oriented horizontally - hist_agg_label_v: The histogram agg label when oriented vertically + hist_agg_label: The histogram agg label + hist_orientation: The histogram orientation heatmap_agg_label: The aggregate density heatmap column title types: Any types of this chart that require special processing labels: A dictionary of old column name to new column name mappings @@ -850,7 +850,7 @@ def compute_labels( the renamed current_col """ - calculate_hist_labels(hist_agg_label_h, hist_agg_label_v, hover_mapping[0]) + calculate_hist_labels(hist_agg_label, hist_orientation, hover_mapping[0]) calculate_density_heatmap_labels(heatmap_agg_label, hover_mapping[0], labels) @@ -883,31 +883,30 @@ def calculate_density_heatmap_labels( def calculate_hist_labels( - hist_agg_label_h: str | None, - hist_agg_label_v: str | None, + hist_agg_label: str | None, + hist_orientation: str | None, hover_mapping: dict[str, str], ) -> None: """Calculate the histogram labels Args: - hist_agg_label_h: The histogram agg label when oriented horizontally - hist_agg_label_v: The histogram agg label when oriented vertically + hist_agg_label: The histogram agg label + hist_orientation: The histogram orientation hover_mapping: The mapping of variables to columns """ # only one should be set - if hist_agg_label_h: + if hist_orientation == "h" and hist_agg_label: # a bar chart oriented horizontally has the histfunc on the x-axis - hover_mapping["x"] = hist_agg_label_h - elif hist_agg_label_v: - hover_mapping["y"] = hist_agg_label_v + hover_mapping["x"] = hist_agg_label + elif hist_orientation == "v" and hist_agg_label: + hover_mapping["y"] = hist_agg_label def add_axis_titles( custom_call_args: dict[str, Any], hover_mapping: list[dict[str, str]], - hist_agg_label_h: str | None, - hist_agg_label_v: str | None, + hist_agg_label: str | None, heatmap_agg_label: str | None, ) -> None: """Add axis titles. Generally, this only applies when there is a list variable @@ -916,8 +915,7 @@ def add_axis_titles( custom_call_args: The custom_call_args that are used to create hover and axis titles hover_mapping: The mapping of variables to columns - hist_agg_label_h: The histogram agg label when oriented horizontally - hist_agg_label_v: The histogram agg label when oriented vertically + hist_agg_label: The histogram agg label heatmap_agg_label: The aggregate density heatmap column title """ @@ -927,7 +925,7 @@ def add_axis_titles( new_xaxis_titles = None new_yaxis_titles = None - if hist_agg_label_h or hist_agg_label_v: + if hist_agg_label: # hist labels are already set up in the mapping new_xaxis_titles = [hover_mapping[0].get("x", None)] new_yaxis_titles = [hover_mapping[0].get("y", None)] @@ -953,7 +951,7 @@ def create_hover_and_axis_titles( hover_mapping: list[dict[str, str]], ) -> Generator[dict[str, Any], None, None]: """Create hover text and axis titles. There are three main behaviors. - First is "current_col", "current_var", and "pivot_vars" are specified in + First is "current_col", "current_var", and "stacked_column_names" are specified in "custom_call_args". In this case, there is a list of variables, but they are layered outside the generate function. @@ -983,19 +981,19 @@ def create_hover_and_axis_titles( Yields: Dicts containing hover updates """ - types = get_list_var_info(data_cols) + types = get_list_param_info(data_cols) labels = custom_call_args.get("labels", None) - hist_agg_label_h = custom_call_args.get("hist_agg_label_h", None) - hist_agg_label_v = custom_call_args.get("hist_agg_label_v", None) + hist_agg_label = custom_call_args.get("hist_agg_label", None) + hist_orientation = custom_call_args.get("hist_orientation", None) heatmap_agg_label = custom_call_args.get("heatmap_agg_label", None) current_partition = custom_call_args.get("current_partition", {}) compute_labels( hover_mapping, - hist_agg_label_h, - hist_agg_label_v, + hist_agg_label, + hist_orientation, heatmap_agg_label, types, labels, @@ -1011,8 +1009,7 @@ def create_hover_and_axis_titles( add_axis_titles( custom_call_args, hover_mapping, - hist_agg_label_h, - hist_agg_label_v, + hist_agg_label, heatmap_agg_label, ) diff --git a/plugins/plotly-express/src/deephaven/plot/express/plots/PartitionManager.py b/plugins/plotly-express/src/deephaven/plot/express/plots/PartitionManager.py index cc0b33cce..5a18b576a 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/plots/PartitionManager.py +++ b/plugins/plotly-express/src/deephaven/plot/express/plots/PartitionManager.py @@ -9,7 +9,7 @@ from deephaven.table import Table, PartitionedTable from deephaven import pandas as dhpd -from deephaven import merge, empty_table +from deephaven import merge from ._layer import atomic_layer from .. import DeephavenFigure @@ -109,10 +109,10 @@ class PartitionManager: Attributes: by_vars: set[str]: The set of by_vars that can be used in a plot by - list_var: str: "x" or "y" depending on which var is a list - cols: str | list: The columns set by the list_var - pivot_vars: dict[str, str]: A dictionary that stores the "real" column - names if there is a list_var. This is needed in case the column names + list_param: str: "x" or "y" depending on which param is a list + cols: str | list: The columns set by the list_param + stacked_column_names: dict[str, str]: A dictionary that stores the "real" column + names if there is a list_param. This is needed in case the column names used are already in the table. has_color: bool: True if this figure has user set color, False otherwise facet_row: str: The facet row @@ -146,9 +146,9 @@ def __init__( ): self.by = None self.by_vars = None - self.list_var = None + self.list_param = None self.cols = None - self.pivot_vars = {} + self.stacked_column_names = {} self.has_color = None self.facet_row = None self.facet_col = None @@ -199,13 +199,13 @@ def set_long_mode_variables(self) -> None: self.groups.discard("supports_lists") return - self.list_var = var + self.list_param = var self.cols = cols - args["current_var"] = self.list_var + args["current_var"] = self.list_param - self.pivot_vars = get_unique_names(table, ["variable", "value"]) - self.args["pivot_vars"] = self.pivot_vars + self.stacked_column_names = get_unique_names(table, ["variable", "value"]) + self.args["stacked_column_names"] = self.stacked_column_names def convert_table_to_long_mode( self, @@ -227,7 +227,7 @@ def convert_table_to_long_mode( # if there is no plot by arg, the variable column becomes it if not self.args.get("by", None): - args["by"] = self.pivot_vars["variable"] + args["by"] = self.stacked_column_names["variable"] args["table"] = self.to_long_mode(table, self.cols) @@ -418,7 +418,11 @@ def process_partitions(self) -> Table | PartitionedTable: # preprocessor needs to be initialized after the always attached arguments are found self.preprocessor = Preprocessor( - args, self.groups, self.always_attached, self.pivot_vars, self.list_var + args, + self.groups, + self.always_attached, + self.stacked_column_names, + self.list_param, ) if partition_cols: @@ -468,12 +472,14 @@ def build_ternary_chain(self, cols: list[str]) -> str: Returns: The ternary string that builds the new column """ - ternary_string = f"{self.pivot_vars['value']} = " + ternary_string = f"{self.stacked_column_names['value']} = " for i, col in enumerate(cols): if i == len(cols) - 1: ternary_string += f"{col}" else: - ternary_string += f"{self.pivot_vars['variable']} == `{col}` ? {col} : " + ternary_string += ( + f"{self.stacked_column_names['variable']} == `{col}` ? {col} : " + ) return ternary_string def to_long_mode(self, table: Table, cols: list[str] | None) -> Table: @@ -494,7 +500,7 @@ def to_long_mode(self, table: Table, cols: list[str] | None) -> Table: new_tables = [] for col in cols: new_tables.append( - table.update_view(f"{self.pivot_vars['variable']} = `{col}`") + table.update_view(f"{self.stacked_column_names['variable']} = `{col}`") ) merged = merge(new_tables) @@ -545,7 +551,9 @@ def table_partition_generator( Yields: The tuple of table and current partition """ - column = self.pivot_vars["value"] if self.pivot_vars else None + column = ( + self.stacked_column_names["value"] if self.stacked_column_names else None + ) if self.preprocessor: tables = self.preprocessor.preprocess_partitioned_tables( self.constituents, column @@ -571,9 +579,13 @@ def partition_generator(self) -> Generator[dict[str, Any], None, None]: # if a tuple is returned here, it was preprocessed already so pivots aren't needed table, arg_update = table args.update(arg_update) - elif self.pivot_vars and self.pivot_vars["value"] and self.list_var: + elif ( + self.stacked_column_names + and self.stacked_column_names["value"] + and self.list_param + ): # there is a list of variables, so replace them with the combined column - args[self.list_var] = self.pivot_vars["value"] + args[self.list_param] = self.stacked_column_names["value"] args["current_partition"] = current_partition @@ -680,8 +692,8 @@ def create_figure(self) -> DeephavenFigure: # by color (colors might be used multiple times) self.marg_args["table"] = self.partitioned_table - if self.pivot_vars and self.pivot_vars["value"]: - self.marg_args[self.list_var] = self.pivot_vars["value"] + if self.stacked_column_names and self.stacked_column_names["value"]: + self.marg_args[self.list_param] = self.stacked_column_names["value"] self.marg_args["color"] = self.marg_color diff --git a/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py b/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py index cd287c0df..41e9fe920 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py +++ b/plugins/plotly-express/src/deephaven/plot/express/plots/_private_utils.py @@ -233,8 +233,12 @@ def create_deephaven_figure( # this is a marginal, so provide an empty update function update_wrapper = lambda x: x - list_var = partitioned.list_var - pivot_col = partitioned.pivot_vars["value"] if partitioned.pivot_vars else None + list_param = partitioned.list_param + pivot_col = ( + partitioned.stacked_column_names["value"] + if partitioned.stacked_column_names + else None + ) by = partitioned.by update = {} @@ -243,9 +247,9 @@ def create_deephaven_figure( # by needs to be updated as if there is a list variable but by is None, the pivot column is used as the by update["by"] = by - if list_var: + if list_param: # if there is a list variable, update the list variable to the pivot column - update[list_var] = pivot_col + update[list_param] = pivot_col return ( update_wrapper(partitioned.create_figure()), diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py index 5a369bd3d..8320d29c2 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/HistPreprocessor.py @@ -54,10 +54,10 @@ class HistPreprocessor(UnivariateAwarePreprocessor): def __init__( self, args: dict[str, Any], - pivot_vars: dict[str, str], - list_var: str | None = None, + stacked_column_names: dict[str, str], + list_param: str | None = None, ): - super().__init__(args, pivot_vars, list_var) + super().__init__(args, stacked_column_names, list_param) self.range_table = None self.names = {} self.nbins = args.pop("nbins", 10) @@ -296,5 +296,6 @@ def preprocess_partitioned_tables( yield bin_counts.view([f"{bin_col} = {bin_mid}", new_agg_col]), { self.agg_var: new_agg_col, self.bin_var: bin_col, - f"hist_agg_label_{self.orientation}": hist_agg_label, + f"hist_agg_label": hist_agg_label, + f"hist_orientation": self.orientation, } diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/Preprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/Preprocessor.py index 596c71de3..398de015a 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/Preprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/Preprocessor.py @@ -17,8 +17,8 @@ class Preprocessor: Preprocessor for tables Attributes: - pivot_vars: dict[str, str]: A dictionary that stores the "real" column - names if there is a list_var. This is needed in case the column names + stacked_column_names: dict[str, str]: A dictionary that stores the "real" column + names if there is a list_param. This is needed in case the column names used are already in the table. always_attached: dict[tuple[str, str], tuple[dict[str, str], list[str], str]: The dict mapping the arg and column @@ -33,15 +33,15 @@ def __init__( args: dict[str, Any], groups: set[str], always_attached: dict[tuple[str, str], tuple[dict[str, str], list[str], str]], - pivot_vars: dict[str, str], - list_var: str | None, + stacked_column_names: dict[str, str], + list_param: str | None, ): self.args = args self.groups = groups self.preprocesser = None self.always_attached = always_attached - self.pivot_vars = pivot_vars - self.list_var = list_var + self.stacked_column_names = stacked_column_names + self.list_param = list_param self.prepare_preprocess() def prepare_preprocess(self) -> None: @@ -50,7 +50,7 @@ def prepare_preprocess(self) -> None: """ if "preprocess_hist" in self.groups: self.preprocesser = HistPreprocessor( - self.args, self.pivot_vars, self.list_var + self.args, self.stacked_column_names, self.list_param ) elif "preprocess_freq" in self.groups: self.preprocesser = FreqPreprocessor(self.args) diff --git a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py index b551859b4..c7760936f 100644 --- a/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py +++ b/plugins/plotly-express/src/deephaven/plot/express/preprocess/UnivariateAwarePreprocessor.py @@ -1,9 +1,12 @@ from __future__ import annotations -from typing import Any +import abc +from typing import Any, Generator +from deephaven.table import Table -class UnivariateAwarePreprocessor: + +class UnivariateAwarePreprocessor(abc.ABC): """ A preprocessor that stores useful args for plots where possibly one of x or y or both can be specified, which impacts the orientation of the plot in ways that affect the preprocessing. @@ -11,8 +14,10 @@ class UnivariateAwarePreprocessor: Args: args: Figure creation args - pivot_vars: The vars with new column names if a list was passed in - list_var: The var that was passed in as a list + stacked_column_names: A dictionary that stores the "real" column + names if there is a list_param. This is needed in case the column names + used are already in the table. + list_param: The param that was passed in as a list Attributes: args: dict[str, str]: Figure creation args @@ -31,8 +36,8 @@ class UnivariateAwarePreprocessor: def __init__( self, args: dict[str, Any], - pivot_vars: dict[str, str] | None = None, - list_var: str | None = None, + stacked_column_names: dict[str, str] | None = None, + list_param: str | None = None, ): self.args = args self.table = args["table"] @@ -41,19 +46,19 @@ def __init__( self.bin_var = "x" if self.orientation == "v" else "y" self.agg_var = "y" if self.bin_var == "x" else "x" self.bin_col: str = ( - pivot_vars["value"] - if pivot_vars and list_var and list_var == self.bin_var + stacked_column_names["value"] + if stacked_column_names and list_param and list_param == self.bin_var else args[self.bin_var] ) if self.args.get(self.agg_var): self.agg_col: str = ( - pivot_vars["value"] - if pivot_vars and list_var and list_var == self.agg_var + stacked_column_names["value"] + if stacked_column_names and list_param and list_param == self.agg_var else args[self.agg_var] ) else: - # if bar_var is not set, the value column is the same as the axis column + # if agg_var is not set, the value column is the same as the axis column # because both the axis bins and value are computed from the same inputs self.agg_col = self.bin_col @@ -71,9 +76,25 @@ def calculate_bar_orientation(self): # Note that this will also be the default if both are specified # plotly express does some more sophisticated checking for data types # when both are specified but categorical data will fail due to the - # engine preprocessing in our implementation so just assume vertical + # engine preprocessing in our implementation so just assume verticals return "v" elif y: return "h" raise ValueError("Could not determine orientation") + + @abc.abstractmethod + def preprocess_partitioned_tables( + self, tables: list[Table], column: str | None = None + ) -> Generator[tuple[Table, dict[str, str | None]], None, None]: + """ + Preprocess the tables into the appropriate format for the plot. + + Args: + tables: A list of tables to preprocess + column: The column to aggregate on + + Returns: + A tuple containing (the new table, an update to make to the args) + """ + raise NotImplementedError From 857a4881e2a0360824f43b03414a168736e2f726 Mon Sep 17 00:00:00 2001 From: Joe Numainville Date: Thu, 19 Dec 2024 09:33:25 -0600 Subject: [PATCH 17/17] wip --- plugins/plotly-express/docs/histogram.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/plotly-express/docs/histogram.md b/plugins/plotly-express/docs/histogram.md index e44ab40d5..7a9bf4fd1 100644 --- a/plugins/plotly-express/docs/histogram.md +++ b/plugins/plotly-express/docs/histogram.md @@ -49,7 +49,8 @@ hist_8_bins = dx.histogram(setosa, x="SepalLength", nbins=8) ### Bin and aggregate on different columns -If both `x` and `y` are specified, the histogram will be binned across one column and aggregated on the other. +If the plot orientation is vertical (`"v"`), the `x` column is binned and the `y` column is aggregated. The operations are flipped if the plot orientation is horizontal. + ```python order=hist_v,hist_h,hist_avg,iris import deephaven.plot.express as dx