Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Make dx histogram behavior consistent with px #1002

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
21 changes: 21 additions & 0 deletions plugins/plotly-express/docs/histogram.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,27 @@ hist_3_bins = dx.histogram(setosa, x="SepalLength", nbins=3)
hist_8_bins = dx.histogram(setosa, x="SepalLength", nbins=8)
```

### Bin and aggregate on different columns

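If both `x` and `y` are specified, the histogram will be binned across one column and aggregated on the other. If the plot orientation is vertical (the default), the `x` column is binned and the `y` column is aggregated. The operations are flipped if the plot orientation is horizontal.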
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would mention something like "If the plot orientation is vertical, the x column will be binned and the y column aggregated. The operations are flipped if the plot orientation is horizontal."

I don't know which orientation corresponds with which pairing

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


```python order=hist_v,hist_h,hist_avg,iris
import deephaven.plot.express as dx
iris = dx.data.iris()

# subset to get specific species
setosa = iris.where("Species == `setosa`")

# The default orientation is "v" (vertical) and the default aggregation function is "sum"
hist_v = dx.histogram(setosa, x="SepalLength", y="SepalWidth")

# Control the plot orientation using the `orientation` argument
hist_h = dx.histogram(setosa, x="SepalLength", y="SepalWidth", orientation="h")

# Control the aggregation function using the `histfunc` argument
hist_avg = dx.histogram(setosa, x="SepalLength", y="SepalWidth", histfunc="avg")
```

### Distributions of several groups

Histograms can also be used to compare the distributional properties of different groups of data, though they may be a little harder to read than [box plots](box.md) or [violin plots](violin.md). Pass the name of the grouping column(s) to the `by` argument.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@
"current_col",
"current_var",
"labels",
"hist_val_name",
"hist_agg_label_h",
"hist_agg_label_v",
"pivot_vars",
"current_partition",
"colors",
Expand Down Expand Up @@ -824,7 +825,8 @@ def hover_text_generator(

def compute_labels(
hover_mapping: list[dict[str, str]],
hist_val_name: str | None,
hist_agg_label_h: str | None,
hist_agg_label_v: str | None,
heatmap_agg_label: str | None,
# hover_data - todo, dependent on arrays supported in data mappings
types: set[str],
Expand All @@ -837,7 +839,8 @@ def compute_labels(

Args:
hover_mapping: The mapping of variables to columns
hist_val_name: The histogram name for the value axis, generally histfunc
hist_agg_label_h: The histogram agg label when oriented horizontally
hist_agg_label_v: The histogram agg label when oriented vertically
heatmap_agg_label: The aggregate density heatmap column title
types: Any types of this chart that require special processing
labels: A dictionary of old column name to new column name mappings
Expand All @@ -847,7 +850,7 @@ def compute_labels(
the renamed current_col
"""

calculate_hist_labels(hist_val_name, hover_mapping[0])
calculate_hist_labels(hist_agg_label_h, hist_agg_label_v, hover_mapping[0])

calculate_density_heatmap_labels(heatmap_agg_label, hover_mapping[0], labels)

Expand Down Expand Up @@ -880,27 +883,31 @@ def calculate_density_heatmap_labels(


def calculate_hist_labels(
hist_val_name: str | None, current_mapping: dict[str, str]
hist_agg_label_h: str | None,
hist_agg_label_v: str | None,
hover_mapping: dict[str, str],
) -> None:
"""Calculate the histogram labels

Args:
hist_val_name: The histogram name for the value axis, generally histfunc
current_mapping: The mapping of variables to columns
hist_agg_label_h: The histogram agg label when oriented horizontally
hist_agg_label_v: The histogram agg label when oriented vertically
hover_mapping: The mapping of variables to columns

"""
if hist_val_name:
# swap the names
current_mapping["x"], current_mapping["y"] = (
current_mapping["y"],
current_mapping["x"],
)
# only one should be set
if hist_agg_label_h:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be a bit clearer to make the args hist_agg_label and hist_orientation

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

# a bar chart oriented horizontally has the histfunc on the x-axis
hover_mapping["x"] = hist_agg_label_h
elif hist_agg_label_v:
hover_mapping["y"] = hist_agg_label_v


def add_axis_titles(
custom_call_args: dict[str, Any],
hover_mapping: list[dict[str, str]],
hist_val_name: str | None,
hist_agg_label_h: str | None,
hist_agg_label_v: str | None,
heatmap_agg_label: str | None,
) -> None:
"""Add axis titles. Generally, this only applies when there is a list variable
Expand All @@ -909,7 +916,8 @@ def add_axis_titles(
custom_call_args: The custom_call_args that are used to
create hover and axis titles
hover_mapping: The mapping of variables to columns
hist_val_name: The histogram name for the value axis, generally histfunc
hist_agg_label_h: The histogram agg label when oriented horizontally
hist_agg_label_v: The histogram agg label when oriented vertically
heatmap_agg_label: The aggregate density heatmap column title

"""
Expand All @@ -919,8 +927,8 @@ def add_axis_titles(
new_xaxis_titles = None
new_yaxis_titles = None

if hist_val_name:
# hist names are already set up in the mapping
if hist_agg_label_h or hist_agg_label_v:
# hist labels are already set up in the mapping
new_xaxis_titles = [hover_mapping[0].get("x", None)]
new_yaxis_titles = [hover_mapping[0].get("y", None)]

Expand Down Expand Up @@ -978,14 +986,16 @@ def create_hover_and_axis_titles(
types = get_list_var_info(data_cols)

labels = custom_call_args.get("labels", None)
hist_val_name = custom_call_args.get("hist_val_name", None)
hist_agg_label_h = custom_call_args.get("hist_agg_label_h", None)
hist_agg_label_v = custom_call_args.get("hist_agg_label_v", None)
heatmap_agg_label = custom_call_args.get("heatmap_agg_label", None)

current_partition = custom_call_args.get("current_partition", {})

compute_labels(
hover_mapping,
hist_val_name,
hist_agg_label_h,
hist_agg_label_v,
heatmap_agg_label,
types,
labels,
Expand All @@ -998,7 +1008,13 @@ def create_hover_and_axis_titles(
# it's possible that heatmap_agg_label was relabeled, so grab the new label
heatmap_agg_label = hover_mapping[0]["z"]

add_axis_titles(custom_call_args, hover_mapping, hist_val_name, heatmap_agg_label)
add_axis_titles(
custom_call_args,
hover_mapping,
hist_agg_label_h,
hist_agg_label_v,
heatmap_agg_label,
)

return hover_text

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def process_partitions(self) -> Table | PartitionedTable:

# preprocessor needs to be initialized after the always attached arguments are found
self.preprocessor = Preprocessor(
args, self.groups, self.always_attached, self.pivot_vars
args, self.groups, self.always_attached, self.pivot_vars, self.list_var
)

if partition_cols:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,6 @@ def shared_histogram(is_marginal: bool = True, **args: Any) -> DeephavenFigure:
set_all(args, HISTOGRAM_DEFAULTS)

args["bargap"] = 0
args["hist_val_name"] = args.get("histfunc", "count")

func = px.bar
groups = {"bar", "preprocess_hist", "supports_lists"}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ._private_utils import validate_common_args, process_args
from ..shared import default_callback
from ..deephaven_figure import generate_figure, DeephavenFigure
from ..types import PartitionableTableLike
from ..types import PartitionableTableLike, Orientation


def bar(
Expand Down Expand Up @@ -42,6 +42,7 @@ def bar(
range_color: list[float] | None = None,
color_continuous_midpoint: float | None = None,
opacity: float | None = None,
orientation: Orientation | None = None,
barmode: str = "relative",
log_x: bool = False,
log_y: bool = False,
Expand Down Expand Up @@ -114,6 +115,12 @@ def bar(
color_continuous_midpoint: A number that is the midpoint of the color axis
opacity: Opacity to apply to all markers. 0 is completely transparent
and 1 is completely opaque.
orientation: The orientation of the bars.
If 'v', the bars are vertical.
If 'h', the bars are horizontal.
Defaults to 'v' if only `x` is specified.
Defaults to 'h' if only `y` is specified.
Defaults to 'v' if both `x` and `y` are specified, unless `x` contains only numeric columns and `y` does not, in which case it defaults to 'h'.
barmode: If 'relative', bars are stacked. If 'overlay', bars are drawn on top
of each other. If 'group', bars are drawn next to each other.
log_x: A boolean or list of booleans that specify if
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
HISTOGRAM_DEFAULTS,
default_callback,
)
from ..types import PartitionableTableLike
from ..types import PartitionableTableLike, Orientation


def violin(
Expand Down Expand Up @@ -321,6 +321,7 @@ def histogram(
pattern_shape_map: dict[str | tuple[str], str] | None = None,
marginal: str | None = None,
opacity: float | None = None,
orientation: Orientation | None = None,
barmode: str = HISTOGRAM_DEFAULTS["barmode"],
barnorm: str = HISTOGRAM_DEFAULTS["barnorm"],
histnorm: str = HISTOGRAM_DEFAULTS["histnorm"],
Expand All @@ -342,11 +343,11 @@ def histogram(
Args:
table: A table to pull data from.
x: A column name or list of columns that contain x-axis values.
Only one of x or y can be specified. If x is specified,
the bars are drawn horizontally.
Column values must be numeric. If x is specified,
the bars are drawn vertically by default.
y: A column name or list of columns that contain y-axis values.
Only one of x or y can be specified. If y is specified, the
bars are drawn vertically.
Column values must be numeric. If only y is specified,
the bars are drawn horizontally by default.
Comment on lines +346 to +350
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just making sure this has different behavior from bar in that the columns must be numeric. From the bar orientation docstring it looks like 1 value can be non-numeric

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's intentional. Our histogram never allows non-numeric data; our bar does. It's worth noting that the logic in calculate_bar_orientation isn't called by bar when both x and y are specified, only when just one of x or y is (that is the logic formerly known as frequency_bar). When both are specified, the orientation is set by the wrapped px.bar function.

by: A column or list of columns that contain values to plot the figure traces by.
All values or combination of values map to a unique design. The variable
by_vars specifies which design elements are used.
Expand Down Expand Up @@ -375,6 +376,11 @@ def histogram(
marginal: The type of marginal; histogram, violin, rug, box
opacity: Opacity to apply to all markers. 0 is completely transparent
and 1 is completely opaque.
orientation: The orientation of the bars.
If 'v', the bars are vertical.
If 'h', the bars are horizontal.
Defaults to 'v' if `x` is specified.
Defaults to 'h' if only `y` is specified.
barmode: If 'relative', bars are stacked. If
'overlay', bars are drawn on top of each other. If 'group', bars are
drawn next to each other.
Expand All @@ -396,6 +402,7 @@ def histogram(
histfunc: The function to use when aggregating within bins. One of
'abs_sum', 'avg', 'count', 'count_distinct', 'max', 'median', 'min', 'std',
'sum', or 'var'
Defaults to 'count' if only one of x or y is specified and 'sum' if both are.
cumulative: If True, values are cumulative.
nbins: The number of bins to use.
text_auto: If True, display the value at each bar.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

from deephaven.table import Table

from .UnivariatePreprocessor import UnivariatePreprocessor
from .UnivariateAwarePreprocessor import UnivariateAwarePreprocessor
from ..shared import get_unique_names


class FreqPreprocessor(UnivariatePreprocessor):
class FreqPreprocessor(UnivariateAwarePreprocessor):
"""
A type of univariate preprocessor for frequency bar plots

Expand All @@ -33,14 +33,14 @@ def preprocess_partitioned_tables(
A tuple containing (the new table, an update to make to the args)

"""
column = self.col_val if not column else column
column = self.agg_col if not column else column

names = get_unique_names(self.table, ["count"])

self.args[self.other_var] = names["count"]
self.args[self.agg_var] = names["count"]

for table in tables:
yield table.view([column]).count_by(names["count"], by=column), {
self.var: column,
self.other_var: names["count"],
self.bin_var: column,
self.agg_var: names["count"],
}
Loading
Loading