Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tabulator aggregators: allow nested dict, fix data aggregation #7450

Merged
merged 28 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions examples/reference/widgets/Tabulator.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,7 @@
"\n",
"The `Tabulator` widget can also render a hierarchical multi-index and aggregate over specific categories. If a DataFrame with a hierarchical multi-index is supplied and the `hierarchical` is enabled the widget will group data by the categories in the order they are defined in. Additionally for each group in the multi-index an aggregator may be provided which will aggregate over the values in that category.\n",
"\n",
"For example we may load population data for locations around the world broken down by sex and age-group. If we specify aggregators over the 'AgeGrp' and 'Sex' indexes we can see the aggregated values for each of those groups (note that we do not have to specify an aggregator for the outer index since we specify the aggregators over the subgroups in this case the 'Sex'):"
"We will use the Automobile Mileage dataset for various car models from the 1970s and 1980s around the world, broken down by regions, model years and manufacturers. The dataset includes details on car characteristics and performance metrics."
]
},
{
Expand All @@ -891,11 +891,45 @@
"metadata": {},
"outputs": [],
"source": [
"from bokeh.sampledata.population import data as population_data \n",
"from bokeh.sampledata.autompg import autompg_clean as autompg_df\n",
"\n",
"pop_df = population_data[population_data.Year == 2020].set_index(['Location', 'AgeGrp', 'Sex'])[['Value']]\n",
"autompg_df = autompg_df.set_index([\"origin\", \"yr\", \"mfr\"])\n",
"autompg_df.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If we specify aggregators over the 'origin' (region) and 'yr' (model year) indexes, we can see the aggregated values for each of those groups. Note that if no aggregators are specified to an outer index level, it will be aggregated with the default method of `sum`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pn.widgets.Tabulator(value=autompg_df, hierarchical=True, aggregators={\"origin\": \"mean\", \"yr\": \"mean\"}, height=200)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Separate aggregators for different columns are also supported. You can specify the `aggregators` as a nested dictionary as `{index_name: {column_name: aggregator}}`\n",
"\n",
"pn.widgets.Tabulator(value=pop_df, hierarchical=True, aggregators={'Sex': 'sum', 'AgeGrp': 'sum'}, height=200)"
"Applied to the same dataset, we can aggregate the data in the `mpg` (miles per galon) and `hp` columns differently, with `mean` and `max`, respectively."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nested_aggs = {\"origin\": {\"mpg\": \"mean\", \"hp\": \"max\"}, \"yr\": {\"mpg\": \"mean\", \"hp\": \"max\"}}\n",
"pn.widgets.Tabulator(value=autompg_df[[\"mpg\", \"hp\"]], hierarchical=True, aggregators=nested_aggs, height=200)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion panel/models/tabulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class DataTabulator(HTMLBox):
See http://tabulator.info/
"""

aggregators = Dict(String, String)
aggregators = Dict(Either(String, Int), Either(String, Dict(Either(String, Int), String)))

buttons = Dict(String, String)

Expand Down
35 changes: 28 additions & 7 deletions panel/models/tabulator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,17 @@ function find_group(key: any, value: string, records: any[]): any {
return null
}

function summarize(grouped: any[], columns: any[], aggregators: string[], depth: number = 0): any {
function summarize(grouped: any[], columns: any[], aggregators: any[], depth: number = 0): any {
const summary: any = {}
if (grouped.length == 0) {
return summary
}
const agg = aggregators[depth]
// depth level 0 is the root, finish here
let aggs = ""
if (depth > 0) {
aggs = aggregators[depth-1]
}

for (const group of grouped) {
const subsummary = summarize(group._children, columns, aggregators, depth+1)
for (const col in subsummary) {
Expand All @@ -88,14 +93,23 @@ function summarize(grouped: any[], columns: any[], aggregators: string[], depth:
group[col] = subsummary[col]
}
}

for (const column of columns.slice(1)) {
// if no aggregation method provided for an index level,
// or a specific column of an index level, do not aggregate data
let agg: string = ""
if (typeof aggs === "string") {
agg = aggs
} else if (column.field in aggs) {
agg = aggs[column.field]
}
const val = group[column.field]
if (column.field in summary) {
const old_val = summary[column.field]
if (agg === "min") {
summary[column.field] = Math.min(val, old_val)
summary[column.field] = (val < old_val) ? val : old_val
} else if (agg === "max") {
summary[column.field] = Math.max(val, old_val)
summary[column.field] = (val > old_val) ? val : old_val
} else if (agg === "sum") {
summary[column.field] = val + old_val
} else if (agg === "mean") {
Expand Down Expand Up @@ -125,15 +139,13 @@ function group_data(records: any[], columns: any[], indexes: string[], aggregato
grouped.push(group)
}
let subgroup = group
const groups: any = {}
for (const index of indexes.slice(1)) {
subgroup = find_group(index_field, record[index], subgroup._children)
if (subgroup == null) {
subgroup = {_children: []}
subgroup[index_field] = record[index]
group._children.push(subgroup)
}
groups[index] = group
for (const column of columns.slice(1)) {
subgroup[column.field] = record[column]
}
Expand All @@ -145,7 +157,16 @@ function group_data(records: any[], columns: any[], indexes: string[], aggregato
}
const aggs = []
for (const index of indexes) {
aggs.push((index in aggregators) ? aggregators[index] : "sum")
if (index in aggregators) {
if (aggregators[index] instanceof Map) {
// when some column names are numeric, need to convert that from a Map to an Object
aggs.push(Object.fromEntries(aggregators[index]))
thuydotm marked this conversation as resolved.
Show resolved Hide resolved
} else {
aggs.push(aggregators[index])
}
} else {
aggs.push("sum")
}
}
summarize(grouped, columns, aggs)
return grouped
Expand Down
207 changes: 207 additions & 0 deletions panel/tests/ui/widgets/test_tabulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,7 @@

cell = page.locator('text="target"').first
# Scroll to the right
cell.scroll_into_view_if_needed()

Check failure on line 1090 in panel/tests/ui/widgets/test_tabulator.py

View workflow job for this annotation

GitHub Actions / ui:test-ui:ubuntu-latest

test_tabulator_patch_no_horizontal_rescroll playwright._impl._errors.Error: Locator.scroll_into_view_if_needed: Element is not attached to the DOM Call log: attempting scroll into view action - waiting for element to be stable
page.wait_for_timeout(200)
bb = page.locator('text="tomodify"').bounding_box()
# Patch a cell in the latest column
Expand Down Expand Up @@ -2437,7 +2437,7 @@
widget.patch({'int': [(0, 100)]}, as_index=False)

max_int = df_mixed['int'].max()
expect(page.locator('.tabulator-cell', has=page.locator(f'text="{max_int}"'))).to_have_count(1)

Check failure on line 2440 in panel/tests/ui/widgets/test_tabulator.py

View workflow job for this annotation

GitHub Actions / ui:test-ui:ubuntu-latest

test_tabulator_patching_and_styling AssertionError: Locator expected to have count '1' Actual value: 0 Call log: LocatorAssertions.to_have_count with timeout 5000ms - waiting for locator(".tabulator-cell").filter(has=locator("text=\"100\"")) - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0"

Check failure on line 2440 in panel/tests/ui/widgets/test_tabulator.py

View workflow job for this annotation

GitHub Actions / ui:test-ui:ubuntu-latest

test_tabulator_patching_and_styling AssertionError: Locator expected to have count '1' Actual value: 0 Call log: LocatorAssertions.to_have_count with timeout 5000ms - waiting for locator(".tabulator-cell").filter(has=locator("text=\"100\"")) - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0" - locator resolved to 0 elements - unexpected value "0"
max_cell = page.locator('.tabulator-cell', has=page.locator(f'text="{max_int}"'))
expect(max_cell).to_have_count(1)
expect(max_cell).to_have_css('background-color', _color_mapping['yellow'])
Expand Down Expand Up @@ -2897,7 +2897,7 @@
cell = page.locator('text="B"').first
cell.click()
editable_cell = page.locator('input[type="text"]')
editable_cell.fill("Q")

Check failure on line 2900 in panel/tests/ui/widgets/test_tabulator.py

View workflow job for this annotation

GitHub Actions / ui:test-ui:macos-latest

test_tabulator_edit_event_and_header_filters_same_column_pagination[local] playwright._impl._errors.TimeoutError: Locator.fill: Timeout 20000ms exceeded. Call log: waiting for locator("input[type=\"text\"]")
editable_cell.press('Enter')

wait_until(lambda: len(values) == 1, page)
Expand Down Expand Up @@ -4094,3 +4094,210 @@
md = page.locator('.row-content .bk-panel-models-markup-HTML')

assert md.bounding_box()['height'] >= 130


@pytest.fixture(scope='session')
def df_agg():
    """Build the five-employee frame used by the hierarchical aggregation tests.

    The frame deliberately contains one missing ``salary`` (Alice) and one
    missing ``date_joined`` (Charlie).
    """
    salaries = [75000.0, 82000.5, np.nan, 64000.0, 91000.0]
    join_dates = [
        np.nan,  # Charlie
        dt.datetime(2019, 3, 15),  # Bob
        dt.datetime(2020, 1, 10),  # Alice
        dt.datetime(2021, 5, 20),  # David
        dt.datetime(2022, 7, 30),  # Eve
    ]
    return pd.DataFrame({
        "employee_id": range(1, 6),
        "gender": ["Male", "Male", "Female", "Male", "Female"],
        "region": ["East", "North", "North", "North", "North"],
        "name": ["Charlie", "Bob", "Alice", "David", "Eve"],
        "salary": salaries,
        "date_joined": join_dates,
    })


@pytest.fixture(scope='session')
def df_agg_int_column_names(df_agg):
    """Return ``df_agg`` with ``salary`` and ``date_joined`` renamed to the integers 1 and 2."""
    int_names = {"salary": 1, "date_joined": 2}
    return df_agg.rename(columns=int_names)


@pytest.mark.parametrize("df", ["df_agg", "df_agg_int_column_names"])
def test_tabulator_hierarchical_data_grouping(page, df, request):
    """Check that a hierarchical Tabulator groups rows by region, then gender.

    Runs once per fixture name in the parametrization (string and integer
    column names). The test expands and collapses the tree controls and
    asserts how many rows appear at each tree level and which employee
    names they contain.
    """
    # ``df`` is a fixture *name*; resolve it to the actual DataFrame.
    df_agg = request.getfixturevalue(df)
    widget = Tabulator(df_agg.set_index(["region", "gender", "employee_id"]), hierarchical=True)
    serve_component(page, widget)

    # An expanded node shows a "collapse" control and a collapsed node shows
    # an "expand" control, so these two locators partition the region rows.
    expanded_groups = page.locator('.tabulator-tree-level-0 .tabulator-data-tree-control-collapse')
    collapsed_groups = page.locator('.tabulator-tree-level-0 .tabulator-data-tree-control-expand')

    # Initially both regions are collapsed.
    expect(collapsed_groups).to_have_count(2)
    expect(expanded_groups).to_have_count(0)
    group_east = collapsed_groups.nth(0)
    group_north = collapsed_groups.nth(1)

    # expand first group and see the data there
    group_east.click()
    expect(collapsed_groups).to_have_count(1)
    expect(expanded_groups).to_have_count(1)
    collapsed_genders = page.locator(".tabulator-tree-level-1 .tabulator-data-tree-control-expand")
    expanded_genders = page.locator(".tabulator-tree-level-1 .tabulator-data-tree-control-collapse")
    expect(collapsed_genders).to_have_count(1)
    expect(expanded_genders).to_have_count(0)
    # TODO: uncomment when showing indexes fixed
    # expect(collapsed_genders).to_contain_text("Male")
    collapsed_genders.click()
    employees = page.locator(".tabulator-tree-level-2")
    expect(employees).to_have_count(1)
    # TODO: assert employee id
    expect(employees).to_contain_text("Charlie")

    # collapse 1st group and expand 2nd group and see the data there
    expanded_groups.click()
    group_north.click()
    expect(collapsed_genders).to_have_count(2)
    # note: after clicking the 1st gender group, `collapsed_genders` has count 1
    # because it queries for the css class .tabulator-data-tree-control-expand
    collapsed_genders.nth(0).click()
    expect(collapsed_genders).to_have_count(1)
    expect(expanded_genders).to_have_count(1)
    expect(employees).to_have_count(2)
    expect(employees.nth(0)).to_contain_text("Bob")
    expect(employees.nth(1)).to_contain_text("David")

    # Expand the remaining collapsed gender group, then collapse the first
    # expanded one, so only the other gender's employees stay visible.
    collapsed_genders.nth(0).click()
    expanded_genders.nth(0).click()
    expect(employees).to_have_count(2)
    expect(employees.nth(0)).to_contain_text("Alice")
    expect(employees.nth(1)).to_contain_text("Eve")


@pytest.mark.parametrize("aggs", [
    {"region": "min", "gender": "max"},
    {"region": "min", "gender": {"salary": "max", "date_joined": "max"}},
    {"region": {"salary": "min", "date_joined": "min"}, "gender": {"salary": "max", "date_joined": "max"}},
    {"region": {"salary": "min", "date_joined": "min"}, "gender": "max"},
])
def test_tabulator_aggregators_data_aggregation(page, df_agg, aggs):
    """Check the aggregated cells rendered on the region and gender group rows.

    Every parametrization requests ``min`` for the ``region`` level and
    ``max`` for the ``gender`` level, written either as a plain string or as
    a per-column nested dict, so all cases share the same expected cells.
    """
    # TODO: parametrize agg_method, index level and column
    widget = Tabulator(df_agg.set_index(["region", "gender", "employee_id"]), hierarchical=True, aggregators=aggs)
    serve_component(page, widget)

    column_titles = page.locator('.tabulator-col-title')
    # column name -> position of its title among the rendered column titles
    col_mapping = {"salary": 3, "date_joined": 4}
    for col, title_idx in col_mapping.items():
        expect(column_titles.nth(title_idx)).to_have_text(col)

    def aggregated_cells(row):
        """Map each checked column to the text rendered for it in ``row``."""
        cells = row.inner_text().split("\n")
        # NOTE(review): the row text is indexed one position lower than the
        # column titles; presumably one leading cell has no text - confirm.
        return {col: cells[title_idx - 1] for col, title_idx in col_mapping.items()}

    # "region1"/"region2" are the first/second rendered region rows.
    expected_results = {
        "region": {
            "region1": {"salary": "75,000.0", "date_joined": "-"},
            "region2": {"salary": "82,000.5", "date_joined": "2021-05-20 00:00:00"},
        },
        "gender": {
            "region1": {
                "Male": {"salary": "75,000.0", "date_joined": "-"},
                # "Female": {},  # no female in this region
            },
            "region2": {
                "Male": {"salary": "82,000.5", "date_joined": "2021-05-20 00:00:00"},
                "Female": {"salary": "-", "date_joined": "2022-07-30 00:00:00"},
            },
        }
    }

    # region level
    rows = page.locator('.tabulator-row')
    expect(rows).to_have_count(2)
    region_agged = {
        "region1": aggregated_cells(rows.nth(0)),
        "region2": aggregated_cells(rows.nth(1)),
    }
    assert region_agged == expected_results["region"]

    regions = page.locator('.tabulator-tree-level-0 .tabulator-data-tree-control-expand')
    # expand all region groups and see the data there
    # The locator only matches still-collapsed regions, so once the first one
    # is expanded, nth(0) resolves to the next collapsed region.
    regions.nth(0).click()
    regions.nth(0).click()
    rows = page.locator(".tabulator-row.tabulator-tree-level-1")
    expect(rows).to_have_count(3)
    # gender level
    gender_agged = {
        "region1": {"Male": aggregated_cells(rows.nth(0))},
        "region2": {
            "Male": aggregated_cells(rows.nth(1)),
            "Female": aggregated_cells(rows.nth(2)),
        },
    }
    assert gender_agged == expected_results["gender"]


@pytest.mark.parametrize("aggs", [
    {"region": "min", "gender": "max"},
    {"region": "min", "gender": {1: "max", 2: "max"}},
    {"region": {1: "min", 2: "min"}, "gender": {1: "max", 2: "max"}},
    {"region": {1: "min", 2: "min"}, "gender": "max"},
])
def test_tabulator_aggregators_data_aggregation_numeric_column_names(page, df_agg_int_column_names, aggs):
    """Check aggregated group-row cells when the value columns are named 1 and 2.

    Every parametrization requests ``min`` for the ``region`` level and
    ``max`` for the ``gender`` level, written either as a plain string or as
    a nested dict keyed by the integer column names.
    """
    # TODO: parametrize agg_method, index level and column
    df_agg = df_agg_int_column_names
    indexed = df_agg.set_index(["region", "gender", "employee_id"])
    widget = Tabulator(indexed, hierarchical=True, aggregators=aggs)
    serve_component(page, widget)

    column_titles = page.locator('.tabulator-col-title')
    # integer column name -> position of its title among the rendered titles
    col_mapping = {1: 3, 2: 4}
    for col, title_idx in col_mapping.items():
        expect(column_titles.nth(title_idx)).to_have_text(str(col))

    def aggregated_cells(row):
        """Map each checked column to the text rendered for it in ``row``."""
        cells = row.inner_text().split("\n")
        # NOTE(review): the row text is indexed one position lower than the
        # column titles; presumably one leading cell has no text - confirm.
        return {col: cells[title_idx - 1] for col, title_idx in col_mapping.items()}

    # "region1"/"region2" are the first/second rendered region rows.
    expected_results = {
        "region": {
            "region1": {1: "75,000.0", 2: "-"},
            "region2": {1: "82,000.5", 2: "2021-05-20 00:00:00"},
        },
        "gender": {
            "region1": {
                "Male": {1: "75,000.0", 2: "-"},
                # "Female": {},  # no female in this region
            },
            "region2": {
                "Male": {1: "82,000.5", 2: "2021-05-20 00:00:00"},
                "Female": {1: "-", 2: "2022-07-30 00:00:00"},
            },
        }
    }

    # region level
    region_rows = page.locator('.tabulator-row')
    expect(region_rows).to_have_count(2)
    region_agged = {
        "region1": aggregated_cells(region_rows.nth(0)),
        "region2": aggregated_cells(region_rows.nth(1)),
    }
    assert region_agged == expected_results["region"]

    regions = page.locator('.tabulator-tree-level-0 .tabulator-data-tree-control-expand')
    # expand all region groups and see the data there
    # The locator only matches still-collapsed regions, so once the first one
    # is expanded, nth(0) resolves to the next collapsed region.
    regions.nth(0).click()
    regions.nth(0).click()
    gender_rows = page.locator(".tabulator-row.tabulator-tree-level-1")
    expect(gender_rows).to_have_count(3)
    # gender level
    gender_agged = {
        "region1": {"Male": aggregated_cells(gender_rows.nth(0))},
        "region2": {
            "Male": aggregated_cells(gender_rows.nth(1)),
            "Female": aggregated_cells(gender_rows.nth(2)),
        },
    }
    assert gender_agged == expected_results["gender"]
13 changes: 11 additions & 2 deletions panel/tests/widgets/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,8 @@ def test_dataframe_duplicate_column_name(document, comm):
table.value = table.value.rename(columns={'a': 'b'})


def test_hierarchical_index(document, comm):
@pytest.fixture
def df_agg():
df = pd.DataFrame([
('Germany', 2020, 9, 2.4, 'A'),
('Germany', 2021, 3, 7.3, 'C'),
Expand All @@ -209,8 +210,11 @@ def test_hierarchical_index(document, comm):
('UK', 2021, 1, 3.9, 'B'),
('UK', 2022, 9, 2.2, 'A')
], columns=['Country', 'Year', 'Int', 'Float', 'Str']).set_index(['Country', 'Year'])
return df

table = DataFrame(value=df, hierarchical=True,

def test_hierarchical_index(document, comm, df_agg):
table = DataFrame(value=df_agg, hierarchical=True,
aggregators={'Year': {'Int': 'sum', 'Float': 'mean'}})

model = table.get_root(document, comm)
Expand Down Expand Up @@ -2713,3 +2717,8 @@ def test_header_filters_categorial_dtype():
widget = Tabulator(df, header_filters=True)
widget.filters = [{'field': 'model', 'type': 'like', 'value': 'A'}]
assert widget.current_view.size == 1

@pytest.mark.parametrize('aggs', [
    {},
    {'Country': 'sum'},
    {'Country': {'Int': 'sum', 'Float': 'mean'}},
])
def test_tabulator_aggregators(document, comm, df_agg, aggs):
    """Smoke test: build the model of a hierarchical Tabulator with empty, flat and nested aggregators."""
    widget = Tabulator(df_agg, hierarchical=True, aggregators=aggs)
    widget.get_root(document, comm)
Loading