holoviz · maximlt · Dec 2, 2024 · Oct 29, 2024 · Oct 29, 2024 · Nov 1, 2024
diff --git a/examples/reference/widgets/Tabulator.ipynb b/examples/reference/widgets/Tabulator.ipynb
@@ -882,7 +882,7 @@
     "\n",
     "The `Tabulator` widget can also render a hierarchical multi-index and aggregate over specific categories. If a DataFrame with a hierarchical multi-index is supplied and the `hierarchical` is enabled the widget will group data by the categories in the order they are defined in. Additionally for each group in the multi-index an aggregator may be provided which will aggregate over the values in that category.\n",
     "\n",
-    "For example we may load population data for locations around the world broken down by sex and age-group. If we specify aggregators over the 'AgeGrp' and 'Sex' indexes we can see the aggregated values for each of those groups (note that we do not have to specify an aggregator for the outer index since we specify the aggregators over the subgroups in this case the 'Sex'):"
+    "We will use the Automobile Mileage dataset for various car models from the 1970s and 1980s around the world, broken down by regions, model years and manufacturers. The dataset includes details on car characteristics and performance metrics."
    ]
   },
   {
@@ -891,11 +891,45 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from bokeh.sampledata.population import data as population_data \n",
+    "from bokeh.sampledata.autompg import autompg_clean as autompg_df\n",
     "\n",
-    "pop_df = population_data[population_data.Year == 2020].set_index(['Location', 'AgeGrp', 'Sex'])[['Value']]\n",
+    "autompg_df = autompg_df.set_index([\"origin\", \"yr\", \"mfr\"])\n",
+    "autompg_df.head(3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If we specify aggregators over the 'origin' (region) and 'yr' (model year) indexes, we can see the aggregated values for each of those groups. Note that if no aggregator is specified for an outer index level, that level is aggregated with the default method, `sum`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pn.widgets.Tabulator(value=autompg_df, hierarchical=True, aggregators={\"origin\": \"mean\", \"yr\": \"mean\"}, height=200)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Separate aggregators for different columns are also supported. You can specify `aggregators` as a nested dictionary of the form `{index_name: {column_name: aggregator}}`.\n",
     "\n",
-    "pn.widgets.Tabulator(value=pop_df, hierarchical=True, aggregators={'Sex': 'sum', 'AgeGrp': 'sum'}, height=200)"
+    "Applied to the same dataset, we can aggregate the data in the `mpg` (miles per gallon) and `hp` columns differently, with `mean` and `max`, respectively."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nested_aggs = {\"origin\": {\"mpg\": \"mean\", \"hp\": \"max\"}, \"yr\": {\"mpg\": \"mean\", \"hp\": \"max\"}}\n",
+    "pn.widgets.Tabulator(value=autompg_df[[\"mpg\", \"hp\"]], hierarchical=True, aggregators=nested_aggs, height=200)"
    ]
   },
   {

diff --git a/panel/models/tabulator.py b/panel/models/tabulator.py
@@ -113,7 +113,7 @@ class DataTabulator(HTMLBox):
     See http://tabulator.info/
     """
 
-    aggregators = Dict(String, String)
+    aggregators = Dict(Either(String, Int), Either(String, Dict(Either(String, Int), String)))
 
     buttons = Dict(String, String)
 

diff --git a/panel/models/tabulator.ts b/panel/models/tabulator.ts
@@ -73,12 +73,17 @@ function find_group(key: any, value: string, records: any[]): any {
   return null
 }
 
-function summarize(grouped: any[], columns: any[], aggregators: string[], depth: number = 0): any {
+function summarize(grouped: any[], columns: any[], aggregators: any[], depth: number = 0): any {
   const summary: any = {}
   if (grouped.length == 0) {
     return summary
   }
-  const agg = aggregators[depth]
+  // depth 0 is the root and has no aggregator; deeper levels use aggregators[depth-1]
+  let aggs = ""
+  if (depth > 0) {
+    aggs = aggregators[depth-1]
+  }
+
   for (const group of grouped) {
     const subsummary = summarize(group._children, columns, aggregators, depth+1)
     for (const col in subsummary) {
@@ -88,14 +93,23 @@ function summarize(grouped: any[], columns: any[], aggregators: string[], depth:
         group[col] = subsummary[col]
       }
     }
+
     for (const column of columns.slice(1)) {
+      // if no aggregation method provided for an index level,
+      // or a specific column of an index level, do not aggregate data
+      let agg: string = ""
+      if (typeof aggs === "string") {
+        agg = aggs
+      } else if (column.field in aggs) {
+        agg = aggs[column.field]
+      }
       const val = group[column.field]
       if (column.field in summary) {
         const old_val = summary[column.field]
         if (agg === "min") {
-          summary[column.field] = Math.min(val, old_val)
+          summary[column.field] = (val < old_val) ? val : old_val
         } else if (agg === "max") {
-          summary[column.field] = Math.max(val, old_val)
+          summary[column.field] = (val > old_val) ? val : old_val
         } else if (agg === "sum") {
           summary[column.field] = val + old_val
         } else if (agg === "mean") {
@@ -125,15 +139,13 @@ function group_data(records: any[], columns: any[], indexes: string[], aggregato
       grouped.push(group)
     }
     let subgroup = group
-    const groups: any = {}
     for (const index of indexes.slice(1)) {
       subgroup = find_group(index_field, record[index], subgroup._children)
       if (subgroup == null) {
         subgroup = {_children: []}
         subgroup[index_field] = record[index]
         group._children.push(subgroup)
       }
-      groups[index] = group
       for (const column of columns.slice(1)) {
         subgroup[column.field] = record[column]
       }
@@ -145,7 +157,16 @@ function group_data(records: any[], columns: any[], indexes: string[], aggregato
   }
   const aggs = []
   for (const index of indexes) {
-    aggs.push((index in aggregators) ? aggregators[index] : "sum")
+    if (index in aggregators) {
+      if (aggregators[index] instanceof Map) {
+        // when some column names are numeric the nested aggregators are a Map; convert it to a plain Object
+        aggs.push(Object.fromEntries(aggregators[index]))
+      } else {
+        aggs.push(aggregators[index])
+      }
+    } else {
+      aggs.push("sum")
+    }
   }
   summarize(grouped, columns, aggs)
   return grouped

diff --git a/panel/tests/ui/widgets/test_tabulator.py b/panel/tests/ui/widgets/test_tabulator.py
@@ -1087,7 +1087,7 @@

    cell = page.locator('text="target"').first
    # Scroll to the right
    cell.scroll_into_view_if_needed()
    page.wait_for_timeout(200)
    bb = page.locator('text="tomodify"').bounding_box()
    # Patch a cell in the latest column
@@ -2437,7 +2437,7 @@
    widget.patch({'int': [(0, 100)]}, as_index=False)

    max_int = df_mixed['int'].max()
    expect(page.locator('.tabulator-cell', has=page.locator(f'text="{max_int}"'))).to_have_count(1)
    max_cell = page.locator('.tabulator-cell', has=page.locator(f'text="{max_int}"'))
    expect(max_cell).to_have_count(1)
    expect(max_cell).to_have_css('background-color', _color_mapping['yellow'])
@@ -2897,7 +2897,7 @@
    cell = page.locator('text="B"').first
    cell.click()
    editable_cell = page.locator('input[type="text"]')
    editable_cell.fill("Q")
    editable_cell.press('Enter')

    wait_until(lambda: len(values) == 1, page)
@@ -4094,3 +4094,210 @@
     md = page.locator('.row-content .bk-panel-models-markup-HTML')
 
     assert md.bounding_box()['height'] >= 130
+
+
+@pytest.fixture(scope='session')
+def df_agg():
+    data = {
+        "employee_id": range(1, 6),
+        "gender": ["Male", "Male", "Female", "Male", "Female"],
+        "region": ["East", "North", "North", "North", "North"],
+        "name": ["Charlie", "Bob", "Alice", "David", "Eve"],
+        "salary": [75000.0, 82000.5, np.nan, 64000.0, 91000.0],
+        "date_joined": [
+            np.nan,  # Charlie
+            dt.datetime(2019, 3, 15),  # Bob
+            dt.datetime(2020, 1, 10),  # Alice
+            dt.datetime(2021, 5, 20),  # David
+            dt.datetime(2022, 7, 30),  # Eve
+        ],
+    }
+    return pd.DataFrame(data)
+
+
+@pytest.fixture(scope='session')
+def df_agg_int_column_names(df_agg):
+    return df_agg.rename(columns={"salary": 1, "date_joined": 2})
+
+
+@pytest.mark.parametrize("df", ["df_agg", "df_agg_int_column_names"])
+def test_tabulator_hierarchical_data_grouping(page, df, request):
+    df_agg = request.getfixturevalue(df)
+    widget = Tabulator(df_agg.set_index(["region", "gender", "employee_id"]), hierarchical=True)
+    serve_component(page, widget)
+
+    expanded_groups = page.locator('.tabulator-tree-level-0 .tabulator-data-tree-control-collapse')
+    collapsed_groups = page.locator('.tabulator-tree-level-0 .tabulator-data-tree-control-expand')
+
+    expect(collapsed_groups).to_have_count(2)
+    expect(expanded_groups).to_have_count(0)
+    group_east = collapsed_groups.nth(0)
+    group_north = collapsed_groups.nth(1)
+
+    # expand first group and see the data there
+    group_east.click()
+    expect(collapsed_groups).to_have_count(1)
+    expect(expanded_groups).to_have_count(1)
+    collapsed_genders = page.locator(".tabulator-tree-level-1 .tabulator-data-tree-control-expand")
+    expanded_genders = page.locator(".tabulator-tree-level-1 .tabulator-data-tree-control-collapse")
+    expect(collapsed_genders).to_have_count(1)
+    expect(expanded_genders).to_have_count(0)
+    # TODO: uncomment once the display of index values is fixed
+    # expect(collapsed_genders).to_contain_text("Male")
+    collapsed_genders.click()
+    employees = page.locator(".tabulator-tree-level-2")
+    expect(employees).to_have_count(1)
+    # TODO: assert employee id
+    expect(employees).to_contain_text("Charlie")
+
+    # collapse 1st group and expand 2nd group and see the data there
+    expanded_groups.click()
+    group_north.click()
+    expect(collapsed_genders).to_have_count(2)
+    # note: after clicking the 1st gender group, `collapsed_genders` has count 1, as it queries for the CSS class
+    # .tabulator-data-tree-control-expand
+    collapsed_genders.nth(0).click()
+    expect(collapsed_genders).to_have_count(1)
+    expect(expanded_genders).to_have_count(1)
+    expect(employees).to_have_count(2)
+    expect(employees.nth(0)).to_contain_text("Bob")
+    expect(employees.nth(1)).to_contain_text("David")
+
+    collapsed_genders.nth(0).click()
+    expanded_genders.nth(0).click()
+    expect(employees).to_have_count(2)
+    expect(employees.nth(0)).to_contain_text("Alice")
+    expect(employees.nth(1)).to_contain_text("Eve")
+
+
+@pytest.mark.parametrize("aggs", [
+    {"region": "min", "gender": "max"},
+    {"region": "min", "gender": {"salary": "max", "date_joined": "max"}},
+    {"region": {"salary": "min", "date_joined": "min"}, "gender": {"salary": "max", "date_joined": "max"}},
+    {"region": {"salary": "min", "date_joined": "min"}, "gender": "max"},
+])
+def test_tabulator_aggregators_data_aggregation(page, df_agg, aggs):
+    # TODO: parametrize agg_method, index level and column
+    widget = Tabulator(df_agg.set_index(["region", "gender", "employee_id"]), hierarchical=True, aggregators=aggs)
+    serve_component(page, widget)
+
+    column_titles = page.locator('.tabulator-col-title')
+    col_mapping = {"salary": 3, "date_joined": 4}
+    for col in col_mapping:
+        expect(column_titles.nth(col_mapping[col])).to_have_text(col)
+
+    expected_results = {
+        "region": {
+            "region1": {"salary": "75,000.0", "date_joined": "-"},
+            "region2": {"salary": "82,000.5", "date_joined": "2021-05-20 00:00:00"},
+        },
+        "gender": {
+            "region1": {
+                "Male": {"salary": "75,000.0", "date_joined": "-"},
+                # "Female": {},  # no female in this region
+            },
+            "region2": {
+                "Male": {"salary": "82,000.5", "date_joined": "2021-05-20 00:00:00"},
+                "Female": {"salary": "-", "date_joined": "2022-07-30 00:00:00"},
+            },
+        }
+    }
+
+    # region level
+    rows = page.locator('.tabulator-row')
+    expect(rows).to_have_count(2)
+    agged = {
+        "region1": rows.nth(0).inner_text().split("\n"),
+        "region2": rows.nth(1).inner_text().split("\n"),
+    }
+    region_agged = {
+        region: {col: agged[region][col_mapping[col] - 1] for col in col_mapping} for region in agged
+    }
+    assert region_agged == expected_results["region"]
+
+    regions = page.locator('.tabulator-tree-level-0 .tabulator-data-tree-control-expand')
+    # expand all region groups and see the data there
+    regions.nth(0).click()
+    regions.nth(0).click()
+    rows = page.locator(".tabulator-row.tabulator-tree-level-1")
+    expect(rows).to_have_count(3)
+    # gender level
+    agged = {
+        "region1": {"Male": rows.nth(0).inner_text().split("\n")},
+        "region2": {
+            "Male": rows.nth(1).inner_text().split("\n"),
+            "Female": rows.nth(2).inner_text().split("\n"),
+        },
+    }
+    gender_agged = {
+        region: {
+            gender: {col: agged[region][gender][col_mapping[col] - 1] for col in col_mapping} for gender in agged[region]} for region in agged
+    }
+    assert gender_agged == expected_results["gender"]
+
+
+@pytest.mark.parametrize("aggs", [
+    {"region": "min", "gender": "max"},
+    {"region": "min", "gender": {1: "max", 2: "max"}},
+    {"region": {1: "min", 2: "min"}, "gender": {1: "max", 2: "max"}},
+    {"region": {1: "min", 2: "min"}, "gender": "max"},
+])
+def test_tabulator_aggregators_data_aggregation_numeric_column_names(page, df_agg_int_column_names, aggs):
+    # TODO: parametrize agg_method, index level and column
+    df_agg = df_agg_int_column_names
+    widget = Tabulator(df_agg.set_index(["region", "gender", "employee_id"]), hierarchical=True, aggregators=aggs)
+    serve_component(page, widget)
+
+    column_titles = page.locator('.tabulator-col-title')
+    col_mapping = {1: 3, 2: 4}
+    for col in col_mapping:
+        expect(column_titles.nth(col_mapping[col])).to_have_text(str(col))
+
+    expected_results = {
+        "region": {
+            "region1": {1: "75,000.0", 2: "-"},
+            "region2": {1: "82,000.5", 2: "2021-05-20 00:00:00"},
+        },
+        "gender": {
+            "region1": {
+                "Male": {1: "75,000.0", 2: "-"},
+                # "Female": {},  # no female in this region
+            },
+            "region2": {
+                "Male": {1: "82,000.5", 2: "2021-05-20 00:00:00"},
+                "Female": {1: "-", 2: "2022-07-30 00:00:00"},
+            },
+        }
+    }
+
+    # region level
+    rows = page.locator('.tabulator-row')
+    expect(rows).to_have_count(2)
+    agged = {
+        "region1": rows.nth(0).inner_text().split("\n"),
+        "region2": rows.nth(1).inner_text().split("\n"),
+    }
+    region_agged = {
+        region: {col: agged[region][col_mapping[col] - 1] for col in col_mapping} for region in agged
+    }
+    assert region_agged == expected_results["region"]
+
+    regions = page.locator('.tabulator-tree-level-0 .tabulator-data-tree-control-expand')
+    # expand all region groups and see the data there
+    regions.nth(0).click()
+    regions.nth(0).click()
+    rows = page.locator(".tabulator-row.tabulator-tree-level-1")
+    expect(rows).to_have_count(3)
+    # gender level
+    agged = {
+        "region1": {"Male": rows.nth(0).inner_text().split("\n")},
+        "region2": {
+            "Male": rows.nth(1).inner_text().split("\n"),
+            "Female": rows.nth(2).inner_text().split("\n"),
+        },
+    }
+    gender_agged = {
+        region: {
+            gender: {col: agged[region][gender][col_mapping[col] - 1] for col in col_mapping} for gender in agged[region]} for region in agged
+    }
+    assert gender_agged == expected_results["gender"]
diff --git a/panel/tests/widgets/test_tables.py b/panel/tests/widgets/test_tables.py
@@ -200,7 +200,8 @@ def test_dataframe_duplicate_column_name(document, comm):
         table.value = table.value.rename(columns={'a': 'b'})
 
 
-def test_hierarchical_index(document, comm):
+@pytest.fixture
+def df_agg():
     df = pd.DataFrame([
         ('Germany', 2020, 9, 2.4, 'A'),
         ('Germany', 2021, 3, 7.3, 'C'),
@@ -209,8 +210,11 @@ def test_hierarchical_index(document, comm):
         ('UK', 2021, 1, 3.9, 'B'),
         ('UK', 2022, 9, 2.2, 'A')
     ], columns=['Country', 'Year', 'Int', 'Float', 'Str']).set_index(['Country', 'Year'])
+    return df
 
-    table = DataFrame(value=df, hierarchical=True,
+
+def test_hierarchical_index(document, comm, df_agg):
+    table = DataFrame(value=df_agg, hierarchical=True,
                       aggregators={'Year': {'Int': 'sum', 'Float': 'mean'}})
 
     model = table.get_root(document, comm)
@@ -2713,3 +2717,8 @@ def test_header_filters_categorial_dtype():
     widget = Tabulator(df, header_filters=True)
     widget.filters = [{'field': 'model', 'type': 'like', 'value': 'A'}]
     assert widget.current_view.size == 1
+
+@pytest.mark.parametrize('aggs', [{}, {'Country': 'sum'}, {'Country': {'Int': 'sum', 'Float': 'mean'}}])
+def test_tabulator_aggregators(document, comm, df_agg, aggs):
+    tabulator = Tabulator(df_agg, hierarchical=True, aggregators=aggs)
+    tabulator.get_root(document, comm)