Stop running use_legacy_dataset=true for wide-benchmark (#156)

* Stop running `use_legacy_dataset=true` for wide-benchmark
* fix test
* feedback
voltrondata-labs · Jan 8, 2024 · ab8a175
1 parent f58e572
commit ab8a175
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 26 deletions.
diff --git a/benchmarks/tests/test_wide_dataframe_benchmark.py b/benchmarks/tests/test_wide_dataframe_benchmark.py
@@ -13,28 +13,27 @@
   For each benchmark option, the first option value is the default.
 
   Valid benchmark combinations:
-  --use-legacy-dataset=true
   --use-legacy-dataset=false
 
   To run all combinations:
   $ conbench wide-dataframe --all=true
 
 Options:
-  --use-legacy-dataset [false|true]
-  --all BOOLEAN                   [default: false]
+  --use-legacy-dataset [false]
+  --all BOOLEAN                 [default: false]
   --cpu-count INTEGER
-  --iterations INTEGER            [default: 1]
-  --drop-caches BOOLEAN           [default: false]
-  --gc-collect BOOLEAN            [default: true]
-  --gc-disable BOOLEAN            [default: true]
-  --show-result BOOLEAN           [default: true]
-  --show-output BOOLEAN           [default: false]
-  --run-id TEXT                   Group executions together with a run id.
-  --run-name TEXT                 Free-text name of run (commit ABC, pull
-                                  request 123, etc).
-  --run-reason TEXT               Low-cardinality reason for run (commit, pull
-                                  request, manual, etc).
-  --help                          Show this message and exit.
+  --iterations INTEGER          [default: 1]
+  --drop-caches BOOLEAN         [default: false]
+  --gc-collect BOOLEAN          [default: true]
+  --gc-disable BOOLEAN          [default: true]
+  --show-result BOOLEAN         [default: true]
+  --show-output BOOLEAN         [default: false]
+  --run-id TEXT                 Group executions together with a run id.
+  --run-name TEXT               Free-text name of run (commit ABC, pull
+                                request 123, etc).
+  --run-reason TEXT             Low-cardinality reason for run (commit, pull
+                                request, manual, etc).
+  --help                        Show this message and exit.
 """
 
 

diff --git a/benchmarks/wide_dataframe_benchmark.py b/benchmarks/wide_dataframe_benchmark.py
@@ -18,26 +18,21 @@ class WideDataframeBenchmark(_benchmark.Benchmark):
     """
 
     name = "wide-dataframe"
-    valid_cases = (
-        ["use_legacy_dataset"],
-        ["true"],
-        ["false"],
-    )
+    # 'use_legacy_dataset' used to be a meaningful benchmark parameter, but since that
+    # behavior is deprecated we only keep it around to preserve benchmark history.
+    valid_cases = (["use_legacy_dataset"], ["false"])
 
     def run(self, case=None, **kwargs):
         path = os.path.join(_sources.temp_dir, "wide.parquet")
         self._create_if_not_exists(path)
 
         for case in self.get_cases(case, kwargs):
-            (legacy,) = case
-            # not using actual booleans... see hacks.py in conbench
-            legacy = True if legacy == "true" else False
             tags = self.get_tags(kwargs)
-            f = self._get_benchmark_function(path, legacy)
+            f = self._get_benchmark_function(path)
             yield self.benchmark(f, tags, kwargs, case)
 
-    def _get_benchmark_function(self, path, legacy):
-        return lambda: pandas.read_parquet(path, use_legacy_dataset=legacy)
+    def _get_benchmark_function(self, path):
+        return lambda: pandas.read_parquet(path)
 
     def _create_if_not_exists(self, path):
         if not pathlib.Path(path).exists():