Skip to content

Commit

Permalink
Stop running use_legacy_dataset=true for wide-benchmark (#156)
Browse files Browse the repository at this point in the history
* Stop running `use_legacy_dataset=true` for wide-benchmark

* fix test

* feedback
  • Loading branch information
austin3dickey authored Jan 8, 2024
1 parent f58e572 commit ab8a175
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 26 deletions.
29 changes: 14 additions & 15 deletions benchmarks/tests/test_wide_dataframe_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,27 @@
For each benchmark option, the first option value is the default.
Valid benchmark combinations:
--use-legacy-dataset=true
--use-legacy-dataset=false
To run all combinations:
$ conbench wide-dataframe --all=true
Options:
--use-legacy-dataset [false|true]
--all BOOLEAN [default: false]
--use-legacy-dataset [false]
--all BOOLEAN [default: false]
--cpu-count INTEGER
--iterations INTEGER [default: 1]
--drop-caches BOOLEAN [default: false]
--gc-collect BOOLEAN [default: true]
--gc-disable BOOLEAN [default: true]
--show-result BOOLEAN [default: true]
--show-output BOOLEAN [default: false]
--run-id TEXT Group executions together with a run id.
--run-name TEXT Free-text name of run (commit ABC, pull
request 123, etc).
--run-reason TEXT Low-cardinality reason for run (commit, pull
request, manual, etc).
--help Show this message and exit.
--iterations INTEGER [default: 1]
--drop-caches BOOLEAN [default: false]
--gc-collect BOOLEAN [default: true]
--gc-disable BOOLEAN [default: true]
--show-result BOOLEAN [default: true]
--show-output BOOLEAN [default: false]
--run-id TEXT Group executions together with a run id.
--run-name TEXT Free-text name of run (commit ABC, pull
request 123, etc).
--run-reason TEXT Low-cardinality reason for run (commit, pull
request, manual, etc).
--help Show this message and exit.
"""


Expand Down
17 changes: 6 additions & 11 deletions benchmarks/wide_dataframe_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,21 @@ class WideDataframeBenchmark(_benchmark.Benchmark):
"""

name = "wide-dataframe"
valid_cases = (
["use_legacy_dataset"],
["true"],
["false"],
)
# 'use_legacy_dataset' used to be a meaningful benchmark parameter, but since that
# behavior is deprecated we only keep it around to preserve benchmark history.
valid_cases = (["use_legacy_dataset"], ["false"])

def run(self, case=None, **kwargs):
path = os.path.join(_sources.temp_dir, "wide.parquet")
self._create_if_not_exists(path)

for case in self.get_cases(case, kwargs):
(legacy,) = case
# not using actual booleans... see hacks.py in conbench
legacy = True if legacy == "true" else False
tags = self.get_tags(kwargs)
f = self._get_benchmark_function(path, legacy)
f = self._get_benchmark_function(path)
yield self.benchmark(f, tags, kwargs, case)

def _get_benchmark_function(self, path, legacy):
return lambda: pandas.read_parquet(path, use_legacy_dataset=legacy)
def _get_benchmark_function(self, path):
return lambda: pandas.read_parquet(path)

def _create_if_not_exists(self, path):
if not pathlib.Path(path).exists():
Expand Down

0 comments on commit ab8a175

Please sign in to comment.