reduce benchmark profiling to generating the dataset only: not inferring (that is done by mocking anyhow) and not evaluating (the mocked results)

Signed-off-by: dafnapension <[email protected]>
dafnapension committed Jan 21, 2025
1 parent bcac3aa commit 43ec7fe
Showing 1 changed file with 10 additions and 11 deletions.
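For context on the commit message: when predictions are mocked, profiling the inference phase mostly times a stub, so it adds noise rather than signal. A minimal illustration of the point, using a hypothetical MockedEngine stand-in (this is not the unitxt API):

# Hypothetical stand-in for a mocked inference engine: it fabricates
# predictions instantly, so profiling this phase says nothing about
# real inference cost -- which is why the commit drops it.
class MockedEngine:
    def infer(self, dataset):
        return ["mocked prediction"] * len(dataset)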
performance/bluebench_profiler.py (10 additions & 11 deletions)
@@ -15,7 +15,6 @@
     TextGenerationInferenceOutput,
 )
 from unitxt.logging_utils import get_logger
-from unitxt.schema import UNITXT_DATASET_SCHEMA, loads_instance
 from unitxt.settings_utils import get_settings

 logger = get_logger()
@@ -72,12 +71,12 @@ def profiler_generate_benchmark_dataset(
     ):
         stream = benchmark_recipe()[split]

-        dataset = stream.to_dataset(
-            features=UNITXT_DATASET_SCHEMA, disable_cache=False
-        ).with_transform(loads_instance)
+        # dataset = stream.to_dataset(
+        #     features=UNITXT_DATASET_SCHEMA, disable_cache=False
+        # ).with_transform(loads_instance)

         # to charge here for the time of generating all instances
-        return list(dataset)
+        return list(stream)

     def profiler_instantiate_model(self) -> InferenceEngine:
         return CrossProviderInferenceEngine(
@@ -102,14 +101,14 @@ def profiler_do_the_profiling(self, dataset_query: str, split: str, **kwargs):
             benchmark_recipe=benchmark_recipe, split=split, **kwargs
         )

-        model = self.profiler_instantiate_model()
+        # model = self.profiler_instantiate_model()

-        predictions = self.profiler_infer_predictions(model=model, dataset=dataset)
+        # predictions = self.profiler_infer_predictions(model=model, dataset=dataset)

-        evaluation_result = self.profiler_evaluate_predictions(
-            predictions=predictions, dataset=dataset
-        )
-        logger.critical(f"length of evaluation_result: {len(evaluation_result)}")
+        # evaluation_result = self.profiler_evaluate_predictions(
+        #     predictions=predictions, dataset=dataset
+        # )
+        logger.critical(f"length of evaluation_result: {len(dataset)}")


 dataset_query = "benchmarks.bluebench[loader_limit=30,max_samples_per_subset=30]"
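With inference and evaluation commented out, the time being profiled is dataset generation alone (note the log label still reads "evaluation_result" although it now reports len(dataset)). A sketch of how such a reduced path is typically exercised under cProfile; the generate_dataset stub below is a placeholder for materializing list(stream), as the real harness in bluebench_profiler.py is not shown in this diff:

import cProfile
import pstats

def generate_dataset():
    # Placeholder for profiler_generate_benchmark_dataset: after this
    # commit it reduces to materializing the recipe's stream.
    return [{"instance": i} for i in range(30)]

cProfile.run("generate_dataset()", "bluebench.prof")
# After this commit, the top cumulative-time entries should all sit on
# the dataset-generation path.
pstats.Stats("bluebench.prof").sort_stats("cumtime").print_stats(10)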
