Skip to content

Commit

Permalink
set trust remote
Browse files: browse the repository at this point in the history
Signed-off-by: dafnapension <[email protected]>
  • Loading branch information
dafnapension committed Jan 22, 2025
1 parent 6080c1e commit 36b46e3
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 27 deletions.
26 changes: 13 additions & 13 deletions performance/bluebench_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,25 +59,25 @@ def profiler_instantiate_benchmark_recipe(
def profiler_generate_benchmark_dataset(
self, benchmark_recipe: Benchmark, split: str, **kwargs
) -> List[Dict[str, Any]]:
stream = benchmark_recipe()[split]

# to charge here for the time of generating all instances of the split
return list(stream)

def profiler_do_the_profiling(self, dataset_query: str, split: str, **kwargs):
with settings.context(
disable_hf_datasets_cache=False,
allow_unverified_code=True,
):
stream = benchmark_recipe()[split]

# to charge here for the time of generating all instances
return list(stream)
benchmark_recipe = self.profiler_instantiate_benchmark_recipe(
dataset_query=dataset_query, **kwargs
)

def profiler_do_the_profiling(self, dataset_query: str, split: str, **kwargs):
benchmark_recipe = self.profiler_instantiate_benchmark_recipe(
dataset_query=dataset_query, **kwargs
)

dataset = self.profiler_generate_benchmark_dataset(
benchmark_recipe=benchmark_recipe, split=split, **kwargs
)
dataset = self.profiler_generate_benchmark_dataset(
benchmark_recipe=benchmark_recipe, split=split, **kwargs
)

logger.critical(f"length of evaluation_result: {len(dataset)}")
logger.critical(f"length of bluegench generated dataset: {len(dataset)}")


dataset_query = "benchmarks.bluebench[loader_limit=30,max_samples_per_subset=30]"
Expand Down
31 changes: 17 additions & 14 deletions src/unitxt/test_utils/card.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,18 +291,21 @@ def test_card(
else:
template_card_indices = range(len(card.templates))

for template_card_index in template_card_indices:
examples = load_examples_from_dataset_recipe(
card, template_card_index=template_card_index, debug=debug, **kwargs
)
if test_exact_match_score_when_predictions_equal_references:
test_correct_predictions(
examples=examples, strict=strict, exact_match_score=exact_match_score
)
if test_full_mismatch_score_with_full_mismatch_prediction_values:
test_wrong_predictions(
examples=examples,
strict=strict,
maximum_full_mismatch_score=maximum_full_mismatch_score,
full_mismatch_prediction_values=full_mismatch_prediction_values,
with settings.context(allow_unverified_code=True):
for template_card_index in template_card_indices:
examples = load_examples_from_dataset_recipe(
card, template_card_index=template_card_index, debug=debug, **kwargs
)
if test_exact_match_score_when_predictions_equal_references:
test_correct_predictions(
examples=examples,
strict=strict,
exact_match_score=exact_match_score,
)
if test_full_mismatch_score_with_full_mismatch_prediction_values:
test_wrong_predictions(
examples=examples,
strict=strict,
maximum_full_mismatch_score=maximum_full_mismatch_score,
full_mismatch_prediction_values=full_mismatch_prediction_values,
)

0 comments on commit 36b46e3

Please sign in to comment.