Skip to content

Commit

Permalink
only DatasetRecipe
Browse files Browse the repository at this point in the history
Signed-off-by: dafnapension <[email protected]>
  • Loading branch information
dafnapension committed Dec 19, 2024
1 parent 411ef3d commit 03ffdde
Show file tree
Hide file tree
Showing 78 changed files with 294 additions and 295 deletions.
18 changes: 9 additions & 9 deletions examples/evaluate_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,41 @@
from unitxt.inference import (
CrossProviderInferenceEngine,
)
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe
from unitxt.text_utils import print_dict

benchmark = Benchmark(
format="formats.user_agent",
max_samples_per_subset=5,
loader_limit=300,
subsets={
"cola": StandardRecipe(
"cola": DatasetRecipe(
card="cards.cola",
template="templates.classification.multi_class.instruction",
),
"mnli": StandardRecipe(
"mnli": DatasetRecipe(
card="cards.mnli",
template="templates.classification.multi_class.relation.default",
),
"mrpc": StandardRecipe(
"mrpc": DatasetRecipe(
card="cards.mrpc",
template="templates.classification.multi_class.relation.default",
),
"qnli": StandardRecipe(
"qnli": DatasetRecipe(
card="cards.qnli",
template="templates.classification.multi_class.relation.default",
),
"rte": StandardRecipe(
"rte": DatasetRecipe(
card="cards.rte",
template="templates.classification.multi_class.relation.default",
),
"sst2": StandardRecipe(
"sst2": DatasetRecipe(
card="cards.sst2", template="templates.classification.multi_class.title"
),
"stsb": StandardRecipe(
"stsb": DatasetRecipe(
card="cards.stsb", template="templates.regression.two_texts.title"
),
"wnli": StandardRecipe(
"wnli": DatasetRecipe(
card="cards.wnli",
template="templates.classification.multi_class.relation.default",
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
LMMSEvalInferenceEngine,
)
from unitxt.logging_utils import get_logger
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe

logger = get_logger()

Expand All @@ -16,17 +16,17 @@

dataset = Benchmark(
subsets={
"capitals": StandardRecipe(
"capitals": DatasetRecipe(
card=card,
template="templates.qa.multiple_choice.with_context.lmms_eval[enumerator=capitals]",
loader_limit=20,
),
"lowercase": StandardRecipe(
"lowercase": DatasetRecipe(
card=card,
template="templates.qa.multiple_choice.with_context.lmms_eval[enumerator=lowercase]",
loader_limit=20,
),
"capitals-greyscale": StandardRecipe(
"capitals-greyscale": DatasetRecipe(
card=card,
template="templates.qa.multiple_choice.with_context.lmms_eval[enumerator=capitals]",
loader_limit=20,
Expand Down
4 changes: 2 additions & 2 deletions examples/robustness_testing_for_vision_text_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
LMMSEvalInferenceEngine,
)
from unitxt.logging_utils import get_logger
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe

logger = get_logger()

Expand All @@ -16,7 +16,7 @@
for card in ["cards.seed_bench", "cards.ai2d"]:
for enumerator in ["capitals", "lowercase"]:
for augmentor in [None, "augmentors.image.white_noise"]:
subsets[f"{card} {enumerator} {augmentor}"] = StandardRecipe(
subsets[f"{card} {enumerator} {augmentor}"] = DatasetRecipe(
card=card,
template=f"templates.qa.multiple_choice.with_context.lmms_eval[enumerator={enumerator}]",
loader_limit=100,
Expand Down
12 changes: 6 additions & 6 deletions performance/card_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from unitxt.artifact import fetch_artifact
from unitxt.logging_utils import get_logger
from unitxt.settings_utils import get_settings
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe
from unitxt.stream import MultiStream
from unitxt.templates import TemplatesDict, TemplatesList

Expand Down Expand Up @@ -49,28 +49,28 @@ class CardProfiler:
You will find the total time of each step, accumulated over all cards in the benchmark.
"""

def profiler_instantiate_recipe(self, **kwargs) -> StandardRecipe:
def profiler_instantiate_recipe(self, **kwargs) -> DatasetRecipe:
return load_recipe(**kwargs)

def profiler_load_by_recipe(self, recipe: StandardRecipe) -> MultiStream:
def profiler_load_by_recipe(self, recipe: DatasetRecipe) -> MultiStream:
ms = recipe.loading.process()
assert isinstance(ms, MultiStream)
return ms

def profiler_metadata_and_standardization(
self, ms: MultiStream, recipe: StandardRecipe
self, ms: MultiStream, recipe: DatasetRecipe
) -> MultiStream:
ms = recipe.metadata.process(ms)
return recipe.standardization.process(ms)

def profiler_processing_demos_metadata(
self, ms: MultiStream, recipe: StandardRecipe
self, ms: MultiStream, recipe: DatasetRecipe
) -> MultiStream:
ms = recipe.processing.process(ms)
return recipe.metadata.process(ms)

def profiler_verbalize_and_finalize(
self, ms: MultiStream, recipe: StandardRecipe
self, ms: MultiStream, recipe: DatasetRecipe
) -> MultiStream:
ms = recipe.verbalization.process(ms)
return recipe.finalize.process(ms)
Expand Down
18 changes: 9 additions & 9 deletions prepare/benchmarks/glue.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,36 @@
from unitxt.benchmark import Benchmark
from unitxt.catalog import add_to_catalog
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe

benchmark = Benchmark(
subsets={
"cola": StandardRecipe(
"cola": DatasetRecipe(
card="cards.cola",
template="templates.classification.multi_class.instruction",
),
"mnli": StandardRecipe(
"mnli": DatasetRecipe(
card="cards.mnli",
template="templates.classification.multi_class.relation.default",
),
"mrpc": StandardRecipe(
"mrpc": DatasetRecipe(
card="cards.mrpc",
template="templates.classification.multi_class.relation.default",
),
"qnli": StandardRecipe(
"qnli": DatasetRecipe(
card="cards.qnli",
template="templates.classification.multi_class.relation.default",
),
"rte": StandardRecipe(
"rte": DatasetRecipe(
card="cards.rte",
template="templates.classification.multi_class.relation.default",
),
"sst2": StandardRecipe(
"sst2": DatasetRecipe(
card="cards.sst2", template="templates.classification.multi_class.title"
),
"stsb": StandardRecipe(
"stsb": DatasetRecipe(
card="cards.stsb", template="templates.regression.two_texts.title"
),
"wnli": StandardRecipe(
"wnli": DatasetRecipe(
card="cards.wnli",
template="templates.classification.multi_class.relation.default",
),
Expand Down
4 changes: 2 additions & 2 deletions prepare/recipes/bluebench.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from unitxt import add_to_catalog
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe

subsets = { # the key must appear in the card name
"cards.legalbench": [
Expand Down Expand Up @@ -82,7 +82,7 @@ def prepare_recipe(default_args, specific_args):

if "template" in recipe and "template_card_index" in recipe:
del recipe["template_card_index"]
return StandardRecipe(**recipe, format="formats.chat_api")
return DatasetRecipe(**recipe, format="formats.chat_api")


### Reasoning
Expand Down
14 changes: 7 additions & 7 deletions src/unitxt/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .operator import SourceOperator
from .schema import UNITXT_DATASET_SCHEMA, loads_instance
from .settings_utils import get_constants, get_settings
from .standard import StandardRecipe
from .standard import DatasetRecipe

logger = get_logger()
constants = get_constants()
Expand All @@ -30,7 +30,7 @@ def load(source: Union[SourceOperator, str]):
return source().to_dataset()


def _get_recipe_from_query(dataset_query: str) -> StandardRecipe:
def _get_recipe_from_query(dataset_query: str) -> DatasetRecipe:
dataset_query = dataset_query.replace("sys_prompt", "instruction")
try:
dataset_stream, _ = fetch_artifact(dataset_query)
Expand All @@ -39,14 +39,14 @@ def _get_recipe_from_query(dataset_query: str) -> StandardRecipe:
return dataset_stream


def _get_recipe_from_dict(dataset_params: Dict[str, Any]) -> StandardRecipe:
recipe_attributes = list(StandardRecipe.__dict__["__fields__"].keys())
def _get_recipe_from_dict(dataset_params: Dict[str, Any]) -> DatasetRecipe:
recipe_attributes = list(DatasetRecipe.__dict__["__fields__"].keys())
for param in dataset_params.keys():
assert param in recipe_attributes, (
f"The parameter '{param}' is not an attribute of the 'DatasetRecipe' class. "
f"Please check if the name is correct. The available attributes are: '{recipe_attributes}'."
)
return StandardRecipe(**dataset_params)
return DatasetRecipe(**dataset_params)


def _verify_dataset_args(dataset_query: Optional[str] = None, dataset_args=None):
Expand All @@ -71,8 +71,8 @@ def _verify_dataset_args(dataset_query: Optional[str] = None, dataset_args=None)
)


def load_recipe(dataset_query: Optional[str] = None, **kwargs) -> StandardRecipe:
if isinstance(dataset_query, StandardRecipe):
def load_recipe(dataset_query: Optional[str] = None, **kwargs) -> DatasetRecipe:
if isinstance(dataset_query, DatasetRecipe):
return dataset_query

_verify_dataset_args(dataset_query, kwargs)
Expand Down
4 changes: 2 additions & 2 deletions src/unitxt/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .formats import Format
from .fusion import FixedFusion, WeightedFusion
from .operator import SourceOperator
from .standard import StandardRecipe
from .standard import DatasetRecipe
from .stream import MultiStream
from .system_prompts import SystemPrompt

Expand All @@ -22,7 +22,7 @@ def reset(self):


class Benchmark(BaseBenchmark):
subsets: Dict[str, Union[StandardRecipe, BaseBenchmark]]
subsets: Dict[str, Union[DatasetRecipe, BaseBenchmark]]

max_total_samples: int = None
max_samples_per_subset: int = None
Expand Down
16 changes: 8 additions & 8 deletions src/unitxt/catalog/benchmarks/glue.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,42 @@
"__type__": "benchmark",
"subsets": {
"cola": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.cola",
"template": "templates.classification.multi_class.instruction"
},
"mnli": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.mnli",
"template": "templates.classification.multi_class.relation.default"
},
"mrpc": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.mrpc",
"template": "templates.classification.multi_class.relation.default"
},
"qnli": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.qnli",
"template": "templates.classification.multi_class.relation.default"
},
"rte": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.rte",
"template": "templates.classification.multi_class.relation.default"
},
"sst2": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.sst2",
"template": "templates.classification.multi_class.title"
},
"stsb": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.stsb",
"template": "templates.regression.two_texts.title"
},
"wnli": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.wnli",
"template": "templates.classification.multi_class.relation.default"
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
Expand Down
Loading

0 comments on commit 03ffdde

Please sign in to comment.