make demos_pool a local var rather than a separate stream #1436

Merged · 11 commits · Dec 24, 2024
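
The PR title describes the demos_pool change; the diff below also renames StandardRecipe to DatasetRecipe across docs, examples, catalog entries, and the API. As a rough illustration of what "demos_pool as a local variable rather than a separate stream" can mean (a sketch under assumed semantics, not the actual unitxt implementation), the demonstrations pool is materialized once as an ordinary local collection and the sampled demos are attached to each instance directly, instead of the pool being carried through the pipeline as an extra named stream:

# Illustrative sketch only -- not the unitxt implementation. It shows the idea
# named in the PR title: keep the demos pool as a local variable and assign
# sampled demonstrations to each instance, rather than routing the pool through
# the pipeline as a separate stream.
import random
from typing import Dict, Iterable, Iterator, List

def attach_demos_from_local_pool(
    instances: Iterable[Dict],
    demos_pool: List[Dict],  # the pool, held as a plain local list
    num_demos: int,
) -> Iterator[Dict]:
    """Attach num_demos randomly sampled demonstrations to every instance."""
    for instance in instances:
        instance["demos"] = random.sample(demos_pool, num_demos)
        yield instance

# Hypothetical usage: the pool is built once and reused for every instance.
demos_pool = [{"source": f"example {i}", "target": str(i)} for i in range(20)]
instances = ({"source": f"question {i}"} for i in range(5))
for item in attach_demos_from_local_pool(instances, demos_pool, num_demos=3):
    print(item["source"], len(item["demos"]))
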
2 changes: 1 addition & 1 deletion docs/blog/vision_robustness_blog.rst
@@ -39,7 +39,7 @@ Here’s the code used to set up our tests. This example uses Unitxt to create s
for card in ["cards.seed_bench", "cards.ai2d"]:
for enumerator in ["capitals", "lowercase"]:
for augmentor in [None, "augmentors.image.white_noise"]:
subsets[f"{card} {enumerator} {augmentor}"] = StandardRecipe(
subsets[f"{card} {enumerator} {augmentor}"] = DatasetRecipe(
card=card,
template=f"templates.qa.multiple_choice.with_context.lmms_eval[enumerator={enumerator}]",
loader_limit=100,
20 changes: 10 additions & 10 deletions docs/docs/benchmark.rst
@@ -37,21 +37,21 @@ We can compile them together using Unitxt Benchmark:
.. code-block:: python

from unitxt.benchmark import Benchmark
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe

benchmark = Benchmark(
format="formats.user_agent",
max_samples_per_subset=5,
loader_limit=300,
subsets={
"cola": StandardRecipe(card="cards.cola", template="templates.classification.multi_class.instruction"),
"mnli": StandardRecipe(card="cards.mnli", template="templates.classification.multi_class.relation.default"),
"mrpc": StandardRecipe(card="cards.mrpc", template="templates.classification.multi_class.relation.default"),
"qnli": StandardRecipe(card="cards.qnli", template="templates.classification.multi_class.relation.default"),
"rte": StandardRecipe(card="cards.rte", template="templates.classification.multi_class.relation.default"),
"sst2": StandardRecipe(card="cards.sst2", template="templates.classification.multi_class.title"),
"stsb": StandardRecipe(card="cards.stsb", template="templates.regression.two_texts.title"),
"wnli": StandardRecipe(card="cards.wnli", template="templates.classification.multi_class.relation.default"),
"cola": DatasetRecipe(card="cards.cola", template="templates.classification.multi_class.instruction"),
"mnli": DatasetRecipe(card="cards.mnli", template="templates.classification.multi_class.relation.default"),
"mrpc": DatasetRecipe(card="cards.mrpc", template="templates.classification.multi_class.relation.default"),
"qnli": DatasetRecipe(card="cards.qnli", template="templates.classification.multi_class.relation.default"),
"rte": DatasetRecipe(card="cards.rte", template="templates.classification.multi_class.relation.default"),
"sst2": DatasetRecipe(card="cards.sst2", template="templates.classification.multi_class.title"),
"stsb": DatasetRecipe(card="cards.stsb", template="templates.regression.two_texts.title"),
"wnli": DatasetRecipe(card="cards.wnli", template="templates.classification.multi_class.relation.default"),
},
)
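
Side note on the snippet above (not part of the diff): assuming Benchmark behaves as a SourceOperator and using the load() helper shown further down in this PR in src/unitxt/api.py (which simply calls source().to_dataset()), the compiled benchmark could be materialized roughly like this:

# Sketch only; it relies on the load() helper visible later in this diff,
# which converts any SourceOperator -- including a Benchmark -- to a dataset.
from unitxt.api import load

dataset = load(benchmark)  # 'benchmark' is the object constructed above
print(dataset)
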

@@ -128,7 +128,7 @@ If you want to explore different templates, you can do so by defining a list of

.. code-block:: python

StandardRecipe(
DatasetRecipe(
card="cards.cola",
template=[
"templates.classification.multi_class.instruction",
18 changes: 9 additions & 9 deletions examples/evaluate_benchmark.py
@@ -3,40 +3,40 @@
from unitxt.inference import (
CrossProviderInferenceEngine,
)
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe

benchmark = Benchmark(
format="formats.user_agent",
max_samples_per_subset=5,
loader_limit=30,
subsets={
"cola": StandardRecipe(
"cola": DatasetRecipe(
card="cards.cola",
template="templates.classification.multi_class.instruction",
),
"mnli": StandardRecipe(
"mnli": DatasetRecipe(
card="cards.mnli",
template="templates.classification.multi_class.relation.default",
),
"mrpc": StandardRecipe(
"mrpc": DatasetRecipe(
card="cards.mrpc",
template="templates.classification.multi_class.relation.default",
),
"qnli": StandardRecipe(
"qnli": DatasetRecipe(
card="cards.qnli",
template="templates.classification.multi_class.relation.default",
),
"rte": StandardRecipe(
"rte": DatasetRecipe(
card="cards.rte",
template="templates.classification.multi_class.relation.default",
),
"sst2": StandardRecipe(
"sst2": DatasetRecipe(
card="cards.sst2", template="templates.classification.multi_class.title"
),
"stsb": StandardRecipe(
"stsb": DatasetRecipe(
card="cards.stsb", template="templates.regression.two_texts.title"
),
"wnli": StandardRecipe(
"wnli": DatasetRecipe(
card="cards.wnli",
template="templates.classification.multi_class.relation.default",
),
@@ -5,7 +5,7 @@
LMMSEvalInferenceEngine,
)
from unitxt.logging_utils import get_logger
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe

logger = get_logger()

@@ -16,17 +16,17 @@

dataset = Benchmark(
subsets={
"capitals": StandardRecipe(
"capitals": DatasetRecipe(
card=card,
template="templates.qa.multiple_choice.with_context.lmms_eval[enumerator=capitals]",
loader_limit=20,
),
"lowercase": StandardRecipe(
"lowercase": DatasetRecipe(
card=card,
template="templates.qa.multiple_choice.with_context.lmms_eval[enumerator=lowercase]",
loader_limit=20,
),
"capitals-greyscale": StandardRecipe(
"capitals-greyscale": DatasetRecipe(
card=card,
template="templates.qa.multiple_choice.with_context.lmms_eval[enumerator=capitals]",
loader_limit=20,
4 changes: 2 additions & 2 deletions examples/robustness_testing_for_vision_text_models.py
@@ -5,7 +5,7 @@
LMMSEvalInferenceEngine,
)
from unitxt.logging_utils import get_logger
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe

logger = get_logger()

@@ -16,7 +16,7 @@
for card in ["cards.seed_bench", "cards.ai2d"]:
for enumerator in ["capitals", "lowercase"]:
for augmentor in [None, "augmentors.image.white_noise"]:
subsets[f"{card} {enumerator} {augmentor}"] = StandardRecipe(
subsets[f"{card} {enumerator} {augmentor}"] = DatasetRecipe(
card=card,
template=f"templates.qa.multiple_choice.with_context.lmms_eval[enumerator={enumerator}]",
format="formats.chat_api",
12 changes: 6 additions & 6 deletions performance/card_profiler.py
@@ -10,7 +10,7 @@
from unitxt.artifact import fetch_artifact
from unitxt.logging_utils import get_logger
from unitxt.settings_utils import get_settings
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe
from unitxt.stream import MultiStream
from unitxt.templates import TemplatesDict, TemplatesList

@@ -49,28 +49,28 @@ class CardProfiler:
You will find the total time of each step, accumulated over all cards in the benchmark.
"""

def profiler_instantiate_recipe(self, **kwargs) -> StandardRecipe:
def profiler_instantiate_recipe(self, **kwargs) -> DatasetRecipe:
return load_recipe(**kwargs)

def profiler_load_by_recipe(self, recipe: StandardRecipe) -> MultiStream:
def profiler_load_by_recipe(self, recipe: DatasetRecipe) -> MultiStream:
ms = recipe.loading.process()
assert isinstance(ms, MultiStream)
return ms

def profiler_metadata_and_standardization(
self, ms: MultiStream, recipe: StandardRecipe
self, ms: MultiStream, recipe: DatasetRecipe
) -> MultiStream:
ms = recipe.metadata.process(ms)
return recipe.standardization.process(ms)

def profiler_processing_demos_metadata(
self, ms: MultiStream, recipe: StandardRecipe
self, ms: MultiStream, recipe: DatasetRecipe
) -> MultiStream:
ms = recipe.processing.process(ms)
return recipe.metadata.process(ms)

def profiler_verbalize_and_finalize(
self, ms: MultiStream, recipe: StandardRecipe
self, ms: MultiStream, recipe: DatasetRecipe
) -> MultiStream:
ms = recipe.verbalization.process(ms)
return recipe.finalize.process(ms)
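
The annotation changes above cover the whole profiling chain. A hypothetical end-to-end call sequence, using only the method names visible in this hunk (the recipe arguments are made up for illustration, and the ordering follows the method names rather than the rest of card_profiler.py):

# Illustrative only: chains the CardProfiler steps shown above in pipeline order.
profiler = CardProfiler()
recipe = profiler.profiler_instantiate_recipe(
    card="cards.wnli",
    template="templates.classification.multi_class.relation.default",
)
ms = profiler.profiler_load_by_recipe(recipe)
ms = profiler.profiler_metadata_and_standardization(ms, recipe)
ms = profiler.profiler_processing_demos_metadata(ms, recipe)
ms = profiler.profiler_verbalize_and_finalize(ms, recipe)
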
18 changes: 9 additions & 9 deletions prepare/benchmarks/glue.py
@@ -1,36 +1,36 @@
from unitxt.benchmark import Benchmark
from unitxt.catalog import add_to_catalog
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe

benchmark = Benchmark(
subsets={
"cola": StandardRecipe(
"cola": DatasetRecipe(
card="cards.cola",
template="templates.classification.multi_class.instruction",
),
"mnli": StandardRecipe(
"mnli": DatasetRecipe(
card="cards.mnli",
template="templates.classification.multi_class.relation.default",
),
"mrpc": StandardRecipe(
"mrpc": DatasetRecipe(
card="cards.mrpc",
template="templates.classification.multi_class.relation.default",
),
"qnli": StandardRecipe(
"qnli": DatasetRecipe(
card="cards.qnli",
template="templates.classification.multi_class.relation.default",
),
"rte": StandardRecipe(
"rte": DatasetRecipe(
card="cards.rte",
template="templates.classification.multi_class.relation.default",
),
"sst2": StandardRecipe(
"sst2": DatasetRecipe(
card="cards.sst2", template="templates.classification.multi_class.title"
),
"stsb": StandardRecipe(
"stsb": DatasetRecipe(
card="cards.stsb", template="templates.regression.two_texts.title"
),
"wnli": StandardRecipe(
"wnli": DatasetRecipe(
card="cards.wnli",
template="templates.classification.multi_class.relation.default",
),
4 changes: 2 additions & 2 deletions prepare/recipes/bluebench.py
@@ -1,5 +1,5 @@
from unitxt import add_to_catalog
from unitxt.standard import StandardRecipe
from unitxt.standard import DatasetRecipe

subsets = { # the key must appear in the card name
"cards.legalbench": [
@@ -82,7 +82,7 @@ def prepare_recipe(default_args, specific_args):

if "template" in recipe and "template_card_index" in recipe:
del recipe["template_card_index"]
return StandardRecipe(**recipe, format="formats.chat_api")
return DatasetRecipe(**recipe, format="formats.chat_api")


### Reasoning
16 changes: 8 additions & 8 deletions src/unitxt/api.py
@@ -18,7 +18,7 @@
from .operator import SourceOperator
from .schema import UNITXT_DATASET_SCHEMA, loads_instance
from .settings_utils import get_constants, get_settings
from .standard import StandardRecipe
from .standard import DatasetRecipe
from .task import Task

logger = get_logger()
@@ -35,7 +35,7 @@ def load(source: Union[SourceOperator, str]):
return source().to_dataset()


def _get_recipe_from_query(dataset_query: str) -> StandardRecipe:
def _get_recipe_from_query(dataset_query: str) -> DatasetRecipe:
dataset_query = dataset_query.replace("sys_prompt", "instruction")
try:
dataset_stream, _ = fetch_artifact(dataset_query)
@@ -44,14 +44,14 @@ def _get_recipe_from_query(dataset_query: str) -> StandardRecipe:
return dataset_stream


def _get_recipe_from_dict(dataset_params: Dict[str, Any]) -> StandardRecipe:
recipe_attributes = list(StandardRecipe.__dict__["__fields__"].keys())
def _get_recipe_from_dict(dataset_params: Dict[str, Any]) -> DatasetRecipe:
recipe_attributes = list(DatasetRecipe.__dict__["__fields__"].keys())
for param in dataset_params.keys():
assert param in recipe_attributes, (
f"The parameter '{param}' is not an attribute of the 'StandardRecipe' class. "
f"The parameter '{param}' is not an attribute of the 'DatasetRecipe' class. "
f"Please check if the name is correct. The available attributes are: '{recipe_attributes}'."
)
return StandardRecipe(**dataset_params)
return DatasetRecipe(**dataset_params)


def _verify_dataset_args(dataset_query: Optional[str] = None, dataset_args=None):
@@ -76,8 +76,8 @@ def _verify_dataset_args(dataset_query: Optional[str] = None, dataset_args=None)
)


def load_recipe(dataset_query: Optional[str] = None, **kwargs) -> StandardRecipe:
if isinstance(dataset_query, StandardRecipe):
def load_recipe(dataset_query: Optional[str] = None, **kwargs) -> DatasetRecipe:
if isinstance(dataset_query, DatasetRecipe):
return dataset_query

_verify_dataset_args(dataset_query, kwargs)
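
Usage note (the comma-separated key=value query syntax below is assumed from unitxt's usual dataset-query form and is not part of this diff): load_recipe returns a DatasetRecipe whether it is given a query string or keyword arguments.

# Sketch of the two call styles handled by load_recipe above.
from unitxt.api import load_recipe

# 1) from a dataset query string (assumed comma-separated key=value syntax)
recipe = load_recipe(
    "card=cards.wnli,template=templates.classification.multi_class.relation.default,num_demos=5,demos_pool_size=20"
)

# 2) from keyword arguments, validated against DatasetRecipe's fields
recipe = load_recipe(
    card="cards.wnli",
    template="templates.classification.multi_class.relation.default",
)
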
4 changes: 2 additions & 2 deletions src/unitxt/benchmark.py
@@ -5,7 +5,7 @@
from .formats import Format
from .fusion import FixedFusion, WeightedFusion
from .operator import SourceOperator
from .standard import StandardRecipe
from .standard import DatasetRecipe
from .stream import MultiStream
from .system_prompts import SystemPrompt

@@ -22,7 +22,7 @@ def reset(self):


class Benchmark(BaseBenchmark):
subsets: Dict[str, Union[StandardRecipe, BaseBenchmark]]
subsets: Dict[str, Union[DatasetRecipe, BaseBenchmark]]

max_total_samples: int = None
max_samples_per_subset: int = None
2 changes: 1 addition & 1 deletion src/unitxt/blocks.py
@@ -18,7 +18,7 @@
)
from .processors import ToString, ToStringStripped
from .recipe import SequentialRecipe
from .splitters import RandomSampler, Sample, SliceSplit, SplitRandomMix
from .splitters import AssignDemosToInstance, RandomSampler, SliceSplit, SplitRandomMix
from .stream import MultiStream
from .struct_data_operators import (
ConstructTableFromRowsCols,
16 changes: 8 additions & 8 deletions src/unitxt/catalog/benchmarks/glue.json
@@ -2,42 +2,42 @@
"__type__": "benchmark",
"subsets": {
"cola": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.cola",
"template": "templates.classification.multi_class.instruction"
},
"mnli": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.mnli",
"template": "templates.classification.multi_class.relation.default"
},
"mrpc": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.mrpc",
"template": "templates.classification.multi_class.relation.default"
},
"qnli": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.qnli",
"template": "templates.classification.multi_class.relation.default"
},
"rte": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.rte",
"template": "templates.classification.multi_class.relation.default"
},
"sst2": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.sst2",
"template": "templates.classification.multi_class.title"
},
"stsb": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.stsb",
"template": "templates.regression.two_texts.title"
},
"wnli": {
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"card": "cards.wnli",
"template": "templates.classification.multi_class.relation.default"
}
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
@@ -1,5 +1,5 @@
{
"__type__": "standard_recipe",
"__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
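
The catalog entries above are serialized DatasetRecipe objects whose __type__ tag changed from standard_recipe to dataset_recipe. A hedged sketch of how such an entry is typically produced on the prepare side (the catalog name below is hypothetical):

# Illustrative sketch: building and cataloging a recipe whose serialization
# carries the "dataset_recipe" __type__ tag and the demos fields shown above.
# "recipes.my_recipe_with_demos" is a made-up catalog name.
from unitxt import add_to_catalog
from unitxt.standard import DatasetRecipe

recipe = DatasetRecipe(
    card="cards.wnli",
    template="templates.classification.multi_class.relation.default",
    demos_pool_size=20,
    num_demos=5,
    demos_taken_from="test",
)
add_to_catalog(recipe, "recipes.my_recipe_with_demos", overwrite=True)
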