diff --git a/docs/blog/vision_robustness_blog.rst b/docs/blog/vision_robustness_blog.rst
index 29edcd48b0..641b7e56cf 100644
--- a/docs/blog/vision_robustness_blog.rst
+++ b/docs/blog/vision_robustness_blog.rst
@@ -39,7 +39,7 @@ Here’s the code used to set up our tests. This example uses Unitxt to create s
for card in ["cards.seed_bench", "cards.ai2d"]:
for enumerator in ["capitals", "lowercase"]:
for augmentor in [None, "augmentors.image.white_noise"]:
- subsets[f"{card} {enumerator} {augmentor}"] = StandardRecipe(
+ subsets[f"{card} {enumerator} {augmentor}"] = DatasetRecipe(
card=card,
template=f"templates.qa.multiple_choice.with_context.lmms_eval[enumerator={enumerator}]",
loader_limit=100,
diff --git a/docs/docs/benchmark.rst b/docs/docs/benchmark.rst
index 6c920e2770..d279d9ce9e 100644
--- a/docs/docs/benchmark.rst
+++ b/docs/docs/benchmark.rst
@@ -37,21 +37,21 @@ We can compile them together using Unitxt Benchmark:
.. code-block:: python
from unitxt.benchmark import Benchmark
- from unitxt.standard import StandardRecipe
+ from unitxt.standard import DatasetRecipe
benchmark = Benchmark(
format="formats.user_agent",
max_samples_per_subset=5,
loader_limit=300,
subsets={
- "cola": StandardRecipe(card="cards.cola", template="templates.classification.multi_class.instruction"),
- "mnli": StandardRecipe(card="cards.mnli", template="templates.classification.multi_class.relation.default"),
- "mrpc": StandardRecipe(card="cards.mrpc", template="templates.classification.multi_class.relation.default"),
- "qnli": StandardRecipe(card="cards.qnli", template="templates.classification.multi_class.relation.default"),
- "rte": StandardRecipe(card="cards.rte", template="templates.classification.multi_class.relation.default"),
- "sst2": StandardRecipe(card="cards.sst2", template="templates.classification.multi_class.title"),
- "stsb": StandardRecipe(card="cards.stsb", template="templates.regression.two_texts.title"),
- "wnli": StandardRecipe(card="cards.wnli", template="templates.classification.multi_class.relation.default"),
+ "cola": DatasetRecipe(card="cards.cola", template="templates.classification.multi_class.instruction"),
+ "mnli": DatasetRecipe(card="cards.mnli", template="templates.classification.multi_class.relation.default"),
+ "mrpc": DatasetRecipe(card="cards.mrpc", template="templates.classification.multi_class.relation.default"),
+ "qnli": DatasetRecipe(card="cards.qnli", template="templates.classification.multi_class.relation.default"),
+ "rte": DatasetRecipe(card="cards.rte", template="templates.classification.multi_class.relation.default"),
+ "sst2": DatasetRecipe(card="cards.sst2", template="templates.classification.multi_class.title"),
+ "stsb": DatasetRecipe(card="cards.stsb", template="templates.regression.two_texts.title"),
+ "wnli": DatasetRecipe(card="cards.wnli", template="templates.classification.multi_class.relation.default"),
},
)
@@ -128,7 +128,7 @@ If you want to explore different templates, you can do so by defining a list of
.. code-block:: python
- StandardRecipe(
+ DatasetRecipe(
card="cards.cola",
template=[
"templates.classification.multi_class.instruction",
diff --git a/examples/evaluate_benchmark.py b/examples/evaluate_benchmark.py
index e29168c193..fc3fd12e54 100644
--- a/examples/evaluate_benchmark.py
+++ b/examples/evaluate_benchmark.py
@@ -3,40 +3,40 @@
from unitxt.inference import (
CrossProviderInferenceEngine,
)
-from unitxt.standard import StandardRecipe
+from unitxt.standard import DatasetRecipe
benchmark = Benchmark(
format="formats.user_agent",
max_samples_per_subset=5,
loader_limit=30,
subsets={
- "cola": StandardRecipe(
+ "cola": DatasetRecipe(
card="cards.cola",
template="templates.classification.multi_class.instruction",
),
- "mnli": StandardRecipe(
+ "mnli": DatasetRecipe(
card="cards.mnli",
template="templates.classification.multi_class.relation.default",
),
- "mrpc": StandardRecipe(
+ "mrpc": DatasetRecipe(
card="cards.mrpc",
template="templates.classification.multi_class.relation.default",
),
- "qnli": StandardRecipe(
+ "qnli": DatasetRecipe(
card="cards.qnli",
template="templates.classification.multi_class.relation.default",
),
- "rte": StandardRecipe(
+ "rte": DatasetRecipe(
card="cards.rte",
template="templates.classification.multi_class.relation.default",
),
- "sst2": StandardRecipe(
+ "sst2": DatasetRecipe(
card="cards.sst2", template="templates.classification.multi_class.title"
),
- "stsb": StandardRecipe(
+ "stsb": DatasetRecipe(
card="cards.stsb", template="templates.regression.two_texts.title"
),
- "wnli": StandardRecipe(
+ "wnli": DatasetRecipe(
card="cards.wnli",
template="templates.classification.multi_class.relation.default",
),
diff --git a/examples/evaluate_image_text_to_text_with_different_templates.py b/examples/evaluate_image_text_to_text_with_different_templates.py
index 5480f571b2..cc3816ea08 100644
--- a/examples/evaluate_image_text_to_text_with_different_templates.py
+++ b/examples/evaluate_image_text_to_text_with_different_templates.py
@@ -5,7 +5,7 @@
LMMSEvalInferenceEngine,
)
from unitxt.logging_utils import get_logger
-from unitxt.standard import StandardRecipe
+from unitxt.standard import DatasetRecipe
logger = get_logger()
@@ -16,17 +16,17 @@
dataset = Benchmark(
subsets={
- "capitals": StandardRecipe(
+ "capitals": DatasetRecipe(
card=card,
template="templates.qa.multiple_choice.with_context.lmms_eval[enumerator=capitals]",
loader_limit=20,
),
- "lowercase": StandardRecipe(
+ "lowercase": DatasetRecipe(
card=card,
template="templates.qa.multiple_choice.with_context.lmms_eval[enumerator=lowercase]",
loader_limit=20,
),
- "capitals-greyscale": StandardRecipe(
+ "capitals-greyscale": DatasetRecipe(
card=card,
template="templates.qa.multiple_choice.with_context.lmms_eval[enumerator=capitals]",
loader_limit=20,
diff --git a/examples/robustness_testing_for_vision_text_models.py b/examples/robustness_testing_for_vision_text_models.py
index bf382a5293..3074bd3109 100644
--- a/examples/robustness_testing_for_vision_text_models.py
+++ b/examples/robustness_testing_for_vision_text_models.py
@@ -5,7 +5,7 @@
LMMSEvalInferenceEngine,
)
from unitxt.logging_utils import get_logger
-from unitxt.standard import StandardRecipe
+from unitxt.standard import DatasetRecipe
logger = get_logger()
@@ -16,7 +16,7 @@
for card in ["cards.seed_bench", "cards.ai2d"]:
for enumerator in ["capitals", "lowercase"]:
for augmentor in [None, "augmentors.image.white_noise"]:
- subsets[f"{card} {enumerator} {augmentor}"] = StandardRecipe(
+ subsets[f"{card} {enumerator} {augmentor}"] = DatasetRecipe(
card=card,
template=f"templates.qa.multiple_choice.with_context.lmms_eval[enumerator={enumerator}]",
format="formats.chat_api",
diff --git a/performance/card_profiler.py b/performance/card_profiler.py
index 4e79b8dbf6..03325ac0ef 100644
--- a/performance/card_profiler.py
+++ b/performance/card_profiler.py
@@ -10,7 +10,7 @@
from unitxt.artifact import fetch_artifact
from unitxt.logging_utils import get_logger
from unitxt.settings_utils import get_settings
-from unitxt.standard import StandardRecipe
+from unitxt.standard import DatasetRecipe
from unitxt.stream import MultiStream
from unitxt.templates import TemplatesDict, TemplatesList
@@ -49,28 +49,28 @@ class CardProfiler:
You will find the total time of each step, accumulated over all cards in the benchmark.
"""
- def profiler_instantiate_recipe(self, **kwargs) -> StandardRecipe:
+ def profiler_instantiate_recipe(self, **kwargs) -> DatasetRecipe:
return load_recipe(**kwargs)
- def profiler_load_by_recipe(self, recipe: StandardRecipe) -> MultiStream:
+ def profiler_load_by_recipe(self, recipe: DatasetRecipe) -> MultiStream:
ms = recipe.loading.process()
assert isinstance(ms, MultiStream)
return ms
def profiler_metadata_and_standardization(
- self, ms: MultiStream, recipe: StandardRecipe
+ self, ms: MultiStream, recipe: DatasetRecipe
) -> MultiStream:
ms = recipe.metadata.process(ms)
return recipe.standardization.process(ms)
def profiler_processing_demos_metadata(
- self, ms: MultiStream, recipe: StandardRecipe
+ self, ms: MultiStream, recipe: DatasetRecipe
) -> MultiStream:
ms = recipe.processing.process(ms)
return recipe.metadata.process(ms)
def profiler_verbalize_and_finalize(
- self, ms: MultiStream, recipe: StandardRecipe
+ self, ms: MultiStream, recipe: DatasetRecipe
) -> MultiStream:
ms = recipe.verbalization.process(ms)
return recipe.finalize.process(ms)
diff --git a/prepare/benchmarks/glue.py b/prepare/benchmarks/glue.py
index 34dd521522..328002553c 100644
--- a/prepare/benchmarks/glue.py
+++ b/prepare/benchmarks/glue.py
@@ -1,36 +1,36 @@
from unitxt.benchmark import Benchmark
from unitxt.catalog import add_to_catalog
-from unitxt.standard import StandardRecipe
+from unitxt.standard import DatasetRecipe
benchmark = Benchmark(
subsets={
- "cola": StandardRecipe(
+ "cola": DatasetRecipe(
card="cards.cola",
template="templates.classification.multi_class.instruction",
),
- "mnli": StandardRecipe(
+ "mnli": DatasetRecipe(
card="cards.mnli",
template="templates.classification.multi_class.relation.default",
),
- "mrpc": StandardRecipe(
+ "mrpc": DatasetRecipe(
card="cards.mrpc",
template="templates.classification.multi_class.relation.default",
),
- "qnli": StandardRecipe(
+ "qnli": DatasetRecipe(
card="cards.qnli",
template="templates.classification.multi_class.relation.default",
),
- "rte": StandardRecipe(
+ "rte": DatasetRecipe(
card="cards.rte",
template="templates.classification.multi_class.relation.default",
),
- "sst2": StandardRecipe(
+ "sst2": DatasetRecipe(
card="cards.sst2", template="templates.classification.multi_class.title"
),
- "stsb": StandardRecipe(
+ "stsb": DatasetRecipe(
card="cards.stsb", template="templates.regression.two_texts.title"
),
- "wnli": StandardRecipe(
+ "wnli": DatasetRecipe(
card="cards.wnli",
template="templates.classification.multi_class.relation.default",
),
diff --git a/prepare/recipes/bluebench.py b/prepare/recipes/bluebench.py
index 3368ea6374..eb63bbfc6d 100644
--- a/prepare/recipes/bluebench.py
+++ b/prepare/recipes/bluebench.py
@@ -1,5 +1,5 @@
from unitxt import add_to_catalog
-from unitxt.standard import StandardRecipe
+from unitxt.standard import DatasetRecipe
subsets = { # the key must appear in the card name
"cards.legalbench": [
@@ -82,7 +82,7 @@ def prepare_recipe(default_args, specific_args):
if "template" in recipe and "template_card_index" in recipe:
del recipe["template_card_index"]
- return StandardRecipe(**recipe, format="formats.chat_api")
+ return DatasetRecipe(**recipe, format="formats.chat_api")
### Reasoning
diff --git a/src/unitxt/api.py b/src/unitxt/api.py
index c899019c15..020b665c99 100644
--- a/src/unitxt/api.py
+++ b/src/unitxt/api.py
@@ -18,7 +18,7 @@
from .operator import SourceOperator
from .schema import UNITXT_DATASET_SCHEMA, loads_instance
from .settings_utils import get_constants, get_settings
-from .standard import StandardRecipe
+from .standard import DatasetRecipe
from .task import Task
logger = get_logger()
@@ -35,7 +35,7 @@ def load(source: Union[SourceOperator, str]):
return source().to_dataset()
-def _get_recipe_from_query(dataset_query: str) -> StandardRecipe:
+def _get_recipe_from_query(dataset_query: str) -> DatasetRecipe:
dataset_query = dataset_query.replace("sys_prompt", "instruction")
try:
dataset_stream, _ = fetch_artifact(dataset_query)
@@ -44,14 +44,14 @@ def _get_recipe_from_query(dataset_query: str) -> StandardRecipe:
return dataset_stream
-def _get_recipe_from_dict(dataset_params: Dict[str, Any]) -> StandardRecipe:
- recipe_attributes = list(StandardRecipe.__dict__["__fields__"].keys())
+def _get_recipe_from_dict(dataset_params: Dict[str, Any]) -> DatasetRecipe:
+ recipe_attributes = list(DatasetRecipe.__dict__["__fields__"].keys())
for param in dataset_params.keys():
assert param in recipe_attributes, (
- f"The parameter '{param}' is not an attribute of the 'StandardRecipe' class. "
+ f"The parameter '{param}' is not an attribute of the 'DatasetRecipe' class. "
f"Please check if the name is correct. The available attributes are: '{recipe_attributes}'."
)
- return StandardRecipe(**dataset_params)
+ return DatasetRecipe(**dataset_params)
def _verify_dataset_args(dataset_query: Optional[str] = None, dataset_args=None):
@@ -76,8 +76,8 @@ def _verify_dataset_args(dataset_query: Optional[str] = None, dataset_args=None)
)
-def load_recipe(dataset_query: Optional[str] = None, **kwargs) -> StandardRecipe:
- if isinstance(dataset_query, StandardRecipe):
+def load_recipe(dataset_query: Optional[str] = None, **kwargs) -> DatasetRecipe:
+ if isinstance(dataset_query, DatasetRecipe):
return dataset_query
_verify_dataset_args(dataset_query, kwargs)
diff --git a/src/unitxt/benchmark.py b/src/unitxt/benchmark.py
index b1750eacb8..a3f4562d83 100644
--- a/src/unitxt/benchmark.py
+++ b/src/unitxt/benchmark.py
@@ -5,7 +5,7 @@
from .formats import Format
from .fusion import FixedFusion, WeightedFusion
from .operator import SourceOperator
-from .standard import StandardRecipe
+from .standard import DatasetRecipe
from .stream import MultiStream
from .system_prompts import SystemPrompt
@@ -22,7 +22,7 @@ def reset(self):
class Benchmark(BaseBenchmark):
- subsets: Dict[str, Union[StandardRecipe, BaseBenchmark]]
+ subsets: Dict[str, Union[DatasetRecipe, BaseBenchmark]]
max_total_samples: int = None
max_samples_per_subset: int = None
diff --git a/src/unitxt/blocks.py b/src/unitxt/blocks.py
index bfd52c4249..65a9164b13 100644
--- a/src/unitxt/blocks.py
+++ b/src/unitxt/blocks.py
@@ -18,7 +18,7 @@
)
from .processors import ToString, ToStringStripped
from .recipe import SequentialRecipe
-from .splitters import RandomSampler, Sample, SliceSplit, SplitRandomMix
+from .splitters import AssignDemosToInstance, RandomSampler, SliceSplit, SplitRandomMix
from .stream import MultiStream
from .struct_data_operators import (
ConstructTableFromRowsCols,
diff --git a/src/unitxt/catalog/benchmarks/glue.json b/src/unitxt/catalog/benchmarks/glue.json
index f720ffdd05..10fd726603 100644
--- a/src/unitxt/catalog/benchmarks/glue.json
+++ b/src/unitxt/catalog/benchmarks/glue.json
@@ -2,42 +2,42 @@
"__type__": "benchmark",
"subsets": {
"cola": {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.cola",
"template": "templates.classification.multi_class.instruction"
},
"mnli": {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.mnli",
"template": "templates.classification.multi_class.relation.default"
},
"mrpc": {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.mrpc",
"template": "templates.classification.multi_class.relation.default"
},
"qnli": {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.qnli",
"template": "templates.classification.multi_class.relation.default"
},
"rte": {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.rte",
"template": "templates.classification.multi_class.relation.default"
},
"sst2": {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.sst2",
"template": "templates.classification.multi_class.title"
},
"stsb": {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.stsb",
"template": "templates.regression.two_texts.title"
},
"wnli": {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.wnli",
"template": "templates.classification.multi_class.relation.default"
}
diff --git a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_age.json b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_age.json
index 5e68da8756..0bff65bac5 100644
--- a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_age.json
+++ b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_age.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_disability_status.json b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_disability_status.json
index d173106dc6..ebfbaa92e7 100644
--- a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_disability_status.json
+++ b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_disability_status.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_gender_identity.json b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_gender_identity.json
index f3fcc9208d..f23da00b29 100644
--- a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_gender_identity.json
+++ b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_gender_identity.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_nationality.json b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_nationality.json
index 6a3eeaf00b..78cf913f10 100644
--- a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_nationality.json
+++ b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_nationality.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_physical_appearance.json b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_physical_appearance.json
index d6bebf0925..aa19e829d4 100644
--- a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_physical_appearance.json
+++ b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_physical_appearance.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_ethnicity.json b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_ethnicity.json
index 89e23a3d3c..e7123fb7b2 100644
--- a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_ethnicity.json
+++ b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_ethnicity.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_x_gender.json b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_x_gender.json
index 508c8bf374..be5977efc9 100644
--- a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_x_gender.json
+++ b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_x_gender.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_x_ses.json b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_x_ses.json
index 6ffc6e24f4..0eac47994b 100644
--- a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_x_ses.json
+++ b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_race_x_ses.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_religion.json b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_religion.json
index 520f062480..5cac2697b5 100644
--- a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_religion.json
+++ b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_religion.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_ses.json b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_ses.json
index e3700dd1d4..6309539561 100644
--- a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_ses.json
+++ b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_ses.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_sexual_orientation.json b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_sexual_orientation.json
index 1863d20c81..ac5cc0a9cd 100644
--- a/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_sexual_orientation.json
+++ b/src/unitxt/catalog/recipes/bluebench/bias/safety_bbq_sexual_orientation.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/chatbot_abilities/arena_hard_generation_english_gpt_4_0314_reference.json b/src/unitxt/catalog/recipes/bluebench/chatbot_abilities/arena_hard_generation_english_gpt_4_0314_reference.json
index 8259767cab..595ad5c404 100644
--- a/src/unitxt/catalog/recipes/bluebench/chatbot_abilities/arena_hard_generation_english_gpt_4_0314_reference.json
+++ b/src/unitxt/catalog/recipes/bluebench/chatbot_abilities/arena_hard_generation_english_gpt_4_0314_reference.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 0,
"num_demos": 0,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/entity_extraction/universal_ner_en_ewt.json b/src/unitxt/catalog/recipes/bluebench/entity_extraction/universal_ner_en_ewt.json
index 4dab5b9be2..662dba9b3c 100644
--- a/src/unitxt/catalog/recipes/bluebench/entity_extraction/universal_ner_en_ewt.json
+++ b/src/unitxt/catalog/recipes/bluebench/entity_extraction/universal_ner_en_ewt.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 10000,
"num_demos": 5,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_biology.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_biology.json
index 27908d8581..a6e1f335a7 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_biology.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_biology.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_business.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_business.json
index dfeae0c7bb..c8ab7ce711 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_business.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_business.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_chemistry.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_chemistry.json
index 828444520e..6d2379095f 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_chemistry.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_chemistry.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_computer_science.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_computer_science.json
index 5b2c35fffc..7d9ab632d9 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_computer_science.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_computer_science.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_economics.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_economics.json
index a9ddc4c927..d9534bc91a 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_economics.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_economics.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_engineering.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_engineering.json
index 7cb3dfeaea..7199f5b1ff 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_engineering.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_engineering.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_health.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_health.json
index ec54ce638b..fddb9854c9 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_health.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_health.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_history.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_history.json
index 56e7143b82..9a6132008e 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_history.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_history.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_law.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_law.json
index 7c053421f9..2228a1852a 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_law.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_law.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_math.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_math.json
index befdf9defc..1aaa37e694 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_math.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_math.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_other.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_other.json
index a050d42592..1bcbac2706 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_other.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_other.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_philosophy.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_philosophy.json
index a90ece0cf6..1b9d4f4e3e 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_philosophy.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_philosophy.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_physics.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_physics.json
index f3d69ebbc1..15cbfd85ae 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_physics.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_physics.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_psychology.json b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_psychology.json
index 97c4b2b4aa..daf77596bf 100644
--- a/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_psychology.json
+++ b/src/unitxt/catalog/recipes/bluebench/knowledge/mmlu_pro_psychology.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 20,
"num_demos": 5,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/legal/legalbench_abercrombie.json b/src/unitxt/catalog/recipes/bluebench/legal/legalbench_abercrombie.json
index 98461e10b9..8a9ba754a5 100644
--- a/src/unitxt/catalog/recipes/bluebench/legal/legalbench_abercrombie.json
+++ b/src/unitxt/catalog/recipes/bluebench/legal/legalbench_abercrombie.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 10,
"num_demos": 1,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/legal/legalbench_corporate_lobbying.json b/src/unitxt/catalog/recipes/bluebench/legal/legalbench_corporate_lobbying.json
index b03a43787d..cf1248e1de 100644
--- a/src/unitxt/catalog/recipes/bluebench/legal/legalbench_corporate_lobbying.json
+++ b/src/unitxt/catalog/recipes/bluebench/legal/legalbench_corporate_lobbying.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 10,
"num_demos": 1,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/legal/legalbench_function_of_decision_section.json b/src/unitxt/catalog/recipes/bluebench/legal/legalbench_function_of_decision_section.json
index e3eab4ac9b..b9fb501388 100644
--- a/src/unitxt/catalog/recipes/bluebench/legal/legalbench_function_of_decision_section.json
+++ b/src/unitxt/catalog/recipes/bluebench/legal/legalbench_function_of_decision_section.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 10,
"num_demos": 1,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/legal/legalbench_international_citizenship_questions.json b/src/unitxt/catalog/recipes/bluebench/legal/legalbench_international_citizenship_questions.json
index ce0711eee6..ff74b89d2b 100644
--- a/src/unitxt/catalog/recipes/bluebench/legal/legalbench_international_citizenship_questions.json
+++ b/src/unitxt/catalog/recipes/bluebench/legal/legalbench_international_citizenship_questions.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 10,
"num_demos": 1,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/legal/legalbench_proa.json b/src/unitxt/catalog/recipes/bluebench/legal/legalbench_proa.json
index 3a40cae4b7..56277304c1 100644
--- a/src/unitxt/catalog/recipes/bluebench/legal/legalbench_proa.json
+++ b/src/unitxt/catalog/recipes/bluebench/legal/legalbench_proa.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 10,
"num_demos": 1,
"demos_taken_from": "test",
diff --git a/src/unitxt/catalog/recipes/bluebench/news_classification/20_newsgroups_short.json b/src/unitxt/catalog/recipes/bluebench/news_classification/20_newsgroups_short.json
index 3694028d97..c9ab0a4975 100644
--- a/src/unitxt/catalog/recipes/bluebench/news_classification/20_newsgroups_short.json
+++ b/src/unitxt/catalog/recipes/bluebench/news_classification/20_newsgroups_short.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 1,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/product_help/cfpb_product_2023.json b/src/unitxt/catalog/recipes/bluebench/product_help/cfpb_product_2023.json
index 722badc306..029a89e4d0 100644
--- a/src/unitxt/catalog/recipes/bluebench/product_help/cfpb_product_2023.json
+++ b/src/unitxt/catalog/recipes/bluebench/product_help/cfpb_product_2023.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/product_help/cfpb_product_watsonx.json b/src/unitxt/catalog/recipes/bluebench/product_help/cfpb_product_watsonx.json
index 106fba290c..4266568ad9 100644
--- a/src/unitxt/catalog/recipes/bluebench/product_help/cfpb_product_watsonx.json
+++ b/src/unitxt/catalog/recipes/bluebench/product_help/cfpb_product_watsonx.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/qa_finance/fin_qa.json b/src/unitxt/catalog/recipes/bluebench/qa_finance/fin_qa.json
index dafd58b260..7b5f125aae 100644
--- a/src/unitxt/catalog/recipes/bluebench/qa_finance/fin_qa.json
+++ b/src/unitxt/catalog/recipes/bluebench/qa_finance/fin_qa.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 1,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/rag_general/rag_response_generation_clapnq.json b/src/unitxt/catalog/recipes/bluebench/rag_general/rag_response_generation_clapnq.json
index 576811d64d..ce8916eadb 100644
--- a/src/unitxt/catalog/recipes/bluebench/rag_general/rag_response_generation_clapnq.json
+++ b/src/unitxt/catalog/recipes/bluebench/rag_general/rag_response_generation_clapnq.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 1,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/reasoning/hellaswag.json b/src/unitxt/catalog/recipes/bluebench/reasoning/hellaswag.json
index a8720501e2..8ebb6c983a 100644
--- a/src/unitxt/catalog/recipes/bluebench/reasoning/hellaswag.json
+++ b/src/unitxt/catalog/recipes/bluebench/reasoning/hellaswag.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/reasoning/openbook_qa.json b/src/unitxt/catalog/recipes/bluebench/reasoning/openbook_qa.json
index a0afa2058e..545bd4ac12 100644
--- a/src/unitxt/catalog/recipes/bluebench/reasoning/openbook_qa.json
+++ b/src/unitxt/catalog/recipes/bluebench/reasoning/openbook_qa.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/safety/attaq_500.json b/src/unitxt/catalog/recipes/bluebench/safety/attaq_500.json
index ebfe9cb615..ffb7d005a3 100644
--- a/src/unitxt/catalog/recipes/bluebench/safety/attaq_500.json
+++ b/src/unitxt/catalog/recipes/bluebench/safety/attaq_500.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 0,
"num_demos": 0,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/summarization/billsum_document_filtered_to_6000_chars.json b/src/unitxt/catalog/recipes/bluebench/summarization/billsum_document_filtered_to_6000_chars.json
index 370b2a5017..e8386de0e9 100644
--- a/src/unitxt/catalog/recipes/bluebench/summarization/billsum_document_filtered_to_6000_chars.json
+++ b/src/unitxt/catalog/recipes/bluebench/summarization/billsum_document_filtered_to_6000_chars.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 0,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/summarization/tldr_document_filtered_to_6000_chars.json b/src/unitxt/catalog/recipes/bluebench/summarization/tldr_document_filtered_to_6000_chars.json
index 53ea41cd14..cf7cfb54c6 100644
--- a/src/unitxt/catalog/recipes/bluebench/summarization/tldr_document_filtered_to_6000_chars.json
+++ b/src/unitxt/catalog/recipes/bluebench/summarization/tldr_document_filtered_to_6000_chars.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 0,
"demos_taken_from": "train",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_ara_eng.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_ara_eng.json
index 56e2c4280c..d2488c7143 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_ara_eng.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_ara_eng.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_deu_eng.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_deu_eng.json
index dfac2ad756..eb2884d696 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_deu_eng.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_deu_eng.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_ara.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_ara.json
index f8bec1ec6b..19d16e5b67 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_ara.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_ara.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_deu.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_deu.json
index c5c9f590fc..58bca3fcb0 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_deu.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_deu.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_fra.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_fra.json
index b56f10b4d1..49dc5df5d0 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_fra.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_fra.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_kor.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_kor.json
index ae8a136df4..c0a296c09e 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_kor.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_kor.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_por.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_por.json
index ac3c3edbcf..e059f9624c 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_por.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_por.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_ron.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_ron.json
index 2290bc3592..d91dfdab5a 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_ron.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_ron.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_spa.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_spa.json
index a5e359bf56..e12bcefba2 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_spa.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_eng_spa.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_fra_eng.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_fra_eng.json
index 840eda4505..6006dc2002 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_fra_eng.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_fra_eng.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_jpn_eng.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_jpn_eng.json
index 6ab8c49028..f19c6de7d9 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_jpn_eng.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_jpn_eng.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_kor_eng.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_kor_eng.json
index 9aa2ba1756..72338d3f0d 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_kor_eng.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_kor_eng.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_por_eng.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_por_eng.json
index 5d251a3296..8dd3cf2553 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_por_eng.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_por_eng.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_ron_eng.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_ron_eng.json
index 324565f134..26e04f2ce6 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_ron_eng.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_ron_eng.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_spa_eng.json b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_spa_eng.json
index 790b55448d..8bc58a689a 100644
--- a/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_spa_eng.json
+++ b/src/unitxt/catalog/recipes/bluebench/translation/mt_flores_101_spa_eng.json
@@ -1,5 +1,5 @@
{
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"demos_pool_size": 100,
"num_demos": 5,
"demos_taken_from": "validation",
diff --git a/src/unitxt/dataset_utils.py b/src/unitxt/dataset_utils.py
index b32fe86fc6..c837b37f0d 100644
--- a/src/unitxt/dataset_utils.py
+++ b/src/unitxt/dataset_utils.py
@@ -5,7 +5,7 @@
from .parsing_utils import parse_key_equals_value_string_to_dict
from .register import _reset_env_local_catalogs, register_all_artifacts
from .settings_utils import get_settings
-from .standard import BaseRecipe
+from .standard import DatasetRecipe
logger = get_logger()
settings = get_settings()
@@ -24,7 +24,7 @@ def parse(query: str):
def get_dataset_artifact(dataset):
- if isinstance(dataset, BaseRecipe):
+ if isinstance(dataset, DatasetRecipe):
return dataset
assert isinstance(
dataset, str
diff --git a/src/unitxt/loaders.py b/src/unitxt/loaders.py
index f068b269b6..4fc60567cd 100644
--- a/src/unitxt/loaders.py
+++ b/src/unitxt/loaders.py
@@ -41,6 +41,7 @@
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Union
import pandas as pd
+from datasets import IterableDatasetDict
from datasets import load_dataset as hf_load_dataset
from huggingface_hub import HfApi
from tqdm import tqdm
@@ -51,7 +52,7 @@
from .operator import SourceOperator
from .operators import Set
from .settings_utils import get_settings
-from .stream import DynamicStream, MultiStream
+from .stream import MultiStream
from .type_utils import isoftype
from .utils import LRUCache
@@ -122,7 +123,7 @@ def add_data_classification(self, multi_stream: MultiStream) -> MultiStream:
)
return operator(multi_stream)
- def sef_default_data_classification(
+ def set_default_data_classification(
self, default_data_classification_policy, additional_info
):
if self.data_classification_policy is None:
@@ -166,20 +167,20 @@ class LoadHF(Loader):
The path or identifier of the dataset on the HuggingFace Hub.
name:
An optional dataset name.
- data_dir (optional):
- directory to store downloaded data.
- split (optional):
- specification of which split to load.
- data_files (optional):
- specification of particular data files to load.
- revision (optional):
- The revision of the dataset. Often the commit id. Use in case you want to set the dataset version.
+ data_dir:
+ Optional directory to store downloaded data.
+ split:
+ Optional specification of which split to load.
+ data_files:
+ Optional specification of particular data files to load.
+ revision:
+        Optional. The revision of the dataset, often the commit id. Use it if you want to pin the dataset version.
streaming (bool):
indicating if streaming should be used.
- filtering_lambda (optional):
+ filtering_lambda (str, optional):
A lambda function for filtering the data after loading.
num_proc (int, optional):
- specify the number of processes to use for parallel dataset loading.
+ Specifies the number of processes to use for parallel dataset loading.
Example:
Loading glue's mrpc dataset
@@ -279,40 +280,22 @@ def load_dataset(self):
for split in dataset.keys():
dataset[split] = dataset[split].to_iterable_dataset()
else:
- dataset = {self.split: dataset}
-
- if self.filtering_lambda is not None:
- dataset = self.filter_load(dataset)
+ dataset = {self.split: dataset.to_iterable_dataset()}
return dataset
- def split_limited_load(self, dataset, split_name):
- yield from itertools.islice(dataset[split_name], self.get_limit())
-
- def limited_load(self, dataset):
- self.log_limited_loading()
- return MultiStream(
- {
- name: DynamicStream(
- generator=self.split_limited_load,
- gen_kwargs={"dataset": dataset, "split_name": name},
- )
- for name in dataset.keys()
- }
- )
-
def _maybe_set_classification_policy(self):
if os.path.exists(self.path):
- self.sef_default_data_classification(
+ self.set_default_data_classification(
["proprietary"], "when loading from local files"
)
else:
- self.sef_default_data_classification(
+ self.set_default_data_classification(
["public"],
None, # No warning when loading from public hub
)
- def load_iterables(self):
+ def load_iterables(self) -> IterableDatasetDict:
try:
dataset = self.stream_dataset()
except (
@@ -320,8 +303,15 @@ def load_iterables(self):
): # streaming is not supported for zipped files so we load without streaming
dataset = self.load_dataset()
+ if self.filtering_lambda is not None:
+ dataset = self.filter_load(dataset)
+
if self.get_limit() is not None:
- return self.limited_load(dataset=dataset)
+ self.log_limited_loading()
+ return {
+ split_name: dataset[split_name].take(self.get_limit())
+ for split_name in dataset
+ }
return dataset
@@ -353,7 +343,7 @@ class LoadCSV(Loader):
sep: str = ","
def _maybe_set_classification_policy(self):
- self.sef_default_data_classification(
+ self.set_default_data_classification(
["proprietary"], "when loading from local files"
)
@@ -366,9 +356,7 @@ def load_iterables(self):
file_path, nrows=self.get_limit(), sep=self.sep
).to_dict("records")
else:
- iterables[split_name] = pd.read_csv(file_path, sep=self.sep).to_dict(
- "records"
- )
+                iterables[split_name] = pd.read_csv(file_path, sep=self.sep).to_dict("records")
return iterables
@@ -476,14 +464,22 @@ class LoadFromIBMCloud(Loader):
3. Mapping: split -> file_names, e.g. {"test" : ["test1.json", "test2.json"], "train": ["train.json"]}
Args:
- endpoint_url_env: Environment variable name for the IBM Cloud endpoint URL.
- aws_access_key_id_env: Environment variable name for the AWS access key ID.
- aws_secret_access_key_env: Environment variable name for the AWS secret access key.
- bucket_name: Name of the S3 bucket from which to load data.
- data_dir: Optional directory path within the bucket.
- data_files: Union type allowing either a list of file names or a mapping of splits to file names.
- data_field: The dataset key for nested JSON file, i.e. when multiple datasets are nested in the same file
- caching: Bool indicating if caching is enabled to avoid re-downloading data.
+ endpoint_url_env:
+ Environment variable name for the IBM Cloud endpoint URL.
+ aws_access_key_id_env:
+ Environment variable name for the AWS access key ID.
+ aws_secret_access_key_env:
+ Environment variable name for the AWS secret access key.
+ bucket_name:
+ Name of the S3 bucket from which to load data.
+ data_dir:
+ Optional directory path within the bucket.
+ data_files:
+ Union type allowing either a list of file names or a mapping of splits to file names.
+ data_field:
+        The dataset key for a nested JSON file, i.e. when multiple datasets are nested in the same file.
+    caching (bool):
+        Indicates whether caching is enabled to avoid re-downloading data.
Example:
Loading from IBM Cloud
@@ -579,7 +575,7 @@ def lazy_verify(self):
raise NotImplementedError("LoadFromKaggle cannot load with streaming.")
def _maybe_set_classification_policy(self):
- self.sef_default_data_classification(
+ self.set_default_data_classification(
["proprietary"], "when loading from IBM COS"
)
@@ -730,7 +726,7 @@ def verify(self):
)
def _maybe_set_classification_policy(self):
- self.sef_default_data_classification(
+ self.set_default_data_classification(
["proprietary"], "when loading from python dictionary"
)
@@ -745,25 +741,24 @@ class LoadFromHFSpace(LoadHF):
from the given space and then reads them as a HuggingFace Dataset.
Args:
- space_name (str): Name of the HuggingFace Space to be accessed.
-
- data_files (str | Sequence[str] | Mapping[str, str | Sequence[str]]): Relative
- paths to files within a given repository. If given as a mapping, paths should
- be values, while keys should represent the type of respective files
- (training, testing etc.).
-
- path (str, optional): Absolute path to a directory where data should be downloaded.
-
- revision (str, optional): ID of a Git branch or commit to be used. By default, it is
- set to None, thus data is downloaded from the main branch of the accessed
- repository.
-
- use_token (bool, optional): Whether a token is used for authentication when accessing
- the HuggingFace Space. If necessary, the token is read from the HuggingFace
- config folder.
-
- token_env (str, optional): Key of an env variable which value will be used for
- authentication when accessing the HuggingFace Space - if necessary.
+ space_name (str):
+ Name of the HuggingFace Space to be accessed.
+ data_files (str | Sequence[str] | Mapping[str, str | Sequence[str]]):
+ Relative paths to files within a given repository. If given as a mapping,
+ paths should be values, while keys should represent the type of respective files
+ (training, testing etc.).
+ path (str, optional):
+ Absolute path to a directory where data should be downloaded.
+ revision (str, optional):
+ ID of a Git branch or commit to be used. By default, it is set to None,
+ thus data is downloaded from the main branch of the accessed repository.
+ use_token (bool, optional):
+ Whether a token is used for authentication when accessing
+ the HuggingFace Space. If necessary, the token is read from the HuggingFace
+ config folder.
+ token_env (str, optional):
+        Key of an env variable whose value will be used for
+        authentication when accessing the HuggingFace Space, if necessary.
Example:
Loading from a HuggingFace Space
@@ -911,7 +906,7 @@ def _map_wildcard_path_to_full_paths(self):
)
def _maybe_set_classification_policy(self):
- self.sef_default_data_classification(
+ self.set_default_data_classification(
["public"], "when loading from Huggingface spaces"
)
diff --git a/src/unitxt/schema.py b/src/unitxt/schema.py
index 82d838141d..4bd3e01312 100644
--- a/src/unitxt/schema.py
+++ b/src/unitxt/schema.py
@@ -143,6 +143,9 @@ def process(
)
task_data["metadata"]["num_demos"] = instance["recipe_metadata"]["num_demos"]
+ task_data["metadata"]["demos_pool_size"] = instance["recipe_metadata"][
+ "demos_pool_size"
+ ]
task_data["metadata"]["template"] = self.artifact_to_jsonable(
instance["recipe_metadata"]["template"]
)
diff --git a/src/unitxt/settings_utils.py b/src/unitxt/settings_utils.py
index ae3e00a9f6..75a3bd641c 100644
--- a/src/unitxt/settings_utils.py
+++ b/src/unitxt/settings_utils.py
@@ -138,7 +138,7 @@ def __getattr__(self, key):
settings.max_log_message_size = (int, 100000)
settings.catalogs = None
settings.artifactories = None
- settings.default_recipe = "standard_recipe"
+ settings.default_recipe = "dataset_recipe"
settings.default_verbosity = "info"
settings.use_eager_execution = False
settings.remote_metrics = []
@@ -186,6 +186,7 @@ def __getattr__(self, key):
constants.inference_stream = "__INFERENCE_STREAM__"
constants.instance_stream = "__INSTANCE_STREAM__"
constants.image_tag = "unitxt-img"
+ constants.demos_pool_field = "_demos_pool_"
def get_settings() -> Settings:
diff --git a/src/unitxt/splitters.py b/src/unitxt/splitters.py
index dd6ee45b1e..f496ac31b6 100644
--- a/src/unitxt/splitters.py
+++ b/src/unitxt/splitters.py
@@ -1,11 +1,11 @@
import itertools
from abc import abstractmethod
from difflib import get_close_matches
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
from .artifact import Artifact
from .dict_utils import dict_get
-from .operator import InstanceOperatorWithMultiStreamAccess, MultiStreamOperator
+from .operator import InstanceOperator, MultiStreamOperator
from .random_utils import new_random_generator
from .split_utils import (
parse_random_mix_string,
@@ -14,7 +14,7 @@
rename_split,
slice_streams,
)
-from .stream import EmptyStreamError, FaultyStreamError, MultiStream
+from .stream import MultiStream
from .type_utils import isoftype
from .utils import recursive_copy
@@ -118,14 +118,14 @@ class Sampler(Artifact):
def sample(
self,
sample_size: int,
- instances_pool: List[Dict[str, object]],
- instance: Dict[str, object],
- ) -> List[Dict[str, object]]:
+ instances_pool: List[Dict[str, Any]],
+ instance: Dict[str, Any],
+ ) -> List[Dict[str, Any]]:
pass
def filter_source_by_instance(
- self, instances_pool: List[Dict[str, object]], instance: Dict[str, object]
- ) -> List[Dict[str, object]]:
+ self, instances_pool: List[Dict[str, Any]], instance: Dict[str, Any]
+ ) -> List[Dict[str, Any]]:
if "input_fields" not in instance:
raise ValueError(f"'input_fields' field is missing from '{instance}'.")
try:
@@ -336,10 +336,11 @@ def sample(
return result
-class Sample(InstanceOperatorWithMultiStreamAccess):
- from_stream: str
+class AssignDemosToInstance(InstanceOperator):
+ from_field: str
to_field: str
sampler: Sampler
+ skip_demoed_instances: bool = False
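+ # when True, an instance that already has to_field populated keeps it as-is, and the demos pool field is simply dropped from that instance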
def prepare(self):
self.local_cache = None
@@ -350,40 +351,36 @@ def get_sample_size(self, instance) -> int:
pass
def process(
- self, instance: Dict[str, object], multi_stream: MultiStream
- ) -> Dict[str, object]:
- sample_size = self.get_sample_size(instance)
- try:
- if self.local_cache is None:
- self.local_cache = recursive_copy(list(multi_stream[self.from_stream]))
+ self, instance: Dict[str, Any], multi_stream: MultiStream
+ ) -> Dict[str, Any]:
+ if self.skip_demoed_instances and self.to_field in instance:
+ if self.from_field in instance:
+ instance.pop(self.from_field)
+ return instance
- source_stream = self.local_cache
- source_stream = self.sampler.filter_source_by_instance(
- source_stream, instance
- )
- if len(source_stream) < sample_size:
- raise ValueError(
- f"Size of population to sample from: {len(source_stream)} is smaller than the needed sample_size: {self.sampler.sample_size}."
- )
- sampled_instances = self.sampler.sample(
- sample_size=sample_size, instances_pool=source_stream, instance=instance
+ demos_pool = instance[self.from_field]
+ sample_size = self.get_sample_size(instance)
+ source_stream = self.sampler.filter_source_by_instance(demos_pool, instance)
+ if len(source_stream) < sample_size:
+ raise ValueError(
+ f"Size of population to sample from: {len(source_stream)} is smaller than the needed sample_size: {sample_size}. Please consider increasing increasing the demos pool, for which you may need to increase loader_limit or employ a less strict stream filtering."
)
- instance[self.to_field] = sampled_instances
- return instance
- except FaultyStreamError as e:
- raise EmptyStreamError(
- f"Unable to fetch instances from '{self.from_stream}' to '{self.to_field}', due to {e.__class__.__name__}: {e}"
- ) from e
+ sampled_instances = self.sampler.sample(
+ sample_size=sample_size, instances_pool=source_stream, instance=instance
+ )
+ instance[self.to_field] = recursive_copy(sampled_instances)
+ instance.pop(self.from_field) # pop the field pointing to the demos_pool
+ return instance
-class ConstantSizeSample(Sample):
+class ConstantSizeSample(AssignDemosToInstance):
sample_size: int
def get_sample_size(self, instance) -> int:
return self.sample_size
-class RandomSizeSample(Sample):
+class RandomSizeSample(AssignDemosToInstance):
sample_sizes: List[int]
def get_sample_size(self, instance) -> int:
diff --git a/src/unitxt/standard.py b/src/unitxt/standard.py
index d18eaab904..73b1000725 100644
--- a/src/unitxt/standard.py
+++ b/src/unitxt/standard.py
@@ -1,26 +1,35 @@
-from typing import List, Optional, Union
+import itertools
+import json
+import sys
+from typing import Any, Dict, Generator, List, Optional, Union
from .artifact import fetch_artifact
from .augmentors import Augmentor, NullAugmentor
from .card import TaskCard
from .collections_operators import GetLength
from .dataclass import Field, InternalField, NonPositionalField, OptionalField
+from .deprecation_utils import deprecation
from .error_utils import UnitxtError
from .formats import Format, SystemFormat
+from .generator_utils import ReusableGenerator
from .logging_utils import get_logger
-from .operator import SequentialOperator, SourceSequentialOperator, StreamingOperator
+from .operator import (
+ MultiStreamOperator,
+ SequentialOperator,
+ SourceSequentialOperator,
+ StreamingOperator,
+)
from .operators import Set, StreamRefiner
-from .recipe import Recipe
from .schema import FinalizeDataset
from .serializers import SingleTypeSerializer
from .settings_utils import get_constants, get_settings
-from .splitters import ConstantSizeSample, RandomSizeSample, Sampler, SeparateSplit
+from .splitters import ConstantSizeSample, RandomSizeSample, Sampler
from .stream import MultiStream
from .system_prompts import EmptySystemPrompt, SystemPrompt
from .task import Task
from .templates import ApplyRandomTemplate, ApplySingleTemplate, Template, TemplatesList
from .type_utils import isoftype
-from .utils import LRUCache
+from .utils import LRUCache, recursive_copy
constants = get_constants()
settings = get_settings()
@@ -28,11 +37,205 @@
# Used to give meaningful name to recipe steps
-class CreateDemosPool(SeparateSplit):
- pass
+class CreateDemosPool(MultiStreamOperator):
+ from_stream: str = None
+ demos_pool_size: int = None
+ demos_removed_from_data: bool = None
+ to_field: str = constants.demos_pool_field
+
+ # flake8: noqa: B007
+ def process(self, multi_stream: MultiStream) -> MultiStream:
+ # Generate the demos_pool as a selection of demos_pool_size distinct instances
+ # (distinct by their "input_fields" field). The selection is taken from the stream named from_stream.
+ # The selected instances are later treated as ordinary instances or not, depending on parameter
+ # demos_removed_from_data.
+ # The selection is made from the first instances of the stream named from_stream.
+ # Instances that are not distinct from previously selected demo instances are kept aside, to be later
+ # treated like all the remaining instances of stream from_stream.
+ if self.from_stream not in multi_stream:
+ raise ValueError(
+ f"Input multi-stream is missing a stream named '{self.from_stream}' to take demo instances from for the demos_pool."
+ )
+ if (
+ self.demos_removed_from_data is not None
+ and self.demos_removed_from_data is True
+ and (self.demos_pool_size == sys.maxsize)
+ ):
+ # going to consume the whole of the input stream named self.from_stream for demo instances,
+ # and not let demo instances behave as regular instances, so self.from_stream
+ # ends its life here as an input stream that is expected to reach the end of the recipe
+ if len(multi_stream) == 1:
+ raise ValueError(
+ f"The single input stream, '{self.from_stream}' is to be wholly consumed for generating demos, and no instance is left to use these demos."
+ )
+ from_stream = multi_stream[self.from_stream]
+ demos_pool = []
+ input_fields_of_demos_pool = []
+ not_selected_from_from_stream = []
+ for num_scanned, instance in enumerate(from_stream):
+ if "input_fields" not in instance:
+ raise ValueError(f"'input_fields' field is missing from '{instance}'.")
+ input_fields_signature = json.dumps(
+ instance["input_fields"], sort_keys=True
+ )
+ if input_fields_signature in input_fields_of_demos_pool:
+ not_selected_from_from_stream.append(instance)
+ continue
+ demos_pool.append(instance)
+ input_fields_of_demos_pool.append(input_fields_signature)
+ if len(demos_pool) >= self.demos_pool_size:
+ break
+
+ # For backward compatibility, do not throw an exception here if the demos pool is smaller than expected.
+ # Delay that to the event (if it occurs) that Sample is not able to sample num_demos demos.
+
+ # to avoid endless recursion in case demos_removed_from_data is False
+ demos_pool = recursive_copy(demos_pool)
+
+ set_demos_pool = Set(fields={self.to_field: demos_pool})
+ if (
+ self.demos_removed_from_data is not None
+ and self.demos_removed_from_data is False
+ ):
+ # all input instances go out; none is removed for having been selected as a demo
+ return set_demos_pool(multi_stream)
+
+ if (
+ self.demos_removed_from_data is not None
+ and self.demos_removed_from_data is True
+ ):
+ if self.demos_pool_size == sys.maxsize:
+ # consume the whole of input stream self.from_stream, just for demos, and do not
+ # take any of its instances to behave as a non-demo instance, i.e., a regular instance
+ # that consumes the demos
+ out_ms = MultiStream(
+ {
+ stream_name: multi_stream[stream_name]
+ for stream_name in multi_stream
+ if stream_name != self.from_stream
+ }
+ )
+ return set_demos_pool(out_ms)
+
+ # demos_removed_from_data is True, but the whole of self.from_stream is not consumed just for demos
+ def from_stream_generator(
+ first_layer: list, ms: MultiStream, stream_name: str, start: int
+ ) -> Generator:
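+ # first yield the instances that were scanned but not selected as demos,
+ # then continue the original stream from the point where the scan stopped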
+ yield from first_layer
+ yield from itertools.islice(ms[stream_name], start, None)
+
+ new_streams = {}
+ for stream_name in multi_stream:
+ if stream_name == self.from_stream:
+ new_streams[stream_name] = ReusableGenerator(
+ generator=from_stream_generator,
+ gen_kwargs={
+ "first_layer": not_selected_from_from_stream,
+ "ms": multi_stream,
+ "stream_name": self.from_stream,
+ "start": num_scanned + 1,
+ },
+ )
+ else:
+ new_streams[stream_name] = ReusableGenerator(
+ generator=from_stream_generator,
+ gen_kwargs={
+ "first_layer": [],
+ "ms": multi_stream,
+ "stream_name": stream_name,
+ "start": 0,
+ },
+ )
+
+ ms = MultiStream.from_generators(new_streams)
+ return set_demos_pool(ms)
+
+
+class AddDemosPool(MultiStreamOperator):
+ demos_pool: List[Dict[str, Any]]
+ demos_pool_field_name: str = constants.demos_pool_field
+
+ def process(self, multi_stream: MultiStream) -> MultiStream:
+ set_demos_pool = Set(fields={self.demos_pool_field_name: self.demos_pool})
+ return set_demos_pool(multi_stream)
+
+
+class DatasetRecipe(SourceSequentialOperator):
+ """This class represents a standard recipe for data processing and preparation.
+ This class can be used to prepare a recipe
+ with all necessary steps, refiners, and renderers included. It allows setting various
+ parameters and steps in a sequential manner for preparing the recipe.
+
+ Args:
+ card (TaskCard):
+ TaskCard object associated with the recipe.
+ template (Template, optional):
+ Template object to be used for the recipe.
+ system_prompt (SystemPrompt, optional):
+ SystemPrompt object to be used for the recipe.
+ loader_limit (int, optional):
+ Specifies the maximum number of instances per stream to be returned from the loader (used to reduce loading time in large datasets)
+ format (SystemFormat, optional):
+ SystemFormat object to be used for the recipe.
+ metrics (List[str]):
+ list of catalog metrics to use with this recipe.
+ postprocessors (List[str]):
+ list of catalog processors to apply at post processing. (Not recommended to use from here)
+ group_by (List[Union[str, List[str]]]):
+ list of task_data or metadata keys to group global scores by.
+ train_refiner (StreamRefiner, optional):
+ Train refiner to be used in the recipe.
+ max_train_instances (int, optional):
+ Maximum training instances for the refiner.
+ validation_refiner (StreamRefiner, optional):
+ Validation refiner to be used in the recipe.
+ max_validation_instances (int, optional):
+ Maximum validation instances for the refiner.
+ test_refiner (StreamRefiner, optional):
+ Test refiner to be used in the recipe.
+ max_test_instances (int, optional):
+ Maximum test instances for the refiner.
+ demos_pool_size (int, optional):
+ Size of the demos pool. Set to -1 to use the whole of stream 'demos_taken_from'.
+ demos_pool (List[Dict[str, Any]], optional):
+ a list of instances to use as the demos_pool.
+ num_demos (int, optional):
+ Number of demos to add to each instance, which become part of the source generated for that instance.
+ demos_taken_from (str, optional):
+ Specifies the stream from which the demos are taken. Default is "train".
+ demos_field (str, optional):
+ Field name for demos. Default is "demos".
+ The num_demos demos selected for an instance are stored in this field of that instance.
+ demos_pool_field_name (str, optional):
+ Field name in which to maintain the demos_pool until it is sampled from to make the demos.
+ Defaults to constants.demos_pool_field.
+ demos_removed_from_data (bool, optional):
+ whether to remove from the source data the instances taken into the demos_pool. Default is True.
+ sampler (Sampler, optional):
+ The Sampler used to select the demonstrations when num_demos > 0.
+ skip_demoed_instances (bool, optional):
+ whether to skip pushing demos to an instance whose demos_field is
+ already populated. Defaults to False.
+ steps (List[StreamingOperator], optional):
+ List of StreamingOperator objects to be used in the recipe.
+ augmentor (Augmentor):
+ Augmentor to be used to pseudo-randomly augment the source text
+ instruction_card_index (int, optional):
+ Index of instruction card to be used for preparing the recipe.
+ template_card_index (int, optional):
+ Index of template card to be used for preparing the recipe.
+
+ Methods:
+ prepare():
+ This overridden method is used for preparing the recipe
+ by arranging all the steps, refiners, and renderers in a sequential manner.
+
+ Raises:
+ AssertionError:
+ If both template and template_card_index are specified at the same time.
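+
+ Example:
+ A minimal sketch (the card and template names are illustrative picks from the catalog, not defaults):
+
+ .. code-block:: python
+
+ recipe = DatasetRecipe(
+ card="cards.wnli",
+ template="templates.classification.multi_class.relation.default",
+ demos_taken_from="validation",
+ demos_pool_size=5,
+ num_demos=3,
+ )
+ train_instances = list(recipe()["train"])
+
+ # alternatively, an explicit list of task-formatted instances (here the hypothetical
+ # variable my_demo_instances) can be passed as the pool:
+ # recipe = DatasetRecipe(card="cards.wnli", template_card_index=0,
+ #                        demos_pool=my_demo_instances, num_demos=3)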
+ """
-class BaseRecipe(Recipe, SourceSequentialOperator):
# Base parameters
card: TaskCard = None
task: Task = None
@@ -59,14 +262,18 @@ class BaseRecipe(Recipe, SourceSequentialOperator):
test_refiner: StreamRefiner = OptionalField(default_factory=StreamRefiner)
demos_pool_size: int = None
+ demos_pool: List[Dict[str, Any]] = None
num_demos: Optional[Union[int, List[int]]] = 0
demos_removed_from_data: bool = True
+ demos_pool_field_name: str = constants.demos_pool_field
- demos_pool_name: str = "demos_pool"
demos_taken_from: str = "train"
demos_field: str = "demos"
sampler: Sampler = None
+ # do not push demos to instances whose "demos" field is already populated
+ skip_demoed_instances: bool = False
+
augmentor: Union[Augmentor, List[Augmentor]] = OptionalField(default=None)
steps: List[StreamingOperator] = InternalField(default_factory=list)
@@ -101,11 +308,16 @@ def verify(self):
raise ValueError(
"When using demonstrations both num_demos and demos_pool_size should be assigned with positive integers."
)
- if self.demos_pool_size < self.max_demos_size:
+ if self.demos_pool_size < self.max_demos_size + 1:
raise ValueError(
- f"num_demos (got: {self.max_demos_size}) should not exceed demos_pool_size (got: {self.demos_pool_size})"
+ f"num_demos (got: {self.max_demos_size}) should not exceed demos_pool_size - 1 (got: {self.demos_pool_size}), (-1: to always allow filtering of a demo identical to the processed instance)."
)
- if self.loader_limit and self.demos_pool_size > self.loader_limit:
+ if (
+ (not self.demos_pool)
+ and (self.demos_pool_size != sys.maxsize)
+ and self.loader_limit
+ and (self.demos_pool_size > self.loader_limit)
+ ):
raise ValueError(
f"demos_pool_size should not exceed loader_limit ({self.loader_limit}), Got demos_pool_size={self.demos_pool_size}"
)
@@ -220,29 +432,21 @@ def set_pipelines(self):
self.loading,
self.metadata,
self.standardization,
- self.processing,
]
self.inference = SequentialOperator()
- self.inference.steps = [self.metadata, self.verbalization, self.finalize]
+ self.inference.steps = [self.processing, self.verbalization, self.finalize]
def production_preprocess(self, task_instances):
ms = MultiStream.from_iterables({constants.inference_stream: task_instances})
- return list(self.inference_instance(ms)[constants.inference_stream])
-
- def production_demos_pool(self):
- if self.use_demos:
- demos_pool = self.__class__._demos_pool_cache.get(str(self), None)
- if demos_pool is None:
- demos_pool = list(self.inference_demos()[self.demos_pool_name])
- self.__class__._demos_pool_cache[str(self)] = demos_pool
- return demos_pool
- return []
+ return list(self.metadata(ms)[constants.inference_stream])
@property
def has_custom_demos_pool(self):
- return self.demos_pool_size is not None and self.demos_pool_size > 0
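+ # demos_pool_size == -1 means: use the whole of stream demos_taken_from as the demos pool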
+ return self.demos_pool_size is not None and (
+ self.demos_pool_size > 0 or self.demos_pool_size == -1
+ )
@property
def use_demos(self):
@@ -251,13 +455,22 @@ def use_demos(self):
def produce(self, task_instances):
"""Use the recipe in production to produce model ready query from standard task instance."""
self.before_process_multi_stream()
- streams = {
- constants.inference_stream: self.production_preprocess(task_instances),
- }
- if self.use_demos:
- streams[self.demos_pool_name] = self.production_demos_pool()
- multi_stream = MultiStream.from_iterables(streams)
- multi_stream = self.inference(multi_stream)
+
+ ms = MultiStream.from_iterables({constants.inference_stream: task_instances})
+ # it does not hurt to set metadata;
+ # task_instances are assumed to be as if they had already passed through self.standardization
+ ms = self.metadata(ms)
+ if not self.use_demos:
+ # go with task_instances all the way; no other streams are needed:
+ ms = self.inference(ms)
+ return list(ms[constants.inference_stream])
+
+ streams = self.inference_demos()
+ # streams were stopped before processing
+ # ms is ready to join; it will get the demos from streams
+ streams[constants.inference_stream] = ms[constants.inference_stream]
+ # multi_stream = MultiStream(streams)
+ multi_stream = self.inference(streams)
return list(multi_stream[constants.inference_stream])
def reset(self):
@@ -321,15 +534,29 @@ def reset_pipeline(self):
augmentor.set_fields(self.card.task.augmentable_inputs)
self.processing.steps.append(augmentor)
+ # For backward compatibility, consume the demo instances even if they are not pushed into the demos field of the ordinary instances,
+ # in order to yield the very same ordinary instances as in previous releases.
+ # One example where demos are consumed but not used, which indeed skips over a (json-wise) problematic input:
+ # prepare/cards/rag/end_to_end/clapnq.py
if self.has_custom_demos_pool:
- self.processing.steps.append(
- CreateDemosPool(
- from_split=self.demos_taken_from,
- to_split_names=[self.demos_pool_name, self.demos_taken_from],
- to_split_sizes=[int(self.demos_pool_size)],
- remove_targets_from_source_split=self.demos_removed_from_data,
+ if self.demos_pool:
+ self.processing.steps.append(
+ AddDemosPool(
+ demos_pool=self.demos_pool,
+ demos_pool_field_name=self.demos_pool_field_name,
+ )
+ )
+ else:
+ self.processing.steps.append(
+ CreateDemosPool(
+ from_stream=self.demos_taken_from,
+ demos_pool_size=self.demos_pool_size
+ if self.demos_pool is None
+ else None,
+ demos_removed_from_data=self.demos_removed_from_data,
+ to_field=self.demos_pool_field_name,
+ )
)
- )
if self.use_demos:
if self.sampler is None:
@@ -346,28 +573,41 @@ def reset_pipeline(self):
if isinstance(self.num_demos, int):
self.verbalization.steps.append(
ConstantSizeSample(
- from_stream=self.demos_pool_name,
+ from_field=self.demos_pool_field_name,
to_field=self.demos_field,
sampler=self.sampler,
sample_size=self.num_demos,
+ skip_demoed_instances=self.skip_demoed_instances,
)
)
self.verbalization.steps.append(
- Set(fields={"recipe_metadata/num_demos": self.num_demos})
+ Set(
+ fields={
+ "recipe_metadata/num_demos": self.num_demos,
+ "recipe_metadata/demos_pool_size": self.demos_pool_size,
+ }
+ )
)
elif isinstance(self.num_demos, list):
self.verbalization.steps.append(
RandomSizeSample(
- from_stream=self.demos_pool_name,
+ from_field=self.demos_pool_field_name,
to_field=self.demos_field,
sampler=self.sampler,
sample_sizes=self.num_demos,
+ skip_demoed_instances=self.skip_demoed_instances,
)
)
self.verbalization.steps.append(
GetLength(field="demos", to_field="recipe_metadata/num_demos")
)
+ self.verbalization.steps.append(
+ Set(
+ fields={"recipe_metadata/demos_pool_size": self.demos_pool_size}
+ )
+ )
+
else:
raise ValueError("num_demos must be int or List[int]")
@@ -383,9 +623,15 @@ def reset_pipeline(self):
template=self.template, demos_field=self.demos_field
)
)
+
else:
self.verbalization.steps.append(
- Set(fields={"recipe_metadata/num_demos": 0})
+ Set(
+ fields={
+ "recipe_metadata/num_demos": 0,
+ "recipe_metadata/demos_pool_size": 0,
+ }
+ )
)
if isinstance(self.template, list):
self.verbalization.steps.append(
@@ -409,15 +655,6 @@ def reset_pipeline(self):
self.finalize.steps.append(FinalizeDataset(group_by=self.group_by))
- def prepare(self):
- if isinstance(self.template, TemplatesList):
- self.template = self.template.items
- self.reset_pipeline()
-
-
-class StandardRecipeWithIndexes(BaseRecipe):
- template_card_index: int = None
-
def prepare(self):
assert (
self.template_card_index is None or self.template is None
@@ -464,77 +701,41 @@ def prepare(self):
raise ValueError(
"No template was specified in the the 'template' or 'template_card_index' recipe arguments, and no default templates are defined the card or task"
)
+ if self.use_demos:
+ assert (
+ self.demos_pool is not None
+ and isoftype(self.demos_pool, List[Dict[str, Any]])
+ ) != (
+ self.demos_taken_from is not None
+ and self.demos_pool_size is not None
+ and self.demos_removed_from_data is not None
+ ), (
+ "The demos_pool must be specified by exactly one of two ways: explicitly, as a list of instances coming through parameter "
+ + "'demos_pool', or via parameters 'demos_taken_from', 'demos_pool_size', and 'demos_removed_from_data', "
+ + "that together direct its production."
+ )
- super().prepare()
+ # now set self.demos_pool_size for the checks done by verify
+ if self.demos_pool:
+ self.demos_pool_size = len(self.demos_pool)
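+ # -1 is shorthand for taking the whole of stream demos_taken_from; represent it internally as sys.maxsize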
+ if self.demos_pool_size is not None and self.demos_pool_size == -1:
+ self.demos_pool_size = sys.maxsize
+ if isinstance(self.template, TemplatesList):
+ self.template = self.template.items
+ self.reset_pipeline()
-class StandardRecipe(StandardRecipeWithIndexes):
- """This class represents a standard recipe for data processing and preparation.
- This class can be used to prepare a recipe.
- with all necessary steps, refiners and renderers included. It allows to set various
- parameters and steps in a sequential manner for preparing the recipe.
+@deprecation(version="2.0.0", alternative=DatasetRecipe)
+class BaseRecipe(DatasetRecipe):
+ pass
- Args:
- card (TaskCard):
- TaskCard object associated with the recipe.
- template (Template, optional):
- Template object to be used for the recipe.
- system_prompt (SystemPrompt, optional):
- SystemPrompt object to be used for the recipe.
- loader_limit (int, optional):
- Specifies the maximum number of instances per stream to be returned from the loader (used to reduce loading time in large datasets)
- format (SystemFormat, optional):
- SystemFormat object to be used for the recipe.
- metrics (List[str]):
- list of catalog metrics to use with this recipe.
- postprocessors (List[str]):
- list of catalog processors to apply at post processing. (Not recommended to use from here)
- group_by (List[Union[str, List[str]]]):
- list of task_data or metadata keys to group global scores by.
- train_refiner (StreamRefiner, optional):
- Train refiner to be used in the recipe.
- max_train_instances (int, optional):
- Maximum training instances for the refiner.
- validation_refiner (StreamRefiner, optional):
- Validation refiner to be used in the recipe.
- max_validation_instances (int, optional):
- Maximum validation instances for the refiner.
- test_refiner (StreamRefiner, optional):
- Test refiner to be used in the recipe.
- max_test_instances (int, optional):
- Maximum test instances for the refiner.
- demos_pool_size (int, optional):
- Size of the demos pool.
- num_demos (int, optional):
- Number of demos to be used.
- demos_pool_name (str, optional):
- Name of the demos pool. Default is "demos_pool".
- demos_taken_from (str, optional):
- Specifies from where the demos are taken. Default is "train".
- demos_field (str, optional):
- Field name for demos. Default is "demos".
- demos_removed_from_data (bool, optional):
- whether to remove the demos from the source data, Default is True
- sampler (Sampler, optional):
- The Sampler used to select the demonstrations when num_demos > 0.
- steps (List[StreamingOperator], optional):
- List of StreamingOperator objects to be used in the recipe.
- augmentor (Augmentor) :
- Augmentor to be used to pseudo randomly augment the source text
- instruction_card_index (int, optional):
- Index of instruction card to be used for preparing the recipe.
- template_card_index (int, optional):
- Index of template card to be used for preparing the recipe.
- Methods:
- prepare():
- This overridden method is used for preparing the recipe
- by arranging all the steps, refiners, and renderers in a sequential manner.
+@deprecation(version="2.0.0", alternative=DatasetRecipe)
+class StandardRecipeWithIndexes(DatasetRecipe):
+ pass
- Raises:
- AssertionError:
- If both template and template_card_index are specified at the same time.
- """
+@deprecation(version="2.0.0", alternative=DatasetRecipe)
+class StandardRecipe(DatasetRecipe):
pass
diff --git a/src/unitxt/task.py b/src/unitxt/task.py
index 5f4a3b52a7..22b980f565 100644
--- a/src/unitxt/task.py
+++ b/src/unitxt/task.py
@@ -287,6 +287,9 @@ def process(
"media": instance.get("media", {}),
"recipe_metadata": instance.get("recipe_metadata", {}),
}
+ if "demos" in instance:
+ # for the case of recipe.skip_demoed_instances
+ result["demos"] = instance["demos"]
if stream_name == constants.inference_stream:
return result
diff --git a/src/unitxt/test_utils/card.py b/src/unitxt/test_utils/card.py
index c5917814f5..1d4edd56d2 100644
--- a/src/unitxt/test_utils/card.py
+++ b/src/unitxt/test_utils/card.py
@@ -9,7 +9,7 @@
from ..logging_utils import get_logger
from ..metric import _compute
from ..settings_utils import get_settings
-from ..standard import StandardRecipe
+from ..standard import DatasetRecipe
from ..text_utils import to_pretty_string
from ..utils import deep_copy
@@ -46,10 +46,10 @@ def test_loading_from_catalog(card):
), "Card loaded is not equal to card stored"
-def load_examples_from_standard_recipe(card, template_card_index, debug, **kwargs):
+def load_examples_from_dataset_recipe(card, template_card_index, debug, **kwargs):
if settings.test_card_disable:
logger.info(
- "load_examples_from_standard_recipe() functionality is disabled because unitxt.settings.test_card_disable=True or UNITXT_TEST_CARD_DISABLE environment variable is set"
+ "load_examples_from_dataset_recipe() functionality is disabled because unitxt.settings.test_card_disable=True or UNITXT_TEST_CARD_DISABLE environment variable is set"
)
return None
@@ -58,7 +58,7 @@ def load_examples_from_standard_recipe(card, template_card_index, debug, **kwarg
kwargs["loader_limit"] = 30
kwargs["template_card_index"] = template_card_index
- recipe = StandardRecipe(card=card, **kwargs)
+ recipe = DatasetRecipe(card=card, **kwargs)
logger.info(f"Using these card recipe parameters: {kwargs}")
if debug:
@@ -292,7 +292,7 @@ def test_card(
template_card_indices = range(len(card.templates))
for template_card_index in template_card_indices:
- examples = load_examples_from_standard_recipe(
+ examples = load_examples_from_dataset_recipe(
card, template_card_index=template_card_index, debug=debug, **kwargs
)
if test_exact_match_score_when_predictions_equal_references:
diff --git a/src/unitxt/ui/ui_utils.py b/src/unitxt/ui/ui_utils.py
index 4055d34828..9db3753f5d 100644
--- a/src/unitxt/ui/ui_utils.py
+++ b/src/unitxt/ui/ui_utils.py
@@ -6,7 +6,7 @@
from ..api import evaluate
from ..logging_utils import get_logger
-from ..standard import StandardRecipe
+from ..standard import DatasetRecipe
from ..text_utils import print_dict
from . import settings as config
from .load_catalog_data import get_catalog_items, load_cards_data
@@ -122,7 +122,7 @@ def collect(dataset, split, n):
def build_prompt(prompt_args):
- recipe = StandardRecipe(**prompt_args)
+ recipe = DatasetRecipe(**prompt_args)
logger.info("loading args:")
print_dict(prompt_args)
dataset = recipe()
@@ -134,7 +134,7 @@ def build_prompt(prompt_args):
prompt_args["demos_taken_from"] = "test"
logger.info("trying againg with loading args:")
print_dict(prompt_args)
- recipe = StandardRecipe(**prompt_args)
+ recipe = DatasetRecipe(**prompt_args)
dataset = recipe()
prompt_list = collect(dataset, "test", config.PROMPT_SAMPLE_SIZE)
return prompt_list
diff --git a/tests/library/test_api.py b/tests/library/test_api.py
index 1dbefeb3a1..47bdaacdb0 100644
--- a/tests/library/test_api.py
+++ b/tests/library/test_api.py
@@ -34,7 +34,7 @@ def test_load_dataset(self):
"target": "5.0",
"references": ["5.0"],
"source": "Given this sentence: 'A plane is taking off.', on a scale of 1.0 to 5.0, what is the similarity to this text 'An air plane is taking off.'?\n",
- "task_data": '{"text1": "A plane is taking off.", "text2": "An air plane is taking off.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "attribute_value": 5.0, "metadata": {"data_classification_policy": ["public"], "template": "templates.regression.two_texts.simple", "num_demos": 0}}',
+ "task_data": '{"text1": "A plane is taking off.", "text2": "An air plane is taking off.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "attribute_value": 5.0, "metadata": {"data_classification_policy": ["public"], "template": "templates.regression.two_texts.simple", "demos_pool_size": 0, "num_demos": 0}}',
"groups": [],
"media": {"audios": [], "images": []},
"subset": [],
@@ -65,7 +65,7 @@ def test_load_dataset_with_multi_num_demos(self):
"processors.take_first_non_empty_line",
"processors.cast_to_float_return_zero_if_failed",
],
- "task_data": '{"text1": "A man is spreading shreded cheese on a pizza.", "text2": "A man is spreading shredded cheese on an uncooked pizza.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "template": "templates.regression.two_texts.simple"}, "attribute_value": 3.799999952316284, "demos": []}',
+ "task_data": '{"text1": "A man is spreading shreded cheese on a pizza.", "text2": "A man is spreading shredded cheese on an uncooked pizza.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "metadata": {"data_classification_policy": ["public"], "demos_pool_size": 2, "num_demos": 0, "template": "templates.regression.two_texts.simple"}, "attribute_value": 3.799999952316284, "demos": []}',
"data_classification_policy": ["public"],
}
self.assertEqual(len(dataset["train"]), 5)
@@ -86,7 +86,7 @@ def test_load_dataset_with_multi_templates(self):
"target": "5.0",
"references": ["5.0"],
"source": "text1: A plane is taking off., text2: An air plane is taking off., attribute_name: similarity, min_value: 1.0, max_value: 5.0\n",
- "task_data": '{"text1": "A plane is taking off.", "text2": "An air plane is taking off.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "attribute_value": 5.0, "metadata": {"data_classification_policy": ["public"], "template": "templates.key_val", "num_demos": 0}}',
+ "task_data": '{"text1": "A plane is taking off.", "text2": "An air plane is taking off.", "attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0, "attribute_value": 5.0, "metadata": {"data_classification_policy": ["public"], "template": "templates.key_val", "demos_pool_size": 0, "num_demos": 0}}',
"groups": [],
"media": {"audios": [], "images": []},
"subset": [],
@@ -117,7 +117,7 @@ def test_load_dataset_with_benchmark(self):
"processors.lower_case_till_punc",
],
"source": "Classify the grammatical acceptability of the following text to one of these options: unacceptable, acceptable.\ntext: The sailors rode the breeze clear of the rocks.\nThe grammatical acceptability is ",
- "task_data": '{"text": "The sailors rode the breeze clear of the rocks.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "label": "acceptable", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.instruction", "num_demos": 0}}',
+ "task_data": '{"text": "The sailors rode the breeze clear of the rocks.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "label": "acceptable", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.instruction", "demos_pool_size": 0, "num_demos": 0}}',
"groups": [],
"media": {"audios": [], "images": []},
"subset": ["cola"],
@@ -132,7 +132,7 @@ def test_load_dataset_with_benchmark(self):
"processors.lower_case_till_punc",
],
"source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\npremise: The drain is clogged with hair. It has to be cleaned.\nhypothesis: The hair has to be cleaned.\nThe entailment class is ",
- "task_data": '{"text_a": "The drain is clogged with hair. It has to be cleaned.", "text_a_type": "premise", "text_b": "The hair has to be cleaned.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "label": "entailment", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.relation.default", "num_demos": 0}}',
+ "task_data": '{"text_a": "The drain is clogged with hair. It has to be cleaned.", "text_a_type": "premise", "text_b": "The hair has to be cleaned.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "label": "entailment", "metadata": {"data_classification_policy": ["public"], "template": "templates.classification.multi_class.relation.default", "demos_pool_size": 0, "num_demos": 0}}',
"groups": [],
"media": {"audios": [], "images": []},
"subset": ["wnli"],
@@ -201,6 +201,7 @@ def test_evaluate(self):
"metadata": {
"data_classification_policy": ["public"],
"template": "templates.regression.two_texts.simple",
+ "demos_pool_size": 0,
"num_demos": 0,
},
"source": "Given this sentence: 'A plane is taking off.', on a scale of 1.0 to 5.0, what is the similarity to this text 'An air plane is taking off.'?\n",
@@ -255,6 +256,7 @@ def test_evaluate_with_groups(self):
"metadata": {
"data_classification_policy": ["public"],
"template": "templates.regression.two_texts.simple",
+ "demos_pool_size": 0,
"num_demos": 0,
},
"source": "Given this sentence: 'A plane is taking off.', on a scale of 1.0 to 5.0, what is the similarity to this text 'An air plane is taking off.'?\n",
@@ -335,7 +337,7 @@ def test_produce_with_recipe(self):
"processors.lower_case_till_punc",
],
"source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\npremise: When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.\nhypothesis: mother was careful not to disturb her, undressing and climbing back into her berth.\nThe entailment class is entailment\n\npremise: Steve follows Fred's example in everything. He influences him hugely.\nhypothesis: Steve influences him hugely.\nThe entailment class is entailment\n\npremise: It works perfectly\nhypothesis: It works!\nThe entailment class is ",
- "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 2, "template": "templates.classification.multi_class.relation.default"}, "demos": [{"text_a": "When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.", "text_a_type": "premise", "text_b": "mother was careful not to disturb her, undressing and climbing back into her berth.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}, {"text_a": "Steve follows Fred\'s example in everything. He influences him hugely.", "text_a_type": "premise", "text_b": "Steve influences him hugely.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
+ "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 2, "demos_pool_size": 5, "template": "templates.classification.multi_class.relation.default"}, "demos": [{"text_a": "When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.", "text_a_type": "premise", "text_b": "mother was careful not to disturb her, undressing and climbing back into her berth.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}, {"text_a": "Steve follows Fred\'s example in everything. He influences him hugely.", "text_a_type": "premise", "text_b": "Steve influences him hugely.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
"groups": [],
"subset": [],
"media": {"images": [], "audios": []},
@@ -364,7 +366,7 @@ def test_produce_with_task(self):
"processors.lower_case_till_punc",
],
"source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\npremise: It works perfectly\nhypothesis: It works!\nThe entailment class is ",
- "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 0, "template": "templates.classification.multi_class.relation.default"}}',
+ "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.relation.default"}}',
"groups": [],
"subset": [],
"media": {"images": [], "audios": []},
@@ -395,7 +397,7 @@ def test_produce_with_recipe_with_list_of_instances(self):
"processors.lower_case_till_punc",
],
"source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\npremise: When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.\nhypothesis: mother was careful not to disturb her, undressing and climbing back into her berth.\nThe entailment class is entailment\n\npremise: Steve follows Fred's example in everything. He influences him hugely.\nhypothesis: Steve influences him hugely.\nThe entailment class is entailment\n\npremise: It works perfectly\nhypothesis: It works!\nThe entailment class is ",
- "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 2, "template": "templates.classification.multi_class.relation.default"}, "demos": [{"text_a": "When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.", "text_a_type": "premise", "text_b": "mother was careful not to disturb her, undressing and climbing back into her berth.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}, {"text_a": "Steve follows Fred\'s example in everything. He influences him hugely.", "text_a_type": "premise", "text_b": "Steve influences him hugely.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
+ "task_data": '{"text_a": "It works perfectly", "text_a_type": "premise", "text_b": "It works!", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": [], "num_demos": 2, "demos_pool_size": 5, "template": "templates.classification.multi_class.relation.default"}, "demos": [{"text_a": "When Tatyana reached the cabin, her mother was sleeping. She was careful not to disturb her, undressing and climbing back into her berth.", "text_a_type": "premise", "text_b": "mother was careful not to disturb her, undressing and climbing back into her berth.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}, {"text_a": "Steve follows Fred\'s example in everything. He influences him hugely.", "text_a_type": "premise", "text_b": "Steve influences him hugely.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
"groups": [],
"subset": [],
"media": {"images": [], "audios": []},
diff --git a/tests/library/test_artifact.py b/tests/library/test_artifact.py
index e17bd8ae4b..896fb0852e 100644
--- a/tests/library/test_artifact.py
+++ b/tests/library/test_artifact.py
@@ -572,7 +572,7 @@ def test_artifact_link_in_recursive_load(self):
def test_artifact_is_not_saving_if_artifact_has_changed(self):
with self.assertRaises(UnitxtError) as e:
args = {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.sst2",
"template_card_index": 0,
"demos_pool_size": 100,
diff --git a/tests/library/test_artifact_recovery.py b/tests/library/test_artifact_recovery.py
index 376baeb9f6..c074ad486b 100644
--- a/tests/library/test_artifact_recovery.py
+++ b/tests/library/test_artifact_recovery.py
@@ -13,7 +13,7 @@
class TestArtifactRecovery(UnitxtTestCase):
def test_correct_artifact_recovery(self):
args = {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.sst2",
"template_card_index": 0,
"demos_pool_size": 100,
@@ -24,7 +24,7 @@ def test_correct_artifact_recovery(self):
def test_correct_artifact_recovery_with_overwrite(self):
args = {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.sst2",
"template_card_index": 0,
"demos_pool_size": 100,
@@ -45,7 +45,7 @@ def test_bad_artifact_recovery_missing_type(self):
def test_bad_artifact_recovery_bad_type(self):
args = {
- "__type__": "standard_recipe",
+ "__type__": "dataset_recipe",
"card": "cards.sst2",
"template_card_index": 1000,
"demos_pool_size": 100,
diff --git a/tests/library/test_benchmark.py b/tests/library/test_benchmark.py
index 21579d97eb..3b9a2e4d3b 100644
--- a/tests/library/test_benchmark.py
+++ b/tests/library/test_benchmark.py
@@ -1,5 +1,5 @@
from unitxt.benchmark import Benchmark
-from unitxt.standard import StandardRecipe
+from unitxt.standard import DatasetRecipe
from tests.utils import UnitxtTestCase
@@ -11,11 +11,11 @@ def test_benchmark(self):
max_samples_per_subset=2,
loader_limit=300,
subsets={
- "cola": StandardRecipe(
+ "cola": DatasetRecipe(
card="cards.cola",
template="templates.classification.multi_class.instruction",
),
- "wnli": StandardRecipe(
+ "wnli": DatasetRecipe(
card="cards.wnli",
template="templates.classification.multi_class.relation.default",
),
@@ -38,7 +38,7 @@ def test_benchmark(self):
"target": "acceptable",
"references": ["acceptable"],
"source": "Classify the grammatical acceptability of the following text to one of these options: unacceptable, acceptable.\n\nUser:text: The sailors rode the breeze clear of the rocks.\nAgent:The grammatical acceptability is ",
- "task_data": '{"text": "The sailors rode the breeze clear of the rocks.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "template": "templates.classification.multi_class.instruction"}, "label": "acceptable"}',
+ "task_data": '{"text": "The sailors rode the breeze clear of the rocks.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.instruction"}, "label": "acceptable"}',
"groups": [],
"subset": ["cola"],
},
@@ -53,7 +53,7 @@ def test_benchmark(self):
"target": "acceptable",
"references": ["acceptable"],
"source": "Classify the grammatical acceptability of the following text to one of these options: unacceptable, acceptable.\n\nUser:text: The weights made the rope stretch over the pulley.\nAgent:The grammatical acceptability is ",
- "task_data": '{"text": "The weights made the rope stretch over the pulley.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "template": "templates.classification.multi_class.instruction"}, "label": "acceptable"}',
+ "task_data": '{"text": "The weights made the rope stretch over the pulley.", "text_type": "text", "classes": ["unacceptable", "acceptable"], "type_of_class": "grammatical acceptability", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.instruction"}, "label": "acceptable"}',
"groups": [],
"subset": ["cola"],
},
@@ -72,7 +72,7 @@ def test_benchmark(self):
"target": "entailment",
"references": ["entailment"],
"source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\n\nUser:premise: The drain is clogged with hair. It has to be cleaned.\nhypothesis: The hair has to be cleaned.\nAgent:The entailment class is ",
- "task_data": '{"text_a": "The drain is clogged with hair. It has to be cleaned.", "text_a_type": "premise", "text_b": "The hair has to be cleaned.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "template": "templates.classification.multi_class.relation.default"}, "label": "entailment"}',
+ "task_data": '{"text_a": "The drain is clogged with hair. It has to be cleaned.", "text_a_type": "premise", "text_b": "The hair has to be cleaned.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.relation.default"}, "label": "entailment"}',
"groups": [],
"subset": ["wnli"],
},
@@ -91,7 +91,7 @@ def test_benchmark(self):
"target": "not entailment",
"references": ["not entailment"],
"source": "Given a premise and hypothesis classify the entailment of the hypothesis to one of entailment, not entailment.\n\nUser:premise: Jane knocked on Susan's door but she did not answer.\nhypothesis: Susan did not answer.\nAgent:The entailment class is ",
- "task_data": '{"text_a": "Jane knocked on Susan\'s door but she did not answer.", "text_a_type": "premise", "text_b": "Susan did not answer.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "template": "templates.classification.multi_class.relation.default"}, "label": "not entailment"}',
+ "task_data": '{"text_a": "Jane knocked on Susan\'s door but she did not answer.", "text_a_type": "premise", "text_b": "Susan did not answer.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 0, "demos_pool_size": 0, "template": "templates.classification.multi_class.relation.default"}, "label": "not entailment"}',
"groups": [],
"subset": ["wnli"],
},
@@ -109,18 +109,18 @@ def test_benchmark_format_trickling(self):
max_samples_per_subset=1,
loader_limit=300,
subsets={
- "cola": StandardRecipe(
+ "cola": DatasetRecipe(
card="cards.cola",
template="templates.classification.multi_class.instruction",
),
- "wnli": StandardRecipe(
+ "wnli": DatasetRecipe(
card="cards.wnli",
format="formats.empty",
template="templates.classification.multi_class.relation.default",
),
},
),
- "wnli": StandardRecipe(
+ "wnli": DatasetRecipe(
card="cards.wnli",
template="templates.classification.multi_class.relation.default",
),
diff --git a/tests/library/test_formats.py b/tests/library/test_formats.py
index 598a2572d2..c433363e64 100644
--- a/tests/library/test_formats.py
+++ b/tests/library/test_formats.py
@@ -2,7 +2,7 @@
from unitxt.formats import ChatAPIFormat, HFSystemFormat, SystemFormat
from unitxt.loaders import LoadFromDictionary
from unitxt.settings_utils import get_constants
-from unitxt.standard import StandardRecipe
+from unitxt.standard import DatasetRecipe
from unitxt.system_prompts import TextualSystemPrompt
from unitxt.task import Task
from unitxt.templates import InputOutputTemplate
@@ -665,7 +665,7 @@ def test_system_format_with_demos_different_target_prefixes(self):
templates=[template],
)
- recipe = StandardRecipe(
+ recipe = DatasetRecipe(
card=card,
loader_limit=20,
demos_pool_size=5,
diff --git a/tests/library/test_fusion.py b/tests/library/test_fusion.py
index b37d2ac9de..c4845e3db3 100644
--- a/tests/library/test_fusion.py
+++ b/tests/library/test_fusion.py
@@ -1,7 +1,7 @@
from unitxt.api import evaluate
from unitxt.fusion import FixedFusion, WeightedFusion
from unitxt.operators import IterableSource
-from unitxt.standard import StandardRecipe
+from unitxt.standard import DatasetRecipe
from unitxt.test_utils.operators import check_operator
from tests.utils import UnitxtTestCase, fillna, round_values
@@ -309,22 +309,22 @@ def test_over_bounded_weighted_fusion(self):
def test_end_to_end(self):
dataset = WeightedFusion(
subsets={
- "wnli": StandardRecipe(
+ "wnli": DatasetRecipe(
card="cards.wnli",
template="templates.classification.multi_class.relation.default",
group_by=["template"],
),
- "rte": StandardRecipe(
+ "rte": DatasetRecipe(
card="cards.rte",
template="templates.classification.multi_class.relation.default",
),
"stsb": WeightedFusion(
subsets={
- "regression": StandardRecipe(
+ "regression": DatasetRecipe(
card="cards.stsb",
template="templates.regression.two_texts.simple",
),
- "classification": StandardRecipe(
+ "classification": DatasetRecipe(
card="cards.stsb",
template=[
"templates.regression.two_texts.similarity.flan",
diff --git a/tests/library/test_recipe.py b/tests/library/test_recipe.py
index 38a130572b..85dc48a3da 100644
--- a/tests/library/test_recipe.py
+++ b/tests/library/test_recipe.py
@@ -2,26 +2,30 @@
import copy
import json
import re
+import sys
from typing import Any, Dict
from unitxt import dataset_file
from unitxt.artifact import fetch_artifact
from unitxt.card import TaskCard
+from unitxt.catalog import get_from_catalog
from unitxt.formats import SystemFormat
from unitxt.loaders import LoadFromDictionary
from unitxt.serializers import SingleTypeSerializer, TableSerializer
-from unitxt.standard import StandardRecipe, StandardRecipeWithIndexes
+from unitxt.splitters import SplitRandomMix
+from unitxt.standard import DatasetRecipe
from unitxt.task import Task
from unitxt.templates import InputOutputTemplate, TemplatesList
from unitxt.text_utils import print_dict
from unitxt.types import Table
+from unitxt.utils import recursive_copy
from tests.utils import UnitxtTestCase
class TestRecipes(UnitxtTestCase):
- def test_standard_recipe(self):
- recipe = StandardRecipe(
+ def test_dataset_recipe(self):
+ recipe = DatasetRecipe(
card="cards.wnli",
template=InputOutputTemplate(
input_format="{text_a}",
@@ -58,8 +62,8 @@ def test_standard_recipe(self):
)
break
- def test_standard_recipe_with_catalog(self):
- recipe = StandardRecipe(
+ def test_dataset_recipe_with_catalog(self):
+ recipe = DatasetRecipe(
card="cards.mmlu.marketing",
system_prompt="system_prompts.models.llama",
template="templates.qa.multiple_choice.with_topic.lm_eval_harness",
@@ -74,8 +78,8 @@ def test_standard_recipe_with_catalog(self):
print_dict(instance)
break
- def test_standard_recipe_production_without_demos(self):
- recipe = StandardRecipe(
+ def test_dataset_recipe_production_without_demos(self):
+ recipe = DatasetRecipe(
card="cards.mmlu.marketing",
system_prompt="system_prompts.models.llama",
template="templates.qa.multiple_choice.with_topic.lm_eval_harness",
@@ -106,8 +110,8 @@ def test_standard_recipe_production_without_demos(self):
self.assertDictEqual(result, target)
- def test_standard_recipe_production_consistency(self):
- recipe = StandardRecipe(
+ def test_dataset_recipe_production_consistency(self):
+ recipe = DatasetRecipe(
card="cards.mmlu.marketing",
system_prompt="system_prompts.models.llama",
template="templates.qa.multiple_choice.with_topic.lm_eval_harness",
@@ -125,31 +129,17 @@ def test_standard_recipe_production_consistency(self):
}
]
- self.assertListEqual(
- recipe.production_demos_pool(), recipe.production_demos_pool()
- )
-
self.assertDictEqual(
- recipe.produce(instances)[0],
- recipe.produce(instances)[0],
- )
-
- i1 = recipe.production_preprocess(instances)[0]
- i2 = recipe.production_preprocess(instances)[0]
- for meta_data in ["card", "template", "format", "system_prompt"]:
- if meta_data in i1["recipe_metadata"]:
- i1["recipe_metadata"][meta_data] = i1["recipe_metadata"][
- meta_data
- ]._to_raw_dict()
- if not isinstance(i2["recipe_metadata"][meta_data], dict):
- i2["recipe_metadata"][meta_data] = i2["recipe_metadata"][
- meta_data
- ]._to_raw_dict()
+ recipe.produce(recursive_copy(instances))[0],
+ recipe.produce(recursive_copy(instances))[0],
+ )
+ i1 = recipe.production_preprocess(recursive_copy(instances))[0]
+ i2 = recipe.production_preprocess(recursive_copy(instances))[0]
self.assertDictEqual(i1, i2)
- def test_standard_recipe_production_with_demos(self):
- recipe = StandardRecipe(
+ def test_dataset_recipe_production_with_demos(self):
+ recipe = DatasetRecipe(
card="cards.mmlu.marketing",
system_prompt="system_prompts.models.llama",
template="templates.qa.multiple_choice.with_topic.lm_eval_harness",
@@ -173,7 +163,7 @@ def test_standard_recipe_production_with_demos(self):
"data_classification_policy": [],
"postprocessors": ["processors.first_character"],
"source": "<>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<>\n\n\n\n\nUser: The following are multiple choice questions (with answers) about marketing.\n\nAlthough the content and quality can be as controlled as direct mail, response rates of this medium are lower because of the lack of a personal address mechanism. This media format is known as:\nA. Care lines.\nB. Direct mail.\nC. Inserts.\nD. Door to door.\nAnswer:\nAgent: D\n\nUser: The following are multiple choice questions (with answers) about marketing.\n\n _____________ is a natural outcome when combining demographic and geographic variables.\nA. Geodemographics\nB. Product differentiation.\nC. ANSOFF matrix.\nD. Brand management.\nAnswer:\nAgent: A\n\nUser: The following are multiple choice questions (with answers) about marketing.\n\nIn an organization, the group of people tasked with buying decisions is referred to as the _______________.\nA. Outsourcing unit.\nB. Procurement centre.\nC. Chief executive unit.\nD. Decision-making unit.\nAnswer:\nAgent: D\n\n\nUser:The following are multiple choice questions (with answers) about testing.\n\nwhat?\nA. yes\nB. not\nC. maybe\nAnswer:\nAgent:",
- "task_data": '{"topic": "testing", "question": "what?", "choices": ["yes", "not", "maybe"], "options": [" A", " B", " C"], "metadata": {"data_classification_policy": [], "num_demos": 3, "template": "templates.qa.multiple_choice.with_topic.lm_eval_harness"}, "demos": [{"topic": "marketing", "question": "Although the content and quality can be as controlled as direct mail, response rates of this medium are lower because of the lack of a personal address mechanism. This media format is known as:", "choices": ["Care lines.", "Direct mail.", "Inserts.", "Door to door."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 3}, {"topic": "marketing", "question": " _____________ is a natural outcome when combining demographic and geographic variables.", "choices": ["Geodemographics", "Product differentiation.", "ANSOFF matrix.", "Brand management."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 0}, {"topic": "marketing", "question": "In an organization, the group of people tasked with buying decisions is referred to as the _______________.", "choices": ["Outsourcing unit.", "Procurement centre.", "Chief executive unit.", "Decision-making unit."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 3}]}',
+ "task_data": '{"topic": "testing", "question": "what?", "choices": ["yes", "not", "maybe"], "options": [" A", " B", " C"], "metadata": {"data_classification_policy": [], "demos_pool_size": 5, "num_demos": 3, "template": "templates.qa.multiple_choice.with_topic.lm_eval_harness"}, "demos": [{"topic": "marketing", "question": "Although the content and quality can be as controlled as direct mail, response rates of this medium are lower because of the lack of a personal address mechanism. This media format is known as:", "choices": ["Care lines.", "Direct mail.", "Inserts.", "Door to door."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 3}, {"topic": "marketing", "question": " _____________ is a natural outcome when combining demographic and geographic variables.", "choices": ["Geodemographics", "Product differentiation.", "ANSOFF matrix.", "Brand management."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 0}, {"topic": "marketing", "question": "In an organization, the group of people tasked with buying decisions is referred to as the _______________.", "choices": ["Outsourcing unit.", "Procurement centre.", "Chief executive unit.", "Decision-making unit."], "options": [" A", " B", " C", " D"], "metadata": {"data_classification_policy": ["public"]}, "answer": 3}]}',
"groups": [],
"subset": [],
"media": {"images": [], "audios": []},
@@ -185,8 +175,204 @@ def test_standard_recipe_production_with_demos(self):
self.assertDictEqual(result, target)
self.assertDictEqual(target_task_data, result_task_data)
- def test_standard_recipe_with_indexes_with_catalog(self):
- recipe = StandardRecipe(
+ def test_dataset_recipe_with_given_demos(self):
+ recipe = DatasetRecipe(
+ card="cards.wnli",
+ template_card_index=0,
+ )
+ for_demos = recipe.inference_demos()
+ for_demos = recipe.processing(for_demos)
+ for_demos = recursive_copy(list(for_demos["validation"]))
+
+ # for_demos is a list of instances taken from the 'validation' stream of the source of 'cards.wnli'.
+ # Having passed the preprocessing steps, each of them now complies with the format of 'cards.wnli.task'.
+
+ # We now run a recipe with this for_demos, and expect the 'train' stream to come out with them as demos.
+
+ recipe2 = DatasetRecipe(
+ card="cards.wnli",
+ template_card_index=0,
+ demos_pool=for_demos[0:5],
+ num_demos=3,
+ )
+
+ trains = list(recipe2()["train"])
+ source_demos_input = trains[0]["source"]
+
+ # expect the same result as when the demos are created while processing the recipe:
+
+ recipe3 = DatasetRecipe(
+ card="cards.wnli",
+ template_card_index=0,
+ demos_taken_from="validation",
+ demos_pool_size=5,
+ demos_removed_from_data=True,
+ num_demos=3,
+ )
+
+ trains = list(recipe3()["train"])
+ source_demos_selected = trains[0]["source"]
+
+ self.assertEqual(source_demos_input, source_demos_selected)
+
+ def test_dataset_recipe_not_duplicating_demos_pool(self):
+ recipe = DatasetRecipe(
+ card="cards.wnli",
+ template_card_index=0,
+ )
+ for_demos = recipe.inference_demos()
+ for_demos = recipe.processing(for_demos)
+ for_demos = recursive_copy(list(for_demos["validation"]))
+
+ recipe3 = DatasetRecipe(
+ card="cards.wnli",
+ template_card_index=0,
+ demos_pool=for_demos,
+ num_demos=3,
+ )
+
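+ # the following checks that the given demos_pool is shared among the instances, rather than deep-copied per instance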
+ ms = recipe3.inference_demos()
+ ms = recipe3.processing(ms)
+ # ms is stopped after processing, before verbalization, so each instance still carries its "_demos_pool_" field
+ trains = list(ms["train"])
+ assert "_demos_pool_" in trains[0]
+ first_demo_of_first_instance = trains[0]["_demos_pool_"][0]
+ first_demo_of_second_instance = trains[1]["_demos_pool_"][0]
+ self.assertDictEqual(
+ first_demo_of_first_instance, first_demo_of_second_instance
+ )
+ self.assertEqual(
+ first_demo_of_first_instance["input_fields"]["text_a_type"], "premise"
+ )
+
+ # modify a field of the first demo of the first instance
+ first_demo_of_first_instance["input_fields"]["text_a_type"] = "hallelujah"
+ # and verify that the corresponding demo of the second instance changes as well: the pool is shared, not duplicated
+ self.assertEqual(
+ first_demo_of_second_instance["input_fields"]["text_a_type"], "hallelujah"
+ )
+
+ def test_dataset_recipe_with_demoed_instances(self):
+ recipe = DatasetRecipe(
+ card="cards.wnli",
+ template_card_index=0,
+ )
+ ms = recipe.loading()
+ ms = recipe.metadata(ms)
+ ms = recipe.standardization(ms)
+ a_standardized_input_instance = next(iter(ms["test"]))
+ self.assertNotIn("demos", a_standardized_input_instance)
+
+ ms = recipe.loading()
+ ms = recipe.metadata(ms)
+ ms = recipe.standardization(ms)
+ ms = recipe.task(ms)
+ a_tasked_input_instance = next(iter(ms["validation"]))
+ self.assertIn(
+ "I took the water bottle out of the backpack ",
+ a_tasked_input_instance["input_fields"]["text_a"],
+ )
+
+ a_standardized_input_instance["demos"] = [a_tasked_input_instance]
+ demoed_standardized_input_instance = recursive_copy(
+ a_standardized_input_instance
+ )
+
+ recipe2 = DatasetRecipe(
+ card="cards.wnli",
+ template_card_index=0,
+ demos_pool_size=3,
+ num_demos=1,
+ skip_demoed_instances=True,
+ )
+
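+ # with skip_demoed_instances=True, the pre-attached demo is kept, so its text appears in the generated source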
+ processed_input_instance = recipe2.produce([a_standardized_input_instance])[0]
+ self.assertIn(
+ "premise: I took the water bottle out of the backpack ",
+ processed_input_instance["source"],
+ )
+
+ recipe3 = DatasetRecipe(
+ card="cards.wnli",
+ template_card_index=0,
+ demos_pool_size=3,
+ num_demos=1,
+ skip_demoed_instances=False,
+ )
+
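+ # with skip_demoed_instances=False, the pre-attached demo is not used, so its text should be absent from the generated source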
+ processed_input_instance = recipe3.produce(
+ [demoed_standardized_input_instance]
+ )[0]
+ self.assertNotIn(
+ "premise: I took the water bottle out of the backpack ",
+ processed_input_instance["source"],
+ )
+
+ # flake8: noqa: C416
+ def test_dataset_recipe_with_whole_stream_to_become_demos(self):
+ # glue.wnli has: train: 635 instances, validation: 71, and test: 146 (852 in total)
+ # take the whole of validation to become the demos_pool
+ recipe = DatasetRecipe(
+ card="cards.wnli",
+ system_prompt="system_prompts.models.llama",
+ template_card_index=0,
+ format="formats.user_agent",
+ demos_taken_from="validation",
+ demos_pool_size=-1,
+ num_demos=3,
+ )
+ ms = recipe()
+
+ # assert that 'validation' is wholly consumed for demos and does not show up in the recipe's output
+ self.assertSetEqual({stream_name for stream_name in ms}, {"train", "test"})
+
+ tests = list(ms["test"])
+ task_data = json.loads(tests[0]["task_data"])
+ # assert that sys.maxsize is recorded as demos_pool_size
+ self.assertEqual(task_data["metadata"]["demos_pool_size"], sys.maxsize)
+
+ # flake8: noqa: C400
+ def test_dataset_recipe_with_whole_stream_to_become_demos_and_no_stream_left(self):
+ # tweak wnli into a card whose preprocess_steps yield just one stream: 'validation'
+ wnli_card = get_from_catalog("cards.wnli")
+ wnli_card.preprocess_steps[0] = SplitRandomMix({"validation": "train[5%]"})
+
+ # now consume that single stream wholly for demos
+ with self.assertRaises(ValueError) as ve:
+ recipe = DatasetRecipe(
+ card=wnli_card,
+ system_prompt="system_prompts.models.llama",
+ template_card_index=0,
+ format="formats.user_agent",
+ demos_taken_from="validation",
+ demos_pool_size=-1,
+ num_demos=3,
+ )
+ ms = recipe()
+ # error: no instance is left to use the demos_pool, since the single input stream was wholly consumed to build it
+ self.assertEqual(
+ "The single input stream, 'validation' is to be wholly consumed for generating demos, and no instance is left to use these demos.",
+ str(ve.exception),
+ )
+
+ # but if demos_removed_from_data is False, that same stream also uses the demos_pool
+ # and reaches the end of the recipe
+ recipe = DatasetRecipe(
+ card=wnli_card,
+ system_prompt="system_prompts.models.llama",
+ template_card_index=0,
+ format="formats.user_agent",
+ demos_taken_from="validation",
+ demos_pool_size=-1,
+ num_demos=3,
+ demos_removed_from_data=False,
+ )
+ ms = recipe()
+ self.assertListEqual(["validation"], [stream_name for stream_name in ms])
+ self.assertEqual(10, len(list(ms["validation"])))
+
+ def test_dataset_recipe_with_catalog_wnli(self):
+ recipe = DatasetRecipe(
card="cards.wnli",
system_prompt="system_prompts.models.llama",
template_card_index=0,
@@ -201,8 +387,8 @@ def test_standard_recipe_with_indexes_with_catalog(self):
print_dict(instance)
break
- def test_standard_recipe_with_demos_not_removed_from_data(self):
- recipe = StandardRecipe(
+ def test_dataset_recipe_with_demos_not_removed_from_data(self):
+ recipe = DatasetRecipe(
card="cards.wnli",
template_card_index=0,
demos_pool_size=100,
@@ -211,10 +397,13 @@ def test_standard_recipe_with_demos_not_removed_from_data(self):
)
stream = recipe()
- n_trains_remove_demos = len(list(stream["train"]))
- n_demos_remove_demos = len(list(stream["demos_pool"]))
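+ # read the demos pool size from the instance metadata rather than from a separate 'demos_pool' stream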
+ trains = list(stream["train"])
+ n_trains_remove_demos = len(trains)
+ n_demos_remove_demos = json.loads(trains[0]["task_data"])["metadata"][
+ "demos_pool_size"
+ ]
- recipe = StandardRecipeWithIndexes(
+ recipe = DatasetRecipe(
card="cards.wnli",
template_card_index=0,
demos_pool_size=100,
@@ -223,8 +412,11 @@ def test_standard_recipe_with_demos_not_removed_from_data(self):
)
stream = recipe()
- n_trains_keep_demos = len(list(stream["train"]))
- n_demos_keep_demos = len(list(stream["demos_pool"]))
+ trains = list(stream["train"])
+ n_trains_keep_demos = len(trains)
+ n_demos_keep_demos = json.loads(trains[0]["task_data"])["metadata"][
+ "demos_pool_size"
+ ]
self.assertEqual(
n_trains_keep_demos, n_trains_remove_demos + n_demos_remove_demos
@@ -232,7 +424,7 @@ def test_standard_recipe_with_demos_not_removed_from_data(self):
self.assertEqual(n_demos_keep_demos, n_demos_remove_demos)
def test_empty_template(self):
- recipe = StandardRecipeWithIndexes(
+ recipe = DatasetRecipe(
card="cards.wnli",
system_prompt="system_prompts.models.llama",
template="templates.empty",
@@ -249,7 +441,7 @@ def test_empty_template(self):
"target": "not entailment",
"references": ["not entailment"],
"source": "<>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<>\n\n\n\n\nUser: Emma did not pass the ball to Janie although she was open., premise, She saw that Janie was open., hypothesis, entailment, not entailment, entailment\nAgent: not entailment\n\nUser: The foxes are getting in at night and attacking the chickens. I shall have to kill them., premise, I shall have to kill The foxes., hypothesis, entailment, not entailment, entailment\nAgent: not entailment\n\nUser: Fred is the only man alive who still remembers my father as an infant. When Fred first saw my father, he was twelve years old., premise, When Fred first saw my father, My father was twelve years old., hypothesis, entailment, not entailment, entailment\nAgent: entailment\n\n\nUser:Grace was happy to trade me her sweater for my jacket. She thinks it looks dowdy on her., premise, The sweater looks dowdy on her., hypothesis, entailment, not entailment, entailment\nAgent:",
- "task_data": '{"text_a": "Grace was happy to trade me her sweater for my jacket. She thinks it looks dowdy on her.", "text_a_type": "premise", "text_b": "The sweater looks dowdy on her.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 3, "template": "templates.empty"}, "label": "not entailment", "demos": [{"text_a": "Emma did not pass the ball to Janie although she was open.", "text_a_type": "premise", "text_b": "She saw that Janie was open.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "not entailment"}, {"text_a": "The foxes are getting in at night and attacking the chickens. I shall have to kill them.", "text_a_type": "premise", "text_b": "I shall have to kill The foxes.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "not entailment"}, {"text_a": "Fred is the only man alive who still remembers my father as an infant. When Fred first saw my father, he was twelve years old.", "text_a_type": "premise", "text_b": "When Fred first saw my father, My father was twelve years old.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
+ "task_data": '{"text_a": "Grace was happy to trade me her sweater for my jacket. She thinks it looks dowdy on her.", "text_a_type": "premise", "text_b": "The sweater looks dowdy on her.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"], "num_demos": 3, "demos_pool_size": 100, "template": "templates.empty"}, "label": "not entailment", "demos": [{"text_a": "Emma did not pass the ball to Janie although she was open.", "text_a_type": "premise", "text_b": "She saw that Janie was open.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "not entailment"}, {"text_a": "The foxes are getting in at night and attacking the chickens. I shall have to kill them.", "text_a_type": "premise", "text_b": "I shall have to kill The foxes.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "not entailment"}, {"text_a": "Fred is the only man alive who still remembers my father as an infant. When Fred first saw my father, he was twelve years old.", "text_a_type": "premise", "text_b": "When Fred first saw my father, My father was twelve years old.", "text_b_type": "hypothesis", "classes": ["entailment", "not entailment"], "type_of_relation": "entailment", "metadata": {"data_classification_policy": ["public"]}, "label": "entailment"}]}',
"groups": [],
"subset": [],
}
@@ -264,7 +456,7 @@ def test_empty_template(self):
self.assertDictEqual(target_task_data, result_task_data)
def test_key_val_template(self):
- recipe = StandardRecipeWithIndexes(
+ recipe = DatasetRecipe(
card="cards.wnli",
system_prompt="system_prompts.models.llama",
template="templates.key_val",
@@ -294,6 +486,7 @@ def test_key_val_template(self):
"type_of_relation": "entailment",
"metadata": {
"data_classification_policy": ["public"],
+ "demos_pool_size": 100,
"num_demos": 3,
"template": "templates.key_val",
},
@@ -341,7 +534,7 @@ def test_key_val_template(self):
self.assertDictEqual(target_task_data, result_task_data)
def test_random_template(self):
- recipe = StandardRecipeWithIndexes(
+ recipe = DatasetRecipe(
card="cards.wnli",
system_prompt="system_prompts.models.llama",
template=[
@@ -382,7 +575,7 @@ def test_random_template_with_templates_list(self):
"templates.classification.multi_class.relation.truthfulness.flan_5",
]
)
- recipe = StandardRecipeWithIndexes(
+ recipe = DatasetRecipe(
card="cards.wnli",
system_prompt="system_prompts.models.llama",
template=templates,
@@ -414,7 +607,7 @@ def test_random_template_with_templates_list(self):
self.assertDictEqual(result, target)
def test_random_num_demos(self):
- recipe = StandardRecipeWithIndexes(
+ recipe = DatasetRecipe(
card="cards.wnli",
system_prompt="system_prompts.models.llama",
template="templates.key_val",
@@ -432,8 +625,8 @@ def test_random_num_demos(self):
self.assertEqual(len(lengths), 4)
- def test_standard_recipe_with_balancer(self):
- recipe = StandardRecipeWithIndexes(
+ def test_dataset_recipe_with_balancer(self):
+ recipe = DatasetRecipe(
card="cards.wnli",
system_prompt="system_prompts.models.llama",
template="templates.key_val",
@@ -450,8 +643,8 @@ def test_standard_recipe_with_balancer(self):
self.assertEqual(counts["entailment"], counts["not entailment"])
- def test_standard_recipe_with_loader_limit(self):
- recipe = StandardRecipeWithIndexes(
+ def test_dataset_recipe_with_loader_limit(self):
+ recipe = DatasetRecipe(
card="cards.wnli",
system_prompt="system_prompts.models.llama",
template="templates.key_val",
@@ -467,9 +660,9 @@ def test_standard_recipe_with_loader_limit(self):
) # 5 elements were moved to demo pool
self.assertEqual(len(list(stream["test"])), 10)
- def test_standard_recipe_with_loader_limit_errors(self):
+ def test_dataset_recipe_with_loader_limit_errors(self):
with self.assertRaises(ValueError):
- StandardRecipeWithIndexes(
+ DatasetRecipe(
card="cards.wnli",
template="templates.key_val",
max_test_instances=10,
@@ -477,14 +670,14 @@ def test_standard_recipe_with_loader_limit_errors(self):
)
with self.assertRaises(ValueError):
- StandardRecipeWithIndexes(
+ DatasetRecipe(
card="cards.wnli",
template="templates.key_val",
max_train_instances=10,
loader_limit=9,
)
with self.assertRaises(ValueError):
- StandardRecipeWithIndexes(
+ DatasetRecipe(
template="templates.key_val",
card="cards.wnli",
max_validation_instances=10,
@@ -492,7 +685,7 @@ def test_standard_recipe_with_loader_limit_errors(self):
)
with self.assertRaises(ValueError):
- StandardRecipeWithIndexes(
+ DatasetRecipe(
template="templates.key_val",
card="cards.wnli",
num_demos=3,
@@ -500,8 +693,8 @@ def test_standard_recipe_with_loader_limit_errors(self):
loader_limit=9,
)
- def test_standard_recipe_with_no_demos_to_take(self):
- recipe = StandardRecipeWithIndexes(
+ def test_dataset_recipe_with_no_demos_to_take(self):
+ recipe = DatasetRecipe(
template="templates.key_val",
card="cards.xwinogrande.pt",
num_demos=3,
@@ -512,12 +705,12 @@ def test_standard_recipe_with_no_demos_to_take(self):
self.assertTrue(
str(cm.exception).startswith(
- "Unable to fetch instances from 'demos_pool' to 'demos'"
+ "Input multi-stream is missing a stream named 'train' to take demo instances from for the demos_pool."
)
)
with self.assertRaises(Exception) as cm:
- recipe = StandardRecipeWithIndexes(
+ recipe = DatasetRecipe(
template="templates.key_val",
card="cards.xwinogrande.pt",
num_demos=3,
@@ -530,7 +723,7 @@ def test_standard_recipe_with_no_demos_to_take(self):
)
with self.assertRaises(Exception) as cm:
- recipe = StandardRecipeWithIndexes(
+ recipe = DatasetRecipe(
template="templates.key_val",
card="cards.xwinogrande.pt",
num_demos=30,
@@ -539,11 +732,11 @@ def test_standard_recipe_with_no_demos_to_take(self):
self.assertEqual(
str(cm.exception),
- "num_demos (got: 30) should not exceed demos_pool_size (got: 10)",
+ "num_demos (got: 30) should not exceed demos_pool_size - 1 (got: 10), (-1: to always allow filtering of a demo identical to the processed instance).",
)
- def test_standard_recipe_with_no_test(self):
- recipe = StandardRecipeWithIndexes(
+ def test_dataset_recipe_with_no_test(self):
+ recipe = DatasetRecipe(
template="templates.key_val",
card="cards.xwinogrande.pt",
num_demos=3,
@@ -553,10 +746,10 @@ def test_standard_recipe_with_no_test(self):
results = list(recipe()["test"])
self.assertTrue(len(results) > 0)
- def test_standard_recipe_with_template_errors(self):
+ def test_dataset_recipe_with_template_errors(self):
# Check either template or template index was specified , but not both
with self.assertRaises(AssertionError) as cm:
- StandardRecipeWithIndexes(
+ DatasetRecipe(
card="cards.wnli", template="templates.key_val", template_card_index=100
)
self.assertTrue(
@@ -569,7 +762,7 @@ def test_standard_recipe_with_template_errors(self):
# Also check if string index is used
with self.assertRaises(AssertionError) as cm:
- StandardRecipeWithIndexes(
+ DatasetRecipe(
card="cards.wnli",
template="templates.key_val",
template_card_index="illegal_template",
@@ -584,17 +777,15 @@ def test_standard_recipe_with_template_errors(self):
# Return an error if index is not found in card
with self.assertRaises(ValueError) as cm:
- StandardRecipeWithIndexes(
- card="cards.wnli", template_card_index="illegal_template"
- )
+ DatasetRecipe(card="cards.wnli", template_card_index="illegal_template")
self.assertTrue("not defined in card." in str(cm.exception))
with self.assertRaises(ValueError) as cm:
- StandardRecipeWithIndexes(card="cards.wnli", template_card_index=100)
+ DatasetRecipe(card="cards.wnli", template_card_index=100)
self.assertTrue("not defined in card." in str(cm.exception))
- def test_standard_recipe_with_balancer_and_size_limit(self):
- recipe = StandardRecipeWithIndexes(
+ def test_dataset_recipe_with_balancer_and_size_limit(self):
+ recipe = DatasetRecipe(
card="cards.wnli",
system_prompt="system_prompts.models.llama",
template="templates.key_val",
@@ -607,13 +798,14 @@ def test_standard_recipe_with_balancer_and_size_limit(self):
stream = recipe()
counts = collections.Counter()
- for instance in stream["train"]:
+ trains = list(stream["train"])
+ for instance in trains:
counts[instance["target"]] += 1
self.assertEqual(counts["entailment"], counts["not entailment"], 10)
- def test_standard_recipe_with_augmentor_on_task_input(self):
- recipe = StandardRecipeWithIndexes(
+ def test_dataset_recipe_with_augmentor_on_task_input(self):
+ recipe = DatasetRecipe(
card="cards.sst2",
augmentor="augmentors.text.white_space",
template_card_index=0,
@@ -637,8 +829,8 @@ def test_standard_recipe_with_augmentor_on_task_input(self):
normalized_output_source == normalized_input_source
), f"{normalized_output_source} is not equal to f{normalized_input_source}"
- def test_standard_recipe_with_train_size_limit(self):
- recipe = StandardRecipeWithIndexes(
+ def test_dataset_recipe_with_train_size_limit(self):
+ recipe = DatasetRecipe(
card="cards.wnli",
system_prompt="system_prompts.models.llama",
template="templates.key_val",
@@ -659,7 +851,7 @@ def test_recipe_with_hf_with_twice_the_same_instance_demos(self):
d = load_dataset(
dataset_file,
- "__type__=standard_recipe_with_indexes,card=cards.wnli,template=templates.classification.multi_class.relation.default,system_prompt=system_prompts.models.llama,demos_pool_size=5,num_demos=1",
+ "__type__=dataset_recipe,card=cards.wnli,template=templates.classification.multi_class.relation.default,system_prompt=system_prompts.models.llama,demos_pool_size=5,num_demos=1",
streaming=True,
trust_remote_code=True,
)
@@ -679,12 +871,12 @@ def test_recipe_loaded_from_arguments_and_overwrites_only(self):
first_inst = next(iterator)
self.assertListEqual(["metrics.accuracy"], first_inst["metrics"])
- def test_standard_recipe_with_a_missing_sampler(self):
+ def test_dataset_recipe_with_a_missing_sampler(self):
"""Check that initializing a recipe with a card that does not have a sampler raises an exception."""
task_card, _ = copy.deepcopy(fetch_artifact("cards.sst2"))
task_card.sampler = None
with self.assertRaises(ValueError) as e:
- StandardRecipeWithIndexes(
+ DatasetRecipe(
card=task_card,
template_card_index=0,
max_train_instances=0,
@@ -733,7 +925,7 @@ def serialize(self, value: Table, instance: Dict[str, Any]) -> str:
task=task,
)
- recipe = StandardRecipe(
+ recipe = DatasetRecipe(
card=card,
template=template,
serializer=TableSerializer(),
@@ -742,7 +934,7 @@ def serialize(self, value: Table, instance: Dict[str, Any]) -> str:
target = "Solve: col1,col2\nval1,val2\nval3\nval4\nAnswer: \n"
self.assertEqual(result, target)
- recipe = StandardRecipe(
+ recipe = DatasetRecipe(
card=card,
template=template,
serializer=MyTableSerializer(),
diff --git a/utils/.secrets.baseline b/utils/.secrets.baseline
index d68745100a..4e54c76c14 100644
--- a/utils/.secrets.baseline
+++ b/utils/.secrets.baseline
@@ -151,7 +151,7 @@
"filename": "src/unitxt/loaders.py",
"hashed_secret": "840268f77a57d5553add023cfa8a4d1535f49742",
"is_verified": false,
- "line_number": 496,
+ "line_number": 492,
"is_secret": false
}
],
@@ -184,5 +184,5 @@
}
]
},
- "generated_at": "2024-12-23T18:48:07Z"
+ "generated_at": "2024-12-24T18:00:14Z"
}
diff --git a/utils/compare_unitxt_datasets_between_versions.py b/utils/compare_unitxt_datasets_between_versions.py
index db1cd22d43..b1df39c794 100644
--- a/utils/compare_unitxt_datasets_between_versions.py
+++ b/utils/compare_unitxt_datasets_between_versions.py
@@ -21,7 +21,7 @@
from unitxt import register_local_catalog
from unitxt.dataset_utils import fetch
-from unitxt.standard import StandardRecipe
+from unitxt.standard import DatasetRecipe
DEMOS_POOL_SIZE = 100
TEST_SIZE = 100
@@ -134,7 +134,7 @@ def generate_examples_for_configuration(
"demos_pool_size": DEMOS_POOL_SIZE,
"loader_limit": 2 * TEST_SIZE + DEMOS_POOL_SIZE,
}
- recipe = StandardRecipe(**inputs)
+ recipe = DatasetRecipe(**inputs)
stream = recipe()
return list(itertools.islice(stream["test"], TEST_SIZE))