From 3e94aaba96db142fb89e4f052bb535ae5a589291 Mon Sep 17 00:00:00 2001 From: ilyashn Date: Mon, 18 Dec 2023 11:38:30 -0500 Subject: [PATCH] cosmetics Signed-off-by: ilyashn --- prepare/cards/clinc_oos.py | 52 +++++++++++++++++++------------------- prepare/cards/ledgar.py | 4 +-- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/prepare/cards/clinc_oos.py b/prepare/cards/clinc_oos.py index dd87a4680d..777202f7d7 100644 --- a/prepare/cards/clinc_oos.py +++ b/prepare/cards/clinc_oos.py @@ -1,4 +1,4 @@ -from datasets import load_dataset_builder +from datasets import get_dataset_config_names, load_dataset_builder from src.unitxt import add_to_catalog from src.unitxt.blocks import ( @@ -10,31 +10,31 @@ ) from src.unitxt.test_utils.card import test_card -dataset_name = "clinc150" -subset = "plus" # TODO add imbalanced, small -ds_builder = load_dataset_builder("clinc_oos", subset) -classlabels = ds_builder.info.features["intent"] +dataset_name = "clinc_oos" -mappers = {} -for i in range(len(classlabels.names)): - mappers[str(i)] = classlabels.names[i] +for subset in get_dataset_config_names(dataset_name): + ds_builder = load_dataset_builder(dataset_name, subset) + classlabels = ds_builder.info.features["intent"] + mappers = {} + for i in range(len(classlabels.names)): + mappers[str(i)] = classlabels.names[i] -card = TaskCard( - loader=LoadHF(path="clinc_oos", name=subset), - preprocess_steps=[ - RenameFields(field_to_field={"intent": "label"}), - MapInstanceValues(mappers={"label": mappers}), - AddFields( - fields={ - "classes": mappers, - "text_type": "sentence", - "type_of_class": "intent", - } - ), - ], - task="tasks.classification.multi_class", - templates="templates.classification.multi_class.all", -) -test_card(card, debug=False) -add_to_catalog(artifact=card, name=f"cards.{dataset_name}.{subset}", overwrite=True) + card = TaskCard( + loader=LoadHF(path=dataset_name, name=subset), + preprocess_steps=[ + RenameFields(field_to_field={"intent": "label"}), + MapInstanceValues(mappers={"label": mappers}), + AddFields( + fields={ + "classes": classlabels.names, + "text_type": "sentence", + "type_of_class": "intent", + } + ), + ], + task="tasks.classification.multi_class", + templates="templates.classification.multi_class.all", + ) + test_card(card, debug=False) + add_to_catalog(artifact=card, name=f"cards.{dataset_name}.{subset}", overwrite=True) diff --git a/prepare/cards/ledgar.py b/prepare/cards/ledgar.py index 0f6da1fa2d..5979edccea 100644 --- a/prepare/cards/ledgar.py +++ b/prepare/cards/ledgar.py @@ -11,9 +11,7 @@ dataset_name = "ledgar" -ds_builder = load_dataset_builder( - "lex_glue", dataset_name -) # TODO should we take all from lex_glue? +ds_builder = load_dataset_builder("lex_glue", dataset_name) classlabels = ds_builder.info.features["label"] mappers = {}