Commit

CI checks
stefanik12 committed Feb 12, 2024
1 parent fb2e025 commit d0cbe17
Showing 6 changed files with 99 additions and 100 deletions.
4 changes: 2 additions & 2 deletions adaptor/objectives/denoising.py
@@ -2,7 +2,7 @@
 import collections
 import itertools
 import random
-from typing import List, Tuple, Optional, Iterator
+from typing import List, Tuple, Optional, Iterable
 
 from transformers import BatchEncoding
 
@@ -147,7 +147,7 @@ def _apply_noise(self, text: str) -> str:
         out_text = noising_fn(out_text, self.noising_per_sentence)
         return out_text
 
-    def _get_inputs_iterator(self, split: str) -> Iterator[BatchEncoding]:
+    def _get_inputs_iterator(self, split: str) -> Iterable[BatchEncoding]:
         """
         Generates labels by applying selected noising strategies on inputs.
         :param split: Data split. `train` or `eval`.
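
Editor's note on the annotation change above: _get_inputs_iterator is implemented as a generator, and every generator object is an Iterator, which is in turn an Iterable, so Iterable is the weaker and more caller-friendly contract. A minimal sketch of that relationship, with a hypothetical batches helper standing in for _get_inputs_iterator:

    from collections.abc import Iterable, Iterator
    from typing import List

    def batches(texts: List[str], batch_size: int) -> Iterable[List[str]]:
        # hypothetical stand-in for _get_inputs_iterator: a generator
        # function lazily returns a generator object
        for i in range(0, len(texts), batch_size):
            yield texts[i:i + batch_size]

    gen = batches(["a", "b", "c"], 2)
    # every generator is an Iterator, and every Iterator is an Iterable,
    # so Iterable[...] is a correct and least-restrictive annotation
    assert isinstance(gen, Iterator) and isinstance(gen, Iterable)
    print(list(gen))  # [['a', 'b'], ['c']]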
7 changes: 4 additions & 3 deletions adaptor/objectives/objective_base.py
@@ -339,10 +339,11 @@ def compute_loss_on_last_sample(self) -> torch.FloatTensor:
 
         logger.warning("Computing model output")
         model_inputs = {k: v for k, v in self.last_input.items() if k not in ("oid", "labels")}
-        logits = self.compatible_head_model(**model_inputs).logits
+        outputs = self.compatible_head_model(**model_inputs)
+        logger.warning("Model outputs computation on the recent sample successful. Outputs: %s", outputs)
 
         logger.warning("Computing loss")
-        loss = self._compute_loss(logits, labels, self.last_input)
+        loss = self._compute_loss(self.last_input, labels)
 
         logger.warning("Loss computation on the recent sample successful. Loss value: %s", loss.item())
         return loss
@@ -519,7 +520,7 @@ def register_compatible_head_model(self, lang_module: LangModule,
         return super().register_compatible_head_model(lang_module, other_objective,
                                                       objective_args_for_head_config, preloaded_module)
 
-    def _get_inputs_iterator(self, split: str) -> Iterator[Union[BatchEncoding, Dict[str, torch.Tensor]]]:
+    def _get_inputs_iterator(self, split: str) -> Iterable[Union[BatchEncoding, Dict[str, torch.Tensor]]]:
        """
         Batches and encodes input texts and corresponding labels.
         :param split: Selected data split. `train` or `eval`.
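
Editor's note on the first hunk above: compute_loss_on_last_sample no longer pre-extracts logits; it logs the full model output object for debugging and delegates to _compute_loss(inputs, labels). A rough sketch of the resulting flow as a free-standing function, with invented argument names (the real method lives on the objective and _compute_loss is defined elsewhere in adaptor):

    import logging
    import torch

    logger = logging.getLogger(__name__)

    def loss_on_last_sample(model, last_input, compute_loss_fn) -> torch.FloatTensor:
        # strip bookkeeping keys that the model's forward() does not accept
        labels = last_input["labels"]
        model_inputs = {k: v for k, v in last_input.items() if k not in ("oid", "labels")}
        # run the model and log the whole output object, not just .logits,
        # so a failing sample can be inspected in full
        outputs = model(**model_inputs)
        logger.warning("Model outputs on the last sample: %s", outputs)
        # the loss routine re-derives what it needs from raw inputs and labels
        loss = compute_loss_fn(last_input, labels)
        logger.warning("Loss on the last sample: %s", loss.item())
        return loss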
2 changes: 1 addition & 1 deletion adaptor/objectives/seq2seq.py
@@ -52,7 +52,7 @@ def _get_seq2seq_collated_iterator(self,
         # yield last nonempty residual batch
         yield self.collator(features_batch)
 
-    def _get_inputs_iterator(self, split: str) -> Iterator[Union[BatchEncoding, Dict[str, torch.Tensor]]]:
+    def _get_inputs_iterator(self, split: str) -> Iterable[Union[BatchEncoding, Dict[str, torch.Tensor]]]:
         """
         Creates a default iterator over encodings with aligned input and output texts.
         :param split: Data split. `train` or `eval`.
93 changes: 0 additions & 93 deletions tests/distillation_test.py

This file was deleted.

3 changes: 2 additions & 1 deletion tests/evaluators_test.py
@@ -13,7 +13,8 @@ def assert_evaluator_logs(objective: Objective, split: str) -> None:
     dataset_sample = next(iter(objective.get_dataset(split, objective_i=0, device="cpu")))
 
     # request objective for its loss
-    loss = objective.compute_loss(dataset_sample, dataset_sample["labels"], split)
+    loss = objective.compute_loss({k: v for k, v in dataset_sample.items() if k not in ("oid",)},
+                                  dataset_sample["labels"], split)
     assert loss.item()
 
     log = objective.per_objective_log(split)
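
Editor's note: the test change above mirrors the filtering introduced in objective_base.py; internal bookkeeping keys (here "oid", presumably an objective identifier attached by the data pipeline) are stripped from the batch before it reaches compute_loss. The idiom in isolation, with an invented batch:

    # invented batch for illustration; only the filtering idiom matters
    batch = {"input_ids": [[101, 102]], "labels": [[101, 102]], "oid": 0}
    model_kwargs = {k: v for k, v in batch.items() if k not in ("oid",)}
    assert "oid" not in model_kwargs and "labels" in model_kwargs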
90 changes: 90 additions & 0 deletions tests/objectives_test.py
@@ -3,6 +3,7 @@
 from adaptor.objectives.MLM import MaskedLanguageModeling
 from adaptor.objectives.backtranslation import BackTranslation, BackTranslator
 from adaptor.objectives.classification import TokenClassification
+from adaptor.objectives.distillation import Distillation
 from adaptor.objectives.denoising import DenoisingObjective
 from adaptor.objectives.objective_base import Objective
 from adaptor.objectives.question_answering import ExtractiveQA
@@ -123,6 +124,83 @@ def test_supervised_seq2seq_objective_mbart():
     assert_module_objective_ok(lang_module, objective)
 
 
+def test_distillation_seq():
+    from adaptor.objectives.seq2seq import Sequence2Sequence
+    from transformers import AutoModelForSeq2SeqLM
+
+    class DistilledSeq2Seq(Distillation, Sequence2Sequence):
+        # this is a full implementation of distillation within another objective
+        pass
+
+    lang_module = LangModule(test_base_models["translation_mono"])
+    distilled_model = AutoModelForSeq2SeqLM.from_pretrained(test_base_models["translation_mono"])
+
+    objective = DistilledSeq2Seq(lang_module,
+                                 teacher_model=distilled_model,
+                                 texts_or_path=paths["texts"]["translation"],
+                                 labels_or_path=paths["labels"]["translation"],
+                                 batch_size=4)
+
+    assert_module_objective_ok(lang_module, objective)
+
+
+def test_distillation_mlm():
+    from adaptor.objectives.MLM import MaskedLanguageModeling
+    from transformers import AutoModelForMaskedLM
+
+    class DistilledMLM(Distillation, MaskedLanguageModeling):
+        pass
+
+    lang_module = LangModule(test_base_models["MLM_student"])
+    distilled_model = AutoModelForMaskedLM.from_pretrained(test_base_models["MLM"])
+
+    objective = DistilledMLM(lang_module,
+                             teacher_model=distilled_model,
+                             texts_or_path=paths["texts"]["unsup"],
+                             batch_size=4)
+
+    assert_module_objective_ok(lang_module, objective)
+
+
+def test_distillation_mlm_incl_hidden_states():
+    from adaptor.objectives.MLM import MaskedLanguageModeling
+    from transformers import AutoModelForMaskedLM
+
+    class DistilledMLM(Distillation, MaskedLanguageModeling):
+        pass
+
+    lang_module = LangModule(test_base_models["MLM_student"])
+    distilled_model = AutoModelForMaskedLM.from_pretrained(test_base_models["MLM"])
+
+    objective = DistilledMLM(lang_module,
+                             teacher_model=distilled_model,
+                             add_hidden_states_loss=True,
+                             texts_or_path=paths["texts"]["unsup"],
+                             batch_size=4)
+
+    assert_module_objective_ok(lang_module, objective)
+
+
+def test_distillation_mlm_restrict_to_attention():
+    from adaptor.objectives.MLM import MaskedLanguageModeling
+    from transformers import AutoModelForMaskedLM
+
+    class DistilledMLM(Distillation, MaskedLanguageModeling):
+        pass
+
+    lang_module = LangModule(test_base_models["MLM_student"])
+    distilled_model = AutoModelForMaskedLM.from_pretrained(test_base_models["MLM"])
+
+    objective = DistilledMLM(lang_module,
+                             teacher_model=distilled_model,
+                             add_hidden_states_loss=True,
+                             restrict_loss_to_mask=True,
+                             texts_or_path=paths["texts"]["unsup"],
+                             batch_size=4)
+
+    assert_module_objective_ok(lang_module, objective)
+
+
 def test_supervised_QA_objective():
     lang_module = LangModule(test_base_models["extractive_QA"])
 
@@ -133,3 +211,15 @@ def test_supervised_QA_objective():
                          batch_size=4)
 
     assert_module_objective_ok(lang_module, objective)
+
+
+# def test_search_objective():
+#     lang_module = LangModule(test_base_models["extractive_QA"])
+#
+#     objective = Encoding(lang_module,
+#                          texts_or_path=paths["texts"]["QA"],
+#                          text_pair_or_path=paths["text_pair"]["QA"],
+#                          labels_or_path=paths["labels"]["QA"],
+#                          batch_size=4)
+#
+#     assert_module_objective_ok(lang_module, objective)
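
Editor's note: the new tests compose Distillation with a concrete objective through cooperative multiple inheritance (class DistilledSeq2Seq(Distillation, Sequence2Sequence)), so the distillation loss wraps the host objective's own loss. Below is a sketch of the classic logit-distillation term such a mixin typically adds; the temperature, the weighting, and the real internals of adaptor's Distillation class are assumptions, not the commit's code:

    import torch
    import torch.nn.functional as F

    def distillation_term(student_logits: torch.Tensor,
                          teacher_logits: torch.Tensor,
                          temperature: float = 2.0) -> torch.Tensor:
        # soften both distributions with a temperature, then pull the
        # student toward the teacher with KL divergence; the T**2 factor
        # keeps gradient magnitudes comparable across temperatures
        # (Hinton et al., 2015)
        t = temperature
        student_log_probs = F.log_softmax(student_logits / t, dim=-1)
        teacher_probs = F.softmax(teacher_logits / t, dim=-1)
        return F.kl_div(student_log_probs, teacher_probs,
                        reduction="batchmean") * (t ** 2)

    # typical combination inside a distilled objective (hypothetical weights):
    # loss = 0.5 * student_ce + 0.5 * distillation_term(s_out.logits, t_out.logits)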