Update transformers #38

Merged: 17 commits, Dec 22, 2023
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
@@ -12,7 +12,7 @@ jobs:
 strategy:
   fail-fast: true
   matrix:
-    python: ['3.7', '3.8']
+    python: ['3.8', '3.10']
 container:
   image: python:${{ matrix.python }}
 steps:
@@ -33,7 +33,7 @@ jobs:
 strategy:
   fail-fast: true
   matrix:
-    python: ['3.7', '3.8']
+    python: ['3.8', '3.10']
 container:
   image: python:${{ matrix.python }}
 steps:
@@ -58,7 +58,7 @@ jobs:
 strategy:
   fail-fast: true
   matrix:
-    python: ['3.7', '3.8']
+    python: ['3.8', '3.10']
 container:
   image: python:${{ matrix.python }}
 steps:
8 changes: 4 additions & 4 deletions README.md
@@ -1,4 +1,4 @@
-# Adapt𝒪r: Objective-centric Adaptation library
+# Adaptor: Objective-centric Adaptation library
 
 [![Tests](https://github.com/gaussalgo/adaptor/actions/workflows/test.yml/badge.svg)](https://github.com/gaussalgo/adaptor/actions)
 [![PyPI version](https://badge.fury.io/py/adaptor.svg)](https://badge.fury.io/py/adaptor)
@@ -14,13 +14,13 @@ If you want to jump right in, take a look at the [tutorials](tutorials).
 
 - [Background](#how-to-use-adaptor)
 - [Benefits of Task and Domain Adaptation](#benefits-of-task-and-domain-adaptation)
-- [How Can Adapt𝒪r Help](#how-can-adaptor-help)
+- [How Can Adaptor Help](#how-can-adaptor-help)
 - [Usage](#usage)
   - [Install](#usage)
   - [Use-cases](#adapted-named-entity-recognition)
 - **[Tutorials](tutorials)**
 - [How to Contribute](CONTRIBUTING.md)
-- [Cite](#citing-adapt𝒪r)
+- [Cite](#citing-adaptor)
 </details>


@@ -240,7 +240,7 @@ also be able to fix it, don't hesitate to contribute and create a PR.
 * If you'd just like to share your general impressions or personal experience with others,
 we're happy to get into a discussion in the [Discussions section](https://github.com/gaussalgo/adaptor/discussions).
 
-## Citing Adapt𝒪r
+## Citing Adaptor
 
 If you use Adaptor in your research, please cite it as follows.
2 changes: 1 addition & 1 deletion adaptor/adapter.py
@@ -79,7 +79,7 @@ def compute_loss(self,
         mock_outputs = torch.tensor([-1, -1])
         return (loss, mock_outputs) if return_outputs else loss
 
-    def log(self, logs: List[Dict[str, float]]) -> None:
+    def log(self, logs: Dict[str, float]) -> None:
         is_eval_log = any(self.eval_metrics_prefix in log_key for log_key in logs)
         extended_logs = self.schedule.objectives_log(split="eval" if is_eval_log else "train")
         return super().log({**logs, **extended_logs})
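In current transformers releases, `Trainer.log` takes a single flat dict of scalar metrics, which the corrected annotation reflects. A minimal sketch of the merge this override performs, with illustrative metric names rather than adaptor's real internals:

```python
# Illustrative values only; in adaptor, these dicts come from the Trainer
# and from Schedule.objectives_log respectively.
trainer_logs = {"eval_loss": 0.42, "epoch": 1.0}
objective_logs = {"eval_SequenceClassification_loss": 0.40}

# The override extends the trainer's own metrics with per-objective logs
# before handing the combined dict to the parent Trainer.log.
merged = {**trainer_logs, **objective_logs}
print(merged)
# {'eval_loss': 0.42, 'epoch': 1.0, 'eval_SequenceClassification_loss': 0.40}
```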
4 changes: 2 additions & 2 deletions adaptor/lang_module.py
@@ -144,14 +144,14 @@ def _partially_merge_models(orig_model: torch.nn.Module,
 
         return unmatched_modules
 
-    def forward(self, **inputs) -> torch.LongTensor:
+    def forward(self, return_loss: bool = True, **inputs) -> torch.LongTensor:
         """
         Performs forward pass over the head identified by the sample's `oid`.
         :param inputs: given head input arguments with corresponding values.
         :return: Raw model outputs (logits).
         """
         try:
-            selected_head_model = self.trainable_models[str(inputs["oid"])]
+            selected_head_model = self.trainable_models[str(inputs["oid"].item())]
         except KeyError:
             raise ValueError("Requesting inference with the objective having no registered head."
                              "If you are using `extra_eval_objectives`, "
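The two hunks are coupled: the batch's `oid` now travels as a tensor (see `_add_oid` in objective_base.py below), so the head lookup has to unwrap it with `.item()`; the added `return_loss` keyword likely exists to satisfy signature inspection in newer transformers versions, which check whether a model's forward accepts `return_loss`. A self-contained sketch of the `oid` round-trip, with an illustrative head registry:

```python
import torch

class Objective:  # stand-in for an adaptor objective
    pass

objective = Objective()

# Objective side: each batch is tagged with the objective's Python id as a tensor.
sample = {"oid": torch.tensor(id(objective))}

# Model side: heads are registered under the stringified int id, so the tensor
# must be unwrapped with .item() before the dictionary lookup.
trainable_models = {str(id(objective)): "head-module"}
selected = trainable_models[str(sample["oid"].item())]
assert selected == "head-module"
```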
6 changes: 3 additions & 3 deletions adaptor/objectives/objective_base.py
@@ -142,7 +142,7 @@ def per_objective_log(self, split: str) -> Dict[str, float]:
         out_logs = {}
         # aggregate per-progress_bar-steps, or per-evaluation-steps, keep the results of unprocessed evaluations
         loss_history = self.loss_history[split][-self.max_samples_per_log[split]:]
-        mean_loss = sum(loss_history) / len(loss_history) if len(loss_history) else 0
+        mean_loss = sum(loss_history) / len(loss_history) if len(loss_history) else float("inf")
         self.evaluations_history[split]["loss"].append(mean_loss)
 
         out_logs["%s_%s_loss" % (split, self)] = mean_loss
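The switch from `0` to `float("inf")` makes an empty loss history explicit: a placeholder of zero reads as a perfect loss. A sketch of the difference, assuming downstream checks treat the lowest loss as best:

```python
history = []  # no losses collected for this split yet

# Old behavior: an empty history yielded 0, indistinguishable from a perfect loss.
# New behavior: it yields inf, which can never win a lowest-loss comparison.
mean_loss = sum(history) / len(history) if len(history) else float("inf")

assert mean_loss == float("inf")
assert min([0.9, 0.7, mean_loss]) == 0.7  # the sentinel cannot masquerade as best
```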
@@ -203,7 +203,7 @@ def has_converged(self, patience: int) -> bool:
 
         if did_not_improve:
             logger.warning("Objective `%s` convergence metric `%s` did not improve for %s eval steps. History: %s" %
-                           (self, stopping_evaluator, patience, last_n))
+                           (self, stopping_evaluator, patience, self.evaluations_history["eval"][stopping_evaluator]))
 
         return passed_patience_evals and did_not_improve
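For context, here is a generic patience check of the kind this warning reports — an illustration only, not adaptor's exact logic: the objective counts as stalled when none of the last `patience` evaluations beat the best earlier value.

```python
# Illustrative eval-metric history; assume lower is better for this metric.
history = [0.80, 0.74, 0.75, 0.76]
patience = 2

passed_patience_evals = len(history) > patience
last_n = history[-patience:]
best_earlier = min(history[:-patience])
did_not_improve = all(value >= best_earlier for value in last_n)

assert passed_patience_evals and did_not_improve  # the case that logs the warning
```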

@@ -299,7 +299,7 @@ def _sample_to_device(sample: Union[BatchEncoding, Dict[str, torch.LongTensor]])
             return {k: v.to(device) if k != "oid" else v for k, v in sample.items()}
 
         def _add_oid(sample: Union[BatchEncoding, Dict[str, torch.LongTensor]]) -> Dict[str, torch.LongTensor]:
-            sample["oid"] = id(self)
+            sample["oid"] = torch.tensor(id(self))
             return sample
 
         def _remember_input(sample: Union[BatchEncoding, Dict[str, torch.LongTensor]]) -> Dict[str, torch.LongTensor]:
4 changes: 2 additions & 2 deletions adaptor/schedules.py
@@ -23,7 +23,7 @@ class Schedule(abc.ABC):
 
     label: str
     objectives: Dict[str, Dict[int, Objective]]
-    objectives_outputs_queue: List[Tuple[str, int]]
+    objectives_outputs_queue: List[Tuple[str, torch.LongTensor]]
     converged_objectives: List[Objective]
     should_stop: bool

@@ -177,7 +177,7 @@ def compute_loss(self,
         split, oid = self.objectives_outputs_queue.pop(0)
 
         # the objective loss arrives aggregated into a single item
-        loss = self.objectives[split][oid].compute_loss(logit_outputs, labels, inputs, split)
+        loss = self.objectives[split][oid.item()].compute_loss(logit_outputs, labels, inputs, split)
 
         return loss

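The annotation and the `.item()` call track the same change: the queue now stores the tensor-valued `oid` that travels with each batch, while the `objectives` registry stays keyed by plain `int`. A stripped-down sketch of the routing, with illustrative ids and placeholder objective objects:

```python
import torch

# Illustrative registry: objectives keyed by split, then by their int id.
objectives = {"train": {111: "objective-A"}, "eval": {222: "objective-B"}}

# Each forward pass enqueues (split, oid-tensor); compute_loss pops in FIFO order.
objectives_outputs_queue = [("train", torch.tensor(111))]

split, oid = objectives_outputs_queue.pop(0)
objective = objectives[split][oid.item()]  # registry keys are ints, hence .item()
assert objective == "objective-A"
```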
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-description-file=README.md
+description_file=README.md
 license_files=LICENSE
 [flake8]
 exclude = .git,
3 changes: 2 additions & 1 deletion setup.py
@@ -30,8 +30,9 @@
     zip_safe=True,
     install_requires=[
         "torch>=1.7",
-        "transformers>=4.10.2,<=4.19.1",  # upper-closed on 4.19.1 for now, due to minor bug in eval loss logging
+        "transformers<=4.30.2",  # TODO upper-closed on 4.30.2: Problem with returning empty batches
         "sentencepiece",
+        "accelerate>=0.20.1"
     ],
     test_require=[
         "pytest"
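The new `accelerate>=0.20.1` pin mirrors the requirement that the transformers 4.30 line imposes on `Trainer` users. A small sketch for checking an installed environment against these pins; it assumes the third-party `packaging` distribution is available:

```python
# A sketch, not part of adaptor: verify installed versions against the pins above.
from importlib.metadata import version as installed
from packaging.version import Version  # assumption: `packaging` is installed

assert Version(installed("torch")) >= Version("1.7")
assert Version(installed("transformers")) <= Version("4.30.2")
assert Version(installed("accelerate")) >= Version("0.20.1")
```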