Rename sdg_path to tasks_dir

Signed-off-by: Dan McPherson <[email protected]>
instructlab · Jul 12, 2024 · commit 64e791c · 1 parent e8f70c0

Showing 2 changed files with 27 additions and 27 deletions.
diff --git a/src/instructlab/eval/exceptions.py b/src/instructlab/eval/exceptions.py
@@ -64,29 +64,29 @@ def __init__(self, branch) -> None:
         self.message = f"Invalid git branch: {branch}"
 
 
-class SDGPathNotFoundError(EvalError):
+class TasksDirNotFoundError(EvalError):
     """
-    Error raised when the sdg path doesn't exist
+    Error raised when the tasks dir doesn't exist
     Attributes
         message         error message to be printed on raise
-        sdg_path        sdg path
+        tasks_dir       tasks dir
     """
 
-    def __init__(self, sdg_path) -> None:
+    def __init__(self, tasks_dir) -> None:
         super().__init__()
-        self.sdg_path = sdg_path
-        self.message = f"SDG Path not found: {sdg_path}"
+        self.tasks_dir = tasks_dir
+        self.message = f"Tasks dir not found: {tasks_dir}"
 
 
-class InvalidSDGPathError(EvalError):
+class InvalidTasksDirError(EvalError):
     """
-    Error raised when the sdg path is invalid
+    Error raised when the tasks dir is invalid
     Attributes
         message         error message to be printed on raise
-        sdg_path        sdg path
+        tasks_dir       tasks dir
     """
 
-    def __init__(self, sdg_path) -> None:
+    def __init__(self, tasks_dir) -> None:
         super().__init__()
-        self.sdg_path = sdg_path
-        self.message = f"Invalid SDG Path: {sdg_path}"
+        self.tasks_dir = tasks_dir
+        self.message = f"Invalid Tasks Dir: {tasks_dir}"
diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py
@@ -12,9 +12,9 @@
 # First Party
 from instructlab.eval.evaluator import Evaluator
 from instructlab.eval.exceptions import (
-    InvalidSDGPathError,
+    InvalidTasksDirError,
     ModelNotFoundError,
-    SDGPathNotFoundError,
+    TasksDirNotFoundError,
 )
 
 # Local
@@ -89,7 +89,7 @@ class AbstractMMLUEvaluator(Evaluator):
 
     Attributes:
         model_path      absolute path to or name of a huggingface model
-        sdg_path        path where the <TASK_NAME>.jsonl and <TASK_NAME>_task.yaml files for the branches being evaluated are stored
+        tasks_dir       path where the <TASK_NAME>.jsonl and <TASK_NAME>_task.yaml files for the branches being evaluated are stored
         tasks           list of tasks for MMLU to test the model with
         model_dtype     dtype of model when served
         few_shots       number of examples
@@ -100,15 +100,15 @@ class AbstractMMLUEvaluator(Evaluator):
     def __init__(
         self,
         model_path,
-        sdg_path: Optional[str],
+        tasks_dir: Optional[str],
         tasks: list[str],
         model_dtype="bfloat16",
         few_shots: int = 2,
         batch_size: int = 5,
         device: str = ("cuda" if torch.cuda.is_available() else "cpu"),
     ) -> None:
         self.model_path = model_path
-        self.sdg_path = sdg_path
+        self.tasks_dir = tasks_dir
         self.tasks = tasks
         self.model_dtype = model_dtype
         self.few_shots = few_shots
@@ -118,12 +118,12 @@ def __init__(
     def _run_mmlu(self) -> dict:
         model_args = f"pretrained={self.model_path},dtype={self.model_dtype}"
         tm = None
-        if self.sdg_path is not None:
-            if not os.path.exists(self.sdg_path):
-                raise SDGPathNotFoundError(self.sdg_path)
-            if not os.access(self.sdg_path, os.R_OK):
-                raise InvalidSDGPathError(self.sdg_path)
-            tm = TaskManager(verbosity="DEBUG", include_path=self.sdg_path)
+        if self.tasks_dir is not None:
+            if not os.path.exists(self.tasks_dir):
+                raise TasksDirNotFoundError(self.tasks_dir)
+            if not os.access(self.tasks_dir, os.R_OK):
+                raise InvalidTasksDirError(self.tasks_dir)
+            tm = TaskManager(verbosity="DEBUG", include_path=self.tasks_dir)
         mmlu_output = self._simple_evaluate_with_error_handling(
             model="hf",
             model_args=model_args,
@@ -142,14 +142,14 @@ def _simple_evaluate_with_error_handling(self, **kwargs):
         try:
             return simple_evaluate(**kwargs)
         except KeyError as ke:
-            # If the first task key file cannot be found in sdg_path, simple_evaluate() will return
+            # If the first task key file cannot be found in tasks_dir, simple_evaluate() will return
             # an obscure KeyError(first task key)
             if (
-                self.sdg_path is not None
+                self.tasks_dir is not None
                 and len(self.tasks) > 0
                 and ke.args[0] == self.tasks[0]
             ):
-                raise InvalidSDGPathError(self.sdg_path) from ke
+                raise InvalidTasksDirError(self.tasks_dir) from ke
             raise
         except OSError as ose:
             # If a model can not be found, simple_evaluate() will return
@@ -223,7 +223,7 @@ class MMLUBranchEvaluator(AbstractMMLUEvaluator):
 
     Attributes:
         model_path      absolute path to or name of a huggingface model
-        sdg_path        path where the <TASK_NAME>.jsonl and <TASK_NAME>_task.yaml files for the branches being evaluated are stored
+        tasks_dir       path where the <TASK_NAME>.jsonl and <TASK_NAME>_task.yaml files for the branches being evaluated are stored
         tasks           group name that is shared by all the MMLUBranch tasks
         model_dtype     dtype of model when served
         few_shots       number of examples