Skip to content

Commit

Permalink
Merge pull request #101 from danmcp/automaxworkers
Browse files Browse the repository at this point in the history
Add optional auto tuning for max_workers
  • Loading branch information
danmcp authored Aug 21, 2024
2 parents ba6fe0e + 2bf0504 commit ec709c7
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ persistent=yes

# Minimum Python version to use for version dependent checks. Will default to
# the version used to run pylint.
py-version=3.9
py-version=3.10

# Discover python modules and packages in the file system subtree.
recursive=no
Expand Down
15 changes: 15 additions & 0 deletions src/instructlab/eval/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,21 @@ def __init__(self, path, reason) -> None:
self.message = f"Model found at {path} but was invalid due to: {reason}"


class InvalidMaxWorkersError(EvalError):
    """
    Raised when a max_workers value is neither a positive int nor the string "auto"

    Attributes
        max_workers  the offending max_workers value as supplied by the caller
        message      human-readable error message printed on raise
    """

    def __init__(self, max_workers) -> None:
        super().__init__()
        # Keep the bad value around so callers can report or log it.
        self.max_workers = max_workers
        self.message = f"Invalid max_workers '{max_workers}' specified. Valid values are positive integers or 'auto'."


class InvalidGitRepoError(EvalError):
"""
Error raised when taxonomy dir provided isn't a valid git repo
Expand Down
83 changes: 67 additions & 16 deletions src/instructlab/eval/mt_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@
https://arxiv.org/html/2306.05685
"""

# Standard
import multiprocessing
import os

# First Party
from instructlab.eval import (
mt_bench_answers,
mt_bench_branch_generator,
mt_bench_judgment,
)
from instructlab.eval.exceptions import InvalidMaxWorkersError

# Local
from .evaluator import Evaluator
Expand All @@ -20,34 +25,75 @@
logger = setup_logger(__name__)


class AbstractMTBenchEvaluator(Evaluator):
    """
    Abstract base class of an MTBenchEvaluator for Multi-turn Benchmark (MT-Bench)

    Handles the configuration shared by all MT-Bench style evaluators,
    including validation/auto-tuning of max_workers.

    Attributes
        model_name                 Name of the model to evaluate
        judge_model_name           Name of the judge model
        output_dir                 The directory to use for evaluation output
        max_workers                Max parallel workers to run the evaluation with (positive int or "auto")
        serving_gpus               Number of gpus allocated for serving. Used to tune with max_workers=auto.
        merge_system_user_message  Boolean indicating whether to merge system and user messages (required for Mistral based judges)

    Raises
        InvalidMaxWorkersError  if max_workers is neither "auto" nor a positive int
    """

    name = "mt_bench"

    def __init__(
        self,
        model_name: str,
        judge_model_name: str,
        output_dir: str = "eval_output",
        max_workers: int | str = "auto",
        serving_gpus: int | None = None,
        merge_system_user_message: bool = False,
    ) -> None:
        self.model_name = model_name
        self.judge_model_name = judge_model_name
        self.output_dir = output_dir
        self.serving_gpus = serving_gpus
        self.merge_system_user_message = merge_system_user_message

        # self.max_workers is always assigned below: either auto-tuned or
        # validated; an invalid value raises before __init__ returns.
        if max_workers == "auto":
            try:
                # Prefer the scheduler affinity mask: it reflects cgroup/taskset
                # CPU restrictions. Not available on all platforms (e.g. macOS).
                usable_cpu_count = len(os.sched_getaffinity(0))  # type: ignore[attr-defined]
            except AttributeError:
                usable_cpu_count = multiprocessing.cpu_count()
            if serving_gpus is not None:
                # Tune max_workers based on hardware configuration: min(#GPUs being used * 10, #CPU cores)
                # Please see https://github.com/instructlab/instructlab/issues/2050 for detailed explanation
                self.max_workers = min(max(serving_gpus, 1) * 10, usable_cpu_count)
                logger.debug("Auto tuning max_workers to %s", self.max_workers)
            else:
                # Don't be too aggressive when serving_gpus isn't specified. Use
                # half the cpu count, but never fewer than one worker
                # (usable_cpu_count // 2 would be 0 on a single-CPU machine).
                self.max_workers = max(usable_cpu_count // 2, 1)
                logger.debug(
                    "max_workers set to auto but serving_gpus is not specified. Defaulting to (cpu count / 2): %s",
                    self.max_workers,
                )
        else:
            # bool is a subclass of int; reject it explicitly so that
            # max_workers=True does not slip through the positive-int check
            # and silently run with a single worker.
            if (
                isinstance(max_workers, int)
                and not isinstance(max_workers, bool)
                and max_workers > 0
            ):
                logger.debug("max_workers specified as: %s", max_workers)
                self.max_workers = max_workers
            else:
                raise InvalidMaxWorkersError(max_workers)


class MTBenchEvaluator(AbstractMTBenchEvaluator):
"""
Evaluator for Multi-turn Benchmark (MT-Bench)
Attributes
model_name Name of the model to evaluate
judge_model_name Name of the judge model
output_dir The directory to use for evaluation output
max_workers Max parallel workers to run the evaluation with (int or "auto")
serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto.
merge_system_user_message Boolean indicating whether to merge system and user messages (required for Mistral based judges)
"""

name = "mt_bench"

def gen_answers(self, server_url) -> None:
"""
Asks questions to model
Expand Down Expand Up @@ -86,17 +132,18 @@ def judge_answers(self, server_url) -> tuple:
)


class MTBenchBranchEvaluator(Evaluator):
class MTBenchBranchEvaluator(AbstractMTBenchEvaluator):
"""
Child class of an Evaluator for MT-Bench-Branch Benchmark
Evaluator for comparing taxonomy branches with MT-Bench-Branch Benchmark
Attributes
model_name Name of the model to evaluate
judge_model_name Name of the judge model
taxonomy_git_repo_path Taxonomy git repo path
branch Branch of taxonomy repo to eval QNAs against model
output_dir The directory to use for evaluation output
max_workers Max parallel workers to run the evaluation with
max_workers Max parallel workers to run the evaluation with (int or "auto")
serving_gpus Number of gpus allocated for serving. Used to tune with max_workers=auto.
merge_system_user_message Boolean indicating whether to merge system and user messages (required for Mistral based judges)
"""

Expand All @@ -109,16 +156,20 @@ def __init__(
taxonomy_git_repo_path: str,
branch: str,
output_dir: str = "eval_output",
max_workers: int = 40,
max_workers: int | str = "auto",
serving_gpus: int | None = None,
merge_system_user_message: bool = False,
) -> None:
self.model_name = model_name
self.judge_model_name = judge_model_name
super().__init__(
model_name,
judge_model_name,
output_dir,
max_workers,
serving_gpus,
merge_system_user_message,
)
self.taxonomy_git_repo_path = taxonomy_git_repo_path
self.branch = branch
self.output_dir = output_dir
self.max_workers = max_workers
self.merge_system_user_message = merge_system_user_message

def gen_answers(self, server_url) -> None:
"""
Expand Down

0 comments on commit ec709c7

Please sign in to comment.