From b3fcc47fe96c9d680145c8a82b4e221d8b9437f9 Mon Sep 17 00:00:00 2001
From: Nathan Weinberg <nweinber@redhat.com>
Date: Wed, 26 Jun 2024 16:15:10 -0400
Subject: [PATCH 1/2] Add list of default MMLU tasks as a constant

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
---
 src/instructlab/eval/mmlu.py | 66 +++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py
index 8ae4eb7..261dbbd 100644
--- a/src/instructlab/eval/mmlu.py
+++ b/src/instructlab/eval/mmlu.py
@@ -11,6 +11,70 @@
 # First Party
 from instructlab.eval.evaluator import Evaluator
 
+MMLU_TASKS = (
+    "mmlu_abstract_algebra",
+    "mmlu_anatomy",
+    "mmlu_astronomy",
+    "mmlu_business_ethics",
+    "mmlu_clinical_knowledge",
+    "mmlu_college_biology",
+    "mmlu_college_chemistry",
+    "mmlu_college_computer_science",
+    "mmlu_college_mathematics",
+    "mmlu_college_medicine",
+    "mmlu_college_physics",
+    "mmlu_computer_security",
+    "mmlu_conceptual_physics",
+    "mmlu_econometrics",
+    "mmlu_electrical_engineering",
+    "mmlu_elementary_mathematics",
+    "mmlu_formal_logic",
+    "mmlu_global_facts",
+    "mmlu_high_school_biology",
+    "mmlu_high_school_chemistry",
+    "mmlu_high_school_computer_science",
+    "mmlu_high_school_european_history",
+    "mmlu_high_school_geography",
+    "mmlu_high_school_government_and_politics",
+    "mmlu_high_school_macroeconomics",
+    "mmlu_high_school_mathematics",
+    "mmlu_high_school_microeconomics",
+    "mmlu_high_school_physics",
+    "mmlu_high_school_psychology",
+    "mmlu_high_school_statistics",
+    "mmlu_high_school_us_history",
+    "mmlu_high_school_world_history",
+    "mmlu_human_aging",
+    "mmlu_human_sexuality",
+    "mmlu_humanities",
+    "mmlu_international_law",
+    "mmlu_jurisprudence",
+    "mmlu_logical_fallacies",
+    "mmlu_machine_learning",
+    "mmlu_management",
+    "mmlu_marketing",
+    "mmlu_medical_genetics",
+    "mmlu_miscellaneous",
+    "mmlu_moral_disputes",
+    "mmlu_moral_scenarios",
+    "mmlu_nutrition",
+    "mmlu_other",
+    "mmlu_philosophy",
+    "mmlu_prehistory",
+    "mmlu_professional_accounting",
+    "mmlu_professional_law",
+    "mmlu_professional_medicine",
+    "mmlu_professional_psychology",
+    "mmlu_public_relations",
+    "mmlu_security_studies",
+    "mmlu_social_sciences",
+    "mmlu_sociology",
+    "mmlu_stem",
+    "mmlu_us_foreign_policy",
+    "mmlu_virology",
+    "mmlu_world_religions",
+)
+
 
 class MMLUEvaluator(Evaluator):
     """
@@ -27,7 +91,7 @@ class MMLUEvaluator(Evaluator):
     def __init__(
         self,
         model_path,
-        tasks: list[str],
+        tasks: tuple[str, ...] = MMLU_TASKS,
         model_dtype="bfloat16",
         few_shots: int = 2,
         batch_size: int = 5,

From 6cf33563f70ee75711313b2aa114f98664d2188f Mon Sep 17 00:00:00 2001
From: Nathan Weinberg <nweinber@redhat.com>
Date: Thu, 27 Jun 2024 11:54:32 -0400
Subject: [PATCH 2/2] Reverted from tuple to list, added
 dangerous-default-value to pylint ignore

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
---
 .pylintrc                    | 3 ++-
 src/instructlab/eval/mmlu.py | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.pylintrc b/.pylintrc
index 64ef3d1..0959615 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -446,7 +446,8 @@ disable=raw-checker-failed,
         abstract-method,
         pointless-statement,
         wrong-import-order,
-        line-too-long
+        line-too-long,
+        dangerous-default-value
 
 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option
diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py
index 261dbbd..20b4375 100644
--- a/src/instructlab/eval/mmlu.py
+++ b/src/instructlab/eval/mmlu.py
@@ -11,7 +11,7 @@
 # First Party
 from instructlab.eval.evaluator import Evaluator
 
-MMLU_TASKS = (
+MMLU_TASKS = [
     "mmlu_abstract_algebra",
     "mmlu_anatomy",
     "mmlu_astronomy",
@@ -73,7 +73,7 @@
     "mmlu_us_foreign_policy",
     "mmlu_virology",
     "mmlu_world_religions",
-)
+]
 
 
 class MMLUEvaluator(Evaluator):
@@ -91,7 +91,7 @@ class MMLUEvaluator(Evaluator):
     def __init__(
         self,
         model_path,
-        tasks: tuple[str, ...] = MMLU_TASKS,
+        tasks: list[str] = MMLU_TASKS,
         model_dtype="bfloat16",
         few_shots: int = 2,
         batch_size: int = 5,