Skip to content

Commit

Permalink
chore: update tests to include system prompt in MMLU evals
Browse files — browse the repository at this point in the history
Signed-off-by: Oleg S <[email protected]>
Branch information:
RobotSail committed Dec 13, 2024
1 parent ab664b8 commit fd78adf
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
8 changes: 7 additions & 1 deletion scripts/test_mmlu.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
# First Party
from instructlab.eval.mmlu import MMLUEvaluator

SYSTEM_PROMPT = """I am, Red Hat® Instruct Model based on Granite 7B, an AI language model developed by Red Hat and IBM Research, based on the Granite-7b-base language model. My primary function is to be a chat assistant."""


def test_minimal_mmlu():
print("===> Executing 'test_minimal_mmlu'...")
try:
model_path = "instructlab/granite-7b-lab"
tasks = ["mmlu_anatomy", "mmlu_astronomy"]
mmlu = MMLUEvaluator(model_path=model_path, tasks=tasks)
mmlu = MMLUEvaluator(
model_path=model_path,
tasks=tasks,
system_prompt=SYSTEM_PROMPT,
)
overall_score, individual_scores = mmlu.run()
print(overall_score)
print(individual_scores)
Expand Down
11 changes: 9 additions & 2 deletions tests/test_mmlu.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ def test_mmlu_branch(eval_mock):
tasks_dir = f"{os.path.dirname(os.path.realpath(__file__))}/testdata/sdg"
tasks = ["mmlu_pr"]
mmlu = MMLUBranchEvaluator(
model_path=MODEL_EXAMPLE, tasks_dir=tasks_dir, tasks=tasks
model_path=MODEL_EXAMPLE,
tasks_dir=tasks_dir,
tasks=tasks,
system_prompt="You are an intelligent AI language model.",
)
overall_score, individual_scores = mmlu.run()

Expand All @@ -62,7 +65,11 @@ def test_mmlu_branch(eval_mock):
)
def test_mmlu(eval_mock):
tasks = ["mmlu_anatomy", "mmlu_astronomy", "mmlu_algebra"]
mmlu = MMLUEvaluator(model_path=MODEL_EXAMPLE, tasks=tasks)
mmlu = MMLUEvaluator(
model_path=MODEL_EXAMPLE,
tasks=tasks,
system_prompt="You are an intelligent AI language model.",
)
overall_score, individual_scores = mmlu.run()

eval_mock.assert_called()
Expand Down

0 comments on commit fd78adf

Please sign in to comment.