diff --git a/scripts/test_mmlu.py b/scripts/test_mmlu.py index 2db46c0..fd10f27 100755 --- a/scripts/test_mmlu.py +++ b/scripts/test_mmlu.py @@ -1,13 +1,19 @@ # First Party from instructlab.eval.mmlu import MMLUEvaluator +SYSTEM_PROMPT = """I am, Red Hat® Instruct Model based on Granite 7B, an AI language model developed by Red Hat and IBM Research, based on the Granite-7b-base language model. My primary function is to be a chat assistant.""" + def test_minimal_mmlu(): print("===> Executing 'test_minimal_mmlu'...") try: model_path = "instructlab/granite-7b-lab" tasks = ["mmlu_anatomy", "mmlu_astronomy"] - mmlu = MMLUEvaluator(model_path=model_path, tasks=tasks) + mmlu = MMLUEvaluator( + model_path=model_path, + tasks=tasks, + system_prompt=SYSTEM_PROMPT, + ) overall_score, individual_scores = mmlu.run() print(overall_score) print(individual_scores) diff --git a/tests/test_mmlu.py b/tests/test_mmlu.py index bdf4f90..2cc0c79 100644 --- a/tests/test_mmlu.py +++ b/tests/test_mmlu.py @@ -48,7 +48,10 @@ def test_mmlu_branch(eval_mock): tasks_dir = f"{os.path.dirname(os.path.realpath(__file__))}/testdata/sdg" tasks = ["mmlu_pr"] mmlu = MMLUBranchEvaluator( - model_path=MODEL_EXAMPLE, tasks_dir=tasks_dir, tasks=tasks + model_path=MODEL_EXAMPLE, + tasks_dir=tasks_dir, + tasks=tasks, + system_prompt="You are an intelligent AI language model.", ) overall_score, individual_scores = mmlu.run() @@ -62,7 +65,11 @@ def test_mmlu_branch(eval_mock): ) def test_mmlu(eval_mock): tasks = ["mmlu_anatomy", "mmlu_astronomy", "mmlu_algebra"] - mmlu = MMLUEvaluator(model_path=MODEL_EXAMPLE, tasks=tasks) + mmlu = MMLUEvaluator( + model_path=MODEL_EXAMPLE, + tasks=tasks, + system_prompt="You are an intelligent AI language model.", + ) overall_score, individual_scores = mmlu.run() eval_mock.assert_called()