From fc045849309ca2c4d1e3e641d9261570c9e43147 Mon Sep 17 00:00:00 2001
From: Dan McPherson
Date: Wed, 28 Aug 2024 12:57:35 -0400
Subject: [PATCH] Cleanup usage of load model answers

This removes the directory-walking logic from load_model_answers in
favor of finding the exact file needed. Since we no longer attempt to
support multiple models within a single eval, this simplification is
possible and avoids some confusing logic that existed to allow any
judge model to be passed while still using the built-in reference
answers.

Signed-off-by: Dan McPherson
---
 src/instructlab/eval/mt_bench_common.py   | 25 ++++-------------------
 src/instructlab/eval/mt_bench_judgment.py | 14 +++++++++----
 2 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/src/instructlab/eval/mt_bench_common.py b/src/instructlab/eval/mt_bench_common.py
index 5c094c7..a9c52c6 100644
--- a/src/instructlab/eval/mt_bench_common.py
+++ b/src/instructlab/eval/mt_bench_common.py
@@ -87,32 +87,15 @@ def load_questions(question_file: str, begin: Optional[int], end: Optional[int])
     return questions
 
 
-def load_model_answers(answer_dir: str, model_name=None, answer_file=None) -> dict:
-    """Load model answers.
+def load_model_answers(answer_file: str, model_name: str | None = None) -> dict:
+    """Load model answers from a single answer file
 
     The return value is a python dict of type:
     Dict[model_name: str -> Dict[question_id: int -> answer: dict]]
     """
     logger.debug(locals())
-    model_answers = {}
-    if answer_file is not None:
-        filename = os.path.basename(answer_file)
-        # Removing ".jsonl"
-        file_model_name = filename[:-6]
-        model_answers[file_model_name] = _load_answers(answer_file)
-    else:
-        for root, _, files in os.walk(answer_dir):
-            for filename in files:
-                if filename.endswith(".jsonl"):
-                    # Removing ".jsonl"
-                    file_model_name = filename[:-6]
-                    file_path = os.path.join(root, filename)
-                    model_answers[model_name or file_model_name] = _load_answers(
-                        file_path
-                    )
-                    if model_name == file_model_name:
-                        logger.debug("Found answer file matching: %s", model_name)
-                        break
+    file_model_name = os.path.splitext(os.path.basename(answer_file))[0]
+    model_answers = {model_name or file_model_name: _load_answers(answer_file)}
 
     return model_answers
 
diff --git a/src/instructlab/eval/mt_bench_judgment.py b/src/instructlab/eval/mt_bench_judgment.py
index 5dc4b84..0d24012 100644
--- a/src/instructlab/eval/mt_bench_judgment.py
+++ b/src/instructlab/eval/mt_bench_judgment.py
@@ -162,7 +162,9 @@ def judge_model(
     """Judge the model based on questions and reference answers"""
     logger.debug(locals())
     package_data_dir = os.path.join(os.path.dirname(__file__), "data")
+    use_builtin_ref_answers = False
     if data_dir is None:
+        use_builtin_ref_answers = True
         data_dir = package_data_dir
 
     data_base_dir = bench_dir(data_dir, bench_name, branch)
@@ -172,15 +174,19 @@ def judge_model(
     question_file = os.path.join(data_base_dir, "question.jsonl")
     answer_file = os.path.join(output_base_dir, "model_answer", f"{model_name}.jsonl")
-    answer_dir = os.path.dirname(answer_file)
-    ref_answer_dir = os.path.join(data_base_dir, "reference_answer")
+    if use_builtin_ref_answers:
+        ref_answer_file = os.path.join(data_base_dir, "reference_answer", "gpt-4.jsonl")
+    else:
+        ref_answer_file = os.path.join(
+            data_base_dir, "reference_answer", f"{judge_model_name}.jsonl"
+        )
 
     # Load questions
     questions = load_questions(question_file, None, None)
 
     # Load answers
-    model_answers = load_model_answers(answer_dir, answer_file=answer_file)
-    ref_answers = load_model_answers(ref_answer_file, judge_model_name)
+    model_answers = load_model_answers(answer_file)
+    ref_answers = load_model_answers(ref_answer_file, judge_model_name)
 
     # Load judge
     judge_prompts = load_judge_prompts(judge_file)
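-- 
For readers of this patch: below is a minimal, standalone sketch of what
the simplified loader does, so the new behavior can be read in one place.
The helper name load_answers_sketch, the JSONL file layout, and the
"question_id" field are assumptions inferred from the docstring's
Dict[question_id -> answer] shape; only the splitext/basename keying and
the model_name override come from the patch itself.

# Standalone sketch (not the package's actual code) of the new
# load_model_answers behavior: one exact file, no directory walking.
import json
import os


def load_answers_sketch(answer_file: str, model_name: str | None = None) -> dict:
    # Assumes a JSONL file whose records each carry a "question_id" key,
    # matching the Dict[question_id -> answer] shape in the docstring.
    answers = {}
    with open(answer_file, encoding="utf-8") as f:
        for line in f:
            if line.strip():
                record = json.loads(line)
                answers[record["question_id"]] = record
    # Key by the caller-supplied model name, else the basename without its
    # extension, mirroring the patched function's os.path.splitext logic.
    file_model_name = os.path.splitext(os.path.basename(answer_file))[0]
    return {model_name or file_model_name: answers}

For a file named gpt-4.jsonl, load_answers_sketch(path) returns
{"gpt-4": {...}}; passing model_name explicitly keys the same answers
under that name instead, which is how ref_answers above ends up keyed by
judge_model_name.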