
Commit 6b3495b

Merge pull request #115 from danmcp/cleanuploadanswers

Cleanup usage of load model answers

mergify[bot] authored Sep 12, 2024
2 parents 78408f5 + fc04584
Showing 2 changed files with 14 additions and 25 deletions.
src/instructlab/eval/mt_bench_common.py: 25 changes (4 additions, 21 deletions)
@@ -87,32 +87,15 @@ def load_questions(question_file: str, begin: Optional[int], end: Optional[int])
     return questions
 
 
-def load_model_answers(answer_dir: str, model_name=None, answer_file=None) -> dict:
-    """Load model answers.
+def load_model_answers(answer_file: str, model_name: str | None = None) -> dict:
+    """Load model answers from a single answer file
 
     The return value is a python dict of type:
     Dict[model_name: str -> Dict[question_id: int -> answer: dict]]
     """
     logger.debug(locals())
-    model_answers = {}
-    if answer_file is not None:
-        filename = os.path.basename(answer_file)
-        # Removing ".jsonl"
-        file_model_name = filename[:-6]
-        model_answers[file_model_name] = _load_answers(answer_file)
-    else:
-        for root, _, files in os.walk(answer_dir):
-            for filename in files:
-                if filename.endswith(".jsonl"):
-                    # Removing ".jsonl"
-                    file_model_name = filename[:-6]
-                    file_path = os.path.join(root, filename)
-                    model_answers[model_name or file_model_name] = _load_answers(
-                        file_path
-                    )
-                    if model_name == file_model_name:
-                        logger.debug("Found answer file matching: %s", model_name)
-                        break
+    file_model_name = os.path.splitext(os.path.basename(answer_file))[0]
+    model_answers = {model_name or file_model_name: _load_answers(answer_file)}
     return model_answers


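With this change, load_model_answers takes the path to a single answers file and keys the result by the file's base name unless model_name overrides it. A minimal usage sketch (the path and model name below are made up for illustration):

    from instructlab.eval.mt_bench_common import load_model_answers

    # Key is derived from the file name when model_name is omitted.
    model_answers = load_model_answers("model_answer/granite-7b.jsonl")
    # -> {"granite-7b": {question_id: answer_dict, ...}}
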
src/instructlab/eval/mt_bench_judgment.py: 14 changes (10 additions, 4 deletions)
@@ -162,7 +162,9 @@ def judge_model(
     """Judge the model based on questions and reference answers"""
     logger.debug(locals())
     package_data_dir = os.path.join(os.path.dirname(__file__), "data")
+    use_builtin_ref_answers = False
     if data_dir is None:
+        use_builtin_ref_answers = True
         data_dir = package_data_dir
 
     data_base_dir = bench_dir(data_dir, bench_name, branch)
@@ -172,15 +174,19 @@
 
     question_file = os.path.join(data_base_dir, "question.jsonl")
     answer_file = os.path.join(output_base_dir, "model_answer", f"{model_name}.jsonl")
-    answer_dir = os.path.dirname(answer_file)
-    ref_answer_dir = os.path.join(data_base_dir, "reference_answer")
+    if use_builtin_ref_answers:
+        ref_answer_file = os.path.join(data_base_dir, "reference_answer", "gpt-4.jsonl")
+    else:
+        ref_answer_file = os.path.join(
+            data_base_dir, "reference_answer", f"{judge_model_name}.jsonl"
+        )
 
     # Load questions
     questions = load_questions(question_file, None, None)
 
     # Load answers
-    model_answers = load_model_answers(answer_dir, answer_file=answer_file)
-    ref_answers = load_model_answers(ref_answer_dir, judge_model_name)
+    model_answers = load_model_answers(answer_file)
+    ref_answers = load_model_answers(ref_answer_file, judge_model_name)
 
     # Load judge
     judge_prompts = load_judge_prompts(judge_file)
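judge_model now resolves a single reference-answer file up front: the packaged gpt-4.jsonl when no data_dir is supplied, otherwise <judge_model_name>.jsonl under the caller's data directory. Because judge_model_name is passed as the model_name override, the reference answers stay keyed by the judge's name regardless of which file was read. A rough sketch of the resulting structure (paths and names are illustrative only):

    from instructlab.eval.mt_bench_common import load_model_answers

    # Built-in reference answers, re-keyed under the configured judge name.
    ref_answers = load_model_answers(
        "data/mt_bench/reference_answer/gpt-4.jsonl", "gpt-4-turbo"
    )
    # -> {"gpt-4-turbo": {question_id: answer_dict, ...}}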
