From 11ad7589eb2f251286f630933a841b706882df4e Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Sun, 16 Jun 2024 21:41:42 -0400 Subject: [PATCH] Change ret value from single dict to multiple ret values Signed-off-by: Nathan Weinberg --- src/instructlab/eval/mmlu.py | 21 ++++++--------------- src/instructlab/eval/mtbench.py | 10 ++++------ 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py index 7b1f6b2..2b35923 100644 --- a/src/instructlab/eval/mmlu.py +++ b/src/instructlab/eval/mmlu.py @@ -22,14 +22,10 @@ def __init__( self.few_shots = few_shots self.batch_size = batch_size - def run(self) -> dict: + def run(self) -> tuple: individual_scores: dict[str, float] = {} overall_score: float = 0.0 - payload = { - "individual_scores": individual_scores, - "overall_score": overall_score, - } - return payload + return overall_score, individual_scores class PR_MMLU_Evaluator(Evaluator): @@ -39,8 +35,8 @@ class PR_MMLU_Evaluator(Evaluator): Attributes: sdg_path path where all the PR MMLU tasks are stored task group name that is shared by all the PR MMLU tasks - few_shots number of examples - batch_size number of GPUs + few_shots number of examples + batch_size number of GPUs """ def __init__( @@ -57,13 +53,8 @@ def __init__( self.few_shots = few_shots self.batch_size = batch_size - def run(self) -> dict: + def run(self) -> tuple: individual_scores: dict[str, float] = {} overall_score: float = 0.0 qa_pairs: list[tuple] = [] - payload = { - "individual_scores": individual_scores, - "overall_score": overall_score, - "qa_pairs": qa_pairs, - } - return payload + return overall_score, individual_scores, qa_pairs diff --git a/src/instructlab/eval/mtbench.py b/src/instructlab/eval/mtbench.py index fae51a1..25469ba 100644 --- a/src/instructlab/eval/mtbench.py +++ b/src/instructlab/eval/mtbench.py @@ -16,11 +16,10 @@ def __init__(self, model_path, server_url: str) -> None: super().__init__(model_path) self.server_url = server_url - def run(self) -> dict: + def run(self) -> tuple: overall_score: float = 0.0 qa_pairs: list[tuple] = [] - payload = {"overall_score": overall_score, "qa_pairs": qa_pairs} - return payload + return overall_score, qa_pairs class PR_Bench_Evaluator(Evaluator): @@ -37,8 +36,7 @@ def __init__(self, model_path, server_url: str, questions: str) -> None: self.server_url = server_url self.questions = questions - def run(self) -> dict: + def run(self) -> tuple: overall_score = 0.0 qa_pairs: list[tuple] = [] - payload = {"overall_score": overall_score, "qa_pairs": qa_pairs} - return payload + return overall_score, qa_pairs