diff --git a/src/instructlab/eval/evaluator.py b/src/instructlab/eval/evaluator.py index 086732c..9d64914 100644 --- a/src/instructlab/eval/evaluator.py +++ b/src/instructlab/eval/evaluator.py @@ -11,6 +11,3 @@ class Evaluator: def __init__(self, model: str) -> None: self.model = model - - def run(self) -> dict: - return {} diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py index 1938d92..6d877f6 100644 --- a/src/instructlab/eval/mmlu.py +++ b/src/instructlab/eval/mmlu.py @@ -22,6 +22,15 @@ def __init__( self.fewshots = fewshots self.batchsize = batchsize + def run(self) -> dict: + individual_scores: dict[str, float] = {} + overall_score: float = 0.0 + payload = { + "individual_scores": individual_scores, + "overall_score": overall_score, + } + return payload + class PR_MMLU_Evaluator(Evaluator): """ @@ -47,3 +56,14 @@ def __init__( self.task = task self.fewshots = fewshots self.batchsize = batchsize + + def run(self) -> dict: + individual_scores: dict[str, float] = {} + overall_score: float = 0.0 + qa_pairs: list[tuple] = [] + payload = { + "individual_scores": individual_scores, + "overall_score": overall_score, + "qa_pairs": qa_pairs, + } + return payload diff --git a/src/instructlab/eval/mtbench.py b/src/instructlab/eval/mtbench.py index 70f3761..df22832 100644 --- a/src/instructlab/eval/mtbench.py +++ b/src/instructlab/eval/mtbench.py @@ -16,6 +16,12 @@ def __init__(self, model, server_url: str) -> None: super().__init__(model) self.server_url = server_url + def run(self) -> dict: + overall_score = 0.0 + qa_pairs: list[tuple] = [] + payload = {"overall_score": overall_score, "qa_pairs": qa_pairs} + return payload + class PR_Bench_Evaluator(Evaluator): """ @@ -30,3 +36,9 @@ def __init__(self, model, server_url: str, questions: str) -> None: super().__init__(model) self.server_url = server_url self.questions = questions + + def run(self) -> dict: + overall_score = 0.0 + qa_pairs: list[tuple] = [] + payload = {"overall_score": overall_score, "qa_pairs": qa_pairs} + return payload