From 50adbe72f6888b3dea833b3396742c2cce8f4014 Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Thu, 13 Jun 2024 11:20:47 -0400 Subject: [PATCH 01/11] Initial skeleton for Evaluator classes and exceptions Signed-off-by: Nathan Weinberg --- src/instructlab/eval/evaluator.py | 13 +++++++++++++ src/instructlab/eval/exceptions.py | 18 ++++++++++++++++++ src/instructlab/eval/mmlu.py | 16 ++++++++++++++++ src/instructlab/eval/mtbench.py | 14 ++++++++++++++ 4 files changed, 61 insertions(+) create mode 100644 src/instructlab/eval/evaluator.py create mode 100644 src/instructlab/eval/exceptions.py create mode 100644 src/instructlab/eval/mmlu.py create mode 100644 src/instructlab/eval/mtbench.py diff --git a/src/instructlab/eval/evaluator.py b/src/instructlab/eval/evaluator.py new file mode 100644 index 0000000..aedd7c0 --- /dev/null +++ b/src/instructlab/eval/evaluator.py @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: Apache-2.0 + + +class Evaluator: + """ + Parent class for Evaluators + """ + + def __init__(self, model) -> None: + self.model = model + + def run(self) -> dict: + return {} diff --git a/src/instructlab/eval/exceptions.py b/src/instructlab/eval/exceptions.py new file mode 100644 index 0000000..be4aff5 --- /dev/null +++ b/src/instructlab/eval/exceptions.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: Apache-2.0 + + +class EvalError(Exception): + """ + Parent class for all of instructlab-eval exceptions + """ + + +class ModelNotFoundError(EvalError): + """ + Exception raised when model is not able to be found + """ + + def __init__(self, model) -> None: + super().__init__() + self.model = model + self.message = f"Model {self.model} could not be found" diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py new file mode 100644 index 0000000..3b0d014 --- /dev/null +++ b/src/instructlab/eval/mmlu.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: Apache-2.0 + +# Local +from .evaluator import Evaluator + + +class MMLUEvaluator(Evaluator): + """ + 
Child class of an Evaluator for Massive Multitask Language Understanding (MMLU) + """ + + def __init__(self, model, tasks: list[str], fewshots: int, batchsize: int) -> None: + super().__init__(model) + self.tasks = tasks + self.fewshots = fewshots + self.batchsize = batchsize diff --git a/src/instructlab/eval/mtbench.py b/src/instructlab/eval/mtbench.py new file mode 100644 index 0000000..d49947c --- /dev/null +++ b/src/instructlab/eval/mtbench.py @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: Apache-2.0 + +# Local +from .evaluator import Evaluator + + +class MTBenchEvaluator(Evaluator): + """ + Child class of an Evaluator for Multi-turn Benchmark (MT-Bench) + """ + + def __init__(self, model, server: str) -> None: + super().__init__(model) + self.server = server From 3558524b8a589cdd92a359ab505f1e4b3bedf791 Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Fri, 14 Jun 2024 11:28:21 -0400 Subject: [PATCH 02/11] Add additional child classes for PR-Bench and PR-MMLU Signed-off-by: Nathan Weinberg --- src/instructlab/eval/mmlu.py | 17 ++++++++++++++++- src/instructlab/eval/mtbench.py | 13 ++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py index 3b0d014..892b295 100644 --- a/src/instructlab/eval/mmlu.py +++ b/src/instructlab/eval/mmlu.py @@ -4,7 +4,7 @@ from .evaluator import Evaluator -class MMLUEvaluator(Evaluator): +class MMLU_Evaluator(Evaluator): """ Child class of an Evaluator for Massive Multitask Language Understanding (MMLU) """ @@ -14,3 +14,18 @@ def __init__(self, model, tasks: list[str], fewshots: int, batchsize: int) -> No self.tasks = tasks self.fewshots = fewshots self.batchsize = batchsize + + +class PR_MMLU_Evaluator(Evaluator): + """ + Child class of an Evaluator for PR Massive Multitask Language Understanding (PR MMLU) + """ + + def __init__( + self, model, task: str, sdg_path: str, fewshots: int, batchsize: int + ) -> None: + super().__init__(model) + self.task = 
task + self.sdg_path = sdg_path + self.fewshots = fewshots + self.batchsize = batchsize diff --git a/src/instructlab/eval/mtbench.py b/src/instructlab/eval/mtbench.py index d49947c..d2067d4 100644 --- a/src/instructlab/eval/mtbench.py +++ b/src/instructlab/eval/mtbench.py @@ -4,7 +4,7 @@ from .evaluator import Evaluator -class MTBenchEvaluator(Evaluator): +class MT_Bench_Evaluator(Evaluator): """ Child class of an Evaluator for Multi-turn Benchmark (MT-Bench) """ @@ -12,3 +12,14 @@ class MTBenchEvaluator(Evaluator): def __init__(self, model, server: str) -> None: super().__init__(model) self.server = server + + +class PR_Bench_Evaluator(Evaluator): + """ + Child class of an Evaluator for PR-Bench Benchmark (PR-Bench) + """ + + def __init__(self, model, server: str, questions: str) -> None: + super().__init__(model) + self.server = server + self.questions = questions From fb2c51b51397cf65b370f29c1e41e2462b4fbbe7 Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Fri, 14 Jun 2024 13:27:07 -0400 Subject: [PATCH 03/11] Add attribute descriptors to class docstrings Signed-off-by: Nathan Weinberg --- src/instructlab/eval/evaluator.py | 5 ++++- src/instructlab/eval/exceptions.py | 3 +++ src/instructlab/eval/mmlu.py | 24 +++++++++++++++++++++--- src/instructlab/eval/mtbench.py | 7 +++++++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/instructlab/eval/evaluator.py b/src/instructlab/eval/evaluator.py index aedd7c0..086732c 100644 --- a/src/instructlab/eval/evaluator.py +++ b/src/instructlab/eval/evaluator.py @@ -4,9 +4,12 @@ class Evaluator: """ Parent class for Evaluators + + Atttributes: + model The model to be evaluated """ - def __init__(self, model) -> None: + def __init__(self, model: str) -> None: self.model = model def run(self) -> dict: diff --git a/src/instructlab/eval/exceptions.py b/src/instructlab/eval/exceptions.py index be4aff5..31c7606 100644 --- a/src/instructlab/eval/exceptions.py +++ b/src/instructlab/eval/exceptions.py @@ -10,6 
+10,9 @@ class EvalError(Exception): class ModelNotFoundError(EvalError): """ Exception raised when model is not able to be found + + Attributes + model model that is being operated on """ def __init__(self, model) -> None: diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py index 892b295..1938d92 100644 --- a/src/instructlab/eval/mmlu.py +++ b/src/instructlab/eval/mmlu.py @@ -7,9 +7,16 @@ class MMLU_Evaluator(Evaluator): """ Child class of an Evaluator for Massive Multitask Language Understanding (MMLU) + + Attributes: + tasks list of tasks for MMLU to test the model with + fewshots number of examples + batchsize number of GPUs """ - def __init__(self, model, tasks: list[str], fewshots: int, batchsize: int) -> None: + def __init__( + self, model, tasks: list[str], fewshots: int = 2, batchsize: int = 5 + ) -> None: super().__init__(model) self.tasks = tasks self.fewshots = fewshots @@ -19,13 +26,24 @@ def __init__(self, model, tasks: list[str], fewshots: int, batchsize: int) -> No class PR_MMLU_Evaluator(Evaluator): """ Child class of an Evaluator for PR Massive Multitask Language Understanding (PR MMLU) + + Attributes: + sdg_path path where all the PR MMLU tasks are stored + task group name that is shared by all the PR MMLU tasks + fewshots number of examples + batchsize number of GPUs """ def __init__( - self, model, task: str, sdg_path: str, fewshots: int, batchsize: int + self, + model, + sdg_path: str, + task: str = "mmlu_pr", + fewshots: int = 2, + batchsize: int = 5, ) -> None: super().__init__(model) - self.task = task self.sdg_path = sdg_path + self.task = task self.fewshots = fewshots self.batchsize = batchsize diff --git a/src/instructlab/eval/mtbench.py b/src/instructlab/eval/mtbench.py index d2067d4..8817c9e 100644 --- a/src/instructlab/eval/mtbench.py +++ b/src/instructlab/eval/mtbench.py @@ -7,6 +7,9 @@ class MT_Bench_Evaluator(Evaluator): """ Child class of an Evaluator for Multi-turn Benchmark (MT-Bench) + + Attributes + server 
vLLM server endpoint """ def __init__(self, model, server: str) -> None: @@ -17,6 +20,10 @@ def __init__(self, model, server: str) -> None: class PR_Bench_Evaluator(Evaluator): """ Child class of an Evaluator for PR-Bench Benchmark (PR-Bench) + + Attributes + server vLLM server endpoint + questions questions to be asked """ def __init__(self, model, server: str, questions: str) -> None: From f62821a7f1a9e2cf955940ac719aa6eb58a08095 Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Fri, 14 Jun 2024 13:53:43 -0400 Subject: [PATCH 04/11] Change 'server' to 'server_url' for clarity Signed-off-by: Nathan Weinberg --- src/instructlab/eval/mtbench.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/instructlab/eval/mtbench.py b/src/instructlab/eval/mtbench.py index 8817c9e..70f3761 100644 --- a/src/instructlab/eval/mtbench.py +++ b/src/instructlab/eval/mtbench.py @@ -9,12 +9,12 @@ class MT_Bench_Evaluator(Evaluator): Child class of an Evaluator for Multi-turn Benchmark (MT-Bench) Attributes - server vLLM server endpoint + server_url vLLM server endpoint """ - def __init__(self, model, server: str) -> None: + def __init__(self, model, server_url: str) -> None: super().__init__(model) - self.server = server + self.server_url = server_url class PR_Bench_Evaluator(Evaluator): @@ -22,11 +22,11 @@ class PR_Bench_Evaluator(Evaluator): Child class of an Evaluator for PR-Bench Benchmark (PR-Bench) Attributes - server vLLM server endpoint + server_url vLLM server endpoint questions questions to be asked """ - def __init__(self, model, server: str, questions: str) -> None: + def __init__(self, model, server_url: str, questions: str) -> None: super().__init__(model) - self.server = server + self.server_url = server_url self.questions = questions From 0023da30bbc68b6203c334185a1ec30b4bd6e1f5 Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Fri, 14 Jun 2024 14:29:40 -0400 Subject: [PATCH 05/11] Separated out individual `run` commands for each
child class Signed-off-by: Nathan Weinberg --- src/instructlab/eval/evaluator.py | 3 --- src/instructlab/eval/mmlu.py | 20 ++++++++++++++++++++ src/instructlab/eval/mtbench.py | 12 ++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/instructlab/eval/evaluator.py b/src/instructlab/eval/evaluator.py index 086732c..9d64914 100644 --- a/src/instructlab/eval/evaluator.py +++ b/src/instructlab/eval/evaluator.py @@ -11,6 +11,3 @@ class Evaluator: def __init__(self, model: str) -> None: self.model = model - - def run(self) -> dict: - return {} diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py index 1938d92..6d877f6 100644 --- a/src/instructlab/eval/mmlu.py +++ b/src/instructlab/eval/mmlu.py @@ -22,6 +22,15 @@ def __init__( self.fewshots = fewshots self.batchsize = batchsize + def run(self) -> dict: + individual_scores: dict[str, float] = {} + overall_score: float = 0.0 + payload = { + "individual_scores": individual_scores, + "overall_score": overall_score, + } + return payload + class PR_MMLU_Evaluator(Evaluator): """ @@ -47,3 +56,14 @@ def __init__( self.task = task self.fewshots = fewshots self.batchsize = batchsize + + def run(self) -> dict: + individual_scores: dict[str, float] = {} + overall_score: float = 0.0 + qa_pairs: list[tuple] = [] + payload = { + "individual_scores": individual_scores, + "overall_score": overall_score, + "qa_pairs": qa_pairs, + } + return payload diff --git a/src/instructlab/eval/mtbench.py b/src/instructlab/eval/mtbench.py index 70f3761..422d478 100644 --- a/src/instructlab/eval/mtbench.py +++ b/src/instructlab/eval/mtbench.py @@ -16,6 +16,12 @@ def __init__(self, model, server_url: str) -> None: super().__init__(model) self.server_url = server_url + def run(self) -> dict: + overall_score: float = 0.0 + qa_pairs: list[tuple] = [] + payload = {"overall_score": overall_score, "qa_pairs": qa_pairs} + return payload + class PR_Bench_Evaluator(Evaluator): """ @@ -30,3 +36,9 @@ def __init__(self, 
model, server_url: str, questions: str) -> None: super().__init__(model) self.server_url = server_url self.questions = questions + + def run(self) -> dict: + overall_score = 0.0 + qa_pairs: list[tuple] = [] + payload = {"overall_score": overall_score, "qa_pairs": qa_pairs} + return payload From 0db0be4d37b4b6d3a86e79fb5d7793c7447d360d Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Fri, 14 Jun 2024 15:48:53 -0400 Subject: [PATCH 06/11] Change 'model' to 'model_path' for clarity Signed-off-by: Nathan Weinberg --- src/instructlab/eval/evaluator.py | 6 +++--- src/instructlab/eval/mmlu.py | 8 ++++---- src/instructlab/eval/mtbench.py | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/instructlab/eval/evaluator.py b/src/instructlab/eval/evaluator.py index 9d64914..3bd51d5 100644 --- a/src/instructlab/eval/evaluator.py +++ b/src/instructlab/eval/evaluator.py @@ -6,8 +6,8 @@ class Evaluator: Parent class for Evaluators Atttributes: - model The model to be evaluated + model_path Path to the model to be evaluated """ - def __init__(self, model: str) -> None: - self.model = model + def __init__(self, model_path: str) -> None: + self.model_path = model_path diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py index 6d877f6..d01e59e 100644 --- a/src/instructlab/eval/mmlu.py +++ b/src/instructlab/eval/mmlu.py @@ -15,9 +15,9 @@ class MMLU_Evaluator(Evaluator): """ def __init__( - self, model, tasks: list[str], fewshots: int = 2, batchsize: int = 5 + self, model_path, tasks: list[str], fewshots: int = 2, batchsize: int = 5 ) -> None: - super().__init__(model) + super().__init__(model_path) self.tasks = tasks self.fewshots = fewshots self.batchsize = batchsize @@ -45,13 +45,13 @@ class PR_MMLU_Evaluator(Evaluator): def __init__( self, - model, + model_path, sdg_path: str, task: str = "mmlu_pr", fewshots: int = 2, batchsize: int = 5, ) -> None: - super().__init__(model) + super().__init__(model_path) self.sdg_path = sdg_path 
self.task = task self.fewshots = fewshots diff --git a/src/instructlab/eval/mtbench.py b/src/instructlab/eval/mtbench.py index 422d478..fae51a1 100644 --- a/src/instructlab/eval/mtbench.py +++ b/src/instructlab/eval/mtbench.py @@ -12,8 +12,8 @@ class MT_Bench_Evaluator(Evaluator): server_url vLLM server endpoint """ - def __init__(self, model, server_url: str) -> None: - super().__init__(model) + def __init__(self, model_path, server_url: str) -> None: + super().__init__(model_path) self.server_url = server_url def run(self) -> dict: @@ -32,8 +32,8 @@ class PR_Bench_Evaluator(Evaluator): questions questions to be asked """ - def __init__(self, model, server_url: str, questions: str) -> None: - super().__init__(model) + def __init__(self, model_path, server_url: str, questions: str) -> None: + super().__init__(model_path) self.server_url = server_url self.questions = questions From 4e72d0771f117f35e6f6f7941bcf2deacf26d2d4 Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Fri, 14 Jun 2024 15:53:33 -0400 Subject: [PATCH 07/11] Change 'fewshots' and 'batchsize' to snake_case Signed-off-by: Nathan Weinberg --- src/instructlab/eval/mmlu.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py index d01e59e..7b1f6b2 100644 --- a/src/instructlab/eval/mmlu.py +++ b/src/instructlab/eval/mmlu.py @@ -10,17 +10,17 @@ class MMLU_Evaluator(Evaluator): Attributes: tasks list of tasks for MMLU to test the model with - fewshots number of examples - batchsize number of GPUs + few_shots number of examples + batch_size number of GPUs """ def __init__( - self, model_path, tasks: list[str], fewshots: int = 2, batchsize: int = 5 + self, model_path, tasks: list[str], few_shots: int = 2, batch_size: int = 5 ) -> None: super().__init__(model_path) self.tasks = tasks - self.fewshots = fewshots - self.batchsize = batchsize + self.few_shots = few_shots + self.batch_size = batch_size def run(self) 
-> dict: individual_scores: dict[str, float] = {} @@ -39,8 +39,8 @@ class PR_MMLU_Evaluator(Evaluator): Attributes: sdg_path path where all the PR MMLU tasks are stored task group name that is shared by all the PR MMLU tasks - fewshots number of examples - batchsize number of GPUs + few_shots number of examples + batch_size number of GPUs """ def __init__( @@ -48,14 +48,14 @@ def __init__( model_path, sdg_path: str, task: str = "mmlu_pr", - fewshots: int = 2, - batchsize: int = 5, + few_shots: int = 2, + batch_size: int = 5, ) -> None: super().__init__(model_path) self.sdg_path = sdg_path self.task = task - self.fewshots = fewshots - self.batchsize = batchsize + self.few_shots = few_shots + self.batch_size = batch_size def run(self) -> dict: individual_scores: dict[str, float] = {} From 11ad7589eb2f251286f630933a841b706882df4e Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Sun, 16 Jun 2024 21:41:42 -0400 Subject: [PATCH 08/11] Change ret value from single dict to multiple ret values Signed-off-by: Nathan Weinberg --- src/instructlab/eval/mmlu.py | 21 ++++++--------------- src/instructlab/eval/mtbench.py | 10 ++++------ 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py index 7b1f6b2..2b35923 100644 --- a/src/instructlab/eval/mmlu.py +++ b/src/instructlab/eval/mmlu.py @@ -22,14 +22,10 @@ def __init__( self.few_shots = few_shots self.batch_size = batch_size - def run(self) -> dict: + def run(self) -> tuple: individual_scores: dict[str, float] = {} overall_score: float = 0.0 - payload = { - "individual_scores": individual_scores, - "overall_score": overall_score, - } - return payload + return overall_score, individual_scores class PR_MMLU_Evaluator(Evaluator): @@ -39,8 +35,8 @@ class PR_MMLU_Evaluator(Evaluator): Attributes: sdg_path path where all the PR MMLU tasks are stored task group name that is shared by all the PR MMLU tasks - few_shots number of examples - batch_size number of GPUs 
+ few_shots number of examples + batch_size number of GPUs """ def __init__( @@ -57,13 +53,8 @@ def __init__( self.few_shots = few_shots self.batch_size = batch_size - def run(self) -> dict: + def run(self) -> tuple: individual_scores: dict[str, float] = {} overall_score: float = 0.0 qa_pairs: list[tuple] = [] - payload = { - "individual_scores": individual_scores, - "overall_score": overall_score, - "qa_pairs": qa_pairs, - } - return payload + return overall_score, individual_scores, qa_pairs diff --git a/src/instructlab/eval/mtbench.py b/src/instructlab/eval/mtbench.py index fae51a1..25469ba 100644 --- a/src/instructlab/eval/mtbench.py +++ b/src/instructlab/eval/mtbench.py @@ -16,11 +16,10 @@ def __init__(self, model_path, server_url: str) -> None: super().__init__(model_path) self.server_url = server_url - def run(self) -> dict: + def run(self) -> tuple: overall_score: float = 0.0 qa_pairs: list[tuple] = [] - payload = {"overall_score": overall_score, "qa_pairs": qa_pairs} - return payload + return overall_score, qa_pairs class PR_Bench_Evaluator(Evaluator): @@ -37,8 +36,7 @@ def __init__(self, model_path, server_url: str, questions: str) -> None: self.server_url = server_url self.questions = questions - def run(self) -> dict: + def run(self) -> tuple: overall_score = 0.0 qa_pairs: list[tuple] = [] - payload = {"overall_score": overall_score, "qa_pairs": qa_pairs} - return payload + return overall_score, qa_pairs From 3eadb4dbb71a0df1adcd68e73a40baaed93ec2f0 Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Mon, 17 Jun 2024 09:55:18 -0400 Subject: [PATCH 09/11] Add docstrings to `run` methods Signed-off-by: Nathan Weinberg --- src/instructlab/eval/mmlu.py | 17 ++++++++++++++++- src/instructlab/eval/mtbench.py | 14 ++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py index 2b35923..820b067 100644 --- a/src/instructlab/eval/mmlu.py +++ b/src/instructlab/eval/mmlu.py @@ -9,7 +9,7 
@@ class MMLU_Evaluator(Evaluator): Child class of an Evaluator for Massive Multitask Language Understanding (MMLU) Attributes: - tasks list of tasks for MMLU to test the model with + tasks list of tasks for MMLU to test the model with few_shots number of examples batch_size number of GPUs """ @@ -23,6 +23,13 @@ def __init__( self.batch_size = batch_size def run(self) -> tuple: + """ + Runs MMLU evaluation + + Returns: + overall_score MMLU score for the overall model evaluation + individual_scores Individual MMLU score for each task + """ individual_scores: dict[str, float] = {} overall_score: float = 0.0 return overall_score, individual_scores @@ -54,6 +61,14 @@ def __init__( self.batch_size = batch_size def run(self) -> tuple: + """ + Runs PR MMLU evaluation + + Returns: + overall_score PR MMLU score for the overall model evaluation + individual_scores Individual PR MMLU scores for each task + qa_pairs Question and answer pairs from the evaluation + """ individual_scores: dict[str, float] = {} overall_score: float = 0.0 qa_pairs: list[tuple] = [] diff --git a/src/instructlab/eval/mtbench.py b/src/instructlab/eval/mtbench.py index 25469ba..1d8f586 100644 --- a/src/instructlab/eval/mtbench.py +++ b/src/instructlab/eval/mtbench.py @@ -17,6 +17,13 @@ def __init__(self, model_path, server_url: str) -> None: self.server_url = server_url def run(self) -> tuple: + """ + Runs MT-Bench evaluation + + Returns: + overall_score MT-Bench score for the overall model evaluation + qa_pairs Question and answer pairs from the evaluation + """ overall_score: float = 0.0 qa_pairs: list[tuple] = [] return overall_score, qa_pairs @@ -37,6 +44,13 @@ def __init__(self, model_path, server_url: str, questions: str) -> None: self.questions = questions def run(self) -> tuple: + """ + Runs PR-Bench evaluation + + Returns: + overall_score PR-Bench score for the overall model evaluation + qa_pairs Question and answer pairs from the evaluation + """ overall_score = 0.0 qa_pairs: list[tuple] = []
return overall_score, qa_pairs From b422048b912cc065e2c800f1cfe1d48a5c38b29b Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Mon, 17 Jun 2024 10:19:50 -0400 Subject: [PATCH 10/11] Update attributes of ModelNotFoundError class Signed-off-by: Nathan Weinberg --- src/instructlab/eval/exceptions.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/instructlab/eval/exceptions.py b/src/instructlab/eval/exceptions.py index 31c7606..caa45b0 100644 --- a/src/instructlab/eval/exceptions.py +++ b/src/instructlab/eval/exceptions.py @@ -12,10 +12,13 @@ class ModelNotFoundError(EvalError): Exception raised when model is not able to be found Attributes - model model that is being operated on + message error message to be printed on raise + model model that is being operated on + path filepath of model location """ - def __init__(self, model) -> None: + def __init__(self, path) -> None: super().__init__() - self.model = model - self.message = f"Model {self.model} could not be found" + self.path = path + self.model = path.rsplit("/")[-1] + self.message = f"Model {self.model} could not be found at {self.path}" From 20d2fc439b46f221a002169408ed5485c988169f Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Mon, 17 Jun 2024 11:12:28 -0400 Subject: [PATCH 11/11] Add badges to README.md Signed-off-by: Nathan Weinberg --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index da139cf..0e34e5f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ # eval +![Lint](https://github.com/instructlab/eval/actions/workflows/lint.yml/badge.svg?branch=main) +![Build](https://github.com/instructlab/eval/actions/workflows/pypi.yaml/badge.svg?branch=main) +![Release](https://img.shields.io/github/v/release/instructlab/eval) +![License](https://img.shields.io/github/license/instructlab/eval) + Python library for Evaluation