diff --git a/pyproject.toml b/pyproject.toml index da26b04..b11c7bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,12 @@ homepage = "https://instructlab.ai" source = "https://github.com/instructlab/eval" issues = "https://github.com/instructlab/eval/issues" +[project.entry-points."instructlab.eval.evaluator"] +"mmlu" = "instructlab.eval.mmlu:MMLUEvaluator" +"mmlu_branch" = "instructlab.eval.mmlu:MMLUBranchEvaluator" +"mt_bench" = "instructlab.eval.mt_bench:MTBenchEvaluator" +"mt_bench_branch" = "instructlab.eval.mt_bench:MTBenchBranchEvaluator" + [tool.setuptools_scm] version_file = "src/instructlab/eval/_version.py" # do not include +gREV local version, required for Test PyPI upload diff --git a/src/instructlab/eval/evaluator.py b/src/instructlab/eval/evaluator.py index f62b254..7972aec 100644 --- a/src/instructlab/eval/evaluator.py +++ b/src/instructlab/eval/evaluator.py @@ -6,5 +6,7 @@ class Evaluator: Parent class for Evaluators """ + name: str + def __init__(self) -> None: pass diff --git a/src/instructlab/eval/mmlu.py b/src/instructlab/eval/mmlu.py index 962b794..a3d9f6c 100644 --- a/src/instructlab/eval/mmlu.py +++ b/src/instructlab/eval/mmlu.py @@ -84,6 +84,8 @@ class MMLUEvaluator(Evaluator): batch_size number of GPUs """ + name = "mmlu" + def __init__( self, model_path, @@ -147,6 +149,8 @@ class MMLUBranchEvaluator(Evaluator): batch_size number of GPUs """ + name = "mmlu_branch" + def __init__( self, model_path, diff --git a/src/instructlab/eval/mt_bench.py b/src/instructlab/eval/mt_bench.py index 3a5f12f..3043d10 100644 --- a/src/instructlab/eval/mt_bench.py +++ b/src/instructlab/eval/mt_bench.py @@ -22,6 +22,8 @@ class MTBenchEvaluator(Evaluator): max_workers Max parallel workers to run the evaluation with """ + name = "mt_bench" + def __init__( self, model_name: str, @@ -82,6 +84,8 @@ class MTBenchBranchEvaluator(Evaluator): max_workers Max parallel workers to run the evaluation with """ + name = "mt_bench_branch" + def __init__( self, 
def test_evaluator_eps():
    """Verify that the package's evaluators are registered as entry points.

    Every entry point in the ``instructlab.eval.evaluator`` group that is
    provided by the ``instructlab.eval`` package is loaded and must:

    * resolve to a subclass of ``Evaluator``;
    * expose a ``name`` class attribute equal to the entry point name.

    The resulting name -> class mapping must match the expected evaluators
    exactly, so both a missing and an unexpected registration fail the test.
    """
    group = "instructlab.eval.evaluator"
    package = "instructlab.eval"
    expected = {
        "mmlu": MMLUEvaluator,
        "mmlu_branch": MMLUBranchEvaluator,
        "mt_bench": MTBenchEvaluator,
        "mt_bench_branch": MTBenchBranchEvaluator,
    }

    found = {}
    for ep in entry_points(group=group):
        # Other installed projects may register evaluators in the same group;
        # only entry points that live in this package are checked here. The
        # match is done on a dotted boundary so that an unrelated module such
        # as "instructlab.evalfoo" is not mistaken for part of this package.
        if ep.module != package and not ep.module.startswith(f"{package}."):
            continue
        evaluator = ep.load()
        assert issubclass(
            evaluator, Evaluator
        ), f"entry point {ep.name!r} does not load an Evaluator subclass"
        assert (
            evaluator.name == ep.name
        ), f"{evaluator.__name__}.name does not match entry point {ep.name!r}"
        found[ep.name] = evaluator

    assert found == expected