Merge pull request #6 from nathan-weinberg/skeleton

Initial skeleton for Evaluator classes and exceptions
nathan-weinberg authored Jun 17, 2024
2 parents a770a86 + 20d2fc4 commit 6632002
Showing 5 changed files with 173 additions and 0 deletions.
5 changes: 5 additions & 0 deletions README.md
@@ -1,3 +1,8 @@
# eval

![Lint](https://github.com/instructlab/eval/actions/workflows/lint.yml/badge.svg?branch=main)
![Build](https://github.com/instructlab/eval/actions/workflows/pypi.yaml/badge.svg?branch=main)
![Release](https://img.shields.io/github/v/release/instructlab/eval)
![License](https://img.shields.io/github/license/instructlab/eval)

Python library for Evaluation
13 changes: 13 additions & 0 deletions src/instructlab/eval/evaluator.py
@@ -0,0 +1,13 @@
# SPDX-License-Identifier: Apache-2.0


class Evaluator:
"""
Parent class for Evaluators
    Attributes:
        model_path path to the model to be evaluated
"""

def __init__(self, model_path: str) -> None:
self.model_path = model_path
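
To illustrate how the base class is meant to be extended, here is a minimal sketch; the subclass name and model path are hypothetical, and the run method mirrors the stubs added elsewhere in this commit:

    from instructlab.eval.evaluator import Evaluator


    class MyEvaluator(Evaluator):  # hypothetical subclass for illustration
        def run(self) -> float:
            # a real evaluator would load and score the model at self.model_path
            return 0.0


    evaluator = MyEvaluator("models/granite-7b-lab")  # hypothetical model path
    print(evaluator.model_path)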
24 changes: 24 additions & 0 deletions src/instructlab/eval/exceptions.py
@@ -0,0 +1,24 @@
# SPDX-License-Identifier: Apache-2.0


class EvalError(Exception):
"""
Parent class for all of instructlab-eval exceptions
"""


class ModelNotFoundError(EvalError):
"""
    Exception raised when a model cannot be found
    Attributes:
message error message to be printed on raise
model model that is being operated on
path filepath of model location
"""

    def __init__(self, path: str) -> None:
        super().__init__()
        self.path = path
        self.model = path.rsplit("/", 1)[-1]
self.message = f"Model {self.model} could not be found at {self.path}"
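
A sketch of how these exceptions might be raised and handled; the existence check and the model path are illustrative, since this commit does not yet use the exception anywhere:

    import os

    from instructlab.eval.exceptions import EvalError, ModelNotFoundError

    model_path = "models/granite-7b-lab"  # hypothetical path
    try:
        if not os.path.exists(model_path):
            raise ModelNotFoundError(model_path)
    except EvalError as err:
        # prints: Model granite-7b-lab could not be found at models/granite-7b-lab
        print(err.message)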
75 changes: 75 additions & 0 deletions src/instructlab/eval/mmlu.py
@@ -0,0 +1,75 @@
# SPDX-License-Identifier: Apache-2.0

# Local
from .evaluator import Evaluator


class MMLU_Evaluator(Evaluator):
"""
Child class of an Evaluator for Massive Multitask Language Understanding (MMLU)
Attributes:
tasks list of tasks for MMLU to test the model with
        few_shots number of few-shot examples included in each prompt
batch_size number of GPUs
"""

def __init__(
        self, model_path: str, tasks: list[str], few_shots: int = 2, batch_size: int = 5
) -> None:
super().__init__(model_path)
self.tasks = tasks
self.few_shots = few_shots
self.batch_size = batch_size

def run(self) -> tuple:
"""
Runs MMLU evaluation
Returns:
overall_score MMLU score for the overall model evaluation
individual_scores Individual MMLU score for each task
"""
        # Skeleton: evaluation logic not yet implemented
        individual_scores: dict[str, float] = {}
        overall_score: float = 0.0
        return overall_score, individual_scores


class PR_MMLU_Evaluator(Evaluator):
"""
Child class of an Evaluator for PR Massive Multitask Language Understanding (PR MMLU)
Attributes:
sdg_path path where all the PR MMLU tasks are stored
task group name that is shared by all the PR MMLU tasks
        few_shots number of few-shot examples included in each prompt
batch_size number of GPUs
"""

def __init__(
self,
        model_path: str,
sdg_path: str,
task: str = "mmlu_pr",
few_shots: int = 2,
batch_size: int = 5,
) -> None:
super().__init__(model_path)
self.sdg_path = sdg_path
self.task = task
self.few_shots = few_shots
self.batch_size = batch_size

def run(self) -> tuple:
"""
Runs PR MMLU evaluation
Returns:
overall_score PR MMLU score for the overall model evaluation
individual_scores Individual PR MMLU scores for each task
qa_pairs Question and answer pairs from the evaluation
"""
        # Skeleton: evaluation logic not yet implemented
        individual_scores: dict[str, float] = {}
        overall_score: float = 0.0
        qa_pairs: list[tuple] = []
        return overall_score, individual_scores, qa_pairs
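
A usage sketch for the two MMLU evaluators; the model path, task names, and SDG directory are placeholders, and since run is still a skeleton the returned scores come back as zeros and the QA pairs list is empty:

    from instructlab.eval.mmlu import MMLU_Evaluator, PR_MMLU_Evaluator

    mmlu = MMLU_Evaluator(
        model_path="models/granite-7b-lab",        # placeholder path
        tasks=["mmlu_anatomy", "mmlu_astronomy"],  # placeholder task names
        few_shots=2,
        batch_size=5,
    )
    overall_score, individual_scores = mmlu.run()

    pr_mmlu = PR_MMLU_Evaluator(
        model_path="models/granite-7b-lab",
        sdg_path="generated/",                     # placeholder SDG output directory
    )
    overall_score, individual_scores, qa_pairs = pr_mmlu.run()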
56 changes: 56 additions & 0 deletions src/instructlab/eval/mtbench.py
@@ -0,0 +1,56 @@
# SPDX-License-Identifier: Apache-2.0

# Local
from .evaluator import Evaluator


class MT_Bench_Evaluator(Evaluator):
"""
Child class of an Evaluator for Multi-turn Benchmark (MT-Bench)
    Attributes:
server_url vLLM server endpoint
"""

    def __init__(self, model_path: str, server_url: str) -> None:
super().__init__(model_path)
self.server_url = server_url

def run(self) -> tuple:
"""
Runs MT-Bench evaluation
Returns:
overall_score MT-Bench score for the overall model evaluation
qa_pairs Question and answer pairs from the evaluation
"""
        # Skeleton: evaluation logic not yet implemented
        overall_score: float = 0.0
        qa_pairs: list[tuple] = []
        return overall_score, qa_pairs


class PR_Bench_Evaluator(Evaluator):
"""
    Child class of an Evaluator for the PR Benchmark (PR-Bench)
    Attributes:
server_url vLLM server endpoint
questions questions to be asked
"""

    def __init__(self, model_path: str, server_url: str, questions: str) -> None:
super().__init__(model_path)
self.server_url = server_url
self.questions = questions

def run(self) -> tuple:
"""
Runs PR-Bench evaluation
Returns:
            overall_score PR-Bench score for the overall model evaluation
qa_pairs Question and answer pairs from the evaluation
"""
        # Skeleton: evaluation logic not yet implemented
        overall_score: float = 0.0
        qa_pairs: list[tuple] = []
        return overall_score, qa_pairs
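
And the corresponding sketch for the MT-Bench evaluators; the server URL and questions value are placeholders, as this commit does not define what questions should contain:

    from instructlab.eval.mtbench import MT_Bench_Evaluator, PR_Bench_Evaluator

    mt_bench = MT_Bench_Evaluator(
        model_path="models/granite-7b-lab",     # placeholder path
        server_url="http://localhost:8000/v1",  # placeholder vLLM endpoint
    )
    overall_score, qa_pairs = mt_bench.run()

    pr_bench = PR_Bench_Evaluator(
        model_path="models/granite-7b-lab",
        server_url="http://localhost:8000/v1",
        questions="questions.jsonl",            # placeholder value
    )
    overall_score, qa_pairs = pr_bench.run()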
