Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #250 #251

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions src/ontogpt/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1474,11 +1474,13 @@ def eval(evaluator, num_tests, output, chunking, model, **kwargs):
if model:
selectmodel = get_model_by_name(model)
modelname = selectmodel["alternative_names"][0]
else:
modelname = DEFAULT_MODEL

evaluator = create_evaluator(evaluator)
evaluator.num_tests = num_tests
evaluator.chunking = chunking
evaluator.model = modelname
evaluator = create_evaluator(name=evaluator,
num_tests=num_tests,
chunking=chunking,
model=modelname)
eos = evaluator.eval()
output.write(dump_minimal_yaml(eos, minimize=False))

Expand Down
4 changes: 2 additions & 2 deletions src/ontogpt/engines/knowledge_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class KnowledgeEngine(ABC):
api_key: str = ""
"""OpenAI API key."""

model: MODEL_NAME = ""
model: str = None
"""Language Model. This may be overridden in subclasses."""

# annotator: TextAnnotatorInterface = None
Expand Down Expand Up @@ -166,7 +166,7 @@ def __post_init__(self):
try:
self.encoding = tiktoken.encoding_for_model(self.client.model)
except KeyError:
self.encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
self.encoding = tiktoken.encoding_for_model(DEFAULT_MODEL)
logger.error(f"Could not find encoding for model {self.client.model}")

def set_api_key(self, key: str):
Expand Down
4 changes: 3 additions & 1 deletion src/ontogpt/engines/spires_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@
class SPIRESEngine(KnowledgeEngine):
"""Knowledge extractor."""

engine: str = "gpt-3.5-turbo-instruct"
engine: str = None
"""The engine name."""

recurse: bool = True
"""If true, then complex non-named entity objects are always recursively parsed.
If this is false AND the complex object is a pair, then token-based splitting is
Expand Down
8 changes: 5 additions & 3 deletions src/ontogpt/evaluation/ctd/eval_ctd.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,7 @@ class EvalCTD(SPIRESEvaluationEngine):
object_prefix = "MESH"

def __post_init__(self):
self.extractor = SPIRESEngine(template="ctd.ChemicalToDiseaseDocument",
model=self.model)
self.extractor = SPIRESEngine(template="ctd.ChemicalToDiseaseDocument", model=self.model)
# synonyms are derived entirely from training set
self.extractor.load_dictionary(DATABASE_DIR / "synonyms.yaml")

Expand Down Expand Up @@ -195,7 +194,10 @@ def create_training_set(self, num=100):
def eval(self) -> EvaluationObjectSetRE:
"""Evaluate the ability to extract relations."""
labeler = get_adapter("sqlite:obo:mesh")
num_test = self.num_tests
if self.num_tests and isinstance(self.num_tests, int):
num_test = self.num_tests
else:
num_test = 1
ke = self.extractor
docs = list(self.load_test_cases())
shuffle(docs)
Expand Down
14 changes: 6 additions & 8 deletions src/ontogpt/evaluation/evaluation_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@
"""

from dataclasses import dataclass
from typing import List, Optional, Set
from typing import List, Optional, Set, Type, Union

from oaklib import BasicOntologyInterface
from pydantic import BaseModel

from ontogpt import DEFAULT_MODEL
from ontogpt.engines.spires_engine import SPIRESEngine


Expand Down Expand Up @@ -90,17 +89,16 @@ class SPIRESEvaluationEngine(EvaluationEngine):
extractor: SPIRESEngine = None
"""Knowledge extractor to use"""

num_tests: int = 10
num_tests: Optional[Union[int, Type]] = 10
"""Number of test cases to use for evaluation"""

num_training: int = 5
num_training: Optional[Union[int, Type]] = 5
"""Number of training/exemplar cases to use for evaluation in generalization task.
Note this number will be low as we use few-shot learning."""

chunking: bool = False
chunking: Optional[Union[bool, Type]] = False
"""Whether to pre-process input texts by chunking. If True, each chunk gets its own
prompt. Otherwise, pass the full text with each prompt."""

model: str = DEFAULT_MODEL
"""Name of the model to use in evaluation. Defaults to the default model defined
in models.yaml, generally gpt-3.5-turbo."""
model: Optional[Union[str, Type]] = None
"""Name of the model to use in evaluation."""
12 changes: 9 additions & 3 deletions src/ontogpt/evaluation/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,16 @@
resolver = ClassResolver([EvalCTD], base=SPIRESEvaluationEngine)


def create_evaluator(name: Optional[Union[str, Type]] = None, **kwargs) -> SPIRESEvaluationEngine:
def create_evaluator(
name: Optional[Union[str, Type]] = None,
num_tests: Optional[Union[int, Type]] = None,
chunking: Optional[Union[bool, Type]] = None,
model: Optional[Union[str, Type]] = None,
**kwargs,
) -> SPIRESEvaluationEngine:
"""Create a knowledge engine."""
if name is None:
name = EvalCTD
if isinstance(name, str):
return resolver.lookup(name)(**kwargs)
return name(**kwargs)
return resolver.lookup(name)(num_tests=num_tests, chunking=chunking, model=model, **kwargs)
return name(num_tests=num_tests, chunking=chunking, model=model, **kwargs)
Loading