Consolidated LLM names lookup into CommonLLMNames in llmclient.llms (#30)
* Consolidated LLM names lookup into CommonLLMNames

* Removed niche gpt-3.5-turbo case

* Renamed OPENAI_GENERAL to GENERAL_PURPOSE per PR comments

* Defined GPT_4O and OPENAI_BASELINE so callers can be opinionated when desired (OPENAI_BASELINE as a recommended default) and unopinionated when not (GPT_4O as just a version pin)

jamesbraza authored Jan 8, 2025
1 parent ab90c31 commit 988335f
Showing 5 changed files with 34 additions and 30 deletions.
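
As context for the diffs below, here is a minimal usage sketch of the consolidated enum. The enum values come from this commit; constructing a LiteLLMModel by `name` reflects the package's usual pattern and is an assumption, not something this commit changes:

    from llmclient import CommonLLMNames, LiteLLMModel

    # Pick a test-friendly model without memorizing version strings
    # (LiteLLMModel(name=...) is assumed from the package's public API)
    model = LiteLLMModel(name=CommonLLMNames.OPENAI_TEST.value)

    # StrEnum members are themselves strings, so they drop in anywhere
    # a raw model-name string is expected
    assert CommonLLMNames.GPT_4O == "gpt-4o-2024-11-20"
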
2 changes: 2 additions & 0 deletions llmclient/__init__.py
@@ -17,6 +17,7 @@
     JSONSchemaValidationError,
 )
 from .llms import (
+    CommonLLMNames,
     LiteLLMModel,
     LLMModel,
     MultipleCompletionLLMModel,
@@ -38,6 +39,7 @@
     "GLOBAL_COST_TRACKER",
     "MODEL_COST_MAP",
     "Chunk",
+    "CommonLLMNames",
     "Embeddable",
     "EmbeddingModel",
     "EmbeddingModes",
25 changes: 21 additions & 4 deletions llmclient/llms.py
@@ -13,6 +13,7 @@
     Iterable,
     Mapping,
 )
+from enum import StrEnum
 from inspect import isasyncgenfunction, signature
 from typing import (
     Any,
@@ -67,6 +68,23 @@
 JSONSchema: TypeAlias = Mapping[str, Any]
 
 
+class CommonLLMNames(StrEnum):
+    """When you don't want to think about models, just use one from here."""
+
+    # Use these to avoid thinking about exact versions
+    GPT_4O = "gpt-4o-2024-11-20"
+    CLAUDE_35_SONNET = "claude-3-5-sonnet-20241022"
+
+    # Use these when trying to think of a somewhat opinionated default
+    OPENAI_BASELINE = "gpt-4o-2024-11-20"  # Fast and decent
+
+    # Use these in unit testing
+    OPENAI_TEST = "gpt-4o-mini-2024-07-18"  # Cheap, fast, and not OpenAI's cutting edge
+    ANTHROPIC_TEST = (
+        "claude-3-haiku-20240307"  # Cheap, fast, and not Anthropic's cutting edge
+    )
+
+
 def sum_logprobs(choice: litellm.utils.Choices) -> float | None:
     """Calculate the sum of the log probabilities of an LLM completion (a Choices object).
@@ -675,10 +693,9 @@ def __str__(self) -> str:
 
     @model_validator(mode="after")
     def set_model_name(self) -> Self:
-        if (
-            self.config.get("model") in {"gpt-3.5-turbo", None}
-            and self.name != "unknown"
-        ) or (self.name != "unknown" and "model" not in self.config):
+        if (self.config.get("model") is None and self.name != "unknown") or (
+            self.name != "unknown" and "model" not in self.config
+        ):
             self.config["model"] = self.name
         elif "model" in self.config and self.name == "unknown":
             self.name = self.config["model"]
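
The set_model_name change above removes the old special case that let a configured "gpt-3.5-turbo" be overwritten by `self.name`; now only a missing or None "model" key defers to `name`. A sketch of the resulting behavior, assuming the constructor accepts `name` and `config` keywords and that `name` defaults to "unknown", as the validator's checks imply:

    from llmclient.llms import MultipleCompletionLLMModel

    # name given, no "model" in config -> config inherits the name
    m = MultipleCompletionLLMModel(name="gpt-4o-mini-2024-07-18", config={})
    assert m.config["model"] == "gpt-4o-mini-2024-07-18"

    # "model" given, name left at its default -> name inherits the model
    m = MultipleCompletionLLMModel(config={"model": "claude-3-haiku-20240307"})
    assert m.name == "claude-3-haiku-20240307"

    # After this commit, an explicit "gpt-3.5-turbo" in config is respected
    # rather than being replaced by the name
    m = MultipleCompletionLLMModel(name="other", config={"model": "gpt-3.5-turbo"})
    assert m.config["model"] == "gpt-3.5-turbo"
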
8 changes: 0 additions & 8 deletions tests/conftest.py
@@ -3,7 +3,6 @@
 import logging
 import shutil
 from collections.abc import Iterator
-from enum import StrEnum
 from pathlib import Path
 from typing import Any
 
@@ -74,10 +73,3 @@ def fixture_reset_log_levels(caplog) -> Iterator[None]:
         logger = logging.getLogger(name)
         logger.setLevel(logging.NOTSET)
         logger.propagate = True
-
-
-class CILLMModelNames(StrEnum):
-    """Models to use for generic CI testing."""
-
-    ANTHROPIC = "claude-3-haiku-20240307"  # Cheap and not Anthropic's cutting edge
-    OPENAI = "gpt-4o-mini-2024-07-18"  # Cheap and not OpenAI's cutting edge
8 changes: 4 additions & 4 deletions tests/test_cost_tracking.py
@@ -8,10 +8,10 @@
 from llmclient import cost_tracking_ctx
 from llmclient.cost_tracker import GLOBAL_COST_TRACKER
 from llmclient.embeddings import LiteLLMEmbeddingModel
-from llmclient.llms import LiteLLMModel, MultipleCompletionLLMModel
+from llmclient.llms import CommonLLMNames, LiteLLMModel, MultipleCompletionLLMModel
 from llmclient.types import LLMResult
 
-from .conftest import VCR_DEFAULT_MATCH_ON, CILLMModelNames
+from .conftest import VCR_DEFAULT_MATCH_ON
 
 
 @contextmanager
@@ -164,7 +164,7 @@ async def call_model(
         return await model.call(*args, **kwargs)
 
     @pytest.mark.parametrize(
-        "model_name", ["gpt-3.5-turbo", CILLMModelNames.ANTHROPIC.value]
+        "model_name", ["gpt-3.5-turbo", CommonLLMNames.ANTHROPIC_TEST.value]
     )
     @pytest.mark.asyncio
     async def test_achat(self, model_name: str) -> None:
@@ -185,7 +185,7 @@ async def test_achat(self, model_name: str) -> None:
         ):
             pass
 
-    @pytest.mark.parametrize("model_name", [CILLMModelNames.OPENAI.value])
+    @pytest.mark.parametrize("model_name", [CommonLLMNames.OPENAI_TEST.value])
     @pytest.mark.asyncio
     @pytest.mark.vcr
     async def test_text_image_message(self, model_name: str) -> None:
21 changes: 7 additions & 14 deletions tests/test_llms.py
@@ -1,6 +1,5 @@
 import pathlib
 import pickle
-from enum import StrEnum
 from typing import Any, ClassVar
 from unittest.mock import Mock, patch
 
@@ -13,6 +12,7 @@
 from llmclient.exceptions import JSONSchemaValidationError
 from llmclient.llms import (
     Chunk,
+    CommonLLMNames,
     LiteLLMModel,
     MultipleCompletionLLMModel,
     validate_json_completion,
@@ -259,13 +259,6 @@ def test_pickling(self, tmp_path: pathlib.Path) -> None:
         assert llm.router.deployment_names == rehydrated_llm.router.deployment_names
 
 
-class CILLMModelNames(StrEnum):
-    """Models to use for generic CI testing."""
-
-    ANTHROPIC = "claude-3-haiku-20240307"  # Cheap and not Anthropic's cutting edge
-    OPENAI = "gpt-4o-mini-2024-07-18"  # Cheap and not OpenAI's cutting edge
-
-
 class DummyOutputSchema(BaseModel):
     name: str
     age: int
@@ -282,7 +275,7 @@ async def call_model(
         return await model.call(*args, **kwargs)
 
     @pytest.mark.parametrize(
-        "model_name", ["gpt-3.5-turbo", CILLMModelNames.ANTHROPIC.value]
+        "model_name", ["gpt-3.5-turbo", CommonLLMNames.ANTHROPIC_TEST.value]
     )
     @pytest.mark.asyncio
     async def test_achat(self, model_name: str) -> None:
@@ -324,7 +317,7 @@ async def test_model(self, model_name: str) -> None:
         assert result.logprob is None or result.logprob <= 0
 
     @pytest.mark.parametrize(
-        "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"]
+        "model_name", [CommonLLMNames.ANTHROPIC_TEST.value, "gpt-3.5-turbo"]
     )
     @pytest.mark.asyncio
     async def test_streaming(self, model_name: str) -> None:
@@ -396,7 +389,7 @@ async def test_output_schema(
         assert result.messages[0].content
         DummyOutputSchema.model_validate_json(result.messages[0].content)
 
-    @pytest.mark.parametrize("model_name", [CILLMModelNames.OPENAI.value])
+    @pytest.mark.parametrize("model_name", [CommonLLMNames.OPENAI_TEST.value])
     @pytest.mark.asyncio
     @pytest.mark.vcr
     async def test_text_image_message(self, model_name: str) -> None:
@@ -426,7 +419,7 @@ async def test_text_image_message(self, model_name: str) -> None:
         assert "red" in result.messages[-1].content.lower()
 
     @pytest.mark.parametrize(
-        "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"]
+        "model_name", [CommonLLMNames.ANTHROPIC_TEST.value, "gpt-3.5-turbo"]
     )
     @pytest.mark.asyncio
     @pytest.mark.vcr
@@ -456,8 +449,8 @@ async def test_single_completion(self, model_name: str) -> None:
     @pytest.mark.parametrize(
         "model_name",
         [
-            pytest.param(CILLMModelNames.ANTHROPIC.value, id="anthropic"),
-            pytest.param(CILLMModelNames.OPENAI.value, id="openai"),
+            pytest.param(CommonLLMNames.ANTHROPIC_TEST.value, id="anthropic"),
+            pytest.param(CommonLLMNames.OPENAI_TEST.value, id="openai"),
         ],
     )
     async def test_multiple_completion(self, model_name: str, request) -> None:
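
One note on the `.value` accesses in the parametrizations above: because CommonLLMNames subclasses StrEnum, each member already is a `str`, so `.value` is there for explicitness rather than necessity. For instance:

    from llmclient.llms import CommonLLMNames

    # A StrEnum member compares equal to (and is an instance of) its string value
    assert CommonLLMNames.ANTHROPIC_TEST == "claude-3-haiku-20240307"
    assert isinstance(CommonLLMNames.OPENAI_TEST, str)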