diff --git a/llmclient/__init__.py b/llmclient/__init__.py
index 139597d..ffab2b1 100644
--- a/llmclient/__init__.py
+++ b/llmclient/__init__.py
@@ -17,6 +17,7 @@
     JSONSchemaValidationError,
 )
 from .llms import (
+    CommonLLMNames,
     LiteLLMModel,
     LLMModel,
     MultipleCompletionLLMModel,
@@ -38,6 +39,7 @@
     "GLOBAL_COST_TRACKER",
     "MODEL_COST_MAP",
     "Chunk",
+    "CommonLLMNames",
     "Embeddable",
     "EmbeddingModel",
     "EmbeddingModes",
diff --git a/llmclient/llms.py b/llmclient/llms.py
index 4a11a78..65d7ffa 100644
--- a/llmclient/llms.py
+++ b/llmclient/llms.py
@@ -13,6 +13,7 @@
     Iterable,
     Mapping,
 )
+from enum import StrEnum
 from inspect import isasyncgenfunction, signature
 from typing import (
     Any,
@@ -67,6 +68,23 @@
 JSONSchema: TypeAlias = Mapping[str, Any]
 
 
+class CommonLLMNames(StrEnum):
+    """When you don't want to think about models, just use one from here."""
+
+    # Use these to avoid thinking about exact versions
+    GPT_4O = "gpt-4o-2024-11-20"
+    CLAUDE_35_SONNET = "claude-3-5-sonnet-20241022"
+
+    # Use these when trying to think of a somewhat opinionated default
+    OPENAI_BASELINE = "gpt-4o-2024-11-20"  # Fast and decent
+
+    # Use these in unit testing
+    OPENAI_TEST = "gpt-4o-mini-2024-07-18"  # Cheap, fast, and not OpenAI's cutting edge
+    ANTHROPIC_TEST = (
+        "claude-3-haiku-20240307"  # Cheap, fast, and not Anthropic's cutting edge
+    )
+
+
 def sum_logprobs(choice: litellm.utils.Choices) -> float | None:
     """Calculate the sum of the log probabilities of an LLM completion (a Choices object).
 
@@ -675,10 +693,9 @@ def __str__(self) -> str:
 
     @model_validator(mode="after")
     def set_model_name(self) -> Self:
-        if (
-            self.config.get("model") in {"gpt-3.5-turbo", None}
-            and self.name != "unknown"
-        ) or (self.name != "unknown" and "model" not in self.config):
+        if (self.config.get("model") is None and self.name != "unknown") or (
+            self.name != "unknown" and "model" not in self.config
+        ):
             self.config["model"] = self.name
         elif "model" in self.config and self.name == "unknown":
             self.name = self.config["model"]
diff --git a/tests/conftest.py b/tests/conftest.py
index f99051d..b0f5177 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,7 +3,6 @@
 import logging
 import shutil
 from collections.abc import Iterator
-from enum import StrEnum
 from pathlib import Path
 from typing import Any
 
@@ -74,10 +73,3 @@ def fixture_reset_log_levels(caplog) -> Iterator[None]:
         logger = logging.getLogger(name)
         logger.setLevel(logging.NOTSET)
         logger.propagate = True
-
-
-class CILLMModelNames(StrEnum):
-    """Models to use for generic CI testing."""
-
-    ANTHROPIC = "claude-3-haiku-20240307"  # Cheap and not Anthropic's cutting edge
-    OPENAI = "gpt-4o-mini-2024-07-18"  # Cheap and not OpenAI's cutting edge
diff --git a/tests/test_cost_tracking.py b/tests/test_cost_tracking.py
index e15a6e9..a7d2859 100644
--- a/tests/test_cost_tracking.py
+++ b/tests/test_cost_tracking.py
@@ -8,10 +8,10 @@
 from llmclient import cost_tracking_ctx
 from llmclient.cost_tracker import GLOBAL_COST_TRACKER
 from llmclient.embeddings import LiteLLMEmbeddingModel
-from llmclient.llms import LiteLLMModel, MultipleCompletionLLMModel
+from llmclient.llms import CommonLLMNames, LiteLLMModel, MultipleCompletionLLMModel
 from llmclient.types import LLMResult
 
-from .conftest import VCR_DEFAULT_MATCH_ON, CILLMModelNames
+from .conftest import VCR_DEFAULT_MATCH_ON
 
 
 @contextmanager
@@ -164,7 +164,7 @@ async def call_model(
         return await model.call(*args, **kwargs)
 
     @pytest.mark.parametrize(
-        "model_name", ["gpt-3.5-turbo", CILLMModelNames.ANTHROPIC.value]
+        "model_name", ["gpt-3.5-turbo", CommonLLMNames.ANTHROPIC_TEST.value]
     )
     @pytest.mark.asyncio
     async def test_achat(self, model_name: str) -> None:
@@ -185,7 +185,7 @@ async def test_achat(self, model_name: str) -> None:
         ):
             pass
 
-    @pytest.mark.parametrize("model_name", [CILLMModelNames.OPENAI.value])
+    @pytest.mark.parametrize("model_name", [CommonLLMNames.OPENAI_TEST.value])
     @pytest.mark.asyncio
     @pytest.mark.vcr
     async def test_text_image_message(self, model_name: str) -> None:
diff --git a/tests/test_llms.py b/tests/test_llms.py
index ded4860..754080b 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -1,6 +1,5 @@
 import pathlib
 import pickle
-from enum import StrEnum
 from typing import Any, ClassVar
 from unittest.mock import Mock, patch
 
@@ -13,6 +12,7 @@
 from llmclient.exceptions import JSONSchemaValidationError
 from llmclient.llms import (
     Chunk,
+    CommonLLMNames,
     LiteLLMModel,
     MultipleCompletionLLMModel,
     validate_json_completion,
@@ -259,13 +259,6 @@ def test_pickling(self, tmp_path: pathlib.Path) -> None:
         assert llm.router.deployment_names == rehydrated_llm.router.deployment_names
 
 
-class CILLMModelNames(StrEnum):
-    """Models to use for generic CI testing."""
-
-    ANTHROPIC = "claude-3-haiku-20240307"  # Cheap and not Anthropic's cutting edge
-    OPENAI = "gpt-4o-mini-2024-07-18"  # Cheap and not OpenAI's cutting edge
-
-
 class DummyOutputSchema(BaseModel):
     name: str
     age: int
@@ -282,7 +275,7 @@ async def call_model(
         return await model.call(*args, **kwargs)
 
     @pytest.mark.parametrize(
-        "model_name", ["gpt-3.5-turbo", CILLMModelNames.ANTHROPIC.value]
+        "model_name", ["gpt-3.5-turbo", CommonLLMNames.ANTHROPIC_TEST.value]
     )
     @pytest.mark.asyncio
     async def test_achat(self, model_name: str) -> None:
@@ -324,7 +317,7 @@ async def test_model(self, model_name: str) -> None:
         assert result.logprob is None or result.logprob <= 0
 
     @pytest.mark.parametrize(
-        "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"]
+        "model_name", [CommonLLMNames.ANTHROPIC_TEST.value, "gpt-3.5-turbo"]
     )
     @pytest.mark.asyncio
     async def test_streaming(self, model_name: str) -> None:
@@ -396,7 +389,7 @@ async def test_output_schema(
         assert result.messages[0].content
         DummyOutputSchema.model_validate_json(result.messages[0].content)
 
-    @pytest.mark.parametrize("model_name", [CILLMModelNames.OPENAI.value])
+    @pytest.mark.parametrize("model_name", [CommonLLMNames.OPENAI_TEST.value])
     @pytest.mark.asyncio
     @pytest.mark.vcr
     async def test_text_image_message(self, model_name: str) -> None:
@@ -426,7 +419,7 @@ async def test_text_image_message(self, model_name: str) -> None:
         assert "red" in result.messages[-1].content.lower()
 
     @pytest.mark.parametrize(
-        "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"]
+        "model_name", [CommonLLMNames.ANTHROPIC_TEST.value, "gpt-3.5-turbo"]
     )
     @pytest.mark.asyncio
     @pytest.mark.vcr
@@ -456,8 +449,8 @@ async def test_single_completion(self, model_name: str) -> None:
     @pytest.mark.parametrize(
         "model_name",
         [
-            pytest.param(CILLMModelNames.ANTHROPIC.value, id="anthropic"),
-            pytest.param(CILLMModelNames.OPENAI.value, id="openai"),
+            pytest.param(CommonLLMNames.ANTHROPIC_TEST.value, id="anthropic"),
+            pytest.param(CommonLLMNames.OPENAI_TEST.value, id="openai"),
         ],
     )
     async def test_multiple_completion(self, model_name: str, request) -> None: