Consolidated LLM names lookup into CommonLLMNames in llmclient.llms (#30)
* Consolidated LLM names lookup into CommonLLMNames

* Removed niche gpt-3.5-turbo case

* Renamed OPENAI_GENERAL to GENERAL_PURPOSE per PR comments

* Defined GPT_4O and OPENAI_BASELINE so callers can be opinionated when desired (OPENAI_BASELINE as a recommended default) and unopinionated when not (GPT_4O as just a version pin)

jamesbraza authored Jan 8, 2025
1 parent ab90c31 commit 988335f
Showing 5 changed files with 34 additions and 30 deletions.
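
As context for the diffs below, here is a minimal usage sketch of the consolidated enum. The enum values come from this commit; constructing a LiteLLMModel by `name` reflects the package's usual pattern and is an assumption, not something this commit changes:

    from llmclient import CommonLLMNames, LiteLLMModel

    # Pick a test-friendly model without memorizing version strings
    # (LiteLLMModel(name=...) is assumed from the package's public API)
    model = LiteLLMModel(name=CommonLLMNames.OPENAI_TEST.value)

    # StrEnum members are themselves strings, so they drop in anywhere
    # a raw model-name string is expected
    assert CommonLLMNames.GPT_4O == "gpt-4o-2024-11-20"
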
2 changes: 2 additions & 0 deletions llmclient/__init__.py
@@ -17,6 +17,7 @@
     JSONSchemaValidationError,
 )
 from .llms import (
+    CommonLLMNames,
     LiteLLMModel,
     LLMModel,
     MultipleCompletionLLMModel,
@@ -38,6 +39,7 @@
     "GLOBAL_COST_TRACKER",
     "MODEL_COST_MAP",
     "Chunk",
+    "CommonLLMNames",
     "Embeddable",
     "EmbeddingModel",
     "EmbeddingModes",
25 changes: 21 additions & 4 deletions llmclient/llms.py
@@ -13,6 +13,7 @@
     Iterable,
     Mapping,
 )
+from enum import StrEnum
 from inspect import isasyncgenfunction, signature
 from typing import (
     Any,
@@ -67,6 +68,23 @@
 JSONSchema: TypeAlias = Mapping[str, Any]
 
 
+class CommonLLMNames(StrEnum):
+    """When you don't want to think about models, just use one from here."""
+
+    # Use these to avoid thinking about exact versions
+    GPT_4O = "gpt-4o-2024-11-20"
+    CLAUDE_35_SONNET = "claude-3-5-sonnet-20241022"
+
+    # Use these when trying to think of a somewhat opinionated default
+    OPENAI_BASELINE = "gpt-4o-2024-11-20"  # Fast and decent
+
+    # Use these in unit testing
+    OPENAI_TEST = "gpt-4o-mini-2024-07-18"  # Cheap, fast, and not OpenAI's cutting edge
+    ANTHROPIC_TEST = (
+        "claude-3-haiku-20240307"  # Cheap, fast, and not Anthropic's cutting edge
+    )
+
+
 def sum_logprobs(choice: litellm.utils.Choices) -> float | None:
     """Calculate the sum of the log probabilities of an LLM completion (a Choices object).
@@ -675,10 +693,9 @@ def __str__(self) -> str:
 
     @model_validator(mode="after")
     def set_model_name(self) -> Self:
-        if (
-            self.config.get("model") in {"gpt-3.5-turbo", None}
-            and self.name != "unknown"
-        ) or (self.name != "unknown" and "model" not in self.config):
+        if (self.config.get("model") is None and self.name != "unknown") or (
+            self.name != "unknown" and "model" not in self.config
+        ):
             self.config["model"] = self.name
         elif "model" in self.config and self.name == "unknown":
             self.name = self.config["model"]
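
The set_model_name change above removes the old special case that let a configured "gpt-3.5-turbo" be overwritten by `self.name`; now only a missing or None "model" key defers to `name`. A sketch of the resulting behavior, assuming the constructor accepts `name` and `config` keywords and that `name` defaults to "unknown", as the validator's checks imply:

    from llmclient.llms import MultipleCompletionLLMModel

    # name given, no "model" in config -> config inherits the name
    m = MultipleCompletionLLMModel(name="gpt-4o-mini-2024-07-18", config={})
    assert m.config["model"] == "gpt-4o-mini-2024-07-18"

    # "model" given, name left at its default -> name inherits the model
    m = MultipleCompletionLLMModel(config={"model": "claude-3-haiku-20240307"})
    assert m.name == "claude-3-haiku-20240307"

    # After this commit, an explicit "gpt-3.5-turbo" in config is respected
    # rather than being replaced by the name
    m = MultipleCompletionLLMModel(name="other", config={"model": "gpt-3.5-turbo"})
    assert m.config["model"] == "gpt-3.5-turbo"
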
8 changes: 0 additions & 8 deletions tests/conftest.py
@@ -3,7 +3,6 @@
 import logging
 import shutil
 from collections.abc import Iterator
-from enum import StrEnum
 from pathlib import Path
 from typing import Any
 
@@ -74,10 +73,3 @@ def fixture_reset_log_levels(caplog) -> Iterator[None]:
         logger = logging.getLogger(name)
         logger.setLevel(logging.NOTSET)
         logger.propagate = True
-
-
-class CILLMModelNames(StrEnum):
-    """Models to use for generic CI testing."""
-
-    ANTHROPIC = "claude-3-haiku-20240307"  # Cheap and not Anthropic's cutting edge
-    OPENAI = "gpt-4o-mini-2024-07-18"  # Cheap and not OpenAI's cutting edge
8 changes: 4 additions & 4 deletions tests/test_cost_tracking.py
@@ -8,10 +8,10 @@
 from llmclient import cost_tracking_ctx
 from llmclient.cost_tracker import GLOBAL_COST_TRACKER
 from llmclient.embeddings import LiteLLMEmbeddingModel
-from llmclient.llms import LiteLLMModel, MultipleCompletionLLMModel
+from llmclient.llms import CommonLLMNames, LiteLLMModel, MultipleCompletionLLMModel
 from llmclient.types import LLMResult
 
-from .conftest import VCR_DEFAULT_MATCH_ON, CILLMModelNames
+from .conftest import VCR_DEFAULT_MATCH_ON
 
 
 @contextmanager
@@ -164,7 +164,7 @@ async def call_model(
         return await model.call(*args, **kwargs)
 
     @pytest.mark.parametrize(
-        "model_name", ["gpt-3.5-turbo", CILLMModelNames.ANTHROPIC.value]
+        "model_name", ["gpt-3.5-turbo", CommonLLMNames.ANTHROPIC_TEST.value]
     )
     @pytest.mark.asyncio
     async def test_achat(self, model_name: str) -> None:
@@ -185,7 +185,7 @@ async def test_achat(self, model_name: str) -> None:
         ):
             pass
 
-    @pytest.mark.parametrize("model_name", [CILLMModelNames.OPENAI.value])
+    @pytest.mark.parametrize("model_name", [CommonLLMNames.OPENAI_TEST.value])
     @pytest.mark.asyncio
     @pytest.mark.vcr
     async def test_text_image_message(self, model_name: str) -> None:
21 changes: 7 additions & 14 deletions tests/test_llms.py
@@ -1,6 +1,5 @@
 import pathlib
 import pickle
-from enum import StrEnum
 from typing import Any, ClassVar
 from unittest.mock import Mock, patch
 
@@ -13,6 +12,7 @@
 from llmclient.exceptions import JSONSchemaValidationError
 from llmclient.llms import (
     Chunk,
+    CommonLLMNames,
     LiteLLMModel,
     MultipleCompletionLLMModel,
     validate_json_completion,
@@ -259,13 +259,6 @@ def test_pickling(self, tmp_path: pathlib.Path) -> None:
         assert llm.router.deployment_names == rehydrated_llm.router.deployment_names
 
 
-class CILLMModelNames(StrEnum):
-    """Models to use for generic CI testing."""
-
-    ANTHROPIC = "claude-3-haiku-20240307"  # Cheap and not Anthropic's cutting edge
-    OPENAI = "gpt-4o-mini-2024-07-18"  # Cheap and not OpenAI's cutting edge
-
-
 class DummyOutputSchema(BaseModel):
     name: str
     age: int
@@ -282,7 +275,7 @@ async def call_model(
         return await model.call(*args, **kwargs)
 
     @pytest.mark.parametrize(
-        "model_name", ["gpt-3.5-turbo", CILLMModelNames.ANTHROPIC.value]
+        "model_name", ["gpt-3.5-turbo", CommonLLMNames.ANTHROPIC_TEST.value]
     )
     @pytest.mark.asyncio
     async def test_achat(self, model_name: str) -> None:
@@ -324,7 +317,7 @@ async def test_model(self, model_name: str) -> None:
         assert result.logprob is None or result.logprob <= 0
 
     @pytest.mark.parametrize(
-        "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"]
+        "model_name", [CommonLLMNames.ANTHROPIC_TEST.value, "gpt-3.5-turbo"]
     )
     @pytest.mark.asyncio
     async def test_streaming(self, model_name: str) -> None:
@@ -396,7 +389,7 @@ async def test_output_schema(
         assert result.messages[0].content
         DummyOutputSchema.model_validate_json(result.messages[0].content)
 
-    @pytest.mark.parametrize("model_name", [CILLMModelNames.OPENAI.value])
+    @pytest.mark.parametrize("model_name", [CommonLLMNames.OPENAI_TEST.value])
     @pytest.mark.asyncio
     @pytest.mark.vcr
     async def test_text_image_message(self, model_name: str) -> None:
@@ -426,7 +419,7 @@ async def test_text_image_message(self, model_name: str) -> None:
         assert "red" in result.messages[-1].content.lower()
 
     @pytest.mark.parametrize(
-        "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"]
+        "model_name", [CommonLLMNames.ANTHROPIC_TEST.value, "gpt-3.5-turbo"]
     )
     @pytest.mark.asyncio
     @pytest.mark.vcr
@@ -456,8 +449,8 @@ async def test_single_completion(self, model_name: str) -> None:
     @pytest.mark.parametrize(
         "model_name",
         [
-            pytest.param(CILLMModelNames.ANTHROPIC.value, id="anthropic"),
-            pytest.param(CILLMModelNames.OPENAI.value, id="openai"),
+            pytest.param(CommonLLMNames.ANTHROPIC_TEST.value, id="anthropic"),
+            pytest.param(CommonLLMNames.OPENAI_TEST.value, id="openai"),
         ],
     )
     async def test_multiple_completion(self, model_name: str, request) -> None:
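
One note on the `.value` accesses in the parametrizations above: because CommonLLMNames subclasses StrEnum, each member already is a `str`, so `.value` is there for explicitness rather than necessity. For instance:

    from llmclient.llms import CommonLLMNames

    # A StrEnum member compares equal to (and is an instance of) its string value
    assert CommonLLMNames.ANTHROPIC_TEST == "claude-3-haiku-20240307"
    assert isinstance(CommonLLMNames.OPENAI_TEST, str)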