From 8af20b084140ad831072186ff48bdf2284627877 Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Mon, 9 Dec 2024 15:40:30 -0800
Subject: [PATCH 1/3] Updated llmclient with newest ldp implementations

---
 llmclient/__init__.py |  4 ++++
 llmclient/llms.py     | 37 ++++++++++++++++++++++++++++++++-----
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/llmclient/__init__.py b/llmclient/__init__.py
index dc27f02..037f694 100644
--- a/llmclient/__init__.py
+++ b/llmclient/__init__.py
@@ -19,6 +19,8 @@
     LiteLLMModel,
     LLMModel,
     MultipleCompletionLLMModel,
+    sum_logprobs,
+    validate_json_completion,
 )
 from .types import (
     Chunk,
@@ -44,4 +46,6 @@
     "SentenceTransformerEmbeddingModel",
     "SparseEmbeddingModel",
     "embedding_model_factory",
+    "sum_logprobs",
+    "validate_json_completion",
 ]

diff --git a/llmclient/llms.py b/llmclient/llms.py
index 149f43f..ecb4233 100644
--- a/llmclient/llms.py
+++ b/llmclient/llms.py
@@ -11,12 +11,14 @@
     Awaitable,
     Callable,
     Iterable,
+    Mapping,
 )
 from inspect import isasyncgenfunction, signature
 from typing import (
     Any,
     ClassVar,
     Self,
+    TypeAlias,
     TypeVar,
     cast,
 )
@@ -59,6 +61,10 @@
     config=ConfigDict(arbitrary_types_allowed=True),
 )
 
+# Yes, this is a hack, it mostly matches
+# https://github.com/python-jsonschema/referencing/blob/v0.35.1/referencing/jsonschema.py#L20-L21
+JSONSchema: TypeAlias = Mapping[str, Any]
+
 
 def sum_logprobs(choice: litellm.utils.Choices) -> float | None:
     """Calculate the sum of the log probabilities of an LLM completion (a Choices object).
@@ -84,13 +90,13 @@ def sum_logprobs(choice: litellm.utils.Choices) -> float | None:
 
 
 def validate_json_completion(
-    completion: litellm.ModelResponse, output_type: type[BaseModel]
+    completion: litellm.ModelResponse, output_type: type[BaseModel] | JSONSchema
 ) -> None:
     """Validate a completion against a JSON schema.
 
     Args:
         completion: The completion to validate.
-        output_type: The Pydantic model to validate the completion against.
+        output_type: A JSON schema or a Pydantic model to validate the completion.
     """
     try:
         for choice in completion.choices:
@@ -102,7 +108,12 @@ def validate_json_completion(
             choice.message.content = (
                 choice.message.content.split("```json")[-1].split("```")[0] or ""
             )
-            output_type.model_validate_json(choice.message.content)
+            if isinstance(output_type, Mapping):  # JSON schema
+                litellm.litellm_core_utils.json_validation_rule.validate_schema(
+                    schema=dict(output_type), response=choice.message.content
+                )
+            else:
+                output_type.model_validate_json(choice.message.content)
     except ValidationError as err:
         raise JSONSchemaValidationError(
             "The completion does not match the specified schema."
@@ -655,14 +666,20 @@ async def achat_iter(self, messages: Iterable[Message], **kwargs) -> AsyncGenera
     )
 
     # SEE: https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
+    # > `none` means the model will not call any tool and instead generates a message.
+    # > `auto` means the model can pick between generating a message or calling one or more tools.
     # > `required` means the model must call one or more tools.
+    NO_TOOL_CHOICE: ClassVar[str] = "none"
+    MODEL_CHOOSES_TOOL: ClassVar[str] = "auto"
     TOOL_CHOICE_REQUIRED: ClassVar[str] = "required"
+    # None means we won't provide a tool_choice to the LLM API
+    UNSPECIFIED_TOOL_CHOICE: ClassVar[None] = None
 
     async def call(  # noqa: C901, PLR0915
         self,
         messages: list[Message],
         callbacks: list[Callable] | None = None,
-        output_type: type[BaseModel] | None = None,
+        output_type: type[BaseModel] | JSONSchema | None = None,
         tools: list[Tool] | None = None,
         tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED,
         **chat_kwargs,
@@ -705,7 +722,17 @@ async def call(  # noqa: C901, PLR0915
                 )
 
         # deal with specifying output type
-        if output_type is not None:
+        if isinstance(output_type, Mapping):  # Use structured outputs
+            chat_kwargs["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "strict": True,
+                    # SEE: https://platform.openai.com/docs/guides/structured-outputs#additionalproperties-false-must-always-be-set-in-objects
+                    "schema": dict(output_type) | {"additionalProperties": False},
+                    "name": output_type["title"],  # Required by OpenAI as of 12/3/2024
+                },
+            }
+        elif output_type is not None:  # Use JSON mode
             schema = json.dumps(output_type.model_json_schema(mode="serialization"))
             schema_msg = f"Respond following this JSON schema:\n\n{schema}"
             # Get the system prompt and its index, or the index to add it

From 7a86be04195f6dbf5fa3af18821e250bc98ec382 Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Mon, 9 Dec 2024 15:54:56 -0800
Subject: [PATCH 2/3] Updated test_llms according to new JSONSchema

---
 tests/test_llms.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/test_llms.py b/tests/test_llms.py
index e9f8320..b5cd46e 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -271,8 +271,19 @@ def play(move: int | None) -> None:
 
     @pytest.mark.asyncio
     @pytest.mark.vcr
-    async def test_output_schema(self) -> None:
-        model = self.MODEL_CLS(name="gpt-3.5-turbo", config=self.DEFAULT_CONFIG)
+    @pytest.mark.parametrize(
+        ("model_name", "output_type"),
+        [
+            pytest.param("gpt-3.5-turbo", DummyOutputSchema, id="json-mode"),
+            pytest.param(
+                "gpt-4o", DummyOutputSchema.model_json_schema(), id="structured-outputs"
+            ),
+        ],
+    )
+    async def test_output_schema(
+        self, model_name: str, output_type: type[BaseModel] | dict[str, Any]
+    ) -> None:
+        model = self.MODEL_CLS(name=model_name, config=self.DEFAULT_CONFIG)
         messages = [
             Message(
                 content=(
@@ -280,7 +291,7 @@ async def test_output_schema(self) -> None:
                 )
             ),
         ]
-        results = await self.call_model(model, messages, output_type=DummyOutputSchema)
+        results = await self.call_model(model, messages, output_type=output_type)
         assert len(results) == self.NUM_COMPLETIONS
         for result in results:
             assert result.messages

From a8ae940192d6352183e53318853a57bd2fb94a0b Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Tue, 10 Dec 2024 09:35:20 -0800
Subject: [PATCH 3/3] Added validation to check if model supports JSON schema for output

---
 llmclient/llms.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/llmclient/llms.py b/llmclient/llms.py
index ecb4233..1974415 100644
--- a/llmclient/llms.py
+++ b/llmclient/llms.py
@@ -701,6 +701,9 @@ async def call(  # noqa: C901, PLR0915
         Raises:
             ValueError: If the number of completions (n) is invalid.
         """
+        # add static configuration to kwargs
+        chat_kwargs = self.config | chat_kwargs
+
         start_clock = asyncio.get_running_loop().time()
 
         # Deal with tools. Note OpenAI throws a 400 response if tools is empty:
@@ -723,6 +726,10 @@ async def call(  # noqa: C901, PLR0915
 
         # deal with specifying output type
         if isinstance(output_type, Mapping):  # Use structured outputs
+            model_name: str = chat_kwargs.get("model", "")
+            if not litellm.supports_response_schema(model_name, None):
+                raise ValueError(f"Model {model_name} does not support JSON schema.")
+
             chat_kwargs["response_format"] = {
                 "type": "json_schema",
                 "json_schema": {
@@ -751,8 +758,6 @@ async def call(  # noqa: C901, PLR0915
             ]
             chat_kwargs["response_format"] = {"type": "json_object"}
 
-        # add static configuration to kwargs
-        chat_kwargs = self.config | chat_kwargs
         n = chat_kwargs.get("n", 1)  # number of completions
         if n < 1:
             raise ValueError("Number of completions (n) must be >= 1.")
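Taken together, the three patches let `output_type` be either a Pydantic model (the existing JSON-mode path) or a plain JSON-schema mapping (the new structured-outputs path, gated by `litellm.supports_response_schema`). The sketch below is a minimal, hypothetical usage example, not part of the patches: it assumes `MultipleCompletionLLMModel` and `Message` are importable from `llmclient`, that the named models and API keys are available, and it uses a `DummyOutputSchema` stand-in mirroring the test fixture.

import asyncio

from pydantic import BaseModel

# NOTE: import locations are assumptions for illustration; adjust to wherever
# the package actually exports these names.
from llmclient import Message, MultipleCompletionLLMModel


class DummyOutputSchema(BaseModel):
    """Hypothetical stand-in for the fixture used in tests/test_llms.py."""

    name: str
    age: int


async def main() -> None:
    messages = [
        Message(
            content="My name is Claude and I am 1 year old. What is my name and age?"
        )
    ]

    # JSON mode (patch 1's `elif` branch): a Pydantic model is serialized into
    # the system prompt and response_format={"type": "json_object"} is sent.
    json_mode_model = MultipleCompletionLLMModel(name="gpt-3.5-turbo")
    await json_mode_model.call(messages, output_type=DummyOutputSchema)

    # Structured outputs (patch 1's `isinstance(output_type, Mapping)` branch):
    # a plain JSON-schema dict is sent via response_format={"type": "json_schema", ...}.
    # Patch 3 raises ValueError up front if litellm reports the model cannot honor it.
    structured_model = MultipleCompletionLLMModel(name="gpt-4o")
    results = await structured_model.call(
        messages, output_type=DummyOutputSchema.model_json_schema()
    )
    print(results)


if __name__ == "__main__":
    asyncio.run(main())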