From 5453a6de7e265560c6baed5b163771b7195a3e63 Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Tue, 26 Nov 2024 17:33:42 -0800
Subject: [PATCH] Added the MultipleCompletionLLMModel class used in LDP (#3)

* Added the MultipleCompletionLLMModel class used in LDP

---------

Co-authored-by: Mayk Caldas
---
 llmclient/__init__.py | 28 +-
 llmclient/constants.py | 5 +
 llmclient/embeddings.py | 6 +-
 llmclient/llms.py | 243 +++++++++++++-
 llmclient/types.py | 19 +-
 llmclient/utils.py | 5 +
 ...ionLLMModel.test_model[gpt-3.5-turbo].yaml | 104 ++++++
 ...CompletionLLMModel.test_output_schema.yaml | 102 ++++++
 ...st_parameterizing_tool_from_arg_union.yaml | 109 +++++++
 ...image_message[gpt-4o-mini-2024-07-18].yaml | 111 +++++++
 tests/test_embeddings.py | 112 ++++++-
 tests/test_llms.py | 300 +++++++++++-------
 uv.lock | 2 +-
 13 files changed, 1021 insertions(+), 125 deletions(-)
 create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_model[gpt-3.5-turbo].yaml
 create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_output_schema.yaml
 create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_parameterizing_tool_from_arg_union.yaml
 create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_text_image_message[gpt-4o-mini-2024-07-18].yaml

diff --git a/llmclient/__init__.py b/llmclient/__init__.py
index e1769b2..ea14ec7 100644
--- a/llmclient/__init__.py
+++ b/llmclient/__init__.py
@@ -1,7 +1,31 @@
-from llmclient.llms import LLMModel
-from llmclient.types import LLMResult
+from llmclient.embeddings import (
+    EmbeddingModel,
+    HybridEmbeddingModel,
+    LiteLLMEmbeddingModel,
+    SentenceTransformerEmbeddingModel,
+    SparseEmbeddingModel,
+    embedding_model_factory,
+)
+from llmclient.llms import LiteLLMModel, LLMModel, MultipleCompletionLLMModel
+from llmclient.types import (
+    Chunk,
+    Embeddable,
+    LLMResult,
+)
+from llmclient.version import __version__
 
 __all__ = [
+    "Chunk",
+    "Embeddable",
+    "EmbeddingModel",
+    "HybridEmbeddingModel",
     "LLMModel",
     "LLMResult",
+    "LiteLLMEmbeddingModel",
+    "LiteLLMModel",
+    "MultipleCompletionLLMModel",
+    "SentenceTransformerEmbeddingModel",
+    "SparseEmbeddingModel",
+    "__version__",
+    "embedding_model_factory",
 ]
diff --git a/llmclient/constants.py b/llmclient/constants.py
index 3220d62..7a4ce97 100644
--- a/llmclient/constants.py
+++ b/llmclient/constants.py
@@ -2,7 +2,12 @@
 
 import litellm
 
+# Estimate from OpenAI's FAQ
+# https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
 CHARACTERS_PER_TOKEN_ASSUMPTION: float = 4.0
+# Extra tokens added by the user/role framing of a message,
+# which must be counted when applying rate limits.
+# Taken from empirical counts in tests.
 EXTRA_TOKENS_FROM_USER_ROLE: int = 7
 
 MODEL_COST_MAP = litellm.get_model_cost_map("")
diff --git a/llmclient/embeddings.py b/llmclient/embeddings.py
index 2f4ca24..cba5a1a 100644
--- a/llmclient/embeddings.py
+++ b/llmclient/embeddings.py
@@ -16,11 +16,7 @@
 
 from llmclient.constants import CHARACTERS_PER_TOKEN_ASSUMPTION, MODEL_COST_MAP
 from llmclient.rate_limiter import GLOBAL_LIMITER
-
-
-def get_litellm_retrying_config(timeout: float = 60.0) -> dict[str, Any]:
-    """Get retrying configuration for litellm.acompletion and litellm.aembedding."""
-    return {"num_retries": 3, "timeout": timeout}
+from llmclient.utils import get_litellm_retrying_config
 
 
 class EmbeddingModes(StrEnum):
diff --git a/llmclient/llms.py b/llmclient/llms.py
index aae4e89..47c1799 100644
--- a/llmclient/llms.py
+++ b/llmclient/llms.py
@@ -1,6 +1,7 @@
import asyncio import contextlib import functools +import json from abc import ABC from collections.abc import ( AsyncGenerator, @@ -13,13 +14,18 @@ from inspect import isasyncgenfunction, signature from typing import ( Any, + ClassVar, + Self, TypeVar, cast, ) import litellm from aviary.core import ( + Message, + Tool, ToolRequestMessage, + ToolsAdapter, ToolSelector, ) from pydantic import ( @@ -41,7 +47,7 @@ from llmclient.prompts import default_system_prompt from llmclient.rate_limiter import GLOBAL_LIMITER from llmclient.types import Chunk, LLMResult -from llmclient.utils import is_coroutine_callable +from llmclient.utils import get_litellm_retrying_config, is_coroutine_callable if not IS_PYTHON_BELOW_312: _DeploymentTypedDictValidator = TypeAdapter( @@ -120,11 +126,6 @@ async def do_callbacks( f(*args, **kwargs) -def get_litellm_retrying_config(timeout: float = 60.0) -> dict[str, Any]: - """Get retrying configuration for litellm.acompletion and litellm.aembedding.""" - return {"num_retries": 3, "timeout": timeout} - - class LLMModel(ABC, BaseModel): model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True) @@ -582,3 +583,233 @@ async def select_tool( model_name=self.name, acompletion=self.router.acompletion ) return await tool_selector(*selection_args, **selection_kwargs) + + +class MultipleCompletionLLMModel(BaseModel): + """Run n completions at once, all starting from the same messages.""" + + model_config = ConfigDict(extra="forbid") + + # this should keep the original model + # if fine-tuned, this should still refer to the base model + name: str = "unknown" + config: dict = Field( + default={ + "model": "gpt-3.5-turbo", # Default model should have cheap input/output for testing + "temperature": 0.1, + } + ) + encoding: Any | None = None + + def __str__(self) -> str: + return f"{type(self).__name__} {self.name}" + + @model_validator(mode="after") + def set_model_name(self) -> Self: + if ( + self.config.get("model") in {"gpt-3.5-turbo", None} + and self.name != "unknown" + ) or (self.name != "unknown" and "model" not in self.config): + self.config["model"] = self.name + elif "model" in self.config and self.name == "unknown": + self.name = self.config["model"] + # note we do not consider case where both are set + # because that could be true if the model is fine-tuned + return self + + async def achat( + self, messages: Iterable[Message], **kwargs + ) -> litellm.ModelResponse: + return await litellm.acompletion( + messages=[m.model_dump(by_alias=True) for m in messages], + **(self.config | kwargs), + ) + + async def achat_iter(self, messages: Iterable[Message], **kwargs) -> AsyncGenerator: + return cast( + AsyncGenerator, + await litellm.acompletion( + messages=[m.model_dump(by_alias=True) for m in messages], + stream=True, + stream_options={ + "include_usage": True, # Included to get prompt token counts + }, + **(self.config | kwargs), + ), + ) + + # SEE: https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice + # > `required` means the model must call one or more tools. + TOOL_CHOICE_REQUIRED: ClassVar[str] = "required" + + async def call( # noqa: C901, PLR0915 + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + **chat_kwargs, + ) -> list[LLMResult]: + start_clock = asyncio.get_running_loop().time() + + # Deal with tools. 
Note OpenAI throws a 400 response if tools is empty: + # > Invalid 'tools': empty array. Expected an array with minimum length 1, + # > but got an empty array instead. + # So, circumvent this behavior if tools in (None, []) + if tools: + chat_kwargs["tools"] = ToolsAdapter.dump_python( + tools, exclude_none=True, by_alias=True + ) + if tool_choice is not None: + chat_kwargs["tool_choice"] = ( + { + "type": "function", + "function": {"name": tool_choice.info.name}, + } + if isinstance(tool_choice, Tool) + else tool_choice + ) + + # deal with specifying output type + if output_type is not None: + schema = json.dumps(output_type.model_json_schema(mode="serialization")) + schema_msg = f"Respond following this JSON schema:\n\n{schema}" + # Get the system prompt and its index, or the index to add it + i, system_prompt = next( + ((i, m) for i, m in enumerate(messages) if m.role == "system"), + (0, None), + ) + messages = [ + *messages[:i], + ( + system_prompt.append_text(schema_msg, inplace=False) + if system_prompt + else Message(role="system", content=schema_msg) + ), + *messages[i + 1 if system_prompt else i :], + ] + chat_kwargs["response_format"] = {"type": "json_object"} + + # add static configuration to kwargs + chat_kwargs = self.config | chat_kwargs + n = chat_kwargs.get("n", 1) # number of completions + if n < 1: + raise ValueError("Number of completions (n) must be >= 1.") + + prompt = [ + ( + m + if not isinstance(m, ToolRequestMessage) or m.tool_calls + # OpenAI doesn't allow for empty tool_calls lists, so downcast empty + # ToolRequestMessage to Message here + else Message(role=m.role, content=m.content) + ) + for m in messages + ] + results: list[LLMResult] = [] + + if callbacks is None: + completion: litellm.ModelResponse = await self.achat(prompt, **chat_kwargs) + if output_type is not None: + validate_json_completion(completion, output_type) + + for choice in completion.choices: + if isinstance(choice, litellm.utils.StreamingChoices): + raise NotImplementedError("Streaming is not yet supported.") + + if ( + tools is not None # Allows for empty tools list + or choice.finish_reason == "tool_calls" + or (getattr(choice.message, "tool_calls", None) is not None) + ): + serialized_choice_message = choice.message.model_dump() + serialized_choice_message["tool_calls"] = ( + serialized_choice_message.get("tool_calls") or [] + ) + output_messages: list[Message | ToolRequestMessage] = [ + ToolRequestMessage(**serialized_choice_message) + ] + else: + output_messages = [Message(**choice.message.model_dump())] + + results.append( + LLMResult( + model=self.name, + config=chat_kwargs, + prompt=prompt, + messages=output_messages, + logprob=sum_logprobs(choice), + system_fingerprint=completion.system_fingerprint, + # Note that these counts are aggregated over all choices + completion_count=completion.usage.completion_tokens, # type: ignore[attr-defined,unused-ignore] + prompt_count=completion.usage.prompt_tokens, # type: ignore[attr-defined,unused-ignore] + ) + ) + else: + if tools: + raise NotImplementedError("Using tools with callbacks is not supported") + if n > 1: + raise NotImplementedError( + "Multiple completions with callbacks is not supported" + ) + result = LLMResult(model=self.name, config=chat_kwargs, prompt=prompt) + + sync_callbacks = [f for f in callbacks if not is_coroutine_callable(f)] + async_callbacks = [f for f in callbacks if is_coroutine_callable(f)] + stream_completion = await self.achat_iter(messages, **chat_kwargs) + text_result = [] + role = "assistant" + + async for 
chunk in stream_completion:
+                delta = chunk.choices[0].delta
+                role = delta.role or role
+                if delta.content:
+                    s = delta.content
+                    if result.seconds_to_first_token == 0:
+                        result.seconds_to_first_token = (
+                            asyncio.get_running_loop().time() - start_clock
+                        )
+                    text_result.append(s)
+                    [await f(s) for f in async_callbacks]
+                    [f(s) for f in sync_callbacks]
+                if hasattr(chunk, "usage"):
+                    result.prompt_count = chunk.usage.prompt_tokens
+
+            output = "".join(text_result)
+            result.completion_count = litellm.token_counter(
+                model=self.name,
+                text=output,
+            )
+            # TODO: figure out how tools stream, and log probs
+            result.messages = [Message(role=role, content=output)]
+            results.append(result)
+
+        if not results:
+            # This happens in unit tests. We should probably not keep this block around
+            # long-term. Previously, we would emit an empty ToolRequestMessage if
+            # completion.choices were empty, so I am replicating that here.
+            results.append(
+                LLMResult(
+                    model=self.name,
+                    config=chat_kwargs,
+                    prompt=prompt,
+                    messages=[ToolRequestMessage(tool_calls=[])],
+                )
+            )
+
+        end_clock = asyncio.get_running_loop().time()
+
+        for result in results:
+            # Manually update prompt count if not set, which can
+            # happen if the target model doesn't support 'include_usage'
+            if not result.prompt_count and result.messages:
+                result.prompt_count = litellm.token_counter(
+                    model=self.name,
+                    messages=[m.model_dump() for m in result.messages],
+                )
+
+            # Record the total elapsed time up to the last token
+            result.seconds_to_last_token = end_clock - start_clock
+
+        return results
diff --git a/llmclient/types.py b/llmclient/types.py
index d4eeea1..3ad1df9 100644
--- a/llmclient/types.py
+++ b/llmclient/types.py
@@ -5,6 +5,7 @@
 from uuid import UUID, uuid4
 
 import litellm
+from aviary.core import Message
 from pydantic import (
     BaseModel,
     ConfigDict,
@@ -67,11 +68,14 @@ class LLMResult(BaseModel):
         alias="answer_id",
     )
     name: str | None = None
-    prompt: str | list[dict] | None = Field(
+    prompt: str | list[dict] | Message | list[Message] | None = Field(
         default=None,
         description="Optional prompt (str) or list of serialized prompts (list[dict]).",
     )
     text: str = ""
+    messages: list[Message] | None = Field(
+        default=None, description="Messages received from the LLM."
+    )
     prompt_count: int = 0
     completion_count: int = 0
     model: str
@@ -82,6 +86,9 @@ class LLMResult(BaseModel):
     seconds_to_last_token: float = Field(
         default=0.0, description="Delta time (sec) to last response token's arrival."
     )
+    logprob: float | None = Field(
+        default=None, description="Sum of logprobs in the completion."
+    )
 
     def __str__(self) -> str:
         return self.text
@@ -98,3 +105,13 @@ def cost(self) -> float:
         except KeyError:
             logger.warning(f"Could not find cost for model {self.model}.")
             return 0.0
+
+    # These two methods were implemented in ldp, but not in pqa. Check if they're necessary
+    # @property
+    # def provider(self) -> str:
+    #     """Get the model provider's name (e.g.
"openai", "mistral").""" + # return litellm.get_llm_provider(self.model)[1] + + # def get_supported_openai_params(self) -> list[str] | None: + # """Get the supported OpenAI parameters for the model.""" + # return litellm.get_supported_openai_params(self.model) diff --git a/llmclient/utils.py b/llmclient/utils.py index 304bf95..e99c8e9 100644 --- a/llmclient/utils.py +++ b/llmclient/utils.py @@ -12,6 +12,11 @@ import pymupdf +def get_litellm_retrying_config(timeout: float = 60.0) -> dict[str, Any]: + """Get retrying configuration for litellm.acompletion and litellm.aembedding.""" + return {"num_retries": 3, "timeout": timeout} + + def encode_image_to_base64(img: "np.ndarray") -> str: """Encode an image to a base64 string, to be included as an image_url in a Message.""" try: diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_model[gpt-3.5-turbo].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_model[gpt-3.5-turbo].yaml new file mode 100644 index 0000000..0f134c0 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_model[gpt-3.5-turbo].yaml @@ -0,0 +1,104 @@ +interactions: + - request: + body: + '{"messages": [{"role": "system", "content": "Respond with single words."}, + {"role": "user", "content": "Hello, how are you?"}], "model": "gpt-3.5-turbo", + "n": 2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "161" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RTPU/DMBDd8yssz23Vph+IbjBQBIIJBoRQ5NqXxNTxWfYFUar+d+S0NKlaJFYW + D+/de353Z28SxrhWfM64LAXJypn+1csQ1rPF0/P9nf6aSPexus4fdfGgb8Hd8F5U4PIdJP2oBhIr + Z4A02h0tPQiC6Dq6GKez2eVwmjZEhQpMlBWO+uPBtE+1X2J/OEqne2WJWkLgc/aaMMbYpjljRqvg + k8/ZsPeDVBCCKIDPD0WMcY8mIlyEoAMJS7zXkhItgW1iLxBVl/KQ10HEaLY2Zo9vD3cZLJzHZdjz + BzzXVocy8yAC2ugbCB1POuKTBkb/poGEsbdmKfVRTO48Vo4ywhXYaJimOzvePoMOuecISZgOPOmd + McsUkNAmdEbCpZAlqFbZPgBRK40dojv20yznvHdta1v8xb4lpARHoDLnQWl53G9b5iH+kd/KDiNu + AvOwDgRVlmtbgHdeN0tuNrlNvgEAAP//AwDXzTxTpAMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e8e2a2a8b36ebf3-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 27 Nov 2024 00:57:32 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=6uK9hXTAaX3GtQrh0wymC8uS1CnY22_CRYaLunDpYWc-1732669052-1.0.1.1-OMQFbfRQjfc9bjIfLNCZrZa25fy_pzJ61f1ImKFfmaA0uPjcJncalq9EcklmUYJavMmbHKxk.JuvT7OjHmVBkw; + path=/; expires=Wed, 27-Nov-24 01:27:32 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=AyWxlj6WhbGDdCX9HNNYmuke4avEYVHR7LCnqCXSIyo-1732669052770-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "108" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + 
x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999953" + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_e85a8fca3654ef15d3b91c4e8b039c0d + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_output_schema.yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_output_schema.yaml new file mode 100644 index 0000000..f929fa6 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_output_schema.yaml @@ -0,0 +1,102 @@ +interactions: + - request: + body: + '{"messages": [{"role": "system", "content": "Respond following this JSON + schema:\n\n{\"properties\": {\"name\": {\"title\": \"Name\", \"type\": \"string\"}, + \"age\": {\"title\": \"Age\", \"type\": \"integer\"}}, \"required\": [\"name\", + \"age\"], \"title\": \"DummyOutputSchema\", \"type\": \"object\"}"}, {"role": + "user", "content": "My name is Claude and I am 1 year old. What is my name and + age?"}], "model": "gpt-3.5-turbo", "n": 2, "response_format": {"type": "json_object"}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "480" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9xTy07DMBC85yusPaeING0RuSFOgBAnUBFBkWtvU4NjW/ZGolT9d+T0kVSAxJlL + DjM7oxnvZpMwBkpCwUCsOInG6dHV/PNpLm7C7ePdOl9ML++nD8/8rnlb4/x+AmlU2MUbCjqozoRt + nEZS1uxo4ZETRtfsIh/PZpPZOO+IxkrUUVY7GuVn0xG1fmFH59l4uleurBIYoGAvCWOMbbpvzGgk + fkDBztMD0mAIvEYojkOMgbc6IsBDUIG4IUh7UlhDaLrYm9JEqATDGyyhYCVca95KLCE9ULzumKw0 + 26GLx2UbeGxhWq33+PYYS9vaebsIe/6IL5VRYVV55MGaGCGQdZAMxN+6Zv+xa8LYa7fq9qQROG8b + RxXZdzTR8DLf2UF/XD2ZT/YkWeK6x7PxRfqDXSWRuNJh8H4guFih7KX9YfFWKjsghjv6nuYn711x + Zeq/2PeEEOgIZeU8SiVOG/djHuO/99vY8ZG7wBDWgbCplsrU6J1X3UV0u9wmXwAAAP//AwCJiYkd + /AMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e8dbe092bc16441-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 26 Nov 2024 23:43:44 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "309" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999894" + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 6ms + x-request-id: + - req_5f3e5551351be13c737ce50667811e1d + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_parameterizing_tool_from_arg_union.yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_parameterizing_tool_from_arg_union.yaml new file mode 100644 index 0000000..9eb680c --- /dev/null +++ 
b/tests/cassettes/TestMultipleCompletionLLMModel.test_parameterizing_tool_from_arg_union.yaml @@ -0,0 +1,109 @@ +interactions: + - request: + body: + '{"messages": [{"role": "user", "content": "Please win."}], "model": "gpt-3.5-turbo", + "n": 2, "tool_choice": "required", "tools": [{"type": "function", "function": + {"name": "play", "description": "Play one turn by choosing a move.", "parameters": + {"type": "object", "properties": {"move": {"anyOf": [{"type": "integer"}, {"type": + "null"}], "description": "Choose an integer to lose, choose None to win.", "title": + "Move"}}, "required": ["move"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "448" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA+xUTWvbQBC961csc7aD7dRKrVuhh0LakFLqBpIi1quRtPV+dXcVahv/97KSopWd + FHINVAexzJs3H29m95AQAryAjACrqWfSiOmHu/36x6fN17IW2/W327V8vOViWd18/DL7rWASGHrz + C5l/Yl0wLY1Az3UPM4vUY4g6v7pcpOm7dLFoAakLFIFWGT+9vFhOfWM3ejqbL5Y9s9acoYOM3CeE + EHJo/6FGVeAfyMhs8mSR6BytELLBiRCwWgQLUOe481R5mESQaeVRhbJVI8QI8FqLnFEhYuLuO4zO + USgqRH6DV/v1kq3S+npfbRE/f+fv765LPsrXhd6ZtqCyUWwQaIQP9uwsGSGgqGy5RtDdGY8QoLZq + JCofaobDA0j9iA+QhdaOcOJ8TF46/xwJYLFsHBW9Mr39OEgtdGWs3rgz5aDkirs6t0hd2wE4r02X + uyc/m9/8//zexvySPgM0J1MCY7U0Pvd6iyoETFddOIiPQAS7a99OyFMR7at+DU6j5QV6ytshDnvD + KKuxiMx4/2lTcD0Cxmv3vJiXYnd9c1W9JnwEGEPjsciNxYKz04ajm8XwRP7LbdC4LRjcznmUeclV + hdZYPix5ckz+AgAA//8DAI3rDWKjBQAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e8dbe004a387afa-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 26 Nov 2024 23:43:42 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=KgM_CbOJ8Or7Ox8J9BqngSRIXR83eqEH5DurocJaLj8-1732664622-1.0.1.1-YOsognHowLn83_5.SYdcV3Mk6t0JC0F2tRMWWu7zhfUKaJ0nDeSqQxaG2ouaLAaqzGd4v.AgNvIt1dINZ.gNYQ; + path=/; expires=Wed, 27-Nov-24 00:13:42 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=RGkXqcA_646HnnLEb..txOv0aByKWatHt2QrW.dppHY-1732664622457-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "229" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999963" + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_b5bea59bcffa604abbecf90237318698 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_text_image_message[gpt-4o-mini-2024-07-18].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_text_image_message[gpt-4o-mini-2024-07-18].yaml new file mode 100644 index 0000000..f648b73 
--- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_text_image_message[gpt-4o-mini-2024-07-18].yaml @@ -0,0 +1,111 @@ +interactions: + - request: + body: + '{"messages": [{"role": "user", "content": [{"type": "image_url", "image_url": + {"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAIAAAD8GO2jAAAAKElEQVR4nO3NMQEAAAjDMMC/ZzDBvlRA01vZJvwHAAAAAAAAAAAAbx2jxAE/i2AjOgAAAABJRU5ErkJggg=="}}, + {"type": "text", "text": "What color is this square? Respond only with the color + name."}]}], "model": "gpt-4o-mini-2024-07-18", "n": 2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "381" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9STP2/CMBDF93wKyzOpQqBA2bqgVuqEVNqqqiJjH8Hg+Cz7kPpHfPfKIZAgWqlr + lwz3u/fyfGd/JYxxrfiUcbkWJCtn0tvnz8XLbL6dfcJGDhZLhLunhw096vsqk7wXFbjcgKSj6kpi + 5QyQRnvA0oMgiK798SAfjYajfFiDChWYKCsdpUNMK211mmf5MM3GaX/SqNeoJQQ+Za8JY4x91d+Y + 0yp451OW9Y6VCkIQJfDpqYkx7tHEChch6EDCEu+1UKIlsHX0Oagu8bDaBRHT2Z0xTX1/+pXB0nlc + hoaf6ittdVgXHkRAG20DoeNJR3yRv/9f8ieMvdUr2Z2l5M5j5agg3IKNhpPrvNkJb69Ci/OGEZIw + Z6ojOTMsFJDQJnSmwqWQa1Cttr0CYqc0dkB38pdpfvI+HF3b8i/2LZASHIEqnAel5fmJ2zYP8aX8 + 1nYacx2Yh49AUBUrbUvwzuvDnleuGCjo55NJNrrhyT75BgAA//8DAHMIZsi1AwAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e8dbe02c857176b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 26 Nov 2024 23:43:44 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=FLcfXsLHocORp9BaX8WRXlKmbbwQLQT1aaZHgSHfOwM-1732664624-1.0.1.1-EMMzivzKoNvNES87qYj.8X3tabrd2Y0z7mUhxOXUBl_ApeNQMRYqOeomdi8VD9YY1AAfg9NvNYhuESasFB2r1Q; + path=/; expires=Wed, 27-Nov-24 00:13:44 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=xB_0vUwejpcRnbBxzZuL293zZT452oWlLskSKDKBGqE-1732664624374-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "1744" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-input-images: + - "50000" + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-input-images: + - "49999" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999187" + x-ratelimit-reset-input-images: + - 1ms + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_d887de09a40a121d235f796b4dc6a0c6 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_embeddings.py b/tests/test_embeddings.py index c91cc5a..5c5f2d7 100644 --- a/tests/test_embeddings.py +++ b/tests/test_embeddings.py @@ -1,6 +1,13 @@ import pytest -from llmclient.embeddings import MODEL_COST_MAP, LiteLLMEmbeddingModel +from llmclient.embeddings 
import ( + MODEL_COST_MAP, + HybridEmbeddingModel, + LiteLLMEmbeddingModel, + SentenceTransformerEmbeddingModel, + SparseEmbeddingModel, + embedding_model_factory, +) class TestLiteLLMEmbeddingModel: @@ -65,3 +72,106 @@ async def test_embed_documents(self, embedding_model, mocker): embeddings = await embedding_model.embed_documents(texts) assert embeddings == [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] + + +@pytest.mark.asyncio +async def test_embedding_model_factory_sentence_transformer() -> None: + """Test that the factory creates a SentenceTransformerEmbeddingModel when given an 'st-' prefix.""" + embedding = "st-multi-qa-MiniLM-L6-cos-v1" + model = embedding_model_factory(embedding) + assert isinstance( + model, SentenceTransformerEmbeddingModel + ), "Factory did not create SentenceTransformerEmbeddingModel" + assert model.name == "multi-qa-MiniLM-L6-cos-v1", "Incorrect model name assigned" + + # Test embedding functionality + texts = ["Hello world", "Test sentence"] + embeddings = await model.embed_documents(texts) + assert len(embeddings) == 2, "Incorrect number of embeddings returned" + assert all( + isinstance(embed, list) for embed in embeddings + ), "Embeddings are not in list format" + assert all(len(embed) > 0 for embed in embeddings), "Embeddings should not be empty" + + +@pytest.mark.asyncio +async def test_embedding_model_factory_hybrid_with_sentence_transformer() -> None: + """Test that the factory creates a HybridEmbeddingModel containing a SentenceTransformerEmbeddingModel.""" + embedding = "hybrid-st-multi-qa-MiniLM-L6-cos-v1" + model = embedding_model_factory(embedding) + assert isinstance( + model, HybridEmbeddingModel + ), "Factory did not create HybridEmbeddingModel" + assert len(model.models) == 2, "Hybrid model should contain two component models" + assert isinstance( + model.models[0], SentenceTransformerEmbeddingModel + ), "First component should be SentenceTransformerEmbeddingModel" + assert isinstance( + model.models[1], SparseEmbeddingModel + ), "Second component should be SparseEmbeddingModel" + + # Test embedding functionality + texts = ["Hello world", "Test sentence"] + embeddings = await model.embed_documents(texts) + assert len(embeddings) == 2, "Incorrect number of embeddings returned" + expected_length = len((await model.models[0].embed_documents(texts))[0]) + len( + (await model.models[1].embed_documents(texts))[0] + ) + assert all( + len(embed) == expected_length for embed in embeddings + ), "Embeddings do not match expected combined length" + + +@pytest.mark.asyncio +async def test_embedding_model_factory_invalid_st_prefix() -> None: + """Test that the factory raises a ValueError when 'st-' prefix is provided without a model name.""" + embedding = "st-" + with pytest.raises( + ValueError, + match="SentenceTransformer model name must be specified after 'st-'.", + ): + embedding_model_factory(embedding) + + +@pytest.mark.asyncio +async def test_embedding_model_factory_unknown_prefix() -> None: + """Test that the factory defaults to LiteLLMEmbeddingModel when an unknown prefix is provided.""" + embedding = "unknown-prefix-model" + model = embedding_model_factory(embedding) + assert isinstance( + model, LiteLLMEmbeddingModel + ), "Factory did not default to LiteLLMEmbeddingModel for unknown prefix" + assert model.name == "unknown-prefix-model", "Incorrect model name assigned" + + +@pytest.mark.asyncio +async def test_embedding_model_factory_sparse() -> None: + """Test that the factory creates a SparseEmbeddingModel when 'sparse' is provided.""" + embedding = 
"sparse" + model = embedding_model_factory(embedding) + assert isinstance( + model, SparseEmbeddingModel + ), "Factory did not create SparseEmbeddingModel" + assert model.name == "sparse", "Incorrect model name assigned" + + +@pytest.mark.asyncio +async def test_embedding_model_factory_litellm() -> None: + """Test that the factory creates a LiteLLMEmbeddingModel when 'litellm-' prefix is provided.""" + embedding = "litellm-text-embedding-3-small" + model = embedding_model_factory(embedding) + assert isinstance( + model, LiteLLMEmbeddingModel + ), "Factory did not create LiteLLMEmbeddingModel" + assert model.name == "text-embedding-3-small", "Incorrect model name assigned" + + +@pytest.mark.asyncio +async def test_embedding_model_factory_default() -> None: + """Test that the factory defaults to LiteLLMEmbeddingModel when no known prefix is provided.""" + embedding = "default-model" + model = embedding_model_factory(embedding) + assert isinstance( + model, LiteLLMEmbeddingModel + ), "Factory did not default to LiteLLMEmbeddingModel" + assert model.name == "default-model", "Incorrect model name assigned" diff --git a/tests/test_llms.py b/tests/test_llms.py index 7ad7358..5d45ddc 100644 --- a/tests/test_llms.py +++ b/tests/test_llms.py @@ -1,19 +1,23 @@ import pathlib import pickle -from typing import Any -from unittest.mock import patch +from enum import StrEnum +from typing import Any, ClassVar +from unittest.mock import Mock, patch import litellm +import numpy as np import pytest +from aviary.core import Message, Tool, ToolRequestMessage +from pydantic import BaseModel -from llmclient.embeddings import ( - HybridEmbeddingModel, - LiteLLMEmbeddingModel, - SentenceTransformerEmbeddingModel, - SparseEmbeddingModel, - embedding_model_factory, +from llmclient.exceptions import JSONSchemaValidationError +from llmclient.llms import ( + Chunk, + LiteLLMModel, + MultipleCompletionLLMModel, + validate_json_completion, ) -from llmclient.llms import Chunk, LiteLLMModel +from llmclient.types import LLMResult from tests.conftest import VCR_DEFAULT_MATCH_ON @@ -158,104 +162,182 @@ def test_pickling(self, tmp_path: pathlib.Path) -> None: assert llm.router.deployment_names == rehydrated_llm.router.deployment_names -@pytest.mark.asyncio -async def test_embedding_model_factory_sentence_transformer() -> None: - """Test that the factory creates a SentenceTransformerEmbeddingModel when given an 'st-' prefix.""" - embedding = "st-multi-qa-MiniLM-L6-cos-v1" - model = embedding_model_factory(embedding) - assert isinstance( - model, SentenceTransformerEmbeddingModel - ), "Factory did not create SentenceTransformerEmbeddingModel" - assert model.name == "multi-qa-MiniLM-L6-cos-v1", "Incorrect model name assigned" - - # Test embedding functionality - texts = ["Hello world", "Test sentence"] - embeddings = await model.embed_documents(texts) - assert len(embeddings) == 2, "Incorrect number of embeddings returned" - assert all( - isinstance(embed, list) for embed in embeddings - ), "Embeddings are not in list format" - assert all(len(embed) > 0 for embed in embeddings), "Embeddings should not be empty" - - -@pytest.mark.asyncio -async def test_embedding_model_factory_hybrid_with_sentence_transformer() -> None: - """Test that the factory creates a HybridEmbeddingModel containing a SentenceTransformerEmbeddingModel.""" - embedding = "hybrid-st-multi-qa-MiniLM-L6-cos-v1" - model = embedding_model_factory(embedding) - assert isinstance( - model, HybridEmbeddingModel - ), "Factory did not create HybridEmbeddingModel" - assert 
len(model.models) == 2, "Hybrid model should contain two component models" - assert isinstance( - model.models[0], SentenceTransformerEmbeddingModel - ), "First component should be SentenceTransformerEmbeddingModel" - assert isinstance( - model.models[1], SparseEmbeddingModel - ), "Second component should be SparseEmbeddingModel" - - # Test embedding functionality - texts = ["Hello world", "Test sentence"] - embeddings = await model.embed_documents(texts) - assert len(embeddings) == 2, "Incorrect number of embeddings returned" - expected_length = len((await model.models[0].embed_documents(texts))[0]) + len( - (await model.models[1].embed_documents(texts))[0] +class CILLMModelNames(StrEnum): + """Models to use for generic CI testing.""" + + ANTHROPIC = "claude-3-haiku-20240307" # Cheap and not Anthropic's cutting edge + OPENAI = "gpt-4o-mini-2024-07-18" # Cheap and not OpenAI's cutting edge + + +class DummyOutputSchema(BaseModel): + name: str + age: int + + +class TestMultipleCompletionLLMModel: + NUM_COMPLETIONS: ClassVar[int] = 2 + DEFAULT_CONFIG: ClassVar[dict] = {"n": NUM_COMPLETIONS} + MODEL_CLS: ClassVar[type[MultipleCompletionLLMModel]] = MultipleCompletionLLMModel + + async def call_model( + self, model: MultipleCompletionLLMModel, *args, **kwargs + ) -> list[LLMResult]: + return await model.call(*args, **kwargs) + + @pytest.mark.parametrize( + "model_name", ["gpt-3.5-turbo", CILLMModelNames.ANTHROPIC.value] ) - assert all( - len(embed) == expected_length for embed in embeddings - ), "Embeddings do not match expected combined length" - - -@pytest.mark.asyncio -async def test_embedding_model_factory_invalid_st_prefix() -> None: - """Test that the factory raises a ValueError when 'st-' prefix is provided without a model name.""" - embedding = "st-" - with pytest.raises( - ValueError, - match="SentenceTransformer model name must be specified after 'st-'.", - ): - embedding_model_factory(embedding) - - -@pytest.mark.asyncio -async def test_embedding_model_factory_unknown_prefix() -> None: - """Test that the factory defaults to LiteLLMEmbeddingModel when an unknown prefix is provided.""" - embedding = "unknown-prefix-model" - model = embedding_model_factory(embedding) - assert isinstance( - model, LiteLLMEmbeddingModel - ), "Factory did not default to LiteLLMEmbeddingModel for unknown prefix" - assert model.name == "unknown-prefix-model", "Incorrect model name assigned" - - -@pytest.mark.asyncio -async def test_embedding_model_factory_sparse() -> None: - """Test that the factory creates a SparseEmbeddingModel when 'sparse' is provided.""" - embedding = "sparse" - model = embedding_model_factory(embedding) - assert isinstance( - model, SparseEmbeddingModel - ), "Factory did not create SparseEmbeddingModel" - assert model.name == "sparse", "Incorrect model name assigned" - - -@pytest.mark.asyncio -async def test_embedding_model_factory_litellm() -> None: - """Test that the factory creates a LiteLLMEmbeddingModel when 'litellm-' prefix is provided.""" - embedding = "litellm-text-embedding-3-small" - model = embedding_model_factory(embedding) - assert isinstance( - model, LiteLLMEmbeddingModel - ), "Factory did not create LiteLLMEmbeddingModel" - assert model.name == "text-embedding-3-small", "Incorrect model name assigned" - - -@pytest.mark.asyncio -async def test_embedding_model_factory_default() -> None: - """Test that the factory defaults to LiteLLMEmbeddingModel when no known prefix is provided.""" - embedding = "default-model" - model = embedding_model_factory(embedding) - assert 
isinstance( - model, LiteLLMEmbeddingModel - ), "Factory did not default to LiteLLMEmbeddingModel" - assert model.name == "default-model", "Incorrect model name assigned" + @pytest.mark.asyncio + async def test_achat(self, model_name: str) -> None: + model = MultipleCompletionLLMModel(name=model_name) + response = await model.achat( + messages=[ + Message(content="What are three things I should do today?"), + ] + ) + + assert len(response.choices) == 1 + + # Check we can iterate through the response + async for chunk in await model.achat_iter( + messages=[ + Message(content="What are three things I should do today?"), + ] + ): + assert len(chunk.choices) == 1 + + @pytest.mark.vcr(match_on=[*VCR_DEFAULT_MATCH_ON, "body"]) + @pytest.mark.parametrize("model_name", ["gpt-3.5-turbo"]) + @pytest.mark.asyncio + async def test_model(self, model_name: str) -> None: + # Make model_name an arg so that TestLLMModel can parametrize it + # only testing OpenAI, as other APIs don't support n>1 + model = self.MODEL_CLS(name=model_name, config=self.DEFAULT_CONFIG) + messages = [ + Message(role="system", content="Respond with single words."), + Message(content="Hello, how are you?"), + ] + results = await self.call_model(model, messages) + assert len(results) == self.NUM_COMPLETIONS + + for result in results: + assert result.prompt_count > 0 + assert result.completion_count > 0 + assert result.cost > 0 + assert result.logprob is None or result.logprob <= 0 + + @pytest.mark.parametrize( + "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"] + ) + @pytest.mark.asyncio + async def test_streaming(self, model_name: str) -> None: + model = self.MODEL_CLS(name=model_name, config=self.DEFAULT_CONFIG) + messages = [ + Message(role="system", content="Respond with single words."), + Message(content="Hello, how are you?"), + ] + + def callback(_) -> None: + return + + with pytest.raises( + NotImplementedError, + match="Multiple completions with callbacks is not supported", + ): + await self.call_model(model, messages, [callback]) + + @pytest.mark.vcr + @pytest.mark.asyncio + async def test_parameterizing_tool_from_arg_union(self) -> None: + def play(move: int | None) -> None: + """Play one turn by choosing a move. + + Args: + move: Choose an integer to lose, choose None to win. + """ + + results = await self.call_model( + self.MODEL_CLS(name="gpt-3.5-turbo", config=self.DEFAULT_CONFIG), + messages=[Message(content="Please win.")], + tools=[Tool.from_function(play)], + ) + assert len(results) == self.NUM_COMPLETIONS + for result in results: + assert result.messages + assert len(result.messages) == 1 + assert isinstance(result.messages[0], ToolRequestMessage) + assert result.messages[0].tool_calls + assert result.messages[0].tool_calls[0].function.arguments["move"] is None + + @pytest.mark.asyncio + @pytest.mark.vcr + async def test_output_schema(self) -> None: + model = self.MODEL_CLS(name="gpt-3.5-turbo", config=self.DEFAULT_CONFIG) + messages = [ + Message( + content=( + "My name is Claude and I am 1 year old. What is my name and age?" 
+ ) + ), + ] + results = await self.call_model(model, messages, output_type=DummyOutputSchema) + assert len(results) == self.NUM_COMPLETIONS + for result in results: + assert result.messages + assert len(result.messages) == 1 + assert result.messages[0].content + DummyOutputSchema.model_validate_json(result.messages[0].content) + + @pytest.mark.parametrize("model_name", [CILLMModelNames.OPENAI.value]) + @pytest.mark.asyncio + @pytest.mark.vcr + async def test_text_image_message(self, model_name: str) -> None: + model = self.MODEL_CLS(name=model_name, config=self.DEFAULT_CONFIG) + + # An RGB image of a red square + image = np.zeros((32, 32, 3), dtype=np.uint8) + image[:] = [255, 0, 0] # (255 red, 0 green, 0 blue) is maximum red in RGB + + results = await self.call_model( + model, + messages=[ + Message.create_message( + text="What color is this square? Respond only with the color name.", + image=image, + ) + ], + ) + assert len(results) == self.NUM_COMPLETIONS + for result in results: + assert ( + result.messages is not None + ), "Expected messages in result, but got None" + assert ( + result.messages[-1].content is not None + ), "Expected content in message, but got None" + assert "red" in result.messages[-1].content.lower() + + +def test_json_schema_validation() -> None: + # Invalid JSON + mock_completion1 = Mock() + mock_completion1.choices = [Mock()] + mock_completion1.choices[0].message.content = "not a json" + # Invalid schema + mock_completion2 = Mock() + mock_completion2.choices = [Mock()] + mock_completion2.choices[0].message.content = '{"name": "John", "age": "nan"}' + # Valid schema + mock_completion3 = Mock() + mock_completion3.choices = [Mock()] + mock_completion3.choices[0].message.content = '{"name": "John", "age": 30}' + + class DummyModel(BaseModel): + name: str + age: int + + with pytest.raises(JSONSchemaValidationError): + validate_json_completion(mock_completion1, DummyModel) + with pytest.raises(JSONSchemaValidationError): + validate_json_completion(mock_completion2, DummyModel) + validate_json_completion(mock_completion3, DummyModel) diff --git a/uv.lock b/uv.lock index b1dcc0f..01f65ad 100644 --- a/uv.lock +++ b/uv.lock @@ -585,7 +585,7 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.1.dev37+g63f57b5.d20241126" +version = "0.1.dev26+g4e95024.d20241126" source = { editable = "." } dependencies = [ { name = "aiofiles" },
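
Usage note (illustrative only, not part of the patch above): a minimal sketch of how the new MultipleCompletionLLMModel can be driven to request several completions of the same prompt in one call. The model name, temperature, and prompt are arbitrary examples, and an OpenAI API key is assumed to be configured in the environment.

    import asyncio

    from aviary.core import Message

    from llmclient import MultipleCompletionLLMModel


    async def main() -> None:
        # config["n"] asks the provider for two completions of the same prompt;
        # call() returns one LLMResult per returned choice.
        model = MultipleCompletionLLMModel(
            name="gpt-4o-mini", config={"n": 2, "temperature": 0.5}
        )
        results = await model.call(messages=[Message(content="Name a prime number.")])
        for result in results:
            print(result.messages[0].content if result.messages else "")


    asyncio.run(main())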