From 7146c916c43c609b46defb18217a3cd9cd4c980b Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Thu, 5 Dec 2024 13:54:20 -0800 Subject: [PATCH 01/18] Overloaded typying in MultipleCompletionLLMModel.call. It returns either a list or a single element of LLMResult depending on how many completions are requested --- llmclient/llms.py | 97 ++++++++- ...st_multiple_completion[gpt-3.5-turbo].yaml | 194 +++++++++++++++++ ...odel.test_multiple_completion[openai].yaml | 200 ++++++++++++++++++ ...e_completion[claude-3-haiku-20240307].yaml | 69 ++++++ ...test_single_completion[gpt-3.5-turbo].yaml | 104 +++++++++ tests/test_llms.py | 78 +++---- uv.lock | 88 +++----- 7 files changed, 725 insertions(+), 105 deletions(-) create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml diff --git a/llmclient/llms.py b/llmclient/llms.py index 1066e08..20fc9bf 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -16,9 +16,11 @@ from typing import ( Any, ClassVar, + Literal, Self, TypeVar, cast, + overload, ) import litellm @@ -605,8 +607,8 @@ class MultipleCompletionLLMModel(BaseModel): description=( "Configuration of the model:" "model is the name of the llm model to use," - "temperature is the sampling temperature, and", - "n is the number of completions to generate.", + "temperature is the sampling temperature, and" + "n is the number of completions to generate." ), ) encoding: Any | None = None @@ -652,7 +654,7 @@ async def achat_iter(self, messages: Iterable[Message], **kwargs) -> AsyncGenera # > `required` means the model must call one or more tools. TOOL_CHOICE_REQUIRED: ClassVar[str] = "required" - async def call( # noqa: C901, PLR0915 + async def _call( # noqa: C901, PLR0915 self, messages: list[Message], callbacks: list[Callable] | None = None, @@ -823,3 +825,92 @@ async def call( # noqa: C901, PLR0915 result.seconds_to_last_token = end_clock - start_clock return results + + # TODO: Is it good practice to have this multiple interface? + # Users can just use `call` and we chat `n` + # or they can specifically call `call_single` or `call_multiple` + async def call_single( + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + **chat_kwargs, + ) -> LLMResult: + if chat_kwargs.get("n", 1) != 1 or self.config.get("n", 1) != 1: + raise ValueError("n must be 1 for call_single.") + return ( + await self._call( + messages, callbacks, output_type, tools, tool_choice, **chat_kwargs + ) + )[0] + + async def call_multiple( + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + **chat_kwargs, + ) -> list[LLMResult]: + if chat_kwargs.get("n", 1) == 1: + logger.warning( + "n is 1 for call_multiple. 
It will return a list with a single element" + ) + return await self._call( + messages, callbacks, output_type, tools, tool_choice, **chat_kwargs + ) + + @overload + async def call( + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + n: Literal[1] = 1, + **chat_kwargs, + ) -> LLMResult: ... + + @overload + async def call( + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + n: int | None = None, + **chat_kwargs, + ) -> list[LLMResult]: ... + + async def call( + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + n: int | None = None, + **chat_kwargs, + ) -> list[LLMResult] | LLMResult: + + # Uses the LLMModel configuration unless specified in chat_kwargs + # If n is not specified anywhere, defaults to 1 + if not n or n <= 0: + logger.info( + "Invalid n passed to the call function. Will get it from the model's configuration" + ) + n = self.config.get("n", 1) + if "n" in chat_kwargs: + n = chat_kwargs["n"] + if n == 1: + return await self.call_single( + messages, callbacks, output_type, tools, tool_choice, **chat_kwargs + ) + return await self.call_multiple( + messages, callbacks, output_type, tools, tool_choice, **chat_kwargs + ) diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml new file mode 100644 index 0000000..35c02c0 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml @@ -0,0 +1,194 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '149' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '1' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RTy2rDMBC8+yuEzklo3jS3QCCXXNoe+qIYWdrYamStKq1LS8i/FzkPOySFXnvR + YWZnNLsrbRPGuFZ8xrgsBMnSme48W6zuxvBAL5v55/3H06Ra4OOoWhXl82LJO1GB2TtIOqp6Ektn + gDTaPS09CILo2p8Oh6PhYHo7qYkSFZgoyx11h71xlyqfYfemPxgflAVqCYHP2GvCGGPb+owZrYIv + PmM3nSNSQggiBz47FTHGPZqIcBGCDiQs8U5DSrQEto69RFRtysO6CiJGs5UxB3x3ustg7jxm4cCf + 8LW2OhSpBxHQRt9A6HjSEl800P83DSSMvdVLqc5icuexdJQSbsBGw8Fgb8ebZ9AiDxwhCdOCR50r + ZqkCEtqE1ki4FLIA1SibByAqpbFFtMd+meWa975tbfO/2DeElOAIVOo8KC3P+23KPMQ/8lvZacR1 + YB6+A0GZrrXNwTuv6yXXm9wlPwAAAP//AwAh8pBrpAMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed70040cbcdf99b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - 
application/json + Date: + - Thu, 05 Dec 2024 21:06:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - '134' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '12000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '11999' + x-ratelimit-remaining-tokens: + - '999953' + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_1f88664946b9891fbc90796687f144c4 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '149' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RTTUsDMRC9768IOW9LP63tzaIIIqgH7UFkSZPZbTSbCcksWEr/u2T7sVtawauX + HN6b9/JmJtkkjHGt+IxxuRIkS2c6N8vbx5fF9Tisy7l5e13clfl0/vQwfl5P5o6nUYHLT5B0UHUl + ls4AabQ7WnoQBNG1PxkOR8PBZHpVEyUqMFFWOOoMu+MOVX6JnV5/MN4rV6glBD5j7wljjG3qM2a0 + Cr75jPXSA1JCCKIAPjsWMcY9mohwEYIOJCzxtCElWgJbx75HVG3KQ14FEaPZypg9vj3eZbBwHpdh + zx/xXFsdVpkHEdBG30DoeNISnzXQ/zcNJIx91EupTmJy57F0lBF+gY2Gg8HOjjfPoEXuOUISpgWP + 0gtmmQIS2oTWSLgUcgWqUTYPQFRKY4toj/08yyXvXdvaFn+xbwgpwRGozHlQWp7225R5iH/kt7Lj + iOvAPKwDQZnl2hbgndf1kutNbpMfAAAA//8DALEE5HikAwAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700428d77f99b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - '114' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '12000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '11999' + x-ratelimit-remaining-tokens: + - '999953' + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_e32516fa5bb6ab11dda5155511280ea6 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml new file mode 100644 index 0000000..cc8a292 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml @@ -0,0 +1,200 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"Respond with single 
words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '158' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA7RTwWoCMRC971ekObui7qLWm1JaWgq9FEopZckm4xqbzYQkSxXx30tWu7uihR7a + Sw7z5r28N5PsIkKoFHRGKF8xz0uj4nl+A3yxWD9Vi4f56zYtko15Htr1/aP9fKG9wMB8Ddx/s/oc + S6PAS9QHmFtgHoLqcJIkaZJcT8c1UKIAFWiF8XGKcSm1jEeDURoPJvFwemSvUHJwdEbeIkII2dVn + 8KkFbOiMDHrflRKcYwXQWdNECLWoQoUy56TzTHvaa0GO2oOurd9KDf0uZmFZORb86UqpY33fXKaw + MBZzd8Sb+lJq6VaZBeZQB2Hn0dCoQz5LMPybBHeI4uqfE0SEvNdrqU58UmOxND7z+AE6CI5GBzna + PoYWTI+YR89UhzPuXRDLBHgmlevMhHLGVyBaZvsEWCUkdoDu3M+9XNI+xJa6+I18C3AOxoPIjAUh + +Wnets1C+Ck/tTUjrg1Tt3UeymwpdQHWWHnY8tJkec4SPoXJIKfRPvoCAAD//wMAviUi9bUDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed71d50ac15cf13-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:26:27 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=.T97IJZbEqHKl_VUBqOzbYQ3.fPwrK1uEUCoRWrQ0Vs-1733433987-1.0.1.1-g7sgDoAXo0fTveWsSMwxPdEXEmD5ZOQ_XYi1pZoi0dW2JzEVU83E5oRAyXudBimOLtvB92CoJm1WxF9LBkquZA; + path=/; expires=Thu, 05-Dec-24 21:56:27 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=h0PMfMYOdM04Mkzg9aKUS2PH3E1LFBnUsNmdRQ4ltVY-1733433987796-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - '1036' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999954' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2525b96c65ed2235204a4f7fbb79f88e + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '158' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + 
H4sIAAAAAAAAAwAAAP//tFPLasMwELz7K1Sd7eA479wKpaVQAr30UoqRpbWtVtYKSYG0If9e5Dzs + kBR6aC867OyMZnalbUQIlYIuCeU187wxKrkt7qCeblYTs5gau3pxMmP8/vmrSh/hicaBgcU7cH9k + DTg2RoGXqPcwt8A8BNXhbDQaj0aLxbAFGhSgAq0yPhlj0kgtkyzNxkk6S4bzA7tGycHRJXmNCCFk + 257BpxawoUuSxsdKA86xCujy1EQItahChTLnpPNMexp3IEftQbfWHxDFoI9ZKNeOBX96rdShvjtd + prAyFgt3wE/1Umrp6twCc6iDsPNoaNQjXyQY/l2Cm39OEBHy1q5lfeaTGouN8bnHD9BBMMv2crR7 + DB04PmAePVM9zjS+IpYL8Ewq15sJ5YzXIDpm9wTYWkjsAf25X3q5pr2PLXX1G/kO4ByMB5EbC0Ly + 87xdm4XwU35qO424NUzdp/PQ5KXUFVhj5X7LpcnTWTopyvmMpzTaRd8AAAD//wMAgEsPw7UDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed71d57fe24cf13-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:26:31 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - '765' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999954' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_84370a5d5d53f54172bc0ffe3feb7e4a + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml new file mode 100644 index 0000000..ff06001 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml @@ -0,0 +1,69 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":[{"type":"text","text":"Hello, how + are you?"}]}],"system":[{"type":"text","text":"Respond with single words."}],"max_tokens":4096,"model":"claude-3-haiku-20240307"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '202' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - litellm/1.53.3 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA0yOzWqEQBCE36XO4+LfYpxb8gA5BdwlBBm0MbJjj7F7wCC+e1ASyKngqx9qw9jD + YpKhTbPXItPhpcqeFwlV2az3p9tb3cBAv2c6UiTiBoLBEvwBnMgo6lhhMIWePCw672JPSZF8uvER + kzzNy7RIKxh0gZVYYd+3v0Wl9eieYtGQ9xfsHwaiYW4XchIYFsR9q3Fh/BpCX5G4I1iO3hvE85Xd + MPIctdXwIBbYrDYIUf+j677/AAAA//8DAHETe7LyAAAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700088ad5942c-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:27 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-ratelimit-requests-limit: + - '5000' + anthropic-ratelimit-requests-remaining: + - '4999' + anthropic-ratelimit-requests-reset: + - '2024-12-05T21:06:27Z' + anthropic-ratelimit-tokens-limit: + - '5000000' + anthropic-ratelimit-tokens-remaining: + - '5000000' + anthropic-ratelimit-tokens-reset: + - '2024-12-05T21:06:27Z' + request-id: + - req_01PYs7k3gcHPDqdgTayudkMv + via: + - 1.1 google + status: + code: 200 + message: OK +version: 1 diff --git 
a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml new file mode 100644 index 0000000..073df30 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml @@ -0,0 +1,104 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":1}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '149' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4yST0sDMRDF7/spQs5tabstK70JQj0oFL2IIks2me7GZjMxmaVq6XeXbP/sihW8 + 5DC/eS9vJtkljHGt+IJxWQmStTPD6+Lm7tauskf7/iSMfdjMlvPnWZV9bVfbez6ICizeQNJJNZJY + OwOk0R6w9CAIouskS9NZOs2ushbUqMBEWelomI7mQ2p8gcPxZDo/KivUEgJfsJeEMcZ27RkzWgUf + fMHGg1OlhhBECXxxbmKMezSxwkUIOpCwxAcdlGgJbBt7iaj6yMO6CSJGs40xx/r+fJfB0nkswpGf + 62ttdahyDyKgjb6B0PGW7hPGXtuZmh8xufNYO8oJN2Cj4XR6sOPdFjs4OTJCEqanSQcXzHIFJLQJ + vZVwKWQFqlN2+xON0tgDSW/k31kueR/G1rb8j30HpARHoHLnQWn5c96uzUP8Yn+1nVfcBubhMxDU + +VrbErzzun3k9iX3yTcAAAD//wMAusvg7OMCAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700089e2e072b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:27 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=4Fzl_VHC9.c_.kyLBREHy8a7wA.lEcwtqZXonX9ka10-1733432787-1.0.1.1-tBlI5dXtGa55yRlJwRgFkxlkQ7emZl1_xhYirjNw7CcPBv7WkC60ubux0sARYF8Nzun5tNgFTC100P_ywLDMgw; + path=/; expires=Thu, 05-Dec-24 21:36:27 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=WokAKLMIioMxOZo9K5N1oJLSuWXQVmUht7hb75_Z06w-1733432787525-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - '132' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '12000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '11999' + x-ratelimit-remaining-tokens: + - '999969' + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_567abdfb8bf13c71bc3f2bac8be8b4af + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_llms.py b/tests/test_llms.py index 687b468..40d207d 100644 --- a/tests/test_llms.py +++ b/tests/test_llms.py @@ -8,7 +8,7 @@ import numpy as np import pytest from aviary.core import Tool, ToolRequestMessage -from pydantic import BaseModel, Field +from pydantic import BaseModel from llmclient.exceptions import JSONSchemaValidationError from llmclient.llms import ( @@ -318,68 +318,52 @@ async def 
test_text_image_message(self, model_name: str) -> None: ), "Expected content in message, but got None" assert "red" in result.messages[-1].content.lower() - -class TestSingleCompletionLLMModel(TestMultipleCompletionLLMModel): - NUM_COMPLETIONS: ClassVar[int] = 1 - DEFAULT_CONFIG: ClassVar[dict] = {} - MODEL_CLS: ClassVar[type[MultipleCompletionLLMModel]] = MultipleCompletionLLMModel - + # Test n = 1 @pytest.mark.parametrize( "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"] ) @pytest.mark.asyncio @pytest.mark.vcr - async def test_model(self, model_name: str) -> None: - await super().test_model(model_name) - - @pytest.mark.vcr - @pytest.mark.parametrize( - "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"] - ) - @pytest.mark.asyncio - async def test_streaming(self, model_name: str) -> None: - model = self.MODEL_CLS(name=model_name) + async def test_single_completion(self, model_name: str) -> None: + model = self.MODEL_CLS(name=model_name, config={"n": 1}) messages = [ Message(role="system", content="Respond with single words."), Message(content="Hello, how are you?"), ] - content = [] + result = await model.call(messages) + assert isinstance(result, LLMResult) - def callback(s): - content.append(s) + result = await model.call(messages, n=1) + assert isinstance(result, LLMResult) + assert result.messages + assert len(result.messages) == 1 + assert result.messages[0].content - results = await model.call(messages, [callback]) - for result in results: - assert result.completion_count > 0 - assert content - - @pytest.mark.vcr @pytest.mark.asyncio - async def test_parameterizing_tool_from_arg_union(self) -> None: - await super().test_parameterizing_tool_from_arg_union() - @pytest.mark.vcr - @pytest.mark.asyncio - @pytest.mark.skip(reason="TODO: Check why this error should be raised") - async def test_output_type_rejected_validation(self) -> None: - class InstructionList(BaseModel): - instructions: list[str] = Field(description="list of instructions") - - model = self.MODEL_CLS(name=CILLMModelNames.ANTHROPIC.value) - with pytest.raises(litellm.BadRequestError, match="anthropic"): - await model.call( - [Message(content="What are three things I should do today?")], - output_type=InstructionList, - ) - @pytest.mark.parametrize( "model_name", - [CILLMModelNames.ANTHROPIC.value, "gpt-4-turbo", CILLMModelNames.OPENAI.value], + [ + pytest.param(CILLMModelNames.ANTHROPIC.value, id="anthropic"), + pytest.param(CILLMModelNames.OPENAI.value, id="openai"), + ], ) - @pytest.mark.asyncio - @pytest.mark.vcr - async def test_text_image_message(self, model_name: str) -> None: - await super().test_text_image_message(model_name) + async def test_multiple_completion(self, model_name: str, request) -> None: + model = self.MODEL_CLS(name=model_name, config={"n": self.NUM_COMPLETIONS}) + messages = [ + Message(role="system", content="Respond with single words."), + Message(content="Hello, how are you?"), + ] + if request.node.callspec.id == "anthropic": + # Anthropic does not support multiple completions + with pytest.raises(litellm.BadRequestError, match="anthropic"): + await model.call(messages) + else: + results = await model.call(messages, n=None) + assert len(results) == self.NUM_COMPLETIONS + + results = await model.call(messages, n=self.NUM_COMPLETIONS) + assert len(results) == self.NUM_COMPLETIONS def test_json_schema_validation() -> None: diff --git a/uv.lock b/uv.lock index b2829e9..9805785 100644 --- a/uv.lock +++ b/uv.lock @@ -123,15 +123,16 @@ wheels = [ [[package]] name = 
"anyio" -version = "4.6.2.post1" +version = "4.7.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9f/09/45b9b7a6d4e45c6bcb5bf61d19e3ab87df68e0601fa8c5293de3542546cc/anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c", size = 173422 } +sdist = { url = "https://files.pythonhosted.org/packages/f6/40/318e58f669b1a9e00f5c4453910682e2d9dd594334539c7b7817dabb765f/anyio-4.7.0.tar.gz", hash = "sha256:2f834749c602966b7d456a7567cafcb309f96482b5081d14ac93ccd457f9dd48", size = 177076 } wheels = [ - { url = "https://files.pythonhosted.org/packages/e4/f5/f2b75d2fc6f1a260f340f0e7c6a060f4dd2961cc16884ed851b0d18da06a/anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d", size = 90377 }, + { url = "https://files.pythonhosted.org/packages/a0/7a/4daaf3b6c08ad7ceffea4634ec206faeff697526421c20f07628c7372156/anyio-4.7.0-py3-none-any.whl", hash = "sha256:ea60c3723ab42ba6fff7e8ccb0488c898ec538ff4df1f1d5e642c3601d07e352", size = 93052 }, ] [[package]] @@ -681,7 +682,7 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.1.dev40+g0fc4372.d20241204" +version = "0.1.dev34+gac0511e.d20241205" source = { editable = "." } dependencies = [ { name = "aiofiles" }, @@ -2419,17 +2420,16 @@ wheels = [ [[package]] name = "pymupdf" -version = "1.24.14" +version = "1.25.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e0/6b/6bd735144a190d26dcc23f98b4aae0e09b259cc4c87bba266a39b7b91f56/PyMuPDF-1.24.14.tar.gz", hash = "sha256:0eed9f998525eaf39706dbf2d0cf3162150f0f526e4a36b1748ffa50bde581ae", size = 56242747 } +sdist = { url = "https://files.pythonhosted.org/packages/d2/9e/ec6139116b551922789eb72e710371ddd770a2236fbd5302c2a58670ebbc/pymupdf-1.25.0.tar.gz", hash = "sha256:9e5a33816e4b85ed6a01545cada2b866fc280a3b6478bb8e19c364532adf6692", size = 60812481 } wheels = [ - { url = "https://files.pythonhosted.org/packages/62/ce/972b080c526af80577ffaa49676c05361ba152de94de3af339a2f3ac07c2/PyMuPDF-1.24.14-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b3ad7a4f4b607ff97f2e1b8111823dd3797dbb381ec851c3ae4695fea6f68478", size = 19167365 }, - { url = "https://files.pythonhosted.org/packages/2c/11/8d6f4c8fca86b93759e430c4b0b7b66f8067d58893d6fe0a193420d14453/PyMuPDF-1.24.14-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:755906af4b4d693552ae5469ba682075853f4dc8a70639affd1bd6c049c5d900", size = 18417324 }, - { url = "https://files.pythonhosted.org/packages/51/69/518e6c088e20a5ded1fc658d4aec1e54c0f98f2d62d91362bd4231df9ecf/PyMuPDF-1.24.14-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:37f24108e2e18150fb8d512dcccdfa1e3d9b9dd203ffaa7ffb959bb20aea40b4", size = 19303826 }, - { url = "https://files.pythonhosted.org/packages/27/bf/203d06c68660d5535db65b6c54cacd35b950945c11c1c4546d674f270892/PyMuPDF-1.24.14-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0de4f5ed903c2be6d0abcccdc796368939b51ce03916eb53292916e3b6ea65d6", size = 19833056 }, - { url = "https://files.pythonhosted.org/packages/77/ed/40eb23cf5e91de0510dfedb7d9feedeab5ce9691544ad09599e124a0a333/PyMuPDF-1.24.14-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2d1b5c47df2f8055de5dedfbd3189c742188261a8c257f406378382adac94cff", size = 20963535 }, - { url = 
"https://files.pythonhosted.org/packages/87/2b/46af7461bd299c3f52bc5455332cc82608cea1667cd692652505fdf9308e/PyMuPDF-1.24.14-cp39-abi3-win32.whl", hash = "sha256:60a7ee7db3e0d3a4dcbe6df2781ba4487acb7e515c64ea9c857504f44effcb25", size = 14965671 }, - { url = "https://files.pythonhosted.org/packages/25/b2/82d70d9f5aea5a33e770f37e6db43ed08b5dc71b3526c5d7051689d1031e/PyMuPDF-1.24.14-cp39-abi3-win_amd64.whl", hash = "sha256:3d1f1ec2fe0249484afde7a0fc02589f19aaeb47c42939d23ae1d012aa1bc59b", size = 16257645 }, + { url = "https://files.pythonhosted.org/packages/bd/fe/c7810f3a960979963640dd7e8f5b485671fc423f11691df776ca3a093080/pymupdf-1.25.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:d12a4a7d4456b2c09afb674428be2021b87a3c4afbae7f717d5d7e805a30a989", size = 19367838 }, + { url = "https://files.pythonhosted.org/packages/8b/7f/9811212db9cc72757b40a0224fe3ed4a1b057c72546a224ab61d8981a56f/pymupdf-1.25.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:227ea293734b92fc1f49a01ffebe0f3d212bc0ec3be7e2db172088b8eaff5a4a", size = 18601499 }, + { url = "https://files.pythonhosted.org/packages/6b/ea/08041590cc1a8c66f5fefc3edd0b57f36b9225524586848d27ab470430e7/pymupdf-1.25.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:62b8673a2348c1b63874d2b1e93ee29e66892e7ca8311df9c7f4a9aa1d89caed", size = 20025425 }, + { url = "https://files.pythonhosted.org/packages/4f/99/ecd6edc233367a5820015bb98d098be781f9d3ef973b66197c0990c9f1c8/pymupdf-1.25.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e9d9e15af0fe389e15d605574f953df304d6037ce7cb4dca8b7cdec34d0694a3", size = 21148857 }, + { url = "https://files.pythonhosted.org/packages/ca/fe/31322d459dd7bb868c859d1a8f24f020f5c949496001f930a76f3190b830/pymupdf-1.25.0-cp39-abi3-win32.whl", hash = "sha256:cada9ebc14ae99253639e9d752f2e1cb8a62af813626c4391c62d39c41f8a43a", size = 15115786 }, + { url = "https://files.pythonhosted.org/packages/95/d0/aa79cc0c65ca6e8faf17cb44d779c1511a0da6525d617deb105b1ead0d98/pymupdf-1.25.0-cp39-abi3-win_amd64.whl", hash = "sha256:148800b9b14f2f48b5fc8f9213aeb94e272f1af4b533deeabe0e561d3bd334bf", size = 16562276 }, ] [[package]] @@ -3234,49 +3234,27 @@ sdist = { url = "https://files.pythonhosted.org/packages/80/f8/0802dd14c58b5d3d7 [[package]] name = "tokenizers" -version = "0.20.3" +version = "0.21.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/da/25/b1681c1c30ea3ea6e584ae3fffd552430b12faa599b558c4c4783f56d7ff/tokenizers-0.20.3.tar.gz", hash = "sha256:2278b34c5d0dd78e087e1ca7f9b1dcbf129d80211afa645f214bd6e051037539", size = 340513 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/93/6742ef9206409d5ce1fdf44d5ca1687cdc3847ba0485424e2c731e6bcf67/tokenizers-0.20.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:585b51e06ca1f4839ce7759941e66766d7b060dccfdc57c4ca1e5b9a33013a90", size = 2674224 }, - { url = "https://files.pythonhosted.org/packages/aa/14/e75ece72e99f6ef9ae07777ca9fdd78608f69466a5cecf636e9bd2f25d5c/tokenizers-0.20.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:61cbf11954f3b481d08723ebd048ba4b11e582986f9be74d2c3bdd9293a4538d", size = 2558991 }, - { url = "https://files.pythonhosted.org/packages/46/54/033b5b2ba0c3ae01e026c6f7ced147d41a2fa1c573d00a66cb97f6d7f9b3/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef820880d5e4e8484e2fa54ff8d297bb32519eaa7815694dc835ace9130a3eea", size = 2892476 }, - { url = 
"https://files.pythonhosted.org/packages/e6/b0/cc369fb3297d61f3311cab523d16d48c869dc2f0ba32985dbf03ff811041/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:67ef4dcb8841a4988cd00dd288fb95dfc8e22ed021f01f37348fd51c2b055ba9", size = 2802775 }, - { url = "https://files.pythonhosted.org/packages/1a/74/62ad983e8ea6a63e04ed9c5be0b605056bf8aac2f0125f9b5e0b3e2b89fa/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff1ef8bd47a02b0dc191688ccb4da53600df5d4c9a05a4b68e1e3de4823e78eb", size = 3086138 }, - { url = "https://files.pythonhosted.org/packages/6b/ac/4637ba619db25094998523f9e6f5b456e1db1f8faa770a3d925d436db0c3/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:444d188186eab3148baf0615b522461b41b1f0cd58cd57b862ec94b6ac9780f1", size = 3098076 }, - { url = "https://files.pythonhosted.org/packages/58/ce/9793f2dc2ce529369807c9c74e42722b05034af411d60f5730b720388c7d/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37c04c032c1442740b2c2d925f1857885c07619224a533123ac7ea71ca5713da", size = 3379650 }, - { url = "https://files.pythonhosted.org/packages/50/f6/2841de926bc4118af996eaf0bdf0ea5b012245044766ffc0347e6c968e63/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:453c7769d22231960ee0e883d1005c93c68015025a5e4ae56275406d94a3c907", size = 2994005 }, - { url = "https://files.pythonhosted.org/packages/a3/b2/00915c4fed08e9505d37cf6eaab45b12b4bff8f6719d459abcb9ead86a4b/tokenizers-0.20.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4bb31f7b2847e439766aaa9cc7bccf7ac7088052deccdb2275c952d96f691c6a", size = 8977488 }, - { url = "https://files.pythonhosted.org/packages/e9/ac/1c069e7808181ff57bcf2d39e9b6fbee9133a55410e6ebdaa89f67c32e83/tokenizers-0.20.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:843729bf0f991b29655a069a2ff58a4c24375a553c70955e15e37a90dd4e045c", size = 9294935 }, - { url = "https://files.pythonhosted.org/packages/50/47/722feb70ee68d1c4412b12d0ea4acc2713179fd63f054913990f9e259492/tokenizers-0.20.3-cp311-none-win32.whl", hash = "sha256:efcce3a927b1e20ca694ba13f7a68c59b0bd859ef71e441db68ee42cf20c2442", size = 2197175 }, - { url = "https://files.pythonhosted.org/packages/75/68/1b4f928b15a36ed278332ac75d66d7eb65d865bf344d049c452c18447bf9/tokenizers-0.20.3-cp311-none-win_amd64.whl", hash = "sha256:88301aa0801f225725b6df5dea3d77c80365ff2362ca7e252583f2b4809c4cc0", size = 2381616 }, - { url = "https://files.pythonhosted.org/packages/07/00/92a08af2a6b0c88c50f1ab47d7189e695722ad9714b0ee78ea5e1e2e1def/tokenizers-0.20.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:49d12a32e190fad0e79e5bdb788d05da2f20d8e006b13a70859ac47fecf6ab2f", size = 2667951 }, - { url = "https://files.pythonhosted.org/packages/ec/9a/e17a352f0bffbf415cf7d73756f5c73a3219225fc5957bc2f39d52c61684/tokenizers-0.20.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:282848cacfb9c06d5e51489f38ec5aa0b3cd1e247a023061945f71f41d949d73", size = 2555167 }, - { url = "https://files.pythonhosted.org/packages/27/37/d108df55daf4f0fcf1f58554692ff71687c273d870a34693066f0847be96/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abe4e08c7d0cd6154c795deb5bf81d2122f36daf075e0c12a8b050d824ef0a64", size = 2898389 }, - { url = 
"https://files.pythonhosted.org/packages/b2/27/32f29da16d28f59472fa7fb38e7782069748c7e9ab9854522db20341624c/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca94fc1b73b3883c98f0c88c77700b13d55b49f1071dfd57df2b06f3ff7afd64", size = 2795866 }, - { url = "https://files.pythonhosted.org/packages/29/4e/8a9a3c89e128c4a40f247b501c10279d2d7ade685953407c4d94c8c0f7a7/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef279c7e239f95c8bdd6ff319d9870f30f0d24915b04895f55b1adcf96d6c60d", size = 3085446 }, - { url = "https://files.pythonhosted.org/packages/b4/3b/a2a7962c496ebcd95860ca99e423254f760f382cd4bd376f8895783afaf5/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16384073973f6ccbde9852157a4fdfe632bb65208139c9d0c0bd0176a71fd67f", size = 3094378 }, - { url = "https://files.pythonhosted.org/packages/1f/f4/a8a33f0192a1629a3bd0afcad17d4d221bbf9276da4b95d226364208d5eb/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:312d522caeb8a1a42ebdec87118d99b22667782b67898a76c963c058a7e41d4f", size = 3385755 }, - { url = "https://files.pythonhosted.org/packages/9e/65/c83cb3545a65a9eaa2e13b22c93d5e00bd7624b354a44adbdc93d5d9bd91/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2b7cb962564785a83dafbba0144ecb7f579f1d57d8c406cdaa7f32fe32f18ad", size = 2997679 }, - { url = "https://files.pythonhosted.org/packages/55/e9/a80d4e592307688a67c7c59ab77e03687b6a8bd92eb5db763a2c80f93f57/tokenizers-0.20.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:124c5882ebb88dadae1fc788a582299fcd3a8bd84fc3e260b9918cf28b8751f5", size = 8989296 }, - { url = "https://files.pythonhosted.org/packages/90/af/60c957af8d2244321124e893828f1a4817cde1a2d08d09d423b73f19bd2f/tokenizers-0.20.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2b6e54e71f84c4202111a489879005cb14b92616a87417f6c102c833af961ea2", size = 9303621 }, - { url = "https://files.pythonhosted.org/packages/be/a9/96172310ee141009646d63a1ca267c099c462d747fe5ef7e33f74e27a683/tokenizers-0.20.3-cp312-none-win32.whl", hash = "sha256:83d9bfbe9af86f2d9df4833c22e94d94750f1d0cd9bfb22a7bb90a86f61cdb1c", size = 2188979 }, - { url = "https://files.pythonhosted.org/packages/bd/68/61d85ae7ae96dde7d0974ff3538db75d5cdc29be2e4329cd7fc51a283e22/tokenizers-0.20.3-cp312-none-win_amd64.whl", hash = "sha256:44def74cee574d609a36e17c8914311d1b5dbcfe37c55fd29369d42591b91cf2", size = 2380725 }, - { url = "https://files.pythonhosted.org/packages/07/19/36e9eaafb229616cb8502b42030fa7fe347550e76cb618de71b498fc3222/tokenizers-0.20.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0b630e0b536ef0e3c8b42c685c1bc93bd19e98c0f1543db52911f8ede42cf84", size = 2666813 }, - { url = "https://files.pythonhosted.org/packages/b9/c7/e2ce1d4f756c8a62ef93fdb4df877c2185339b6d63667b015bf70ea9d34b/tokenizers-0.20.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a02d160d2b19bcbfdf28bd9a4bf11be4cb97d0499c000d95d4c4b1a4312740b6", size = 2555354 }, - { url = "https://files.pythonhosted.org/packages/7c/cf/5309c2d173a6a67f9ec8697d8e710ea32418de6fd8541778032c202a1c3e/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e3d80d89b068bc30034034b5319218c7c0a91b00af19679833f55f3becb6945", size = 2897745 }, - { url = 
"https://files.pythonhosted.org/packages/2c/e5/af3078e32f225e680e69d61f78855880edb8d53f5850a1834d519b2b103f/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:174a54910bed1b089226512b4458ea60d6d6fd93060254734d3bc3540953c51c", size = 2794385 }, - { url = "https://files.pythonhosted.org/packages/0b/a7/bc421fe46650cc4eb4a913a236b88c243204f32c7480684d2f138925899e/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:098b8a632b8656aa5802c46689462c5c48f02510f24029d71c208ec2c822e771", size = 3084580 }, - { url = "https://files.pythonhosted.org/packages/c6/22/97e1e95ee81f75922c9f569c23cb2b1fdc7f5a7a29c4c9fae17e63f751a6/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78c8c143e3ae41e718588281eb3e212c2b31623c9d6d40410ec464d7d6221fb5", size = 3093581 }, - { url = "https://files.pythonhosted.org/packages/d5/14/f0df0ee3b9e516121e23c0099bccd7b9f086ba9150021a750e99b16ce56f/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b26b0aadb18cd8701077362ba359a06683662d5cafe3e8e8aba10eb05c037f1", size = 3385934 }, - { url = "https://files.pythonhosted.org/packages/66/52/7a171bd4929e3ffe61a29b4340fe5b73484709f92a8162a18946e124c34c/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07d7851a72717321022f3774e84aa9d595a041d643fafa2e87fbc9b18711dac0", size = 2997311 }, - { url = "https://files.pythonhosted.org/packages/7c/64/f1993bb8ebf775d56875ca0d50a50f2648bfbbb143da92fe2e6ceeb4abd5/tokenizers-0.20.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:bd44e48a430ada902c6266a8245f5036c4fe744fcb51f699999fbe82aa438797", size = 8988601 }, - { url = "https://files.pythonhosted.org/packages/d6/3f/49fa63422159bbc2f2a4ac5bfc597d04d4ec0ad3d2ef46649b5e9a340e37/tokenizers-0.20.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a4c186bb006ccbe1f5cc4e0380d1ce7806f5955c244074fd96abc55e27b77f01", size = 9303950 }, - { url = "https://files.pythonhosted.org/packages/66/11/79d91aeb2817ad1993ef61c690afe73e6dbedbfb21918b302ef5a2ba9bfb/tokenizers-0.20.3-cp313-none-win32.whl", hash = "sha256:6e19e0f1d854d6ab7ea0c743d06e764d1d9a546932be0a67f33087645f00fe13", size = 2188941 }, - { url = "https://files.pythonhosted.org/packages/c2/ff/ac8410f868fb8b14b5e619efa304aa119cb8a40bd7df29fc81a898e64f99/tokenizers-0.20.3-cp313-none-win_amd64.whl", hash = "sha256:d50ede425c7e60966a9680d41b58b3a0950afa1bb570488e2972fa61662c4273", size = 2380269 }, +sdist = { url = "https://files.pythonhosted.org/packages/20/41/c2be10975ca37f6ec40d7abd7e98a5213bb04f284b869c1a24e6504fd94d/tokenizers-0.21.0.tar.gz", hash = "sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4", size = 343021 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/5c/8b09607b37e996dc47e70d6a7b6f4bdd4e4d5ab22fe49d7374565c7fefaf/tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2", size = 2647461 }, + { url = "https://files.pythonhosted.org/packages/22/7a/88e58bb297c22633ed1c9d16029316e5b5ac5ee44012164c2edede599a5e/tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e", size = 2563639 }, + { url = "https://files.pythonhosted.org/packages/f7/14/83429177c19364df27d22bc096d4c2e431e0ba43e56c525434f1f9b0fd00/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193", size = 2903304 }, + { url = "https://files.pythonhosted.org/packages/7e/db/3433eab42347e0dc5452d8fcc8da03f638c9accffefe5a7c78146666964a/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e", size = 2804378 }, + { url = "https://files.pythonhosted.org/packages/57/8b/7da5e6f89736c2ade02816b4733983fca1c226b0c42980b1ae9dc8fcf5cc/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e", size = 3095488 }, + { url = "https://files.pythonhosted.org/packages/4d/f6/5ed6711093dc2c04a4e03f6461798b12669bc5a17c8be7cce1240e0b5ce8/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba", size = 3121410 }, + { url = "https://files.pythonhosted.org/packages/81/42/07600892d48950c5e80505b81411044a2d969368cdc0d929b1c847bf6697/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273", size = 3388821 }, + { url = "https://files.pythonhosted.org/packages/22/06/69d7ce374747edaf1695a4f61b83570d91cc8bbfc51ccfecf76f56ab4aac/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04", size = 3008868 }, + { url = "https://files.pythonhosted.org/packages/c8/69/54a0aee4d576045b49a0eb8bffdc495634309c823bf886042e6f46b80058/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e", size = 8975831 }, + { url = "https://files.pythonhosted.org/packages/f7/f3/b776061e4f3ebf2905ba1a25d90380aafd10c02d406437a8ba22d1724d76/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b", size = 8920746 }, + { url = "https://files.pythonhosted.org/packages/d8/ee/ce83d5ec8b6844ad4c3ecfe3333d58ecc1adc61f0878b323a15355bcab24/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74", size = 9161814 }, + { url = "https://files.pythonhosted.org/packages/18/07/3e88e65c0ed28fa93aa0c4d264988428eef3df2764c3126dc83e243cb36f/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff", size = 9357138 }, + { url = "https://files.pythonhosted.org/packages/15/b0/dc4572ca61555fc482ebc933f26cb407c6aceb3dc19c301c68184f8cad03/tokenizers-0.21.0-cp39-abi3-win32.whl", hash = "sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a", size = 2202266 }, + { url = "https://files.pythonhosted.org/packages/44/69/d21eb253fa91622da25585d362a874fa4710be600f0ea9446d8d0217cec1/tokenizers-0.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c", size = 2389192 }, ] [[package]] @@ -3349,7 +3327,7 @@ wheels = [ [[package]] name = "transformers" -version = "4.46.3" +version = "4.47.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -3363,9 +3341,9 @@ dependencies = [ { name = "tokenizers" }, { name = "tqdm" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/37/5a/58f96c83e566f907ae39f16d4401bbefd8bb85c60bd1e6a95c419752ab90/transformers-4.46.3.tar.gz", hash = "sha256:8ee4b3ae943fe33e82afff8e837f4b052058b07ca9be3cb5b729ed31295f72cc", size = 8627944 } +sdist = { url = "https://files.pythonhosted.org/packages/b1/5a/0ecfde3264bed0579c37f249e04e15f3c1451ba864d78bbe390177664cac/transformers-4.47.0.tar.gz", hash = "sha256:f8ead7a5a4f6937bb507e66508e5e002dc5930f7b6122a9259c37b099d0f3b19", size = 8693668 } wheels = [ - { url = "https://files.pythonhosted.org/packages/51/51/b87caa939fedf307496e4dbf412f4b909af3d9ca8b189fc3b65c1faa456f/transformers-4.46.3-py3-none-any.whl", hash = "sha256:a12ef6f52841fd190a3e5602145b542d03507222f2c64ebb7ee92e8788093aef", size = 10034536 }, + { url = "https://files.pythonhosted.org/packages/d0/a7/7eedcf6a359e1e1eff3bc204ad022485aa5d88c08e1e3e0e0aee8a2e2235/transformers-4.47.0-py3-none-any.whl", hash = "sha256:a8e1bafdaae69abdda3cad638fe392e37c86d2ce0ecfcae11d60abb8f949ff4d", size = 10133426 }, ] [[package]] From 1dcda13ade11914671b1bf0694c9f665e31f01c8 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Thu, 5 Dec 2024 15:18:17 -0800 Subject: [PATCH 02/18] Improved logging for call_multiple --- llmclient/llms.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/llmclient/llms.py b/llmclient/llms.py index 20fc9bf..1136bd7 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -855,7 +855,17 @@ async def call_multiple( tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, **chat_kwargs, ) -> list[LLMResult]: - if chat_kwargs.get("n", 1) == 1: + if chat_kwargs.get("n", 1) == 1 or self.config.get("n", 1) == 1: + if ( + chat_kwargs.get("n") + and self.config.get("n") + and chat_kwargs.get("n") != self.config.get("n") + ): + raise ValueError( + f"Incompatible number of completions requested. " + f"Model's configuration n is {self.config['n']}, " + f"but kwarg n={chat_kwargs['n']} was passed." + ) logger.warning( "n is 1 for call_multiple. It will return a list with a single element" ) From 2847af7fe0f7127cefd5b636e28f8c36c94fe3d5 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Thu, 5 Dec 2024 15:26:30 -0800 Subject: [PATCH 03/18] removed deprecated check of n in kwargs --- llmclient/llms.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/llmclient/llms.py b/llmclient/llms.py index 1136bd7..4b3ba41 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -915,8 +915,6 @@ async def call( "Invalid n passed to the call function. 
Will get it from the model's configuration" ) n = self.config.get("n", 1) - if "n" in chat_kwargs: - n = chat_kwargs["n"] if n == 1: return await self.call_single( messages, callbacks, output_type, tools, tool_choice, **chat_kwargs From 6fbf2f2f4c79e774e3074b0a4448bbc4766001b5 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Fri, 6 Dec 2024 14:54:33 -0800 Subject: [PATCH 04/18] Added cassets for TestMultipleCompletionLLMModel --- ...st_multiple_completion[gpt-3.5-turbo].yaml | 386 ++++++++--------- ...odel.test_multiple_completion[openai].yaml | 398 +++++++++--------- ...e_completion[claude-3-haiku-20240307].yaml | 135 +++--- ...test_single_completion[gpt-3.5-turbo].yaml | 205 ++++----- uv.lock | 2 +- 5 files changed, 566 insertions(+), 560 deletions(-) diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml index 35c02c0..caee9e8 100644 --- a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml @@ -1,194 +1,196 @@ interactions: -- request: - body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, - how are you?"}],"model":"gpt-3.5-turbo","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '149' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.57.0 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.57.0 - x-stainless-raw-response: - - 'true' - x-stainless-retry-count: - - '1' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.12.7 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA9RTy2rDMBC8+yuEzklo3jS3QCCXXNoe+qIYWdrYamStKq1LS8i/FzkPOySFXnvR - YWZnNLsrbRPGuFZ8xrgsBMnSme48W6zuxvBAL5v55/3H06Ra4OOoWhXl82LJO1GB2TtIOqp6Ektn - gDTaPS09CILo2p8Oh6PhYHo7qYkSFZgoyx11h71xlyqfYfemPxgflAVqCYHP2GvCGGPb+owZrYIv - PmM3nSNSQggiBz47FTHGPZqIcBGCDiQs8U5DSrQEto69RFRtysO6CiJGs5UxB3x3ustg7jxm4cCf - 8LW2OhSpBxHQRt9A6HjSEl800P83DSSMvdVLqc5icuexdJQSbsBGw8Fgb8ebZ9AiDxwhCdOCR50r - ZqkCEtqE1ki4FLIA1SibByAqpbFFtMd+meWa975tbfO/2DeElOAIVOo8KC3P+23KPMQ/8lvZacR1 - YB6+A0GZrrXNwTuv6yXXm9wlPwAAAP//AwAh8pBrpAMAAA== - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed70040cbcdf99b-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:06:36 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - future-house-xr4tdh - openai-processing-ms: - - '134' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '12000' - x-ratelimit-limit-tokens: - - '1000000' - x-ratelimit-remaining-requests: - - '11999' - x-ratelimit-remaining-tokens: - - '999953' - x-ratelimit-reset-requests: - - 5ms - x-ratelimit-reset-tokens: - - 2ms - x-request-id: - - req_1f88664946b9891fbc90796687f144c4 - status: - code: 200 - message: OK -- request: - body: 
'{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, - how are you?"}],"model":"gpt-3.5-turbo","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '149' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.57.0 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.57.0 - x-stainless-raw-response: - - 'true' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.12.7 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA9RTTUsDMRC9768IOW9LP63tzaIIIqgH7UFkSZPZbTSbCcksWEr/u2T7sVtawauX - HN6b9/JmJtkkjHGt+IxxuRIkS2c6N8vbx5fF9Tisy7l5e13clfl0/vQwfl5P5o6nUYHLT5B0UHUl - ls4AabQ7WnoQBNG1PxkOR8PBZHpVEyUqMFFWOOoMu+MOVX6JnV5/MN4rV6glBD5j7wljjG3qM2a0 - Cr75jPXSA1JCCKIAPjsWMcY9mohwEYIOJCzxtCElWgJbx75HVG3KQ14FEaPZypg9vj3eZbBwHpdh - zx/xXFsdVpkHEdBG30DoeNISnzXQ/zcNJIx91EupTmJy57F0lBF+gY2Gg8HOjjfPoEXuOUISpgWP - 0gtmmQIS2oTWSLgUcgWqUTYPQFRKY4toj/08yyXvXdvaFn+xbwgpwRGozHlQWp7225R5iH/kt7Lj - iOvAPKwDQZnl2hbgndf1kutNbpMfAAAA//8DALEE5HikAwAA - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed700428d77f99b-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:06:36 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - future-house-xr4tdh - openai-processing-ms: - - '114' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '12000' - x-ratelimit-limit-tokens: - - '1000000' - x-ratelimit-remaining-requests: - - '11999' - x-ratelimit-remaining-tokens: - - '999953' - x-ratelimit-reset-requests: - - 5ms - x-ratelimit-reset-tokens: - - 2ms - x-request-id: - - req_e32516fa5bb6ab11dda5155511280ea6 - status: - code: 200 - message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "149" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "1" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RTy2rDMBC8+yuEzklo3jS3QCCXXNoe+qIYWdrYamStKq1LS8i/FzkPOySFXnvR + YWZnNLsrbRPGuFZ8xrgsBMnSme48W6zuxvBAL5v55/3H06Ra4OOoWhXl82LJO1GB2TtIOqp6Ektn + gDTaPS09CILo2p8Oh6PhYHo7qYkSFZgoyx11h71xlyqfYfemPxgflAVqCYHP2GvCGGPb+owZrYIv + PmM3nSNSQggiBz47FTHGPZqIcBGCDiQs8U5DSrQEto69RFRtysO6CiJGs5UxB3x3ustg7jxm4cCf + 8LW2OhSpBxHQRt9A6HjSEl800P83DSSMvdVLqc5icuexdJQSbsBGw8Fgb8ebZ9AiDxwhCdOCR50r + 
ZqkCEtqE1ki4FLIA1SibByAqpbFFtMd+meWa975tbfO/2DeElOAIVOo8KC3P+23KPMQ/8lvZacR1 + YB6+A0GZrrXNwTuv6yXXm9wlPwAAAP//AwAh8pBrpAMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed70040cbcdf99b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "134" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999953" + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_1f88664946b9891fbc90796687f144c4 + status: + code: 200 + message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "149" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RTTUsDMRC9768IOW9LP63tzaIIIqgH7UFkSZPZbTSbCcksWEr/u2T7sVtawauX + HN6b9/JmJtkkjHGt+IxxuRIkS2c6N8vbx5fF9Tisy7l5e13clfl0/vQwfl5P5o6nUYHLT5B0UHUl + ls4AabQ7WnoQBNG1PxkOR8PBZHpVEyUqMFFWOOoMu+MOVX6JnV5/MN4rV6glBD5j7wljjG3qM2a0 + Cr75jPXSA1JCCKIAPjsWMcY9mohwEYIOJCzxtCElWgJbx75HVG3KQ14FEaPZypg9vj3eZbBwHpdh + zx/xXFsdVpkHEdBG30DoeNISnzXQ/zcNJIx91EupTmJy57F0lBF+gY2Gg8HOjjfPoEXuOUISpgWP + 0gtmmQIS2oTWSLgUcgWqUTYPQFRKY4toj/08yyXvXdvaFn+xbwgpwRGozHlQWp7225R5iH/kt7Lj + iOvAPKwDQZnl2hbgndf1kutNbpMfAAAA//8DALEE5HikAwAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700428d77f99b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "114" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999953" + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_e32516fa5bb6ab11dda5155511280ea6 + status: + code: 200 + message: OK version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml 
b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml index cc8a292..150ca29 100644 --- a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml @@ -1,200 +1,202 @@ interactions: -- request: - body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, - how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '158' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.57.0 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.57.0 - x-stainless-raw-response: - - 'true' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.12.7 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA7RTwWoCMRC971ekObui7qLWm1JaWgq9FEopZckm4xqbzYQkSxXx30tWu7uihR7a - Sw7z5r28N5PsIkKoFHRGKF8xz0uj4nl+A3yxWD9Vi4f56zYtko15Htr1/aP9fKG9wMB8Ddx/s/oc - S6PAS9QHmFtgHoLqcJIkaZJcT8c1UKIAFWiF8XGKcSm1jEeDURoPJvFwemSvUHJwdEbeIkII2dVn - 8KkFbOiMDHrflRKcYwXQWdNECLWoQoUy56TzTHvaa0GO2oOurd9KDf0uZmFZORb86UqpY33fXKaw - MBZzd8Sb+lJq6VaZBeZQB2Hn0dCoQz5LMPybBHeI4uqfE0SEvNdrqU58UmOxND7z+AE6CI5GBzna - PoYWTI+YR89UhzPuXRDLBHgmlevMhHLGVyBaZvsEWCUkdoDu3M+9XNI+xJa6+I18C3AOxoPIjAUh - +Wnets1C+Ck/tTUjrg1Tt3UeymwpdQHWWHnY8tJkec4SPoXJIKfRPvoCAAD//wMAviUi9bUDAAA= - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed71d50ac15cf13-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:26:27 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=.T97IJZbEqHKl_VUBqOzbYQ3.fPwrK1uEUCoRWrQ0Vs-1733433987-1.0.1.1-g7sgDoAXo0fTveWsSMwxPdEXEmD5ZOQ_XYi1pZoi0dW2JzEVU83E5oRAyXudBimOLtvB92CoJm1WxF9LBkquZA; - path=/; expires=Thu, 05-Dec-24 21:56:27 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=h0PMfMYOdM04Mkzg9aKUS2PH3E1LFBnUsNmdRQ4ltVY-1733433987796-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - future-house-xr4tdh - openai-processing-ms: - - '1036' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149999954' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_2525b96c65ed2235204a4f7fbb79f88e - status: - code: 200 - message: OK -- request: - body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, - how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '158' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.57.0 - 
x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.57.0 - x-stainless-raw-response: - - 'true' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.12.7 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//tFPLasMwELz7K1Sd7eA479wKpaVQAr30UoqRpbWtVtYKSYG0If9e5Dzs - kBR6aC867OyMZnalbUQIlYIuCeU187wxKrkt7qCeblYTs5gau3pxMmP8/vmrSh/hicaBgcU7cH9k - DTg2RoGXqPcwt8A8BNXhbDQaj0aLxbAFGhSgAq0yPhlj0kgtkyzNxkk6S4bzA7tGycHRJXmNCCFk - 257BpxawoUuSxsdKA86xCujy1EQItahChTLnpPNMexp3IEftQbfWHxDFoI9ZKNeOBX96rdShvjtd - prAyFgt3wE/1Umrp6twCc6iDsPNoaNQjXyQY/l2Cm39OEBHy1q5lfeaTGouN8bnHD9BBMMv2crR7 - DB04PmAePVM9zjS+IpYL8Ewq15sJ5YzXIDpm9wTYWkjsAf25X3q5pr2PLXX1G/kO4ByMB5EbC0Ly - 87xdm4XwU35qO424NUzdp/PQ5KXUFVhj5X7LpcnTWTopyvmMpzTaRd8AAAD//wMAgEsPw7UDAAA= - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed71d57fe24cf13-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:26:31 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - future-house-xr4tdh - openai-processing-ms: - - '765' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149999954' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_84370a5d5d53f54172bc0ffe3feb7e4a - status: - code: 200 - message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "158" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA7RTwWoCMRC971ekObui7qLWm1JaWgq9FEopZckm4xqbzYQkSxXx30tWu7uihR7a + Sw7z5r28N5PsIkKoFHRGKF8xz0uj4nl+A3yxWD9Vi4f56zYtko15Htr1/aP9fKG9wMB8Ddx/s/oc + S6PAS9QHmFtgHoLqcJIkaZJcT8c1UKIAFWiF8XGKcSm1jEeDURoPJvFwemSvUHJwdEbeIkII2dVn + 8KkFbOiMDHrflRKcYwXQWdNECLWoQoUy56TzTHvaa0GO2oOurd9KDf0uZmFZORb86UqpY33fXKaw + MBZzd8Sb+lJq6VaZBeZQB2Hn0dCoQz5LMPybBHeI4uqfE0SEvNdrqU58UmOxND7z+AE6CI5GBzna + PoYWTI+YR89UhzPuXRDLBHgmlevMhHLGVyBaZvsEWCUkdoDu3M+9XNI+xJa6+I18C3AOxoPIjAUh + +Wnets1C+Ck/tTUjrg1Tt3UeymwpdQHWWHnY8tJkec4SPoXJIKfRPvoCAAD//wMAviUi9bUDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed71d50ac15cf13-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:26:27 GMT + Server: + - 
cloudflare + Set-Cookie: + - __cf_bm=.T97IJZbEqHKl_VUBqOzbYQ3.fPwrK1uEUCoRWrQ0Vs-1733433987-1.0.1.1-g7sgDoAXo0fTveWsSMwxPdEXEmD5ZOQ_XYi1pZoi0dW2JzEVU83E5oRAyXudBimOLtvB92CoJm1WxF9LBkquZA; + path=/; expires=Thu, 05-Dec-24 21:56:27 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=h0PMfMYOdM04Mkzg9aKUS2PH3E1LFBnUsNmdRQ4ltVY-1733433987796-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "1036" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999954" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2525b96c65ed2235204a4f7fbb79f88e + status: + code: 200 + message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "158" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//tFPLasMwELz7K1Sd7eA479wKpaVQAr30UoqRpbWtVtYKSYG0If9e5Dzs + kBR6aC867OyMZnalbUQIlYIuCeU187wxKrkt7qCeblYTs5gau3pxMmP8/vmrSh/hicaBgcU7cH9k + DTg2RoGXqPcwt8A8BNXhbDQaj0aLxbAFGhSgAq0yPhlj0kgtkyzNxkk6S4bzA7tGycHRJXmNCCFk + 257BpxawoUuSxsdKA86xCujy1EQItahChTLnpPNMexp3IEftQbfWHxDFoI9ZKNeOBX96rdShvjtd + prAyFgt3wE/1Umrp6twCc6iDsPNoaNQjXyQY/l2Cm39OEBHy1q5lfeaTGouN8bnHD9BBMMv2crR7 + DB04PmAePVM9zjS+IpYL8Ewq15sJ5YzXIDpm9wTYWkjsAf25X3q5pr2PLXX1G/kO4ByMB5EbC0Ly + 87xdm4XwU35qO424NUzdp/PQ5KXUFVhj5X7LpcnTWTopyvmMpzTaRd8AAAD//wMAgEsPw7UDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed71d57fe24cf13-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:26:31 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "765" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999954" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_84370a5d5d53f54172bc0ffe3feb7e4a + status: + code: 200 + message: OK version: 1 diff --git 
a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml index ff06001..c5e4922 100644 --- a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml @@ -1,69 +1,70 @@ interactions: -- request: - body: '{"messages":[{"role":"user","content":[{"type":"text","text":"Hello, how - are you?"}]}],"system":[{"type":"text","text":"Respond with single words."}],"max_tokens":4096,"model":"claude-3-haiku-20240307"}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - anthropic-version: - - '2023-06-01' - connection: - - keep-alive - content-length: - - '202' - content-type: - - application/json - host: - - api.anthropic.com - user-agent: - - litellm/1.53.3 - method: POST - uri: https://api.anthropic.com/v1/messages - response: - body: - string: !!binary | - H4sIAAAAAAAAA0yOzWqEQBCE36XO4+LfYpxb8gA5BdwlBBm0MbJjj7F7wCC+e1ASyKngqx9qw9jD - YpKhTbPXItPhpcqeFwlV2az3p9tb3cBAv2c6UiTiBoLBEvwBnMgo6lhhMIWePCw672JPSZF8uvER - kzzNy7RIKxh0gZVYYd+3v0Wl9eieYtGQ9xfsHwaiYW4XchIYFsR9q3Fh/BpCX5G4I1iO3hvE85Xd - MPIctdXwIBbYrDYIUf+j677/AAAA//8DAHETe7LyAAAA - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed700088ad5942c-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:06:27 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Robots-Tag: - - none - anthropic-ratelimit-requests-limit: - - '5000' - anthropic-ratelimit-requests-remaining: - - '4999' - anthropic-ratelimit-requests-reset: - - '2024-12-05T21:06:27Z' - anthropic-ratelimit-tokens-limit: - - '5000000' - anthropic-ratelimit-tokens-remaining: - - '5000000' - anthropic-ratelimit-tokens-reset: - - '2024-12-05T21:06:27Z' - request-id: - - req_01PYs7k3gcHPDqdgTayudkMv - via: - - 1.1 google - status: - code: 200 - message: OK + - request: + body: + '{"messages":[{"role":"user","content":[{"type":"text","text":"Hello, how + are you?"}]}],"system":[{"type":"text","text":"Respond with single words."}],"max_tokens":4096,"model":"claude-3-haiku-20240307"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - "2023-06-01" + connection: + - keep-alive + content-length: + - "202" + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - litellm/1.53.3 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA0yOzWqEQBCE36XO4+LfYpxb8gA5BdwlBBm0MbJjj7F7wCC+e1ASyKngqx9qw9jD + YpKhTbPXItPhpcqeFwlV2az3p9tb3cBAv2c6UiTiBoLBEvwBnMgo6lhhMIWePCw672JPSZF8uvER + kzzNy7RIKxh0gZVYYd+3v0Wl9eieYtGQ9xfsHwaiYW4XchIYFsR9q3Fh/BpCX5G4I1iO3hvE85Xd + MPIctdXwIBbYrDYIUf+j677/AAAA//8DAHETe7LyAAAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700088ad5942c-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:27 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-ratelimit-requests-limit: + - "5000" + anthropic-ratelimit-requests-remaining: + - "4999" + anthropic-ratelimit-requests-reset: + - "2024-12-05T21:06:27Z" + anthropic-ratelimit-tokens-limit: + - "5000000" + anthropic-ratelimit-tokens-remaining: + - 
"5000000" + anthropic-ratelimit-tokens-reset: + - "2024-12-05T21:06:27Z" + request-id: + - req_01PYs7k3gcHPDqdgTayudkMv + via: + - 1.1 google + status: + code: 200 + message: OK version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml index 073df30..c591a7c 100644 --- a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml @@ -1,104 +1,105 @@ interactions: -- request: - body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, - how are you?"}],"model":"gpt-3.5-turbo","n":1}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '149' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.57.0 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.57.0 - x-stainless-raw-response: - - 'true' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.12.7 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA4yST0sDMRDF7/spQs5tabstK70JQj0oFL2IIks2me7GZjMxmaVq6XeXbP/sihW8 - 5DC/eS9vJtkljHGt+IJxWQmStTPD6+Lm7tauskf7/iSMfdjMlvPnWZV9bVfbez6ICizeQNJJNZJY - OwOk0R6w9CAIouskS9NZOs2ushbUqMBEWelomI7mQ2p8gcPxZDo/KivUEgJfsJeEMcZ27RkzWgUf - fMHGg1OlhhBECXxxbmKMezSxwkUIOpCwxAcdlGgJbBt7iaj6yMO6CSJGs40xx/r+fJfB0nkswpGf - 62ttdahyDyKgjb6B0PGW7hPGXtuZmh8xufNYO8oJN2Cj4XR6sOPdFjs4OTJCEqanSQcXzHIFJLQJ - vZVwKWQFqlN2+xON0tgDSW/k31kueR/G1rb8j30HpARHoHLnQWn5c96uzUP8Yn+1nVfcBubhMxDU - +VrbErzzun3k9iX3yTcAAAD//wMAusvg7OMCAAA= - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed700089e2e072b-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:06:27 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=4Fzl_VHC9.c_.kyLBREHy8a7wA.lEcwtqZXonX9ka10-1733432787-1.0.1.1-tBlI5dXtGa55yRlJwRgFkxlkQ7emZl1_xhYirjNw7CcPBv7WkC60ubux0sARYF8Nzun5tNgFTC100P_ywLDMgw; - path=/; expires=Thu, 05-Dec-24 21:36:27 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=WokAKLMIioMxOZo9K5N1oJLSuWXQVmUht7hb75_Z06w-1733432787525-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - future-house-xr4tdh - openai-processing-ms: - - '132' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '12000' - x-ratelimit-limit-tokens: - - '1000000' - x-ratelimit-remaining-requests: - - '11999' - x-ratelimit-remaining-tokens: - - '999969' - x-ratelimit-reset-requests: - - 5ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_567abdfb8bf13c71bc3f2bac8be8b4af - status: - code: 200 - message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":1}' + 
headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "149" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4yST0sDMRDF7/spQs5tabstK70JQj0oFL2IIks2me7GZjMxmaVq6XeXbP/sihW8 + 5DC/eS9vJtkljHGt+IJxWQmStTPD6+Lm7tauskf7/iSMfdjMlvPnWZV9bVfbez6ICizeQNJJNZJY + OwOk0R6w9CAIouskS9NZOs2ushbUqMBEWelomI7mQ2p8gcPxZDo/KivUEgJfsJeEMcZ27RkzWgUf + fMHGg1OlhhBECXxxbmKMezSxwkUIOpCwxAcdlGgJbBt7iaj6yMO6CSJGs40xx/r+fJfB0nkswpGf + 62ttdahyDyKgjb6B0PGW7hPGXtuZmh8xufNYO8oJN2Cj4XR6sOPdFjs4OTJCEqanSQcXzHIFJLQJ + vZVwKWQFqlN2+xON0tgDSW/k31kueR/G1rb8j30HpARHoHLnQWn5c96uzUP8Yn+1nVfcBubhMxDU + +VrbErzzun3k9iX3yTcAAAD//wMAusvg7OMCAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700089e2e072b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:27 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=4Fzl_VHC9.c_.kyLBREHy8a7wA.lEcwtqZXonX9ka10-1733432787-1.0.1.1-tBlI5dXtGa55yRlJwRgFkxlkQ7emZl1_xhYirjNw7CcPBv7WkC60ubux0sARYF8Nzun5tNgFTC100P_ywLDMgw; + path=/; expires=Thu, 05-Dec-24 21:36:27 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=WokAKLMIioMxOZo9K5N1oJLSuWXQVmUht7hb75_Z06w-1733432787525-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "132" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999969" + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_567abdfb8bf13c71bc3f2bac8be8b4af + status: + code: 200 + message: OK version: 1 diff --git a/uv.lock b/uv.lock index 9cc7e3e..ce7fbac 100644 --- a/uv.lock +++ b/uv.lock @@ -564,7 +564,7 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.0.4.dev6+g6de1e91.d20241206" +version = "0.0.4.dev6+g2eac4a6.d20241206" source = { editable = "." 
} dependencies = [ { name = "coredis" }, From 5d3a3c98ca15973d7af64d8b7852fe15364cfe8c Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Fri, 6 Dec 2024 15:18:55 -0800 Subject: [PATCH 05/18] Fix lint --- llmclient/llms.py | 2 +- tests/test_llms.py | 5 +++-- uv.lock | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llmclient/llms.py b/llmclient/llms.py index 0f66fb6..2f23255 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -861,7 +861,7 @@ async def call_multiple( tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, **chat_kwargs, ) -> list[LLMResult]: - if chat_kwargs.get("n", 1) == 1 or self.config.get("n", 1) == 1: + if 1 in {chat_kwargs.get("n", 1), self.config.get("n", 1)}: if ( chat_kwargs.get("n") and self.config.get("n") diff --git a/tests/test_llms.py b/tests/test_llms.py index 43ac45d..d8da345 100644 --- a/tests/test_llms.py +++ b/tests/test_llms.py @@ -332,7 +332,8 @@ async def test_single_completion(self, model_name: str) -> None: result = await model.call(messages) assert isinstance(result, LLMResult) - result = await model.call(messages, n=1) + result = await model.call(messages, n=1) # noqa: FURB120 + assert isinstance(result, LLMResult) assert result.messages assert len(result.messages) == 1 @@ -358,7 +359,7 @@ async def test_multiple_completion(self, model_name: str, request) -> None: with pytest.raises(litellm.BadRequestError, match="anthropic"): await model.call(messages) else: - results = await model.call(messages, n=None) + results = await model.call(messages, n=None) # noqa: FURB120 assert len(results) == self.NUM_COMPLETIONS results = await model.call(messages, n=self.NUM_COMPLETIONS) diff --git a/uv.lock b/uv.lock index ce7fbac..c74be79 100644 --- a/uv.lock +++ b/uv.lock @@ -564,7 +564,7 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.0.4.dev6+g2eac4a6.d20241206" +version = "0.0.4.dev7+g6fbf2f2.d20241206" source = { editable = "." 
} dependencies = [ { name = "coredis" }, From 3f650fcead159bda0541c774be86c68985f4b761 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 11:01:41 -0800 Subject: [PATCH 06/18] Implemented tests to check kwarg priority when calling --- tests/test_llms.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/test_llms.py b/tests/test_llms.py index d8da345..8bd4974 100644 --- a/tests/test_llms.py +++ b/tests/test_llms.py @@ -317,7 +317,6 @@ async def test_text_image_message(self, model_name: str) -> None: ), "Expected content in message, but got None" assert "red" in result.messages[-1].content.lower() - # Test n = 1 @pytest.mark.parametrize( "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"] ) @@ -339,6 +338,13 @@ async def test_single_completion(self, model_name: str) -> None: assert len(result.messages) == 1 assert result.messages[0].content + model = self.MODEL_CLS(name=model_name, config={"n": 2}) + result = await model.call(messages, n=1) + assert isinstance(result, LLMResult) + assert result.messages + assert len(result.messages) == 1 + assert result.messages[0].content + @pytest.mark.asyncio @pytest.mark.vcr @pytest.mark.parametrize( @@ -365,6 +371,10 @@ async def test_multiple_completion(self, model_name: str, request) -> None: results = await model.call(messages, n=self.NUM_COMPLETIONS) assert len(results) == self.NUM_COMPLETIONS + model = self.MODEL_CLS(name=model_name, config={"n": 1}) + results = await model.call(messages, n=self.NUM_COMPLETIONS) + assert len(results) == self.NUM_COMPLETIONS + def test_json_schema_validation() -> None: # Invalid JSON From 7edd6133dd6c0eb102489d6dcb6fd803cecf1bb9 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 11:09:27 -0800 Subject: [PATCH 07/18] Exposed missing classes --- llmclient/__init__.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/llmclient/__init__.py b/llmclient/__init__.py index d471c9d..65aa5ab 100644 --- a/llmclient/__init__.py +++ b/llmclient/__init__.py @@ -1,7 +1,13 @@ +from .constants import ( + CHARACTERS_PER_TOKEN_ASSUMPTION, + EXTRA_TOKENS_FROM_USER_ROLE, + MODEL_COST_MAP, +) from .embeddings import ( EmbeddingModel, EmbeddingModes, HybridEmbeddingModel, + LiteLLMEmbeddingModel, SentenceTransformerEmbeddingModel, SparseEmbeddingModel, ) @@ -13,15 +19,25 @@ LLMModel, MultipleCompletionLLMModel, ) -from .types import LLMResult +from .types import ( + Chunk, + Embeddable, + LLMResult, +) __all__ = [ + "CHARACTERS_PER_TOKEN_ASSUMPTION", + "EXTRA_TOKENS_FROM_USER_ROLE", + "MODEL_COST_MAP", + "Chunk", + "Embeddable", "EmbeddingModel", "EmbeddingModes", "HybridEmbeddingModel", "JSONSchemaValidationError", "LLMModel", "LLMResult", + "LiteLLMEmbeddingModel", "LiteLLMModel", "MultipleCompletionLLMModel", "SentenceTransformerEmbeddingModel", From bae87659f3e14f90736ae701bc3d284f66dd9581 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 11:14:36 -0800 Subject: [PATCH 08/18] added embedding_model_factory --- llmclient/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llmclient/__init__.py b/llmclient/__init__.py index 65aa5ab..dc27f02 100644 --- a/llmclient/__init__.py +++ b/llmclient/__init__.py @@ -10,6 +10,7 @@ LiteLLMEmbeddingModel, SentenceTransformerEmbeddingModel, SparseEmbeddingModel, + embedding_model_factory, ) from .exceptions import ( JSONSchemaValidationError, @@ -42,4 +43,5 @@ "MultipleCompletionLLMModel", "SentenceTransformerEmbeddingModel", "SparseEmbeddingModel", + 
"embedding_model_factory", ] From 1e6eb7812302857744d91f0cf860bb7c05dd5164 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 11:24:59 -0800 Subject: [PATCH 09/18] Added documentation to call functions --- llmclient/llms.py | 82 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 19 deletions(-) diff --git a/llmclient/llms.py b/llmclient/llms.py index 2f23255..d68be73 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -614,7 +614,7 @@ class MultipleCompletionLLMModel(BaseModel): "Configuration of the model:" "model is the name of the llm model to use," "temperature is the sampling temperature, and" - "n is the number of completions to generate." + "n is the number of completions to generate by default." ), ) encoding: Any | None = None @@ -832,9 +832,6 @@ async def _call( # noqa: C901, PLR0915 return results - # TODO: Is it good practice to have this multiple interface? - # Users can just use `call` and we chat `n` - # or they can specifically call `call_single` or `call_multiple` async def call_single( self, messages: list[Message], @@ -844,7 +841,25 @@ async def call_single( tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, **chat_kwargs, ) -> LLMResult: - if chat_kwargs.get("n", 1) != 1 or self.config.get("n", 1) != 1: + """ + Calls the LLM with a list of messages and returns a single completion result. + + Args: + messages: A list of messages to send to the LLM. + callbacks: A list of callback functions to execute after the call. + output_type: The type of the output model. + tools: A list of tools to use during the call. + tool_choice: The tool or tool choice to use. + **chat_kwargs: Additional keyword arguments for the chat. + + Returns: + The result of the LLM call as a LLMResult object. + + Raises: + ValueError: If the value of 'n' is not 1. + """ + n = chat_kwargs.get("n", self.config.get("n", 1)) + if n != 1: raise ValueError("n must be 1 for call_single.") return ( await self._call( @@ -861,17 +876,27 @@ async def call_multiple( tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, **chat_kwargs, ) -> list[LLMResult]: - if 1 in {chat_kwargs.get("n", 1), self.config.get("n", 1)}: - if ( - chat_kwargs.get("n") - and self.config.get("n") - and chat_kwargs.get("n") != self.config.get("n") - ): - raise ValueError( - f"Incompatible number of completions requested. " - f"Model's configuration n is {self.config['n']}, " - f"but kwarg n={chat_kwargs['n']} was passed." - ) + """ + Calls the LLM with a list of messages and returns a list of completion results. + + Args: + messages: A list of messages to send to the LLM. + callbacks: A list of callback functions to execute after receiving the response. + output_type: The type of the output model. + tools: A list of tools to use during the call. + tool_choice: The tool or tool choice strategy to use. + **chat_kwargs: Additional keyword arguments to pass to the chat function. + + Returns: + A list of results from the LLM. + + Raises: + Warning: If the number of completions (`n`) requested is set to 1, + a warning is logged indicating that the returned list will contain a single element. + `n` can be set in chat_kargs or in the model's configuration. + """ + n = chat_kwargs.get("n", self.config.get("n", 1)) + if n == 1: logger.warning( "n is 1 for call_multiple. It will return a list with a single element" ) @@ -913,14 +938,33 @@ async def call( n: int | None = None, **chat_kwargs, ) -> list[LLMResult] | LLMResult: + """ + Call the LLM model with the given messages and configuration. 

From cb16d19e216ed40ce83c387e9d2a031db487728b Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Mon, 9 Dec 2024 11:37:00 -0800
Subject: [PATCH 10/18] skip lint checking for argument with default value in
 test_llms

---
 tests/test_llms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_llms.py b/tests/test_llms.py
index 8bd4974..b236565 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -339,7 +339,7 @@ async def test_single_completion(self, model_name: str) -> None:
         assert result.messages[0].content

         model = self.MODEL_CLS(name=model_name, config={"n": 2})
-        result = await model.call(messages, n=1)
+        result = await model.call(messages, n=1) # noqa: FURB120
         assert isinstance(result, LLMResult)
         assert result.messages
         assert len(result.messages) == 1

From 7966f9ab2e22ce4bc8f15b36b67a5d72eefab283 Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Mon, 9 Dec 2024 11:40:26 -0800
Subject: [PATCH 11/18] Fixed pre-commit errors

---
 tests/test_llms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_llms.py b/tests/test_llms.py
index b236565..1a4fd1c 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -339,7 +339,7 @@ async def test_single_completion(self, model_name: str) -> None:
         assert result.messages[0].content

         model = self.MODEL_CLS(name=model_name, config={"n": 2})
-        result = await model.call(messages, n=1) # noqa: FURB120
+        result = await model.call(messages, n=1)  # noqa: FURB120
         assert isinstance(result, LLMResult)
         assert result.messages
         assert len(result.messages) == 1

From 9e91858387c385e2894cc1ece13a6b3854dd25d7 Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Mon, 9 Dec 2024 12:31:54 -0800
Subject: [PATCH 12/18] Reverted changes in uv.lock

---
 uv.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uv.lock b/uv.lock
index c74be79..3d3717f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3033,4 +3033,4 @@ source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/3f/50/bad581df71744867e9468ebd0bcd6505de3b275e06f202c2cb016e3ff56f/zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4", size = 24545 }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 },
-]
+]
\ No newline at end of file
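
Across PATCH 05, PATCH 06, and PATCH 09, the precedence for resolving `n` settles as: a positive `n` passed at call time wins, then the model configuration's `n`, then a default of 1. A simplified restatement of that rule (illustrative only; `resolve_n` is not a function in llms.py):

    def resolve_n(call_time_n: int | None, config_n: int | None) -> int:
        # A positive n supplied to call() takes priority.
        if call_time_n and call_time_n > 0:
            return call_time_n
        # Otherwise fall back to the model configuration, defaulting to 1.
        return config_n if config_n and config_n > 0 else 1

    assert resolve_n(3, 1) == 3     # call-time kwarg beats the config (PATCH 06's new tests)
    assert resolve_n(None, 2) == 2  # the config fills in when n is omitted
    assert resolve_n(0, None) == 1  # invalid values fall back to the default
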
"https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 }, -] +] \ No newline at end of file From 29e4d917ea2312db4d58ad75d4fd4e6fb11e09cc Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 12:32:52 -0800 Subject: [PATCH 13/18] Fixed line wrap in docstrings --- llmclient/llms.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llmclient/llms.py b/llmclient/llms.py index d68be73..52eb8ea 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -892,8 +892,8 @@ async def call_multiple( Raises: Warning: If the number of completions (`n`) requested is set to 1, - a warning is logged indicating that the returned list will contain a single element. - `n` can be set in chat_kargs or in the model's configuration. + a warning is logged indicating that the returned list will contain a single element. + `n` can be set in chat_kargs or in the model's configuration. """ n = chat_kwargs.get("n", self.config.get("n", 1)) if n == 1: @@ -948,7 +948,7 @@ async def call( tools: A list of tools to use during the call. tool_choice: The tool or tool identifier to use. n: An integer argument that specifies the number of completions to generate. - If n is not specified, the model's configuration is used. + If n is not specified, the model's configuration is used. **chat_kwargs: Additional keyword arguments to pass to the chat function. Returns: From f8090bbb0244f946d01540329e042900ddfa7d47 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 12:35:53 -0800 Subject: [PATCH 14/18] reverting uv.lock --- uv.lock | 73 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/uv.lock b/uv.lock index 3d3717f..8d118c6 100644 --- a/uv.lock +++ b/uv.lock @@ -114,16 +114,15 @@ wheels = [ [[package]] name = "anyio" -version = "4.7.0" +version = "4.6.2.post1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "sniffio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f6/40/318e58f669b1a9e00f5c4453910682e2d9dd594334539c7b7817dabb765f/anyio-4.7.0.tar.gz", hash = "sha256:2f834749c602966b7d456a7567cafcb309f96482b5081d14ac93ccd457f9dd48", size = 177076 } +sdist = { url = "https://files.pythonhosted.org/packages/9f/09/45b9b7a6d4e45c6bcb5bf61d19e3ab87df68e0601fa8c5293de3542546cc/anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c", size = 173422 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/7a/4daaf3b6c08ad7ceffea4634ec206faeff697526421c20f07628c7372156/anyio-4.7.0-py3-none-any.whl", hash = "sha256:ea60c3723ab42ba6fff7e8ccb0488c898ec538ff4df1f1d5e642c3601d07e352", size = 93052 }, + { url = "https://files.pythonhosted.org/packages/e4/f5/f2b75d2fc6f1a260f340f0e7c6a060f4dd2961cc16884ed851b0d18da06a/anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d", size = 90377 }, ] [[package]] @@ -564,7 +563,7 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.0.4.dev7+g6fbf2f2.d20241206" +version = "0.0.4.dev6+g6de1e91.d20241206" source = { editable = "." 
} dependencies = [ { name = "coredis" }, @@ -2669,27 +2668,49 @@ sdist = { url = "https://files.pythonhosted.org/packages/80/f8/0802dd14c58b5d3d7 [[package]] name = "tokenizers" -version = "0.21.0" +version = "0.20.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/20/41/c2be10975ca37f6ec40d7abd7e98a5213bb04f284b869c1a24e6504fd94d/tokenizers-0.21.0.tar.gz", hash = "sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4", size = 343021 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b0/5c/8b09607b37e996dc47e70d6a7b6f4bdd4e4d5ab22fe49d7374565c7fefaf/tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2", size = 2647461 }, - { url = "https://files.pythonhosted.org/packages/22/7a/88e58bb297c22633ed1c9d16029316e5b5ac5ee44012164c2edede599a5e/tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e", size = 2563639 }, - { url = "https://files.pythonhosted.org/packages/f7/14/83429177c19364df27d22bc096d4c2e431e0ba43e56c525434f1f9b0fd00/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193", size = 2903304 }, - { url = "https://files.pythonhosted.org/packages/7e/db/3433eab42347e0dc5452d8fcc8da03f638c9accffefe5a7c78146666964a/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e", size = 2804378 }, - { url = "https://files.pythonhosted.org/packages/57/8b/7da5e6f89736c2ade02816b4733983fca1c226b0c42980b1ae9dc8fcf5cc/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e", size = 3095488 }, - { url = "https://files.pythonhosted.org/packages/4d/f6/5ed6711093dc2c04a4e03f6461798b12669bc5a17c8be7cce1240e0b5ce8/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba", size = 3121410 }, - { url = "https://files.pythonhosted.org/packages/81/42/07600892d48950c5e80505b81411044a2d969368cdc0d929b1c847bf6697/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273", size = 3388821 }, - { url = "https://files.pythonhosted.org/packages/22/06/69d7ce374747edaf1695a4f61b83570d91cc8bbfc51ccfecf76f56ab4aac/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04", size = 3008868 }, - { url = "https://files.pythonhosted.org/packages/c8/69/54a0aee4d576045b49a0eb8bffdc495634309c823bf886042e6f46b80058/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e", size = 8975831 }, - { url = "https://files.pythonhosted.org/packages/f7/f3/b776061e4f3ebf2905ba1a25d90380aafd10c02d406437a8ba22d1724d76/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b", size = 8920746 }, - { url = 
"https://files.pythonhosted.org/packages/d8/ee/ce83d5ec8b6844ad4c3ecfe3333d58ecc1adc61f0878b323a15355bcab24/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74", size = 9161814 }, - { url = "https://files.pythonhosted.org/packages/18/07/3e88e65c0ed28fa93aa0c4d264988428eef3df2764c3126dc83e243cb36f/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff", size = 9357138 }, - { url = "https://files.pythonhosted.org/packages/15/b0/dc4572ca61555fc482ebc933f26cb407c6aceb3dc19c301c68184f8cad03/tokenizers-0.21.0-cp39-abi3-win32.whl", hash = "sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a", size = 2202266 }, - { url = "https://files.pythonhosted.org/packages/44/69/d21eb253fa91622da25585d362a874fa4710be600f0ea9446d8d0217cec1/tokenizers-0.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c", size = 2389192 }, +sdist = { url = "https://files.pythonhosted.org/packages/da/25/b1681c1c30ea3ea6e584ae3fffd552430b12faa599b558c4c4783f56d7ff/tokenizers-0.20.3.tar.gz", hash = "sha256:2278b34c5d0dd78e087e1ca7f9b1dcbf129d80211afa645f214bd6e051037539", size = 340513 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/93/6742ef9206409d5ce1fdf44d5ca1687cdc3847ba0485424e2c731e6bcf67/tokenizers-0.20.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:585b51e06ca1f4839ce7759941e66766d7b060dccfdc57c4ca1e5b9a33013a90", size = 2674224 }, + { url = "https://files.pythonhosted.org/packages/aa/14/e75ece72e99f6ef9ae07777ca9fdd78608f69466a5cecf636e9bd2f25d5c/tokenizers-0.20.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:61cbf11954f3b481d08723ebd048ba4b11e582986f9be74d2c3bdd9293a4538d", size = 2558991 }, + { url = "https://files.pythonhosted.org/packages/46/54/033b5b2ba0c3ae01e026c6f7ced147d41a2fa1c573d00a66cb97f6d7f9b3/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef820880d5e4e8484e2fa54ff8d297bb32519eaa7815694dc835ace9130a3eea", size = 2892476 }, + { url = "https://files.pythonhosted.org/packages/e6/b0/cc369fb3297d61f3311cab523d16d48c869dc2f0ba32985dbf03ff811041/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:67ef4dcb8841a4988cd00dd288fb95dfc8e22ed021f01f37348fd51c2b055ba9", size = 2802775 }, + { url = "https://files.pythonhosted.org/packages/1a/74/62ad983e8ea6a63e04ed9c5be0b605056bf8aac2f0125f9b5e0b3e2b89fa/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff1ef8bd47a02b0dc191688ccb4da53600df5d4c9a05a4b68e1e3de4823e78eb", size = 3086138 }, + { url = "https://files.pythonhosted.org/packages/6b/ac/4637ba619db25094998523f9e6f5b456e1db1f8faa770a3d925d436db0c3/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:444d188186eab3148baf0615b522461b41b1f0cd58cd57b862ec94b6ac9780f1", size = 3098076 }, + { url = "https://files.pythonhosted.org/packages/58/ce/9793f2dc2ce529369807c9c74e42722b05034af411d60f5730b720388c7d/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37c04c032c1442740b2c2d925f1857885c07619224a533123ac7ea71ca5713da", size = 3379650 }, + { url = "https://files.pythonhosted.org/packages/50/f6/2841de926bc4118af996eaf0bdf0ea5b012245044766ffc0347e6c968e63/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:453c7769d22231960ee0e883d1005c93c68015025a5e4ae56275406d94a3c907", size = 2994005 }, + { url = "https://files.pythonhosted.org/packages/a3/b2/00915c4fed08e9505d37cf6eaab45b12b4bff8f6719d459abcb9ead86a4b/tokenizers-0.20.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4bb31f7b2847e439766aaa9cc7bccf7ac7088052deccdb2275c952d96f691c6a", size = 8977488 }, + { url = "https://files.pythonhosted.org/packages/e9/ac/1c069e7808181ff57bcf2d39e9b6fbee9133a55410e6ebdaa89f67c32e83/tokenizers-0.20.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:843729bf0f991b29655a069a2ff58a4c24375a553c70955e15e37a90dd4e045c", size = 9294935 }, + { url = "https://files.pythonhosted.org/packages/50/47/722feb70ee68d1c4412b12d0ea4acc2713179fd63f054913990f9e259492/tokenizers-0.20.3-cp311-none-win32.whl", hash = "sha256:efcce3a927b1e20ca694ba13f7a68c59b0bd859ef71e441db68ee42cf20c2442", size = 2197175 }, + { url = "https://files.pythonhosted.org/packages/75/68/1b4f928b15a36ed278332ac75d66d7eb65d865bf344d049c452c18447bf9/tokenizers-0.20.3-cp311-none-win_amd64.whl", hash = "sha256:88301aa0801f225725b6df5dea3d77c80365ff2362ca7e252583f2b4809c4cc0", size = 2381616 }, + { url = "https://files.pythonhosted.org/packages/07/00/92a08af2a6b0c88c50f1ab47d7189e695722ad9714b0ee78ea5e1e2e1def/tokenizers-0.20.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:49d12a32e190fad0e79e5bdb788d05da2f20d8e006b13a70859ac47fecf6ab2f", size = 2667951 }, + { url = "https://files.pythonhosted.org/packages/ec/9a/e17a352f0bffbf415cf7d73756f5c73a3219225fc5957bc2f39d52c61684/tokenizers-0.20.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:282848cacfb9c06d5e51489f38ec5aa0b3cd1e247a023061945f71f41d949d73", size = 2555167 }, + { url = "https://files.pythonhosted.org/packages/27/37/d108df55daf4f0fcf1f58554692ff71687c273d870a34693066f0847be96/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abe4e08c7d0cd6154c795deb5bf81d2122f36daf075e0c12a8b050d824ef0a64", size = 2898389 }, + { url = "https://files.pythonhosted.org/packages/b2/27/32f29da16d28f59472fa7fb38e7782069748c7e9ab9854522db20341624c/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca94fc1b73b3883c98f0c88c77700b13d55b49f1071dfd57df2b06f3ff7afd64", size = 2795866 }, + { url = "https://files.pythonhosted.org/packages/29/4e/8a9a3c89e128c4a40f247b501c10279d2d7ade685953407c4d94c8c0f7a7/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef279c7e239f95c8bdd6ff319d9870f30f0d24915b04895f55b1adcf96d6c60d", size = 3085446 }, + { url = "https://files.pythonhosted.org/packages/b4/3b/a2a7962c496ebcd95860ca99e423254f760f382cd4bd376f8895783afaf5/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16384073973f6ccbde9852157a4fdfe632bb65208139c9d0c0bd0176a71fd67f", size = 3094378 }, + { url = "https://files.pythonhosted.org/packages/1f/f4/a8a33f0192a1629a3bd0afcad17d4d221bbf9276da4b95d226364208d5eb/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:312d522caeb8a1a42ebdec87118d99b22667782b67898a76c963c058a7e41d4f", size = 3385755 }, + { url = "https://files.pythonhosted.org/packages/9e/65/c83cb3545a65a9eaa2e13b22c93d5e00bd7624b354a44adbdc93d5d9bd91/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2b7cb962564785a83dafbba0144ecb7f579f1d57d8c406cdaa7f32fe32f18ad", size = 2997679 }, + { url = 
"https://files.pythonhosted.org/packages/55/e9/a80d4e592307688a67c7c59ab77e03687b6a8bd92eb5db763a2c80f93f57/tokenizers-0.20.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:124c5882ebb88dadae1fc788a582299fcd3a8bd84fc3e260b9918cf28b8751f5", size = 8989296 }, + { url = "https://files.pythonhosted.org/packages/90/af/60c957af8d2244321124e893828f1a4817cde1a2d08d09d423b73f19bd2f/tokenizers-0.20.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2b6e54e71f84c4202111a489879005cb14b92616a87417f6c102c833af961ea2", size = 9303621 }, + { url = "https://files.pythonhosted.org/packages/be/a9/96172310ee141009646d63a1ca267c099c462d747fe5ef7e33f74e27a683/tokenizers-0.20.3-cp312-none-win32.whl", hash = "sha256:83d9bfbe9af86f2d9df4833c22e94d94750f1d0cd9bfb22a7bb90a86f61cdb1c", size = 2188979 }, + { url = "https://files.pythonhosted.org/packages/bd/68/61d85ae7ae96dde7d0974ff3538db75d5cdc29be2e4329cd7fc51a283e22/tokenizers-0.20.3-cp312-none-win_amd64.whl", hash = "sha256:44def74cee574d609a36e17c8914311d1b5dbcfe37c55fd29369d42591b91cf2", size = 2380725 }, + { url = "https://files.pythonhosted.org/packages/07/19/36e9eaafb229616cb8502b42030fa7fe347550e76cb618de71b498fc3222/tokenizers-0.20.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0b630e0b536ef0e3c8b42c685c1bc93bd19e98c0f1543db52911f8ede42cf84", size = 2666813 }, + { url = "https://files.pythonhosted.org/packages/b9/c7/e2ce1d4f756c8a62ef93fdb4df877c2185339b6d63667b015bf70ea9d34b/tokenizers-0.20.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a02d160d2b19bcbfdf28bd9a4bf11be4cb97d0499c000d95d4c4b1a4312740b6", size = 2555354 }, + { url = "https://files.pythonhosted.org/packages/7c/cf/5309c2d173a6a67f9ec8697d8e710ea32418de6fd8541778032c202a1c3e/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e3d80d89b068bc30034034b5319218c7c0a91b00af19679833f55f3becb6945", size = 2897745 }, + { url = "https://files.pythonhosted.org/packages/2c/e5/af3078e32f225e680e69d61f78855880edb8d53f5850a1834d519b2b103f/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:174a54910bed1b089226512b4458ea60d6d6fd93060254734d3bc3540953c51c", size = 2794385 }, + { url = "https://files.pythonhosted.org/packages/0b/a7/bc421fe46650cc4eb4a913a236b88c243204f32c7480684d2f138925899e/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:098b8a632b8656aa5802c46689462c5c48f02510f24029d71c208ec2c822e771", size = 3084580 }, + { url = "https://files.pythonhosted.org/packages/c6/22/97e1e95ee81f75922c9f569c23cb2b1fdc7f5a7a29c4c9fae17e63f751a6/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78c8c143e3ae41e718588281eb3e212c2b31623c9d6d40410ec464d7d6221fb5", size = 3093581 }, + { url = "https://files.pythonhosted.org/packages/d5/14/f0df0ee3b9e516121e23c0099bccd7b9f086ba9150021a750e99b16ce56f/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b26b0aadb18cd8701077362ba359a06683662d5cafe3e8e8aba10eb05c037f1", size = 3385934 }, + { url = "https://files.pythonhosted.org/packages/66/52/7a171bd4929e3ffe61a29b4340fe5b73484709f92a8162a18946e124c34c/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07d7851a72717321022f3774e84aa9d595a041d643fafa2e87fbc9b18711dac0", size = 2997311 }, + { url = 
"https://files.pythonhosted.org/packages/7c/64/f1993bb8ebf775d56875ca0d50a50f2648bfbbb143da92fe2e6ceeb4abd5/tokenizers-0.20.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:bd44e48a430ada902c6266a8245f5036c4fe744fcb51f699999fbe82aa438797", size = 8988601 }, + { url = "https://files.pythonhosted.org/packages/d6/3f/49fa63422159bbc2f2a4ac5bfc597d04d4ec0ad3d2ef46649b5e9a340e37/tokenizers-0.20.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a4c186bb006ccbe1f5cc4e0380d1ce7806f5955c244074fd96abc55e27b77f01", size = 9303950 }, + { url = "https://files.pythonhosted.org/packages/66/11/79d91aeb2817ad1993ef61c690afe73e6dbedbfb21918b302ef5a2ba9bfb/tokenizers-0.20.3-cp313-none-win32.whl", hash = "sha256:6e19e0f1d854d6ab7ea0c743d06e764d1d9a546932be0a67f33087645f00fe13", size = 2188941 }, + { url = "https://files.pythonhosted.org/packages/c2/ff/ac8410f868fb8b14b5e619efa304aa119cb8a40bd7df29fc81a898e64f99/tokenizers-0.20.3-cp313-none-win_amd64.whl", hash = "sha256:d50ede425c7e60966a9680d41b58b3a0950afa1bb570488e2972fa61662c4273", size = 2380269 }, ] [[package]] @@ -2762,7 +2783,7 @@ wheels = [ [[package]] name = "transformers" -version = "4.47.0" +version = "4.46.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -2776,9 +2797,9 @@ dependencies = [ { name = "tokenizers" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b1/5a/0ecfde3264bed0579c37f249e04e15f3c1451ba864d78bbe390177664cac/transformers-4.47.0.tar.gz", hash = "sha256:f8ead7a5a4f6937bb507e66508e5e002dc5930f7b6122a9259c37b099d0f3b19", size = 8693668 } +sdist = { url = "https://files.pythonhosted.org/packages/37/5a/58f96c83e566f907ae39f16d4401bbefd8bb85c60bd1e6a95c419752ab90/transformers-4.46.3.tar.gz", hash = "sha256:8ee4b3ae943fe33e82afff8e837f4b052058b07ca9be3cb5b729ed31295f72cc", size = 8627944 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/a7/7eedcf6a359e1e1eff3bc204ad022485aa5d88c08e1e3e0e0aee8a2e2235/transformers-4.47.0-py3-none-any.whl", hash = "sha256:a8e1bafdaae69abdda3cad638fe392e37c86d2ce0ecfcae11d60abb8f949ff4d", size = 10133426 }, + { url = "https://files.pythonhosted.org/packages/51/51/b87caa939fedf307496e4dbf412f4b909af3d9ca8b189fc3b65c1faa456f/transformers-4.46.3-py3-none-any.whl", hash = "sha256:a12ef6f52841fd190a3e5602145b542d03507222f2c64ebb7ee92e8788093aef", size = 10034536 }, ] [[package]] @@ -3033,4 +3054,4 @@ source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/3f/50/bad581df71744867e9468ebd0bcd6505de3b275e06f202c2cb016e3ff56f/zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4", size = 24545 } wheels = [ { url = "https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 }, -] \ No newline at end of file +] From 418fa3be9739f0a26ce363b61d10e4d94da7aba4 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 12:59:46 -0800 Subject: [PATCH 15/18] removed the dependency on numpy. 
It is now a conditional dependency for local embeddings --- llmclient/embeddings.py | 19 +++++++++++-------- pyproject.toml | 7 ++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/llmclient/embeddings.py b/llmclient/embeddings.py index ce15503..c5a5b7c 100644 --- a/llmclient/embeddings.py +++ b/llmclient/embeddings.py @@ -1,10 +1,10 @@ import asyncio from abc import ABC, abstractmethod +from collections import Counter from enum import StrEnum from typing import Any import litellm -import numpy as np import tiktoken from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator @@ -171,13 +171,9 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: enc_batch = self.enc.encode_ordinary_batch(texts) # now get frequency of each token rel to length return [ - ( - np.bincount([xi % self.ndim for xi in x], minlength=self.ndim).astype( - float - ) - / len(x) - ).tolist() + [token_counts.get(i, 0) / len(x) for i in range(self.ndim)] for x in enc_batch + if (token_counts := Counter(xi % self.ndim for xi in x)) ] @@ -199,7 +195,11 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: all_embeds = await asyncio.gather( *[m.embed_documents(texts) for m in self.models] ) - return np.concatenate(all_embeds, axis=1).tolist() + + return [ + [val for sublist in embed_group for val in sublist] + for embed_group in zip(*all_embeds, strict=True) + ] def set_mode(self, mode: EmbeddingModes) -> None: # Set mode for all component models @@ -217,6 +217,7 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel): def __init__(self, **kwargs): super().__init__(**kwargs) try: + import numpy as np from sentence_transformers import SentenceTransformer except ImportError as exc: raise ImportError( @@ -240,6 +241,8 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: Returns: A list of embedding vectors. 
""" + import numpy as np + # Extract additional configurations if needed batch_size = self.config.get("batch_size", 32) device = self.config.get("device", "cpu") diff --git a/pyproject.toml b/pyproject.toml index 8c6d278..a5caeb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ dependencies = [ "fhaviary>=0.8.2", # For core namespace "limits", "litellm>=1.44", # For LITELLM_LOG addition - "numpy", "pydantic~=2.0,>=2.10.1,<2.10.2", "tiktoken>=0.4.0", "typing-extensions; python_version <= '3.11'", # for typing.override @@ -40,7 +39,7 @@ requires-python = ">=3.11" [project.optional-dependencies] dev = [ - "fh-llm-client[image,local]", + "fh-llm-client[local]", "fhaviary[xml]", "ipython>=8", # Pin to keep recent "mypy>=1.8", # Pin for mutable-override @@ -58,11 +57,9 @@ dev = [ "python-dotenv", "refurb>=2", # Pin to keep recent ] -image = [ - "Pillow", -] local = [ "sentence-transformers", + "numpy", ] [project.urls] From c34b02c761a48b7b746a076851c1a5037fa44765 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 13:44:31 -0800 Subject: [PATCH 16/18] Removed image group dependency Messages are not implemented in llmclient anymore --- llmclient/embeddings.py | 20 ++++++++++++-------- pyproject.toml | 7 ++----- uv.lock | 18 +++++++----------- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/llmclient/embeddings.py b/llmclient/embeddings.py index ce15503..e03de2e 100644 --- a/llmclient/embeddings.py +++ b/llmclient/embeddings.py @@ -1,10 +1,11 @@ import asyncio from abc import ABC, abstractmethod +from collections import Counter from enum import StrEnum +from itertools import chain from typing import Any import litellm -import numpy as np import tiktoken from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator @@ -171,13 +172,9 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: enc_batch = self.enc.encode_ordinary_batch(texts) # now get frequency of each token rel to length return [ - ( - np.bincount([xi % self.ndim for xi in x], minlength=self.ndim).astype( - float - ) - / len(x) - ).tolist() + [token_counts.get(xi, 0) / len(x) for xi in range(self.ndim)] for x in enc_batch + if (token_counts := Counter(xi % self.ndim for xi in x)) ] @@ -199,7 +196,11 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: all_embeds = await asyncio.gather( *[m.embed_documents(texts) for m in self.models] ) - return np.concatenate(all_embeds, axis=1).tolist() + + return [ + list(chain.from_iterable(embed_group)) + for embed_group in zip(*all_embeds, strict=True) + ] def set_mode(self, mode: EmbeddingModes) -> None: # Set mode for all component models @@ -217,6 +218,7 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel): def __init__(self, **kwargs): super().__init__(**kwargs) try: + import numpy as np # noqa: F401 from sentence_transformers import SentenceTransformer except ImportError as exc: raise ImportError( @@ -240,6 +242,8 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: Returns: A list of embedding vectors. 
""" + import numpy as np + # Extract additional configurations if needed batch_size = self.config.get("batch_size", 32) device = self.config.get("device", "cpu") diff --git a/pyproject.toml b/pyproject.toml index 8c6d278..a85772d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ dependencies = [ "fhaviary>=0.8.2", # For core namespace "limits", "litellm>=1.44", # For LITELLM_LOG addition - "numpy", "pydantic~=2.0,>=2.10.1,<2.10.2", "tiktoken>=0.4.0", "typing-extensions; python_version <= '3.11'", # for typing.override @@ -40,7 +39,7 @@ requires-python = ">=3.11" [project.optional-dependencies] dev = [ - "fh-llm-client[image,local]", + "fh-llm-client[local]", "fhaviary[xml]", "ipython>=8", # Pin to keep recent "mypy>=1.8", # Pin for mutable-override @@ -58,10 +57,8 @@ dev = [ "python-dotenv", "refurb>=2", # Pin to keep recent ] -image = [ - "Pillow", -] local = [ + "numpy", "sentence-transformers", ] diff --git a/uv.lock b/uv.lock index 8d118c6..a161538 100644 --- a/uv.lock +++ b/uv.lock @@ -563,14 +563,13 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.0.4.dev6+g6de1e91.d20241206" +version = "0.0.4.dev3+g418fa3b.d20241209" source = { editable = "." } dependencies = [ { name = "coredis" }, { name = "fhaviary" }, { name = "limits" }, { name = "litellm" }, - { name = "numpy" }, { name = "pydantic" }, { name = "tiktoken" }, { name = "typing-extensions", marker = "python_full_version < '3.12'" }, @@ -581,7 +580,7 @@ dev = [ { name = "fhaviary", extra = ["xml"] }, { name = "ipython" }, { name = "mypy" }, - { name = "pillow" }, + { name = "numpy" }, { name = "pre-commit" }, { name = "pylint-pydantic" }, { name = "pytest" }, @@ -597,10 +596,8 @@ dev = [ { name = "refurb" }, { name = "sentence-transformers" }, ] -image = [ - { name = "pillow" }, -] local = [ + { name = "numpy" }, { name = "sentence-transformers" }, ] @@ -610,7 +607,7 @@ codeflash = [ { name = "fhaviary", extra = ["xml"] }, { name = "ipython" }, { name = "mypy" }, - { name = "pillow" }, + { name = "numpy" }, { name = "pre-commit" }, { name = "pylint-pydantic" }, { name = "pytest" }, @@ -630,7 +627,7 @@ dev = [ { name = "fhaviary", extra = ["xml"] }, { name = "ipython" }, { name = "mypy" }, - { name = "pillow" }, + { name = "numpy" }, { name = "pre-commit" }, { name = "pylint-pydantic" }, { name = "pytest" }, @@ -650,15 +647,14 @@ dev = [ [package.metadata] requires-dist = [ { name = "coredis" }, - { name = "fh-llm-client", extras = ["image", "local"], marker = "extra == 'dev'" }, + { name = "fh-llm-client", extras = ["local"], marker = "extra == 'dev'" }, { name = "fhaviary", specifier = ">=0.8.2" }, { name = "fhaviary", extras = ["xml"], marker = "extra == 'dev'" }, { name = "ipython", marker = "extra == 'dev'", specifier = ">=8" }, { name = "limits" }, { name = "litellm", specifier = ">=1.44" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8" }, - { name = "numpy" }, - { name = "pillow", marker = "extra == 'image'" }, + { name = "numpy", marker = "extra == 'local'" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.4" }, { name = "pydantic", specifier = "~=2.0,>=2.10.1,<2.10.2" }, { name = "pylint-pydantic", marker = "extra == 'dev'" }, From 86d455d7617ea9de52394d32a26c7b817dfb7055 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 13:53:35 -0800 Subject: [PATCH 17/18] Fixed typos --- llmclient/embeddings.py | 2 -- pyproject.toml | 1 - 2 files changed, 3 deletions(-) diff --git a/llmclient/embeddings.py b/llmclient/embeddings.py index 271784f..e03de2e 
From 86d455d7617ea9de52394d32a26c7b817dfb7055 Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Mon, 9 Dec 2024 13:53:35 -0800
Subject: [PATCH 17/18] Fixed typos

---
 llmclient/embeddings.py | 2 --
 pyproject.toml          | 1 -
 2 files changed, 3 deletions(-)

diff --git a/llmclient/embeddings.py b/llmclient/embeddings.py
index 271784f..e03de2e 100644
--- a/llmclient/embeddings.py
+++ b/llmclient/embeddings.py
@@ -1,7 +1,6 @@
 import asyncio
 from abc import ABC, abstractmethod
 from collections import Counter
-from collections import Counter
 from enum import StrEnum
 from itertools import chain
 from typing import Any
@@ -176,7 +175,6 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]:
             [token_counts.get(xi, 0) / len(x) for xi in range(self.ndim)]
             for x in enc_batch
             if (token_counts := Counter(xi % self.ndim for xi in x))
-            if (token_counts := Counter(xi % self.ndim for xi in x))
         ]

diff --git a/pyproject.toml b/pyproject.toml
index d58774c..a85772d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -60,7 +60,6 @@ dev = [
 local = [
     "numpy",
     "sentence-transformers",
-    "numpy",
 ]

 [project.urls]
- """ - n = chat_kwargs.get("n", self.config.get("n", 1)) - if n != 1: - raise ValueError("n must be 1 for call_single.") return ( - await self._call( - messages, callbacks, output_type, tools, tool_choice, **chat_kwargs + await self.call( + messages, callbacks, output_type, tools, tool_choice, n=1, **chat_kwargs ) )[0] - - async def call_multiple( - self, - messages: list[Message], - callbacks: list[Callable] | None = None, - output_type: type[BaseModel] | None = None, - tools: list[Tool] | None = None, - tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, - **chat_kwargs, - ) -> list[LLMResult]: - """ - Calls the LLM with a list of messages and returns a list of completion results. - - Args: - messages: A list of messages to send to the LLM. - callbacks: A list of callback functions to execute after receiving the response. - output_type: The type of the output model. - tools: A list of tools to use during the call. - tool_choice: The tool or tool choice strategy to use. - **chat_kwargs: Additional keyword arguments to pass to the chat function. - - Returns: - A list of results from the LLM. - - Raises: - Warning: If the number of completions (`n`) requested is set to 1, - a warning is logged indicating that the returned list will contain a single element. - `n` can be set in chat_kargs or in the model's configuration. - """ - n = chat_kwargs.get("n", self.config.get("n", 1)) - if n == 1: - logger.warning( - "n is 1 for call_multiple. It will return a list with a single element" - ) - return await self._call( - messages, callbacks, output_type, tools, tool_choice, **chat_kwargs - ) - - @overload - async def call( - self, - messages: list[Message], - callbacks: list[Callable] | None = None, - output_type: type[BaseModel] | None = None, - tools: list[Tool] | None = None, - tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, - n: Literal[1] = 1, - **chat_kwargs, - ) -> LLMResult: ... - - @overload - async def call( - self, - messages: list[Message], - callbacks: list[Callable] | None = None, - output_type: type[BaseModel] | None = None, - tools: list[Tool] | None = None, - tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, - n: int | None = None, - **chat_kwargs, - ) -> list[LLMResult]: ... - - async def call( - self, - messages: list[Message], - callbacks: list[Callable] | None = None, - output_type: type[BaseModel] | None = None, - tools: list[Tool] | None = None, - tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, - n: int | None = None, - **chat_kwargs, - ) -> list[LLMResult] | LLMResult: - """ - Call the LLM model with the given messages and configuration. - - Args: - messages: A list of messages to send to the language model. - callbacks: A list of callback functions to execute after receiving the response. - output_type: The type of the output model. - tools: A list of tools to use during the call. - tool_choice: The tool or tool identifier to use. - n: An integer argument that specifies the number of completions to generate. - If n is not specified, the model's configuration is used. - **chat_kwargs: Additional keyword arguments to pass to the chat function. - - Returns: - A list of LLMResult objects if multiple completions are requested (n>1), - otherwise a single LLMResult object. - - Raises: - ValueError: If the number of completions `n` is invalid. - """ - if not n or n <= 0: - logger.info( - "Invalid number of completions `n` requested to the call function. " - "Will get it from the model's configuration." 
diff --git a/tests/test_llms.py b/tests/test_llms.py
index 1a4fd1c..e9f8320 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -328,18 +328,16 @@ async def test_single_completion(self, model_name: str) -> None:
             Message(role="system", content="Respond with single words."),
             Message(content="Hello, how are you?"),
         ]
-        result = await model.call(messages)
+        result = await model.call_single(messages)
         assert isinstance(result, LLMResult)

-        result = await model.call(messages, n=1)  # noqa: FURB120
-        assert isinstance(result, LLMResult)
         assert result.messages
         assert len(result.messages) == 1
         assert result.messages[0].content

         model = self.MODEL_CLS(name=model_name, config={"n": 2})
-        result = await model.call(messages, n=1)  # noqa: FURB120
+        result = await model.call_single(messages)
         assert isinstance(result, LLMResult)
         assert result.messages
         assert len(result.messages) == 1
@@ -365,13 +363,10 @@ async def test_multiple_completion(self, model_name: str, request) -> None:
             with pytest.raises(litellm.BadRequestError, match="anthropic"):
                 await model.call(messages)
         else:
-            results = await model.call(messages, n=None)  # noqa: FURB120
-            assert len(results) == self.NUM_COMPLETIONS
-
-            results = await model.call(messages, n=self.NUM_COMPLETIONS)
+            results = await model.call(messages)  # noqa: FURB120
             assert len(results) == self.NUM_COMPLETIONS

-        model = self.MODEL_CLS(name=model_name, config={"n": 1})
+        model = self.MODEL_CLS(name=model_name, config={"n": 5})
         results = await model.call(messages, n=self.NUM_COMPLETIONS)
         assert len(results) == self.NUM_COMPLETIONS