From 7146c916c43c609b46defb18217a3cd9cd4c980b Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Thu, 5 Dec 2024 13:54:20 -0800 Subject: [PATCH 01/18] Overloaded typying in MultipleCompletionLLMModel.call. It returns either a list or a single element of LLMResult depending on how many completions are requested --- llmclient/llms.py | 97 ++++++++- ...st_multiple_completion[gpt-3.5-turbo].yaml | 194 +++++++++++++++++ ...odel.test_multiple_completion[openai].yaml | 200 ++++++++++++++++++ ...e_completion[claude-3-haiku-20240307].yaml | 69 ++++++ ...test_single_completion[gpt-3.5-turbo].yaml | 104 +++++++++ tests/test_llms.py | 78 +++---- uv.lock | 88 +++----- 7 files changed, 725 insertions(+), 105 deletions(-) create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml create mode 100644 tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml diff --git a/llmclient/llms.py b/llmclient/llms.py index 1066e08..20fc9bf 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -16,9 +16,11 @@ from typing import ( Any, ClassVar, + Literal, Self, TypeVar, cast, + overload, ) import litellm @@ -605,8 +607,8 @@ class MultipleCompletionLLMModel(BaseModel): description=( "Configuration of the model:" "model is the name of the llm model to use," - "temperature is the sampling temperature, and", - "n is the number of completions to generate.", + "temperature is the sampling temperature, and" + "n is the number of completions to generate." ), ) encoding: Any | None = None @@ -652,7 +654,7 @@ async def achat_iter(self, messages: Iterable[Message], **kwargs) -> AsyncGenera # > `required` means the model must call one or more tools. TOOL_CHOICE_REQUIRED: ClassVar[str] = "required" - async def call( # noqa: C901, PLR0915 + async def _call( # noqa: C901, PLR0915 self, messages: list[Message], callbacks: list[Callable] | None = None, @@ -823,3 +825,92 @@ async def call( # noqa: C901, PLR0915 result.seconds_to_last_token = end_clock - start_clock return results + + # TODO: Is it good practice to have this multiple interface? + # Users can just use `call` and we chat `n` + # or they can specifically call `call_single` or `call_multiple` + async def call_single( + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + **chat_kwargs, + ) -> LLMResult: + if chat_kwargs.get("n", 1) != 1 or self.config.get("n", 1) != 1: + raise ValueError("n must be 1 for call_single.") + return ( + await self._call( + messages, callbacks, output_type, tools, tool_choice, **chat_kwargs + ) + )[0] + + async def call_multiple( + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + **chat_kwargs, + ) -> list[LLMResult]: + if chat_kwargs.get("n", 1) == 1: + logger.warning( + "n is 1 for call_multiple. 
It will return a list with a single element" + ) + return await self._call( + messages, callbacks, output_type, tools, tool_choice, **chat_kwargs + ) + + @overload + async def call( + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + n: Literal[1] = 1, + **chat_kwargs, + ) -> LLMResult: ... + + @overload + async def call( + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + n: int | None = None, + **chat_kwargs, + ) -> list[LLMResult]: ... + + async def call( + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + n: int | None = None, + **chat_kwargs, + ) -> list[LLMResult] | LLMResult: + + # Uses the LLMModel configuration unless specified in chat_kwargs + # If n is not specified anywhere, defaults to 1 + if not n or n <= 0: + logger.info( + "Invalid n passed to the call function. Will get it from the model's configuration" + ) + n = self.config.get("n", 1) + if "n" in chat_kwargs: + n = chat_kwargs["n"] + if n == 1: + return await self.call_single( + messages, callbacks, output_type, tools, tool_choice, **chat_kwargs + ) + return await self.call_multiple( + messages, callbacks, output_type, tools, tool_choice, **chat_kwargs + ) diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml new file mode 100644 index 0000000..35c02c0 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml @@ -0,0 +1,194 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '149' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '1' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RTy2rDMBC8+yuEzklo3jS3QCCXXNoe+qIYWdrYamStKq1LS8i/FzkPOySFXnvR + YWZnNLsrbRPGuFZ8xrgsBMnSme48W6zuxvBAL5v55/3H06Ra4OOoWhXl82LJO1GB2TtIOqp6Ektn + gDTaPS09CILo2p8Oh6PhYHo7qYkSFZgoyx11h71xlyqfYfemPxgflAVqCYHP2GvCGGPb+owZrYIv + PmM3nSNSQggiBz47FTHGPZqIcBGCDiQs8U5DSrQEto69RFRtysO6CiJGs5UxB3x3ustg7jxm4cCf + 8LW2OhSpBxHQRt9A6HjSEl800P83DSSMvdVLqc5icuexdJQSbsBGw8Fgb8ebZ9AiDxwhCdOCR50r + ZqkCEtqE1ki4FLIA1SibByAqpbFFtMd+meWa975tbfO/2DeElOAIVOo8KC3P+23KPMQ/8lvZacR1 + YB6+A0GZrrXNwTuv6yXXm9wlPwAAAP//AwAh8pBrpAMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed70040cbcdf99b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - 
application/json + Date: + - Thu, 05 Dec 2024 21:06:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - '134' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '12000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '11999' + x-ratelimit-remaining-tokens: + - '999953' + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_1f88664946b9891fbc90796687f144c4 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '149' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RTTUsDMRC9768IOW9LP63tzaIIIqgH7UFkSZPZbTSbCcksWEr/u2T7sVtawauX + HN6b9/JmJtkkjHGt+IxxuRIkS2c6N8vbx5fF9Tisy7l5e13clfl0/vQwfl5P5o6nUYHLT5B0UHUl + ls4AabQ7WnoQBNG1PxkOR8PBZHpVEyUqMFFWOOoMu+MOVX6JnV5/MN4rV6glBD5j7wljjG3qM2a0 + Cr75jPXSA1JCCKIAPjsWMcY9mohwEYIOJCzxtCElWgJbx75HVG3KQ14FEaPZypg9vj3eZbBwHpdh + zx/xXFsdVpkHEdBG30DoeNISnzXQ/zcNJIx91EupTmJy57F0lBF+gY2Gg8HOjjfPoEXuOUISpgWP + 0gtmmQIS2oTWSLgUcgWqUTYPQFRKY4toj/08yyXvXdvaFn+xbwgpwRGozHlQWp7225R5iH/kt7Lj + iOvAPKwDQZnl2hbgndf1kutNbpMfAAAA//8DALEE5HikAwAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700428d77f99b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - '114' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '12000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '11999' + x-ratelimit-remaining-tokens: + - '999953' + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_e32516fa5bb6ab11dda5155511280ea6 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml new file mode 100644 index 0000000..cc8a292 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml @@ -0,0 +1,200 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"Respond with single 
words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '158' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA7RTwWoCMRC971ekObui7qLWm1JaWgq9FEopZckm4xqbzYQkSxXx30tWu7uihR7a + Sw7z5r28N5PsIkKoFHRGKF8xz0uj4nl+A3yxWD9Vi4f56zYtko15Htr1/aP9fKG9wMB8Ddx/s/oc + S6PAS9QHmFtgHoLqcJIkaZJcT8c1UKIAFWiF8XGKcSm1jEeDURoPJvFwemSvUHJwdEbeIkII2dVn + 8KkFbOiMDHrflRKcYwXQWdNECLWoQoUy56TzTHvaa0GO2oOurd9KDf0uZmFZORb86UqpY33fXKaw + MBZzd8Sb+lJq6VaZBeZQB2Hn0dCoQz5LMPybBHeI4uqfE0SEvNdrqU58UmOxND7z+AE6CI5GBzna + PoYWTI+YR89UhzPuXRDLBHgmlevMhHLGVyBaZvsEWCUkdoDu3M+9XNI+xJa6+I18C3AOxoPIjAUh + +Wnets1C+Ck/tTUjrg1Tt3UeymwpdQHWWHnY8tJkec4SPoXJIKfRPvoCAAD//wMAviUi9bUDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed71d50ac15cf13-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:26:27 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=.T97IJZbEqHKl_VUBqOzbYQ3.fPwrK1uEUCoRWrQ0Vs-1733433987-1.0.1.1-g7sgDoAXo0fTveWsSMwxPdEXEmD5ZOQ_XYi1pZoi0dW2JzEVU83E5oRAyXudBimOLtvB92CoJm1WxF9LBkquZA; + path=/; expires=Thu, 05-Dec-24 21:56:27 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=h0PMfMYOdM04Mkzg9aKUS2PH3E1LFBnUsNmdRQ4ltVY-1733433987796-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - '1036' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999954' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2525b96c65ed2235204a4f7fbb79f88e + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '158' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + 
H4sIAAAAAAAAAwAAAP//tFPLasMwELz7K1Sd7eA479wKpaVQAr30UoqRpbWtVtYKSYG0If9e5Dzs + kBR6aC867OyMZnalbUQIlYIuCeU187wxKrkt7qCeblYTs5gau3pxMmP8/vmrSh/hicaBgcU7cH9k + DTg2RoGXqPcwt8A8BNXhbDQaj0aLxbAFGhSgAq0yPhlj0kgtkyzNxkk6S4bzA7tGycHRJXmNCCFk + 257BpxawoUuSxsdKA86xCujy1EQItahChTLnpPNMexp3IEftQbfWHxDFoI9ZKNeOBX96rdShvjtd + prAyFgt3wE/1Umrp6twCc6iDsPNoaNQjXyQY/l2Cm39OEBHy1q5lfeaTGouN8bnHD9BBMMv2crR7 + DB04PmAePVM9zjS+IpYL8Ewq15sJ5YzXIDpm9wTYWkjsAf25X3q5pr2PLXX1G/kO4ByMB5EbC0Ly + 87xdm4XwU35qO424NUzdp/PQ5KXUFVhj5X7LpcnTWTopyvmMpzTaRd8AAAD//wMAgEsPw7UDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed71d57fe24cf13-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:26:31 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - '765' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999954' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_84370a5d5d53f54172bc0ffe3feb7e4a + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml new file mode 100644 index 0000000..ff06001 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml @@ -0,0 +1,69 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":[{"type":"text","text":"Hello, how + are you?"}]}],"system":[{"type":"text","text":"Respond with single words."}],"max_tokens":4096,"model":"claude-3-haiku-20240307"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '202' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - litellm/1.53.3 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA0yOzWqEQBCE36XO4+LfYpxb8gA5BdwlBBm0MbJjj7F7wCC+e1ASyKngqx9qw9jD + YpKhTbPXItPhpcqeFwlV2az3p9tb3cBAv2c6UiTiBoLBEvwBnMgo6lhhMIWePCw672JPSZF8uvER + kzzNy7RIKxh0gZVYYd+3v0Wl9eieYtGQ9xfsHwaiYW4XchIYFsR9q3Fh/BpCX5G4I1iO3hvE85Xd + MPIctdXwIBbYrDYIUf+j677/AAAA//8DAHETe7LyAAAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700088ad5942c-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:27 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-ratelimit-requests-limit: + - '5000' + anthropic-ratelimit-requests-remaining: + - '4999' + anthropic-ratelimit-requests-reset: + - '2024-12-05T21:06:27Z' + anthropic-ratelimit-tokens-limit: + - '5000000' + anthropic-ratelimit-tokens-remaining: + - '5000000' + anthropic-ratelimit-tokens-reset: + - '2024-12-05T21:06:27Z' + request-id: + - req_01PYs7k3gcHPDqdgTayudkMv + via: + - 1.1 google + status: + code: 200 + message: OK +version: 1 diff --git 
a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml new file mode 100644 index 0000000..073df30 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml @@ -0,0 +1,104 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":1}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '149' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4yST0sDMRDF7/spQs5tabstK70JQj0oFL2IIks2me7GZjMxmaVq6XeXbP/sihW8 + 5DC/eS9vJtkljHGt+IJxWQmStTPD6+Lm7tauskf7/iSMfdjMlvPnWZV9bVfbez6ICizeQNJJNZJY + OwOk0R6w9CAIouskS9NZOs2ushbUqMBEWelomI7mQ2p8gcPxZDo/KivUEgJfsJeEMcZ27RkzWgUf + fMHGg1OlhhBECXxxbmKMezSxwkUIOpCwxAcdlGgJbBt7iaj6yMO6CSJGs40xx/r+fJfB0nkswpGf + 62ttdahyDyKgjb6B0PGW7hPGXtuZmh8xufNYO8oJN2Cj4XR6sOPdFjs4OTJCEqanSQcXzHIFJLQJ + vZVwKWQFqlN2+xON0tgDSW/k31kueR/G1rb8j30HpARHoHLnQWn5c96uzUP8Yn+1nVfcBubhMxDU + +VrbErzzun3k9iX3yTcAAAD//wMAusvg7OMCAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700089e2e072b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:27 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=4Fzl_VHC9.c_.kyLBREHy8a7wA.lEcwtqZXonX9ka10-1733432787-1.0.1.1-tBlI5dXtGa55yRlJwRgFkxlkQ7emZl1_xhYirjNw7CcPBv7WkC60ubux0sARYF8Nzun5tNgFTC100P_ywLDMgw; + path=/; expires=Thu, 05-Dec-24 21:36:27 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=WokAKLMIioMxOZo9K5N1oJLSuWXQVmUht7hb75_Z06w-1733432787525-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - '132' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '12000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '11999' + x-ratelimit-remaining-tokens: + - '999969' + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_567abdfb8bf13c71bc3f2bac8be8b4af + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_llms.py b/tests/test_llms.py index 687b468..40d207d 100644 --- a/tests/test_llms.py +++ b/tests/test_llms.py @@ -8,7 +8,7 @@ import numpy as np import pytest from aviary.core import Tool, ToolRequestMessage -from pydantic import BaseModel, Field +from pydantic import BaseModel from llmclient.exceptions import JSONSchemaValidationError from llmclient.llms import ( @@ -318,68 +318,52 @@ async def 
test_text_image_message(self, model_name: str) -> None: ), "Expected content in message, but got None" assert "red" in result.messages[-1].content.lower() - -class TestSingleCompletionLLMModel(TestMultipleCompletionLLMModel): - NUM_COMPLETIONS: ClassVar[int] = 1 - DEFAULT_CONFIG: ClassVar[dict] = {} - MODEL_CLS: ClassVar[type[MultipleCompletionLLMModel]] = MultipleCompletionLLMModel - + # Test n = 1 @pytest.mark.parametrize( "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"] ) @pytest.mark.asyncio @pytest.mark.vcr - async def test_model(self, model_name: str) -> None: - await super().test_model(model_name) - - @pytest.mark.vcr - @pytest.mark.parametrize( - "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"] - ) - @pytest.mark.asyncio - async def test_streaming(self, model_name: str) -> None: - model = self.MODEL_CLS(name=model_name) + async def test_single_completion(self, model_name: str) -> None: + model = self.MODEL_CLS(name=model_name, config={"n": 1}) messages = [ Message(role="system", content="Respond with single words."), Message(content="Hello, how are you?"), ] - content = [] + result = await model.call(messages) + assert isinstance(result, LLMResult) - def callback(s): - content.append(s) + result = await model.call(messages, n=1) + assert isinstance(result, LLMResult) + assert result.messages + assert len(result.messages) == 1 + assert result.messages[0].content - results = await model.call(messages, [callback]) - for result in results: - assert result.completion_count > 0 - assert content - - @pytest.mark.vcr @pytest.mark.asyncio - async def test_parameterizing_tool_from_arg_union(self) -> None: - await super().test_parameterizing_tool_from_arg_union() - @pytest.mark.vcr - @pytest.mark.asyncio - @pytest.mark.skip(reason="TODO: Check why this error should be raised") - async def test_output_type_rejected_validation(self) -> None: - class InstructionList(BaseModel): - instructions: list[str] = Field(description="list of instructions") - - model = self.MODEL_CLS(name=CILLMModelNames.ANTHROPIC.value) - with pytest.raises(litellm.BadRequestError, match="anthropic"): - await model.call( - [Message(content="What are three things I should do today?")], - output_type=InstructionList, - ) - @pytest.mark.parametrize( "model_name", - [CILLMModelNames.ANTHROPIC.value, "gpt-4-turbo", CILLMModelNames.OPENAI.value], + [ + pytest.param(CILLMModelNames.ANTHROPIC.value, id="anthropic"), + pytest.param(CILLMModelNames.OPENAI.value, id="openai"), + ], ) - @pytest.mark.asyncio - @pytest.mark.vcr - async def test_text_image_message(self, model_name: str) -> None: - await super().test_text_image_message(model_name) + async def test_multiple_completion(self, model_name: str, request) -> None: + model = self.MODEL_CLS(name=model_name, config={"n": self.NUM_COMPLETIONS}) + messages = [ + Message(role="system", content="Respond with single words."), + Message(content="Hello, how are you?"), + ] + if request.node.callspec.id == "anthropic": + # Anthropic does not support multiple completions + with pytest.raises(litellm.BadRequestError, match="anthropic"): + await model.call(messages) + else: + results = await model.call(messages, n=None) + assert len(results) == self.NUM_COMPLETIONS + + results = await model.call(messages, n=self.NUM_COMPLETIONS) + assert len(results) == self.NUM_COMPLETIONS def test_json_schema_validation() -> None: diff --git a/uv.lock b/uv.lock index b2829e9..9805785 100644 --- a/uv.lock +++ b/uv.lock @@ -123,15 +123,16 @@ wheels = [ [[package]] name = 
"anyio" -version = "4.6.2.post1" +version = "4.7.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9f/09/45b9b7a6d4e45c6bcb5bf61d19e3ab87df68e0601fa8c5293de3542546cc/anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c", size = 173422 } +sdist = { url = "https://files.pythonhosted.org/packages/f6/40/318e58f669b1a9e00f5c4453910682e2d9dd594334539c7b7817dabb765f/anyio-4.7.0.tar.gz", hash = "sha256:2f834749c602966b7d456a7567cafcb309f96482b5081d14ac93ccd457f9dd48", size = 177076 } wheels = [ - { url = "https://files.pythonhosted.org/packages/e4/f5/f2b75d2fc6f1a260f340f0e7c6a060f4dd2961cc16884ed851b0d18da06a/anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d", size = 90377 }, + { url = "https://files.pythonhosted.org/packages/a0/7a/4daaf3b6c08ad7ceffea4634ec206faeff697526421c20f07628c7372156/anyio-4.7.0-py3-none-any.whl", hash = "sha256:ea60c3723ab42ba6fff7e8ccb0488c898ec538ff4df1f1d5e642c3601d07e352", size = 93052 }, ] [[package]] @@ -681,7 +682,7 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.1.dev40+g0fc4372.d20241204" +version = "0.1.dev34+gac0511e.d20241205" source = { editable = "." } dependencies = [ { name = "aiofiles" }, @@ -2419,17 +2420,16 @@ wheels = [ [[package]] name = "pymupdf" -version = "1.24.14" +version = "1.25.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e0/6b/6bd735144a190d26dcc23f98b4aae0e09b259cc4c87bba266a39b7b91f56/PyMuPDF-1.24.14.tar.gz", hash = "sha256:0eed9f998525eaf39706dbf2d0cf3162150f0f526e4a36b1748ffa50bde581ae", size = 56242747 } +sdist = { url = "https://files.pythonhosted.org/packages/d2/9e/ec6139116b551922789eb72e710371ddd770a2236fbd5302c2a58670ebbc/pymupdf-1.25.0.tar.gz", hash = "sha256:9e5a33816e4b85ed6a01545cada2b866fc280a3b6478bb8e19c364532adf6692", size = 60812481 } wheels = [ - { url = "https://files.pythonhosted.org/packages/62/ce/972b080c526af80577ffaa49676c05361ba152de94de3af339a2f3ac07c2/PyMuPDF-1.24.14-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b3ad7a4f4b607ff97f2e1b8111823dd3797dbb381ec851c3ae4695fea6f68478", size = 19167365 }, - { url = "https://files.pythonhosted.org/packages/2c/11/8d6f4c8fca86b93759e430c4b0b7b66f8067d58893d6fe0a193420d14453/PyMuPDF-1.24.14-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:755906af4b4d693552ae5469ba682075853f4dc8a70639affd1bd6c049c5d900", size = 18417324 }, - { url = "https://files.pythonhosted.org/packages/51/69/518e6c088e20a5ded1fc658d4aec1e54c0f98f2d62d91362bd4231df9ecf/PyMuPDF-1.24.14-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:37f24108e2e18150fb8d512dcccdfa1e3d9b9dd203ffaa7ffb959bb20aea40b4", size = 19303826 }, - { url = "https://files.pythonhosted.org/packages/27/bf/203d06c68660d5535db65b6c54cacd35b950945c11c1c4546d674f270892/PyMuPDF-1.24.14-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0de4f5ed903c2be6d0abcccdc796368939b51ce03916eb53292916e3b6ea65d6", size = 19833056 }, - { url = "https://files.pythonhosted.org/packages/77/ed/40eb23cf5e91de0510dfedb7d9feedeab5ce9691544ad09599e124a0a333/PyMuPDF-1.24.14-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2d1b5c47df2f8055de5dedfbd3189c742188261a8c257f406378382adac94cff", size = 20963535 }, - { url = 
"https://files.pythonhosted.org/packages/87/2b/46af7461bd299c3f52bc5455332cc82608cea1667cd692652505fdf9308e/PyMuPDF-1.24.14-cp39-abi3-win32.whl", hash = "sha256:60a7ee7db3e0d3a4dcbe6df2781ba4487acb7e515c64ea9c857504f44effcb25", size = 14965671 }, - { url = "https://files.pythonhosted.org/packages/25/b2/82d70d9f5aea5a33e770f37e6db43ed08b5dc71b3526c5d7051689d1031e/PyMuPDF-1.24.14-cp39-abi3-win_amd64.whl", hash = "sha256:3d1f1ec2fe0249484afde7a0fc02589f19aaeb47c42939d23ae1d012aa1bc59b", size = 16257645 }, + { url = "https://files.pythonhosted.org/packages/bd/fe/c7810f3a960979963640dd7e8f5b485671fc423f11691df776ca3a093080/pymupdf-1.25.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:d12a4a7d4456b2c09afb674428be2021b87a3c4afbae7f717d5d7e805a30a989", size = 19367838 }, + { url = "https://files.pythonhosted.org/packages/8b/7f/9811212db9cc72757b40a0224fe3ed4a1b057c72546a224ab61d8981a56f/pymupdf-1.25.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:227ea293734b92fc1f49a01ffebe0f3d212bc0ec3be7e2db172088b8eaff5a4a", size = 18601499 }, + { url = "https://files.pythonhosted.org/packages/6b/ea/08041590cc1a8c66f5fefc3edd0b57f36b9225524586848d27ab470430e7/pymupdf-1.25.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:62b8673a2348c1b63874d2b1e93ee29e66892e7ca8311df9c7f4a9aa1d89caed", size = 20025425 }, + { url = "https://files.pythonhosted.org/packages/4f/99/ecd6edc233367a5820015bb98d098be781f9d3ef973b66197c0990c9f1c8/pymupdf-1.25.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e9d9e15af0fe389e15d605574f953df304d6037ce7cb4dca8b7cdec34d0694a3", size = 21148857 }, + { url = "https://files.pythonhosted.org/packages/ca/fe/31322d459dd7bb868c859d1a8f24f020f5c949496001f930a76f3190b830/pymupdf-1.25.0-cp39-abi3-win32.whl", hash = "sha256:cada9ebc14ae99253639e9d752f2e1cb8a62af813626c4391c62d39c41f8a43a", size = 15115786 }, + { url = "https://files.pythonhosted.org/packages/95/d0/aa79cc0c65ca6e8faf17cb44d779c1511a0da6525d617deb105b1ead0d98/pymupdf-1.25.0-cp39-abi3-win_amd64.whl", hash = "sha256:148800b9b14f2f48b5fc8f9213aeb94e272f1af4b533deeabe0e561d3bd334bf", size = 16562276 }, ] [[package]] @@ -3234,49 +3234,27 @@ sdist = { url = "https://files.pythonhosted.org/packages/80/f8/0802dd14c58b5d3d7 [[package]] name = "tokenizers" -version = "0.20.3" +version = "0.21.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/da/25/b1681c1c30ea3ea6e584ae3fffd552430b12faa599b558c4c4783f56d7ff/tokenizers-0.20.3.tar.gz", hash = "sha256:2278b34c5d0dd78e087e1ca7f9b1dcbf129d80211afa645f214bd6e051037539", size = 340513 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/93/6742ef9206409d5ce1fdf44d5ca1687cdc3847ba0485424e2c731e6bcf67/tokenizers-0.20.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:585b51e06ca1f4839ce7759941e66766d7b060dccfdc57c4ca1e5b9a33013a90", size = 2674224 }, - { url = "https://files.pythonhosted.org/packages/aa/14/e75ece72e99f6ef9ae07777ca9fdd78608f69466a5cecf636e9bd2f25d5c/tokenizers-0.20.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:61cbf11954f3b481d08723ebd048ba4b11e582986f9be74d2c3bdd9293a4538d", size = 2558991 }, - { url = "https://files.pythonhosted.org/packages/46/54/033b5b2ba0c3ae01e026c6f7ced147d41a2fa1c573d00a66cb97f6d7f9b3/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef820880d5e4e8484e2fa54ff8d297bb32519eaa7815694dc835ace9130a3eea", size = 2892476 }, - { url = 
"https://files.pythonhosted.org/packages/e6/b0/cc369fb3297d61f3311cab523d16d48c869dc2f0ba32985dbf03ff811041/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:67ef4dcb8841a4988cd00dd288fb95dfc8e22ed021f01f37348fd51c2b055ba9", size = 2802775 }, - { url = "https://files.pythonhosted.org/packages/1a/74/62ad983e8ea6a63e04ed9c5be0b605056bf8aac2f0125f9b5e0b3e2b89fa/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff1ef8bd47a02b0dc191688ccb4da53600df5d4c9a05a4b68e1e3de4823e78eb", size = 3086138 }, - { url = "https://files.pythonhosted.org/packages/6b/ac/4637ba619db25094998523f9e6f5b456e1db1f8faa770a3d925d436db0c3/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:444d188186eab3148baf0615b522461b41b1f0cd58cd57b862ec94b6ac9780f1", size = 3098076 }, - { url = "https://files.pythonhosted.org/packages/58/ce/9793f2dc2ce529369807c9c74e42722b05034af411d60f5730b720388c7d/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37c04c032c1442740b2c2d925f1857885c07619224a533123ac7ea71ca5713da", size = 3379650 }, - { url = "https://files.pythonhosted.org/packages/50/f6/2841de926bc4118af996eaf0bdf0ea5b012245044766ffc0347e6c968e63/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:453c7769d22231960ee0e883d1005c93c68015025a5e4ae56275406d94a3c907", size = 2994005 }, - { url = "https://files.pythonhosted.org/packages/a3/b2/00915c4fed08e9505d37cf6eaab45b12b4bff8f6719d459abcb9ead86a4b/tokenizers-0.20.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4bb31f7b2847e439766aaa9cc7bccf7ac7088052deccdb2275c952d96f691c6a", size = 8977488 }, - { url = "https://files.pythonhosted.org/packages/e9/ac/1c069e7808181ff57bcf2d39e9b6fbee9133a55410e6ebdaa89f67c32e83/tokenizers-0.20.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:843729bf0f991b29655a069a2ff58a4c24375a553c70955e15e37a90dd4e045c", size = 9294935 }, - { url = "https://files.pythonhosted.org/packages/50/47/722feb70ee68d1c4412b12d0ea4acc2713179fd63f054913990f9e259492/tokenizers-0.20.3-cp311-none-win32.whl", hash = "sha256:efcce3a927b1e20ca694ba13f7a68c59b0bd859ef71e441db68ee42cf20c2442", size = 2197175 }, - { url = "https://files.pythonhosted.org/packages/75/68/1b4f928b15a36ed278332ac75d66d7eb65d865bf344d049c452c18447bf9/tokenizers-0.20.3-cp311-none-win_amd64.whl", hash = "sha256:88301aa0801f225725b6df5dea3d77c80365ff2362ca7e252583f2b4809c4cc0", size = 2381616 }, - { url = "https://files.pythonhosted.org/packages/07/00/92a08af2a6b0c88c50f1ab47d7189e695722ad9714b0ee78ea5e1e2e1def/tokenizers-0.20.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:49d12a32e190fad0e79e5bdb788d05da2f20d8e006b13a70859ac47fecf6ab2f", size = 2667951 }, - { url = "https://files.pythonhosted.org/packages/ec/9a/e17a352f0bffbf415cf7d73756f5c73a3219225fc5957bc2f39d52c61684/tokenizers-0.20.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:282848cacfb9c06d5e51489f38ec5aa0b3cd1e247a023061945f71f41d949d73", size = 2555167 }, - { url = "https://files.pythonhosted.org/packages/27/37/d108df55daf4f0fcf1f58554692ff71687c273d870a34693066f0847be96/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abe4e08c7d0cd6154c795deb5bf81d2122f36daf075e0c12a8b050d824ef0a64", size = 2898389 }, - { url = 
"https://files.pythonhosted.org/packages/b2/27/32f29da16d28f59472fa7fb38e7782069748c7e9ab9854522db20341624c/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca94fc1b73b3883c98f0c88c77700b13d55b49f1071dfd57df2b06f3ff7afd64", size = 2795866 }, - { url = "https://files.pythonhosted.org/packages/29/4e/8a9a3c89e128c4a40f247b501c10279d2d7ade685953407c4d94c8c0f7a7/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef279c7e239f95c8bdd6ff319d9870f30f0d24915b04895f55b1adcf96d6c60d", size = 3085446 }, - { url = "https://files.pythonhosted.org/packages/b4/3b/a2a7962c496ebcd95860ca99e423254f760f382cd4bd376f8895783afaf5/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16384073973f6ccbde9852157a4fdfe632bb65208139c9d0c0bd0176a71fd67f", size = 3094378 }, - { url = "https://files.pythonhosted.org/packages/1f/f4/a8a33f0192a1629a3bd0afcad17d4d221bbf9276da4b95d226364208d5eb/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:312d522caeb8a1a42ebdec87118d99b22667782b67898a76c963c058a7e41d4f", size = 3385755 }, - { url = "https://files.pythonhosted.org/packages/9e/65/c83cb3545a65a9eaa2e13b22c93d5e00bd7624b354a44adbdc93d5d9bd91/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2b7cb962564785a83dafbba0144ecb7f579f1d57d8c406cdaa7f32fe32f18ad", size = 2997679 }, - { url = "https://files.pythonhosted.org/packages/55/e9/a80d4e592307688a67c7c59ab77e03687b6a8bd92eb5db763a2c80f93f57/tokenizers-0.20.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:124c5882ebb88dadae1fc788a582299fcd3a8bd84fc3e260b9918cf28b8751f5", size = 8989296 }, - { url = "https://files.pythonhosted.org/packages/90/af/60c957af8d2244321124e893828f1a4817cde1a2d08d09d423b73f19bd2f/tokenizers-0.20.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2b6e54e71f84c4202111a489879005cb14b92616a87417f6c102c833af961ea2", size = 9303621 }, - { url = "https://files.pythonhosted.org/packages/be/a9/96172310ee141009646d63a1ca267c099c462d747fe5ef7e33f74e27a683/tokenizers-0.20.3-cp312-none-win32.whl", hash = "sha256:83d9bfbe9af86f2d9df4833c22e94d94750f1d0cd9bfb22a7bb90a86f61cdb1c", size = 2188979 }, - { url = "https://files.pythonhosted.org/packages/bd/68/61d85ae7ae96dde7d0974ff3538db75d5cdc29be2e4329cd7fc51a283e22/tokenizers-0.20.3-cp312-none-win_amd64.whl", hash = "sha256:44def74cee574d609a36e17c8914311d1b5dbcfe37c55fd29369d42591b91cf2", size = 2380725 }, - { url = "https://files.pythonhosted.org/packages/07/19/36e9eaafb229616cb8502b42030fa7fe347550e76cb618de71b498fc3222/tokenizers-0.20.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0b630e0b536ef0e3c8b42c685c1bc93bd19e98c0f1543db52911f8ede42cf84", size = 2666813 }, - { url = "https://files.pythonhosted.org/packages/b9/c7/e2ce1d4f756c8a62ef93fdb4df877c2185339b6d63667b015bf70ea9d34b/tokenizers-0.20.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a02d160d2b19bcbfdf28bd9a4bf11be4cb97d0499c000d95d4c4b1a4312740b6", size = 2555354 }, - { url = "https://files.pythonhosted.org/packages/7c/cf/5309c2d173a6a67f9ec8697d8e710ea32418de6fd8541778032c202a1c3e/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e3d80d89b068bc30034034b5319218c7c0a91b00af19679833f55f3becb6945", size = 2897745 }, - { url = 
"https://files.pythonhosted.org/packages/2c/e5/af3078e32f225e680e69d61f78855880edb8d53f5850a1834d519b2b103f/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:174a54910bed1b089226512b4458ea60d6d6fd93060254734d3bc3540953c51c", size = 2794385 }, - { url = "https://files.pythonhosted.org/packages/0b/a7/bc421fe46650cc4eb4a913a236b88c243204f32c7480684d2f138925899e/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:098b8a632b8656aa5802c46689462c5c48f02510f24029d71c208ec2c822e771", size = 3084580 }, - { url = "https://files.pythonhosted.org/packages/c6/22/97e1e95ee81f75922c9f569c23cb2b1fdc7f5a7a29c4c9fae17e63f751a6/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78c8c143e3ae41e718588281eb3e212c2b31623c9d6d40410ec464d7d6221fb5", size = 3093581 }, - { url = "https://files.pythonhosted.org/packages/d5/14/f0df0ee3b9e516121e23c0099bccd7b9f086ba9150021a750e99b16ce56f/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b26b0aadb18cd8701077362ba359a06683662d5cafe3e8e8aba10eb05c037f1", size = 3385934 }, - { url = "https://files.pythonhosted.org/packages/66/52/7a171bd4929e3ffe61a29b4340fe5b73484709f92a8162a18946e124c34c/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07d7851a72717321022f3774e84aa9d595a041d643fafa2e87fbc9b18711dac0", size = 2997311 }, - { url = "https://files.pythonhosted.org/packages/7c/64/f1993bb8ebf775d56875ca0d50a50f2648bfbbb143da92fe2e6ceeb4abd5/tokenizers-0.20.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:bd44e48a430ada902c6266a8245f5036c4fe744fcb51f699999fbe82aa438797", size = 8988601 }, - { url = "https://files.pythonhosted.org/packages/d6/3f/49fa63422159bbc2f2a4ac5bfc597d04d4ec0ad3d2ef46649b5e9a340e37/tokenizers-0.20.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a4c186bb006ccbe1f5cc4e0380d1ce7806f5955c244074fd96abc55e27b77f01", size = 9303950 }, - { url = "https://files.pythonhosted.org/packages/66/11/79d91aeb2817ad1993ef61c690afe73e6dbedbfb21918b302ef5a2ba9bfb/tokenizers-0.20.3-cp313-none-win32.whl", hash = "sha256:6e19e0f1d854d6ab7ea0c743d06e764d1d9a546932be0a67f33087645f00fe13", size = 2188941 }, - { url = "https://files.pythonhosted.org/packages/c2/ff/ac8410f868fb8b14b5e619efa304aa119cb8a40bd7df29fc81a898e64f99/tokenizers-0.20.3-cp313-none-win_amd64.whl", hash = "sha256:d50ede425c7e60966a9680d41b58b3a0950afa1bb570488e2972fa61662c4273", size = 2380269 }, +sdist = { url = "https://files.pythonhosted.org/packages/20/41/c2be10975ca37f6ec40d7abd7e98a5213bb04f284b869c1a24e6504fd94d/tokenizers-0.21.0.tar.gz", hash = "sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4", size = 343021 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/5c/8b09607b37e996dc47e70d6a7b6f4bdd4e4d5ab22fe49d7374565c7fefaf/tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2", size = 2647461 }, + { url = "https://files.pythonhosted.org/packages/22/7a/88e58bb297c22633ed1c9d16029316e5b5ac5ee44012164c2edede599a5e/tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e", size = 2563639 }, + { url = "https://files.pythonhosted.org/packages/f7/14/83429177c19364df27d22bc096d4c2e431e0ba43e56c525434f1f9b0fd00/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193", size = 2903304 }, + { url = "https://files.pythonhosted.org/packages/7e/db/3433eab42347e0dc5452d8fcc8da03f638c9accffefe5a7c78146666964a/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e", size = 2804378 }, + { url = "https://files.pythonhosted.org/packages/57/8b/7da5e6f89736c2ade02816b4733983fca1c226b0c42980b1ae9dc8fcf5cc/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e", size = 3095488 }, + { url = "https://files.pythonhosted.org/packages/4d/f6/5ed6711093dc2c04a4e03f6461798b12669bc5a17c8be7cce1240e0b5ce8/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba", size = 3121410 }, + { url = "https://files.pythonhosted.org/packages/81/42/07600892d48950c5e80505b81411044a2d969368cdc0d929b1c847bf6697/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273", size = 3388821 }, + { url = "https://files.pythonhosted.org/packages/22/06/69d7ce374747edaf1695a4f61b83570d91cc8bbfc51ccfecf76f56ab4aac/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04", size = 3008868 }, + { url = "https://files.pythonhosted.org/packages/c8/69/54a0aee4d576045b49a0eb8bffdc495634309c823bf886042e6f46b80058/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e", size = 8975831 }, + { url = "https://files.pythonhosted.org/packages/f7/f3/b776061e4f3ebf2905ba1a25d90380aafd10c02d406437a8ba22d1724d76/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b", size = 8920746 }, + { url = "https://files.pythonhosted.org/packages/d8/ee/ce83d5ec8b6844ad4c3ecfe3333d58ecc1adc61f0878b323a15355bcab24/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74", size = 9161814 }, + { url = "https://files.pythonhosted.org/packages/18/07/3e88e65c0ed28fa93aa0c4d264988428eef3df2764c3126dc83e243cb36f/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff", size = 9357138 }, + { url = "https://files.pythonhosted.org/packages/15/b0/dc4572ca61555fc482ebc933f26cb407c6aceb3dc19c301c68184f8cad03/tokenizers-0.21.0-cp39-abi3-win32.whl", hash = "sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a", size = 2202266 }, + { url = "https://files.pythonhosted.org/packages/44/69/d21eb253fa91622da25585d362a874fa4710be600f0ea9446d8d0217cec1/tokenizers-0.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c", size = 2389192 }, ] [[package]] @@ -3349,7 +3327,7 @@ wheels = [ [[package]] name = "transformers" -version = "4.46.3" +version = "4.47.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -3363,9 +3341,9 @@ dependencies = [ { name = "tokenizers" }, { name = "tqdm" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/37/5a/58f96c83e566f907ae39f16d4401bbefd8bb85c60bd1e6a95c419752ab90/transformers-4.46.3.tar.gz", hash = "sha256:8ee4b3ae943fe33e82afff8e837f4b052058b07ca9be3cb5b729ed31295f72cc", size = 8627944 } +sdist = { url = "https://files.pythonhosted.org/packages/b1/5a/0ecfde3264bed0579c37f249e04e15f3c1451ba864d78bbe390177664cac/transformers-4.47.0.tar.gz", hash = "sha256:f8ead7a5a4f6937bb507e66508e5e002dc5930f7b6122a9259c37b099d0f3b19", size = 8693668 } wheels = [ - { url = "https://files.pythonhosted.org/packages/51/51/b87caa939fedf307496e4dbf412f4b909af3d9ca8b189fc3b65c1faa456f/transformers-4.46.3-py3-none-any.whl", hash = "sha256:a12ef6f52841fd190a3e5602145b542d03507222f2c64ebb7ee92e8788093aef", size = 10034536 }, + { url = "https://files.pythonhosted.org/packages/d0/a7/7eedcf6a359e1e1eff3bc204ad022485aa5d88c08e1e3e0e0aee8a2e2235/transformers-4.47.0-py3-none-any.whl", hash = "sha256:a8e1bafdaae69abdda3cad638fe392e37c86d2ce0ecfcae11d60abb8f949ff4d", size = 10133426 }, ] [[package]] From 1dcda13ade11914671b1bf0694c9f665e31f01c8 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Thu, 5 Dec 2024 15:18:17 -0800 Subject: [PATCH 02/18] Improved logging for call_multiple --- llmclient/llms.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/llmclient/llms.py b/llmclient/llms.py index 20fc9bf..1136bd7 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -855,7 +855,17 @@ async def call_multiple( tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, **chat_kwargs, ) -> list[LLMResult]: - if chat_kwargs.get("n", 1) == 1: + if chat_kwargs.get("n", 1) == 1 or self.config.get("n", 1) == 1: + if ( + chat_kwargs.get("n") + and self.config.get("n") + and chat_kwargs.get("n") != self.config.get("n") + ): + raise ValueError( + f"Incompatible number of completions requested. " + f"Model's configuration n is {self.config['n']}, " + f"but kwarg n={chat_kwargs['n']} was passed." + ) logger.warning( "n is 1 for call_multiple. It will return a list with a single element" ) From 2847af7fe0f7127cefd5b636e28f8c36c94fe3d5 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Thu, 5 Dec 2024 15:26:30 -0800 Subject: [PATCH 03/18] removed deprecated check of n in kwargs --- llmclient/llms.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/llmclient/llms.py b/llmclient/llms.py index 1136bd7..4b3ba41 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -915,8 +915,6 @@ async def call( "Invalid n passed to the call function. 
Will get it from the model's configuration" ) n = self.config.get("n", 1) - if "n" in chat_kwargs: - n = chat_kwargs["n"] if n == 1: return await self.call_single( messages, callbacks, output_type, tools, tool_choice, **chat_kwargs From 6fbf2f2f4c79e774e3074b0a4448bbc4766001b5 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Fri, 6 Dec 2024 14:54:33 -0800 Subject: [PATCH 04/18] Added cassets for TestMultipleCompletionLLMModel --- ...st_multiple_completion[gpt-3.5-turbo].yaml | 386 ++++++++--------- ...odel.test_multiple_completion[openai].yaml | 398 +++++++++--------- ...e_completion[claude-3-haiku-20240307].yaml | 135 +++--- ...test_single_completion[gpt-3.5-turbo].yaml | 205 ++++----- uv.lock | 2 +- 5 files changed, 566 insertions(+), 560 deletions(-) diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml index 35c02c0..caee9e8 100644 --- a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml @@ -1,194 +1,196 @@ interactions: -- request: - body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, - how are you?"}],"model":"gpt-3.5-turbo","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '149' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.57.0 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.57.0 - x-stainless-raw-response: - - 'true' - x-stainless-retry-count: - - '1' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.12.7 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA9RTy2rDMBC8+yuEzklo3jS3QCCXXNoe+qIYWdrYamStKq1LS8i/FzkPOySFXnvR - YWZnNLsrbRPGuFZ8xrgsBMnSme48W6zuxvBAL5v55/3H06Ra4OOoWhXl82LJO1GB2TtIOqp6Ektn - gDTaPS09CILo2p8Oh6PhYHo7qYkSFZgoyx11h71xlyqfYfemPxgflAVqCYHP2GvCGGPb+owZrYIv - PmM3nSNSQggiBz47FTHGPZqIcBGCDiQs8U5DSrQEto69RFRtysO6CiJGs5UxB3x3ustg7jxm4cCf - 8LW2OhSpBxHQRt9A6HjSEl800P83DSSMvdVLqc5icuexdJQSbsBGw8Fgb8ebZ9AiDxwhCdOCR50r - ZqkCEtqE1ki4FLIA1SibByAqpbFFtMd+meWa975tbfO/2DeElOAIVOo8KC3P+23KPMQ/8lvZacR1 - YB6+A0GZrrXNwTuv6yXXm9wlPwAAAP//AwAh8pBrpAMAAA== - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed70040cbcdf99b-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:06:36 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - future-house-xr4tdh - openai-processing-ms: - - '134' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '12000' - x-ratelimit-limit-tokens: - - '1000000' - x-ratelimit-remaining-requests: - - '11999' - x-ratelimit-remaining-tokens: - - '999953' - x-ratelimit-reset-requests: - - 5ms - x-ratelimit-reset-tokens: - - 2ms - x-request-id: - - req_1f88664946b9891fbc90796687f144c4 - status: - code: 200 - message: OK -- request: - body: 
'{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, - how are you?"}],"model":"gpt-3.5-turbo","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '149' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.57.0 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.57.0 - x-stainless-raw-response: - - 'true' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.12.7 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA9RTTUsDMRC9768IOW9LP63tzaIIIqgH7UFkSZPZbTSbCcksWEr/u2T7sVtawauX - HN6b9/JmJtkkjHGt+IxxuRIkS2c6N8vbx5fF9Tisy7l5e13clfl0/vQwfl5P5o6nUYHLT5B0UHUl - ls4AabQ7WnoQBNG1PxkOR8PBZHpVEyUqMFFWOOoMu+MOVX6JnV5/MN4rV6glBD5j7wljjG3qM2a0 - Cr75jPXSA1JCCKIAPjsWMcY9mohwEYIOJCzxtCElWgJbx75HVG3KQ14FEaPZypg9vj3eZbBwHpdh - zx/xXFsdVpkHEdBG30DoeNISnzXQ/zcNJIx91EupTmJy57F0lBF+gY2Gg8HOjjfPoEXuOUISpgWP - 0gtmmQIS2oTWSLgUcgWqUTYPQFRKY4toj/08yyXvXdvaFn+xbwgpwRGozHlQWp7225R5iH/kt7Lj - iOvAPKwDQZnl2hbgndf1kutNbpMfAAAA//8DALEE5HikAwAA - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed700428d77f99b-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:06:36 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - future-house-xr4tdh - openai-processing-ms: - - '114' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '12000' - x-ratelimit-limit-tokens: - - '1000000' - x-ratelimit-remaining-requests: - - '11999' - x-ratelimit-remaining-tokens: - - '999953' - x-ratelimit-reset-requests: - - 5ms - x-ratelimit-reset-tokens: - - 2ms - x-request-id: - - req_e32516fa5bb6ab11dda5155511280ea6 - status: - code: 200 - message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "149" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "1" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RTy2rDMBC8+yuEzklo3jS3QCCXXNoe+qIYWdrYamStKq1LS8i/FzkPOySFXnvR + YWZnNLsrbRPGuFZ8xrgsBMnSme48W6zuxvBAL5v55/3H06Ra4OOoWhXl82LJO1GB2TtIOqp6Ektn + gDTaPS09CILo2p8Oh6PhYHo7qYkSFZgoyx11h71xlyqfYfemPxgflAVqCYHP2GvCGGPb+owZrYIv + PmM3nSNSQggiBz47FTHGPZqIcBGCDiQs8U5DSrQEto69RFRtysO6CiJGs5UxB3x3ustg7jxm4cCf + 8LW2OhSpBxHQRt9A6HjSEl800P83DSSMvdVLqc5icuexdJQSbsBGw8Fgb8ebZ9AiDxwhCdOCR50r + 
ZqkCEtqE1ki4FLIA1SibByAqpbFFtMd+meWa975tbfO/2DeElOAIVOo8KC3P+23KPMQ/8lvZacR1 + YB6+A0GZrrXNwTuv6yXXm9wlPwAAAP//AwAh8pBrpAMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed70040cbcdf99b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "134" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999953" + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_1f88664946b9891fbc90796687f144c4 + status: + code: 200 + message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "149" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RTTUsDMRC9768IOW9LP63tzaIIIqgH7UFkSZPZbTSbCcksWEr/u2T7sVtawauX + HN6b9/JmJtkkjHGt+IxxuRIkS2c6N8vbx5fF9Tisy7l5e13clfl0/vQwfl5P5o6nUYHLT5B0UHUl + ls4AabQ7WnoQBNG1PxkOR8PBZHpVEyUqMFFWOOoMu+MOVX6JnV5/MN4rV6glBD5j7wljjG3qM2a0 + Cr75jPXSA1JCCKIAPjsWMcY9mohwEYIOJCzxtCElWgJbx75HVG3KQ14FEaPZypg9vj3eZbBwHpdh + zx/xXFsdVpkHEdBG30DoeNISnzXQ/zcNJIx91EupTmJy57F0lBF+gY2Gg8HOjjfPoEXuOUISpgWP + 0gtmmQIS2oTWSLgUcgWqUTYPQFRKY4toj/08yyXvXdvaFn+xbwgpwRGozHlQWp7225R5iH/kt7Lj + iOvAPKwDQZnl2hbgndf1kutNbpMfAAAA//8DALEE5HikAwAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700428d77f99b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "114" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999953" + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_e32516fa5bb6ab11dda5155511280ea6 + status: + code: 200 + message: OK version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml 
b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml index cc8a292..150ca29 100644 --- a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml @@ -1,200 +1,202 @@ interactions: -- request: - body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, - how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '158' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.57.0 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.57.0 - x-stainless-raw-response: - - 'true' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.12.7 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA7RTwWoCMRC971ekObui7qLWm1JaWgq9FEopZckm4xqbzYQkSxXx30tWu7uihR7a - Sw7z5r28N5PsIkKoFHRGKF8xz0uj4nl+A3yxWD9Vi4f56zYtko15Htr1/aP9fKG9wMB8Ddx/s/oc - S6PAS9QHmFtgHoLqcJIkaZJcT8c1UKIAFWiF8XGKcSm1jEeDURoPJvFwemSvUHJwdEbeIkII2dVn - 8KkFbOiMDHrflRKcYwXQWdNECLWoQoUy56TzTHvaa0GO2oOurd9KDf0uZmFZORb86UqpY33fXKaw - MBZzd8Sb+lJq6VaZBeZQB2Hn0dCoQz5LMPybBHeI4uqfE0SEvNdrqU58UmOxND7z+AE6CI5GBzna - PoYWTI+YR89UhzPuXRDLBHgmlevMhHLGVyBaZvsEWCUkdoDu3M+9XNI+xJa6+I18C3AOxoPIjAUh - +Wnets1C+Ck/tTUjrg1Tt3UeymwpdQHWWHnY8tJkec4SPoXJIKfRPvoCAAD//wMAviUi9bUDAAA= - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed71d50ac15cf13-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:26:27 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=.T97IJZbEqHKl_VUBqOzbYQ3.fPwrK1uEUCoRWrQ0Vs-1733433987-1.0.1.1-g7sgDoAXo0fTveWsSMwxPdEXEmD5ZOQ_XYi1pZoi0dW2JzEVU83E5oRAyXudBimOLtvB92CoJm1WxF9LBkquZA; - path=/; expires=Thu, 05-Dec-24 21:56:27 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=h0PMfMYOdM04Mkzg9aKUS2PH3E1LFBnUsNmdRQ4ltVY-1733433987796-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - future-house-xr4tdh - openai-processing-ms: - - '1036' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149999954' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_2525b96c65ed2235204a4f7fbb79f88e - status: - code: 200 - message: OK -- request: - body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, - how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '158' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.57.0 - 
x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.57.0 - x-stainless-raw-response: - - 'true' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.12.7 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//tFPLasMwELz7K1Sd7eA479wKpaVQAr30UoqRpbWtVtYKSYG0If9e5Dzs - kBR6aC867OyMZnalbUQIlYIuCeU187wxKrkt7qCeblYTs5gau3pxMmP8/vmrSh/hicaBgcU7cH9k - DTg2RoGXqPcwt8A8BNXhbDQaj0aLxbAFGhSgAq0yPhlj0kgtkyzNxkk6S4bzA7tGycHRJXmNCCFk - 257BpxawoUuSxsdKA86xCujy1EQItahChTLnpPNMexp3IEftQbfWHxDFoI9ZKNeOBX96rdShvjtd - prAyFgt3wE/1Umrp6twCc6iDsPNoaNQjXyQY/l2Cm39OEBHy1q5lfeaTGouN8bnHD9BBMMv2crR7 - DB04PmAePVM9zjS+IpYL8Ewq15sJ5YzXIDpm9wTYWkjsAf25X3q5pr2PLXX1G/kO4ByMB5EbC0Ly - 87xdm4XwU35qO424NUzdp/PQ5KXUFVhj5X7LpcnTWTopyvmMpzTaRd8AAAD//wMAgEsPw7UDAAA= - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed71d57fe24cf13-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:26:31 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - future-house-xr4tdh - openai-processing-ms: - - '765' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149999954' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_84370a5d5d53f54172bc0ffe3feb7e4a - status: - code: 200 - message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "158" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA7RTwWoCMRC971ekObui7qLWm1JaWgq9FEopZckm4xqbzYQkSxXx30tWu7uihR7a + Sw7z5r28N5PsIkKoFHRGKF8xz0uj4nl+A3yxWD9Vi4f56zYtko15Htr1/aP9fKG9wMB8Ddx/s/oc + S6PAS9QHmFtgHoLqcJIkaZJcT8c1UKIAFWiF8XGKcSm1jEeDURoPJvFwemSvUHJwdEbeIkII2dVn + 8KkFbOiMDHrflRKcYwXQWdNECLWoQoUy56TzTHvaa0GO2oOurd9KDf0uZmFZORb86UqpY33fXKaw + MBZzd8Sb+lJq6VaZBeZQB2Hn0dCoQz5LMPybBHeI4uqfE0SEvNdrqU58UmOxND7z+AE6CI5GBzna + PoYWTI+YR89UhzPuXRDLBHgmlevMhHLGVyBaZvsEWCUkdoDu3M+9XNI+xJa6+I18C3AOxoPIjAUh + +Wnets1C+Ck/tTUjrg1Tt3UeymwpdQHWWHnY8tJkec4SPoXJIKfRPvoCAAD//wMAviUi9bUDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed71d50ac15cf13-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:26:27 GMT + Server: + - 
cloudflare + Set-Cookie: + - __cf_bm=.T97IJZbEqHKl_VUBqOzbYQ3.fPwrK1uEUCoRWrQ0Vs-1733433987-1.0.1.1-g7sgDoAXo0fTveWsSMwxPdEXEmD5ZOQ_XYi1pZoi0dW2JzEVU83E5oRAyXudBimOLtvB92CoJm1WxF9LBkquZA; + path=/; expires=Thu, 05-Dec-24 21:56:27 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=h0PMfMYOdM04Mkzg9aKUS2PH3E1LFBnUsNmdRQ4ltVY-1733433987796-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "1036" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999954" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2525b96c65ed2235204a4f7fbb79f88e + status: + code: 200 + message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "158" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//tFPLasMwELz7K1Sd7eA479wKpaVQAr30UoqRpbWtVtYKSYG0If9e5Dzs + kBR6aC867OyMZnalbUQIlYIuCeU187wxKrkt7qCeblYTs5gau3pxMmP8/vmrSh/hicaBgcU7cH9k + DTg2RoGXqPcwt8A8BNXhbDQaj0aLxbAFGhSgAq0yPhlj0kgtkyzNxkk6S4bzA7tGycHRJXmNCCFk + 257BpxawoUuSxsdKA86xCujy1EQItahChTLnpPNMexp3IEftQbfWHxDFoI9ZKNeOBX96rdShvjtd + prAyFgt3wE/1Umrp6twCc6iDsPNoaNQjXyQY/l2Cm39OEBHy1q5lfeaTGouN8bnHD9BBMMv2crR7 + DB04PmAePVM9zjS+IpYL8Ewq15sJ5YzXIDpm9wTYWkjsAf25X3q5pr2PLXX1G/kO4ByMB5EbC0Ly + 87xdm4XwU35qO424NUzdp/PQ5KXUFVhj5X7LpcnTWTopyvmMpzTaRd8AAAD//wMAgEsPw7UDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed71d57fe24cf13-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:26:31 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "765" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999954" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_84370a5d5d53f54172bc0ffe3feb7e4a + status: + code: 200 + message: OK version: 1 diff --git 
a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml index ff06001..c5e4922 100644 --- a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml @@ -1,69 +1,70 @@ interactions: -- request: - body: '{"messages":[{"role":"user","content":[{"type":"text","text":"Hello, how - are you?"}]}],"system":[{"type":"text","text":"Respond with single words."}],"max_tokens":4096,"model":"claude-3-haiku-20240307"}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - anthropic-version: - - '2023-06-01' - connection: - - keep-alive - content-length: - - '202' - content-type: - - application/json - host: - - api.anthropic.com - user-agent: - - litellm/1.53.3 - method: POST - uri: https://api.anthropic.com/v1/messages - response: - body: - string: !!binary | - H4sIAAAAAAAAA0yOzWqEQBCE36XO4+LfYpxb8gA5BdwlBBm0MbJjj7F7wCC+e1ASyKngqx9qw9jD - YpKhTbPXItPhpcqeFwlV2az3p9tb3cBAv2c6UiTiBoLBEvwBnMgo6lhhMIWePCw672JPSZF8uvER - kzzNy7RIKxh0gZVYYd+3v0Wl9eieYtGQ9xfsHwaiYW4XchIYFsR9q3Fh/BpCX5G4I1iO3hvE85Xd - MPIctdXwIBbYrDYIUf+j677/AAAA//8DAHETe7LyAAAA - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed700088ad5942c-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:06:27 GMT - Server: - - cloudflare - Transfer-Encoding: - - chunked - X-Robots-Tag: - - none - anthropic-ratelimit-requests-limit: - - '5000' - anthropic-ratelimit-requests-remaining: - - '4999' - anthropic-ratelimit-requests-reset: - - '2024-12-05T21:06:27Z' - anthropic-ratelimit-tokens-limit: - - '5000000' - anthropic-ratelimit-tokens-remaining: - - '5000000' - anthropic-ratelimit-tokens-reset: - - '2024-12-05T21:06:27Z' - request-id: - - req_01PYs7k3gcHPDqdgTayudkMv - via: - - 1.1 google - status: - code: 200 - message: OK + - request: + body: + '{"messages":[{"role":"user","content":[{"type":"text","text":"Hello, how + are you?"}]}],"system":[{"type":"text","text":"Respond with single words."}],"max_tokens":4096,"model":"claude-3-haiku-20240307"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - "2023-06-01" + connection: + - keep-alive + content-length: + - "202" + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - litellm/1.53.3 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA0yOzWqEQBCE36XO4+LfYpxb8gA5BdwlBBm0MbJjj7F7wCC+e1ASyKngqx9qw9jD + YpKhTbPXItPhpcqeFwlV2az3p9tb3cBAv2c6UiTiBoLBEvwBnMgo6lhhMIWePCw672JPSZF8uvER + kzzNy7RIKxh0gZVYYd+3v0Wl9eieYtGQ9xfsHwaiYW4XchIYFsR9q3Fh/BpCX5G4I1iO3hvE85Xd + MPIctdXwIBbYrDYIUf+j677/AAAA//8DAHETe7LyAAAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700088ad5942c-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:27 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-ratelimit-requests-limit: + - "5000" + anthropic-ratelimit-requests-remaining: + - "4999" + anthropic-ratelimit-requests-reset: + - "2024-12-05T21:06:27Z" + anthropic-ratelimit-tokens-limit: + - "5000000" + anthropic-ratelimit-tokens-remaining: + - 
"5000000" + anthropic-ratelimit-tokens-reset: + - "2024-12-05T21:06:27Z" + request-id: + - req_01PYs7k3gcHPDqdgTayudkMv + via: + - 1.1 google + status: + code: 200 + message: OK version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml index 073df30..c591a7c 100644 --- a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml @@ -1,104 +1,105 @@ interactions: -- request: - body: '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, - how are you?"}],"model":"gpt-3.5-turbo","n":1}' - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-length: - - '149' - content-type: - - application/json - host: - - api.openai.com - user-agent: - - AsyncOpenAI/Python 1.57.0 - x-stainless-arch: - - arm64 - x-stainless-async: - - async:asyncio - x-stainless-lang: - - python - x-stainless-os: - - MacOS - x-stainless-package-version: - - 1.57.0 - x-stainless-raw-response: - - 'true' - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.12.7 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAA4yST0sDMRDF7/spQs5tabstK70JQj0oFL2IIks2me7GZjMxmaVq6XeXbP/sihW8 - 5DC/eS9vJtkljHGt+IJxWQmStTPD6+Lm7tauskf7/iSMfdjMlvPnWZV9bVfbez6ICizeQNJJNZJY - OwOk0R6w9CAIouskS9NZOs2ushbUqMBEWelomI7mQ2p8gcPxZDo/KivUEgJfsJeEMcZ27RkzWgUf - fMHGg1OlhhBECXxxbmKMezSxwkUIOpCwxAcdlGgJbBt7iaj6yMO6CSJGs40xx/r+fJfB0nkswpGf - 62ttdahyDyKgjb6B0PGW7hPGXtuZmh8xufNYO8oJN2Cj4XR6sOPdFjs4OTJCEqanSQcXzHIFJLQJ - vZVwKWQFqlN2+xON0tgDSW/k31kueR/G1rb8j30HpARHoHLnQWn5c96uzUP8Yn+1nVfcBubhMxDU - +VrbErzzun3k9iX3yTcAAAD//wMAusvg7OMCAAA= - headers: - CF-Cache-Status: - - DYNAMIC - CF-RAY: - - 8ed700089e2e072b-SJC - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Thu, 05 Dec 2024 21:06:27 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=4Fzl_VHC9.c_.kyLBREHy8a7wA.lEcwtqZXonX9ka10-1733432787-1.0.1.1-tBlI5dXtGa55yRlJwRgFkxlkQ7emZl1_xhYirjNw7CcPBv7WkC60ubux0sARYF8Nzun5tNgFTC100P_ywLDMgw; - path=/; expires=Thu, 05-Dec-24 21:36:27 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=WokAKLMIioMxOZo9K5N1oJLSuWXQVmUht7hb75_Z06w-1733432787525-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - openai-organization: - - future-house-xr4tdh - openai-processing-ms: - - '132' - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-ratelimit-limit-requests: - - '12000' - x-ratelimit-limit-tokens: - - '1000000' - x-ratelimit-remaining-requests: - - '11999' - x-ratelimit-remaining-tokens: - - '999969' - x-ratelimit-reset-requests: - - 5ms - x-ratelimit-reset-tokens: - - 1ms - x-request-id: - - req_567abdfb8bf13c71bc3f2bac8be8b4af - status: - code: 200 - message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":1}' + 
headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "149" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4yST0sDMRDF7/spQs5tabstK70JQj0oFL2IIks2me7GZjMxmaVq6XeXbP/sihW8 + 5DC/eS9vJtkljHGt+IJxWQmStTPD6+Lm7tauskf7/iSMfdjMlvPnWZV9bVfbez6ICizeQNJJNZJY + OwOk0R6w9CAIouskS9NZOs2ushbUqMBEWelomI7mQ2p8gcPxZDo/KivUEgJfsJeEMcZ27RkzWgUf + fMHGg1OlhhBECXxxbmKMezSxwkUIOpCwxAcdlGgJbBt7iaj6yMO6CSJGs40xx/r+fJfB0nkswpGf + 62ttdahyDyKgjb6B0PGW7hPGXtuZmh8xufNYO8oJN2Cj4XR6sOPdFjs4OTJCEqanSQcXzHIFJLQJ + vZVwKWQFqlN2+xON0tgDSW/k31kueR/G1rb8j30HpARHoHLnQWn5c96uzUP8Yn+1nVfcBubhMxDU + +VrbErzzun3k9iX3yTcAAAD//wMAusvg7OMCAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700089e2e072b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:27 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=4Fzl_VHC9.c_.kyLBREHy8a7wA.lEcwtqZXonX9ka10-1733432787-1.0.1.1-tBlI5dXtGa55yRlJwRgFkxlkQ7emZl1_xhYirjNw7CcPBv7WkC60ubux0sARYF8Nzun5tNgFTC100P_ywLDMgw; + path=/; expires=Thu, 05-Dec-24 21:36:27 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=WokAKLMIioMxOZo9K5N1oJLSuWXQVmUht7hb75_Z06w-1733432787525-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "132" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999969" + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 1ms + x-request-id: + - req_567abdfb8bf13c71bc3f2bac8be8b4af + status: + code: 200 + message: OK version: 1 diff --git a/uv.lock b/uv.lock index 9cc7e3e..ce7fbac 100644 --- a/uv.lock +++ b/uv.lock @@ -564,7 +564,7 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.0.4.dev6+g6de1e91.d20241206" +version = "0.0.4.dev6+g2eac4a6.d20241206" source = { editable = "." 
} dependencies = [ { name = "coredis" }, From 5d3a3c98ca15973d7af64d8b7852fe15364cfe8c Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Fri, 6 Dec 2024 15:18:55 -0800 Subject: [PATCH 05/18] Fix lint --- llmclient/llms.py | 2 +- tests/test_llms.py | 5 +++-- uv.lock | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llmclient/llms.py b/llmclient/llms.py index 0f66fb6..2f23255 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -861,7 +861,7 @@ async def call_multiple( tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, **chat_kwargs, ) -> list[LLMResult]: - if chat_kwargs.get("n", 1) == 1 or self.config.get("n", 1) == 1: + if 1 in {chat_kwargs.get("n", 1), self.config.get("n", 1)}: if ( chat_kwargs.get("n") and self.config.get("n") diff --git a/tests/test_llms.py b/tests/test_llms.py index 43ac45d..d8da345 100644 --- a/tests/test_llms.py +++ b/tests/test_llms.py @@ -332,7 +332,8 @@ async def test_single_completion(self, model_name: str) -> None: result = await model.call(messages) assert isinstance(result, LLMResult) - result = await model.call(messages, n=1) + result = await model.call(messages, n=1) # noqa: FURB120 + assert isinstance(result, LLMResult) assert result.messages assert len(result.messages) == 1 @@ -358,7 +359,7 @@ async def test_multiple_completion(self, model_name: str, request) -> None: with pytest.raises(litellm.BadRequestError, match="anthropic"): await model.call(messages) else: - results = await model.call(messages, n=None) + results = await model.call(messages, n=None) # noqa: FURB120 assert len(results) == self.NUM_COMPLETIONS results = await model.call(messages, n=self.NUM_COMPLETIONS) diff --git a/uv.lock b/uv.lock index ce7fbac..c74be79 100644 --- a/uv.lock +++ b/uv.lock @@ -564,7 +564,7 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.0.4.dev6+g2eac4a6.d20241206" +version = "0.0.4.dev7+g6fbf2f2.d20241206" source = { editable = "." 
} dependencies = [ { name = "coredis" }, From 3f650fcead159bda0541c774be86c68985f4b761 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 11:01:41 -0800 Subject: [PATCH 06/18] Implemented tests to check kwarg priority when calling --- tests/test_llms.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/test_llms.py b/tests/test_llms.py index d8da345..8bd4974 100644 --- a/tests/test_llms.py +++ b/tests/test_llms.py @@ -317,7 +317,6 @@ async def test_text_image_message(self, model_name: str) -> None: ), "Expected content in message, but got None" assert "red" in result.messages[-1].content.lower() - # Test n = 1 @pytest.mark.parametrize( "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"] ) @@ -339,6 +338,13 @@ async def test_single_completion(self, model_name: str) -> None: assert len(result.messages) == 1 assert result.messages[0].content + model = self.MODEL_CLS(name=model_name, config={"n": 2}) + result = await model.call(messages, n=1) + assert isinstance(result, LLMResult) + assert result.messages + assert len(result.messages) == 1 + assert result.messages[0].content + @pytest.mark.asyncio @pytest.mark.vcr @pytest.mark.parametrize( @@ -365,6 +371,10 @@ async def test_multiple_completion(self, model_name: str, request) -> None: results = await model.call(messages, n=self.NUM_COMPLETIONS) assert len(results) == self.NUM_COMPLETIONS + model = self.MODEL_CLS(name=model_name, config={"n": 1}) + results = await model.call(messages, n=self.NUM_COMPLETIONS) + assert len(results) == self.NUM_COMPLETIONS + def test_json_schema_validation() -> None: # Invalid JSON From 7edd6133dd6c0eb102489d6dcb6fd803cecf1bb9 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 11:09:27 -0800 Subject: [PATCH 07/18] Exposed missing classes --- llmclient/__init__.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/llmclient/__init__.py b/llmclient/__init__.py index d471c9d..65aa5ab 100644 --- a/llmclient/__init__.py +++ b/llmclient/__init__.py @@ -1,7 +1,13 @@ +from .constants import ( + CHARACTERS_PER_TOKEN_ASSUMPTION, + EXTRA_TOKENS_FROM_USER_ROLE, + MODEL_COST_MAP, +) from .embeddings import ( EmbeddingModel, EmbeddingModes, HybridEmbeddingModel, + LiteLLMEmbeddingModel, SentenceTransformerEmbeddingModel, SparseEmbeddingModel, ) @@ -13,15 +19,25 @@ LLMModel, MultipleCompletionLLMModel, ) -from .types import LLMResult +from .types import ( + Chunk, + Embeddable, + LLMResult, +) __all__ = [ + "CHARACTERS_PER_TOKEN_ASSUMPTION", + "EXTRA_TOKENS_FROM_USER_ROLE", + "MODEL_COST_MAP", + "Chunk", + "Embeddable", "EmbeddingModel", "EmbeddingModes", "HybridEmbeddingModel", "JSONSchemaValidationError", "LLMModel", "LLMResult", + "LiteLLMEmbeddingModel", "LiteLLMModel", "MultipleCompletionLLMModel", "SentenceTransformerEmbeddingModel", From bae87659f3e14f90736ae701bc3d284f66dd9581 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 11:14:36 -0800 Subject: [PATCH 08/18] added embedding_model_factory --- llmclient/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llmclient/__init__.py b/llmclient/__init__.py index 65aa5ab..dc27f02 100644 --- a/llmclient/__init__.py +++ b/llmclient/__init__.py @@ -10,6 +10,7 @@ LiteLLMEmbeddingModel, SentenceTransformerEmbeddingModel, SparseEmbeddingModel, + embedding_model_factory, ) from .exceptions import ( JSONSchemaValidationError, @@ -42,4 +43,5 @@ "MultipleCompletionLLMModel", "SentenceTransformerEmbeddingModel", "SparseEmbeddingModel", + 
"embedding_model_factory", ] From 1e6eb7812302857744d91f0cf860bb7c05dd5164 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 11:24:59 -0800 Subject: [PATCH 09/18] Added documentation to call functions --- llmclient/llms.py | 82 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 19 deletions(-) diff --git a/llmclient/llms.py b/llmclient/llms.py index 2f23255..d68be73 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -614,7 +614,7 @@ class MultipleCompletionLLMModel(BaseModel): "Configuration of the model:" "model is the name of the llm model to use," "temperature is the sampling temperature, and" - "n is the number of completions to generate." + "n is the number of completions to generate by default." ), ) encoding: Any | None = None @@ -832,9 +832,6 @@ async def _call( # noqa: C901, PLR0915 return results - # TODO: Is it good practice to have this multiple interface? - # Users can just use `call` and we chat `n` - # or they can specifically call `call_single` or `call_multiple` async def call_single( self, messages: list[Message], @@ -844,7 +841,25 @@ async def call_single( tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, **chat_kwargs, ) -> LLMResult: - if chat_kwargs.get("n", 1) != 1 or self.config.get("n", 1) != 1: + """ + Calls the LLM with a list of messages and returns a single completion result. + + Args: + messages: A list of messages to send to the LLM. + callbacks: A list of callback functions to execute after the call. + output_type: The type of the output model. + tools: A list of tools to use during the call. + tool_choice: The tool or tool choice to use. + **chat_kwargs: Additional keyword arguments for the chat. + + Returns: + The result of the LLM call as a LLMResult object. + + Raises: + ValueError: If the value of 'n' is not 1. + """ + n = chat_kwargs.get("n", self.config.get("n", 1)) + if n != 1: raise ValueError("n must be 1 for call_single.") return ( await self._call( @@ -861,17 +876,27 @@ async def call_multiple( tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, **chat_kwargs, ) -> list[LLMResult]: - if 1 in {chat_kwargs.get("n", 1), self.config.get("n", 1)}: - if ( - chat_kwargs.get("n") - and self.config.get("n") - and chat_kwargs.get("n") != self.config.get("n") - ): - raise ValueError( - f"Incompatible number of completions requested. " - f"Model's configuration n is {self.config['n']}, " - f"but kwarg n={chat_kwargs['n']} was passed." - ) + """ + Calls the LLM with a list of messages and returns a list of completion results. + + Args: + messages: A list of messages to send to the LLM. + callbacks: A list of callback functions to execute after receiving the response. + output_type: The type of the output model. + tools: A list of tools to use during the call. + tool_choice: The tool or tool choice strategy to use. + **chat_kwargs: Additional keyword arguments to pass to the chat function. + + Returns: + A list of results from the LLM. + + Raises: + Warning: If the number of completions (`n`) requested is set to 1, + a warning is logged indicating that the returned list will contain a single element. + `n` can be set in chat_kargs or in the model's configuration. + """ + n = chat_kwargs.get("n", self.config.get("n", 1)) + if n == 1: logger.warning( "n is 1 for call_multiple. It will return a list with a single element" ) @@ -913,14 +938,33 @@ async def call( n: int | None = None, **chat_kwargs, ) -> list[LLMResult] | LLMResult: + """ + Call the LLM model with the given messages and configuration. 

From cb16d19e216ed40ce83c387e9d2a031db487728b Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Mon, 9 Dec 2024 11:37:00 -0800
Subject: [PATCH 10/18] skip lint checking for argument with default value in
 test_llms

---
 tests/test_llms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_llms.py b/tests/test_llms.py
index 8bd4974..b236565 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -339,7 +339,7 @@ async def test_single_completion(self, model_name: str) -> None:
         assert result.messages[0].content

         model = self.MODEL_CLS(name=model_name, config={"n": 2})
-        result = await model.call(messages, n=1)
+        result = await model.call(messages, n=1) # noqa: FURB120
         assert isinstance(result, LLMResult)
         assert result.messages
         assert len(result.messages) == 1

From 7966f9ab2e22ce4bc8f15b36b67a5d72eefab283 Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Mon, 9 Dec 2024 11:40:26 -0800
Subject: [PATCH 11/18] Fixed pre-commit errors

---
 tests/test_llms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_llms.py b/tests/test_llms.py
index b236565..1a4fd1c 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -339,7 +339,7 @@ async def test_single_completion(self, model_name: str) -> None:
         assert result.messages[0].content

         model = self.MODEL_CLS(name=model_name, config={"n": 2})
-        result = await model.call(messages, n=1) # noqa: FURB120
+        result = await model.call(messages, n=1)  # noqa: FURB120
         assert isinstance(result, LLMResult)
         assert result.messages
         assert len(result.messages) == 1

From 9e91858387c385e2894cc1ece13a6b3854dd25d7 Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Mon, 9 Dec 2024 12:31:54 -0800
Subject: [PATCH 12/18] Reverted changes in uv.lock

---
 uv.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uv.lock b/uv.lock
index c74be79..3d3717f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3033,4 +3033,4 @@ source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/3f/50/bad581df71744867e9468ebd0bcd6505de3b275e06f202c2cb016e3ff56f/zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4", size = 24545 }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 },
-]
+]
\ No newline at end of file
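
Across PATCH 05, PATCH 06, and PATCH 09, the precedence for resolving `n` settles as: a positive `n` passed at call time wins, then the model configuration's `n`, then a default of 1. A simplified restatement of that rule (illustrative only; `resolve_n` is not a function in llms.py):

    def resolve_n(call_time_n: int | None, config_n: int | None) -> int:
        # A positive n supplied to call() takes priority.
        if call_time_n and call_time_n > 0:
            return call_time_n
        # Otherwise fall back to the model configuration, defaulting to 1.
        return config_n if config_n and config_n > 0 else 1

    assert resolve_n(3, 1) == 3     # call-time kwarg beats the config (PATCH 06's new tests)
    assert resolve_n(None, 2) == 2  # the config fills in when n is omitted
    assert resolve_n(0, None) == 1  # invalid values fall back to the default
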
"https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 }, -] +] \ No newline at end of file From 29e4d917ea2312db4d58ad75d4fd4e6fb11e09cc Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 12:32:52 -0800 Subject: [PATCH 13/18] Fixed line wrap in docstrings --- llmclient/llms.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llmclient/llms.py b/llmclient/llms.py index d68be73..52eb8ea 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -892,8 +892,8 @@ async def call_multiple( Raises: Warning: If the number of completions (`n`) requested is set to 1, - a warning is logged indicating that the returned list will contain a single element. - `n` can be set in chat_kargs or in the model's configuration. + a warning is logged indicating that the returned list will contain a single element. + `n` can be set in chat_kargs or in the model's configuration. """ n = chat_kwargs.get("n", self.config.get("n", 1)) if n == 1: @@ -948,7 +948,7 @@ async def call( tools: A list of tools to use during the call. tool_choice: The tool or tool identifier to use. n: An integer argument that specifies the number of completions to generate. - If n is not specified, the model's configuration is used. + If n is not specified, the model's configuration is used. **chat_kwargs: Additional keyword arguments to pass to the chat function. Returns: From f8090bbb0244f946d01540329e042900ddfa7d47 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 12:35:53 -0800 Subject: [PATCH 14/18] reverting uv.lock --- uv.lock | 73 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/uv.lock b/uv.lock index 3d3717f..8d118c6 100644 --- a/uv.lock +++ b/uv.lock @@ -114,16 +114,15 @@ wheels = [ [[package]] name = "anyio" -version = "4.7.0" +version = "4.6.2.post1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "sniffio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f6/40/318e58f669b1a9e00f5c4453910682e2d9dd594334539c7b7817dabb765f/anyio-4.7.0.tar.gz", hash = "sha256:2f834749c602966b7d456a7567cafcb309f96482b5081d14ac93ccd457f9dd48", size = 177076 } +sdist = { url = "https://files.pythonhosted.org/packages/9f/09/45b9b7a6d4e45c6bcb5bf61d19e3ab87df68e0601fa8c5293de3542546cc/anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c", size = 173422 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/7a/4daaf3b6c08ad7ceffea4634ec206faeff697526421c20f07628c7372156/anyio-4.7.0-py3-none-any.whl", hash = "sha256:ea60c3723ab42ba6fff7e8ccb0488c898ec538ff4df1f1d5e642c3601d07e352", size = 93052 }, + { url = "https://files.pythonhosted.org/packages/e4/f5/f2b75d2fc6f1a260f340f0e7c6a060f4dd2961cc16884ed851b0d18da06a/anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d", size = 90377 }, ] [[package]] @@ -564,7 +563,7 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.0.4.dev7+g6fbf2f2.d20241206" +version = "0.0.4.dev6+g6de1e91.d20241206" source = { editable = "." 
} dependencies = [ { name = "coredis" }, @@ -2669,27 +2668,49 @@ sdist = { url = "https://files.pythonhosted.org/packages/80/f8/0802dd14c58b5d3d7 [[package]] name = "tokenizers" -version = "0.21.0" +version = "0.20.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/20/41/c2be10975ca37f6ec40d7abd7e98a5213bb04f284b869c1a24e6504fd94d/tokenizers-0.21.0.tar.gz", hash = "sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4", size = 343021 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b0/5c/8b09607b37e996dc47e70d6a7b6f4bdd4e4d5ab22fe49d7374565c7fefaf/tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2", size = 2647461 }, - { url = "https://files.pythonhosted.org/packages/22/7a/88e58bb297c22633ed1c9d16029316e5b5ac5ee44012164c2edede599a5e/tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e", size = 2563639 }, - { url = "https://files.pythonhosted.org/packages/f7/14/83429177c19364df27d22bc096d4c2e431e0ba43e56c525434f1f9b0fd00/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193", size = 2903304 }, - { url = "https://files.pythonhosted.org/packages/7e/db/3433eab42347e0dc5452d8fcc8da03f638c9accffefe5a7c78146666964a/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e", size = 2804378 }, - { url = "https://files.pythonhosted.org/packages/57/8b/7da5e6f89736c2ade02816b4733983fca1c226b0c42980b1ae9dc8fcf5cc/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e", size = 3095488 }, - { url = "https://files.pythonhosted.org/packages/4d/f6/5ed6711093dc2c04a4e03f6461798b12669bc5a17c8be7cce1240e0b5ce8/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba", size = 3121410 }, - { url = "https://files.pythonhosted.org/packages/81/42/07600892d48950c5e80505b81411044a2d969368cdc0d929b1c847bf6697/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273", size = 3388821 }, - { url = "https://files.pythonhosted.org/packages/22/06/69d7ce374747edaf1695a4f61b83570d91cc8bbfc51ccfecf76f56ab4aac/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04", size = 3008868 }, - { url = "https://files.pythonhosted.org/packages/c8/69/54a0aee4d576045b49a0eb8bffdc495634309c823bf886042e6f46b80058/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e", size = 8975831 }, - { url = "https://files.pythonhosted.org/packages/f7/f3/b776061e4f3ebf2905ba1a25d90380aafd10c02d406437a8ba22d1724d76/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b", size = 8920746 }, - { url = 
"https://files.pythonhosted.org/packages/d8/ee/ce83d5ec8b6844ad4c3ecfe3333d58ecc1adc61f0878b323a15355bcab24/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74", size = 9161814 }, - { url = "https://files.pythonhosted.org/packages/18/07/3e88e65c0ed28fa93aa0c4d264988428eef3df2764c3126dc83e243cb36f/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff", size = 9357138 }, - { url = "https://files.pythonhosted.org/packages/15/b0/dc4572ca61555fc482ebc933f26cb407c6aceb3dc19c301c68184f8cad03/tokenizers-0.21.0-cp39-abi3-win32.whl", hash = "sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a", size = 2202266 }, - { url = "https://files.pythonhosted.org/packages/44/69/d21eb253fa91622da25585d362a874fa4710be600f0ea9446d8d0217cec1/tokenizers-0.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c", size = 2389192 }, +sdist = { url = "https://files.pythonhosted.org/packages/da/25/b1681c1c30ea3ea6e584ae3fffd552430b12faa599b558c4c4783f56d7ff/tokenizers-0.20.3.tar.gz", hash = "sha256:2278b34c5d0dd78e087e1ca7f9b1dcbf129d80211afa645f214bd6e051037539", size = 340513 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/93/6742ef9206409d5ce1fdf44d5ca1687cdc3847ba0485424e2c731e6bcf67/tokenizers-0.20.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:585b51e06ca1f4839ce7759941e66766d7b060dccfdc57c4ca1e5b9a33013a90", size = 2674224 }, + { url = "https://files.pythonhosted.org/packages/aa/14/e75ece72e99f6ef9ae07777ca9fdd78608f69466a5cecf636e9bd2f25d5c/tokenizers-0.20.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:61cbf11954f3b481d08723ebd048ba4b11e582986f9be74d2c3bdd9293a4538d", size = 2558991 }, + { url = "https://files.pythonhosted.org/packages/46/54/033b5b2ba0c3ae01e026c6f7ced147d41a2fa1c573d00a66cb97f6d7f9b3/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef820880d5e4e8484e2fa54ff8d297bb32519eaa7815694dc835ace9130a3eea", size = 2892476 }, + { url = "https://files.pythonhosted.org/packages/e6/b0/cc369fb3297d61f3311cab523d16d48c869dc2f0ba32985dbf03ff811041/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:67ef4dcb8841a4988cd00dd288fb95dfc8e22ed021f01f37348fd51c2b055ba9", size = 2802775 }, + { url = "https://files.pythonhosted.org/packages/1a/74/62ad983e8ea6a63e04ed9c5be0b605056bf8aac2f0125f9b5e0b3e2b89fa/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff1ef8bd47a02b0dc191688ccb4da53600df5d4c9a05a4b68e1e3de4823e78eb", size = 3086138 }, + { url = "https://files.pythonhosted.org/packages/6b/ac/4637ba619db25094998523f9e6f5b456e1db1f8faa770a3d925d436db0c3/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:444d188186eab3148baf0615b522461b41b1f0cd58cd57b862ec94b6ac9780f1", size = 3098076 }, + { url = "https://files.pythonhosted.org/packages/58/ce/9793f2dc2ce529369807c9c74e42722b05034af411d60f5730b720388c7d/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37c04c032c1442740b2c2d925f1857885c07619224a533123ac7ea71ca5713da", size = 3379650 }, + { url = "https://files.pythonhosted.org/packages/50/f6/2841de926bc4118af996eaf0bdf0ea5b012245044766ffc0347e6c968e63/tokenizers-0.20.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:453c7769d22231960ee0e883d1005c93c68015025a5e4ae56275406d94a3c907", size = 2994005 }, + { url = "https://files.pythonhosted.org/packages/a3/b2/00915c4fed08e9505d37cf6eaab45b12b4bff8f6719d459abcb9ead86a4b/tokenizers-0.20.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4bb31f7b2847e439766aaa9cc7bccf7ac7088052deccdb2275c952d96f691c6a", size = 8977488 }, + { url = "https://files.pythonhosted.org/packages/e9/ac/1c069e7808181ff57bcf2d39e9b6fbee9133a55410e6ebdaa89f67c32e83/tokenizers-0.20.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:843729bf0f991b29655a069a2ff58a4c24375a553c70955e15e37a90dd4e045c", size = 9294935 }, + { url = "https://files.pythonhosted.org/packages/50/47/722feb70ee68d1c4412b12d0ea4acc2713179fd63f054913990f9e259492/tokenizers-0.20.3-cp311-none-win32.whl", hash = "sha256:efcce3a927b1e20ca694ba13f7a68c59b0bd859ef71e441db68ee42cf20c2442", size = 2197175 }, + { url = "https://files.pythonhosted.org/packages/75/68/1b4f928b15a36ed278332ac75d66d7eb65d865bf344d049c452c18447bf9/tokenizers-0.20.3-cp311-none-win_amd64.whl", hash = "sha256:88301aa0801f225725b6df5dea3d77c80365ff2362ca7e252583f2b4809c4cc0", size = 2381616 }, + { url = "https://files.pythonhosted.org/packages/07/00/92a08af2a6b0c88c50f1ab47d7189e695722ad9714b0ee78ea5e1e2e1def/tokenizers-0.20.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:49d12a32e190fad0e79e5bdb788d05da2f20d8e006b13a70859ac47fecf6ab2f", size = 2667951 }, + { url = "https://files.pythonhosted.org/packages/ec/9a/e17a352f0bffbf415cf7d73756f5c73a3219225fc5957bc2f39d52c61684/tokenizers-0.20.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:282848cacfb9c06d5e51489f38ec5aa0b3cd1e247a023061945f71f41d949d73", size = 2555167 }, + { url = "https://files.pythonhosted.org/packages/27/37/d108df55daf4f0fcf1f58554692ff71687c273d870a34693066f0847be96/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abe4e08c7d0cd6154c795deb5bf81d2122f36daf075e0c12a8b050d824ef0a64", size = 2898389 }, + { url = "https://files.pythonhosted.org/packages/b2/27/32f29da16d28f59472fa7fb38e7782069748c7e9ab9854522db20341624c/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca94fc1b73b3883c98f0c88c77700b13d55b49f1071dfd57df2b06f3ff7afd64", size = 2795866 }, + { url = "https://files.pythonhosted.org/packages/29/4e/8a9a3c89e128c4a40f247b501c10279d2d7ade685953407c4d94c8c0f7a7/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef279c7e239f95c8bdd6ff319d9870f30f0d24915b04895f55b1adcf96d6c60d", size = 3085446 }, + { url = "https://files.pythonhosted.org/packages/b4/3b/a2a7962c496ebcd95860ca99e423254f760f382cd4bd376f8895783afaf5/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16384073973f6ccbde9852157a4fdfe632bb65208139c9d0c0bd0176a71fd67f", size = 3094378 }, + { url = "https://files.pythonhosted.org/packages/1f/f4/a8a33f0192a1629a3bd0afcad17d4d221bbf9276da4b95d226364208d5eb/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:312d522caeb8a1a42ebdec87118d99b22667782b67898a76c963c058a7e41d4f", size = 3385755 }, + { url = "https://files.pythonhosted.org/packages/9e/65/c83cb3545a65a9eaa2e13b22c93d5e00bd7624b354a44adbdc93d5d9bd91/tokenizers-0.20.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2b7cb962564785a83dafbba0144ecb7f579f1d57d8c406cdaa7f32fe32f18ad", size = 2997679 }, + { url = 
"https://files.pythonhosted.org/packages/55/e9/a80d4e592307688a67c7c59ab77e03687b6a8bd92eb5db763a2c80f93f57/tokenizers-0.20.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:124c5882ebb88dadae1fc788a582299fcd3a8bd84fc3e260b9918cf28b8751f5", size = 8989296 }, + { url = "https://files.pythonhosted.org/packages/90/af/60c957af8d2244321124e893828f1a4817cde1a2d08d09d423b73f19bd2f/tokenizers-0.20.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2b6e54e71f84c4202111a489879005cb14b92616a87417f6c102c833af961ea2", size = 9303621 }, + { url = "https://files.pythonhosted.org/packages/be/a9/96172310ee141009646d63a1ca267c099c462d747fe5ef7e33f74e27a683/tokenizers-0.20.3-cp312-none-win32.whl", hash = "sha256:83d9bfbe9af86f2d9df4833c22e94d94750f1d0cd9bfb22a7bb90a86f61cdb1c", size = 2188979 }, + { url = "https://files.pythonhosted.org/packages/bd/68/61d85ae7ae96dde7d0974ff3538db75d5cdc29be2e4329cd7fc51a283e22/tokenizers-0.20.3-cp312-none-win_amd64.whl", hash = "sha256:44def74cee574d609a36e17c8914311d1b5dbcfe37c55fd29369d42591b91cf2", size = 2380725 }, + { url = "https://files.pythonhosted.org/packages/07/19/36e9eaafb229616cb8502b42030fa7fe347550e76cb618de71b498fc3222/tokenizers-0.20.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0b630e0b536ef0e3c8b42c685c1bc93bd19e98c0f1543db52911f8ede42cf84", size = 2666813 }, + { url = "https://files.pythonhosted.org/packages/b9/c7/e2ce1d4f756c8a62ef93fdb4df877c2185339b6d63667b015bf70ea9d34b/tokenizers-0.20.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a02d160d2b19bcbfdf28bd9a4bf11be4cb97d0499c000d95d4c4b1a4312740b6", size = 2555354 }, + { url = "https://files.pythonhosted.org/packages/7c/cf/5309c2d173a6a67f9ec8697d8e710ea32418de6fd8541778032c202a1c3e/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e3d80d89b068bc30034034b5319218c7c0a91b00af19679833f55f3becb6945", size = 2897745 }, + { url = "https://files.pythonhosted.org/packages/2c/e5/af3078e32f225e680e69d61f78855880edb8d53f5850a1834d519b2b103f/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:174a54910bed1b089226512b4458ea60d6d6fd93060254734d3bc3540953c51c", size = 2794385 }, + { url = "https://files.pythonhosted.org/packages/0b/a7/bc421fe46650cc4eb4a913a236b88c243204f32c7480684d2f138925899e/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:098b8a632b8656aa5802c46689462c5c48f02510f24029d71c208ec2c822e771", size = 3084580 }, + { url = "https://files.pythonhosted.org/packages/c6/22/97e1e95ee81f75922c9f569c23cb2b1fdc7f5a7a29c4c9fae17e63f751a6/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78c8c143e3ae41e718588281eb3e212c2b31623c9d6d40410ec464d7d6221fb5", size = 3093581 }, + { url = "https://files.pythonhosted.org/packages/d5/14/f0df0ee3b9e516121e23c0099bccd7b9f086ba9150021a750e99b16ce56f/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b26b0aadb18cd8701077362ba359a06683662d5cafe3e8e8aba10eb05c037f1", size = 3385934 }, + { url = "https://files.pythonhosted.org/packages/66/52/7a171bd4929e3ffe61a29b4340fe5b73484709f92a8162a18946e124c34c/tokenizers-0.20.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07d7851a72717321022f3774e84aa9d595a041d643fafa2e87fbc9b18711dac0", size = 2997311 }, + { url = 
"https://files.pythonhosted.org/packages/7c/64/f1993bb8ebf775d56875ca0d50a50f2648bfbbb143da92fe2e6ceeb4abd5/tokenizers-0.20.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:bd44e48a430ada902c6266a8245f5036c4fe744fcb51f699999fbe82aa438797", size = 8988601 }, + { url = "https://files.pythonhosted.org/packages/d6/3f/49fa63422159bbc2f2a4ac5bfc597d04d4ec0ad3d2ef46649b5e9a340e37/tokenizers-0.20.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a4c186bb006ccbe1f5cc4e0380d1ce7806f5955c244074fd96abc55e27b77f01", size = 9303950 }, + { url = "https://files.pythonhosted.org/packages/66/11/79d91aeb2817ad1993ef61c690afe73e6dbedbfb21918b302ef5a2ba9bfb/tokenizers-0.20.3-cp313-none-win32.whl", hash = "sha256:6e19e0f1d854d6ab7ea0c743d06e764d1d9a546932be0a67f33087645f00fe13", size = 2188941 }, + { url = "https://files.pythonhosted.org/packages/c2/ff/ac8410f868fb8b14b5e619efa304aa119cb8a40bd7df29fc81a898e64f99/tokenizers-0.20.3-cp313-none-win_amd64.whl", hash = "sha256:d50ede425c7e60966a9680d41b58b3a0950afa1bb570488e2972fa61662c4273", size = 2380269 }, ] [[package]] @@ -2762,7 +2783,7 @@ wheels = [ [[package]] name = "transformers" -version = "4.47.0" +version = "4.46.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -2776,9 +2797,9 @@ dependencies = [ { name = "tokenizers" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b1/5a/0ecfde3264bed0579c37f249e04e15f3c1451ba864d78bbe390177664cac/transformers-4.47.0.tar.gz", hash = "sha256:f8ead7a5a4f6937bb507e66508e5e002dc5930f7b6122a9259c37b099d0f3b19", size = 8693668 } +sdist = { url = "https://files.pythonhosted.org/packages/37/5a/58f96c83e566f907ae39f16d4401bbefd8bb85c60bd1e6a95c419752ab90/transformers-4.46.3.tar.gz", hash = "sha256:8ee4b3ae943fe33e82afff8e837f4b052058b07ca9be3cb5b729ed31295f72cc", size = 8627944 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/a7/7eedcf6a359e1e1eff3bc204ad022485aa5d88c08e1e3e0e0aee8a2e2235/transformers-4.47.0-py3-none-any.whl", hash = "sha256:a8e1bafdaae69abdda3cad638fe392e37c86d2ce0ecfcae11d60abb8f949ff4d", size = 10133426 }, + { url = "https://files.pythonhosted.org/packages/51/51/b87caa939fedf307496e4dbf412f4b909af3d9ca8b189fc3b65c1faa456f/transformers-4.46.3-py3-none-any.whl", hash = "sha256:a12ef6f52841fd190a3e5602145b542d03507222f2c64ebb7ee92e8788093aef", size = 10034536 }, ] [[package]] @@ -3033,4 +3054,4 @@ source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/3f/50/bad581df71744867e9468ebd0bcd6505de3b275e06f202c2cb016e3ff56f/zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4", size = 24545 } wheels = [ { url = "https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 }, -] \ No newline at end of file +] From 418fa3be9739f0a26ce363b61d10e4d94da7aba4 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 12:59:46 -0800 Subject: [PATCH 15/18] removed the dependency on numpy. 
It is now a conditional dependency for local embeddings --- llmclient/embeddings.py | 19 +++++++++++-------- pyproject.toml | 7 ++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/llmclient/embeddings.py b/llmclient/embeddings.py index ce15503..c5a5b7c 100644 --- a/llmclient/embeddings.py +++ b/llmclient/embeddings.py @@ -1,10 +1,10 @@ import asyncio from abc import ABC, abstractmethod +from collections import Counter from enum import StrEnum from typing import Any import litellm -import numpy as np import tiktoken from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator @@ -171,13 +171,9 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: enc_batch = self.enc.encode_ordinary_batch(texts) # now get frequency of each token rel to length return [ - ( - np.bincount([xi % self.ndim for xi in x], minlength=self.ndim).astype( - float - ) - / len(x) - ).tolist() + [token_counts.get(i, 0) / len(x) for i in range(self.ndim)] for x in enc_batch + if (token_counts := Counter(xi % self.ndim for xi in x)) ] @@ -199,7 +195,11 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: all_embeds = await asyncio.gather( *[m.embed_documents(texts) for m in self.models] ) - return np.concatenate(all_embeds, axis=1).tolist() + + return [ + [val for sublist in embed_group for val in sublist] + for embed_group in zip(*all_embeds, strict=True) + ] def set_mode(self, mode: EmbeddingModes) -> None: # Set mode for all component models @@ -217,6 +217,7 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel): def __init__(self, **kwargs): super().__init__(**kwargs) try: + import numpy as np from sentence_transformers import SentenceTransformer except ImportError as exc: raise ImportError( @@ -240,6 +241,8 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: Returns: A list of embedding vectors. 
""" + import numpy as np + # Extract additional configurations if needed batch_size = self.config.get("batch_size", 32) device = self.config.get("device", "cpu") diff --git a/pyproject.toml b/pyproject.toml index 8c6d278..a5caeb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ dependencies = [ "fhaviary>=0.8.2", # For core namespace "limits", "litellm>=1.44", # For LITELLM_LOG addition - "numpy", "pydantic~=2.0,>=2.10.1,<2.10.2", "tiktoken>=0.4.0", "typing-extensions; python_version <= '3.11'", # for typing.override @@ -40,7 +39,7 @@ requires-python = ">=3.11" [project.optional-dependencies] dev = [ - "fh-llm-client[image,local]", + "fh-llm-client[local]", "fhaviary[xml]", "ipython>=8", # Pin to keep recent "mypy>=1.8", # Pin for mutable-override @@ -58,11 +57,9 @@ dev = [ "python-dotenv", "refurb>=2", # Pin to keep recent ] -image = [ - "Pillow", -] local = [ "sentence-transformers", + "numpy", ] [project.urls] From c34b02c761a48b7b746a076851c1a5037fa44765 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 13:44:31 -0800 Subject: [PATCH 16/18] Removed image group dependency Messages are not implemented in llmclient anymore --- llmclient/embeddings.py | 20 ++++++++++++-------- pyproject.toml | 7 ++----- uv.lock | 18 +++++++----------- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/llmclient/embeddings.py b/llmclient/embeddings.py index ce15503..e03de2e 100644 --- a/llmclient/embeddings.py +++ b/llmclient/embeddings.py @@ -1,10 +1,11 @@ import asyncio from abc import ABC, abstractmethod +from collections import Counter from enum import StrEnum +from itertools import chain from typing import Any import litellm -import numpy as np import tiktoken from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator @@ -171,13 +172,9 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: enc_batch = self.enc.encode_ordinary_batch(texts) # now get frequency of each token rel to length return [ - ( - np.bincount([xi % self.ndim for xi in x], minlength=self.ndim).astype( - float - ) - / len(x) - ).tolist() + [token_counts.get(xi, 0) / len(x) for xi in range(self.ndim)] for x in enc_batch + if (token_counts := Counter(xi % self.ndim for xi in x)) ] @@ -199,7 +196,11 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: all_embeds = await asyncio.gather( *[m.embed_documents(texts) for m in self.models] ) - return np.concatenate(all_embeds, axis=1).tolist() + + return [ + list(chain.from_iterable(embed_group)) + for embed_group in zip(*all_embeds, strict=True) + ] def set_mode(self, mode: EmbeddingModes) -> None: # Set mode for all component models @@ -217,6 +218,7 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel): def __init__(self, **kwargs): super().__init__(**kwargs) try: + import numpy as np # noqa: F401 from sentence_transformers import SentenceTransformer except ImportError as exc: raise ImportError( @@ -240,6 +242,8 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: Returns: A list of embedding vectors. 
""" + import numpy as np + # Extract additional configurations if needed batch_size = self.config.get("batch_size", 32) device = self.config.get("device", "cpu") diff --git a/pyproject.toml b/pyproject.toml index 8c6d278..a85772d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ dependencies = [ "fhaviary>=0.8.2", # For core namespace "limits", "litellm>=1.44", # For LITELLM_LOG addition - "numpy", "pydantic~=2.0,>=2.10.1,<2.10.2", "tiktoken>=0.4.0", "typing-extensions; python_version <= '3.11'", # for typing.override @@ -40,7 +39,7 @@ requires-python = ">=3.11" [project.optional-dependencies] dev = [ - "fh-llm-client[image,local]", + "fh-llm-client[local]", "fhaviary[xml]", "ipython>=8", # Pin to keep recent "mypy>=1.8", # Pin for mutable-override @@ -58,10 +57,8 @@ dev = [ "python-dotenv", "refurb>=2", # Pin to keep recent ] -image = [ - "Pillow", -] local = [ + "numpy", "sentence-transformers", ] diff --git a/uv.lock b/uv.lock index 8d118c6..a161538 100644 --- a/uv.lock +++ b/uv.lock @@ -563,14 +563,13 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.0.4.dev6+g6de1e91.d20241206" +version = "0.0.4.dev3+g418fa3b.d20241209" source = { editable = "." } dependencies = [ { name = "coredis" }, { name = "fhaviary" }, { name = "limits" }, { name = "litellm" }, - { name = "numpy" }, { name = "pydantic" }, { name = "tiktoken" }, { name = "typing-extensions", marker = "python_full_version < '3.12'" }, @@ -581,7 +580,7 @@ dev = [ { name = "fhaviary", extra = ["xml"] }, { name = "ipython" }, { name = "mypy" }, - { name = "pillow" }, + { name = "numpy" }, { name = "pre-commit" }, { name = "pylint-pydantic" }, { name = "pytest" }, @@ -597,10 +596,8 @@ dev = [ { name = "refurb" }, { name = "sentence-transformers" }, ] -image = [ - { name = "pillow" }, -] local = [ + { name = "numpy" }, { name = "sentence-transformers" }, ] @@ -610,7 +607,7 @@ codeflash = [ { name = "fhaviary", extra = ["xml"] }, { name = "ipython" }, { name = "mypy" }, - { name = "pillow" }, + { name = "numpy" }, { name = "pre-commit" }, { name = "pylint-pydantic" }, { name = "pytest" }, @@ -630,7 +627,7 @@ dev = [ { name = "fhaviary", extra = ["xml"] }, { name = "ipython" }, { name = "mypy" }, - { name = "pillow" }, + { name = "numpy" }, { name = "pre-commit" }, { name = "pylint-pydantic" }, { name = "pytest" }, @@ -650,15 +647,14 @@ dev = [ [package.metadata] requires-dist = [ { name = "coredis" }, - { name = "fh-llm-client", extras = ["image", "local"], marker = "extra == 'dev'" }, + { name = "fh-llm-client", extras = ["local"], marker = "extra == 'dev'" }, { name = "fhaviary", specifier = ">=0.8.2" }, { name = "fhaviary", extras = ["xml"], marker = "extra == 'dev'" }, { name = "ipython", marker = "extra == 'dev'", specifier = ">=8" }, { name = "limits" }, { name = "litellm", specifier = ">=1.44" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8" }, - { name = "numpy" }, - { name = "pillow", marker = "extra == 'image'" }, + { name = "numpy", marker = "extra == 'local'" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.4" }, { name = "pydantic", specifier = "~=2.0,>=2.10.1,<2.10.2" }, { name = "pylint-pydantic", marker = "extra == 'dev'" }, From 86d455d7617ea9de52394d32a26c7b817dfb7055 Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 9 Dec 2024 13:53:35 -0800 Subject: [PATCH 17/18] Fixed typos --- llmclient/embeddings.py | 2 -- pyproject.toml | 1 - 2 files changed, 3 deletions(-) diff --git a/llmclient/embeddings.py b/llmclient/embeddings.py index 271784f..e03de2e 
From 86d455d7617ea9de52394d32a26c7b817dfb7055 Mon Sep 17 00:00:00 2001
From: Mayk Caldas
Date: Mon, 9 Dec 2024 13:53:35 -0800
Subject: [PATCH 17/18] Fixed typos

---
 llmclient/embeddings.py | 2 --
 pyproject.toml          | 1 -
 2 files changed, 3 deletions(-)

diff --git a/llmclient/embeddings.py b/llmclient/embeddings.py
index 271784f..e03de2e 100644
--- a/llmclient/embeddings.py
+++ b/llmclient/embeddings.py
@@ -1,7 +1,6 @@
 import asyncio
 from abc import ABC, abstractmethod
 from collections import Counter
-from collections import Counter
 from enum import StrEnum
 from itertools import chain
 from typing import Any
@@ -176,7 +175,6 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]:
             [token_counts.get(xi, 0) / len(x) for xi in range(self.ndim)]
             for x in enc_batch
             if (token_counts := Counter(xi % self.ndim for xi in x))
-            if (token_counts := Counter(xi % self.ndim for xi in x))
         ]

diff --git a/pyproject.toml b/pyproject.toml
index d58774c..a85772d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -60,7 +60,6 @@ dev = [
 local = [
     "numpy",
     "sentence-transformers",
-    "numpy",
 ]

 [project.urls]
- """ - n = chat_kwargs.get("n", self.config.get("n", 1)) - if n != 1: - raise ValueError("n must be 1 for call_single.") return ( - await self._call( - messages, callbacks, output_type, tools, tool_choice, **chat_kwargs + await self.call( + messages, callbacks, output_type, tools, tool_choice, n=1, **chat_kwargs ) )[0] - - async def call_multiple( - self, - messages: list[Message], - callbacks: list[Callable] | None = None, - output_type: type[BaseModel] | None = None, - tools: list[Tool] | None = None, - tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, - **chat_kwargs, - ) -> list[LLMResult]: - """ - Calls the LLM with a list of messages and returns a list of completion results. - - Args: - messages: A list of messages to send to the LLM. - callbacks: A list of callback functions to execute after receiving the response. - output_type: The type of the output model. - tools: A list of tools to use during the call. - tool_choice: The tool or tool choice strategy to use. - **chat_kwargs: Additional keyword arguments to pass to the chat function. - - Returns: - A list of results from the LLM. - - Raises: - Warning: If the number of completions (`n`) requested is set to 1, - a warning is logged indicating that the returned list will contain a single element. - `n` can be set in chat_kargs or in the model's configuration. - """ - n = chat_kwargs.get("n", self.config.get("n", 1)) - if n == 1: - logger.warning( - "n is 1 for call_multiple. It will return a list with a single element" - ) - return await self._call( - messages, callbacks, output_type, tools, tool_choice, **chat_kwargs - ) - - @overload - async def call( - self, - messages: list[Message], - callbacks: list[Callable] | None = None, - output_type: type[BaseModel] | None = None, - tools: list[Tool] | None = None, - tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, - n: Literal[1] = 1, - **chat_kwargs, - ) -> LLMResult: ... - - @overload - async def call( - self, - messages: list[Message], - callbacks: list[Callable] | None = None, - output_type: type[BaseModel] | None = None, - tools: list[Tool] | None = None, - tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, - n: int | None = None, - **chat_kwargs, - ) -> list[LLMResult]: ... - - async def call( - self, - messages: list[Message], - callbacks: list[Callable] | None = None, - output_type: type[BaseModel] | None = None, - tools: list[Tool] | None = None, - tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, - n: int | None = None, - **chat_kwargs, - ) -> list[LLMResult] | LLMResult: - """ - Call the LLM model with the given messages and configuration. - - Args: - messages: A list of messages to send to the language model. - callbacks: A list of callback functions to execute after receiving the response. - output_type: The type of the output model. - tools: A list of tools to use during the call. - tool_choice: The tool or tool identifier to use. - n: An integer argument that specifies the number of completions to generate. - If n is not specified, the model's configuration is used. - **chat_kwargs: Additional keyword arguments to pass to the chat function. - - Returns: - A list of LLMResult objects if multiple completions are requested (n>1), - otherwise a single LLMResult object. - - Raises: - ValueError: If the number of completions `n` is invalid. - """ - if not n or n <= 0: - logger.info( - "Invalid number of completions `n` requested to the call function. " - "Will get it from the model's configuration." 
diff --git a/tests/test_llms.py b/tests/test_llms.py
index 1a4fd1c..e9f8320 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -328,18 +328,16 @@ async def test_single_completion(self, model_name: str) -> None:
             Message(role="system", content="Respond with single words."),
             Message(content="Hello, how are you?"),
         ]
-        result = await model.call(messages)
+        result = await model.call_single(messages)
         assert isinstance(result, LLMResult)

-        result = await model.call(messages, n=1)  # noqa: FURB120
-        assert isinstance(result, LLMResult)
         assert result.messages
         assert len(result.messages) == 1
         assert result.messages[0].content

         model = self.MODEL_CLS(name=model_name, config={"n": 2})
-        result = await model.call(messages, n=1)  # noqa: FURB120
+        result = await model.call_single(messages)
         assert isinstance(result, LLMResult)
         assert result.messages
         assert len(result.messages) == 1
@@ -365,13 +363,10 @@ async def test_multiple_completion(self, model_name: str, request) -> None:
             with pytest.raises(litellm.BadRequestError, match="anthropic"):
                 await model.call(messages)
         else:
-            results = await model.call(messages, n=None)  # noqa: FURB120
-            assert len(results) == self.NUM_COMPLETIONS
-
-            results = await model.call(messages, n=self.NUM_COMPLETIONS)
+            results = await model.call(messages)  # noqa: FURB120
             assert len(results) == self.NUM_COMPLETIONS

-        model = self.MODEL_CLS(name=model_name, config={"n": 1})
+        model = self.MODEL_CLS(name=model_name, config={"n": 5})
         results = await model.call(messages, n=self.NUM_COMPLETIONS)
         assert len(results) == self.NUM_COMPLETIONS