diff --git a/llmclient/__init__.py b/llmclient/__init__.py index d471c9d..dc27f02 100644 --- a/llmclient/__init__.py +++ b/llmclient/__init__.py @@ -1,9 +1,16 @@ +from .constants import ( + CHARACTERS_PER_TOKEN_ASSUMPTION, + EXTRA_TOKENS_FROM_USER_ROLE, + MODEL_COST_MAP, +) from .embeddings import ( EmbeddingModel, EmbeddingModes, HybridEmbeddingModel, + LiteLLMEmbeddingModel, SentenceTransformerEmbeddingModel, SparseEmbeddingModel, + embedding_model_factory, ) from .exceptions import ( JSONSchemaValidationError, @@ -13,17 +20,28 @@ LLMModel, MultipleCompletionLLMModel, ) -from .types import LLMResult +from .types import ( + Chunk, + Embeddable, + LLMResult, +) __all__ = [ + "CHARACTERS_PER_TOKEN_ASSUMPTION", + "EXTRA_TOKENS_FROM_USER_ROLE", + "MODEL_COST_MAP", + "Chunk", + "Embeddable", "EmbeddingModel", "EmbeddingModes", "HybridEmbeddingModel", "JSONSchemaValidationError", "LLMModel", "LLMResult", + "LiteLLMEmbeddingModel", "LiteLLMModel", "MultipleCompletionLLMModel", "SentenceTransformerEmbeddingModel", "SparseEmbeddingModel", + "embedding_model_factory", ] diff --git a/llmclient/embeddings.py b/llmclient/embeddings.py index ce15503..e03de2e 100644 --- a/llmclient/embeddings.py +++ b/llmclient/embeddings.py @@ -1,10 +1,11 @@ import asyncio from abc import ABC, abstractmethod +from collections import Counter from enum import StrEnum +from itertools import chain from typing import Any import litellm -import numpy as np import tiktoken from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator @@ -171,13 +172,9 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: enc_batch = self.enc.encode_ordinary_batch(texts) # now get frequency of each token rel to length return [ - ( - np.bincount([xi % self.ndim for xi in x], minlength=self.ndim).astype( - float - ) - / len(x) - ).tolist() + [token_counts.get(xi, 0) / len(x) for xi in range(self.ndim)] for x in enc_batch + if (token_counts := Counter(xi % self.ndim for xi in x)) ] @@ -199,7 +196,11 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: all_embeds = await asyncio.gather( *[m.embed_documents(texts) for m in self.models] ) - return np.concatenate(all_embeds, axis=1).tolist() + + return [ + list(chain.from_iterable(embed_group)) + for embed_group in zip(*all_embeds, strict=True) + ] def set_mode(self, mode: EmbeddingModes) -> None: # Set mode for all component models @@ -217,6 +218,7 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel): def __init__(self, **kwargs): super().__init__(**kwargs) try: + import numpy as np # noqa: F401 from sentence_transformers import SentenceTransformer except ImportError as exc: raise ImportError( @@ -240,6 +242,8 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]: Returns: A list of embedding vectors. """ + import numpy as np + # Extract additional configurations if needed batch_size = self.config.get("batch_size", 32) device = self.config.get("device", "cpu") diff --git a/llmclient/llms.py b/llmclient/llms.py index ad12461..149f43f 100644 --- a/llmclient/llms.py +++ b/llmclient/llms.py @@ -612,7 +612,7 @@ class MultipleCompletionLLMModel(BaseModel): "Configuration of the model:" "model is the name of the llm model to use," "temperature is the sampling temperature, and" - "n is the number of completions to generate." + "n is the number of completions to generate by default." 
), ) encoding: Any | None = None @@ -667,6 +667,23 @@ async def call( # noqa: C901, PLR0915 tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, **chat_kwargs, ) -> list[LLMResult]: + """ + Call the LLM model with the given messages and configuration. + + Args: + messages: A list of messages to send to the language model. + callbacks: A list of callback functions to execute after receiving the response. + output_type: The type of the output model. + tools: A list of tools to use during the call. + tool_choice: The tool or tool identifier to use. + **chat_kwargs: Additional keyword arguments to pass to the chat function. + + Returns: + A list of LLMResult objects containing the results of the call. + + Raises: + ValueError: If the number of completions (n) is invalid. + """ start_clock = asyncio.get_running_loop().time() # Deal with tools. Note OpenAI throws a 400 response if tools is empty: @@ -829,3 +846,18 @@ async def call( # noqa: C901, PLR0915 result.seconds_to_last_token = end_clock - start_clock return results + + async def call_single( + self, + messages: list[Message], + callbacks: list[Callable] | None = None, + output_type: type[BaseModel] | None = None, + tools: list[Tool] | None = None, + tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, + **chat_kwargs, + ) -> LLMResult: + return ( + await self.call( + messages, callbacks, output_type, tools, tool_choice, n=1, **chat_kwargs + ) + )[0] diff --git a/pyproject.toml b/pyproject.toml index 8c6d278..a85772d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ dependencies = [ "fhaviary>=0.8.2", # For core namespace "limits", "litellm>=1.44", # For LITELLM_LOG addition - "numpy", "pydantic~=2.0,>=2.10.1,<2.10.2", "tiktoken>=0.4.0", "typing-extensions; python_version <= '3.11'", # for typing.override @@ -40,7 +39,7 @@ requires-python = ">=3.11" [project.optional-dependencies] dev = [ - "fh-llm-client[image,local]", + "fh-llm-client[local]", "fhaviary[xml]", "ipython>=8", # Pin to keep recent "mypy>=1.8", # Pin for mutable-override @@ -58,10 +57,8 @@ dev = [ "python-dotenv", "refurb>=2", # Pin to keep recent ] -image = [ - "Pillow", -] local = [ + "numpy", "sentence-transformers", ] diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml new file mode 100644 index 0000000..caee9e8 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[gpt-3.5-turbo].yaml @@ -0,0 +1,196 @@ +interactions: + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "149" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "1" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RTy2rDMBC8+yuEzklo3jS3QCCXXNoe+qIYWdrYamStKq1LS8i/FzkPOySFXnvR + 
YWZnNLsrbRPGuFZ8xrgsBMnSme48W6zuxvBAL5v55/3H06Ra4OOoWhXl82LJO1GB2TtIOqp6Ektn + gDTaPS09CILo2p8Oh6PhYHo7qYkSFZgoyx11h71xlyqfYfemPxgflAVqCYHP2GvCGGPb+owZrYIv + PmM3nSNSQggiBz47FTHGPZqIcBGCDiQs8U5DSrQEto69RFRtysO6CiJGs5UxB3x3ustg7jxm4cCf + 8LW2OhSpBxHQRt9A6HjSEl800P83DSSMvdVLqc5icuexdJQSbsBGw8Fgb8ebZ9AiDxwhCdOCR50r + ZqkCEtqE1ki4FLIA1SibByAqpbFFtMd+meWa975tbfO/2DeElOAIVOo8KC3P+23KPMQ/8lvZacR1 + YB6+A0GZrrXNwTuv6yXXm9wlPwAAAP//AwAh8pBrpAMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed70040cbcdf99b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "134" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999953" + x-ratelimit-reset-requests: + - 5ms + x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_1f88664946b9891fbc90796687f144c4 + status: + code: 200 + message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "149" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RTTUsDMRC9768IOW9LP63tzaIIIqgH7UFkSZPZbTSbCcksWEr/u2T7sVtawauX + HN6b9/JmJtkkjHGt+IxxuRIkS2c6N8vbx5fF9Tisy7l5e13clfl0/vQwfl5P5o6nUYHLT5B0UHUl + ls4AabQ7WnoQBNG1PxkOR8PBZHpVEyUqMFFWOOoMu+MOVX6JnV5/MN4rV6glBD5j7wljjG3qM2a0 + Cr75jPXSA1JCCKIAPjsWMcY9mohwEYIOJCzxtCElWgJbx75HVG3KQ14FEaPZypg9vj3eZbBwHpdh + zx/xXFsdVpkHEdBG30DoeNISnzXQ/zcNJIx91EupTmJy57F0lBF+gY2Gg8HOjjfPoEXuOUISpgWP + 0gtmmQIS2oTWSLgUcgWqUTYPQFRKY4toj/08yyXvXdvaFn+xbwgpwRGozHlQWp7225R5iH/kt7Lj + iOvAPKwDQZnl2hbgndf1kutNbpMfAAAA//8DALEE5HikAwAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700428d77f99b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "114" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999953" + x-ratelimit-reset-requests: + - 5ms + 
x-ratelimit-reset-tokens: + - 2ms + x-request-id: + - req_e32516fa5bb6ab11dda5155511280ea6 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml new file mode 100644 index 0000000..150ca29 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_multiple_completion[openai].yaml @@ -0,0 +1,202 @@ +interactions: + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "158" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA7RTwWoCMRC971ekObui7qLWm1JaWgq9FEopZckm4xqbzYQkSxXx30tWu7uihR7a + Sw7z5r28N5PsIkKoFHRGKF8xz0uj4nl+A3yxWD9Vi4f56zYtko15Htr1/aP9fKG9wMB8Ddx/s/oc + S6PAS9QHmFtgHoLqcJIkaZJcT8c1UKIAFWiF8XGKcSm1jEeDURoPJvFwemSvUHJwdEbeIkII2dVn + 8KkFbOiMDHrflRKcYwXQWdNECLWoQoUy56TzTHvaa0GO2oOurd9KDf0uZmFZORb86UqpY33fXKaw + MBZzd8Sb+lJq6VaZBeZQB2Hn0dCoQz5LMPybBHeI4uqfE0SEvNdrqU58UmOxND7z+AE6CI5GBzna + PoYWTI+YR89UhzPuXRDLBHgmlevMhHLGVyBaZvsEWCUkdoDu3M+9XNI+xJa6+I18C3AOxoPIjAUh + +Wnets1C+Ck/tTUjrg1Tt3UeymwpdQHWWHnY8tJkec4SPoXJIKfRPvoCAAD//wMAviUi9bUDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed71d50ac15cf13-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:26:27 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=.T97IJZbEqHKl_VUBqOzbYQ3.fPwrK1uEUCoRWrQ0Vs-1733433987-1.0.1.1-g7sgDoAXo0fTveWsSMwxPdEXEmD5ZOQ_XYi1pZoi0dW2JzEVU83E5oRAyXudBimOLtvB92CoJm1WxF9LBkquZA; + path=/; expires=Thu, 05-Dec-24 21:56:27 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=h0PMfMYOdM04Mkzg9aKUS2PH3E1LFBnUsNmdRQ4ltVY-1733433987796-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "1036" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999954" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2525b96c65ed2235204a4f7fbb79f88e + status: + code: 200 + message: OK + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-4o-mini-2024-07-18","n":2}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate 
+ connection: + - keep-alive + content-length: + - "158" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//tFPLasMwELz7K1Sd7eA479wKpaVQAr30UoqRpbWtVtYKSYG0If9e5Dzs + kBR6aC867OyMZnalbUQIlYIuCeU187wxKrkt7qCeblYTs5gau3pxMmP8/vmrSh/hicaBgcU7cH9k + DTg2RoGXqPcwt8A8BNXhbDQaj0aLxbAFGhSgAq0yPhlj0kgtkyzNxkk6S4bzA7tGycHRJXmNCCFk + 257BpxawoUuSxsdKA86xCujy1EQItahChTLnpPNMexp3IEftQbfWHxDFoI9ZKNeOBX96rdShvjtd + prAyFgt3wE/1Umrp6twCc6iDsPNoaNQjXyQY/l2Cm39OEBHy1q5lfeaTGouN8bnHD9BBMMv2crR7 + DB04PmAePVM9zjS+IpYL8Ewq15sJ5YzXIDpm9wTYWkjsAf25X3q5pr2PLXX1G/kO4ByMB5EbC0Ly + 87xdm4XwU35qO424NUzdp/PQ5KXUFVhj5X7LpcnTWTopyvmMpzTaRd8AAAD//wMAgEsPw7UDAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed71d57fe24cf13-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:26:31 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "765" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999954" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_84370a5d5d53f54172bc0ffe3feb7e4a + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml new file mode 100644 index 0000000..c5e4922 --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[claude-3-haiku-20240307].yaml @@ -0,0 +1,70 @@ +interactions: + - request: + body: + '{"messages":[{"role":"user","content":[{"type":"text","text":"Hello, how + are you?"}]}],"system":[{"type":"text","text":"Respond with single words."}],"max_tokens":4096,"model":"claude-3-haiku-20240307"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - "2023-06-01" + connection: + - keep-alive + content-length: + - "202" + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - litellm/1.53.3 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA0yOzWqEQBCE36XO4+LfYpxb8gA5BdwlBBm0MbJjj7F7wCC+e1ASyKngqx9qw9jD + YpKhTbPXItPhpcqeFwlV2az3p9tb3cBAv2c6UiTiBoLBEvwBnMgo6lhhMIWePCw672JPSZF8uvER + kzzNy7RIKxh0gZVYYd+3v0Wl9eieYtGQ9xfsHwaiYW4XchIYFsR9q3Fh/BpCX5G4I1iO3hvE85Xd + MPIctdXwIBbYrDYIUf+j677/AAAA//8DAHETe7LyAAAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700088ad5942c-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - 
application/json + Date: + - Thu, 05 Dec 2024 21:06:27 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-ratelimit-requests-limit: + - "5000" + anthropic-ratelimit-requests-remaining: + - "4999" + anthropic-ratelimit-requests-reset: + - "2024-12-05T21:06:27Z" + anthropic-ratelimit-tokens-limit: + - "5000000" + anthropic-ratelimit-tokens-remaining: + - "5000000" + anthropic-ratelimit-tokens-reset: + - "2024-12-05T21:06:27Z" + request-id: + - req_01PYs7k3gcHPDqdgTayudkMv + via: + - 1.1 google + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml new file mode 100644 index 0000000..c591a7c --- /dev/null +++ b/tests/cassettes/TestMultipleCompletionLLMModel.test_single_completion[gpt-3.5-turbo].yaml @@ -0,0 +1,105 @@ +interactions: + - request: + body: + '{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello, + how are you?"}],"model":"gpt-3.5-turbo","n":1}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "149" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.57.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.57.0 + x-stainless-raw-response: + - "true" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4yST0sDMRDF7/spQs5tabstK70JQj0oFL2IIks2me7GZjMxmaVq6XeXbP/sihW8 + 5DC/eS9vJtkljHGt+IJxWQmStTPD6+Lm7tauskf7/iSMfdjMlvPnWZV9bVfbez6ICizeQNJJNZJY + OwOk0R6w9CAIouskS9NZOs2ushbUqMBEWelomI7mQ2p8gcPxZDo/KivUEgJfsJeEMcZ27RkzWgUf + fMHGg1OlhhBECXxxbmKMezSxwkUIOpCwxAcdlGgJbBt7iaj6yMO6CSJGs40xx/r+fJfB0nkswpGf + 62ttdahyDyKgjb6B0PGW7hPGXtuZmh8xufNYO8oJN2Cj4XR6sOPdFjs4OTJCEqanSQcXzHIFJLQJ + vZVwKWQFqlN2+xON0tgDSW/k31kueR/G1rb8j30HpARHoHLnQWn5c96uzUP8Yn+1nVfcBubhMxDU + +VrbErzzun3k9iX3yTcAAAD//wMAusvg7OMCAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8ed700089e2e072b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 05 Dec 2024 21:06:27 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=4Fzl_VHC9.c_.kyLBREHy8a7wA.lEcwtqZXonX9ka10-1733432787-1.0.1.1-tBlI5dXtGa55yRlJwRgFkxlkQ7emZl1_xhYirjNw7CcPBv7WkC60ubux0sARYF8Nzun5tNgFTC100P_ywLDMgw; + path=/; expires=Thu, 05-Dec-24 21:36:27 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=WokAKLMIioMxOZo9K5N1oJLSuWXQVmUht7hb75_Z06w-1733432787525-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "132" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "12000" + x-ratelimit-limit-tokens: + - "1000000" + x-ratelimit-remaining-requests: + - "11999" + x-ratelimit-remaining-tokens: + - "999969" + 
x-ratelimit-reset-requests:
+        - 5ms
+      x-ratelimit-reset-tokens:
+        - 1ms
+      x-request-id:
+        - req_567abdfb8bf13c71bc3f2bac8be8b4af
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/test_llms.py b/tests/test_llms.py
index 6211d8d..e9f8320 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -8,7 +8,7 @@
 import numpy as np
 import pytest
 from aviary.core import Message, Tool, ToolRequestMessage
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 
 from llmclient.exceptions import JSONSchemaValidationError
 from llmclient.llms import (
@@ -317,68 +317,57 @@ async def test_text_image_message(self, model_name: str) -> None:
         ), "Expected content in message, but got None"
         assert "red" in result.messages[-1].content.lower()
 
-
-class TestSingleCompletionLLMModel(TestMultipleCompletionLLMModel):
-    NUM_COMPLETIONS: ClassVar[int] = 1
-    DEFAULT_CONFIG: ClassVar[dict] = {}
-    MODEL_CLS: ClassVar[type[MultipleCompletionLLMModel]] = MultipleCompletionLLMModel
-
     @pytest.mark.parametrize(
         "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"]
     )
     @pytest.mark.asyncio
     @pytest.mark.vcr
-    async def test_model(self, model_name: str) -> None:
-        await super().test_model(model_name)
-
-    @pytest.mark.vcr
-    @pytest.mark.parametrize(
-        "model_name", [CILLMModelNames.ANTHROPIC.value, "gpt-3.5-turbo"]
-    )
-    @pytest.mark.asyncio
-    async def test_streaming(self, model_name: str) -> None:
-        model = self.MODEL_CLS(name=model_name)
+    async def test_single_completion(self, model_name: str) -> None:
+        model = self.MODEL_CLS(name=model_name, config={"n": 1})
         messages = [
             Message(role="system", content="Respond with single words."),
             Message(content="Hello, how are you?"),
         ]
-        content = []
+        result = await model.call_single(messages)
+        assert isinstance(result, LLMResult)
 
-        def callback(s):
-            content.append(s)
+        assert result.messages
+        assert len(result.messages) == 1
+        assert result.messages[0].content
 
-        results = await model.call(messages, [callback])
-        for result in results:
-            assert result.completion_count > 0
-        assert content
+        model = self.MODEL_CLS(name=model_name, config={"n": 2})
+        result = await model.call_single(messages)
+        assert isinstance(result, LLMResult)
+        assert result.messages
+        assert len(result.messages) == 1
+        assert result.messages[0].content
 
-    @pytest.mark.vcr
     @pytest.mark.asyncio
-    async def test_parameterizing_tool_from_arg_union(self) -> None:
-        await super().test_parameterizing_tool_from_arg_union()
-
+    @pytest.mark.vcr
-    @pytest.mark.asyncio
-    @pytest.mark.skip(reason="TODO: Check why this error should be raised")
-    async def test_output_type_rejected_validation(self) -> None:
-        class InstructionList(BaseModel):
-            instructions: list[str] = Field(description="list of instructions")
-
-        model = self.MODEL_CLS(name=CILLMModelNames.ANTHROPIC.value)
-        with pytest.raises(litellm.BadRequestError, match="anthropic"):
-            await model.call(
-                [Message(content="What are three things I should do today?")],
-                output_type=InstructionList,
-            )
-
     @pytest.mark.parametrize(
         "model_name",
-        [CILLMModelNames.ANTHROPIC.value, "gpt-4-turbo", CILLMModelNames.OPENAI.value],
+        [
+            pytest.param(CILLMModelNames.ANTHROPIC.value, id="anthropic"),
+            pytest.param(CILLMModelNames.OPENAI.value, id="openai"),
+        ],
     )
-    @pytest.mark.asyncio
-    @pytest.mark.vcr
-    async def test_text_image_message(self, model_name: str) -> None:
-        await super().test_text_image_message(model_name)
+    async def test_multiple_completion(self, model_name: str, request) -> None:
+        
model = self.MODEL_CLS(name=model_name, config={"n": self.NUM_COMPLETIONS}) + messages = [ + Message(role="system", content="Respond with single words."), + Message(content="Hello, how are you?"), + ] + if request.node.callspec.id == "anthropic": + # Anthropic does not support multiple completions + with pytest.raises(litellm.BadRequestError, match="anthropic"): + await model.call(messages) + else: + results = await model.call(messages) # noqa: FURB120 + assert len(results) == self.NUM_COMPLETIONS + + model = self.MODEL_CLS(name=model_name, config={"n": 5}) + results = await model.call(messages, n=self.NUM_COMPLETIONS) + assert len(results) == self.NUM_COMPLETIONS def test_json_schema_validation() -> None: diff --git a/uv.lock b/uv.lock index 8d118c6..a161538 100644 --- a/uv.lock +++ b/uv.lock @@ -563,14 +563,13 @@ wheels = [ [[package]] name = "fh-llm-client" -version = "0.0.4.dev6+g6de1e91.d20241206" +version = "0.0.4.dev3+g418fa3b.d20241209" source = { editable = "." } dependencies = [ { name = "coredis" }, { name = "fhaviary" }, { name = "limits" }, { name = "litellm" }, - { name = "numpy" }, { name = "pydantic" }, { name = "tiktoken" }, { name = "typing-extensions", marker = "python_full_version < '3.12'" }, @@ -581,7 +580,7 @@ dev = [ { name = "fhaviary", extra = ["xml"] }, { name = "ipython" }, { name = "mypy" }, - { name = "pillow" }, + { name = "numpy" }, { name = "pre-commit" }, { name = "pylint-pydantic" }, { name = "pytest" }, @@ -597,10 +596,8 @@ dev = [ { name = "refurb" }, { name = "sentence-transformers" }, ] -image = [ - { name = "pillow" }, -] local = [ + { name = "numpy" }, { name = "sentence-transformers" }, ] @@ -610,7 +607,7 @@ codeflash = [ { name = "fhaviary", extra = ["xml"] }, { name = "ipython" }, { name = "mypy" }, - { name = "pillow" }, + { name = "numpy" }, { name = "pre-commit" }, { name = "pylint-pydantic" }, { name = "pytest" }, @@ -630,7 +627,7 @@ dev = [ { name = "fhaviary", extra = ["xml"] }, { name = "ipython" }, { name = "mypy" }, - { name = "pillow" }, + { name = "numpy" }, { name = "pre-commit" }, { name = "pylint-pydantic" }, { name = "pytest" }, @@ -650,15 +647,14 @@ dev = [ [package.metadata] requires-dist = [ { name = "coredis" }, - { name = "fh-llm-client", extras = ["image", "local"], marker = "extra == 'dev'" }, + { name = "fh-llm-client", extras = ["local"], marker = "extra == 'dev'" }, { name = "fhaviary", specifier = ">=0.8.2" }, { name = "fhaviary", extras = ["xml"], marker = "extra == 'dev'" }, { name = "ipython", marker = "extra == 'dev'", specifier = ">=8" }, { name = "limits" }, { name = "litellm", specifier = ">=1.44" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8" }, - { name = "numpy" }, - { name = "pillow", marker = "extra == 'image'" }, + { name = "numpy", marker = "extra == 'local'" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.4" }, { name = "pydantic", specifier = "~=2.0,>=2.10.1,<2.10.2" }, { name = "pylint-pydantic", marker = "extra == 'dev'" },
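Reviewer note: a minimal sketch of the two user-facing changes in this diff, the numpy-free hybrid-embedding concatenation and the new `call_single` helper. This is illustrative only and not part of the diff; the model name is a placeholder, and the `call_single` portion assumes an OpenAI API key is configured.

```python
import asyncio
from itertools import chain

from aviary.core import Message

from llmclient import MultipleCompletionLLMModel

# HybridEmbeddingModel now replaces np.concatenate(all_embeds, axis=1) with a
# zip/chain comprehension: per document, component-model vectors are
# concatenated in model order, and strict=True raises if any model returns a
# different number of embeddings.
model_a = [[1.0, 2.0], [3.0, 4.0]]  # two documents, 2-dim embeddings
model_b = [[5.0], [6.0]]  # same two documents, 1-dim embeddings
merged = [
    list(chain.from_iterable(group)) for group in zip(model_a, model_b, strict=True)
]
assert merged == [[1.0, 2.0, 5.0], [3.0, 4.0, 6.0]]


async def demo() -> None:
    # config["n"] is now the *default* completion count; call_single() always
    # overrides it with n=1 and unwraps the lone LLMResult.
    model = MultipleCompletionLLMModel(name="gpt-4o-mini", config={"n": 2})
    messages = [
        Message(role="system", content="Respond with single words."),
        Message(content="Hello, how are you?"),
    ]
    results = await model.call(messages)  # list[LLMResult], len == 2
    single = await model.call_single(messages)  # one LLMResult
    print(len(results), single.messages[0].content if single.messages else None)


asyncio.run(demo())
```

As the `anthropic` test id above encodes, Anthropic models reject n > 1 with a BadRequestError, so `call_single` is the safe path for those models.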