Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overloaded MultipleCompletionLLMModel.call type #13

Merged
merged 23 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
7146c91
Overloaded typing in MultipleCompletionLLMModel.call. It returns eit…
maykcaldas Dec 5, 2024
1dcda13
Improved logging for call_multiple
maykcaldas Dec 5, 2024
2847af7
removed deprecated check of n in kwargs
maykcaldas Dec 5, 2024
2eac4a6
Merge branch 'main' into over-mult
maykcaldas Dec 6, 2024
6fbf2f2
Added cassettes for TestMultipleCompletionLLMModel
maykcaldas Dec 6, 2024
5d3a3c9
Fix lint
maykcaldas Dec 6, 2024
3f650fc
Implemented tests to check kwarg priority when calling
maykcaldas Dec 9, 2024
7edd613
Exposed missing classes
maykcaldas Dec 9, 2024
bae8765
added embedding_model_factory
maykcaldas Dec 9, 2024
1e6eb78
Added documentation to call functions
maykcaldas Dec 9, 2024
cb16d19
skip lint checking for argument with default value in test_llms
maykcaldas Dec 9, 2024
7966f9a
Fixed pre-commit errors
maykcaldas Dec 9, 2024
9e91858
Reverted changes in uv.lock
maykcaldas Dec 9, 2024
29e4d91
Fixed line wrap in docstrings
maykcaldas Dec 9, 2024
f8090bb
reverting uv.lock
maykcaldas Dec 9, 2024
418fa3b
removed the dependency on numpy. It is now a conditional dependency f…
maykcaldas Dec 9, 2024
ba974e5
Merge branch 'main' into remove_numpy
maykcaldas Dec 9, 2024
c34b02c
Removed image group dependency
maykcaldas Dec 9, 2024
270948e
Merge branch 'remove_numpy' of github.com:Future-House/llm-client int…
maykcaldas Dec 9, 2024
86d455d
Fixed typos
maykcaldas Dec 9, 2024
7ef8f49
Removed overload from the multiple completion llm call
maykcaldas Dec 9, 2024
03ede77
Merge branch 'remove_numpy' into over-mult
maykcaldas Dec 9, 2024
7d196df
Merge branch 'update_init' into over-mult
maykcaldas Dec 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion llmclient/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
from .constants import (
CHARACTERS_PER_TOKEN_ASSUMPTION,
EXTRA_TOKENS_FROM_USER_ROLE,
MODEL_COST_MAP,
)
from .embeddings import (
EmbeddingModel,
EmbeddingModes,
HybridEmbeddingModel,
LiteLLMEmbeddingModel,
SentenceTransformerEmbeddingModel,
SparseEmbeddingModel,
embedding_model_factory,
)
from .exceptions import (
JSONSchemaValidationError,
Expand All @@ -13,17 +20,28 @@
LLMModel,
MultipleCompletionLLMModel,
)
from .types import LLMResult
from .types import (
Chunk,
Embeddable,
LLMResult,
)

__all__ = [
"CHARACTERS_PER_TOKEN_ASSUMPTION",
"EXTRA_TOKENS_FROM_USER_ROLE",
"MODEL_COST_MAP",
"Chunk",
"Embeddable",
"EmbeddingModel",
"EmbeddingModes",
"HybridEmbeddingModel",
"JSONSchemaValidationError",
"LLMModel",
"LLMResult",
"LiteLLMEmbeddingModel",
"LiteLLMModel",
"MultipleCompletionLLMModel",
"SentenceTransformerEmbeddingModel",
"SparseEmbeddingModel",
"embedding_model_factory",
]
20 changes: 12 additions & 8 deletions llmclient/embeddings.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import asyncio
from abc import ABC, abstractmethod
from collections import Counter
from enum import StrEnum
from itertools import chain
from typing import Any

import litellm
import numpy as np
import tiktoken
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator

Expand Down Expand Up @@ -171,13 +172,9 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]:
enc_batch = self.enc.encode_ordinary_batch(texts)
# now get frequency of each token rel to length
return [
(
np.bincount([xi % self.ndim for xi in x], minlength=self.ndim).astype(
float
)
/ len(x)
).tolist()
[token_counts.get(xi, 0) / len(x) for xi in range(self.ndim)]
for x in enc_batch
if (token_counts := Counter(xi % self.ndim for xi in x))
]


Expand All @@ -199,7 +196,11 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]:
all_embeds = await asyncio.gather(
*[m.embed_documents(texts) for m in self.models]
)
return np.concatenate(all_embeds, axis=1).tolist()

return [
list(chain.from_iterable(embed_group))
for embed_group in zip(*all_embeds, strict=True)
]

def set_mode(self, mode: EmbeddingModes) -> None:
# Set mode for all component models
Expand All @@ -217,6 +218,7 @@ class SentenceTransformerEmbeddingModel(EmbeddingModel):
def __init__(self, **kwargs):
super().__init__(**kwargs)
try:
import numpy as np # noqa: F401
from sentence_transformers import SentenceTransformer
except ImportError as exc:
raise ImportError(
Expand All @@ -240,6 +242,8 @@ async def embed_documents(self, texts: list[str]) -> list[list[float]]:
Returns:
A list of embedding vectors.
"""
import numpy as np

# Extract additional configurations if needed
batch_size = self.config.get("batch_size", 32)
device = self.config.get("device", "cpu")
Expand Down
34 changes: 33 additions & 1 deletion llmclient/llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ class MultipleCompletionLLMModel(BaseModel):
"Configuration of the model:"
"model is the name of the llm model to use,"
"temperature is the sampling temperature, and"
"n is the number of completions to generate."
"n is the number of completions to generate by default."
),
)
encoding: Any | None = None
Expand Down Expand Up @@ -667,6 +667,23 @@ async def call( # noqa: C901, PLR0915
tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED,
**chat_kwargs,
) -> list[LLMResult]:
"""
Call the LLM model with the given messages and configuration.

Args:
messages: A list of messages to send to the language model.
callbacks: A list of callback functions to execute after receiving the response.
output_type: The type of the output model.
tools: A list of tools to use during the call.
tool_choice: The tool or tool identifier to use.
**chat_kwargs: Additional keyword arguments to pass to the chat function.

Returns:
A list of LLMResult objects containing the results of the call.

Raises:
ValueError: If the number of completions (n) is invalid.
"""
start_clock = asyncio.get_running_loop().time()

# Deal with tools. Note OpenAI throws a 400 response if tools is empty:
Expand Down Expand Up @@ -829,3 +846,18 @@ async def call( # noqa: C901, PLR0915
result.seconds_to_last_token = end_clock - start_clock

return results

async def call_single(
    self,
    messages: list[Message],
    callbacks: list[Callable] | None = None,
    output_type: type[BaseModel] | None = None,
    tools: list[Tool] | None = None,
    tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED,
    **chat_kwargs,
) -> LLMResult:
    """Request exactly one completion and return it directly.

    Thin convenience wrapper around `call` that forces `n=1` (overriding any
    configured completion count) and unwraps the single-element result list.

    Args:
        messages: Messages to send to the language model.
        callbacks: Optional callbacks executed after the response is received.
        output_type: Optional model type for structured output.
        tools: Optional tools available during the call.
        tool_choice: Tool or tool identifier to use.
        **chat_kwargs: Extra keyword arguments forwarded to `call`.

    Returns:
        The single LLMResult produced by the call.
    """
    results = await self.call(
        messages,
        callbacks=callbacks,
        output_type=output_type,
        tools=tools,
        tool_choice=tool_choice,
        n=1,
        **chat_kwargs,
    )
    return results[0]
7 changes: 2 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ dependencies = [
"fhaviary>=0.8.2", # For core namespace
"limits",
"litellm>=1.44", # For LITELLM_LOG addition
"numpy",
"pydantic~=2.0,>=2.10.1,<2.10.2",
"tiktoken>=0.4.0",
"typing-extensions; python_version <= '3.11'", # for typing.override
Expand All @@ -40,7 +39,7 @@ requires-python = ">=3.11"

[project.optional-dependencies]
dev = [
"fh-llm-client[image,local]",
"fh-llm-client[local]",
"fhaviary[xml]",
"ipython>=8", # Pin to keep recent
"mypy>=1.8", # Pin for mutable-override
Expand All @@ -58,10 +57,8 @@ dev = [
"python-dotenv",
"refurb>=2", # Pin to keep recent
]
image = [
"Pillow",
]
local = [
"numpy",
"sentence-transformers",
]

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
interactions:
- request:
body:
'{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello,
how are you?"}],"model":"gpt-3.5-turbo","n":2}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- "149"
content-type:
- application/json
host:
- api.openai.com
user-agent:
- AsyncOpenAI/Python 1.57.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.57.0
x-stainless-raw-response:
- "true"
x-stainless-retry-count:
- "1"
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAA9RTy2rDMBC8+yuEzklo3jS3QCCXXNoe+qIYWdrYamStKq1LS8i/FzkPOySFXnvR
YWZnNLsrbRPGuFZ8xrgsBMnSme48W6zuxvBAL5v55/3H06Ra4OOoWhXl82LJO1GB2TtIOqp6Ektn
gDTaPS09CILo2p8Oh6PhYHo7qYkSFZgoyx11h71xlyqfYfemPxgflAVqCYHP2GvCGGPb+owZrYIv
PmM3nSNSQggiBz47FTHGPZqIcBGCDiQs8U5DSrQEto69RFRtysO6CiJGs5UxB3x3ustg7jxm4cCf
8LW2OhSpBxHQRt9A6HjSEl800P83DSSMvdVLqc5icuexdJQSbsBGw8Fgb8ebZ9AiDxwhCdOCR50r
ZqkCEtqE1ki4FLIA1SibByAqpbFFtMd+meWa975tbfO/2DeElOAIVOo8KC3P+23KPMQ/8lvZacR1
YB6+A0GZrrXNwTuv6yXXm9wlPwAAAP//AwAh8pBrpAMAAA==
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8ed70040cbcdf99b-SJC
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Thu, 05 Dec 2024 21:06:36 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
openai-organization:
- future-house-xr4tdh
openai-processing-ms:
- "134"
openai-version:
- "2020-10-01"
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- "12000"
x-ratelimit-limit-tokens:
- "1000000"
x-ratelimit-remaining-requests:
- "11999"
x-ratelimit-remaining-tokens:
- "999953"
x-ratelimit-reset-requests:
- 5ms
x-ratelimit-reset-tokens:
- 2ms
x-request-id:
- req_1f88664946b9891fbc90796687f144c4
status:
code: 200
message: OK
- request:
body:
'{"messages":[{"role":"system","content":"Respond with single words."},{"role":"user","content":"Hello,
how are you?"}],"model":"gpt-3.5-turbo","n":2}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- "149"
content-type:
- application/json
host:
- api.openai.com
user-agent:
- AsyncOpenAI/Python 1.57.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.57.0
x-stainless-raw-response:
- "true"
x-stainless-retry-count:
- "0"
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAA9RTTUsDMRC9768IOW9LP63tzaIIIqgH7UFkSZPZbTSbCcksWEr/u2T7sVtawauX
HN6b9/JmJtkkjHGt+IxxuRIkS2c6N8vbx5fF9Tisy7l5e13clfl0/vQwfl5P5o6nUYHLT5B0UHUl
ls4AabQ7WnoQBNG1PxkOR8PBZHpVEyUqMFFWOOoMu+MOVX6JnV5/MN4rV6glBD5j7wljjG3qM2a0
Cr75jPXSA1JCCKIAPjsWMcY9mohwEYIOJCzxtCElWgJbx75HVG3KQ14FEaPZypg9vj3eZbBwHpdh
zx/xXFsdVpkHEdBG30DoeNISnzXQ/zcNJIx91EupTmJy57F0lBF+gY2Gg8HOjjfPoEXuOUISpgWP
0gtmmQIS2oTWSLgUcgWqUTYPQFRKY4toj/08yyXvXdvaFn+xbwgpwRGozHlQWp7225R5iH/kt7Lj
iOvAPKwDQZnl2hbgndf1kutNbpMfAAAA//8DALEE5HikAwAA
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8ed700428d77f99b-SJC
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Thu, 05 Dec 2024 21:06:36 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
openai-organization:
- future-house-xr4tdh
openai-processing-ms:
- "114"
openai-version:
- "2020-10-01"
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- "12000"
x-ratelimit-limit-tokens:
- "1000000"
x-ratelimit-remaining-requests:
- "11999"
x-ratelimit-remaining-tokens:
- "999953"
x-ratelimit-reset-requests:
- 5ms
x-ratelimit-reset-tokens:
- 2ms
x-request-id:
- req_e32516fa5bb6ab11dda5155511280ea6
status:
code: 200
message: OK
version: 1
Loading
Loading