From 4e950245c729792fa9ed481bcf313095c6bdf31c Mon Sep 17 00:00:00 2001 From: Mayk Caldas Date: Mon, 25 Nov 2024 18:21:40 -0800 Subject: [PATCH] Updated client (#2) * implemented tests workflow * updated llm-client accordingly to the current pqa version * solved mypy and ruff errors * removed placeholders for future features * changed the package name * updated uv.lock --------- Co-authored-by: Mayk Caldas --- .github/workflows/test.yaml | 64 ++ .gitignore | 1 + .pre-commit-config.yaml | 56 +- LICENSE | 201 ++++++ README.md | 8 +- llmclient/__init__.py | 7 + llmclient/constants.py | 29 + llmclient/embeddings.py | 272 ++++++++ llmclient/exceptions.py | 2 + llmclient/llms.py | 584 +++++++++++++++++ .../constants.py => llmclient/prompts.py | 0 llmclient/rate_limiter.py | 397 +++++++++++ src/llmclient/result.py => llmclient/types.py | 115 ++-- src/llmclient/util.py => llmclient/utils.py | 57 +- pyproject.toml | 59 +- src/llmclient/__init__.py | 7 - src/llmclient/model.py | 504 -------------- tests/__init__.py | 0 ...est_max_token_truncation[with-router].yaml | 103 +++ ..._max_token_truncation[without-router].yaml | 103 +++ ...LLMModel.test_run_prompt[with-router].yaml | 495 ++++++++++++++ ...Model.test_run_prompt[without-router].yaml | 501 ++++++++++++++ tests/conftest.py | 75 +++ tests/test_embeddings.py | 67 ++ tests/test_llms.py | 261 ++++++++ tests/test_rate_limiter.py | 297 +++++++++ uv.lock | 618 ++++++++++++------ 27 files changed, 4056 insertions(+), 827 deletions(-) create mode 100644 .github/workflows/test.yaml create mode 100644 LICENSE create mode 100644 llmclient/__init__.py create mode 100644 llmclient/constants.py create mode 100644 llmclient/embeddings.py create mode 100644 llmclient/exceptions.py create mode 100644 llmclient/llms.py rename src/llmclient/constants.py => llmclient/prompts.py (100%) create mode 100644 llmclient/rate_limiter.py rename src/llmclient/result.py => llmclient/types.py (50%) rename src/llmclient/util.py => llmclient/utils.py (54%) delete mode 100644 src/llmclient/__init__.py delete mode 100644 src/llmclient/model.py create mode 100644 tests/__init__.py create mode 100644 tests/cassettes/TestLiteLLMModel.test_max_token_truncation[with-router].yaml create mode 100644 tests/cassettes/TestLiteLLMModel.test_max_token_truncation[without-router].yaml create mode 100644 tests/cassettes/TestLiteLLMModel.test_run_prompt[with-router].yaml create mode 100644 tests/cassettes/TestLiteLLMModel.test_run_prompt[without-router].yaml create mode 100644 tests/conftest.py create mode 100644 tests/test_embeddings.py create mode 100644 tests/test_llms.py create mode 100644 tests/test_rate_limiter.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..da4238a --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,64 @@ +name: Lint and Test + +on: + push: + branches: [main] + pull_request: + workflow_dispatch: + +jobs: + pre-commit: + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' # pre-commit-ci/lite-action only runs here + strategy: + matrix: + python-version: [3.11, 3.12] # Our min and max supported Python versions + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # For setuptools-scm, replace with fetch-tags after https://github.com/actions/checkout/issues/1471 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - uses: pre-commit/action@v3.0.1 + - uses: pre-commit-ci/lite-action@v1.1.0 + if: always() + lint: + runs-on: ubuntu-latest + strategy: + 
matrix: + python-version: [3.11] # Our min supported Python version + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v3 + with: + enable-cache: true + - run: uv python pin ${{ matrix.python-version }} + - uses: hynek/build-and-inspect-python-package@v2 + - run: uv sync --python-preference=only-managed + - run: uv run refurb llmclient tests + - run: uv run pylint llmclient + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.11, 3.12] # Our min and max supported Python versions + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v3 + with: + enable-cache: true + - run: uv python pin ${{ matrix.python-version }} + - run: uv sync --python-preference=only-managed + - name: Cache datasets + uses: actions/cache@v4 + with: + path: ~/.cache/huggingface/datasets + key: ${{ runner.os }}-datasets-${{ hashFiles('paperqa') }} + restore-keys: ${{ runner.os }}-datasets- + - run: uv run pytest -n auto + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + SEMANTIC_SCHOLAR_API_KEY: ${{ secrets.SEMANTIC_SCHOLAR_API_KEY }} + CROSSREF_API_KEY: ${{ secrets.CROSSREF_API_KEY }} diff --git a/.gitignore b/.gitignore index 01a52fb..6de69c5 100644 --- a/.gitignore +++ b/.gitignore @@ -100,6 +100,7 @@ fabric.properties !.vscode/launch.json !.vscode/extensions.json !.vscode/*.code-snippets +.vscode/ # Local History for Visual Studio Code .history/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 25dcbe1..b80c0d2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,24 +19,18 @@ repos: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.1 + rev: v0.8.0 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - - id: ruff-format - repo: https://github.com/rbubley/mirrors-prettier rev: v3.3.3 hooks: - id: prettier - - repo: https://github.com/Yelp/detect-secrets - rev: v1.5.0 + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 24.10.0 hooks: - - id: detect-secrets - additional_dependencies: [".[word_list]"] - args: - - --word-list=.secrets.allowlist - - --exclude-files=.secrets.baseline$ - exclude: tests/cassettes + - id: black - repo: https://github.com/jumanjihouse/pre-commit-hooks rev: 3.0.0 hooks: @@ -48,7 +42,7 @@ repos: additional_dependencies: [".[toml]"] exclude_types: [jupyter] - repo: https://github.com/pappasam/toml-sort - rev: v0.23.1 + rev: v0.24.2 hooks: - id: toml-sort-fix - repo: https://github.com/srstevenson/nb-clean @@ -57,29 +51,45 @@ repos: - id: nb-clean args: [--preserve-cell-outputs, --remove-empty-cells] - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.22 + rev: v0.23 hooks: - id: validate-pyproject additional_dependencies: - - "validate-pyproject-schema-store[all]>=2024.08.19" # For Ruff renaming RUF025 to C420 + - "validate-pyproject-schema-store[all]>=2024.06.24" # For Ruff renaming RUF025 to C420 - repo: https://github.com/astral-sh/uv-pre-commit - rev: 0.4.29 + rev: 0.4.30 hooks: - id: uv-lock + - repo: https://github.com/jsh9/markdown-toc-creator + rev: 0.0.8 + hooks: + - id: markdown-toc-creator - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.13.0 hooks: - id: mypy + args: [--pretty, --ignore-missing-imports] additional_dependencies: - - fastapi>=0.109 # Match pyproject.toml + - aiohttp + - PyMuPDF>=1.24.12 + - anyio + - coredis + - fhaviary[llm]>=0.10.0 # Match pyproject.toml + - ldp>=0.12 # Match pyproject.toml + - html2text + - 
litellm>=1.44 # Match pyproject.toml - httpx - - litellm>=1.40.15,!=1.49.4,!=1.49.5,!=1.49.6 # Match pyproject.toml - - numpy>=1.20 # Match pyproject.toml - - openai>=1,<1.47 # Match pyproject.toml + - limits + - pybtex + - numpy + - pandas-stubs - pydantic~=2.0 # Match pyproject.toml + - pydantic-settings + - rich + - tantivy - tenacity - - torch - - types-aiofiles - - types-tqdm - - usearch>=2.13 # Match pyproject.toml - - wandb \ No newline at end of file + - tiktoken>=0.4.0 # Match pyproject.toml + - types-setuptools + - types-PyYAML + - sentence-transformers + - pyzotero diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 495d951..519b499 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,20 @@ # llm-client -Central LLM client for use by LDP and PaperQA + +Central FutureHouse LLM client library. ## Quick Start + ``` $ pip install -e . -$ uv sync && uv run pytest +$ uv sync && uv run pytest -n auto ``` ## Clients + - LLMModel - LLMResult ## Examples + - [PaperQA](https://github.com/Future-House/paper-qa/compare/main...llm-result-client) - [LDP](https://github.com/Future-House/ldp/compare/main...llm-result-client) diff --git a/llmclient/__init__.py b/llmclient/__init__.py new file mode 100644 index 0000000..e1769b2 --- /dev/null +++ b/llmclient/__init__.py @@ -0,0 +1,7 @@ +from llmclient.llms import LLMModel +from llmclient.types import LLMResult + +__all__ = [ + "LLMModel", + "LLMResult", +] diff --git a/llmclient/constants.py b/llmclient/constants.py new file mode 100644 index 0000000..3220d62 --- /dev/null +++ b/llmclient/constants.py @@ -0,0 +1,29 @@ +from sys import version_info + +import litellm + +CHARACTERS_PER_TOKEN_ASSUMPTION: float = 4.0 +EXTRA_TOKENS_FROM_USER_ROLE: int = 7 + +MODEL_COST_MAP = litellm.get_model_cost_map("") + +DEFAULT_VERTEX_SAFETY_SETTINGS: list[dict[str, str]] = [ + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_ONLY_HIGH", + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_ONLY_HIGH", + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_ONLY_HIGH", + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_ONLY_HIGH", + }, +] + +IS_PYTHON_BELOW_312 = version_info < (3, 12) diff --git a/llmclient/embeddings.py b/llmclient/embeddings.py new file mode 100644 index 0000000..2f4ca24 --- /dev/null +++ b/llmclient/embeddings.py @@ -0,0 +1,272 @@ +from __future__ import annotations + +import asyncio +from abc import ABC, abstractmethod +from enum import StrEnum +from typing import Any + +import litellm +import numpy as np +import tiktoken +from pydantic import ( + BaseModel, + Field, + field_validator, +) + +from llmclient.constants import CHARACTERS_PER_TOKEN_ASSUMPTION, MODEL_COST_MAP +from llmclient.rate_limiter import GLOBAL_LIMITER + + +def get_litellm_retrying_config(timeout: float = 60.0) -> dict[str, Any]: + """Get retrying configuration for litellm.acompletion and litellm.aembedding.""" + return 
{"num_retries": 3, "timeout": timeout} + + +class EmbeddingModes(StrEnum): + DOCUMENT = "document" + QUERY = "query" + + +class EmbeddingModel(ABC, BaseModel): + name: str + config: dict[str, Any] = Field( + default_factory=dict, + description=( + "Optional `rate_limit` key, value must be a RateLimitItem or RateLimitItem" + " string for parsing" + ), + ) + + async def check_rate_limit(self, token_count: float, **kwargs) -> None: + if "rate_limit" in self.config: + await GLOBAL_LIMITER.try_acquire( + ("client", self.name), + self.config["rate_limit"], + weight=max(int(token_count), 1), + **kwargs, + ) + + def set_mode(self, mode: EmbeddingModes) -> None: + """Several embedding models have a 'mode' or prompt which affects output.""" + + @abstractmethod + async def embed_documents(self, texts: list[str]) -> list[list[float]]: + pass + + +class LiteLLMEmbeddingModel(EmbeddingModel): + + name: str = Field(default="text-embedding-3-small") + config: dict[str, Any] = Field( + default_factory=dict, # See below field_validator for injection of kwargs + description=( + "The optional `rate_limit` key's value must be a RateLimitItem or" + " RateLimitItem string for parsing. The optional `kwargs` key is keyword" + " arguments to pass to the litellm.aembedding function. Note that LiteLLM's" + " Router is not used here." + ), + ) + + @field_validator("config", mode="before") + @classmethod + def set_up_default_config(cls, value: dict[str, Any]) -> dict[str, Any]: + if "kwargs" not in value: + value["kwargs"] = get_litellm_retrying_config( + timeout=120, # 2-min timeout seemed reasonable + ) + return value + + def _truncate_if_large(self, texts: list[str]) -> list[str]: + """Truncate texts if they are too large by using litellm cost map.""" + if self.name not in MODEL_COST_MAP: + return texts + max_tokens = MODEL_COST_MAP[self.name]["max_input_tokens"] + # heuristic about ratio of tokens to characters + conservative_char_token_ratio = 3 + maybe_too_large = max_tokens * conservative_char_token_ratio + if any(len(t) > maybe_too_large for t in texts): + try: + enct = tiktoken.encoding_for_model("cl100k_base") + enc_batch = enct.encode_ordinary_batch(texts) + return [enct.decode(t[:max_tokens]) for t in enc_batch] + except KeyError: + return [t[: max_tokens * conservative_char_token_ratio] for t in texts] + + return texts + + async def embed_documents( + self, texts: list[str], batch_size: int = 16 + ) -> list[list[float]]: + texts = self._truncate_if_large(texts) + N = len(texts) + embeddings = [] + for i in range(0, N, batch_size): + + await self.check_rate_limit( + sum( + len(t) / CHARACTERS_PER_TOKEN_ASSUMPTION + for t in texts[i : i + batch_size] + ) + ) + + response = await litellm.aembedding( + self.name, + input=texts[i : i + batch_size], + **self.config.get("kwargs", {}), + ) + embeddings.extend([e["embedding"] for e in response.data]) + + return embeddings + + +class SparseEmbeddingModel(EmbeddingModel): + """This is a very simple keyword search model - probably best to be mixed with others.""" + + name: str = "sparse" + ndim: int = 256 + enc: Any = Field(default_factory=lambda: tiktoken.get_encoding("cl100k_base")) + + async def embed_documents(self, texts) -> list[list[float]]: + enc_batch = self.enc.encode_ordinary_batch(texts) + # now get frequency of each token rel to length + return [ + np.bincount([xi % self.ndim for xi in x], minlength=self.ndim).astype(float) # type: ignore[misc] + / len(x) + for x in enc_batch + ] + + +class HybridEmbeddingModel(EmbeddingModel): + name: str = 
"hybrid-embed" + models: list[EmbeddingModel] + + async def embed_documents(self, texts): + all_embeds = await asyncio.gather( + *[m.embed_documents(texts) for m in self.models] + ) + return np.concatenate(all_embeds, axis=1) + + def set_mode(self, mode: EmbeddingModes) -> None: + # Set mode for all component models + for model in self.models: + model.set_mode(mode) + + +class SentenceTransformerEmbeddingModel(EmbeddingModel): + """An embedding model using SentenceTransformers.""" + + name: str = Field(default="multi-qa-MiniLM-L6-cos-v1") + config: dict[str, Any] = Field(default_factory=dict) + _model: Any = None + + def __init__(self, **kwargs): + super().__init__(**kwargs) + try: + from sentence_transformers import SentenceTransformer + except ImportError as exc: + raise ImportError( + "Please install fh-llm-client[local] to use" + " SentenceTransformerEmbeddingModel." + ) from exc + + self._model = SentenceTransformer(self.name) + + def set_mode(self, mode: EmbeddingModes) -> None: + # SentenceTransformer does not support different modes. + pass + + async def embed_documents(self, texts: list[str]) -> list[list[float]]: + """ + Asynchronously embed a list of documents using SentenceTransformer. + + Args: + texts: A list of text documents to embed. + + Returns: + A list of embedding vectors. + """ + # Extract additional configurations if needed + batch_size = self.config.get("batch_size", 32) + device = self.config.get("device", "cpu") + + # Update the model's device if necessary + if device: + self._model.to(device) + + # Run the synchronous encode method in a thread pool to avoid blocking the event loop. + embeddings = await asyncio.to_thread( + lambda: self._model.encode( + texts, + convert_to_numpy=True, + show_progress_bar=False, # Disabled progress bar + batch_size=batch_size, + device=device, + ), + ) + # If embeddings are returned as numpy arrays, convert them to lists. + if isinstance(embeddings, np.ndarray): + embeddings = embeddings.tolist() + return embeddings + + +def embedding_model_factory(embedding: str, **kwargs) -> EmbeddingModel: + """ + Factory function to create an appropriate EmbeddingModel based on the embedding string. + + Supports: + - SentenceTransformer models prefixed with "st-" (e.g., "st-multi-qa-MiniLM-L6-cos-v1") + - LiteLLM models (default if no prefix is provided) + - Hybrid embeddings prefixed with "hybrid-", contains a sparse and a dense model + + Args: + embedding: The embedding model identifier. Supports prefixes like "st-" for SentenceTransformer + and "hybrid-" for combining multiple embedding models. + **kwargs: Additional keyword arguments for the embedding model. + """ + embedding = embedding.strip() # Remove any leading/trailing whitespace + + if embedding.startswith("hybrid-"): + # Extract the component embedding identifiers after "hybrid-" + dense_name = embedding[len("hybrid-") :] + + if not dense_name: + raise ValueError( + "Hybrid embedding must contain at least one component embedding." + ) + + # Recursively create each component embedding model + dense_model = embedding_model_factory(dense_name, **kwargs) + sparse_model = SparseEmbeddingModel(**kwargs) + + return HybridEmbeddingModel(models=[dense_model, sparse_model]) + + if embedding.startswith("st-"): + # Extract the SentenceTransformer model name after "st-" + model_name = embedding[len("st-") :].strip() + if not model_name: + raise ValueError( + "SentenceTransformer model name must be specified after 'st-'." 
+ ) + + return SentenceTransformerEmbeddingModel( + name=model_name, + config=kwargs, + ) + + if embedding.startswith("litellm-"): + # Extract the LiteLLM model name after "litellm-" + model_name = embedding[len("litellm-") :].strip() + if not model_name: + raise ValueError("model name must be specified after 'litellm-'.") + + return LiteLLMEmbeddingModel( + name=model_name, + config=kwargs, + ) + + if embedding == "sparse": + return SparseEmbeddingModel(**kwargs) + + # Default to LiteLLMEmbeddingModel if no special prefix is found + return LiteLLMEmbeddingModel(name=embedding, config=kwargs) diff --git a/llmclient/exceptions.py b/llmclient/exceptions.py new file mode 100644 index 0000000..aea488d --- /dev/null +++ b/llmclient/exceptions.py @@ -0,0 +1,2 @@ +class JSONSchemaValidationError(ValueError): + """Raised when the completion does not match the specified schema.""" diff --git a/llmclient/llms.py b/llmclient/llms.py new file mode 100644 index 0000000..aae4e89 --- /dev/null +++ b/llmclient/llms.py @@ -0,0 +1,584 @@ +import asyncio +import contextlib +import functools +from abc import ABC +from collections.abc import ( + AsyncGenerator, + AsyncIterable, + AsyncIterator, + Awaitable, + Callable, + Iterable, +) +from inspect import isasyncgenfunction, signature +from typing import ( + Any, + TypeVar, + cast, +) + +import litellm +from aviary.core import ( + ToolRequestMessage, + ToolSelector, +) +from pydantic import ( + BaseModel, + ConfigDict, + Field, + TypeAdapter, + ValidationError, + model_validator, +) + +from llmclient.constants import ( + CHARACTERS_PER_TOKEN_ASSUMPTION, + DEFAULT_VERTEX_SAFETY_SETTINGS, + EXTRA_TOKENS_FROM_USER_ROLE, + IS_PYTHON_BELOW_312, +) +from llmclient.exceptions import JSONSchemaValidationError +from llmclient.prompts import default_system_prompt +from llmclient.rate_limiter import GLOBAL_LIMITER +from llmclient.types import Chunk, LLMResult +from llmclient.utils import is_coroutine_callable + +if not IS_PYTHON_BELOW_312: + _DeploymentTypedDictValidator = TypeAdapter( + list[litellm.DeploymentTypedDict], + config=ConfigDict(arbitrary_types_allowed=True), + ) + + +def sum_logprobs(choice: litellm.utils.Choices) -> float | None: + """Calculate the sum of the log probabilities of an LLM completion (a Choices object). + + Args: + choice: A sequence of choices from the completion. + + Returns: + The sum of the log probabilities of the choice. + """ + try: + logprob_obj = choice.logprobs + except AttributeError: + return None + if isinstance(logprob_obj, dict): + if logprob_obj.get("content"): + return sum( + logprob_info["logprob"] for logprob_info in logprob_obj["content"] + ) + elif choice.logprobs.content: + return sum(logprob_info.logprob for logprob_info in choice.logprobs.content) + return None + + +def validate_json_completion( + completion: litellm.ModelResponse, output_type: type[BaseModel] +) -> None: + """Validate a completion against a JSON schema. + + Args: + completion: The completion to validate. + output_type: The Pydantic model to validate the completion against. 
+ """ + try: + for choice in completion.choices: + if not hasattr(choice, "message") or not choice.message.content: + continue + # make sure it is a JSON completion, even if None + # We do want to modify the underlying message + # so that users of it can just parse it as expected + choice.message.content = ( + choice.message.content.split("```json")[-1].split("```")[0] or "" + ) + output_type.model_validate_json(choice.message.content) + except ValidationError as err: + raise JSONSchemaValidationError( + "The completion does not match the specified schema." + ) from err + + +def prepare_args(func: Callable, chunk: str, name: str | None) -> tuple[tuple, dict]: + with contextlib.suppress(TypeError): + if "name" in signature(func).parameters: + return (chunk,), {"name": name} + return (chunk,), {} + + +async def do_callbacks( + async_callbacks: Iterable[Callable[..., Awaitable]], + sync_callbacks: Iterable[Callable[..., Any]], + chunk: str, + name: str | None, +) -> None: + for f in async_callbacks: + args, kwargs = prepare_args(f, chunk, name) + await f(*args, **kwargs) + for f in sync_callbacks: + args, kwargs = prepare_args(f, chunk, name) + f(*args, **kwargs) + + +def get_litellm_retrying_config(timeout: float = 60.0) -> dict[str, Any]: + """Get retrying configuration for litellm.acompletion and litellm.aembedding.""" + return {"num_retries": 3, "timeout": timeout} + + +class LLMModel(ABC, BaseModel): + model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True) + + llm_type: str | None = None + name: str + llm_result_callback: ( + Callable[[LLMResult], None] | Callable[[LLMResult], Awaitable[None]] | None + ) = Field( + default=None, + description=( + "An async callback that will be executed on each" + " LLMResult (different than callbacks that execute on each chunk)" + ), + exclude=True, + ) + config: dict = Field(default_factory=dict) + + async def acomplete(self, prompt: str) -> Chunk: + """Return the completion as string and the number of tokens in the prompt and completion.""" + raise NotImplementedError + + async def acomplete_iter(self, prompt: str) -> AsyncIterable[Chunk]: + """Return an async generator that yields chunks of the completion. + + Only the last tuple will be non-zero. + """ + raise NotImplementedError + if False: # type: ignore[unreachable] # pylint: disable=using-constant-test + yield # Trick mypy: https://github.com/python/mypy/issues/5070#issuecomment-1050834495 + + async def achat(self, messages: Iterable[dict[str, str]]) -> Chunk: + """Return the completion as string and the number of tokens in the prompt and completion.""" + raise NotImplementedError + + async def achat_iter( + self, messages: Iterable[dict[str, str]] + ) -> AsyncIterable[Chunk]: + """Return an async generator that yields chunks of the completion. + + Only the last tuple will be non-zero. 
+ """ + raise NotImplementedError + if False: # type: ignore[unreachable] # pylint: disable=using-constant-test + yield # Trick mypy: https://github.com/python/mypy/issues/5070#issuecomment-1050834495 + + def infer_llm_type(self) -> str: + return "completion" + + def count_tokens(self, text: str) -> int: + return len(text) // 4 # gross approximation + + async def run_prompt( + self, + prompt: str, + data: dict, + callbacks: list[Callable] | None = None, + name: str | None = None, + system_prompt: str | None = default_system_prompt, + ) -> LLMResult: + if self.llm_type is None: + self.llm_type = self.infer_llm_type() + if self.llm_type == "chat": + return await self._run_chat(prompt, data, callbacks, name, system_prompt) + if self.llm_type == "completion": + return await self._run_completion( + prompt, data, callbacks, name, system_prompt + ) + raise ValueError(f"Unknown llm_type {self.llm_type!r}.") + + async def _run_chat( + self, + prompt: str, + data: dict, + callbacks: list[Callable] | None = None, + name: str | None = None, + system_prompt: str | None = default_system_prompt, + ) -> LLMResult: + """Run a chat prompt. + + Args: + prompt: Prompt to use. + data: Keys for the input variables that will be formatted into prompt. + callbacks: Optional functions to call with each chunk of the completion. + name: Optional name for the result. + system_prompt: System prompt to use, or None/empty string to not use one. + + Returns: + Result of the chat. + """ + human_message_prompt = {"role": "user", "content": prompt} + messages = [ + {"role": m["role"], "content": m["content"].format(**data)} + for m in ( + [{"role": "system", "content": system_prompt}, human_message_prompt] + if system_prompt + else [human_message_prompt] + ) + ] + result = LLMResult( + model=self.name, + name=name, + prompt=messages, + prompt_count=( + sum(self.count_tokens(m["content"]) for m in messages) + + sum(self.count_tokens(m["role"]) for m in messages) + ), + ) + + start_clock = asyncio.get_running_loop().time() + if callbacks is None: + chunk = await self.achat(messages) + output = chunk.text + else: + sync_callbacks = [f for f in callbacks if not is_coroutine_callable(f)] + async_callbacks = [f for f in callbacks if is_coroutine_callable(f)] + completion = await self.achat_iter(messages) # type: ignore[misc] + text_result = [] + async for chunk in completion: + if chunk.text: + if result.seconds_to_first_token == 0: + result.seconds_to_first_token = ( + asyncio.get_running_loop().time() - start_clock + ) + text_result.append(chunk.text) + await do_callbacks( + async_callbacks, sync_callbacks, chunk.text, name + ) + output = "".join(text_result) + usage = chunk.prompt_tokens, chunk.completion_tokens + if sum(usage) > 0: + result.prompt_count, result.completion_count = usage + elif output: + result.completion_count = self.count_tokens(output) + result.text = output or "" + result.seconds_to_last_token = asyncio.get_running_loop().time() - start_clock + if self.llm_result_callback: + if is_coroutine_callable(self.llm_result_callback): + await self.llm_result_callback(result) # type: ignore[misc] + else: + self.llm_result_callback(result) + return result + + async def _run_completion( + self, + prompt: str, + data: dict, + callbacks: Iterable[Callable] | None = None, + name: str | None = None, + system_prompt: str | None = default_system_prompt, + ) -> LLMResult: + """Run a completion prompt. + + Args: + prompt: Prompt to use. + data: Keys for the input variables that will be formatted into prompt. 
+ callbacks: Optional functions to call with each chunk of the completion. + name: Optional name for the result. + system_prompt: System prompt to use, or None/empty string to not use one. + + Returns: + Result of the completion. + """ + formatted_prompt: str = ( + system_prompt + "\n\n" + prompt if system_prompt else prompt + ).format(**data) + result = LLMResult( + model=self.name, + name=name, + prompt=formatted_prompt, + prompt_count=self.count_tokens(formatted_prompt), + ) + + start_clock = asyncio.get_running_loop().time() + if callbacks is None: + chunk = await self.acomplete(formatted_prompt) + output = chunk.text + else: + sync_callbacks = [f for f in callbacks if not is_coroutine_callable(f)] + async_callbacks = [f for f in callbacks if is_coroutine_callable(f)] + + completion = self.acomplete_iter(formatted_prompt) + text_result = [] + async for chunk in completion: + if chunk.text: + if result.seconds_to_first_token == 0: + result.seconds_to_first_token = ( + asyncio.get_running_loop().time() - start_clock + ) + text_result.append(chunk.text) + await do_callbacks( + async_callbacks, sync_callbacks, chunk.text, name + ) + output = "".join(text_result) + usage = chunk.prompt_tokens, chunk.completion_tokens + if sum(usage) > 0: + result.prompt_count, result.completion_count = usage + elif output: + result.completion_count = self.count_tokens(output) + result.text = output or "" + result.seconds_to_last_token = asyncio.get_running_loop().time() - start_clock + if self.llm_result_callback: + if is_coroutine_callable(self.llm_result_callback): + await self.llm_result_callback(result) # type: ignore[misc] + else: + self.llm_result_callback(result) + return result + + +LLMModelOrChild = TypeVar("LLMModelOrChild", bound=LLMModel) + + +def rate_limited( + func: Callable[[LLMModelOrChild, Any], Awaitable[Chunk] | AsyncIterable[Chunk]], +) -> Callable[ + [LLMModelOrChild, Any, Any], + Awaitable[Chunk | AsyncIterator[Chunk] | AsyncIterator[LLMModelOrChild]], +]: + """Decorator to rate limit relevant methods of an LLMModel.""" + + @functools.wraps(func) + async def wrapper( + self: LLMModelOrChild, *args: Any, **kwargs: Any + ) -> Chunk | AsyncIterator[Chunk] | AsyncIterator[LLMModelOrChild]: + + if not hasattr(self, "check_rate_limit"): + raise NotImplementedError( + f"Model {self.name} must have a `check_rate_limit` method." 
+ ) + + # Estimate token count based on input + if func.__name__ in {"acomplete", "acomplete_iter"}: + prompt = args[0] if args else kwargs.get("prompt", "") + token_count = ( + len(prompt) / CHARACTERS_PER_TOKEN_ASSUMPTION + + EXTRA_TOKENS_FROM_USER_ROLE + ) + elif func.__name__ in {"achat", "achat_iter"}: + messages = args[0] if args else kwargs.get("messages", []) + token_count = len(str(messages)) / CHARACTERS_PER_TOKEN_ASSUMPTION + else: + token_count = 0 # Default if method is unknown + + await self.check_rate_limit(token_count) + + # If wrapping a generator, count the tokens for each + # portion before yielding + if isasyncgenfunction(func): + + async def rate_limited_generator() -> AsyncGenerator[LLMModelOrChild, None]: + async for item in func(self, *args, **kwargs): + token_count = 0 + if isinstance(item, Chunk): + token_count = int( + len(item.text or "") / CHARACTERS_PER_TOKEN_ASSUMPTION + ) + await self.check_rate_limit(token_count) + yield item + + return rate_limited_generator() + + result = await func(self, *args, **kwargs) # type: ignore[misc] + + if func.__name__ in {"acomplete", "achat"} and isinstance(result, Chunk): + await self.check_rate_limit(result.completion_tokens) + return result + + return wrapper + + +class PassThroughRouter(litellm.Router): # TODO: add rate_limited + """Router that is just a wrapper on LiteLLM's normal free functions.""" + + def __init__(self, **kwargs): + self._default_kwargs = kwargs + + async def atext_completion(self, *args, **kwargs): + return await litellm.atext_completion(*args, **(self._default_kwargs | kwargs)) + + async def acompletion(self, *args, **kwargs): + return await litellm.acompletion(*args, **(self._default_kwargs | kwargs)) + + +class LiteLLMModel(LLMModel): + """A wrapper around the litellm library.""" + + config: dict = Field( + default_factory=dict, + description=( + "Configuration of this model containing several important keys. The" + " optional `model_list` key stores a list of all model configurations" + " (SEE: https://docs.litellm.ai/docs/routing). The optional" + " `router_kwargs` key is keyword arguments to pass to the Router class." + " Inclusion of a key `pass_through_router` with a truthy value will lead" + " to using not using LiteLLM's Router, instead just LiteLLM's free" + f" functions (see {PassThroughRouter.__name__}). Rate limiting applies" + " regardless of `pass_through_router` being present. The optional" + " `rate_limit` key is a dictionary keyed by model group name with values" + " of type limits.RateLimitItem (in tokens / minute) or valid" + " limits.RateLimitItem string for parsing." 
+ ), + ) + name: str = "gpt-4o-mini" + _router: litellm.Router | None = None + + @model_validator(mode="before") + @classmethod + def maybe_set_config_attribute(cls, data: dict[str, Any]) -> dict[str, Any]: + """If a user only gives a name, make a sensible config dict for them.""" + if "config" not in data: + data["config"] = {} + if "name" in data and "model_list" not in data["config"]: + data["config"] = { + "model_list": [ + { + "model_name": data["name"], + "litellm_params": {"model": data["name"]} + | ( + {} + if "gemini" not in data["name"] + else {"safety_settings": DEFAULT_VERTEX_SAFETY_SETTINGS} + ), + } + ], + } | data["config"] + + if "router_kwargs" not in data["config"]: + data["config"]["router_kwargs"] = {} + data["config"]["router_kwargs"] = ( + get_litellm_retrying_config() | data["config"]["router_kwargs"] + ) + if not data["config"].get("pass_through_router"): + data["config"]["router_kwargs"] = {"retry_after": 5} | data["config"][ + "router_kwargs" + ] + + # we only support one "model name" for now, here we validate + model_list = data["config"]["model_list"] + if IS_PYTHON_BELOW_312: + if not isinstance(model_list, list): + # Work around https://github.com/BerriAI/litellm/issues/5664 + raise TypeError(f"model_list must be a list, not a {type(model_list)}.") + else: + # pylint: disable-next=possibly-used-before-assignment + _DeploymentTypedDictValidator.validate_python(model_list) + if len({m["model_name"] for m in model_list}) > 1: + raise ValueError("Only one model name per model list is supported for now.") + return data + + def __getstate__(self): + # Prevent _router from being pickled, SEE: https://stackoverflow.com/a/2345953 + state = super().__getstate__() + state["__dict__"] = state["__dict__"].copy() + state["__dict__"].pop("_router", None) + return state + + @property + def router(self) -> litellm.Router: + if self._router is None: + router_kwargs: dict = self.config.get("router_kwargs", {}) + if self.config.get("pass_through_router"): + self._router = PassThroughRouter(**router_kwargs) + else: + self._router = litellm.Router( + model_list=self.config["model_list"], **router_kwargs + ) + return self._router + + async def check_rate_limit(self, token_count: float, **kwargs) -> None: + if "rate_limit" in self.config: + await GLOBAL_LIMITER.try_acquire( + ("client", self.name), + self.config["rate_limit"].get(self.name, None), + weight=max(int(token_count), 1), + **kwargs, + ) + + @rate_limited + async def acomplete(self, prompt: str) -> Chunk: # type: ignore[override] + response = await self.router.atext_completion(model=self.name, prompt=prompt) + return Chunk( + text=response.choices[0].text, + prompt_tokens=response.usage.prompt_tokens, + completion_tokens=response.usage.completion_tokens, + ) + + @rate_limited + async def acomplete_iter( # type: ignore[override] + self, prompt: str + ) -> AsyncIterable[Chunk]: + completion = await self.router.atext_completion( + model=self.name, + prompt=prompt, + stream=True, + stream_options={"include_usage": True}, + ) + async for chunk in completion: + yield Chunk( + text=chunk.choices[0].text, prompt_tokens=0, completion_tokens=0 + ) + if hasattr(chunk, "usage") and hasattr(chunk.usage, "prompt_tokens"): + yield Chunk( + text=chunk.choices[0].text, prompt_tokens=0, completion_tokens=0 + ) + + @rate_limited + async def achat( # type: ignore[override] + self, messages: Iterable[dict[str, str]] + ) -> Chunk: + response = await self.router.acompletion(self.name, list(messages)) + return Chunk( + text=cast(litellm.Choices, 
response.choices[0]).message.content, + prompt_tokens=response.usage.prompt_tokens, # type: ignore[attr-defined] + completion_tokens=response.usage.completion_tokens, # type: ignore[attr-defined] + ) + + @rate_limited + async def achat_iter( # type: ignore[override] + self, messages: Iterable[dict[str, str]] + ) -> AsyncIterable[Chunk]: + completion = await self.router.acompletion( + self.name, + list(messages), + stream=True, + stream_options={"include_usage": True}, + ) + async for chunk in completion: + yield Chunk( + text=chunk.choices[0].delta.content, + prompt_tokens=0, + completion_tokens=0, + ) + if hasattr(chunk, "usage") and hasattr(chunk.usage, "prompt_tokens"): + yield Chunk( + text=None, + prompt_tokens=chunk.usage.prompt_tokens, + completion_tokens=chunk.usage.completion_tokens, + ) + + def infer_llm_type(self) -> str: + if all( + "text-completion" in m.get("litellm_params", {}).get("model", "") + for m in self.config["model_list"] + ): + return "completion" + return "chat" + + def count_tokens(self, text: str) -> int: + return litellm.token_counter(model=self.name, text=text) + + async def select_tool( + self, *selection_args, **selection_kwargs + ) -> ToolRequestMessage: + """Shim to aviary.core.ToolSelector that supports tool schemae.""" + tool_selector = ToolSelector( + model_name=self.name, acompletion=self.router.acompletion + ) + return await tool_selector(*selection_args, **selection_kwargs) diff --git a/src/llmclient/constants.py b/llmclient/prompts.py similarity index 100% rename from src/llmclient/constants.py rename to llmclient/prompts.py diff --git a/llmclient/rate_limiter.py b/llmclient/rate_limiter.py new file mode 100644 index 0000000..8afbe3d --- /dev/null +++ b/llmclient/rate_limiter.py @@ -0,0 +1,397 @@ +import asyncio +import logging +import os +from collections.abc import Collection +from typing import ClassVar, Literal +from urllib.parse import urlparse + +import aiohttp +from coredis import Redis +from limits import ( + RateLimitItem, + RateLimitItemPerMinute, + RateLimitItemPerSecond, +) +from limits import ( + parse as limit_parse, +) +from limits.aio.storage import MemoryStorage, RedisStorage +from limits.aio.strategies import MovingWindowRateLimiter + +logger = logging.getLogger(__name__) + + +SEMANTIC_SCHOLAR_HOST = "api.semanticscholar.org" +SEMANTIC_SCHOLAR_BASE_URL = f"https://{SEMANTIC_SCHOLAR_HOST}" + + +CROSSREF_HOST = "api.crossref.org" +CROSSREF_BASE_URL = f"https://{CROSSREF_HOST}" + +GLOBAL_RATE_LIMITER_TIMEOUT = float(os.environ.get("RATE_LIMITER_TIMEOUT", "60")) + +MATCH_ALL = None +MatchAllInputs = Literal[None] +MATCH_MACHINE_ID = "" + +FALLBACK_RATE_LIMIT = RateLimitItemPerSecond(3, 1) +TOKEN_FALLBACK_RATE_LIMIT = RateLimitItemPerMinute(30_000, 1) + +# RATE_CONFIG keys are tuples, corresponding to a namespace and primary key. +# Anything defined with MATCH_ALL variable, will match all non-matched requests for that namespace. +# For the "get" namespace, all primary key urls will be parsed down to the domain level. +# For example, you're trying to do a get request to "https://google.com", "google.com" will get +# its own limit, and it will use the ("get", MATCH_ALL) for its limits. +# machine_id is a unique identifier for the machine making the request, it's used to limit the +# rate of requests per machine. If the primary_key is in the NO_MACHINE_ID_EXTENSIONS list, then +# the dynamic IP of the machine will be used to limit the rate of requests, otherwise the +# user input machine_id will be used. 
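+# For example, a hypothetical entry ("get", "https://api.example.org"):
+# RateLimitItemPerSecond(5, 1) would cap GET requests to that host at 5 per
+# second (matched at the domain level as described above), while unmatched
+# GET requests fall back to the (f"get|{MATCH_MACHINE_ID}", MATCH_ALL) entry below.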
+ +RATE_CONFIG: dict[tuple[str, str | MatchAllInputs], RateLimitItem] = { + ("get", CROSSREF_BASE_URL): RateLimitItemPerSecond(30, 1), + ("get", SEMANTIC_SCHOLAR_BASE_URL): RateLimitItemPerSecond(15, 1), + ("client", MATCH_ALL): TOKEN_FALLBACK_RATE_LIMIT, + # MATCH_MACHINE_ID is a sentinel for the machine_id passed in by the caller + (f"get|{MATCH_MACHINE_ID}", MATCH_ALL): FALLBACK_RATE_LIMIT, +} + +UNKNOWN_IP: str = "0.0.0.0" # noqa: S104 + + +class GlobalRateLimiter: + """Rate limiter for all requests within or between processes. + + Supports both Redis and in-memory storage. + 'Global' refers to being able to limit the rate + of requests across processes with Redis. + """ + + WAIT_INCREMENT: ClassVar[float] = 0.01 # seconds + # list of public free outbount IP services + # generated initially w. claude, then filtered + IP_CHECK_SERVICES: ClassVar[Collection[str]] = { + "https://api.ipify.org", + "https://ifconfig.me", + "http://icanhazip.com", + "https://ipecho.net/plain", + } + # the following will use IP scope for limiting, rather + # than user input machine ID + NO_MACHINE_ID_EXTENSIONS: ClassVar[Collection[str]] = {"crossref.org"} + + def __init__( + self, + rate_config: ( + None | dict[tuple[str, str | MatchAllInputs], RateLimitItem] + ) = None, + use_in_memory: bool = False, + ): + self.rate_config = RATE_CONFIG if rate_config is None else rate_config + self.use_in_memory = use_in_memory + self._storage: RedisStorage | MemoryStorage | None = None + self._rate_limiter: MovingWindowRateLimiter | None = None + self._current_ip: str | None = None + + @staticmethod + async def get_outbound_ip(session: aiohttp.ClientSession, url: str) -> str | None: + try: + async with session.get(url, timeout=aiohttp.ClientTimeout(5)) as response: + if response.ok: + return await response.text() + except TimeoutError: + logger.warning(f"Timeout occurred while connecting to {url}") + except aiohttp.ClientError: + logger.warning(f"Error occurred while connecting to {url}.", exc_info=True) + return None + + async def outbount_ip(self) -> str: + if self._current_ip is None: + async with aiohttp.ClientSession() as session: + for service in self.IP_CHECK_SERVICES: + ip = await self.get_outbound_ip(session, service) + if ip: + logger.info(f"Successfully retrieved IP from {service}") + self._current_ip = ip.strip() + break + if self._current_ip is None: + logger.error("Failed to retrieve IP from all services") + self._current_ip = UNKNOWN_IP + return self._current_ip + + @property + def storage(self) -> RedisStorage | MemoryStorage: + if self._storage is None: + if os.environ.get("REDIS_URL") and not self.use_in_memory: + self._storage = RedisStorage(f"async+redis://{os.environ['REDIS_URL']}") + logger.info("Connected to redis instance for rate limiting.") + else: + self._storage = MemoryStorage() + logger.info("Using in-memory rate limiter.") + + return self._storage + + @property + def rate_limiter(self) -> MovingWindowRateLimiter: + if self._rate_limiter is None: + self._rate_limiter = MovingWindowRateLimiter(self.storage) + return self._rate_limiter + + async def parse_namespace_and_primary_key( + self, namespace_and_key: tuple[str, str], machine_id: int = 0 + ) -> tuple[str, str]: + """Turn namespace_and_key tuple into a namespace and primary-key. + + If using a namespace starting with "get", then the primary key will be url parsed. 
+ "get" namespaces will also have their machine_ids appended to the namespace here, + unless the primary key is in the NO_MACHINE_ID_EXTENSIONS list, in which case + the outbound IP will be used. + """ + namespace, primary_key = namespace_and_key + + if namespace.startswith("get") and primary_key is not None: + # for URLs to be parsed correctly, they need a protocol + if not primary_key.startswith(("http://", "https://")): + primary_key = "https://" + primary_key + + primary_key = urlparse(primary_key).netloc or urlparse(primary_key).path + + if any(ext in primary_key for ext in self.NO_MACHINE_ID_EXTENSIONS): + namespace = f"{namespace}|{await self.outbount_ip()}" + else: + namespace = f"{namespace}|{machine_id}" + + return namespace, primary_key + + def parse_rate_limits_and_namespace( + self, + namespace: str, + primary_key: str | MatchAllInputs, + ) -> tuple[RateLimitItem, str]: + """Get rate limit and new namespace for a given namespace and primary_key. + + This parsing logic finds the correct rate limits for a namespace/primary_key. + It's a bit complex due to the and placeholders. + These allow users to match + + """ + # the namespace may have a machine_id in it -- we replace if that's the case + namespace_w_stub_machine_id = namespace + namespace_w_machine_id_stripped = namespace + + # strip off the machine_id, and replace it with the MATCH_MACHINE_ID placeholder + if namespace.startswith("get"): + machine_id = namespace.split("|")[-1] + if machine_id != "get": + namespace_w_stub_machine_id = namespace.replace( + machine_id, MATCH_MACHINE_ID, 1 + ) + # try stripping the machine id for the namespace for shared limits + # i.e. matching to one rate limit across ALL machines + # these limits are in RATE_CONFIG WITHOUT a MATCH_MACHINE_ID placeholder + namespace_w_machine_id_stripped = "|".join(namespace.split("|")[:-1]) + + # here we want to use namespace_w_machine_id_stripped -- the rate should be shared + # this needs to be checked first, since it's more specific than the stub machine id + if (namespace_w_machine_id_stripped, primary_key) in self.rate_config: + return ( + self.rate_config[(namespace_w_machine_id_stripped, primary_key)], + namespace_w_machine_id_stripped, + ) + # we keep the old namespace if we match on the namespace_w_stub_machine_id + if (namespace_w_stub_machine_id, primary_key) in self.rate_config: + return ( + self.rate_config[(namespace_w_stub_machine_id, primary_key)], + namespace, + ) + # again we only want the original namespace, keep the old namespace + if (namespace_w_stub_machine_id, MATCH_ALL) in self.rate_config: + return ( + self.rate_config[(namespace_w_stub_machine_id, MATCH_ALL)], + namespace, + ) + # again we want to use the stripped namespace if it matches + if (namespace_w_machine_id_stripped, MATCH_ALL) in self.rate_config: + return ( + self.rate_config[(namespace_w_machine_id_stripped, MATCH_ALL)], + namespace_w_machine_id_stripped, + ) + return FALLBACK_RATE_LIMIT, namespace + + def parse_key( + self, key: str + ) -> tuple[RateLimitItem, tuple[str, str | MatchAllInputs]]: + """Parse the rate limit item from a redis/in-memory key. + + Args: + key (str): is created with RateLimitItem.key_for(*identifiers), + the first key is the namespace, then the next two will be our identifiers. 
+ + """ + namespace, primary_key = key.split("/")[1:3] + rate_limit, new_namespace = self.parse_rate_limits_and_namespace( + namespace, primary_key + ) + return ( + rate_limit, + (new_namespace, primary_key), + ) + + async def get_rate_limit_keys( + self, cursor_scan_count: int = 100 + ) -> list[tuple[RateLimitItem, tuple[str, str | MatchAllInputs]]]: + """Returns a list of current RateLimitItems with tuples of namespace and primary key.""" + host, port = os.environ.get("REDIS_URL", ":").split(":", maxsplit=2) + + if not (host and port): + raise ValueError(f'Invalid REDIS_URL: {os.environ.get("REDIS_URL")}.') + + if not isinstance(self.storage, RedisStorage): + raise NotImplementedError( + "get_rate_limit_keys only works with RedisStorage." + ) + + client = Redis(host=host, port=int(port)) + + try: + cursor: int | bytes = b"0" + matching_keys: list[bytes] = [] + while cursor: + cursor, keys = await client.scan( + int(cursor), + match=f"{self.storage.PREFIX}*", + count=cursor_scan_count, + ) + matching_keys.extend(list(keys)) + finally: + await client.quit() + + return [self.parse_key(key.decode()) for key in matching_keys] + + def get_in_memory_limit_keys( + self, + ) -> list[tuple[RateLimitItem, tuple[str, str | MatchAllInputs]]]: + """Returns a list of current RateLimitItems with tuples of namespace and primary key.""" + if not isinstance(self.storage, MemoryStorage): + raise NotImplementedError( + "get_in_memory_limit_keys only works with MemoryStorage." + ) + return [self.parse_key(key) for key in self.storage.events] + + async def get_limit_keys( + self, + ) -> list[tuple[RateLimitItem, tuple[str, str | MatchAllInputs]]]: + if os.environ.get("REDIS_URL") and not self.use_in_memory: + return await self.get_rate_limit_keys() + return self.get_in_memory_limit_keys() + + async def rate_limit_status(self): + + limit_status = {} + + for rate_limit, (namespace, primary_key) in await self.get_limit_keys(): + period_start, n_items_in_period = await self.storage.get_moving_window( + rate_limit.key_for(*(namespace, primary_key or "")), + rate_limit.amount, + rate_limit.get_expiry(), + ) + limit_status[(namespace, primary_key)] = { + "period_start": period_start, + "n_items_in_period": n_items_in_period, + "period_seconds": rate_limit.GRANULARITY.seconds, + "period_name": rate_limit.GRANULARITY.name, + "period_cap": rate_limit.amount, + } + return limit_status + + async def try_acquire( + self, + namespace_and_key: tuple[str, str], + rate_limit: RateLimitItem | str | None = None, + machine_id: int = 0, + acquire_timeout: float = GLOBAL_RATE_LIMITER_TIMEOUT, + weight: int = 1, + raise_impossible_limits: bool = False, + ) -> None: + """Returns when the limit is satisfied for the namespace_and_key. + + Args: + namespace_and_key (:obj:`tuple[str, str]`): is + composed of a tuple with namespace (e.g. "get") and a primary-key + (e.g. "arxiv.org"). namespaces can be nested with multiple '|', + primary-keys in the "get" namespace will be stripped to the domain. + rate_limit (:obj:`RateLimitItem | str | None`, optional): Optional + RateLimitItem to be used for the namespace and primary-key. + If not provided, RATE_CONFIG will be used to find the rate limit. + Can also use a string of the form: + [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] + machine_id (:obj:`int`, optional): will be used to modify the namespace + of GET requests if the primary key is not in the + NO_MACHINE_ID_EXTENSIONS list. In that case, the outbound IP will be + used to modify the namespace. 
+ acquire_timeout (:obj:`float`, optional): is the maximum time (in seconds) to + wait for the rate limit to be satisfied. + weight (:obj:`int`, optional): is the cost of the request, + default is 1. (could be tokens for example) + raise_impossible_limits (:obj:`bool`, optional): flag will raise a + ValueError for weights that exceed the rate. + + Raises: + TimeoutError: if the acquire_timeout is exceeded. + ValueError: if the weight exceeds the rate limit and raise_impossible_limits is True. + """ + namespace, primary_key = await self.parse_namespace_and_primary_key( + namespace_and_key, machine_id=machine_id + ) + + _rate_limit, new_namespace = self.parse_rate_limits_and_namespace( + namespace, primary_key + ) + + if isinstance(rate_limit, str): + rate_limit = limit_parse(rate_limit) + + rate_limit = rate_limit or _rate_limit + + if rate_limit.amount < weight and raise_impossible_limits: + raise ValueError( + f"Weight ({weight}) > RateLimit ({rate_limit}), cannot satisfy rate" + " limit." + ) + while True: + elapsed = 0.0 + while ( + not ( + await self.rate_limiter.test( + rate_limit, + new_namespace, + primary_key, + cost=min(weight, rate_limit.amount), + ) + ) + and elapsed < acquire_timeout + ): + await asyncio.sleep(self.WAIT_INCREMENT) + elapsed += self.WAIT_INCREMENT + if elapsed >= acquire_timeout: + raise TimeoutError( + f"Timeout ({elapsed} secs): rate limit for key: {namespace_and_key}" + ) + + # If the rate limit hit is False, then we're violating the limit, so we + # need to wait again. This can happen in race conditions. + if await self.rate_limiter.hit( + rate_limit, + new_namespace, + primary_key, + cost=min(weight, rate_limit.amount), + ): + # we need to keep trying when we have an "impossible" limit + if rate_limit.amount < weight: + weight -= rate_limit.amount + acquire_timeout = max(acquire_timeout - elapsed, 1.0) + continue + break + acquire_timeout = max(acquire_timeout - elapsed, 1.0) + + +GLOBAL_LIMITER = GlobalRateLimiter() diff --git a/src/llmclient/result.py b/llmclient/types.py similarity index 50% rename from src/llmclient/result.py rename to llmclient/types.py index c5571cc..d4eeea1 100644 --- a/src/llmclient/result.py +++ b/llmclient/types.py @@ -1,19 +1,16 @@ +import contextvars +import logging +from contextlib import contextmanager +from datetime import datetime +from uuid import UUID, uuid4 + +import litellm from pydantic import ( BaseModel, - Field, ConfigDict, + Field, computed_field, ) -from typing import Union, List, Optional -from uuid import UUID, uuid4 -from datetime import datetime -from contextlib import contextmanager - -import contextvars -import litellm -import logging - -from aviary.core import Message logger = logging.getLogger(__name__) @@ -30,61 +27,64 @@ def set_llm_session_ids(session_id: UUID): cvar_session_id.reset(token) +class Embeddable(BaseModel): + embedding: list[float] | None = Field(default=None, repr=False) + + +class Chunk(BaseModel): + model_config = ConfigDict(extra="forbid", frozen=True) + + text: str | None + prompt_tokens: int + completion_tokens: int + + def __str__(self): + return self.text + + class LLMResult(BaseModel): - """A unified class to hold the result of a LLM completion, replacing two prior versions.""" + """A class to hold the result of a LLM completion. 
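# A minimal sketch of reading token counts and cost off an LLMResult; the values are
# hypothetical, and `cost` is the computed field defined later in this class, backed by
# litellm's model cost lookup.
#
#     result = LLMResult(model="gpt-4o-mini", prompt_count=10, completion_count=20)
#     print(result.cost)  # estimated USD; 0.0 if litellm has no pricing for the model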
+ + To associate a group of LLMResults, you can use the `set_llm_session_ids` context manager: + + ```python + my_session_id = uuid4() + with set_llm_session_ids(my_session_id): + # code that generates LLMResults + pass + ``` + + and all the LLMResults generated within the context will have the same `session_id`. + This can be combined with LLMModels `llm_result_callback` to store all LLMResults. + """ + + model_config = ConfigDict(populate_by_name=True) id: UUID = Field(default_factory=uuid4) - model_config: ConfigDict = ConfigDict(populate_by_name=True) - name: Optional[str] = None - model: str = "" - text: str = "" - prompt_count: int = Field(default=0, description="Count of prompt tokens.") - completion_count: int = Field(default=0, description="Count of completion tokens.") - date: str = Field(default_factory=lambda: datetime.now().isoformat()) - seconds_to_first_token: Optional[float] = Field( - default=0.0, description="Delta time (sec) to first response token's arrival." - ) - seconds_to_last_token: float = Field( - default=0.0, description="Delta time (sec) to last response token's arrival." - ) - system_fingerprint: Optional[str] = Field( - default=None, description="System fingerprint received from the LLM." + session_id: UUID | None = Field( + default_factory=cvar_session_id.get, + description="A persistent ID to associate a group of LLMResults", + alias="answer_id", ) - prompt: Union[str, List[dict], List[Message], None] = Field( + name: str | None = None + prompt: str | list[dict] | None = Field( default=None, description="Optional prompt (str) or list of serialized prompts (list[dict]).", ) - config: Optional[dict] = None - messages: Optional[List[Message]] = Field( - default=None, description="Messages received from the LLM." - ) - session_id: Optional[UUID] = Field( - default_factory=cvar_session_id.get, - description="A persistent ID to associate a group of LLMResults", - alias="answer_id", + text: str = "" + prompt_count: int = 0 + completion_count: int = 0 + model: str + date: str = Field(default_factory=datetime.now().isoformat) + seconds_to_first_token: float = Field( + default=0.0, description="Delta time (sec) to first response token's arrival." ) - logprob: Optional[float] = Field( - default=None, description="Sum of logprobs in the completion." + seconds_to_last_token: float = Field( + default=0.0, description="Delta time (sec) to last response token's arrival." ) - finish_reason: str = "" - @property - def prompt_and_completion_costs(self) -> tuple[float, float]: - """Get a two-tuple of prompt tokens cost and completion tokens cost, in USD.""" - return litellm.cost_per_token( - self.model, - prompt_tokens=self.prompt_count, - completion_tokens=self.completion_count, - ) - - @property - def provider(self) -> str: - """Get the model provider's name (e.g. 
'openai', 'mistral').""" - return litellm.get_llm_provider(self.model)[1] - - def get_supported_openai_params(self) -> Optional[List[str]]: - """Get the supported OpenAI parameters for the model.""" - return litellm.get_supported_openai_params(self.model) + def __str__(self) -> str: + return self.text @computed_field # type: ignore[prop-decorator] @property @@ -98,6 +98,3 @@ def cost(self) -> float: except KeyError: logger.warning(f"Could not find cost for model {self.model}.") return 0.0 - - def __str__(self) -> str: - return self.text diff --git a/src/llmclient/util.py b/llmclient/utils.py similarity index 54% rename from src/llmclient/util.py rename to llmclient/utils.py index 41665e0..304bf95 100644 --- a/src/llmclient/util.py +++ b/llmclient/utils.py @@ -1,10 +1,15 @@ import base64 -import io import contextlib - -from collections.abc import Callable, Iterable -from typing import Any +import io +import logging +import logging.config +from collections.abc import Callable from inspect import iscoroutinefunction, isfunction, signature +from typing import Any + +import litellm +import numpy as np +import pymupdf def encode_image_to_base64(img: "np.ndarray") -> str: @@ -25,20 +30,9 @@ def encode_image_to_base64(img: "np.ndarray") -> str: ) -async def do_callbacks( - callbacks: Iterable[Callable[..., Any]], - chunk: str, - name: str = None, -) -> None: - for f in callbacks: - args, kwargs = prepare_args(f, chunk, name) - if iscoroutinefunction(f): - await f(*args, **kwargs) - else: - f(*args, **kwargs) - - -def prepare_args(func: Callable, chunk: str, name: str = None) -> tuple[tuple, dict]: +def prepare_args( + func: Callable, chunk: str, name: str | None = None +) -> tuple[tuple, dict]: with contextlib.suppress(TypeError): if "name" in signature(func).parameters: return (chunk,), {"name": name} @@ -59,3 +53,30 @@ def partial_format(value: str, **formats: dict[str, Any]) -> str: with contextlib.suppress(KeyError): value = value.format(**{template_key: template_value}) return value + + +def setup_default_logs() -> None: + """Configure logs to reasonable defaults.""" + # Trigger PyMuPDF to use Python logging + # SEE: https://pymupdf.readthedocs.io/en/latest/app3.html#diagnostics + pymupdf.set_messages(pylogging=True) + + # Set sane default LiteLLM logging configuration + # SEE: https://docs.litellm.ai/docs/observability/telemetry + litellm.telemetry = False + + logging.config.dictConfig( + { + "version": 1, + "disable_existing_loggers": False, + # Lower level for verbose logs + "loggers": { + "httpcore": {"level": "WARNING"}, + "httpx": {"level": "WARNING"}, + # SEE: https://github.com/BerriAI/litellm/issues/2256 + "LiteLLM": {"level": "WARNING"}, + "LiteLLM Router": {"level": "WARNING"}, + "LiteLLM Proxy": {"level": "WARNING"}, + }, + } + ) diff --git a/pyproject.toml b/pyproject.toml index 3fa1c51..74d95a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,9 +5,9 @@ requires = ["setuptools>=64", "setuptools_scm>=8"] [dependency-groups] codeflash = [ "codeflash>=0.7", # Pin to keep recent - "llm-client[dev]", + "fh-llm-client[dev]", ] -dev = ["llm-client[dev]"] +dev = ["fh-llm-client[dev]"] [project] authors = [ @@ -22,42 +22,42 @@ classifiers = [ "Programming Language :: Python", ] dependencies = [ + "PyMuPDF>=1.24.12", # For pymupdf.set_messages addition "aiofiles", + "coredis", "dm-tree", "fhaviary>=0.8.2", # For core namespace "httpx", - "litellm>=1.40.15", # For LITELLM_LOG addition + "limits", + "litellm>=1.44", # For LITELLM_LOG addition "networkx[default]~=3.4", # Pin for pydot 
fix - "numpy>=1.20", # For numpy.typing - "openai>=1", + "numpy", "pydantic~=2.0", "tenacity", - "tiktoken", + "tiktoken>=0.4.0", "tqdm", "typing-extensions; python_version <= '3.11'", # for typing.override "usearch>=2.13", # For py.typed ] -description = "Agent framework for constructing language model agents and training on constructive tasks." +description = "A client to provide LLM responses for FutureHouse applications." dynamic = ["version"] license = {file = "LICENSE"} -name = "llm-client" +name = "fh-llm-client" readme = "README.md" requires-python = ">=3.11" [project.optional-dependencies] dev = [ + "fh-llm-client[monitor,nn,rich,server,typing,visualization,local]", "fhaviary[xml]", "ipython>=8", # Pin to keep recent - "llm-client[monitor,nn,rich,server,typing,visualization]", - "litellm!=1.49.4,!=1.49.5,!=1.49.6", # For https://github.com/BerriAI/litellm/issues/6216 "mypy>=1.8", # Pin for mutable-override - "openai<1.47", # Pin for https://github.com/BerriAI/litellm/issues/5854 "pre-commit>=3.4", # Pin to keep recent - "pydantic~=2.9", # Pydantic 2.9 changed JSON schema exports 'allOf', so ensure tests match + "pydantic~=2.0", "pylint-pydantic", - "pylint>=3.2", # Pin to keep recent "pytest-asyncio", "pytest-mock", + "pytest-mock", "pytest-recording", "pytest-rerunfailures", "pytest-subtests", @@ -65,8 +65,12 @@ dev = [ "pytest-timer[colorama]", "pytest-xdist", "pytest>=8", # Pin to keep recent + "python-dotenv", "refurb>=2", # Pin to keep recent ] +local = [ + "sentence-transformers", +] monitor = [ "wandb", ] @@ -232,6 +236,7 @@ disable = [ "too-many-positional-arguments", # Don't care to enforce this "too-many-return-statements", # Rely on ruff PLR0911 for this "too-many-statements", # Rely on ruff PLR0915 for this + "undefined-loop-variable", # Don't care to enforce this "ungrouped-imports", # Rely on ruff I001 for this "unidiomatic-typecheck", # Rely on ruff E721 for this "unreachable", # Rely on mypy unreachable for this @@ -258,9 +263,6 @@ score = false min-similarity-lines = 12 [tool.pytest.ini_options] -# Add the specified OPTS to the set of command line arguments as if they had been -# specified by the user. -addopts = "--doctest-modules" # Sets a list of filters and actions that should be taken for matched warnings. # By default all warnings emitted during the test session will be displayed in # a summary at the end of the test session. @@ -268,13 +270,15 @@ filterwarnings = [ "ignore:Support for class-based `config` is deprecated, use ConfigDict instead", # SEE: https://github.com/BerriAI/litellm/issues/5648 "ignore:The `dict` method is deprecated; use `model_dump` instead", # SEE: https://github.com/BerriAI/litellm/issues/5987 "ignore:Use 'content=<...>' to upload raw bytes/text content:DeprecationWarning", # SEE: https://github.com/BerriAI/litellm/issues/5986 + "ignore:builtin type (SwigPyPacked|SwigPyObject|swigvarlink) has no __module__:DeprecationWarning:importlib._bootstrap", # SEE: https://github.com/pymupdf/PyMuPDF/issues/3931 --> https://github.com/swig/swig/issues/2881#issuecomment-2332652634 'ignore:open_text is deprecated. 
Use files\(\) instead:DeprecationWarning', # SEE: https://github.com/BerriAI/litellm/issues/5647 + 'ignore:pkg_resources is deprecated as an API.:DeprecationWarning:pybtex', # SEE: https://bitbucket.org/pybtex-devs/pybtex/issues/169/replace-pkg_resources-with ] # List of directories that should be searched for tests when no specific directories, # files or test ids are given in the command line when executing pytest from the rootdir # directory. File system paths may use shell-style wildcards, including the recursive ** # pattern. -testpaths = ["src", "tests"] +testpaths = ["tests"] [tool.refurb] enable_all = true @@ -292,11 +296,7 @@ ignore = [ [tool.ruff] # Line length to use when enforcing long-lines violations (like `E501`). -line-length = 88 -# The minimum Python version to target, e.g., when considering automatic code -# upgrades, like rewriting type annotations. Ruff will not propose changes -# using features that are not available in the given version. -target-version = "py311" +line-length = 120 # Enable application of unsafe fixes. unsafe-fixes = true @@ -386,9 +386,9 @@ ignore = [ "S311", # Ok to use python random "SLF001", # Overly pedantic "T201", # Overly pedantic - "TCH001", # TCH001, TCH002, TCH003: don't care to enforce type checking blocks - "TCH002", - "TCH003", + "TC001", # TCH001, TCH002, TCH003: don't care to enforce type checking blocks + "TC002", + "TC003", "TD002", # Don't care for TODO author "TD003", # Don't care for TODO links "TRY003", # Overly pedantic @@ -412,6 +412,7 @@ mypy-init-return = true "F841", # Tests can have unused locals "N802", # Tests function names can match class names "PLR2004", # Tests can have magic values + "S301", # can test pickle ] "docs/**.ipynb" = [ "PLE1142", # allow async @@ -432,14 +433,16 @@ max-line-length = 120 convention = "google" [tool.setuptools.packages.find] -where = ["src"] +exclude = ["tests"] +include = ["llmclient"] +where = ["."] [tool.setuptools_scm] -version_file = "src/llmclient/version.py" +version_file = "llmclient/version.py" [tool.tomlsort] all = true in_place = true spaces_before_inline_comment = 2 # Match Python PEP 8 spaces_indent_inline_array = 4 # Match Python PEP 8 -trailing_comma_inline_array = true \ No newline at end of file +trailing_comma_inline_array = true diff --git a/src/llmclient/__init__.py b/src/llmclient/__init__.py deleted file mode 100644 index df3cc4a..0000000 --- a/src/llmclient/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from llmclient.model import LLMModel -from llmclient.result import LLMResult - -__all__ = [ - "LLMModel", - "LLMResult", -] diff --git a/src/llmclient/model.py b/src/llmclient/model.py deleted file mode 100644 index c2c433a..0000000 --- a/src/llmclient/model.py +++ /dev/null @@ -1,504 +0,0 @@ -import asyncio -import json -from collections.abc import AsyncGenerator, Awaitable, Callable, Iterable -from typing import Any, AsyncIterable, ClassVar, Self, cast - -import litellm -from aviary.core import ( - Message, - Tool, - ToolRequestMessage, - ToolsAdapter, -) -from pydantic import BaseModel, ConfigDict, Field, ValidationError, model_validator - -from llmclient.constants import default_system_prompt -from llmclient.result import LLMResult -from llmclient.util import do_callbacks, is_coroutine_callable - - -class Chunk(BaseModel): - model_config = ConfigDict(extra="forbid", frozen=True) - - text: str | None - prompt_tokens: int - completion_tokens: int - - def __str__(self): - return self.text - - -class JSONSchemaValidationError(ValueError): - """Raised when the completion 
does not match the specified schema.""" - - -def sum_logprobs(choice: litellm.utils.Choices) -> float | None: - """Calculate the sum of the log probabilities of an LLM completion (a Choices object). - - Args: - choice: A sequence of choices from the completion. - - Returns: - The sum of the log probabilities of the choice. - """ - try: - logprob_obj = choice.logprobs - except AttributeError: - return None - if isinstance(logprob_obj, dict): - if logprob_obj.get("content"): - return sum( - logprob_info["logprob"] for logprob_info in logprob_obj["content"] - ) - elif choice.logprobs.content: - return sum(logprob_info.logprob for logprob_info in choice.logprobs.content) - return None - - -def validate_json_completion( - completion: litellm.ModelResponse, output_type: type[BaseModel] -) -> None: - """Validate a completion against a JSON schema. - - Args: - completion: The completion to validate. - output_type: The Pydantic model to validate the completion against. - """ - try: - for choice in completion.choices: - if not hasattr(choice, "message") or not choice.message.content: - continue - # make sure it is a JSON completion, even if None - # We do want to modify the underlying message - # so that users of it can just parse it as expected - choice.message.content = ( - choice.message.content.split("```json")[-1].split("```")[0] or "" - ) - output_type.model_validate_json(choice.message.content) - except ValidationError as err: - raise JSONSchemaValidationError( - "The completion does not match the specified schema." - ) from err - - -class LLMModel(BaseModel): - """Run n completions at once, all starting from the same messages.""" - - model_config = ConfigDict(extra="forbid") - - # this should keep the original model - # if fine-tuned, this should still refer to the base model - name: str = "unknown" - llm_type: str | None = None - llm_result_callback: ( - Callable[[LLMResult], None] | Callable[[LLMResult], Awaitable[None]] | None - ) = Field( - default=None, - description=( - "An async callback that will be executed on each" - " LLMResult (different than callbacks that execute on each chunk)" - ), - exclude=True, - ) - config: dict = Field( - default={ - "model": "gpt-3.5-turbo", # Default model should have cheap input/output for testing - "temperature": 0.1, - } - ) - encoding: Any | None = None - - def __str__(self) -> str: - return f"{type(self).__name__} {self.name}" - - def infer_llm_type(self) -> str: - return "completion" - - def count_tokens(self, text: str) -> int: - return len(text) // 4 # gross approximation - - async def run_prompt( - self, - prompt: str, - data: dict, - callbacks: list[Callable] | None = None, - name: str | None = None, - skip_system: bool = False, - system_prompt: str = default_system_prompt, - ) -> LLMResult: - if not self.llm_type: - self.llm_type = self.infer_llm_type() - - run = getattr(self, "_run_" + self.llm_type) - if not run: - raise ValueError(f"Unknown llm_type {self.llm_type!r}.") - - return await run(prompt, data, callbacks, name, skip_system, system_prompt) - - async def get_result(self, usage, result, output, start_clock): - if sum(usage) > 0: - result.prompt_count, result.completion_count = usage - elif output: - result.completion_count = self.count_tokens(output) - - result.text = output - result.seconds_to_last_token = asyncio.get_running_loop().time() - start_clock - - if self.llm_result_callback: - if is_coroutine_callable(self.llm_result_callback): - await self.llm_result_callback(result) # type: ignore[misc] - else: - 
self.llm_result_callback(result) - return result - - async def add_chunk_text( - self, result, callbacks, chunk, text_result, start_clock, name - ): - if not chunk.text: - return - - if result.seconds_to_first_token == 0: - result.seconds_to_first_token = ( - asyncio.get_running_loop().time() - start_clock - ) - - text_result.append(chunk.text) - await do_callbacks(callbacks, chunk.text, name) - - async def _run_chat( - self, - prompt: str, - data: dict, - callbacks: list[Callable] | None = None, - name: str | None = None, - skip_system: bool = False, - system_prompt: str = default_system_prompt, - ) -> LLMResult: - """Run a chat prompt. - - Args: - prompt: Prompt to use. - data: Keys for the input variables that will be formatted into prompt. - callbacks: Optional functions to call with each chunk of the completion. - name: Optional name for the result. - skip_system: Set True to skip the system prompt. - system_prompt: System prompt to use. - - Returns: - Result of the chat. - """ - system_message_prompt = {"role": "system", "content": system_prompt} - human_message_prompt = {"role": "user", "content": prompt} - messages = [ - {"role": m["role"], "content": m["content"].format(**data)} - for m in ( - [human_message_prompt] - if skip_system - else [system_message_prompt, human_message_prompt] - ) - ] - result = LLMResult( - model=self.name, - name=name, - prompt=messages, - prompt_count=( - sum(self.count_tokens(m["content"]) for m in messages) - + sum(self.count_tokens(m["role"]) for m in messages) - ), - ) - - start_clock = asyncio.get_running_loop().time() - if not callbacks: - chunk = await self.achat(messages) - output = chunk.text - else: - completion = await self.achat_iter(messages) # type: ignore[misc] - text_result = [] - async for chunk in completion: - await self.add_chunk_text( - result, callbacks, chunk, text_result, start_clock, name - ) - output = "".join(text_result) - - usage = chunk.prompt_tokens, chunk.completion_tokens - return await self.get_result(usage, result, output, start_clock) - - async def _run_completion( - self, - prompt: str, - data: dict, - callbacks: Iterable[Callable] | None = None, - name: str | None = None, - skip_system: bool = False, - system_prompt: str = default_system_prompt, - ) -> LLMResult: - """Run a completion prompt. - - Args: - prompt: Prompt to use. - data: Keys for the input variables that will be formatted into prompt. - callbacks: Optional functions to call with each chunk of the completion. - name: Optional name for the result. - skip_system: Set True to skip the system prompt. - system_prompt: System prompt to use. - - Returns: - Result of the completion. 
- """ - formatted_prompt: str = ( - prompt if skip_system else system_prompt + "\n\n" + prompt - ).format(**data) - result = LLMResult( - model=self.name, - name=name, - prompt=formatted_prompt, - prompt_count=self.count_tokens(formatted_prompt), - ) - - start_clock = asyncio.get_running_loop().time() - if not callbacks: - chunk = await self.acomplete(formatted_prompt) - output = chunk.text - else: - completion = self.acomplete_iter(formatted_prompt) - text_result = [] - async for chunk in completion: - await self.add_chunk_text( - result, callbacks, chunk, text_result, start_clock, name - ) - output = "".join(text_result) - - usage = chunk.prompt_tokens, chunk.completion_tokens - return await self.get_result(usage, result, output, start_clock) - - @model_validator(mode="after") - def set_model_name(self) -> Self: - if self.name != "unknown" and self.config.get("model", "unknown") in ( - "gpt-3.5-turbo", - None, - ): - self.config["model"] = self.name - elif "model" in self.config and self.name == "unknown": - self.name = self.config["model"] - # note we do not consider case where both are set - # because that could be true if the model is fine-tuned - return self - - async def acomplete(self, prompt: str) -> Chunk: - """Return the completion as string and the number of tokens in the prompt and completion.""" - raise NotImplementedError - - async def acomplete_iter(self, prompt: str) -> AsyncIterable[Chunk]: # noqa: ARG002 - """Return an async generator that yields chunks of the completion. - - Only the last tuple will be non-zero. - """ - raise NotImplementedError - - async def achat( - self, messages: Iterable[Message], **kwargs - ) -> litellm.ModelResponse: - return await litellm.acompletion( - messages=[m.model_dump(by_alias=True) for m in messages], - **(self.config | kwargs), - ) - - async def achat_iter(self, messages: Iterable[Message], **kwargs) -> AsyncGenerator: - return cast( - AsyncGenerator, - await litellm.acompletion( - messages=[m.model_dump(by_alias=True) for m in messages], - stream=True, - stream_options={ - "include_usage": True, # Included to get prompt token counts - }, - **(self.config | kwargs), - ), - ) - - # SEE: https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice - # > `required` means the model must call one or more tools. 
- TOOL_CHOICE_REQUIRED: ClassVar[str] = "required" - - async def handle_callbacks( - self, tools, n, chat_kwargs, prompt, callbacks, messages, start_clock, results - ): - if tools: - raise NotImplementedError("Using tools with callbacks is not supported") - if n > 1: - raise NotImplementedError( - "Multiple completions with callbacks is not supported" - ) - result = LLMResult(model=self.name, config=chat_kwargs, prompt=prompt) - stream_completion = await self.achat_iter(messages, **chat_kwargs) - role = "assistant" - text_result = [] - - async for chunk in stream_completion: - delta = chunk.choices[0].delta - role = delta.role or role - if hasattr(chunk, "usage"): - result.prompt_count = chunk.usage.prompt_tokens - - if not delta.content: - continue - - if result.seconds_to_first_token == 0: - result.seconds_to_first_token = ( - asyncio.get_running_loop().time() - start_clock - ) - - text_result.append(delta.content) - await do_callbacks(callbacks, delta.content) - - output = "".join(text_result) - result.completion_count = litellm.token_counter( - model=self.name, - text=output, - ) - # TODO: figure out how tools stream, and log probs - result.messages = [Message(role=role, content=output)] - results.append(result) - - async def handle_no_callbacks( - self, tools, chat_kwargs, prompt, results, output_type - ): - completion: litellm.ModelResponse = await self.achat(prompt, **chat_kwargs) - if output_type: - validate_json_completion(completion, output_type) - - for choice in completion.choices: - if isinstance(choice, litellm.utils.StreamingChoices): - raise NotImplementedError("Streaming is not yet supported.") - - if ( - tools is not None # Allows for empty tools list - or choice.finish_reason == "tool_calls" - or (getattr(choice.message, "tool_calls", None) is not None) - ): - serialized_choice_message = choice.message.model_dump() - serialized_choice_message["tool_calls"] = ( - serialized_choice_message.get("tool_calls") or [] - ) - output_messages: list[Message | ToolRequestMessage] = [ - ToolRequestMessage(**serialized_choice_message) - ] - else: - output_messages = [Message(**choice.message.model_dump())] - - results.append( - LLMResult( - model=self.name, - config=chat_kwargs, - prompt=prompt, - messages=output_messages, - logprob=sum_logprobs(choice), - system_fingerprint=completion.system_fingerprint, - # Note that these counts are aggregated over all choices - completion_count=completion.usage.completion_tokens, # type: ignore[attr-defined,unused-ignore] - prompt_count=completion.usage.prompt_tokens, # type: ignore[attr-defined,unused-ignore] - ) - ) - - async def call( # noqa: C901, PLR0915 - self, - messages: list[Message], - callbacks: list[Callable] | None = None, - output_type: type[BaseModel] | None = None, - tools: list[Tool] | None = None, - tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED, - **chat_kwargs, - ) -> list[LLMResult]: - start_clock = asyncio.get_running_loop().time() - - # Deal with tools. 
OpenAI throws an error if tool list is empty, - # so skip this block if tools in (None, []) - if tools: - chat_kwargs["tools"] = ToolsAdapter.dump_python( - tools, exclude_none=True, by_alias=True - ) - if tool_choice is not None: - chat_kwargs["tool_choice"] = ( - { - "type": "function", - "function": {"name": tool_choice.info.name}, - } - if isinstance(tool_choice, Tool) - else tool_choice - ) - - # deal with specifying output type - if output_type: - schema = json.dumps(output_type.model_json_schema(mode="serialization")) - schema_msg = f"Respond following this JSON schema:\n\n{schema}" - # Get the system prompt and its index, or the index to add it - i, system_prompt = next( - ((i, m) for i, m in enumerate(messages) if m.role == "system"), - (0, None), - ) - messages = [ - *messages[:i], - ( - system_prompt.append_text(schema_msg, inplace=False) - if system_prompt - else Message(role="system", content=schema_msg) - ), - *messages[i + 1 if system_prompt else i :], - ] - chat_kwargs["response_format"] = {"type": "json_object"} - - # add static configuration to kwargs - chat_kwargs = self.config | chat_kwargs - n = chat_kwargs.get("n", 1) # number of completions - if n < 1: - raise ValueError("Number of completions (n) must be >= 1.") - - prompt = [ - ( - m - if not isinstance(m, ToolRequestMessage) or m.tool_calls - # OpenAI doesn't allow for empty tool_calls lists, so downcast empty - # ToolRequestMessage to Message here - else Message(role=m.role, content=m.content) - ) - for m in messages - ] - results: list[LLMResult] = [] - - if callbacks: - await self.handle_callbacks( - tools, n, chat_kwargs, prompt, callbacks, messages, start_clock, results - ) - else: - await self.handle_no_callbacks( - tools, chat_kwargs, prompt, results, output_type - ) - - if not results: - # This happens in unit tests. We should probably not keep this block around - # long-term. Previously, we would emit an empty ToolRequestMessage if - # completion.choices were empty, so I am replicating that here. 
- results.append( - LLMResult( - model=self.name, - config=chat_kwargs, - prompt=prompt, - messages=[ToolRequestMessage(tool_calls=[])], - ) - ) - - end_clock = asyncio.get_running_loop().time() - - for result in results: - # Manually update prompt count if not set, which can - # happen if the target model doesn't support 'include_usage' - if not result.prompt_count: - result.prompt_count = litellm.token_counter( - model=self.name, - messages=[m.model_dump() for m in result.messages], # type: ignore[union-attr] - ) - - # update with server-side counts - result.seconds_to_last_token = end_clock - start_clock - - return results diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cassettes/TestLiteLLMModel.test_max_token_truncation[with-router].yaml b/tests/cassettes/TestLiteLLMModel.test_max_token_truncation[with-router].yaml new file mode 100644 index 0000000..6a2c0d8 --- /dev/null +++ b/tests/cassettes/TestLiteLLMModel.test_max_token_truncation[with-router].yaml @@ -0,0 +1,103 @@ +interactions: + - request: + body: + '{"messages": [{"role": "user", "content": "Please tell me a story"}], "model": + "gpt-4o-mini", "max_tokens": 3}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "110" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4ySUUvDMBSF3/srQp5b6bp13fomE3wRRBEUREqW3LaZaRKTFBxj/13SdmuHE3zJ + w/3uOTn3JocAIcwZzhGmNXG00SK6fSNfyw1/TWdsV64fnjYZZC/ts7in+/oOh16htjug7qS6oarR + AhxXssfUAHHgXWfZPEmX62y96kCjGAgvq7SLFipquORREieLKM6i2WpQ14pTsDhH7wFCCB260+eU + DL5xjuLwVGnAWlIBzs9NCGGjhK9gYi23jkiHwxFSJR3ILvqjpIBarSQi0w4DZWuJTylbIYb68Xyl + UJU2amsHfq6XXHJbFwaIVdLbC5CVq3HHjwFCH91w7UVerI1qtCuc+gTpLWdJb4jHlY5wPjCnHBET + TRpeMSsYOMKFnewGU0JrYKNyXCRpGVcTEEyG/p3lmnc/OJfVf+xHQCloB6zQBhinl/OObQb8f/ur + 7bziLjC2e+ugKUouKzDa8P61S13EWZxuy1VGYxwcgx8AAAD//wMAIAzc+vsCAAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e84b2f90c81230e-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 21:23:18 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=KqKw89zgaG32GNn3lg4IvjG2X2zLmPKRiY1oedcDUVM-1732569798-1.0.1.1-y_oblt_Jp3n1T.HtHFHrxbRegDqoC8gojQPBSV52IMBH.bx8c0QNAUrWotLzzQGqqbIDjdhl0AUutzvWk20psg; + path=/; expires=Mon, 25-Nov-24 21:53:18 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=F_vLQWJJbY8GvEB4YIomOCy2NMswE7Ex8TL0Z4OIxgg-1732569798934-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "256" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29997" + 
x-ratelimit-remaining-tokens: + - "149998170" + x-ratelimit-reset-requests: + - 4ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_77977a66fa96e40ffe5c3bc7840c0948 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestLiteLLMModel.test_max_token_truncation[without-router].yaml b/tests/cassettes/TestLiteLLMModel.test_max_token_truncation[without-router].yaml new file mode 100644 index 0000000..1b1690c --- /dev/null +++ b/tests/cassettes/TestLiteLLMModel.test_max_token_truncation[without-router].yaml @@ -0,0 +1,103 @@ +interactions: + - request: + body: + '{"messages": [{"role": "user", "content": "Please tell me a story"}], "model": + "gpt-4o-mini", "max_tokens": 3}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "110" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4ySMW+DMBCFd36F5RkqQkJI2DpnqBSpUtqqQo59gFtju7aRkkb575UhAaKmUhcP + 9917fnf2KUAIc4ZzhGlNHG20iB535GvewmZ53MR8szhu29ftLnmx3/T5UOHQK9T+A6i7qh6oarQA + x5XsMTVAHHjXWTZP0uU6W6cdaBQD4WWVdtFCRQ2XPEriZBHFWTRbXdS14hQsztFbgBBCp+70OSWD + A85RHF4rDVhLKsD50IQQNkr4CibWcuuIdDgcIVXSgeyiP0kKqNVKIjLtMFC2lviUshXiUj8PVwpV + aaP29sKHesklt3VhgFglvb0AWbkad/wcIPTeDdfe5MXaqEa7wqlPkN5ylvSGeFzpCOcX5pQjYqJJ + wztmBQNHuLCT3WBKaA1sVI6LJC3jagKCydC/s9zz7gfnsvqP/QgoBe2AFdoA4/R23rHNgP9vf7UN + K+4CY3u0Dpqi5LICow3vX7vURZzF6b5cZTTGwTn4AQAA//8DAGJIF8n7AgAA + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e84b2e46a3e5c18-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 21:23:15 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=pmbS4O0SdzCjzvZjensXpq5w1I1GUEOUOh_2ExJ8_Rc-1732569795-1.0.1.1-RsW0ExCXu..OFPcHXSvL3vh7_PqZu9gX0DgJI0BGjr2oborEPzdC6ZSsqZTfP9zf3YOigH1hcfDePksbYyIO8A; + path=/; expires=Mon, 25-Nov-24 21:53:15 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=v_wT.VKOzzmIot1JtDgHglmHPmgOB.YvZxQznjpUEiA-1732569795663-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "150" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999990" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_817b41328aed9a1e4a84bd7ec79e22f7 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestLiteLLMModel.test_run_prompt[with-router].yaml b/tests/cassettes/TestLiteLLMModel.test_run_prompt[with-router].yaml new file mode 100644 index 0000000..e234281 --- /dev/null +++ b/tests/cassettes/TestLiteLLMModel.test_run_prompt[with-router].yaml @@ 
-0,0 +1,495 @@ +interactions: + - request: + body: + '{"messages": [{"role": "user", "content": "The duck says"}], "model": "gpt-4o-mini", + "max_tokens": 56, "stream": true, "stream_options": {"include_usage": true}, + "temperature": 0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "179" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: + 'data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"The"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + duck"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + says"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + \""},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"qu"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"ack"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"!\""},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + What"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + else"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + would"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + like"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + know"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + about"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + ducks"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + or"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + their"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + sounds"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3wHeRRIuOofrbBi9d44mS2uhs","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":20,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e84b2e5a826fb34-SJC + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 25 Nov 2024 21:23:15 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=EPEcKxQAwr.FHIAtJH7QaNJ8DYP5ttMHPLWWG9mlovI-1732569795-1.0.1.1-Zi_lzLa.4UpZcO4ApjlwTQCvgvKFV5K08QpJsarCNOrGVdpVg732lx_eRJlTSq3F0xrwgvKi4S.YOGn1drcoqQ; + path=/; expires=Mon, 25-Nov-24 21:53:15 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=na9.ja6VE.k8hCQXCpou8cLN_mMFPF5vEW9J4lpkdCI-1732569795943-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "374" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999938" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_43f164b53abd2b51e63195007476701e + status: + code: 200 + message: OK + - request: + body: + '{"messages": [{"role": "user", "content": "The duck says"}], "model": "gpt-4o-mini", + "max_tokens": 56, "stream": false, "temperature": 0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "137" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSTY/TMBS851c8fG5Q2m03214QEuLICcFqWRS59ktj4vi5fvaWsup/R0k/0gqQ + uPgw82Y88+zXDEAYLVYgVCOj6rzN3z/K7bz79FKUH7b7Rfu05OnT7teX9PL4ca3FpFfQ+geqeFa9 + VdR5i9GQO9IqoIzYu07Lu9niflku7weiI422l218zOeUd8aZfFbM5nlR5tOHk7oho5DFCr5lAACv + w9nndBp/ihUUkzPSIbPcoFhdhgBEINsjQjIbjtJFMRlJRS6iG6J/bhB0Ui2w3DM8i22Sqn3zLOBr + IyOgZYQdJathTwmsaREiQetoB3JNKQ5aBgoQGzQBmJLT/O76soB1YtkXdsnaE364pLe08YHWfOIv + eG2c4aYKKJlcn5QjeTGwhwzg+7CldFNc+ECdj1WkFl1vOD0tSYxvM5KzMxkpSjvid2f8xq3SGKWx + fLVloaRqUI/K8Ulk0oauiOyq859h/uZ97G3c5n/sR0Ip9BF15QNqo24Lj2MB+5/7r7HLjofAgvcc + satq4zYYfDDHf1P7qiiLxbp+KFUhskP2GwAA//8DALtwqC9FAwAA + headers: + CF-Cache-Status: + - DYNAMIC 
+ CF-RAY: + - 8e84b2ea2b04172a-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 21:23:16 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=VzsSiHm0Z_BUTeSN65gluQiQrzN30OY3FbFYc2f2GGI-1732569796-1.0.1.1-qLA_SlZNrcg_rchFZXQL8x1i44Xf0JSNTu5cVR2qroTv40NhPne58JGlR6an_biXT6kILUb7UkRQuFktt2.CbA; + path=/; expires=Mon, 25-Nov-24 21:53:16 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=EBPncJv5hpEuGElVMHoZQPGVCQJ0dmgebSzO3h1NrW4-1732569796811-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "510" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999938" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_12734547c5251069ac4d6765baab7c39 + status: + code: 200 + message: OK + - request: + body: + '{"messages": [{"role": "user", "content": "The duck says"}], "model": "gpt-4o-mini", + "max_tokens": 56, "stream": true, "stream_options": {"include_usage": true}, + "temperature": 0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "179" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: + 'data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"The"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + duck"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + says"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + \""},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"qu"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"ack"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"!\""},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + It''s"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + a"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + classic"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + sound"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + associated"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + ducks"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + Is"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + there"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + something"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + specific"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + you''d"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + like"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + know"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + about"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + ducks"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + or"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + their"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + sounds"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-AXaq4vpiIBmDV1ZDl16ItHXocRR4R","object":"chat.completion.chunk","created":1732569796,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":29,"total_tokens":39,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e84b2ee2ea2fb34-SJC + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 25 Nov 2024 21:23:17 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "223" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999938" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_718a08157d6246d33e3155fab603aa96 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestLiteLLMModel.test_run_prompt[without-router].yaml b/tests/cassettes/TestLiteLLMModel.test_run_prompt[without-router].yaml new file mode 100644 index 0000000..082fa72 --- /dev/null +++ b/tests/cassettes/TestLiteLLMModel.test_run_prompt[without-router].yaml @@ -0,0 +1,501 @@ +interactions: + - request: + body: + '{"messages": [{"role": "user", "content": "The duck says"}], "model": "gpt-4o-mini", + "max_tokens": 56, "stream": true, "stream_options": {"include_usage": true}, + "temperature": 0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "179" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: + 'data: 
{"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"The"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + duck"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + says"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + \""},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"qu"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"ack"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"!\""},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + Ducks"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + are"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + known"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + for"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + their"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + distinctive"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + qu"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"acking"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + sound"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + Is"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + there"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + something"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + specific"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + would"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + like"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + know"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + about"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + ducks"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + or"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + their"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + sounds"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-AXaq3IR0z8uEvoVJE0vyqiQTgtOWX","object":"chat.completion.chunk","created":1732569795,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":32,"total_tokens":42,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e84b2e55beacf0a-SJC + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 25 Nov 2024 21:23:15 GMT + Server: + - 
cloudflare + Set-Cookie: + - __cf_bm=yEIUr9nquP_ccNfJrvwHg8LrvAJbcRrseCICyMhmTOU-1732569795-1.0.1.1-wJWO00pGrQLCiAnCpi3CgxNTF6QY1KT8.LAbPFNGzHYuumm_kDTw6l5BnZz4DkH0_XPdFgVv2jgZmWma2GNF2Q; + path=/; expires=Mon, 25-Nov-24 21:53:15 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=JXYFaWG3HJZsfGDbb62wzCtTr6fxOAAubmPRa36h4Wk-1732569795729-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "212" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999938" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2e462744dab3bbcaa3bacabf4a8941f4 + status: + code: 200 + message: OK + - request: + body: + '{"messages": [{"role": "user", "content": "The duck says"}], "model": "gpt-4o-mini", + "max_tokens": 56, "temperature": 0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "120" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSTYvbMBC9+1dMddlLvDjZTZ3kUhbaQrq9FFpI6RajyGNbjaxRNDLbsOS/L3I+ + nNAWevHhvXnP783oJQEQuhQLEKqRQbXOpA8ruZ1+284y+vxl9V7Wqw+fvqtJ9dE8Pj4sxSgqaP0L + VTipbhW1zmDQZA+08igDRtdxfjeZvp3n87wnWirRRFntQnpPaautTifZ5D7N8nQ8O6ob0gpZLOBH + AgDw0n9jTlvib7GAbHRCWmSWNYrFeQhAeDIREZJZc5A2iNFAKrIBbR/9a4NQdmoDLHcMT2LbSbV5 + 8yRgGW4YJCgTHRQwdbYEyUxKx1rwrEPTK/kWlgyhQY/A1GJotK2BHSpdaQU76m5KMHqDEAg2lp5B + rqkLBy2Qj1LtDz/gd5cxPVYdy7gq2xlzxPfn3oZq52nNR/6MV9pqbgqPksnGjhzIiZ7dJwA/+/12 + VysTzlPrQhFogzYajo/rFcNVB3IyP5KBgjQDfnfCr9yKEoPUhi/uI5RUDZaDcjim7EpNF0Ry0fnP + MH/zPvTWtv4f+4FQCl3AsnAeS62uCw9jHuOb/9fYecd9YME7DtgWlbY1euf14cVVrsjybLquZrnK + RLJPXgEAAP//AwCKfB1wfwMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e84b2ef7f0dcf0a-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 25 Nov 2024 21:23:17 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "692" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999938" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_b7fc03e0f47f932ab987eb23b736f3f1 + status: + code: 200 + message: OK + - request: + body: + 
'{"messages": [{"role": "user", "content": "The duck says"}], "model": "gpt-4o-mini", + "max_tokens": 56, "stream": true, "stream_options": {"include_usage": true}, + "temperature": 0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "179" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.46.1 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.46.1 + x-stainless-raw-response: + - "true" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: + 'data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"The"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + duck"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + says"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + \""},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"qu"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"ack"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"!\""},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + What"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + else"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + would"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + like"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + know"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + about"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + ducks"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + or"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + their"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":" + sounds"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: 
{"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null} + + + data: {"id":"chatcmpl-AXaq6Oo60rPu2e3GjHLbqHmfCibSr","object":"chat.completion.chunk","created":1732569798,"model":"gpt-4o-mini-2024-07-18","system_fingerprint":"fp_0705bf87c0","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":20,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e84b2f52d9ccf0a-SJC + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 25 Nov 2024 21:23:18 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "106" + openai-version: + - "2020-10-01" + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999938" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f253a75b18af17320fdaa40a9449ee60 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..b0f5177 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import logging +import shutil +from collections.abc import Iterator +from pathlib import Path +from typing import Any + +# from unittest.mock import patch +import pytest +from dotenv import load_dotenv + +TESTS_DIR = Path(__file__).parent +CASSETTES_DIR = TESTS_DIR / "cassettes" + + +@pytest.fixture(autouse=True, scope="session") +def _load_env() -> None: + load_dotenv() + + +OPENAI_API_KEY_HEADER = "authorization" +ANTHROPIC_API_KEY_HEADER = "x-api-key" +CROSSREF_HEADER_KEY = "Crossref-Plus-API-Token" +SEMANTIC_SCHOLAR_HEADER_KEY = "x-api-key" +# SEE: https://github.com/kevin1024/vcrpy/blob/v6.0.1/vcr/config.py#L43 +VCR_DEFAULT_MATCH_ON = "method", "scheme", "host", "port", "path", "query" + + +@pytest.fixture(scope="session", name="vcr_config") +def fixture_vcr_config() -> dict[str, Any]: + return { + "filter_headers": [ + CROSSREF_HEADER_KEY, + SEMANTIC_SCHOLAR_HEADER_KEY, + OPENAI_API_KEY_HEADER, + ANTHROPIC_API_KEY_HEADER, + "cookie", + ], + "record_mode": "once", + "allow_playback_repeats": True, + "cassette_library_dir": str(CASSETTES_DIR), + } + + +@pytest.fixture +def tmp_path_cleanup(tmp_path: Path) -> Iterator[Path]: + yield tmp_path + # Cleanup after the test + if tmp_path.exists(): + shutil.rmtree(tmp_path, ignore_errors=True) + + +@pytest.fixture(scope="session", name="stub_data_dir") +def fixture_stub_data_dir() -> Path: + return Path(__file__).parent / "stub_data" + + +@pytest.fixture(name="reset_log_levels") +def fixture_reset_log_levels(caplog) -> Iterator[None]: + logging.getLogger().setLevel(logging.DEBUG) + + for name in logging.root.manager.loggerDict: + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + logger.propagate 
= True + + caplog.set_level(logging.DEBUG) + + yield + + for name in logging.root.manager.loggerDict: + logger = logging.getLogger(name) + logger.setLevel(logging.NOTSET) + logger.propagate = True diff --git a/tests/test_embeddings.py b/tests/test_embeddings.py new file mode 100644 index 0000000..c91cc5a --- /dev/null +++ b/tests/test_embeddings.py @@ -0,0 +1,67 @@ +import pytest + +from llmclient.embeddings import MODEL_COST_MAP, LiteLLMEmbeddingModel + + +class TestLiteLLMEmbeddingModel: + @pytest.fixture + def embedding_model(self): + return LiteLLMEmbeddingModel() + + def test_default_config_injection(self, embedding_model): + # field_validator is only triggered if the attribute is passed + embedding_model = LiteLLMEmbeddingModel(config={}) + + config = embedding_model.config + assert "kwargs" in config + assert config["kwargs"]["timeout"] == 120 + + def test_truncate_if_large_no_truncation(self, embedding_model): + texts = ["short text", "another short text"] + truncated_texts = embedding_model._truncate_if_large(texts) + assert truncated_texts == texts + + def test_truncate_if_large_with_truncation(self, embedding_model, mocker): + texts = ["a" * 10000, "b" * 10000] + mocker.patch.dict( + MODEL_COST_MAP, {embedding_model.name: {"max_input_tokens": 100}} + ) + mocker.patch( + "tiktoken.encoding_for_model", + return_value=mocker.Mock( + encode_ordinary_batch=lambda texts: [[1] * 1000 for _ in texts], + decode=lambda text: "truncated text", # noqa: ARG005 + ), + ) + truncated_texts = embedding_model._truncate_if_large(texts) + assert truncated_texts == ["truncated text", "truncated text"] + + def test_truncate_if_large_key_error(self, embedding_model, mocker): + texts = ["a" * 10000, "b" * 10000] + mocker.patch.dict( + MODEL_COST_MAP, {embedding_model.name: {"max_input_tokens": 100}} + ) + mocker.patch("tiktoken.encoding_for_model", side_effect=KeyError) + truncated_texts = embedding_model._truncate_if_large(texts) + assert truncated_texts == ["a" * 300, "b" * 300] + + @pytest.mark.asyncio + async def test_embed_documents(self, embedding_model, mocker): + texts = ["short text", "another short text"] + mocker.patch( + "llmclient.embeddings.LiteLLMEmbeddingModel._truncate_if_large", + return_value=texts, + ) + mocker.patch( + "llmclient.embeddings.LiteLLMEmbeddingModel.check_rate_limit", + return_value=None, + ) + mock_response = mocker.Mock() + mock_response.data = [ + {"embedding": [0.1, 0.2, 0.3]}, + {"embedding": [0.4, 0.5, 0.6]}, + ] + mocker.patch("litellm.aembedding", return_value=mock_response) + + embeddings = await embedding_model.embed_documents(texts) + assert embeddings == [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] diff --git a/tests/test_llms.py b/tests/test_llms.py new file mode 100644 index 0000000..7ad7358 --- /dev/null +++ b/tests/test_llms.py @@ -0,0 +1,261 @@ +import pathlib +import pickle +from typing import Any +from unittest.mock import patch + +import litellm +import pytest + +from llmclient.embeddings import ( + HybridEmbeddingModel, + LiteLLMEmbeddingModel, + SentenceTransformerEmbeddingModel, + SparseEmbeddingModel, + embedding_model_factory, +) +from llmclient.llms import Chunk, LiteLLMModel +from tests.conftest import VCR_DEFAULT_MATCH_ON + + +class TestLiteLLMModel: + @pytest.mark.vcr(match_on=[*VCR_DEFAULT_MATCH_ON, "body"]) + @pytest.mark.parametrize( + "config", + [ + pytest.param( + { + "model_list": [ + { + "model_name": "gpt-4o-mini", + "litellm_params": { + "model": "gpt-4o-mini", + "temperature": 0, + "max_tokens": 56, + }, + } + ] + }, + id="with-router", + 
), + pytest.param( + { + "pass_through_router": True, + "router_kwargs": {"temperature": 0, "max_tokens": 56}, + }, + id="without-router", + ), + ], + ) + @pytest.mark.asyncio + async def test_run_prompt(self, config: dict[str, Any]) -> None: + llm = LiteLLMModel(name="gpt-4o-mini", config=config) + + outputs = [] + + def accum(x) -> None: + outputs.append(x) + + completion = await llm.run_prompt( + prompt="The {animal} says", + data={"animal": "duck"}, + system_prompt=None, + callbacks=[accum], + ) + assert completion.model == "gpt-4o-mini" + assert completion.seconds_to_first_token > 0 + assert completion.prompt_count > 0 + assert completion.completion_count > 0 + assert str(completion) == "".join(outputs) + assert completion.cost > 0 + + completion = await llm.run_prompt( + prompt="The {animal} says", + data={"animal": "duck"}, + system_prompt=None, + ) + assert completion.seconds_to_first_token == 0 + assert completion.seconds_to_last_token > 0 + assert completion.cost > 0 + + # check with mixed callbacks + async def ac(x) -> None: + pass + + completion = await llm.run_prompt( + prompt="The {animal} says", + data={"animal": "duck"}, + system_prompt=None, + callbacks=[accum, ac], + ) + assert completion.cost > 0 + + @pytest.mark.vcr + @pytest.mark.parametrize( + ("config", "bypassed_router"), + [ + pytest.param( + { + "model_list": [ + { + "model_name": "gpt-4o-mini", + "litellm_params": {"model": "gpt-4o-mini", "max_tokens": 3}, + } + ] + }, + False, + id="with-router", + ), + pytest.param( + {"pass_through_router": True, "router_kwargs": {"max_tokens": 3}}, + True, + id="without-router", + ), + ], + ) + @pytest.mark.asyncio + async def test_max_token_truncation( + self, config: dict[str, Any], bypassed_router: bool + ) -> None: + llm = LiteLLMModel(name="gpt-4o-mini", config=config) + with patch( + "litellm.Router.atext_completion", + side_effect=litellm.Router.atext_completion, + autospec=True, + ) as mock_atext_completion: + chunk = await llm.acomplete("Please tell me a story") # type: ignore[call-arg] + if bypassed_router: + mock_atext_completion.assert_not_awaited() + else: + mock_atext_completion.assert_awaited_once() + assert isinstance(chunk, Chunk) + assert chunk.completion_tokens == 3 + assert chunk.text + assert len(chunk.text) < 20 + + def test_pickling(self, tmp_path: pathlib.Path) -> None: + pickle_path = tmp_path / "llm_model.pickle" + llm = LiteLLMModel( + name="gpt-4o-mini", + config={ + "model_list": [ + { + "model_name": "gpt-4o-mini", + "litellm_params": { + "model": "gpt-4o-mini", + "temperature": 0, + "max_tokens": 56, + }, + } + ] + }, + ) + with pickle_path.open("wb") as f: + pickle.dump(llm, f) + with pickle_path.open("rb") as f: + rehydrated_llm = pickle.load(f) + assert llm.name == rehydrated_llm.name + assert llm.config == rehydrated_llm.config + assert llm.router.deployment_names == rehydrated_llm.router.deployment_names + + +@pytest.mark.asyncio +async def test_embedding_model_factory_sentence_transformer() -> None: + """Test that the factory creates a SentenceTransformerEmbeddingModel when given an 'st-' prefix.""" + embedding = "st-multi-qa-MiniLM-L6-cos-v1" + model = embedding_model_factory(embedding) + assert isinstance( + model, SentenceTransformerEmbeddingModel + ), "Factory did not create SentenceTransformerEmbeddingModel" + assert model.name == "multi-qa-MiniLM-L6-cos-v1", "Incorrect model name assigned" + + # Test embedding functionality + texts = ["Hello world", "Test sentence"] + embeddings = await model.embed_documents(texts) + assert 
len(embeddings) == 2, "Incorrect number of embeddings returned" + assert all( + isinstance(embed, list) for embed in embeddings + ), "Embeddings are not in list format" + assert all(len(embed) > 0 for embed in embeddings), "Embeddings should not be empty" + + +@pytest.mark.asyncio +async def test_embedding_model_factory_hybrid_with_sentence_transformer() -> None: + """Test that the factory creates a HybridEmbeddingModel containing a SentenceTransformerEmbeddingModel.""" + embedding = "hybrid-st-multi-qa-MiniLM-L6-cos-v1" + model = embedding_model_factory(embedding) + assert isinstance( + model, HybridEmbeddingModel + ), "Factory did not create HybridEmbeddingModel" + assert len(model.models) == 2, "Hybrid model should contain two component models" + assert isinstance( + model.models[0], SentenceTransformerEmbeddingModel + ), "First component should be SentenceTransformerEmbeddingModel" + assert isinstance( + model.models[1], SparseEmbeddingModel + ), "Second component should be SparseEmbeddingModel" + + # Test embedding functionality + texts = ["Hello world", "Test sentence"] + embeddings = await model.embed_documents(texts) + assert len(embeddings) == 2, "Incorrect number of embeddings returned" + expected_length = len((await model.models[0].embed_documents(texts))[0]) + len( + (await model.models[1].embed_documents(texts))[0] + ) + assert all( + len(embed) == expected_length for embed in embeddings + ), "Embeddings do not match expected combined length" + + +@pytest.mark.asyncio +async def test_embedding_model_factory_invalid_st_prefix() -> None: + """Test that the factory raises a ValueError when 'st-' prefix is provided without a model name.""" + embedding = "st-" + with pytest.raises( + ValueError, + match="SentenceTransformer model name must be specified after 'st-'.", + ): + embedding_model_factory(embedding) + + +@pytest.mark.asyncio +async def test_embedding_model_factory_unknown_prefix() -> None: + """Test that the factory defaults to LiteLLMEmbeddingModel when an unknown prefix is provided.""" + embedding = "unknown-prefix-model" + model = embedding_model_factory(embedding) + assert isinstance( + model, LiteLLMEmbeddingModel + ), "Factory did not default to LiteLLMEmbeddingModel for unknown prefix" + assert model.name == "unknown-prefix-model", "Incorrect model name assigned" + + +@pytest.mark.asyncio +async def test_embedding_model_factory_sparse() -> None: + """Test that the factory creates a SparseEmbeddingModel when 'sparse' is provided.""" + embedding = "sparse" + model = embedding_model_factory(embedding) + assert isinstance( + model, SparseEmbeddingModel + ), "Factory did not create SparseEmbeddingModel" + assert model.name == "sparse", "Incorrect model name assigned" + + +@pytest.mark.asyncio +async def test_embedding_model_factory_litellm() -> None: + """Test that the factory creates a LiteLLMEmbeddingModel when 'litellm-' prefix is provided.""" + embedding = "litellm-text-embedding-3-small" + model = embedding_model_factory(embedding) + assert isinstance( + model, LiteLLMEmbeddingModel + ), "Factory did not create LiteLLMEmbeddingModel" + assert model.name == "text-embedding-3-small", "Incorrect model name assigned" + + +@pytest.mark.asyncio +async def test_embedding_model_factory_default() -> None: + """Test that the factory defaults to LiteLLMEmbeddingModel when no known prefix is provided.""" + embedding = "default-model" + model = embedding_model_factory(embedding) + assert isinstance( + model, LiteLLMEmbeddingModel + ), "Factory did not default to 
LiteLLMEmbeddingModel" + assert model.name == "default-model", "Incorrect model name assigned" diff --git a/tests/test_rate_limiter.py b/tests/test_rate_limiter.py new file mode 100644 index 0000000..4da6b9a --- /dev/null +++ b/tests/test_rate_limiter.py @@ -0,0 +1,297 @@ +import asyncio +import time +from itertools import product +from typing import Any + +import pytest +from limits import RateLimitItemPerSecond + +from llmclient.constants import CHARACTERS_PER_TOKEN_ASSUMPTION +from llmclient.embeddings import LiteLLMEmbeddingModel +from llmclient.llms import ( + Chunk, + LiteLLMModel, +) +from llmclient.types import LLMResult + +LLM_CONFIG_W_RATE_LIMITS = [ + # following ensures that "short-form" rate limits are also supported + # where the user doesn't specify the model_list + { + "name": "gpt-4o-mini", + "config": { + "rate_limit": {"gpt-4o-mini": RateLimitItemPerSecond(20, 3)}, + }, + }, + { + "name": "gpt-4o-mini", + "config": { + "model_list": [ + { + "model_name": "gpt-4o-mini", + "litellm_params": { + "model": "gpt-4o-mini", + "temperature": 0, + }, + } + ], + "rate_limit": {"gpt-4o-mini": RateLimitItemPerSecond(20, 1)}, + }, + }, + { + "name": "gpt-4o-mini", + "config": { + "model_list": [ + { + "model_name": "gpt-4o-mini", + "litellm_params": { + "model": "gpt-4o-mini", + "temperature": 0, + }, + } + ], + "rate_limit": {"gpt-4o-mini": RateLimitItemPerSecond(1_000_000, 1)}, + }, + }, + { + "name": "gpt-4o-mini", + "config": { + "model_list": [ + { + "model_name": "gpt-4o-mini", + "litellm_params": { + "model": "gpt-4o-mini", + "temperature": 0, + }, + } + ] + }, + }, +] + +RATE_LIMITER_PROMPT = "Animals make many noises. The duck says" + +LLM_METHOD_AND_INPUTS = [ + { + "method": "acomplete", + "kwargs": {"prompt": RATE_LIMITER_PROMPT}, + }, + { + "method": "acomplete_iter", + "kwargs": {"prompt": RATE_LIMITER_PROMPT}, + }, + { + "method": "achat", + "kwargs": {"messages": [{"role": "user", "content": RATE_LIMITER_PROMPT}]}, + }, + { + "method": "achat_iter", + "kwargs": {"messages": [{"role": "user", "content": RATE_LIMITER_PROMPT}]}, + }, +] + +rate_limit_configurations = list( + product(LLM_CONFIG_W_RATE_LIMITS, LLM_METHOD_AND_INPUTS) +) + +EMBEDDING_CONFIG_W_RATE_LIMITS = [ + {"config": {"rate_limit": RateLimitItemPerSecond(20, 5)}}, + {"config": {"rate_limit": RateLimitItemPerSecond(20, 3)}}, + {"config": {"rate_limit": RateLimitItemPerSecond(1_000_000, 1)}}, + {}, +] + +ACCEPTABLE_RATE_LIMIT_ERROR: float = 0.10 # 10% error margin for token estimate error + + +async def time_n_llm_methods( + llm: LiteLLMModel, method: str, n: int, use_gather: bool = False, *args, **kwargs +) -> float: + """Give the token per second rate of a method call.""" + start_time = time.time() + outputs = [] + + if not use_gather: + for _ in range(n): + if "iter" in method: + outputs.extend( + [ + output + async for output in await getattr(llm, method)(*args, **kwargs) + ] + ) + else: + outputs.append(await getattr(llm, method)(*args, **kwargs)) + + else: + outputs = await asyncio.gather( + *[getattr(llm, method)(*args, **kwargs) for _ in range(n)] + ) + + character_count = 0 + token_count = 0 + + if isinstance(outputs[0], Chunk | LLMResult): + character_count = sum(len(o.text or "") for o in outputs) + else: + character_count = sum(len(o) for o in outputs) + + if hasattr(outputs[0], "prompt_tokens"): + token_count = sum(o.prompt_tokens + o.completion_tokens for o in outputs) + + return ( + (character_count / CHARACTERS_PER_TOKEN_ASSUMPTION) + if token_count == 0 + else token_count + ) / (time.time() 
- start_time) + + +@pytest.mark.parametrize("llm_config_w_rate_limits", LLM_CONFIG_W_RATE_LIMITS) +@pytest.mark.asyncio +async def test_rate_limit_on_run_prompt( + llm_config_w_rate_limits: dict[str, Any], +) -> None: + + llm = LiteLLMModel(**llm_config_w_rate_limits) + + outputs = [] + + def accum(x) -> None: + outputs.append(x) + + estimated_tokens_per_second = await time_n_llm_methods( + llm, + "run_prompt", + 3, + prompt="The {animal} says", + data={"animal": "duck"}, + system_prompt=None, + callbacks=[accum], + ) + + if "rate_limit" in llm.config: + max_tokens_per_second = ( + llm.config["rate_limit"]["gpt-4o-mini"].amount + / llm.config["rate_limit"]["gpt-4o-mini"].multiples + ) + assert estimated_tokens_per_second / max_tokens_per_second < ( + 1.0 + ACCEPTABLE_RATE_LIMIT_ERROR + ) + else: + assert estimated_tokens_per_second > 0 + + outputs = [] + + def accum2(x) -> None: + outputs.append(x) + + estimated_tokens_per_second = await time_n_llm_methods( + llm, + "run_prompt", + 3, + use_gather=True, + prompt="The {animal} says", + data={"animal": "duck"}, + system_prompt=None, + callbacks=[accum2], + ) + + if "rate_limit" in llm.config: + max_tokens_per_second = ( + llm.config["rate_limit"]["gpt-4o-mini"].amount + / llm.config["rate_limit"]["gpt-4o-mini"].multiples + ) + assert estimated_tokens_per_second / max_tokens_per_second < ( + 1.0 + ACCEPTABLE_RATE_LIMIT_ERROR + ) + else: + assert estimated_tokens_per_second > 0 + + +@pytest.mark.parametrize( + ("llm_config_w_rate_limits", "llm_method_kwargs"), rate_limit_configurations +) +@pytest.mark.asyncio +async def test_rate_limit_on_sequential_completion_litellm_methods( + llm_config_w_rate_limits: dict[str, Any], + llm_method_kwargs: dict[str, Any], +) -> None: + + llm = LiteLLMModel(**llm_config_w_rate_limits) + + estimated_tokens_per_second = await time_n_llm_methods( + llm, + llm_method_kwargs["method"], + 3, + use_gather=False, + **llm_method_kwargs["kwargs"], + ) + if "rate_limit" in llm.config: + max_tokens_per_second = ( + llm.config["rate_limit"]["gpt-4o-mini"].amount + / llm.config["rate_limit"]["gpt-4o-mini"].multiples + ) + assert estimated_tokens_per_second / max_tokens_per_second < ( + 1.0 + ACCEPTABLE_RATE_LIMIT_ERROR + ) + else: + assert estimated_tokens_per_second > 0 + + +@pytest.mark.parametrize( + ("llm_config_w_rate_limits", "llm_method_kwargs"), rate_limit_configurations +) +@pytest.mark.asyncio +async def test_rate_limit_on_parallel_completion_litellm_methods( + llm_config_w_rate_limits: dict[str, Any], + llm_method_kwargs: dict[str, Any], +) -> None: + + llm = LiteLLMModel(**llm_config_w_rate_limits) + + if "iter" not in llm_method_kwargs["method"]: + estimated_tokens_per_second = await time_n_llm_methods( + llm, + llm_method_kwargs["method"], + 3, + use_gather=True, + **llm_method_kwargs["kwargs"], + ) + if "rate_limit" in llm.config: + max_tokens_per_second = ( + llm.config["rate_limit"]["gpt-4o-mini"].amount + / llm.config["rate_limit"]["gpt-4o-mini"].multiples + ) + assert estimated_tokens_per_second / max_tokens_per_second < ( + 1.0 + ACCEPTABLE_RATE_LIMIT_ERROR + ) + else: + assert estimated_tokens_per_second > 0 + + +@pytest.mark.parametrize( + "embedding_config_w_rate_limits", EMBEDDING_CONFIG_W_RATE_LIMITS +) +@pytest.mark.asyncio +async def test_embedding_rate_limits( + embedding_config_w_rate_limits: dict[str, Any], +) -> None: + + embedding_model = LiteLLMEmbeddingModel(**embedding_config_w_rate_limits) + texts_to_embed = ["the duck says"] * 10 + start = time.time() + await 
embedding_model.embed_documents(texts=texts_to_embed, batch_size=5) + estimated_tokens_per_second = sum( + len(t) / CHARACTERS_PER_TOKEN_ASSUMPTION for t in texts_to_embed + ) / (time.time() - start) + + if "rate_limit" in embedding_config_w_rate_limits: + max_tokens_per_second = ( + embedding_config_w_rate_limits["rate_limit"].amount + / embedding_config_w_rate_limits["rate_limit"].multiples + ) + assert estimated_tokens_per_second / max_tokens_per_second < ( + 1.0 + ACCEPTABLE_RATE_LIMIT_ERROR + ) + else: + assert estimated_tokens_per_second > 0 diff --git a/uv.lock b/uv.lock index 05939e8..b1dcc0f 100644 --- a/uv.lock +++ b/uv.lock @@ -158,6 +158,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/45/86/4736ac618d82a20d87d2f92ae19441ebc7ac9e7a581d7e58bbe79233b24a/asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24", size = 27764 }, ] +[[package]] +name = "async-timeout" +version = "4.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/87/d6/21b30a550dafea84b1b8eee21b5e23fa16d010ae006011221f33dcd8d7f8/async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f", size = 8345 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/fa/e01228c2938de91d47b307831c62ab9e4001e747789d0b05baf779a6488c/async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028", size = 5721 }, +] + [[package]] name = "attrs" version = "24.2.0" @@ -392,6 +401,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2b/1e/1e726ba66eddf21c940821df8cf1a7d15cb165f0682d62161eaa5e93dae1/contourpy-1.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:33c92cdae89ec5135d036e7218e69b0bb2851206077251f04a6c4e0e21f03927", size = 1314829 }, ] +[[package]] +name = "coredis" +version = "4.17.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-timeout" }, + { name = "deprecated" }, + { name = "packaging" }, + { name = "pympler" }, + { name = "typing-extensions" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ef/0c/0f2fb1cedd224666ef08e898447bb9cf4d1e98a86b03119f1c6513093ddc/coredis-4.17.0.tar.gz", hash = "sha256:04e9976e71a42004dfe19a862c648b4047bf813e15184cddfd3cb37eb704b83f", size = 243157 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/2c/2335e476f0c0b33eea53c307169bcafe9c19a4b277738258eb80354ee90c/coredis-4.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f3050806b4854a6624e3c2efa013b540265d88e766f815963d447c116240d75d", size = 330690 }, + { url = "https://files.pythonhosted.org/packages/6a/b1/3c24a708b24f8e2566b1b91b64b4dc75f74633b875def19f2ac0fa03a0a0/coredis-4.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5f0f1044bdafc93f421e59e711da762c6c741ab76df0c12a42c447c1db1fcd75", size = 328051 }, + { url = "https://files.pythonhosted.org/packages/0f/a6/e5a8add1ae7b31240248528f669127e5fd347c69625a9b423965a5902302/coredis-4.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1befa7db121978fd0995151af5d15ce5e37a14847797c3fbd9403882f21b48c", size = 352651 }, + { url = "https://files.pythonhosted.org/packages/b8/d1/0ece1b888547ec26f4d33be30513cd44c77df25c9f943e7d3c20b49cc634/coredis-4.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:52583dcef671c8d3a1cbecbf81cd630b1a72f946cf46601016c4f85d3f12a4a1", size = 355472 }, + { url = "https://files.pythonhosted.org/packages/00/c2/771bafa43c37d8c968804b6bb34063eb631b5d2377db31bca6d784131f48/coredis-4.17.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:845f5c0bb7012609a1f41f8308e5166c01f162599af33cb001bd2b0d6a4386f5", size = 358740 }, + { url = "https://files.pythonhosted.org/packages/fb/d3/90846efc003d692c46f2988ddaffaac47f2c95f378102dad490e911de157/coredis-4.17.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e3638c9a894ac7d0a04fa14515f24d0f717c431266ee0ac612ddb3a142862258", size = 330509 }, + { url = "https://files.pythonhosted.org/packages/4c/2d/1f97441d377b457831bd9327dbdaa29888effa2edf6318cb4138a425538f/coredis-4.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73cb260bf96eacb4e455c300b5e41382bc52d9a2125f3f7e55657662a627e0cb", size = 327735 }, + { url = "https://files.pythonhosted.org/packages/3a/3f/1dcd57f6df67b7a20b1c27abcf768cf6789be5f33d173739f482d672e9d1/coredis-4.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9421423bb109eb62b7595e1d0c84d8c9399bf160826ee478b6b7771bf6ad831e", size = 353755 }, + { url = "https://files.pythonhosted.org/packages/38/24/de68bdd4b3549a8a05674f0952e646d45afd15453543e0e679dc6899174c/coredis-4.17.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a74abdeda89ff5ea40d0da771d2871148b64b2f1c758f11485397adc1928b08e", size = 357309 }, + { url = "https://files.pythonhosted.org/packages/ab/66/2bd9f9e1c10b307caf8f4e77527c620a0320291aa83a9e0e98e8df5a326c/coredis-4.17.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ddad826c5bc91f05e5fe36435086cdbe51019b2f4f0faf96d40250823548fee", size = 360856 }, + { url = "https://files.pythonhosted.org/packages/08/1c/7249845c0f6105290d70d90c9ad48b550f5bcb989766819d38aa0f784aec/coredis-4.17.0-py3-none-any.whl", hash = "sha256:a8254fcc746efd72990d565d87e5399646ad737b7a61d86ef129df846e86b0d3", size = 239667 }, +] + [[package]] name = "cycler" version = "0.12.1" @@ -410,6 +446,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d5/50/83c593b07763e1161326b3b8c6686f0f4b0f24d5526546bee538c89837d6/decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186", size = 9073 }, ] +[[package]] +name = "deprecated" +version = "1.2.15" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/a3/53e7d78a6850ffdd394d7048a31a6f14e44900adedf190f9a165f6b69439/deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d", size = 2977612 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/8f/c7f227eb42cfeaddce3eb0c96c60cbca37797fa7b34f8e1aeadf6c5c0983/Deprecated-1.2.15-py2.py3-none-any.whl", hash = "sha256:353bc4a8ac4bfc96800ddab349d89c25dec1079f65fd53acdcc1e0b975b21320", size = 9941 }, +] + [[package]] name = "dicttoxml" version = "1.7.16" @@ -535,6 +583,195 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/99/f6/af0d1f58f86002be0cf1e2665cdd6f7a4a71cdc8a7a9438cdc9e3b5375fe/fastapi-0.115.4-py3-none-any.whl", hash = "sha256:0b504a063ffb3cf96a5e27dc1bc32c80ca743a2528574f9cdc77daa2d31b4742", size = 94732 }, ] +[[package]] +name = "fh-llm-client" +version = "0.1.dev37+g63f57b5.d20241126" +source = { 
editable = "." } +dependencies = [ + { name = "aiofiles" }, + { name = "coredis" }, + { name = "dm-tree" }, + { name = "fhaviary" }, + { name = "httpx" }, + { name = "limits" }, + { name = "litellm" }, + { name = "networkx", extra = ["default"] }, + { name = "numpy" }, + { name = "pydantic" }, + { name = "pymupdf" }, + { name = "tenacity" }, + { name = "tiktoken" }, + { name = "tqdm" }, + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, + { name = "usearch" }, +] + +[package.optional-dependencies] +dev = [ + { name = "fastapi" }, + { name = "fhaviary", extra = ["xml"] }, + { name = "ipython" }, + { name = "mypy" }, + { name = "pre-commit" }, + { name = "pydantic" }, + { name = "pydot" }, + { name = "pylint-pydantic" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-mock" }, + { name = "pytest-recording" }, + { name = "pytest-rerunfailures" }, + { name = "pytest-subtests" }, + { name = "pytest-sugar" }, + { name = "pytest-timer", extra = ["colorama"] }, + { name = "pytest-xdist" }, + { name = "python-dotenv" }, + { name = "refurb" }, + { name = "rich" }, + { name = "sentence-transformers" }, + { name = "torch" }, + { name = "tqdm" }, + { name = "types-aiofiles" }, + { name = "types-tqdm" }, + { name = "wandb" }, +] +local = [ + { name = "sentence-transformers" }, +] +monitor = [ + { name = "wandb" }, +] +nn = [ + { name = "torch" }, +] +rich = [ + { name = "rich" }, + { name = "tqdm" }, +] +server = [ + { name = "fastapi" }, +] +typing = [ + { name = "types-aiofiles" }, + { name = "types-tqdm" }, +] +visualization = [ + { name = "pydot" }, +] + +[package.dev-dependencies] +codeflash = [ + { name = "codeflash" }, + { name = "fastapi" }, + { name = "fhaviary", extra = ["xml"] }, + { name = "ipython" }, + { name = "mypy" }, + { name = "pre-commit" }, + { name = "pydantic" }, + { name = "pydot" }, + { name = "pylint-pydantic" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-mock" }, + { name = "pytest-recording" }, + { name = "pytest-rerunfailures" }, + { name = "pytest-subtests" }, + { name = "pytest-sugar" }, + { name = "pytest-timer", extra = ["colorama"] }, + { name = "pytest-xdist" }, + { name = "python-dotenv" }, + { name = "refurb" }, + { name = "rich" }, + { name = "sentence-transformers" }, + { name = "torch" }, + { name = "tqdm" }, + { name = "types-aiofiles" }, + { name = "types-tqdm" }, + { name = "wandb" }, +] +dev = [ + { name = "fastapi" }, + { name = "fhaviary", extra = ["xml"] }, + { name = "ipython" }, + { name = "mypy" }, + { name = "pre-commit" }, + { name = "pydantic" }, + { name = "pydot" }, + { name = "pylint-pydantic" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-mock" }, + { name = "pytest-recording" }, + { name = "pytest-rerunfailures" }, + { name = "pytest-subtests" }, + { name = "pytest-sugar" }, + { name = "pytest-timer", extra = ["colorama"] }, + { name = "pytest-xdist" }, + { name = "python-dotenv" }, + { name = "refurb" }, + { name = "rich" }, + { name = "sentence-transformers" }, + { name = "torch" }, + { name = "tqdm" }, + { name = "types-aiofiles" }, + { name = "types-tqdm" }, + { name = "wandb" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiofiles" }, + { name = "coredis" }, + { name = "dm-tree" }, + { name = "fastapi", marker = "extra == 'server'", specifier = ">=0.109" }, + { name = "fh-llm-client", extras = ["local", "monitor", "nn", "rich", "server", "typing", "visualization"], marker = "extra == 'dev'" }, + { name = "fhaviary", 
specifier = ">=0.8.2" }, + { name = "fhaviary", extras = ["xml"], marker = "extra == 'dev'" }, + { name = "httpx" }, + { name = "ipython", marker = "extra == 'dev'", specifier = ">=8" }, + { name = "limits" }, + { name = "litellm", specifier = ">=1.44" }, + { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8" }, + { name = "networkx", extras = ["default"], specifier = "~=3.4" }, + { name = "numpy" }, + { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.4" }, + { name = "pydantic", specifier = "~=2.0" }, + { name = "pydantic", marker = "extra == 'dev'", specifier = "~=2.0" }, + { name = "pydot", marker = "extra == 'visualization'", specifier = ">=3.0.1" }, + { name = "pylint-pydantic", marker = "extra == 'dev'" }, + { name = "pymupdf", specifier = ">=1.24.12" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8" }, + { name = "pytest-asyncio", marker = "extra == 'dev'" }, + { name = "pytest-mock", marker = "extra == 'dev'" }, + { name = "pytest-recording", marker = "extra == 'dev'" }, + { name = "pytest-rerunfailures", marker = "extra == 'dev'" }, + { name = "pytest-subtests", marker = "extra == 'dev'" }, + { name = "pytest-sugar", marker = "extra == 'dev'" }, + { name = "pytest-timer", extras = ["colorama"], marker = "extra == 'dev'" }, + { name = "pytest-xdist", marker = "extra == 'dev'" }, + { name = "python-dotenv", marker = "extra == 'dev'" }, + { name = "refurb", marker = "extra == 'dev'", specifier = ">=2" }, + { name = "rich", marker = "extra == 'rich'" }, + { name = "sentence-transformers", marker = "extra == 'local'" }, + { name = "tenacity" }, + { name = "tiktoken", specifier = ">=0.4.0" }, + { name = "torch", marker = "extra == 'nn'", specifier = ">=2.2" }, + { name = "tqdm" }, + { name = "tqdm", marker = "extra == 'rich'", specifier = ">=4.56" }, + { name = "types-aiofiles", marker = "extra == 'typing'" }, + { name = "types-tqdm", marker = "extra == 'typing'" }, + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, + { name = "usearch", specifier = ">=2.13" }, + { name = "wandb", marker = "extra == 'monitor'" }, +] + +[package.metadata.requires-dev] +codeflash = [ + { name = "codeflash", specifier = ">=0.7" }, + { name = "fh-llm-client", extras = ["dev"] }, +] +dev = [{ name = "fh-llm-client", extras = ["dev"] }] + [[package]] name = "fhaviary" version = "0.10.0" @@ -776,6 +1013,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/d9/a1e041c5e7caa9a05c925f4bdbdfb7f006d1f74996af53467bc394c97be7/importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b", size = 26514 }, ] +[[package]] +name = "importlib-resources" +version = "6.4.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/be/f3e8c6081b684f176b761e6a2fef02a0be939740ed6f54109a2951d806f3/importlib_resources-6.4.5.tar.gz", hash = "sha256:980862a1d16c9e147a59603677fa2aa5fd82b87f223b6cb870695bcfce830065", size = 43372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/6a/4604f9ae2fa62ef47b9de2fa5ad599589d28c9fd1d335f32759813dfa91e/importlib_resources-6.4.5-py3-none-any.whl", hash = "sha256:ac29d5f956f01d5e4bb63102a5a19957f1b9175e45649977264a1416783bb717", size = 36115 }, +] + [[package]] name = "iniconfig" version = "2.0.0" @@ -909,6 +1155,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ca/96/58b3d260e212add0087563672931b1176e70bef1225839a4470ec66157a5/jiter-0.7.0-cp313-none-win_amd64.whl", hash = "sha256:7417c2b928062c496f381fb0cb50412eee5ad1d8b53dbc0e011ce45bb2de522c", size = 199305 }, ] +[[package]] +name = "joblib" +version = "1.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/64/33/60135848598c076ce4b231e1b1895170f45fbcaeaa2c9d5e38b04db70c35/joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e", size = 2116621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6", size = 301817 }, +] + [[package]] name = "jsonschema" version = "4.23.0" @@ -1030,6 +1285,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/9f/5b5481d716670ed5fbd8d06dfa94b7108272b645da2f2406eb909cb6a450/libcst-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:4d6acb0bdee1e55b44c6215c59755ec4693ac01e74bb1fde04c37358b378835d", size = 2029600 }, ] +[[package]] +name = "limits" +version = "3.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "importlib-resources" }, + { name = "packaging" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c4/5f/89fb5405ee37d8b172e48e357438dd79482731b0cd5db2f734ac58f019e4/limits-3.13.0.tar.gz", hash = "sha256:6571b0c567bfa175a35fed9f8a954c0c92f1c3200804282f1b8f1de4ad98a953", size = 70218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/80/b340bc7c3eb8f5c40e4d38c8e3cd04c127756d8de06b9e54caefb4ae16d5/limits-3.13.0-py3-none-any.whl", hash = "sha256:9767f7233da4255e9904b79908a728e8ec0984c0b086058b4cbbd309aea553f6", size = 45547 }, +] + [[package]] name = "litellm" version = "1.48.10" @@ -1052,192 +1322,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/5b/b6eb2098ed289f99abb55ab966b4f318a467294c218ad846e96ba72949b0/litellm-1.48.10-py3-none-any.whl", hash = "sha256:752efd59747a0895f4695d025c66f0b2258d80a61175f7cfa41dbe4894ef95e1", size = 6238318 }, ] -[[package]] -name = "llm-client" -version = "0.1.dev22+g7d88740.d20241107" -source = { editable = "." 
} -dependencies = [ - { name = "aiofiles" }, - { name = "dm-tree" }, - { name = "fhaviary" }, - { name = "httpx" }, - { name = "litellm" }, - { name = "networkx", extra = ["default"] }, - { name = "numpy" }, - { name = "openai" }, - { name = "pydantic" }, - { name = "tenacity" }, - { name = "tiktoken" }, - { name = "tqdm" }, - { name = "typing-extensions", marker = "python_full_version < '3.12'" }, - { name = "usearch" }, -] - -[package.optional-dependencies] -dev = [ - { name = "fastapi" }, - { name = "fhaviary", extra = ["xml"] }, - { name = "ipython" }, - { name = "litellm" }, - { name = "mypy" }, - { name = "openai" }, - { name = "pre-commit" }, - { name = "pydantic" }, - { name = "pydot" }, - { name = "pylint" }, - { name = "pylint-pydantic" }, - { name = "pytest" }, - { name = "pytest-asyncio" }, - { name = "pytest-mock" }, - { name = "pytest-recording" }, - { name = "pytest-rerunfailures" }, - { name = "pytest-subtests" }, - { name = "pytest-sugar" }, - { name = "pytest-timer", extra = ["colorama"] }, - { name = "pytest-xdist" }, - { name = "refurb" }, - { name = "rich" }, - { name = "torch" }, - { name = "tqdm" }, - { name = "types-aiofiles" }, - { name = "types-tqdm" }, - { name = "wandb" }, -] -monitor = [ - { name = "wandb" }, -] -nn = [ - { name = "torch" }, -] -rich = [ - { name = "rich" }, - { name = "tqdm" }, -] -server = [ - { name = "fastapi" }, -] -typing = [ - { name = "types-aiofiles" }, - { name = "types-tqdm" }, -] -visualization = [ - { name = "pydot" }, -] - -[package.dev-dependencies] -codeflash = [ - { name = "codeflash" }, - { name = "fastapi" }, - { name = "fhaviary", extra = ["xml"] }, - { name = "ipython" }, - { name = "litellm" }, - { name = "mypy" }, - { name = "openai" }, - { name = "pre-commit" }, - { name = "pydantic" }, - { name = "pydot" }, - { name = "pylint" }, - { name = "pylint-pydantic" }, - { name = "pytest" }, - { name = "pytest-asyncio" }, - { name = "pytest-mock" }, - { name = "pytest-recording" }, - { name = "pytest-rerunfailures" }, - { name = "pytest-subtests" }, - { name = "pytest-sugar" }, - { name = "pytest-timer", extra = ["colorama"] }, - { name = "pytest-xdist" }, - { name = "refurb" }, - { name = "rich" }, - { name = "torch" }, - { name = "tqdm" }, - { name = "types-aiofiles" }, - { name = "types-tqdm" }, - { name = "wandb" }, -] -dev = [ - { name = "fastapi" }, - { name = "fhaviary", extra = ["xml"] }, - { name = "ipython" }, - { name = "litellm" }, - { name = "mypy" }, - { name = "openai" }, - { name = "pre-commit" }, - { name = "pydantic" }, - { name = "pydot" }, - { name = "pylint" }, - { name = "pylint-pydantic" }, - { name = "pytest" }, - { name = "pytest-asyncio" }, - { name = "pytest-mock" }, - { name = "pytest-recording" }, - { name = "pytest-rerunfailures" }, - { name = "pytest-subtests" }, - { name = "pytest-sugar" }, - { name = "pytest-timer", extra = ["colorama"] }, - { name = "pytest-xdist" }, - { name = "refurb" }, - { name = "rich" }, - { name = "torch" }, - { name = "tqdm" }, - { name = "types-aiofiles" }, - { name = "types-tqdm" }, - { name = "wandb" }, -] - -[package.metadata] -requires-dist = [ - { name = "aiofiles" }, - { name = "dm-tree" }, - { name = "fastapi", marker = "extra == 'server'", specifier = ">=0.109" }, - { name = "fhaviary", specifier = ">=0.8.2" }, - { name = "fhaviary", extras = ["xml"], marker = "extra == 'dev'" }, - { name = "httpx" }, - { name = "ipython", marker = "extra == 'dev'", specifier = ">=8" }, - { name = "litellm", specifier = ">=1.40.15" }, - { name = "litellm", marker = "extra == 
'dev'", specifier = "!=1.49.4,!=1.49.5,!=1.49.6" }, - { name = "llm-client", extras = ["monitor", "nn", "rich", "server", "typing", "visualization"], marker = "extra == 'dev'" }, - { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8" }, - { name = "networkx", extras = ["default"], specifier = "~=3.4" }, - { name = "numpy", specifier = ">=1.20" }, - { name = "openai", specifier = ">=1" }, - { name = "openai", marker = "extra == 'dev'", specifier = "<1.47" }, - { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.4" }, - { name = "pydantic", specifier = "~=2.0" }, - { name = "pydantic", marker = "extra == 'dev'", specifier = "~=2.9" }, - { name = "pydot", marker = "extra == 'visualization'", specifier = ">=3.0.1" }, - { name = "pylint", marker = "extra == 'dev'", specifier = ">=3.2" }, - { name = "pylint-pydantic", marker = "extra == 'dev'" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=8" }, - { name = "pytest-asyncio", marker = "extra == 'dev'" }, - { name = "pytest-mock", marker = "extra == 'dev'" }, - { name = "pytest-recording", marker = "extra == 'dev'" }, - { name = "pytest-rerunfailures", marker = "extra == 'dev'" }, - { name = "pytest-subtests", marker = "extra == 'dev'" }, - { name = "pytest-sugar", marker = "extra == 'dev'" }, - { name = "pytest-timer", extras = ["colorama"], marker = "extra == 'dev'" }, - { name = "pytest-xdist", marker = "extra == 'dev'" }, - { name = "refurb", marker = "extra == 'dev'", specifier = ">=2" }, - { name = "rich", marker = "extra == 'rich'" }, - { name = "tenacity" }, - { name = "tiktoken" }, - { name = "torch", marker = "extra == 'nn'", specifier = ">=2.2" }, - { name = "tqdm" }, - { name = "tqdm", marker = "extra == 'rich'", specifier = ">=4.56" }, - { name = "types-aiofiles", marker = "extra == 'typing'" }, - { name = "types-tqdm", marker = "extra == 'typing'" }, - { name = "typing-extensions", marker = "python_full_version < '3.12'" }, - { name = "usearch", specifier = ">=2.13" }, - { name = "wandb", marker = "extra == 'monitor'" }, -] - -[package.metadata.requires-dev] -codeflash = [ - { name = "codeflash", specifier = ">=0.7" }, - { name = "llm-client", extras = ["dev"] }, -] -dev = [{ name = "llm-client", extras = ["dev"] }] - [[package]] name = "lxml" version = "5.3.0" @@ -2180,6 +2264,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/80/34b429c6534be99ef3d6d20bd794b26fda0682d38e2d57f85df258beaac2/pylint_pydantic-0.3.2-py3-none-any.whl", hash = "sha256:e5cec02370aa68ac8eff138e5d573b0ac049bab864e9a6c3a9057cf043440aa1", size = 15951 }, ] +[[package]] +name = "pympler" +version = "1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "platform_system == 'Windows'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dd/37/c384631908029676d8e7213dd956bb686af303a80db7afbc9be36bc49495/pympler-1.1.tar.gz", hash = "sha256:1eaa867cb8992c218430f1708fdaccda53df064144d1c5656b1e6f1ee6000424", size = 179954 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/4f/a6a2e2b202d7fd97eadfe90979845b8706676b41cbd3b42ba75adf329d1f/Pympler-1.1-py3-none-any.whl", hash = "sha256:5b223d6027d0619584116a0cbc28e8d2e378f7a79c1e5e024f9ff3b673c58506", size = 165766 }, +] + +[[package]] +name = "pymupdf" +version = "1.24.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/6b/6bd735144a190d26dcc23f98b4aae0e09b259cc4c87bba266a39b7b91f56/PyMuPDF-1.24.14.tar.gz", hash = 
"sha256:0eed9f998525eaf39706dbf2d0cf3162150f0f526e4a36b1748ffa50bde581ae", size = 56242747 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/ce/972b080c526af80577ffaa49676c05361ba152de94de3af339a2f3ac07c2/PyMuPDF-1.24.14-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b3ad7a4f4b607ff97f2e1b8111823dd3797dbb381ec851c3ae4695fea6f68478", size = 19167365 }, + { url = "https://files.pythonhosted.org/packages/2c/11/8d6f4c8fca86b93759e430c4b0b7b66f8067d58893d6fe0a193420d14453/PyMuPDF-1.24.14-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:755906af4b4d693552ae5469ba682075853f4dc8a70639affd1bd6c049c5d900", size = 18417324 }, + { url = "https://files.pythonhosted.org/packages/51/69/518e6c088e20a5ded1fc658d4aec1e54c0f98f2d62d91362bd4231df9ecf/PyMuPDF-1.24.14-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:37f24108e2e18150fb8d512dcccdfa1e3d9b9dd203ffaa7ffb959bb20aea40b4", size = 19303826 }, + { url = "https://files.pythonhosted.org/packages/27/bf/203d06c68660d5535db65b6c54cacd35b950945c11c1c4546d674f270892/PyMuPDF-1.24.14-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0de4f5ed903c2be6d0abcccdc796368939b51ce03916eb53292916e3b6ea65d6", size = 19833056 }, + { url = "https://files.pythonhosted.org/packages/77/ed/40eb23cf5e91de0510dfedb7d9feedeab5ce9691544ad09599e124a0a333/PyMuPDF-1.24.14-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2d1b5c47df2f8055de5dedfbd3189c742188261a8c257f406378382adac94cff", size = 20963535 }, + { url = "https://files.pythonhosted.org/packages/87/2b/46af7461bd299c3f52bc5455332cc82608cea1667cd692652505fdf9308e/PyMuPDF-1.24.14-cp39-abi3-win32.whl", hash = "sha256:60a7ee7db3e0d3a4dcbe6df2781ba4487acb7e515c64ea9c857504f44effcb25", size = 14965671 }, + { url = "https://files.pythonhosted.org/packages/25/b2/82d70d9f5aea5a33e770f37e6db43ed08b5dc71b3526c5d7051689d1031e/PyMuPDF-1.24.14-cp39-abi3-win_amd64.whl", hash = "sha256:3d1f1ec2fe0249484afde7a0fc02589f19aaeb47c42939d23ae1d012aa1bc59b", size = 16257645 }, +] + [[package]] name = "pyparsing" version = "3.2.0" @@ -2353,6 +2464,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/c3/005fcca25ce078d2cc29fd559379817424e94885510568bc1bc53d7d5846/pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725", size = 508002 }, ] +[[package]] +name = "pywin32" +version = "308" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/e2/02652007469263fe1466e98439831d65d4ca80ea1a2df29abecedf7e47b7/pywin32-308-cp311-cp311-win32.whl", hash = "sha256:5d8c8015b24a7d6855b1550d8e660d8daa09983c80e5daf89a273e5c6fb5095a", size = 5928156 }, + { url = "https://files.pythonhosted.org/packages/48/ef/f4fb45e2196bc7ffe09cad0542d9aff66b0e33f6c0954b43e49c33cad7bd/pywin32-308-cp311-cp311-win_amd64.whl", hash = "sha256:575621b90f0dc2695fec346b2d6302faebd4f0f45c05ea29404cefe35d89442b", size = 6559559 }, + { url = "https://files.pythonhosted.org/packages/79/ef/68bb6aa865c5c9b11a35771329e95917b5559845bd75b65549407f9fc6b4/pywin32-308-cp311-cp311-win_arm64.whl", hash = "sha256:100a5442b7332070983c4cd03f2e906a5648a5104b8a7f50175f7906efd16bb6", size = 7972495 }, + { url = "https://files.pythonhosted.org/packages/00/7c/d00d6bdd96de4344e06c4afbf218bc86b54436a94c01c71a8701f613aa56/pywin32-308-cp312-cp312-win32.whl", hash = "sha256:587f3e19696f4bf96fde9d8a57cec74a57021ad5f204c9e627e15c33ff568897", size = 5939729 }, + { url = 
"https://files.pythonhosted.org/packages/21/27/0c8811fbc3ca188f93b5354e7c286eb91f80a53afa4e11007ef661afa746/pywin32-308-cp312-cp312-win_amd64.whl", hash = "sha256:00b3e11ef09ede56c6a43c71f2d31857cf7c54b0ab6e78ac659497abd2834f47", size = 6543015 }, + { url = "https://files.pythonhosted.org/packages/9d/0f/d40f8373608caed2255781a3ad9a51d03a594a1248cd632d6a298daca693/pywin32-308-cp312-cp312-win_arm64.whl", hash = "sha256:9b4de86c8d909aed15b7011182c8cab38c8850de36e6afb1f0db22b8959e3091", size = 7976033 }, + { url = "https://files.pythonhosted.org/packages/a9/a4/aa562d8935e3df5e49c161b427a3a2efad2ed4e9cf81c3de636f1fdddfd0/pywin32-308-cp313-cp313-win32.whl", hash = "sha256:1c44539a37a5b7b21d02ab34e6a4d314e0788f1690d65b48e9b0b89f31abbbed", size = 5938579 }, + { url = "https://files.pythonhosted.org/packages/c7/50/b0efb8bb66210da67a53ab95fd7a98826a97ee21f1d22949863e6d588b22/pywin32-308-cp313-cp313-win_amd64.whl", hash = "sha256:fd380990e792eaf6827fcb7e187b2b4b1cede0585e3d0c9e84201ec27b9905e4", size = 6542056 }, + { url = "https://files.pythonhosted.org/packages/26/df/2b63e3e4f2df0224f8aaf6d131f54fe4e8c96400eb9df563e2aae2e1a1f9/pywin32-308-cp313-cp313-win_arm64.whl", hash = "sha256:ef313c46d4c18dfb82a2431e3051ac8f112ccee1a34f29c263c583c568db63cd", size = 7974986 }, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -2574,6 +2701,77 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/86/d6/17caf2e4af1dec288477a0cbbe4a96fbc9b8a28457dce3f1f452630ce216/runs-1.2.2-py3-none-any.whl", hash = "sha256:0980dcbc25aba1505f307ac4f0e9e92cbd0be2a15a1e983ee86c24c87b839dfd", size = 7033 }, ] +[[package]] +name = "safetensors" +version = "0.4.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/46/a1c56ed856c6ac3b1a8b37abe5be0cac53219367af1331e721b04d122577/safetensors-0.4.5.tar.gz", hash = "sha256:d73de19682deabb02524b3d5d1f8b3aaba94c72f1bbfc7911b9b9d5d391c0310", size = 65702 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/a5/25bcf75e373412daf1fd88045ab3aa8140a0d804ef0e70712c4f2c5b94d8/safetensors-0.4.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:21f848d7aebd5954f92538552d6d75f7c1b4500f51664078b5b49720d180e47c", size = 392256 }, + { url = "https://files.pythonhosted.org/packages/08/8c/ece3bf8756506a890bd980eca02f47f9d98dfbf5ce16eda1368f53560f67/safetensors-0.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb07000b19d41e35eecef9a454f31a8b4718a185293f0d0b1c4b61d6e4487971", size = 381490 }, + { url = "https://files.pythonhosted.org/packages/39/83/c4a7ce01d626e46ea2b45887f2e59b16441408031e2ce2f9fe01860c6946/safetensors-0.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09dedf7c2fda934ee68143202acff6e9e8eb0ddeeb4cfc24182bef999efa9f42", size = 441093 }, + { url = "https://files.pythonhosted.org/packages/47/26/cc52de647e71bd9a0b0d78ead0d31d9c462b35550a817aa9e0cab51d6db4/safetensors-0.4.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:59b77e4b7a708988d84f26de3ebead61ef1659c73dcbc9946c18f3b1786d2688", size = 438960 }, + { url = "https://files.pythonhosted.org/packages/06/78/332538546775ee97e749867df2d58f2282d9c48a1681e4891eed8b94ec94/safetensors-0.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d3bc83e14d67adc2e9387e511097f254bd1b43c3020440e708858c684cbac68", size = 478031 }, + { url = 
"https://files.pythonhosted.org/packages/d9/03/a3c8663f1ddda54e624ecf43fce651659b49e8e1603c52c3e464b442acfa/safetensors-0.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39371fc551c1072976073ab258c3119395294cf49cdc1f8476794627de3130df", size = 494754 }, + { url = "https://files.pythonhosted.org/packages/e6/ee/69e498a892f208bd1da4104d4b9be887f8611bf4942144718b6738482250/safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6c19feda32b931cae0acd42748a670bdf56bee6476a046af20181ad3fee4090", size = 435013 }, + { url = "https://files.pythonhosted.org/packages/a2/61/f0cfce984515b86d1260f556ba3b782158e2855e6a318446ac2613786fa9/safetensors-0.4.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a659467495de201e2f282063808a41170448c78bada1e62707b07a27b05e6943", size = 455984 }, + { url = "https://files.pythonhosted.org/packages/e7/a9/3e3b48fcaade3eb4e347d39ebf0bd44291db21a3e4507854b42a7cb910ac/safetensors-0.4.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bad5e4b2476949bcd638a89f71b6916fa9a5cae5c1ae7eede337aca2100435c0", size = 619513 }, + { url = "https://files.pythonhosted.org/packages/80/23/2a7a1be24258c0e44c1d356896fd63dc0545a98d2d0184925fa09cd3ec76/safetensors-0.4.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a3a315a6d0054bc6889a17f5668a73f94f7fe55121ff59e0a199e3519c08565f", size = 604841 }, + { url = "https://files.pythonhosted.org/packages/b4/5c/34d082ff1fffffd8545fb22cbae3285ab4236f1f0cfc64b7e58261c2363b/safetensors-0.4.5-cp311-none-win32.whl", hash = "sha256:a01e232e6d3d5cf8b1667bc3b657a77bdab73f0743c26c1d3c5dd7ce86bd3a92", size = 272602 }, + { url = "https://files.pythonhosted.org/packages/6d/41/948c96c8a7e9fef57c2e051f1871c108a6dbbc6d285598bdb1d89b98617c/safetensors-0.4.5-cp311-none-win_amd64.whl", hash = "sha256:cbd39cae1ad3e3ef6f63a6f07296b080c951f24cec60188378e43d3713000c04", size = 285973 }, + { url = "https://files.pythonhosted.org/packages/bf/ac/5a63082f931e99200db95fd46fb6734f050bb6e96bf02521904c6518b7aa/safetensors-0.4.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:473300314e026bd1043cef391bb16a8689453363381561b8a3e443870937cc1e", size = 392015 }, + { url = "https://files.pythonhosted.org/packages/73/95/ab32aa6e9bdc832ff87784cdf9da26192b93de3ef82b8d1ada8f345c5044/safetensors-0.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:801183a0f76dc647f51a2d9141ad341f9665602a7899a693207a82fb102cc53e", size = 381774 }, + { url = "https://files.pythonhosted.org/packages/d6/6c/7e04b7626809fc63f3698f4c50e43aff2864b40089aa4506c918a75b8eed/safetensors-0.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1524b54246e422ad6fb6aea1ac71edeeb77666efa67230e1faf6999df9b2e27f", size = 441134 }, + { url = "https://files.pythonhosted.org/packages/58/2b/ffe7c86a277e6c1595fbdf415cfe2903f253f574a5405e93fda8baaa582c/safetensors-0.4.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b3139098e3e8b2ad7afbca96d30ad29157b50c90861084e69fcb80dec7430461", size = 438467 }, + { url = "https://files.pythonhosted.org/packages/67/9c/f271bd804e08c7fda954d17b70ff281228a88077337a9e70feace4f4cc93/safetensors-0.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65573dc35be9059770808e276b017256fa30058802c29e1038eb1c00028502ea", size = 476566 }, + { url = 
"https://files.pythonhosted.org/packages/4c/ad/4cf76a3e430a8a26108407fa6cb93e6f80d996a5cb75d9540c8fe3862990/safetensors-0.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd33da8e9407559f8779c82a0448e2133737f922d71f884da27184549416bfed", size = 492253 }, + { url = "https://files.pythonhosted.org/packages/d9/40/a6f75ea449a9647423ec8b6f72c16998d35aa4b43cb38536ac060c5c7bf5/safetensors-0.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3685ce7ed036f916316b567152482b7e959dc754fcc4a8342333d222e05f407c", size = 434769 }, + { url = "https://files.pythonhosted.org/packages/52/47/d4b49b1231abf3131f7bb0bc60ebb94b27ee33e0a1f9569da05f8ac65dee/safetensors-0.4.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dde2bf390d25f67908278d6f5d59e46211ef98e44108727084d4637ee70ab4f1", size = 457166 }, + { url = "https://files.pythonhosted.org/packages/c3/cd/006468b03b0fa42ff82d795d47c4193e99001e96c3f08bd62ef1b5cab586/safetensors-0.4.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7469d70d3de970b1698d47c11ebbf296a308702cbaae7fcb993944751cf985f4", size = 619280 }, + { url = "https://files.pythonhosted.org/packages/22/4d/b6208d918e83daa84b424c0ac3191ae61b44b3191613a3a5a7b38f94b8ad/safetensors-0.4.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3a6ba28118636a130ccbb968bc33d4684c48678695dba2590169d5ab03a45646", size = 605390 }, + { url = "https://files.pythonhosted.org/packages/e8/20/bf0e01825dc01ed75538021a98b9a046e60ead63c6c6700764c821a8c873/safetensors-0.4.5-cp312-none-win32.whl", hash = "sha256:c859c7ed90b0047f58ee27751c8e56951452ed36a67afee1b0a87847d065eec6", size = 273250 }, + { url = "https://files.pythonhosted.org/packages/f1/5f/ab6b6cec85b40789801f35b7d2fb579ae242d8193929974a106d5ff5c835/safetensors-0.4.5-cp312-none-win_amd64.whl", hash = "sha256:b5a8810ad6a6f933fff6c276eae92c1da217b39b4d8b1bc1c0b8af2d270dc532", size = 286307 }, + { url = "https://files.pythonhosted.org/packages/90/61/0e27b1403e311cba0be20026bee4ee822d90eda7dad372179e7f18bb99f3/safetensors-0.4.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:25e5f8e2e92a74f05b4ca55686234c32aac19927903792b30ee6d7bd5653d54e", size = 392062 }, + { url = "https://files.pythonhosted.org/packages/b1/9f/cc31fafc9f5d79da10a83a820ca37f069bab0717895ad8cbcacf629dd1c5/safetensors-0.4.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:81efb124b58af39fcd684254c645e35692fea81c51627259cdf6d67ff4458916", size = 382517 }, + { url = "https://files.pythonhosted.org/packages/a4/c7/4fda8a0ebb96662550433378f4a74c677fa5fc4d0a43a7ec287d1df254a9/safetensors-0.4.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:585f1703a518b437f5103aa9cf70e9bd437cb78eea9c51024329e4fb8a3e3679", size = 441378 }, + { url = "https://files.pythonhosted.org/packages/14/31/9abb431f6209de9c80dab83e1112ebd769f1e32e7ab7ab228a02424a4693/safetensors-0.4.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4b99fbf72e3faf0b2f5f16e5e3458b93b7d0a83984fe8d5364c60aa169f2da89", size = 438831 }, + { url = "https://files.pythonhosted.org/packages/37/37/99bfb195578a808b8d045159ee9264f8da58d017ac0701853dcacda14d4e/safetensors-0.4.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b17b299ca9966ca983ecda1c0791a3f07f9ca6ab5ded8ef3d283fff45f6bcd5f", size = 477112 }, + { url = 
"https://files.pythonhosted.org/packages/7d/05/fac3ef107e60d2a78532bed171a91669d4bb259e1236f5ea8c67a6976c75/safetensors-0.4.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:76ded72f69209c9780fdb23ea89e56d35c54ae6abcdec67ccb22af8e696e449a", size = 493373 }, + { url = "https://files.pythonhosted.org/packages/cf/7a/825800ee8c68214b4fd3506d5e19209338c69b41e01c6e14dd13969cc8b9/safetensors-0.4.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2783956926303dcfeb1de91a4d1204cd4089ab441e622e7caee0642281109db3", size = 435422 }, + { url = "https://files.pythonhosted.org/packages/5e/6c/7a3233c08bde558d6c33a41219119866cb596139a4673cc6c24024710ffd/safetensors-0.4.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d94581aab8c6b204def4d7320f07534d6ee34cd4855688004a4354e63b639a35", size = 457382 }, + { url = "https://files.pythonhosted.org/packages/a0/58/0b7bcba3788ff503990cf9278d611b56c029400612ba93e772c987b5aa03/safetensors-0.4.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:67e1e7cb8678bb1b37ac48ec0df04faf689e2f4e9e81e566b5c63d9f23748523", size = 619301 }, + { url = "https://files.pythonhosted.org/packages/82/cc/9c2cf58611daf1c83ce5d37f9de66353e23fcda36008b13fd3409a760aa3/safetensors-0.4.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:dbd280b07e6054ea68b0cb4b16ad9703e7d63cd6890f577cb98acc5354780142", size = 605580 }, +] + +[[package]] +name = "scikit-learn" +version = "1.5.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "numpy" }, + { name = "scipy" }, + { name = "threadpoolctl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/37/59/44985a2bdc95c74e34fef3d10cb5d93ce13b0e2a7baefffe1b53853b502d/scikit_learn-1.5.2.tar.gz", hash = "sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d", size = 7001680 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/91/609961972f694cb9520c4c3d201e377a26583e1eb83bc5a334c893729214/scikit_learn-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03b6158efa3faaf1feea3faa884c840ebd61b6484167c711548fce208ea09445", size = 12088580 }, + { url = "https://files.pythonhosted.org/packages/cd/7a/19fe32c810c5ceddafcfda16276d98df299c8649e24e84d4f00df4a91e01/scikit_learn-1.5.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1ff45e26928d3b4eb767a8f14a9a6efbf1cbff7c05d1fb0f95f211a89fd4f5de", size = 10975994 }, + { url = "https://files.pythonhosted.org/packages/4c/75/62e49f8a62bf3c60b0e64d0fce540578ee4f0e752765beb2e1dc7c6d6098/scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f763897fe92d0e903aa4847b0aec0e68cadfff77e8a0687cabd946c89d17e675", size = 12465782 }, + { url = "https://files.pythonhosted.org/packages/49/21/3723de321531c9745e40f1badafd821e029d346155b6c79704e0b7197552/scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8b0ccd4a902836493e026c03256e8b206656f91fbcc4fde28c57a5b752561f1", size = 13322034 }, + { url = "https://files.pythonhosted.org/packages/17/1c/ccdd103cfcc9435a18819856fbbe0c20b8fa60bfc3343580de4be13f0668/scikit_learn-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:6c16d84a0d45e4894832b3c4d0bf73050939e21b99b01b6fd59cbb0cf39163b6", size = 11015224 }, + { url = "https://files.pythonhosted.org/packages/a4/db/b485c1ac54ff3bd9e7e6b39d3cc6609c4c76a65f52ab0a7b22b6c3ab0e9d/scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a", size = 12110344 }, + { url = "https://files.pythonhosted.org/packages/54/1a/7deb52fa23aebb855431ad659b3c6a2e1709ece582cb3a63d66905e735fe/scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1", size = 11033502 }, + { url = "https://files.pythonhosted.org/packages/a1/32/4a7a205b14c11225609b75b28402c196e4396ac754dab6a81971b811781c/scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd", size = 12085794 }, + { url = "https://files.pythonhosted.org/packages/c6/29/044048c5e911373827c0e1d3051321b9183b2a4f8d4e2f11c08fcff83f13/scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6", size = 12945797 }, + { url = "https://files.pythonhosted.org/packages/aa/ce/c0b912f2f31aeb1b756a6ba56bcd84dd1f8a148470526a48515a3f4d48cd/scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1", size = 10985467 }, + { url = "https://files.pythonhosted.org/packages/a4/50/8891028437858cc510e13578fe7046574a60c2aaaa92b02d64aac5b1b412/scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5", size = 12025584 }, + { url = "https://files.pythonhosted.org/packages/d2/79/17feef8a1c14149436083bec0e61d7befb4812e272d5b20f9d79ea3e9ab1/scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908", size = 10959795 }, + { url = "https://files.pythonhosted.org/packages/b1/c8/f08313f9e2e656bd0905930ae8bf99a573ea21c34666a813b749c338202f/scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3", size = 12077302 }, + { url = "https://files.pythonhosted.org/packages/a7/48/fbfb4dc72bed0fe31fe045fb30e924909ad03f717c36694351612973b1a9/scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12", size = 13002811 }, + { url = "https://files.pythonhosted.org/packages/a5/e7/0c869f9e60d225a77af90d2aefa7a4a4c0e745b149325d1450f0f0ce5399/scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f", size = 10951354 }, +] + [[package]] name = "scipy" version = "1.14.1" @@ -2609,6 +2807,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f5/1b/6ee032251bf4cdb0cc50059374e86a9f076308c1512b61c4e003e241efb7/scipy-1.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84", size = 44469524 }, ] +[[package]] +name = "sentence-transformers" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "pillow" }, + { name = "scikit-learn" }, + { name = "scipy" }, + { name = "torch" }, + { name = "tqdm" }, + { name = "transformers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/79/0a/c677efe908b20e7e8d4ed6cce3a3447eebc7dc5e348e458f5f9a44a72b00/sentence_transformers-3.3.1.tar.gz", hash = "sha256:9635dbfb11c6b01d036b9cfcee29f7716ab64cf2407ad9f403a2e607da2ac48b", 
size = 217914 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/c8/990e22a465e4771338da434d799578865d6d7ef1fdb50bd844b7ecdcfa19/sentence_transformers-3.3.1-py3-none-any.whl", hash = "sha256:abffcc79dab37b7d18d21a26d5914223dd42239cfe18cb5e111c66c54b658ae7", size = 268797 }, +] + [[package]] name = "sentry-sdk" version = "2.18.0" @@ -2788,6 +3004,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/be/df630c387a0a054815d60be6a97eb4e8f17385d5d6fe660e1c02750062b4/termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8", size = 7755 }, ] +[[package]] +name = "threadpoolctl" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/55/b5148dcbf72f5cde221f8bfe3b6a540da7aa1842f6b491ad979a6c8b84af/threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107", size = 41936 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467", size = 18414 }, +] + [[package]] name = "tiktoken" version = "0.8.0" @@ -2939,6 +3164,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 }, ] +[[package]] +name = "transformers" +version = "4.46.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "regex" }, + { name = "requests" }, + { name = "safetensors" }, + { name = "tokenizers" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/37/5a/58f96c83e566f907ae39f16d4401bbefd8bb85c60bd1e6a95c419752ab90/transformers-4.46.3.tar.gz", hash = "sha256:8ee4b3ae943fe33e82afff8e837f4b052058b07ca9be3cb5b729ed31295f72cc", size = 8627944 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/51/b87caa939fedf307496e4dbf412f4b909af3d9ca8b189fc3b65c1faa456f/transformers-4.46.3-py3-none-any.whl", hash = "sha256:a12ef6f52841fd190a3e5602145b542d03507222f2c64ebb7ee92e8788093aef", size = 10034536 }, +] + [[package]] name = "triton" version = "3.1.0"