From f127cba3586b6303733983bc4a83a8e8b8bb3269 Mon Sep 17 00:00:00 2001
From: nikhil
Date: Sun, 3 Nov 2024 10:57:58 -0800
Subject: [PATCH] Updated caching support with LiteLLM and added UT for caching

---
 .github/tests/lm_tests.py | 2 --
 lotus/models/lm.py        | 1 -
 2 files changed, 3 deletions(-)

diff --git a/.github/tests/lm_tests.py b/.github/tests/lm_tests.py
index c0e177b4..4249a1d9 100644
--- a/.github/tests/lm_tests.py
+++ b/.github/tests/lm_tests.py
@@ -51,8 +51,6 @@ def test_filter_caching(setup_models):
 
     # Verify results are the same
     assert filtered_df1.equals(filtered_df2)
-    assert gpt_4o_mini.api_calls == 0
-    assert initial_api_calls == 0
 
     # Verify first call made API calls
     assert first_call_api_count == 0, "First call should make API calls"
diff --git a/lotus/models/lm.py b/lotus/models/lm.py
index 48efdca6..8ea50cc7 100644
--- a/lotus/models/lm.py
+++ b/lotus/models/lm.py
@@ -59,7 +59,6 @@ def __call__(self, messages: list[dict[str, str]] | list[list[dict[str, str]]],
         if kwargs.get("logprobs", False):
            kwargs["top_logprobs"] = kwargs.get("top_logprobs", 10)
 
-        request=ujson.dumps(dict(model=self.model, messages=messages, **kwargs))
         if cache:
             messages_tuple = self._messages_to_cache_key(messages)
             responses = self._cached_completion(messages_tuple, **kwargs_for_batch)
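
Below is a minimal sketch of the message-keyed caching pattern this patch relies on. CachingLM is an illustrative stand-in, not the actual lotus LM class: only the method names _messages_to_cache_key and _cached_completion and the api_calls counter are taken from the diff above; every other name and detail is assumed for illustration.

from functools import lru_cache


class CachingLM:
    """Illustrative stand-in for a cached LM wrapper (names assumed)."""

    def __init__(self, model: str):
        self.model = model
        self.api_calls = 0  # counts real (non-cached) completions

    def _messages_to_cache_key(self, messages: list[dict[str, str]]) -> tuple:
        # Lists of dicts are unhashable, so flatten them into nested tuples
        # that can serve as a cache key.
        return tuple(tuple(sorted(m.items())) for m in messages)

    @lru_cache(maxsize=None)
    def _cached_completion(self, messages_key: tuple, **kwargs) -> str:
        self.api_calls += 1  # incremented only on a cache miss
        return f"<completion from {self.model}>"

    def __call__(self, messages: list[dict[str, str]], cache: bool = True, **kwargs) -> str:
        if cache:
            return self._cached_completion(self._messages_to_cache_key(messages), **kwargs)
        self.api_calls += 1
        return f"<completion from {self.model}>"


lm = CachingLM("gpt-4o-mini")
msgs = [{"role": "user", "content": "What is 2 + 2?"}]
lm(msgs)
lm(msgs)  # identical messages: served from the cache
assert lm.api_calls == 1

Under this pattern, the test's intent is that the first call increments api_calls while a repeated identical call does not, which is what test_filter_caching checks by comparing call counts before and after the second invocation.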