Updated caching support with LiteLLM and added UT for caching
nikhil authored and nikhil committed Nov 3, 2024
1 parent 13252af commit f127cba
Showing 2 changed files with 0 additions and 3 deletions.
.github/tests/lm_tests.py: 2 changes (0 additions & 2 deletions)
@@ -51,8 +51,6 @@ def test_filter_caching(setup_models):
     # Verify results are the same
     assert filtered_df1.equals(filtered_df2)

-    assert gpt_4o_mini.api_calls == 0
-    assert initial_api_calls == 0
     # Verify first call made API calls
     assert first_call_api_count > 0, "First call should make API calls"

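The two deleted assertions pinned the API-call counter to zero, which contradicted the comment directly below them; the kept assertion checks that the first (uncached) call actually reaches the API. For context, here is a minimal sketch of how the full test_filter_caching might read after this commit. The setup_models fixture, the api_calls counter, and the assertion variables come from the diff; the sample DataFrame, the sem_filter call, and the second-call assertion are illustrative assumptions about the elided parts of the test.

import pandas as pd
import lotus  # assumed: registers the sem_filter DataFrame accessor

def test_filter_caching(setup_models):
    gpt_4o_mini = setup_models  # assumed: fixture returns the LM under test
    df = pd.DataFrame({"Course": ["Probability and Random Processes", "Cooking 101"]})

    # First call should miss the cache and make real API calls.
    filtered_df1 = df.sem_filter("{Course} is a math course")
    first_call_api_count = gpt_4o_mini.api_calls
    assert first_call_api_count > 0, "First call should make API calls"

    # Second identical call should be served from the cache.
    filtered_df2 = df.sem_filter("{Course} is a math course")
    assert gpt_4o_mini.api_calls == first_call_api_count, "Second call should not hit the API"

    # Verify results are the same
    assert filtered_df1.equals(filtered_df2)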
lotus/models/lm.py: 1 change (0 additions & 1 deletion)
@@ -59,7 +59,6 @@ def __call__(self, messages: list[dict[str, str]] | list[list[dict[str, str]]],
         if kwargs.get("logprobs", False):
             kwargs["top_logprobs"] = kwargs.get("top_logprobs", 10)

-        request=ujson.dumps(dict(model=self.model, messages=messages, **kwargs))
         if cache:
             messages_tuple = self._messages_to_cache_key(messages)
             responses = self._cached_completion(messages_tuple, **kwargs_for_batch)
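The deleted line serialized the request with ujson but never used the result, so it was dead code once cache lookups began keying off the messages themselves. Below is a minimal sketch of the caching pattern the kept lines imply. The method names, the tuple conversion, and the api_calls counter come from the diff; the functools.lru_cache memoization, the class skeleton, and the stubbed completion are assumptions (a real implementation would presumably delegate to LiteLLM, and the diff's kwargs_for_batch is replaced here by plain kwargs).

import functools

class LM:
    # Hypothetical skeleton; only the caching path is sketched.
    def __init__(self, model: str):
        self.model = model
        self.api_calls = 0  # counts real (non-cached) completions

    def _messages_to_cache_key(self, messages: list[dict[str, str]]) -> tuple:
        # Lists and dicts are unhashable, so the message payload is
        # flattened into nested tuples before it can serve as a cache key.
        return tuple(tuple(sorted(m.items())) for m in messages)

    @functools.lru_cache(maxsize=None)
    def _cached_completion(self, messages_tuple: tuple, **kwargs) -> str:
        # Runs only on a cache miss; a real implementation would issue the
        # completion request here. All kwargs values must be hashable.
        self.api_calls += 1
        return f"completion for {len(messages_tuple)} message(s)"

    def __call__(self, messages: list[dict[str, str]], cache: bool = True, **kwargs) -> str:
        if cache:
            messages_tuple = self._messages_to_cache_key(messages)
            return self._cached_completion(messages_tuple, **kwargs)
        self.api_calls += 1
        return "uncached completion"

With this shape, two identical calls increment api_calls exactly once, which is what the updated unit test asserts.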
