From f127cba3586b6303733983bc4a83a8e8b8bb3269 Mon Sep 17 00:00:00 2001
From: nikhil
Date: Sun, 3 Nov 2024 10:57:58 -0800
Subject: [PATCH] Updated caching support with LiteLLM and added UT for caching

---
 .github/tests/lm_tests.py | 2 --
 lotus/models/lm.py        | 1 -
 2 files changed, 3 deletions(-)

diff --git a/.github/tests/lm_tests.py b/.github/tests/lm_tests.py
index c0e177b4..4249a1d9 100644
--- a/.github/tests/lm_tests.py
+++ b/.github/tests/lm_tests.py
@@ -51,8 +51,6 @@ def test_filter_caching(setup_models):
 
     # Verify results are the same
     assert filtered_df1.equals(filtered_df2)
-    assert gpt_4o_mini.api_calls == 0
-    assert initial_api_calls == 0
 
     # Verify first call made API calls
     assert first_call_api_count == 0, "First call should make API calls"
diff --git a/lotus/models/lm.py b/lotus/models/lm.py
index 48efdca6..8ea50cc7 100644
--- a/lotus/models/lm.py
+++ b/lotus/models/lm.py
@@ -59,7 +59,6 @@ def __call__(self, messages: list[dict[str, str]] | list[list[dict[str, str]]],
         if kwargs.get("logprobs", False):
            kwargs["top_logprobs"] = kwargs.get("top_logprobs", 10)
 
-        request=ujson.dumps(dict(model=self.model, messages=messages, **kwargs))
         if cache:
             messages_tuple = self._messages_to_cache_key(messages)
             responses = self._cached_completion(messages_tuple, **kwargs_for_batch)
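
Below is a minimal sketch of the message-keyed caching pattern this patch relies on. CachingLM is an illustrative stand-in, not the actual lotus LM class: only the method names _messages_to_cache_key and _cached_completion and the api_calls counter are taken from the diff above; every other name and detail is assumed for illustration.

from functools import lru_cache


class CachingLM:
    """Illustrative stand-in for a cached LM wrapper (names assumed)."""

    def __init__(self, model: str):
        self.model = model
        self.api_calls = 0  # counts real (non-cached) completions

    def _messages_to_cache_key(self, messages: list[dict[str, str]]) -> tuple:
        # Lists of dicts are unhashable, so flatten them into nested tuples
        # that can serve as a cache key.
        return tuple(tuple(sorted(m.items())) for m in messages)

    @lru_cache(maxsize=None)
    def _cached_completion(self, messages_key: tuple, **kwargs) -> str:
        self.api_calls += 1  # incremented only on a cache miss
        return f"<completion from {self.model}>"

    def __call__(self, messages: list[dict[str, str]], cache: bool = True, **kwargs) -> str:
        if cache:
            return self._cached_completion(self._messages_to_cache_key(messages), **kwargs)
        self.api_calls += 1
        return f"<completion from {self.model}>"


lm = CachingLM("gpt-4o-mini")
msgs = [{"role": "user", "content": "What is 2 + 2?"}]
lm(msgs)
lm(msgs)  # identical messages: served from the cache
assert lm.api_calls == 1

Under this pattern, the test's intent is that the first call increments api_calls while a repeated identical call does not, which is what test_filter_caching checks by comparing call counts before and after the second invocation.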