Commit

Merge remote-tracking branch 'origin/master' into fb-dia-1716
matt-bernstein committed Jan 29, 2025
2 parents 11368f4 + 4132c6f commit 5fc0a31
Showing 2 changed files with 15 additions and 3 deletions.
adala/runtimes/_litellm.py: 17 changes (14 additions & 3 deletions)
@@ -145,7 +145,7 @@ def _get_usage_dict(usage: Usage, model: str) -> Dict:
data["_prompt_cost_usd"] = prompt_cost
data["_completion_cost_usd"] = completion_cost
data["_total_cost_usd"] = prompt_cost + completion_cost
except NotFoundError:
except:
logger.error(f"Failed to get cost for model {model}")
data["_prompt_cost_usd"] = None
data["_completion_cost_usd"] = None
@@ -215,6 +215,8 @@ def handle_llm_exception(
     # usage = e.total_usage
     # not available here, so have to approximate by hand, assuming the same error occurred each time
     n_attempts = retries.stop.max_attempt_number
+    # Note that the default model used in token_counter is gpt-3.5-turbo as of now - if model passed in
+    # does not match a mapped model, falls back to default
     prompt_tokens = n_attempts * litellm.token_counter(
         model=model, messages=messages[:-1]
     )  # response is appended as the last message
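
The new comment is the key detail here: when litellm.token_counter does not recognize the model name, it falls back to a default tokenizer (gpt-3.5-turbo's at the time of this commit), so counts for unmapped models are only estimates. A small illustration of the behavior as described:

import litellm

messages = [{"role": "user", "content": "Classify this support ticket."}]

# Mapped model: counted with that model's own tokenizer
mapped = litellm.token_counter(model="gpt-4o", messages=messages)

# Unmapped model name: litellm falls back to its default tokenizer,
# so this count is an approximation, not an exact figure
unmapped = litellm.token_counter(model="some-custom-finetune", messages=messages)
print(mapped, unmapped)
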
@@ -368,11 +370,16 @@ def record_to_record(
             )
             usage = completion.usage
             dct = to_jsonable_python(response)
+            # With successful completions we can get canonical model name
+            usage_model = completion.model
+
         except Exception as e:
             dct, usage = handle_llm_exception(e, messages, self.model, retries)
+            # With exceptions we don't have access to completion.model
+            usage_model = self.model
 
         # Add usage data to the response (e.g. token counts, cost)
-        dct.update(_get_usage_dict(usage, model=self.model))
+        dct.update(_get_usage_dict(usage, model=usage_model))
 
         return dct
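
The intent behind usage_model: on success, completion.model reports the canonical model name the provider actually served, which can be more specific than the requested alias and therefore prices more accurately; in the exception path no completion object exists, so the configured self.model is the only name available. A rough sketch (the snapshot name in the comment is a hypothetical example, and running this requires provider credentials):

import litellm

response = litellm.completion(
    model="gpt-4o",  # the alias we request
    messages=[{"role": "user", "content": "hi"}],
)
# Providers often report the concrete snapshot they served,
# e.g. "gpt-4o-2024-08-06" (hypothetical), and that canonical
# name is a better key for cost lookup than the alias above.
print(response.model)
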

@@ -499,13 +506,17 @@ async def batch_to_batch(
                 dct, usage = handle_llm_exception(
                     response, messages, self.model, retries
                 )
+                # With exceptions we don't have access to completion.model
+                usage_model = self.model
             else:
                 resp, completion = response
                 usage = completion.usage
                 dct = to_jsonable_python(resp)
+                # With successful completions we can get canonical model name
+                usage_model = completion.model
 
             # Add usage data to the response (e.g. token counts, cost)
-            dct.update(_get_usage_dict(usage, model=self.model))
+            dct.update(_get_usage_dict(usage, model=usage_model))
 
             df_data.append(dct)

server/tasks/stream_inference.py: 1 change (1 addition & 0 deletions)
@@ -144,6 +144,7 @@ async def async_process_streaming_input(input_task_done: asyncio.Event, agent: A
         logger.error(
             f"Error in async_process_streaming_input: {e}. Traceback: {traceback.format_exc()}"
         )
+        input_task_done.set()
     # cleans up after any exceptions raised here as well as asyncio.CancelledError resulting from failure in async_process_streaming_output
     finally:
         await agent.environment.finalize()
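
The one-line fix addresses a potential shutdown hang: if the input consumer fails without setting input_task_done, any task awaiting that event blocks forever. A self-contained sketch of the pattern, with illustrative names rather than the actual task code:

import asyncio

async def consume_input(done: asyncio.Event):
    try:
        raise RuntimeError("simulated failure while reading input")
    except Exception as e:
        print(f"input consumer failed: {e}")
        # Signal completion even on failure so downstream waiters can exit
        done.set()

async def consume_output(done: asyncio.Event):
    # Without the set() in the except block, this would wait forever
    await done.wait()
    print("input finished or failed; shutting down output consumer")

async def main():
    done = asyncio.Event()
    await asyncio.gather(consume_input(done), consume_output(done))

asyncio.run(main())
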
