Skip to content

Commit

Permalink
Fix token usage response (#348)
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixTJDietrich authored Oct 21, 2024
1 parent 2d62940 commit b336006
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 14 deletions.
4 changes: 3 additions & 1 deletion llm_core/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ DATABASE_URL=sqlite:///../data/data.sqlite

# Default model to use
# See below for options, available models are also logged on startup
LLM_DEFAULT_MODEL="azure_openai_gpt-35-turbo"
LLM_DEFAULT_MODEL="azure_openai_gpt-4o"
LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN=5
LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN=15

# Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled
LLM_ENABLE_LLM_AS_A_JUDGE=1
Expand Down
32 changes: 21 additions & 11 deletions llm_core/llm_core/models/callbacks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

from langchain.callbacks.base import BaseCallbackHandler
from langchain_core.outputs import LLMResult
from langchain_core.messages.ai import UsageMetadata
Expand All @@ -7,27 +9,35 @@

class UsageHandler(BaseCallbackHandler):
    """Aggregate LLM token usage and cost into the request meta.

    After every LLM call, reads per-million-token prices from the
    environment and emits two meta entries:
      - "totalUsage": running totals of token counts and cost, and
      - "llmRequests": one entry per generation with its model and counts.
    """

    def on_llm_end(self, response: LLMResult, **kwargs) -> None:
        """Accumulate token counts and cost from ``response`` into the meta.

        :param response: The finished LLM result whose generations carry
            ``usage_metadata`` (input/output/total token counts) and
            ``response_metadata`` (model name).
        """
        # Prices are configured per million tokens; default to 0 (free) when unset.
        cost_per_million_input_tokens = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN", 0.0))
        cost_per_million_output_tokens = float(os.environ.get("LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN", 0.0))

        meta = get_meta()

        # Resume running totals from earlier calls in the same request, if any.
        total_usage = meta.get("totalUsage", {"numInputTokens": 0, "numOutputTokens": 0, "numTotalTokens": 0, "cost": 0 })
        llm_calls = meta.get("llmRequests", [])

        for generations in response.generations:
            for generation in generations:
                message = generation.dict()["message"]
                generation_usage: UsageMetadata = message["usage_metadata"]
                model_name = message["response_metadata"].get("model_name", None)

                total_usage["numInputTokens"] += generation_usage["input_tokens"]
                total_usage["numOutputTokens"] += generation_usage["output_tokens"]
                total_usage["numTotalTokens"] += generation_usage["total_tokens"]

                # BUG FIX: input tokens were previously billed at the *output*
                # rate (cost_per_million_output_tokens); price them with the
                # input rate instead.
                total_usage["cost"] += int(generation_usage["input_tokens"]) * cost_per_million_input_tokens / 1_000_000
                total_usage["cost"] += int(generation_usage["output_tokens"]) * cost_per_million_output_tokens / 1_000_000

                llm_calls.append({
                    "model": model_name,
                    "costPerMillionInputToken": cost_per_million_input_tokens,
                    "costPerMillionOutputToken": cost_per_million_output_tokens,
                    "numInputTokens": generation_usage["input_tokens"],
                    "numOutputTokens": generation_usage["output_tokens"],
                    "numTotalTokens": generation_usage["total_tokens"],
                })

        emit_meta("totalUsage", total_usage)
        emit_meta("llmRequests", llm_calls)
2 changes: 2 additions & 0 deletions modules/modeling/module_modeling_llm/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ DATABASE_URL=sqlite:///../data/data.sqlite
# Default model to use
# See below for options, available models are also logged on startup
LLM_DEFAULT_MODEL="azure_openai_gpt-4o"
LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN=5
LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN=15

# Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled
LLM_ENABLE_LLM_AS_A_JUDGE=1
Expand Down
4 changes: 3 additions & 1 deletion modules/programming/module_programming_llm/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ DATABASE_URL=sqlite:///../data/data.sqlite

# Default model to use
# See below for options, available models are also logged on startup
LLM_DEFAULT_MODEL="azure_openai_gpt-35"
LLM_DEFAULT_MODEL="azure_openai_gpt-4o"
LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN=5
LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN=15

# Standard OpenAI (Non-Azure) [leave blank if not used]
# Model names prefixed with `openai_` followed by the model name, e.g. `openai_text-davinci-003`
Expand Down
4 changes: 3 additions & 1 deletion modules/text/module_text_llm/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ DATABASE_URL=sqlite:///../data/data.sqlite

# Default model to use
# See below for options, available models are also logged on startup
LLM_DEFAULT_MODEL="azure_openai_gpt-35-turbo"
LLM_DEFAULT_MODEL="azure_openai_gpt-4o"
LLM_DEFAULT_MODEL_COST_PER_MILLION_INPUT_TOKEN=5
LLM_DEFAULT_MODEL_COST_PER_MILLION_OUTPUT_TOKEN=15

# Enable LLM-as-a-judge approach 0 = disabled, 1 = enabled
LLM_ENABLE_LLM_AS_A_JUDGE=1
Expand Down

0 comments on commit b336006

Please sign in to comment.