vscode: add link to tracing, show token counts
sxlijin committed Jun 20, 2024
1 parent e074613 commit 28a7a81
Showing 13 changed files with 8,502 additions and 6,736 deletions.
5 changes: 0 additions & 5 deletions engine/.vscode/settings.json

This file was deleted.

53 changes: 53 additions & 0 deletions engine/Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions engine/Cargo.toml
@@ -48,6 +48,7 @@ serde = { version = "1", features = ["derive"] }
 static_assertions = "1.1.0"
 strum = { version = "0.26.2", features = ["derive"] }
 strum_macros = "0.26.2"
+time = { version = "0.3.36", features = ["formatting"] }
 walkdir = "2.5.0"
 web-time = "1.1.0"
 baml-types = { path = "baml-lib/baml-types" }
@@ -272,7 +272,13 @@ impl RequestBuilder for OpenAIClient {
         }
 
         if stream {
-            body_obj.insert("stream".into(), true.into());
+            body_obj.insert("stream".into(), json!(true));
+            body_obj.insert(
+                "stream_options".into(),
+                json!({
+                    "include_usage": true,
+                }),
+            );
         }
 
         req.json(&body)
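Note on this hunk: setting `stream_options.include_usage` asks OpenAI to append one final streamed chunk whose `choices` array is empty and which carries a `usage` object; without it, streamed completions report no token counts. A minimal sketch of just the fields this adds to the request body (the rest of the body, model, messages, and so on, is built elsewhere):

use serde_json::json;

fn main() {
    // The streaming-related fields the request body now carries; everything
    // else in the body is unchanged by this commit.
    let stream_fields = json!({
        "stream": true,
        "stream_options": { "include_usage": true }
    });
    println!("{stream_fields}");
}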
@@ -366,6 +372,11 @@ impl SseResponseTrait for OpenAIClient {
                 }
             }
             inner.latency = instant_start.elapsed();
+            if let Some(usage) = event.usage.as_ref() {
+                inner.metadata.prompt_tokens = Some(usage.prompt_tokens);
+                inner.metadata.output_tokens = Some(usage.completion_tokens);
+                inner.metadata.total_tokens = Some(usage.total_tokens);
+            }
 
             std::future::ready(Some(LLMResponse::Success(inner.clone())))
         },
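The `usage` read in this hunk is only present on that final chunk. A hedged sketch of the wire shape being deserialized, with field names following OpenAI's streaming format (the actual struct names in this crate may differ):

use serde::Deserialize;

// Token counts attached to the final streamed chunk when include_usage
// was requested; absent from all other chunks.
#[derive(Deserialize)]
struct CompletionUsage {
    prompt_tokens: u64,
    completion_tokens: u64,
    total_tokens: u64,
}

// A streamed chat-completion chunk: `usage` is None on every chunk except
// the final usage-only one, whose `choices` array is empty.
#[derive(Deserialize)]
struct ChatCompletionChunk {
    choices: Vec<serde_json::Value>,
    usage: Option<CompletionUsage>,
}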
1 change: 1 addition & 0 deletions engine/baml-schema-wasm/Cargo.toml
@@ -25,6 +25,7 @@ log.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 serde-wasm-bindgen = "0.4"
+time.workspace = true
 uuid = { version = "1.8", features = ["v4", "js"] }
 wasm-bindgen = "=0.2.92"
 wasm-bindgen-futures = "0.4.42"
45 changes: 44 additions & 1 deletion engine/baml-schema-wasm/src/runtime_wasm/mod.rs
@@ -18,7 +18,7 @@ use wasm_bindgen::prelude::*;
 
 #[wasm_bindgen(start)]
 pub fn on_wasm_init() {
-    match console_log::init_with_level(log::Level::Warn) {
+    match console_log::init_with_level(log::Level::Trace) {
         Ok(_) => web_sys::console::log_1(&"Initialized BAML runtime logging".into()),
         Err(e) => web_sys::console::log_1(
             &format!("Failed to initialize BAML runtime logging: {:?}", e).into(),
@@ -324,6 +324,9 @@ pub struct WasmLLMResponse {
     pub content: String,
     pub start_time_unix_ms: u64,
     pub latency_ms: u64,
+    pub input_tokens: Option<u64>,
+    pub output_tokens: Option<u64>,
+    pub total_tokens: Option<u64>,
 }
 
 #[wasm_bindgen(getter_with_clone, inspectable)]
@@ -450,6 +453,43 @@ impl WasmTestResponse {
             Err(e) => Some(e.to_string()),
         }
     }
+
+    fn _trace_url(&self) -> anyhow::Result<String> {
+        let test_response = match self.test_response.as_ref() {
+            Ok(t) => t,
+            Err(e) => anyhow::bail!("Failed to get test response: {:?}", e),
+        };
+        let start_time = match test_response.function_response.llm_response() {
+            LLMResponse::Success(s) => s.start_time,
+            LLMResponse::LLMFailure(f) => f.start_time,
+            _ => anyhow::bail!("Test has no start time"),
+        };
+        let start_time = time::OffsetDateTime::from_unix_timestamp(
+            start_time
+                .duration_since(web_time::UNIX_EPOCH)?
+                .as_secs()
+                .try_into()?,
+        )?
+        .format(&time::format_description::well_known::Rfc3339)?;
+
+        let event_span_id = self
+            .span
+            .as_ref()
+            .ok_or(anyhow::anyhow!("Test has no span ID"))?
+            .to_string();
+        let subevent_span_id = test_response
+            .function_span
+            .as_ref()
+            .ok_or(anyhow::anyhow!("Function call has no span ID"))?
+            .to_string();
+
+        Ok(format!("https://app.boundaryml.com/dashboard/projects/{}/drilldown?start_time={start_time}&eid={event_span_id}&s_eid={subevent_span_id}&test=false&onlyRootEvents=true", "proj_7a9833d3-e585-4ac0-8e21-ff5cfe46f556"))
+    }
+
+    #[wasm_bindgen]
+    pub fn trace_url(&self) -> Option<String> {
+        self._trace_url().ok()
+    }
 }
 
 fn llm_response_to_wasm_error(
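The `time` dependency added to the two Cargo.toml files exists for one purpose here: formatting the span's Unix start time as an RFC 3339 string for the dashboard's `start_time` query parameter. A standalone sketch of that conversion (the timestamp value is illustrative):

use time::{format_description::well_known::Rfc3339, OffsetDateTime};

fn main() {
    // Unix seconds -> RFC 3339, the same conversion _trace_url performs.
    let ts = OffsetDateTime::from_unix_timestamp(1_718_841_600).unwrap();
    println!("{}", ts.format(&Rfc3339).unwrap()); // 2024-06-20T00:00:00Z
}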
@@ -500,6 +540,9 @@ impl IntoWasm
                 .unwrap_or(web_time::Duration::ZERO)
                 .as_millis() as u64,
             latency_ms: s.latency.as_millis() as u64,
+            input_tokens: s.metadata.prompt_tokens,
+            output_tokens: s.metadata.output_tokens,
+            total_tokens: s.metadata.total_tokens,
         }),
         _ => None,
     }
@@ -41,15 +41,15 @@ function PromptTestClaudeChatNoSystem(input: string) -> string {
     "#
 }
 
-test PromptTestOpenAIChat {
+test TestSystemAndNonSystemChat1 {
   functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]
   args {
     input "cats"
   }
 }
 
-test TestClaude {
-  functions [PromptTestClaudeChatNoSystem]
+test TestSystemAndNonSystemChat2 {
+  functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]
   args {
     input "lion"
   }
2 changes: 1 addition & 1 deletion integ-tests/python/baml_client/inlinedbaml.py
@@ -54,7 +54,7 @@
"test-files/functions/output/string-list.baml": "function FnOutputStringList(input: string) -> string[] {\n client GPT35\n prompt #\"\n Return a list of strings in json format like [\"string1\", \"string2\", \"string3\"].\n\n JSON:\n \"#\n}\n\ntest FnOutputStringList {\n functions [FnOutputStringList]\n args {\n input \"example input\"\n }\n}\n",
"test-files/functions/output/unions.baml": "class UnionTest_ReturnType {\n prop1 string | bool\n prop2 (float | bool)[]\n prop3 (bool[] | int[])\n}\n\nfunction UnionTest_Function(input: string | bool) -> UnionTest_ReturnType {\n client GPT35\n prompt #\"\n Return a JSON blob with this schema: \n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\ntest UnionTest_Function {\n functions [UnionTest_Function]\n args {\n input \"example input\"\n }\n}\n",
"test-files/functions/prompts/no-chat-messages.baml": "\n\nfunction PromptTestClaude(input: string) -> string {\n client Claude\n prompt #\"\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n Tell me a haiku about {{ input }}\n \"#\n}",
"test-files/functions/prompts/with-chat-messages.baml": "\nfunction PromptTestOpenAIChat(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestOpenAIChatNoSystem(input: string) -> string {\n client GPT35\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChat(input: string) -> string {\n client Claude\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChatNoSystem(input: string) -> string {\n client Claude\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\ntest PromptTestOpenAIChat {\n functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]\n args {\n input \"cats\"\n }\n}\n\ntest TestClaude {\n functions [PromptTestClaudeChatNoSystem]\n args {\n input \"lion\"\n }\n}",
"test-files/functions/prompts/with-chat-messages.baml": "\nfunction PromptTestOpenAIChat(input: string) -> string {\n client GPT35\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestOpenAIChatNoSystem(input: string) -> string {\n client GPT35\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChat(input: string) -> string {\n client Claude\n prompt #\"\n {{ _.role(\"system\") }}\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\nfunction PromptTestClaudeChatNoSystem(input: string) -> string {\n client Claude\n prompt #\"\n You are an assistant that always responds in a very excited way with emojis and also outputs this word 4 times after giving a response: {{ input }}\n \n {{ _.role(\"user\") }}\n Tell me a haiku about {{ input }}\n \"#\n}\n\ntest TestSystemAndNonSystemChat1 {\n functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]\n args {\n input \"cats\"\n }\n}\n\ntest TestSystemAndNonSystemChat2 {\n functions [PromptTestClaude, PromptTestOpenAI, PromptTestOpenAIChat, PromptTestOpenAIChatNoSystem, PromptTestClaudeChat, PromptTestClaudeChatNoSystem]\n args {\n input \"lion\"\n }\n}",
"test-files/functions/v2/basic.baml": "\n\nfunction ExtractResume2(resume: string) -> Resume {\n client GPT4\n prompt #\"\n {{ _.role('system') }}\n\n Extract the following information from the resume:\n\n Resume:\n <<<<\n {{ resume }}\n <<<<\n\n Output JSON schema:\n {{ ctx.output_format }}\n\n JSON:\n \"#\n}\n\n\nclass WithReasoning {\n value string\n reasoning string @description(#\"\n Why the value is a good fit.\n \"#)\n}\n\n\nclass SearchParams {\n dateRange int? @description(#\"\n In ISO duration format, e.g. P1Y2M10D.\n \"#)\n location string[]\n jobTitle WithReasoning? @description(#\"\n An exact job title, not a general category.\n \"#)\n company WithReasoning? @description(#\"\n The exact name of the company, not a product or service.\n \"#)\n description WithReasoning[] @description(#\"\n Any specific projects or features the user is looking for.\n \"#)\n tags (Tag | string)[]\n}\n\nenum Tag {\n Security\n AI\n Blockchain\n}\n\nfunction GetQuery(query: string) -> SearchParams {\n client GPT4\n prompt #\"\n Extract the following information from the query:\n\n Query:\n <<<<\n {{ query }}\n <<<<\n\n OUTPUT_JSON_SCHEMA:\n {{ ctx.output_format }}\n\n Before OUTPUT_JSON_SCHEMA, list 5 intentions the user may have.\n --- EXAMPLES ---\n 1. <intent>\n 2. <intent>\n 3. <intent>\n 4. <intent>\n 5. <intent>\n\n {\n ... // OUTPUT_JSON_SCHEMA\n }\n \"#\n}\n\nclass RaysData {\n dataType DataType\n value Resume | Event\n}\n\nenum DataType {\n Resume\n Event\n}\n\nclass Event {\n title string\n date string\n location string\n description string\n}\n\nfunction GetDataType(text: string) -> RaysData {\n client GPT4\n prompt #\"\n Extract the relevant info.\n\n Text:\n <<<<\n {{ text }}\n <<<<\n\n Output JSON schema:\n {{ ctx.output_format }}\n\n JSON:\n \"#\n}",
"test-files/providers/providers.baml": "function TestAnthropic(input: string) -> string {\n client Claude\n prompt #\"\n Write a nice haiku about {{ input }}\n \"#\n}\n\nfunction TestOpenAI(input: string) -> string {\n client GPT35\n prompt #\"\n Write a nice haiku about {{ input }}\n \"#\n}\n\nfunction TestAzure(input: string) -> string {\n client GPT35Azure\n prompt #\"\n Write a nice haiku about {{ input }}\n \"#\n}\n\nfunction TestOllama(input: string) -> string {\n client Ollama\n prompt #\"\n Write a nice haiku about {{ input }}\n \"#\n}\n\nfunction TestGemini(input: string) -> string {\n client Gemini\n prompt #\"\n Write a nice short story about {{ input }}\n \"#\n}\n\n\ntest TestProvider {\n functions [TestAnthropic, TestOpenAI, TestAzure, TestOllama, TestGemini]\n args {\n input \"Donkey kong and peanut butter\"\n }\n}\n\n\n",
"test-files/strategies/fallback.baml": "\nclient<llm> FaultyClient {\n provider openai\n options {\n model unknown-model\n api_key env.OPENAI_API_KEY\n }\n}\n\n\nclient<llm> FallbackClient {\n provider fallback\n options {\n // first 2 clients are expected to fail.\n strategy [\n FaultyClient,\n RetryClientConstant,\n GPT35\n ]\n }\n}\n\nfunction TestFallbackClient() -> string {\n client FallbackClient\n // TODO make it return the client name instead\n prompt #\"\n Say a haiku about mexico.\n \"#\n}",
Expand Down