From 09c65497c3218387756775827ba22bcad16f0362 Mon Sep 17 00:00:00 2001
From: hellovai
Date: Mon, 11 Nov 2024 11:00:58 -0800
Subject: [PATCH] Add ability for certain models to disable streaming. (#1157)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- `openai/o1-*` models don't support streaming by default.
- All other models (`*/*`) do support streaming by default.
- Users can set the `supports_streaming` option on their client to configure this manually.

✅ Raw curl works
✅ Shorthand clients work for `o1-*` models without any extra configuration
✅ Docs updated

----

> [!IMPORTANT]
> Add `supports_streaming` option to configure streaming for models, defaulting `openai/o1-*` to non-streaming, with documentation updates.
>
> - **Behavior**:
>   - Adds `supports_streaming` option to client configuration to manually enable/disable streaming.
>   - `openai/o1-*` models default to non-streaming; other models default to streaming.
>   - Updates `supports_streaming()` in `WithClientProperties` to reflect new behavior.
> - **Code Changes**:
>   - Modifies `resolve_properties()` in `anthropic_client.rs`, `aws_client.rs`, `googleai_client.rs`, `openai_client.rs`, and `vertex_client.rs` to handle `SupportedRequestModes`.
>   - Adds `SupportedRequestModes` struct in `llm_client/mod.rs`.
>   - Updates `WithStreamable` trait to handle non-streaming fallback.
> - **Documentation**:
>   - Updates various `.mdx` files to document the `supports_streaming` option.
>   - Adds new snippets `supports-streaming.mdx` and `supports-streaming-openai.mdx`.
>
> This description was created by [Ellipsis](https://www.ellipsis.dev?ref=BoundaryML%2Fbaml&utm_source=github&utm_medium=referral) for 4e8efd15006cd3e2ec4f8e2d79cb50fef5aa952c. It will automatically update as commits are pushed.
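A minimal usage sketch, adapted from the `supports-streaming-openai.mdx` snippet added in this patch (client and function names are illustrative):

```baml
// Opt a client out of streaming explicitly. o1-* models already default to
// supports_streaming false, so setting it is only needed to override the default.
client MyClientWithoutStreaming {
  provider openai
  options {
    model gpt-4o
    api_key env.OPENAI_API_KEY
    supports_streaming false
  }
}

function MyFunction() -> string {
  client MyClientWithoutStreaming
  prompt #"Write a short story"#
}
```

Calling `b.stream.MyFunction()` against such a client still looks like a stream from the caller's side, but under the hood the runtime issues a single non-streaming HTTP request and emits one final event; `b.MyFunction()` behaves exactly as before.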
--- .../schema-ast/src/parser/parse_expression.rs | 12 ++- .../src/internal/llm_client/mod.rs | 6 ++ .../primitive/anthropic/anthropic_client.rs | 12 ++- .../anthropic/properties/anthropic.rs | 37 ++++++++++ .../primitive/anthropic/properties/mod.rs | 6 ++ .../llm_client/primitive/aws/aws_client.rs | 9 ++- .../primitive/google/googleai_client.rs | 13 +++- .../primitive/google/properties/google.rs | 32 ++++++++ .../primitive/google/properties/mod.rs | 7 ++ .../src/internal/llm_client/primitive/mod.rs | 3 + .../primitive/openai/openai_client.rs | 21 ++++-- .../primitive/openai/properties/azure.rs | 18 +++-- .../primitive/openai/properties/generic.rs | 8 +- .../primitive/openai/properties/mod.rs | 3 +- .../primitive/openai/properties/ollama.rs | 3 + .../primitive/openai/properties/openai.rs | 5 +- .../primitive/vertex/vertex_client.rs | 9 ++- .../internal/llm_client/properties_hander.rs | 27 ++++++- .../src/internal/llm_client/traits/mod.rs | 74 ++++++++++++------- .../baml/clients/providers/anthropic.mdx | 2 + .../baml/clients/providers/aws-bedrock.mdx | 3 +- .../baml/clients/providers/azure.mdx | 5 +- .../baml/clients/providers/google-ai.mdx | 4 +- .../baml/clients/providers/ollama.mdx | 4 +- .../baml/clients/providers/openai-generic.mdx | 4 + .../baml/clients/providers/openai.mdx | 5 +- .../baml/clients/providers/vertex.mdx | 4 +- fern/snippets/supports-streaming-openai.mdx | 43 +++++++++++ fern/snippets/supports-streaming.mdx | 35 +++++++++ .../functions/output/recursive-class.baml | 10 ++- integ-tests/python/baml_client/inlinedbaml.py | 2 +- integ-tests/ruby/baml_client/inlined.rb | 2 +- .../typescript/baml_client/inlinedbaml.ts | 2 +- .../src/utils/ErrorFallback.tsx | 38 ++++++++-- 34 files changed, 390 insertions(+), 78 deletions(-) create mode 100644 engine/baml-runtime/src/internal/llm_client/primitive/anthropic/properties/anthropic.rs create mode 100644 engine/baml-runtime/src/internal/llm_client/primitive/anthropic/properties/mod.rs create mode 100644 engine/baml-runtime/src/internal/llm_client/primitive/google/properties/google.rs create mode 100644 engine/baml-runtime/src/internal/llm_client/primitive/google/properties/mod.rs create mode 100644 fern/snippets/supports-streaming-openai.mdx create mode 100644 fern/snippets/supports-streaming.mdx diff --git a/engine/baml-lib/schema-ast/src/parser/parse_expression.rs b/engine/baml-lib/schema-ast/src/parser/parse_expression.rs index 823e85b85..c175f97ee 100644 --- a/engine/baml-lib/schema-ast/src/parser/parse_expression.rs +++ b/engine/baml-lib/schema-ast/src/parser/parse_expression.rs @@ -85,11 +85,15 @@ fn parse_string_literal(token: Pair<'_>, diagnostics: &mut Diagnostics) -> Expre if content.contains(' ') { Expression::StringValue(content, span) } else { - match Identifier::from((content.as_str(), span.clone())) { - Identifier::Invalid(..) | Identifier::String(..) => { - Expression::StringValue(content, span) + if content.eq("true") || content.eq("false") { + Expression::BoolValue(content.eq("true"), span) + } else { + match Identifier::from((content.as_str(), span.clone())) { + Identifier::Invalid(..) | Identifier::String(..) 
=> { + Expression::StringValue(content, span) + } + identifier => Expression::Identifier(identifier), } - identifier => Expression::Identifier(identifier), } } } diff --git a/engine/baml-runtime/src/internal/llm_client/mod.rs b/engine/baml-runtime/src/internal/llm_client/mod.rs index 0926dd8d6..d61ab0caf 100644 --- a/engine/baml-runtime/src/internal/llm_client/mod.rs +++ b/engine/baml-runtime/src/internal/llm_client/mod.rs @@ -88,6 +88,12 @@ pub enum AllowedMetadata { Only(HashSet), } +#[derive(Clone, Serialize, Deserialize)] +pub struct SupportedRequestModes { + // If unset, treat as auto + pub stream: Option, +} + impl AllowedMetadata { pub fn is_allowed(&self, key: &str) -> bool { match self { diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/anthropic_client.rs b/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/anthropic_client.rs index 8da5a53b5..58523b37b 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/anthropic_client.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/anthropic_client.rs @@ -1,7 +1,7 @@ use crate::internal::llm_client::{ properties_hander::PropertiesHandler, traits::{ToProviderMessage, ToProviderMessageExt, WithClientProperties}, - AllowedMetadata, ResolveMediaUrls, + AllowedMetadata, ResolveMediaUrls, SupportedRequestModes, }; use std::collections::HashMap; @@ -46,6 +46,7 @@ struct PostRequestProperities { allowed_metadata: AllowedMetadata, // These are passed directly to the Anthropic API. properties: HashMap, + supported_request_modes: SupportedRequestModes, } // represents client that interacts with the Anthropic API @@ -82,6 +83,8 @@ fn resolve_properties( .entry("anthropic-version".to_string()) .or_insert("2023-06-01".to_string()); + let supported_request_modes = properties.pull_supported_request_modes()?; + let mut properties = properties.finalize(); // Anthropic has a very low max_tokens by default, so we increase it to 4096. 
properties @@ -89,6 +92,7 @@ fn resolve_properties( .or_insert_with(|| 4096.into()); let properties = properties; + Ok(PostRequestProperities { default_role, base_url, @@ -97,6 +101,7 @@ fn resolve_properties( allowed_metadata, properties, proxy_url: ctx.env.get("BOUNDARY_PROXY_URL").map(|s| s.to_string()), + supported_request_modes, }) } @@ -114,6 +119,9 @@ impl WithClientProperties for AnthropicClient { fn client_properties(&self) -> &HashMap { &self.properties.properties } + fn supports_streaming(&self) -> bool { + self.properties.supported_request_modes.stream.unwrap_or(true) + } } impl WithClient for AnthropicClient { @@ -351,7 +359,7 @@ impl RequestBuilder for AnthropicClient { self.properties .proxy_url .as_ref() - .unwrap_or(&self.properties.base_url) + .unwrap_or_else(|| &self.properties.base_url) } else { &self.properties.base_url }; diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/properties/anthropic.rs b/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/properties/anthropic.rs new file mode 100644 index 000000000..f21b7755f --- /dev/null +++ b/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/properties/anthropic.rs @@ -0,0 +1,37 @@ +use std::collections::HashMap; +use anyhow::Result; +use crate::{ + internal::llm_client::{properties_hander::PropertiesHandler, SharedProperties}, + RuntimeContext, +}; +use super::PostRequestProperties; + +pub fn resolve_properties( + mut properties: PropertiesHandler, + ctx: &RuntimeContext, +) -> Result { + let shared = properties.pull_shared_properties("system"); + + // Override defaults in shared + let shared = SharedProperties { + base_url: shared.base_url + .map(|url| url.unwrap_or_else(|| "https://api.anthropic.com".into())), + api_key: shared.api_key + .map(|key| key.or_else(|| ctx.env.get("ANTHROPIC_API_KEY").map(|s| s.to_string()))), + headers: shared.headers.map(|mut h| { + h.entry("anthropic-version".to_string()) + .or_insert("2023-06-01".to_string()); + h + }), + ..shared + }; + + let mut properties = properties.finalize(); + properties.entry("max_tokens".into()) + .or_insert_with(|| 4096.into()); + + Ok(PostRequestProperties { + shared, + proxy_url: ctx.env.get("BOUNDARY_PROXY_URL").map(|s| s.to_string()), + }) +} \ No newline at end of file diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/properties/mod.rs b/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/properties/mod.rs new file mode 100644 index 000000000..089bed34a --- /dev/null +++ b/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/properties/mod.rs @@ -0,0 +1,6 @@ +use crate::internal::llm_client::properties_hander::SharedProperties; + +pub struct PostRequestProperties { + pub shared: SharedProperties, + pub proxy_url: Option, +} \ No newline at end of file diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/aws/aws_client.rs b/engine/baml-runtime/src/internal/llm_client/primitive/aws/aws_client.rs index 6b7ececdf..dbca701b9 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/aws/aws_client.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/aws/aws_client.rs @@ -19,7 +19,7 @@ use web_time::Instant; use web_time::SystemTime; use crate::internal::llm_client::traits::{ToProviderMessageExt, WithClientProperties}; -use crate::internal::llm_client::AllowedMetadata; +use crate::internal::llm_client::{AllowedMetadata, SupportedRequestModes}; use crate::internal::llm_client::{ primitive::request::RequestBuilder, traits::{ @@ 
-44,6 +44,7 @@ struct RequestProperties { request_options: HashMap, ctx_env: HashMap, + supported_request_modes: SupportedRequestModes, } // represents client that interacts with the Anthropic API @@ -88,6 +89,8 @@ fn resolve_properties(client: &ClientWalker, ctx: &RuntimeContext) -> Result Result &crate::internal::llm_client::AllowedMetadata { &self.properties.allowed_metadata } + fn supports_streaming(&self) -> bool { + self.properties.supported_request_modes.stream.unwrap_or(true) + } } impl WithClient for AwsClient { diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/google/googleai_client.rs b/engine/baml-runtime/src/internal/llm_client/primitive/google/googleai_client.rs index 7820937b6..1dc5a929f 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/google/googleai_client.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/google/googleai_client.rs @@ -3,7 +3,7 @@ use crate::internal::llm_client::properties_hander::{PropertiesHandler}; use crate::internal::llm_client::traits::{ ToProviderMessage, ToProviderMessageExt, WithClientProperties, }; -use crate::internal::llm_client::{AllowedMetadata, ResolveMediaUrls}; +use crate::internal::llm_client::{AllowedMetadata, ResolveMediaUrls, SupportedRequestModes}; use crate::RuntimeContext; use crate::{ internal::llm_client::{ @@ -38,6 +38,7 @@ struct PostRequestProperities { model_id: Option, properties: HashMap, allowed_metadata: AllowedMetadata, + supported_request_modes: SupportedRequestModes, } pub struct GoogleAIClient { @@ -69,15 +70,18 @@ fn resolve_properties( let allowed_metadata = properties.pull_allowed_role_metadata()?; let headers = properties.pull_headers()?; + let supported_request_modes = properties.pull_supported_request_modes()?; + Ok(PostRequestProperities { default_role, api_key, headers, - properties: properties.finalize(), base_url, - model_id: Some(model_id), proxy_url: ctx.env.get("BOUNDARY_PROXY_URL").map(|s| s.to_string()), + model_id: Some(model_id), + properties: properties.finalize(), allowed_metadata, + supported_request_modes, }) } @@ -94,6 +98,9 @@ impl WithClientProperties for GoogleAIClient { fn allowed_metadata(&self) -> &crate::internal::llm_client::AllowedMetadata { &self.properties.allowed_metadata } + fn supports_streaming(&self) -> bool { + self.properties.supported_request_modes.stream.unwrap_or(true) + } } impl WithClient for GoogleAIClient { diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/google/properties/google.rs b/engine/baml-runtime/src/internal/llm_client/primitive/google/properties/google.rs new file mode 100644 index 000000000..376be8b3b --- /dev/null +++ b/engine/baml-runtime/src/internal/llm_client/primitive/google/properties/google.rs @@ -0,0 +1,32 @@ +use std::collections::HashMap; +use anyhow::Result; +use crate::{ + internal::llm_client::{properties_hander::PropertiesHandler, SharedProperties}, + RuntimeContext, +}; +use super::PostRequestProperties; + +pub fn resolve_properties( + mut properties: PropertiesHandler, + ctx: &RuntimeContext, +) -> Result { + let shared = properties.pull_shared_properties("user"); + + // Override defaults in shared + let shared = SharedProperties { + base_url: shared.base_url + .map(|url| url.unwrap_or_else(|| "https://generativelanguage.googleapis.com/v1beta".to_string())), + api_key: shared.api_key + .map(|key| key.or_else(|| ctx.env.get("GOOGLE_API_KEY").map(|s| s.to_string()))), + ..shared + }; + + let model_id = properties.remove_str("model")? 
+ .unwrap_or_else(|| "gemini-1.5-flash".to_string()); + + Ok(PostRequestProperties { + shared, + proxy_url: ctx.env.get("BOUNDARY_PROXY_URL").map(|s| s.to_string()), + model_id: Some(model_id), + }) +} \ No newline at end of file diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/google/properties/mod.rs b/engine/baml-runtime/src/internal/llm_client/primitive/google/properties/mod.rs new file mode 100644 index 000000000..fbcebe0ce --- /dev/null +++ b/engine/baml-runtime/src/internal/llm_client/primitive/google/properties/mod.rs @@ -0,0 +1,7 @@ +use crate::internal::llm_client::properties_hander::SharedProperties; + +pub struct PostRequestProperties { + pub shared: SharedProperties, + pub proxy_url: Option, + pub model_id: Option, +} \ No newline at end of file diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/mod.rs b/engine/baml-runtime/src/internal/llm_client/primitive/mod.rs index 4dd21f4e4..63c5ad6fd 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/mod.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/mod.rs @@ -93,6 +93,9 @@ impl WithClientProperties for LLMPrimitiveProvider { fn allowed_metadata(&self) -> &super::AllowedMetadata { match_llm_provider!(self, allowed_metadata) } + fn supports_streaming(&self) -> bool { + match_llm_provider!(self, supports_streaming) + } } impl TryFrom<(&ClientProperty, &RuntimeContext)> for LLMPrimitiveProvider { diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs b/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs index 0cd2f5838..fb66fff46 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs @@ -57,6 +57,20 @@ impl WithClientProperties for OpenAIClient { fn allowed_metadata(&self) -> &crate::internal::llm_client::AllowedMetadata { &self.properties.allowed_metadata } + fn supports_streaming(&self) -> bool { + match self.properties.supported_request_modes.stream { + Some(v) => v, + None => { + match self.properties.properties.get("model") { + Some(serde_json::Value::String(model)) => { + // OpenAI's streaming is not available for o1-* models + !model.starts_with("o1-") + } + _ => true, + } + } + } + } } impl WithClient for OpenAIClient { @@ -228,16 +242,11 @@ impl RequestBuilder for OpenAIClient { allow_proxy: bool, stream: bool, ) -> Result { - // Never proxy requests to Ollama - let allow_proxy = allow_proxy - && self.properties.proxy_url.is_some() - && !self.properties.base_url.starts_with("http://localhost"); - let destination_url = if allow_proxy { self.properties .proxy_url .as_ref() - .unwrap_or(&self.properties.base_url) + .unwrap_or_else(|| &self.properties.base_url) } else { &self.properties.base_url }; diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/azure.rs b/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/azure.rs index ae7ec2c3d..7d12e8ac4 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/azure.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/azure.rs @@ -48,11 +48,18 @@ pub fn resolve_properties( query_params.insert("api-version".to_string(), v.to_string()); }; - let mut properties = properties.finalize(); - properties - .entry("max_tokens".into()) + let supported_request_modes = properties.pull_supported_request_modes()?; + + + let properties = { + let mut 
properties = properties.finalize(); + // Azure has very low default max_tokens, so we set it to 4096 + properties + .entry("max_tokens".into()) .or_insert_with(|| 4096.into()); - let properties = properties; + properties + }; + Ok(PostRequestProperties { default_role, @@ -61,9 +68,8 @@ pub fn resolve_properties( headers, properties, allowed_metadata, - // Replace proxy_url with code below to disable proxying - // proxy_url: None, proxy_url: ctx.env.get("BOUNDARY_PROXY_URL").map(|s| s.to_string()), query_params, + supported_request_modes, }) } diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/generic.rs b/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/generic.rs index 7a69af1fb..fee92cd16 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/generic.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/generic.rs @@ -27,6 +27,7 @@ pub fn resolve_properties( Some(api_key) if !api_key.is_empty() => Some(api_key), _ => None, }; + let supported_request_modes = properties.pull_supported_request_modes()?; let properties = properties.finalize(); @@ -36,12 +37,9 @@ pub fn resolve_properties( api_key, headers, properties, - proxy_url: ctx - .env - .get("BOUNDARY_PROXY_URL") - .map(|s| Some(s.to_string())) - .unwrap_or(None), + proxy_url: ctx.env.get("BOUNDARY_PROXY_URL").map(|s| s.to_string()), query_params: Default::default(), allowed_metadata, + supported_request_modes, }) } diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/mod.rs b/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/mod.rs index 9389afbe5..d23a0363d 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/mod.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/mod.rs @@ -3,7 +3,7 @@ pub(crate) mod generic; pub(crate) mod ollama; pub(crate) mod openai; -use crate::internal::llm_client::AllowedMetadata; +use crate::internal::llm_client::{AllowedMetadata, SupportedRequestModes}; use std::collections::HashMap; pub struct PostRequestProperties { @@ -16,4 +16,5 @@ pub struct PostRequestProperties { // These are passed directly to the OpenAI API. 
pub properties: HashMap, pub allowed_metadata: AllowedMetadata, + pub supported_request_modes: SupportedRequestModes, } diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/ollama.rs b/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/ollama.rs index 9bd29d1e1..c6dcd361a 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/ollama.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/ollama.rs @@ -20,6 +20,8 @@ pub fn resolve_properties( let allowed_metadata = properties.pull_allowed_role_metadata()?; let headers = properties.pull_headers()?; + let supported_request_modes = properties.pull_supported_request_modes()?; + Ok(PostRequestProperties { default_role, base_url, @@ -33,5 +35,6 @@ pub fn resolve_properties( .map(|s| Some(s.to_string())) .unwrap_or(None), query_params: Default::default(), + supported_request_modes, }) } diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/openai.rs b/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/openai.rs index 908c20918..cbf5fc49e 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/openai.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/openai/properties/openai.rs @@ -25,6 +25,8 @@ pub fn resolve_properties( let allowed_metadata = properties.pull_allowed_role_metadata()?; let headers = properties.pull_headers()?; + let supported_request_modes = properties.pull_supported_request_modes()?; + Ok(PostRequestProperties { default_role, base_url, @@ -32,13 +34,12 @@ pub fn resolve_properties( headers, properties: properties.finalize(), allowed_metadata, - // Replace proxy_url with code below to disable proxying - // proxy_url: None, proxy_url: ctx .env .get("BOUNDARY_PROXY_URL") .map(|s| Some(s.to_string())) .unwrap_or(None), query_params: Default::default(), + supported_request_modes, }) } diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/vertex/vertex_client.rs b/engine/baml-runtime/src/internal/llm_client/primitive/vertex/vertex_client.rs index 8b59a14ed..5e03e25ee 100644 --- a/engine/baml-runtime/src/internal/llm_client/primitive/vertex/vertex_client.rs +++ b/engine/baml-runtime/src/internal/llm_client/primitive/vertex/vertex_client.rs @@ -3,7 +3,7 @@ use crate::internal::llm_client::properties_hander::{ PropertiesHandler}; use crate::internal::llm_client::traits::{ ToProviderMessage, ToProviderMessageExt, WithClientProperties, }; -use crate::internal::llm_client::{AllowedMetadata, ResolveMediaUrls}; +use crate::internal::llm_client::{AllowedMetadata, ResolveMediaUrls, SupportedRequestModes}; use crate::RuntimeContext; use crate::{ internal::llm_client::{ @@ -63,6 +63,7 @@ struct PostRequestProperties { model_id: Option, location: Option, allowed_metadata: AllowedMetadata, + supported_request_modes: SupportedRequestModes, } pub struct VertexClient { @@ -162,6 +163,8 @@ fn resolve_properties( None => anyhow::bail!("location must be provided"), }; + let supported_request_modes = properties.pull_supported_request_modes()?; + Ok(PostRequestProperties { default_role, base_url, @@ -173,6 +176,7 @@ fn resolve_properties( location: Some(location), proxy_url: ctx.env.get("BOUNDARY_PROXY_URL").map(|s| s.to_string()), allowed_metadata, + supported_request_modes, }) } @@ -189,6 +193,9 @@ impl WithClientProperties for VertexClient { fn allowed_metadata(&self) -> &crate::internal::llm_client::AllowedMetadata { 
&self.properties.allowed_metadata } + fn supports_streaming(&self) -> bool { + self.properties.supported_request_modes.stream.unwrap_or(true) + } } impl WithClient for VertexClient { diff --git a/engine/baml-runtime/src/internal/llm_client/properties_hander.rs b/engine/baml-runtime/src/internal/llm_client/properties_hander.rs index 348698797..76e5ad3ae 100644 --- a/engine/baml-runtime/src/internal/llm_client/properties_hander.rs +++ b/engine/baml-runtime/src/internal/llm_client/properties_hander.rs @@ -1,8 +1,7 @@ use anyhow::{Context, Result}; use std::collections::HashMap; -use super::AllowedMetadata; - +use super::{AllowedMetadata, SupportedRequestModes}; pub(super) struct PropertiesHandler { properties: HashMap, @@ -25,6 +24,7 @@ impl PropertiesHandler { // Ban certain keys match key { "allowed_role_metadata" + | "supports_streaming" | "base_url" | "api_key" | "headers" @@ -37,7 +37,9 @@ impl PropertiesHandler { pub fn remove_serde(&mut self, key: &str) -> Result> { match self.remove(key) { - Some(value) => Ok(Some(serde_json::from_value(value).context(format!("Failed to parse: {key}"))?)), + Some(value) => Ok(Some( + serde_json::from_value(value).context(format!("Failed to parse: {key}"))?, + )), None => Ok(None), } } @@ -130,6 +132,25 @@ impl PropertiesHandler { None => Ok(None), } } + + pub fn pull_supported_request_modes(&mut self) -> Result { + let supports_streaming = match self.get("supports_streaming") { + Some(v) => match v { + serde_json::Value::Bool(s) => Some(s), + _ => { + return Err(anyhow::anyhow!( + "supports_streaming must be a bool: Got {:?}", + v + )) + } + }, + None => None, + }; + + Ok(SupportedRequestModes { + stream: supports_streaming, + }) + } } impl crate::client_registry::ClientProperty { diff --git a/engine/baml-runtime/src/internal/llm_client/traits/mod.rs b/engine/baml-runtime/src/internal/llm_client/traits/mod.rs index 19b05266a..62379ed2d 100644 --- a/engine/baml-runtime/src/internal/llm_client/traits/mod.rs +++ b/engine/baml-runtime/src/internal/llm_client/traits/mod.rs @@ -10,10 +10,7 @@ pub use self::{ chat::{WithChat, WithStreamChat}, completion::{WithCompletion, WithNoCompletion, WithStreamCompletion}, }; -use super::{ - primitive::request::RequestBuilder, LLMResponse, - ModelFeatures, -}; +use super::{primitive::request::RequestBuilder, LLMResponse, ModelFeatures}; use crate::{internal::llm_client::ResolveMediaUrls, RenderCurlSettings}; use crate::{internal::prompt_renderer::PromptRenderer, RuntimeContext}; use baml_types::{BamlMedia, BamlMediaContent, BamlMediaType, BamlValue, MediaBase64, MediaUrl}; @@ -38,6 +35,7 @@ pub trait WithRetryPolicy { pub trait WithClientProperties { fn client_properties(&self) -> &HashMap; fn allowed_metadata(&self) -> &super::AllowedMetadata; + fn supports_streaming(&self) -> bool; } pub trait WithSingleCallable { @@ -267,7 +265,7 @@ where impl WithRenderRawCurl for T where - T: WithClient + WithChat + WithCompletion + RequestBuilder, + T: WithClient + WithChat + WithCompletion + RequestBuilder + WithClientProperties, { async fn render_raw_curl( &self, @@ -275,7 +273,7 @@ where prompt: &Vec, render_settings: RenderCurlSettings, ) -> Result { - let chat_messages = process_media_urls( + let chat_messages: Vec = process_media_urls( self.model_features().resolve_media_urls, true, Some(render_settings), @@ -285,7 +283,7 @@ where .await?; let request_builder = self - .build_request(either::Right(&chat_messages), false, render_settings.stream) + .build_request(either::Right(&chat_messages), false, render_settings.stream && 
self.supports_streaming()) .await?; let mut request = request_builder.build()?; let url_header_value = { @@ -336,33 +334,55 @@ pub trait WithStreamable { impl WithStreamable for T where - T: WithClient + WithStreamChat + WithStreamCompletion, + T: WithClient + WithStreamChat + WithStreamCompletion + WithClientProperties + WithChat + WithCompletion, { #[allow(async_fn_in_trait)] async fn stream(&self, ctx: &RuntimeContext, prompt: &RenderedPrompt) -> StreamResponse { - if let RenderedPrompt::Chat(ref chat) = prompt { - match process_media_urls( - self.model_features().resolve_media_urls, - true, - None, - ctx, - chat, - ) - .await - { - Ok(messages) => return self.stream_chat(ctx, &messages).await, - Err(e) => { - return Err(LLMResponse::InternalFailure(format!( - "Error occurred:\n\n{:?}", - e - ))) + let prompt = { + if let RenderedPrompt::Chat(ref chat) = prompt { + match process_media_urls( + self.model_features().resolve_media_urls, + true, + None, + ctx, + chat, + ) + .await + { + Ok(messages) => &RenderedPrompt::Chat(messages), + Err(e) => { + return Err(LLMResponse::InternalFailure(format!( + "Error occurred:\n\n{:?}", + e + ))) + } } + } else { + prompt } - } + }; match prompt { - RenderedPrompt::Chat(p) => self.stream_chat(ctx, p).await, - RenderedPrompt::Completion(p) => self.stream_completion(ctx, p).await, + RenderedPrompt::Chat(p) => { + if self.supports_streaming() { + self.stream_chat(ctx, p).await + } else { + let res = self.chat(ctx, p).await; + Ok(Box::pin(futures::stream::once(async move { + res + }))) + } + }, + RenderedPrompt::Completion(p) => { + if self.supports_streaming() { + self.stream_completion(ctx, p).await + } else { + let res = self.completion(ctx, p).await; + Ok(Box::pin(futures::stream::once(async move { + res + }))) + } + } } } } diff --git a/fern/03-reference/baml/clients/providers/anthropic.mdx b/fern/03-reference/baml/clients/providers/anthropic.mdx index 266001d59..0f81e5693 100644 --- a/fern/03-reference/baml/clients/providers/anthropic.mdx +++ b/fern/03-reference/baml/clients/providers/anthropic.mdx @@ -69,6 +69,8 @@ client MyClient { + + ## Forwarded options - + + MyClient { ``` - + + + + ## Forwarded options MyClient { ``` - + + + ## Forwarded options MyClient { ``` - + + + ## Forwarded options MyClient { + + + + ## Forwarded options MyClient { - + + + + ## Forwarded options diff --git a/fern/03-reference/baml/clients/providers/vertex.mdx b/fern/03-reference/baml/clients/providers/vertex.mdx index ad9db52ce..c29584196 100644 --- a/fern/03-reference/baml/clients/providers/vertex.mdx +++ b/fern/03-reference/baml/clients/providers/vertex.mdx @@ -217,7 +217,9 @@ client MyClient { ``` - + + + ## Forwarded options + Whether the internal LLM client should use the streaming API. 
**Default: ``** + + | Model | Supports Streaming | + | --- | --- | + | `o1-preview` | false | + | `o1-mini` | false | + | `o1-*` | false | + | `gpt-4o` | true | + | `gpt-4o-mini` | true | + | `*` | true | + + Then in your prompt you can use something like: + ```baml + client MyClientWithoutStreaming { + provider openai + options { + model gpt-4o + api_key env.OPENAI_API_KEY + supports_streaming false + } + } + + function MyFunction() -> string { + client MyClientWithoutStreaming + prompt #"Write a short story"# + } + ``` + + ```python + # This will be streamed from your python code perspective, + # but under the hood it will call the non-streaming HTTP API + # and then return a streamable response with a single event + b.stream.MyFunction() + + # This will work exactly the same as before + b.MyFunction() + ``` + + \ No newline at end of file diff --git a/fern/snippets/supports-streaming.mdx b/fern/snippets/supports-streaming.mdx new file mode 100644 index 000000000..8fed7404b --- /dev/null +++ b/fern/snippets/supports-streaming.mdx @@ -0,0 +1,35 @@ + + Whether the internal LLM client should use the streaming API. **Default: `true`** + + Then in your prompt you can use something like: + ```baml + client MyClientWithoutStreaming { + provider anthropic + options { + model claude-3-haiku-20240307 + api_key env.ANTHROPIC_API_KEY + max_tokens 1000 + supports_streaming false + } + } + + function MyFunction() -> string { + client MyClientWithoutStreaming + prompt #"Write a short story"# + } + ``` + + ```python + # This will be streamed from your python code perspective, + # but under the hood it will call the non-streaming HTTP API + # and then return a streamable response with a single event + b.stream.MyFunction() + + # This will work exactly the same as before + b.MyFunction() + ``` + + \ No newline at end of file diff --git a/integ-tests/baml_src/test-files/functions/output/recursive-class.baml b/integ-tests/baml_src/test-files/functions/output/recursive-class.baml index a51c48d3d..3d9b66267 100644 --- a/integ-tests/baml_src/test-files/functions/output/recursive-class.baml +++ b/integ-tests/baml_src/test-files/functions/output/recursive-class.baml @@ -8,8 +8,16 @@ class LinkedList { len int } +client O1 { + provider "openai" + options { + model "o1-mini" + default_role "user" + } +} + function BuildLinkedList(input: int[]) -> LinkedList { - client GPT35 + client O1 prompt #" Build a linked list from the input array of integers. diff --git a/integ-tests/python/baml_client/inlinedbaml.py b/integ-tests/python/baml_client/inlinedbaml.py index 9fb3794e1..bd97d6cff 100644 --- a/integ-tests/python/baml_client/inlinedbaml.py +++ b/integ-tests/python/baml_client/inlinedbaml.py @@ -77,7 +77,7 @@ "test-files/functions/output/mutually-recursive-classes.baml": "class Tree {\n data int\n children Forest\n}\n\nclass Forest {\n trees Tree[]\n}\n\nclass BinaryNode {\n data int\n left BinaryNode?\n right BinaryNode?\n}\n\nfunction BuildTree(input: BinaryNode) -> Tree {\n client GPT35\n prompt #\"\n Given the input binary tree, transform it into a generic tree using the given schema.\n\n INPUT:\n {{ input }}\n\n {{ ctx.output_format }} \n \"#\n}\n\ntest TestTree {\n functions [BuildTree]\n args {\n input {\n data 2\n left {\n data 1\n left null\n right null\n }\n right {\n data 3\n left null\n right null\n }\n }\n }\n}", "test-files/functions/output/optional-class.baml": "class ClassOptionalOutput {\n prop1 string\n prop2 string\n}\n\nfunction FnClassOptionalOutput(input: string) -> ClassOptionalOutput? 
{\n client GPT35\n prompt #\"\n Return a json blob for the following input:\n {{input}}\n\n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\n\nclass Blah {\n prop4 string?\n}\n\nclass ClassOptionalOutput2 {\n prop1 string?\n prop2 string?\n prop3 Blah?\n}\n\nfunction FnClassOptionalOutput2(input: string) -> ClassOptionalOutput2? {\n client GPT35\n prompt #\"\n Return a json blob for the following input:\n {{input}}\n\n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\ntest FnClassOptionalOutput2 {\n functions [FnClassOptionalOutput2, FnClassOptionalOutput]\n args {\n input \"example input\"\n }\n}\n", "test-files/functions/output/optional.baml": "class OptionalTest_Prop1 {\n omega_a string\n omega_b int\n}\n\nenum OptionalTest_CategoryType {\n Aleph\n Beta\n Gamma\n}\n \nclass OptionalTest_ReturnType {\n omega_1 OptionalTest_Prop1?\n omega_2 string?\n omega_3 (OptionalTest_CategoryType?)[]\n} \n \nfunction OptionalTest_Function(input: string) -> (OptionalTest_ReturnType?)[]\n{ \n client GPT35\n prompt #\"\n Return a JSON blob with this schema: \n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\ntest OptionalTest_Function {\n functions [OptionalTest_Function]\n args {\n input \"example input\"\n }\n}\n", - "test-files/functions/output/recursive-class.baml": "class Node {\n data int\n next Node?\n}\n\nclass LinkedList {\n head Node?\n len int\n}\n\nfunction BuildLinkedList(input: int[]) -> LinkedList {\n client GPT35\n prompt #\"\n Build a linked list from the input array of integers.\n\n INPUT:\n {{ input }}\n\n {{ ctx.output_format }} \n \"#\n}\n\ntest TestLinkedList {\n functions [BuildLinkedList]\n args {\n input [1, 2, 3, 4, 5]\n }\n}\n", + "test-files/functions/output/recursive-class.baml": "class Node {\n data int\n next Node?\n}\n\nclass LinkedList {\n head Node?\n len int\n}\n\nclient O1 {\n provider \"openai\"\n options {\n model \"o1-mini\"\n default_role \"user\"\n }\n}\n\nfunction BuildLinkedList(input: int[]) -> LinkedList {\n client O1\n prompt #\"\n Build a linked list from the input array of integers.\n\n INPUT:\n {{ input }}\n\n {{ ctx.output_format }} \n \"#\n}\n\ntest TestLinkedList {\n functions [BuildLinkedList]\n args {\n input [1, 2, 3, 4, 5]\n }\n}\n", "test-files/functions/output/serialization-error.baml": "class DummyOutput {\n nonce string\n nonce2 string\n @@dynamic\n}\n\nfunction DummyOutputFunction(input: string) -> DummyOutput {\n client GPT35\n prompt #\"\n Say \"hello there\".\n \"#\n}", "test-files/functions/output/string-list.baml": "function FnOutputStringList(input: string) -> string[] {\n client GPT35\n prompt #\"\n Return a list of strings in json format like [\"string1\", \"string2\", \"string3\"].\n\n JSON:\n \"#\n}\n\ntest FnOutputStringList {\n functions [FnOutputStringList]\n args {\n input \"example input\"\n }\n}\n", "test-files/functions/output/unions.baml": "class UnionTest_ReturnType {\n prop1 string | bool\n prop2 (float | bool)[]\n prop3 (bool[] | int[])\n}\n\nfunction UnionTest_Function(input: string | bool) -> UnionTest_ReturnType {\n client GPT35\n prompt #\"\n Return a JSON blob with this schema: \n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\ntest UnionTest_Function {\n functions [UnionTest_Function]\n args {\n input \"example input\"\n }\n}\n", diff --git a/integ-tests/ruby/baml_client/inlined.rb b/integ-tests/ruby/baml_client/inlined.rb index 71b3d6185..052a92080 100644 --- a/integ-tests/ruby/baml_client/inlined.rb +++ b/integ-tests/ruby/baml_client/inlined.rb @@ -77,7 +77,7 @@ module Inlined 
"test-files/functions/output/mutually-recursive-classes.baml" => "class Tree {\n data int\n children Forest\n}\n\nclass Forest {\n trees Tree[]\n}\n\nclass BinaryNode {\n data int\n left BinaryNode?\n right BinaryNode?\n}\n\nfunction BuildTree(input: BinaryNode) -> Tree {\n client GPT35\n prompt #\"\n Given the input binary tree, transform it into a generic tree using the given schema.\n\n INPUT:\n {{ input }}\n\n {{ ctx.output_format }} \n \"#\n}\n\ntest TestTree {\n functions [BuildTree]\n args {\n input {\n data 2\n left {\n data 1\n left null\n right null\n }\n right {\n data 3\n left null\n right null\n }\n }\n }\n}", "test-files/functions/output/optional-class.baml" => "class ClassOptionalOutput {\n prop1 string\n prop2 string\n}\n\nfunction FnClassOptionalOutput(input: string) -> ClassOptionalOutput? {\n client GPT35\n prompt #\"\n Return a json blob for the following input:\n {{input}}\n\n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\n\nclass Blah {\n prop4 string?\n}\n\nclass ClassOptionalOutput2 {\n prop1 string?\n prop2 string?\n prop3 Blah?\n}\n\nfunction FnClassOptionalOutput2(input: string) -> ClassOptionalOutput2? {\n client GPT35\n prompt #\"\n Return a json blob for the following input:\n {{input}}\n\n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\ntest FnClassOptionalOutput2 {\n functions [FnClassOptionalOutput2, FnClassOptionalOutput]\n args {\n input \"example input\"\n }\n}\n", "test-files/functions/output/optional.baml" => "class OptionalTest_Prop1 {\n omega_a string\n omega_b int\n}\n\nenum OptionalTest_CategoryType {\n Aleph\n Beta\n Gamma\n}\n \nclass OptionalTest_ReturnType {\n omega_1 OptionalTest_Prop1?\n omega_2 string?\n omega_3 (OptionalTest_CategoryType?)[]\n} \n \nfunction OptionalTest_Function(input: string) -> (OptionalTest_ReturnType?)[]\n{ \n client GPT35\n prompt #\"\n Return a JSON blob with this schema: \n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\ntest OptionalTest_Function {\n functions [OptionalTest_Function]\n args {\n input \"example input\"\n }\n}\n", - "test-files/functions/output/recursive-class.baml" => "class Node {\n data int\n next Node?\n}\n\nclass LinkedList {\n head Node?\n len int\n}\n\nfunction BuildLinkedList(input: int[]) -> LinkedList {\n client GPT35\n prompt #\"\n Build a linked list from the input array of integers.\n\n INPUT:\n {{ input }}\n\n {{ ctx.output_format }} \n \"#\n}\n\ntest TestLinkedList {\n functions [BuildLinkedList]\n args {\n input [1, 2, 3, 4, 5]\n }\n}\n", + "test-files/functions/output/recursive-class.baml" => "class Node {\n data int\n next Node?\n}\n\nclass LinkedList {\n head Node?\n len int\n}\n\nclient O1 {\n provider \"openai\"\n options {\n model \"o1-mini\"\n default_role \"user\"\n }\n}\n\nfunction BuildLinkedList(input: int[]) -> LinkedList {\n client O1\n prompt #\"\n Build a linked list from the input array of integers.\n\n INPUT:\n {{ input }}\n\n {{ ctx.output_format }} \n \"#\n}\n\ntest TestLinkedList {\n functions [BuildLinkedList]\n args {\n input [1, 2, 3, 4, 5]\n }\n}\n", "test-files/functions/output/serialization-error.baml" => "class DummyOutput {\n nonce string\n nonce2 string\n @@dynamic\n}\n\nfunction DummyOutputFunction(input: string) -> DummyOutput {\n client GPT35\n prompt #\"\n Say \"hello there\".\n \"#\n}", "test-files/functions/output/string-list.baml" => "function FnOutputStringList(input: string) -> string[] {\n client GPT35\n prompt #\"\n Return a list of strings in json format like [\"string1\", \"string2\", \"string3\"].\n\n JSON:\n \"#\n}\n\ntest FnOutputStringList {\n 
functions [FnOutputStringList]\n args {\n input \"example input\"\n }\n}\n", "test-files/functions/output/unions.baml" => "class UnionTest_ReturnType {\n prop1 string | bool\n prop2 (float | bool)[]\n prop3 (bool[] | int[])\n}\n\nfunction UnionTest_Function(input: string | bool) -> UnionTest_ReturnType {\n client GPT35\n prompt #\"\n Return a JSON blob with this schema: \n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\ntest UnionTest_Function {\n functions [UnionTest_Function]\n args {\n input \"example input\"\n }\n}\n", diff --git a/integ-tests/typescript/baml_client/inlinedbaml.ts b/integ-tests/typescript/baml_client/inlinedbaml.ts index 7fc3398d3..70d53c4af 100644 --- a/integ-tests/typescript/baml_client/inlinedbaml.ts +++ b/integ-tests/typescript/baml_client/inlinedbaml.ts @@ -78,7 +78,7 @@ const fileMap = { "test-files/functions/output/mutually-recursive-classes.baml": "class Tree {\n data int\n children Forest\n}\n\nclass Forest {\n trees Tree[]\n}\n\nclass BinaryNode {\n data int\n left BinaryNode?\n right BinaryNode?\n}\n\nfunction BuildTree(input: BinaryNode) -> Tree {\n client GPT35\n prompt #\"\n Given the input binary tree, transform it into a generic tree using the given schema.\n\n INPUT:\n {{ input }}\n\n {{ ctx.output_format }} \n \"#\n}\n\ntest TestTree {\n functions [BuildTree]\n args {\n input {\n data 2\n left {\n data 1\n left null\n right null\n }\n right {\n data 3\n left null\n right null\n }\n }\n }\n}", "test-files/functions/output/optional-class.baml": "class ClassOptionalOutput {\n prop1 string\n prop2 string\n}\n\nfunction FnClassOptionalOutput(input: string) -> ClassOptionalOutput? {\n client GPT35\n prompt #\"\n Return a json blob for the following input:\n {{input}}\n\n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\n\nclass Blah {\n prop4 string?\n}\n\nclass ClassOptionalOutput2 {\n prop1 string?\n prop2 string?\n prop3 Blah?\n}\n\nfunction FnClassOptionalOutput2(input: string) -> ClassOptionalOutput2? 
{\n client GPT35\n prompt #\"\n Return a json blob for the following input:\n {{input}}\n\n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\ntest FnClassOptionalOutput2 {\n functions [FnClassOptionalOutput2, FnClassOptionalOutput]\n args {\n input \"example input\"\n }\n}\n", "test-files/functions/output/optional.baml": "class OptionalTest_Prop1 {\n omega_a string\n omega_b int\n}\n\nenum OptionalTest_CategoryType {\n Aleph\n Beta\n Gamma\n}\n \nclass OptionalTest_ReturnType {\n omega_1 OptionalTest_Prop1?\n omega_2 string?\n omega_3 (OptionalTest_CategoryType?)[]\n} \n \nfunction OptionalTest_Function(input: string) -> (OptionalTest_ReturnType?)[]\n{ \n client GPT35\n prompt #\"\n Return a JSON blob with this schema: \n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\ntest OptionalTest_Function {\n functions [OptionalTest_Function]\n args {\n input \"example input\"\n }\n}\n", - "test-files/functions/output/recursive-class.baml": "class Node {\n data int\n next Node?\n}\n\nclass LinkedList {\n head Node?\n len int\n}\n\nfunction BuildLinkedList(input: int[]) -> LinkedList {\n client GPT35\n prompt #\"\n Build a linked list from the input array of integers.\n\n INPUT:\n {{ input }}\n\n {{ ctx.output_format }} \n \"#\n}\n\ntest TestLinkedList {\n functions [BuildLinkedList]\n args {\n input [1, 2, 3, 4, 5]\n }\n}\n", + "test-files/functions/output/recursive-class.baml": "class Node {\n data int\n next Node?\n}\n\nclass LinkedList {\n head Node?\n len int\n}\n\nclient O1 {\n provider \"openai\"\n options {\n model \"o1-mini\"\n default_role \"user\"\n }\n}\n\nfunction BuildLinkedList(input: int[]) -> LinkedList {\n client O1\n prompt #\"\n Build a linked list from the input array of integers.\n\n INPUT:\n {{ input }}\n\n {{ ctx.output_format }} \n \"#\n}\n\ntest TestLinkedList {\n functions [BuildLinkedList]\n args {\n input [1, 2, 3, 4, 5]\n }\n}\n", "test-files/functions/output/serialization-error.baml": "class DummyOutput {\n nonce string\n nonce2 string\n @@dynamic\n}\n\nfunction DummyOutputFunction(input: string) -> DummyOutput {\n client GPT35\n prompt #\"\n Say \"hello there\".\n \"#\n}", "test-files/functions/output/string-list.baml": "function FnOutputStringList(input: string) -> string[] {\n client GPT35\n prompt #\"\n Return a list of strings in json format like [\"string1\", \"string2\", \"string3\"].\n\n JSON:\n \"#\n}\n\ntest FnOutputStringList {\n functions [FnOutputStringList]\n args {\n input \"example input\"\n }\n}\n", "test-files/functions/output/unions.baml": "class UnionTest_ReturnType {\n prop1 string | bool\n prop2 (float | bool)[]\n prop3 (bool[] | int[])\n}\n\nfunction UnionTest_Function(input: string | bool) -> UnionTest_ReturnType {\n client GPT35\n prompt #\"\n Return a JSON blob with this schema: \n {{ctx.output_format}}\n\n JSON:\n \"#\n}\n\ntest UnionTest_Function {\n functions [UnionTest_Function]\n args {\n input \"example input\"\n }\n}\n", diff --git a/typescript/playground-common/src/utils/ErrorFallback.tsx b/typescript/playground-common/src/utils/ErrorFallback.tsx index 30719f790..1d0e17cbe 100644 --- a/typescript/playground-common/src/utils/ErrorFallback.tsx +++ b/typescript/playground-common/src/utils/ErrorFallback.tsx @@ -1,13 +1,37 @@ +import { Button } from '@/components/ui/button' +import { RefreshCcw } from 'lucide-react' import { ErrorBoundary, type FallbackProps } from 'react-error-boundary' const ErrorFallback: React.FC = ({ error, resetErrorBoundary }) => { return ( -
<div>
-      Something went wrong:
-      <pre>{error.message}</pre>
-      <pre>{error.stack}</pre>
-      <pre>{JSON.stringify(error, null, 2)}</pre>
-    </div>
+    <div>
+      <div>
+        <span>Something went wrong</span>
+        <Button onClick={resetErrorBoundary}>
+          <RefreshCcw />
+        </Button>
+      </div>
+      {error.message && (
+        <pre>
+          {error.message}
+        </pre>
+      )}
+      {error.stack && (
+        <pre>
+          {error.stack}
+        </pre>
+      )}
+      {error && Object.keys(error).length > 0 && (
+        <pre>
+          {JSON.stringify(error, null, 2)}
+        </pre>
+      )}
+    </div>
   )
 }