diff --git a/engine/baml-lib/baml-types/src/media.rs b/engine/baml-lib/baml-types/src/media.rs
index 8aeee5500..18557569a 100644
--- a/engine/baml-lib/baml-types/src/media.rs
+++ b/engine/baml-lib/baml-types/src/media.rs
@@ -11,8 +11,8 @@ pub enum BamlMediaType {
 impl fmt::Display for BamlMediaType {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match *self {
-            BamlMediaType::Image => write!(f, "Image"),
-            BamlMediaType::Audio => write!(f, "Audio"),
+            BamlMediaType::Image => write!(f, "image"),
+            BamlMediaType::Audio => write!(f, "audio"),
         }
     }
 }
diff --git a/engine/baml-runtime/Cargo.toml b/engine/baml-runtime/Cargo.toml
index 87a59df94..334bedcf1 100644
--- a/engine/baml-runtime/Cargo.toml
+++ b/engine/baml-runtime/Cargo.toml
@@ -41,7 +41,6 @@ web-time.workspace = true
 static_assertions.workspace = true
 mime_guess = "2.0.4"
 mime = "0.3.17"
-
 # For tracing
 envy = "0.4.2"
 chrono = "0.4.38"
@@ -55,7 +54,6 @@ pin-project-lite = "0.2.14"
 async-trait = "0.1.80"
 cfg-if = "1.0.0"
 include_dir = "0.7.3"
-
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 serde-wasm-bindgen = "0.6.5"
 wasm-bindgen = { version = "^0.2.74", features = ["serde-serialize"] }
diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/anthropic_client.rs b/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/anthropic_client.rs
index 87afecfd4..defc19917 100644
--- a/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/anthropic_client.rs
+++ b/engine/baml-runtime/src/internal/llm_client/primitive/anthropic/anthropic_client.rs
@@ -338,7 +338,7 @@ impl RequestBuilder for AnthropicClient {
         &self.client
     }
 
-    fn build_request(
+    async fn build_request(
         &self,
         prompt: either::Either<&String, &Vec>,
         stream: bool,
diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/google/google_client.rs b/engine/baml-runtime/src/internal/llm_client/primitive/google/google_client.rs
index 28eaefd78..88b376602 100644
--- a/engine/baml-runtime/src/internal/llm_client/primitive/google/google_client.rs
+++ b/engine/baml-runtime/src/internal/llm_client/primitive/google/google_client.rs
@@ -262,7 +262,7 @@ impl RequestBuilder for GoogleClient {
         &self.client
     }
 
-    fn build_request(
+    async fn build_request(
         &self,
         prompt: either::Either<&String, &Vec>,
         stream: bool,
@@ -298,13 +298,68 @@ impl RequestBuilder for GoogleClient {
 
         let mut body = json!(self.properties.properties);
         let body_obj = body.as_object_mut().unwrap();
-
         match prompt {
             either::Either::Left(prompt) => {
                 body_obj.extend(convert_completion_prompt_to_body(prompt))
             }
             either::Either::Right(messages) => {
-                body_obj.extend(convert_chat_prompt_to_body(messages))
+                body_obj.extend(convert_chat_prompt_to_body(messages));
+
+                // Swap every URL-backed `fileData` part for an `inlineData` part
+                // carrying the downloaded bytes as base64. The parts are edited
+                // in place inside `body_obj`: rewriting a clone of `contents`
+                // would leave the request body untouched.
+                let contents = body_obj
+                    .get_mut("contents")
+                    .and_then(|c| c.as_array_mut())
+                    .into_iter()
+                    .flatten();
+                for content in contents {
+                    let Some(parts) = content.get_mut("parts").and_then(|p| p.as_array_mut())
+                    else {
+                        continue;
+                    };
+                    for part in parts {
+                        let Some(part_obj) = part.as_object_mut() else {
+                            continue;
+                        };
+                        let Some(file_data) = part_obj.get("fileData") else {
+                            continue;
+                        };
+                        let Some(data_url) = file_data.get("data").and_then(|d| d.as_str())
+                        else {
+                            continue;
+                        };
+
+                        // Download the media and reject non-2xx responses. A failed
+                        // fetch is logged and the part is left as `fileData`
+                        // instead of panicking while the request is being built.
+                        let fetched = async {
+                            reqwest::get(data_url)
+                                .await?
+                                .error_for_status()?
+                                .bytes()
+                                .await
+                        }
+                        .await;
+                        let bytes = match fetched {
+                            Ok(bytes) => bytes,
+                            Err(e) => {
+                                log::warn!("Failed to fetch media for inlineData: {}", e);
+                                continue;
+                            }
+                        };
+
+                        let mut inline_data = serde_json::Map::new();
+                        if let Some(mime_type) = file_data.get("mimeType") {
+                            inline_data.insert("mimeType".to_string(), mime_type.clone());
+                        }
+                        inline_data.insert("data".to_string(), json!(base64::encode(&bytes)));
+
+                        part_obj.insert("inlineData".to_string(), json!(inline_data));
+                        part_obj.remove("fileData");
+                    }
+                }
             }
         }
 
diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs b/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs
index 37956c1df..0e09b7ed2 100644
--- a/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs
+++ b/engine/baml-runtime/src/internal/llm_client/primitive/openai/openai_client.rs
@@ -212,7 +212,7 @@ impl RequestBuilder for OpenAIClient {
         &self.client
     }
 
-    fn build_request(
+    async fn build_request(
         &self,
         prompt: either::Either<&String, &Vec>,
         stream: bool,
diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/request.rs b/engine/baml-runtime/src/internal/llm_client/primitive/request.rs
index c288da7d4..fa93826c9 100644
--- a/engine/baml-runtime/src/internal/llm_client/primitive/request.rs
+++ b/engine/baml-runtime/src/internal/llm_client/primitive/request.rs
@@ -8,7 +8,7 @@ use serde::de::DeserializeOwned;
 use crate::internal::llm_client::{traits::WithClient, ErrorCode, LLMErrorResponse, LLMResponse};
 
 pub trait RequestBuilder {
-    fn build_request(
+    async fn build_request(
         &self,
         prompt: either::Either<&String, &Vec>,
         stream: bool,
@@ -36,7 +36,7 @@ pub async fn make_request(
     let (system_now, instant_now) = (web_time::SystemTime::now(), web_time::Instant::now());
     log::info!("Making request using client {}", client.context().name);
 
-    let req = match client.build_request(prompt, stream).build() {
+    let req = match client.build_request(prompt, stream).await.build() {
         Ok(req) => req,
         Err(e) => {
             return Err(LLMResponse::LLMFailure(LLMErrorResponse {