From 935a190556d12077f961ce083723e7c1f816f387 Mon Sep 17 00:00:00 2001 From: revidious <104689994+revidious@users.noreply.github.com> Date: Wed, 11 Dec 2024 04:20:41 +0400 Subject: [PATCH] fix: Support parsing primitive values from single-key objects (#1224) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description Fixes #1141 This PR adds support for parsing primitive values from single-key objects in unions. When an LLM responds with an object-wrapped primitive (e.g., `{"status": 1}`), we now correctly extract and validate the inner value. ## Changes - Add support for extracting primitive values (numbers, booleans, strings) from single-key objects - Record the original wrapping object on the result using the `ObjectToPrimitive` flag - Reject invalid cases: - Nested objects (e.g., `{"status": {"code": 1}}`) - Arrays (e.g., `{"values": [1]}`) - Multi-key objects ## Test Plan Added test cases covering: - Basic object extraction: `{"status": 1}` → `1` - Multiple primitive types in union - Invalid cases (multi-key objects, nested objects, arrays) All tests are passing. ---- > [!IMPORTANT] > Adds support for parsing primitive values from single-key objects in unions, with tests for valid and invalid cases. > > - **Behavior**: > - Supports parsing primitive values from single-key objects in unions in `coerce_literal.rs`. > - Extracts and validates inner values if they are numbers, booleans, or strings. > - Rejects nested objects, arrays, and multi-key objects. > - **Flags**: > - Uses the `ObjectToPrimitive` flag to record the original wrapping object on the coerced result. > - **Tests**: > - Added tests in `test_literals.rs` for basic object extraction, multiple primitive types, and invalid cases (multi-key objects, nested objects, arrays). > > This description was created by [Ellipsis](https://www.ellipsis.dev?ref=BoundaryML%2Fbaml&utm_source=github&utm_medium=referral) for b4bf23f51f42dc9478dd71ba4bc0340bb5f06340. It will automatically update as commits are pushed. 
--------- Co-authored-by: aaronvg --- .../deserializer/coercer/coerce_literal.rs | 17 +++ .../jsonish/src/tests/test_literals.rs | 134 ++++++++++++++++++ 2 files changed, 151 insertions(+) diff --git a/engine/baml-lib/jsonish/src/deserializer/coercer/coerce_literal.rs b/engine/baml-lib/jsonish/src/deserializer/coercer/coerce_literal.rs index 6a8629921..ccda9ab3b 100644 --- a/engine/baml-lib/jsonish/src/deserializer/coercer/coerce_literal.rs +++ b/engine/baml-lib/jsonish/src/deserializer/coercer/coerce_literal.rs @@ -7,6 +7,7 @@ use internal_baml_core::ir::FieldType; use crate::{ deserializer::{ coercer::{coerce_primitive::coerce_bool, match_string::match_string, TypeCoercer}, + deserialize_flags::{DeserializerConditions, Flag}, types::BamlValueWithFlags, }, jsonish, @@ -46,6 +47,22 @@ impl TypeCoercer for LiteralValue { Some(v) => v, }; + // If we get an object with a single key-value pair, try to extract the value + if let jsonish::Value::Object(obj) = value { + if obj.len() == 1 { + let (key, inner_value) = obj.iter().next().unwrap(); + // only extract value if it's a primitive (not an object or array, hoping to god its fixed) + match inner_value { + jsonish::Value::Number(_) | jsonish::Value::Boolean(_) | jsonish::Value::String(_) => { + let mut result = self.coerce(ctx, target, Some(inner_value))?; + result.add_flag(Flag::ObjectToPrimitive(jsonish::Value::Object(obj.clone()))); + return Ok(result); + } + _ => {} + } + } + } + match literal { LiteralValue::Int(literal_int) => { let BamlValueWithFlags::Int(coerced_int) = coerce_int(ctx, target, Some(value))? 
diff --git a/engine/baml-lib/jsonish/src/tests/test_literals.rs b/engine/baml-lib/jsonish/src/tests/test_literals.rs index e75361b18..5874b3ccb 100644 --- a/engine/baml-lib/jsonish/src/tests/test_literals.rs +++ b/engine/baml-lib/jsonish/src/tests/test_literals.rs @@ -209,3 +209,137 @@ test_deserializer!( ]), "TWO" ); + +test_deserializer!( + test_union_literal_with_multiple_types_from_object, + EMPTY_FILE, + r#"{ + "status": 1 +}"#, + FieldType::Union(vec![ + FieldType::Literal(LiteralValue::Int(1)), + FieldType::Literal(LiteralValue::Bool(true)), + FieldType::Literal(LiteralValue::String("THREE".into())), + ]), + 1 +); + +// Test with integer value +test_deserializer!( + test_union_literal_with_multiple_types_from_object_int, + EMPTY_FILE, + r#"{ + "status": 1 +}"#, + FieldType::Union(vec![ + FieldType::Literal(LiteralValue::Int(1)), + FieldType::Literal(LiteralValue::Bool(true)), + FieldType::Literal(LiteralValue::String("THREE".into())), + ]), + 1 +); + +// Test with boolean value +test_deserializer!( + test_union_literal_with_multiple_types_from_object_bool, + EMPTY_FILE, + r#"{ + "result": true +}"#, + FieldType::Union(vec![ + FieldType::Literal(LiteralValue::Int(1)), + FieldType::Literal(LiteralValue::Bool(true)), + FieldType::Literal(LiteralValue::String("THREE".into())), + ]), + true +); + +// Test with string value +test_deserializer!( + test_union_literal_with_multiple_types_from_object_string, + EMPTY_FILE, + r#"{ + "value": "THREE" +}"#, + FieldType::Union(vec![ + FieldType::Literal(LiteralValue::Int(1)), + FieldType::Literal(LiteralValue::Bool(true)), + FieldType::Literal(LiteralValue::String("THREE".into())), + ]), + "THREE" +); + +// Test with object that has multiple keys (should fail) +test_failing_deserializer!( + test_union_literal_with_multiple_types_from_multi_key_object, + EMPTY_FILE, + r#"{ + "status": 1, + "message": "success" +}"#, + FieldType::Union(vec![ + FieldType::Literal(LiteralValue::Int(1)), + 
FieldType::Literal(LiteralValue::Bool(true)), + FieldType::Literal(LiteralValue::String("THREE".into())), + ]) +); + +// Test with nested object (should fail) +test_failing_deserializer!( + test_union_literal_with_multiple_types_from_nested_object, + EMPTY_FILE, + r#"{ + "status": { + "code": 1 + } +}"#, + FieldType::Union(vec![ + FieldType::Literal(LiteralValue::Int(1)), + FieldType::Literal(LiteralValue::Bool(true)), + FieldType::Literal(LiteralValue::String("THREE".into())), + ]) +); + +// Test with quoted string value +test_deserializer!( + test_union_literal_with_multiple_types_from_object_quoted_string, + EMPTY_FILE, + r#"{ + "value": "\"THREE\"" +}"#, + FieldType::Union(vec![ + FieldType::Literal(LiteralValue::Int(1)), + FieldType::Literal(LiteralValue::Bool(true)), + FieldType::Literal(LiteralValue::String("THREE".into())), + ]), + "THREE" +); + +// Test with string value and extra text +test_deserializer!( + test_union_literal_with_multiple_types_from_object_string_extra, + EMPTY_FILE, + r#"{ + "value": "The answer is THREE" +}"#, + FieldType::Union(vec![ + FieldType::Literal(LiteralValue::Int(1)), + FieldType::Literal(LiteralValue::Bool(true)), + FieldType::Literal(LiteralValue::String("THREE".into())), + ]), + "THREE" +); + +// Test with array value (should fail) +test_failing_deserializer!( + test_union_literal_with_multiple_types_from_object_array, + EMPTY_FILE, + r#"{ + "values": [1] +}"#, + FieldType::Union(vec![ + FieldType::Literal(LiteralValue::Int(1)), + FieldType::Literal(LiteralValue::Bool(true)), + FieldType::Literal(LiteralValue::String("THREE".into())), + ]) +);