Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: allow optional lists and maps
Browse files Browse the repository at this point in the history
Resolves BoundaryML#948

- Added support for optional arrays with string[]? syntax
- Added support for optional maps with map<string, int>? syntax
- Updated JSON schema generation for optional types
- Added comprehensive test cases
afyef committed Dec 17, 2024
1 parent 2974458 commit 8a31ab9
Showing 3 changed files with 169 additions and 27 deletions.
48 changes: 37 additions & 11 deletions engine/baml-lib/baml-core/src/ir/json_schema.rs
Original file line number Diff line number Diff line change
@@ -186,16 +186,40 @@ impl WithJsonSchema for FieldType {
"required": ["url"],
}),
},
FieldType::List(item) => json!({
"type": "array",
"items": (*item).json_schema()
}),
FieldType::Map(_k, v) => json!({
"type": "object",
"additionalProperties": {
"type": v.json_schema(),
// Handle list types (arrays) with optional support
// For example: string[]? generates a schema that allows both array and null
FieldType::List(item) => {
let mut schema = json!({
"type": "array",
"items": (*item).json_schema()
});
// If the list itself is optional (marked with ?),
// modify the schema to accept either an array or null
if self.is_optional() {
schema["type"] = json!(["array", "null"]);
// Add default null value for optional arrays
schema["default"] = serde_json::Value::Null;
}
}),
schema
},
// Handle map types with optional support
// For example: map<string, int>? generates a schema that allows both object and null
FieldType::Map(_k, v) => {
let mut schema = json!({
"type": "object",
"additionalProperties": {
"type": v.json_schema(),
}
});
// If the map itself is optional (marked with ?),
// modify the schema to accept either an object or null
if self.is_optional() {
schema["type"] = json!(["object", "null"]);
// Add default null value for optional maps
schema["default"] = serde_json::Value::Null;
}
schema
},
FieldType::Union(options) => json!({
"anyOf": options.iter().map(|t| {
let mut res = t.json_schema();
@@ -211,18 +235,20 @@ impl WithJsonSchema for FieldType {
"type": "array",
"prefixItems": options.iter().map(|t| t.json_schema()).collect::<Vec<_>>(),
}),
// The caller object is responsible for adding the "null" type
// Handle optional types (marked with ?) that aren't lists or maps
FieldType::Optional(inner) => {
match **inner {
// For primitive types, we can simply add "null" to the allowed types
FieldType::Primitive(_) => {
let mut res = inner.json_schema();
res["type"] = json!([res["type"], "null"]);
res["default"] = serde_json::Value::Null;
res
}
// For complex types, we need to use anyOf to allow either the type or null
_ => {
let mut res = inner.json_schema();
// if res is a map, add a "title" field
// Add a title for better schema documentation
if let serde_json::Value::Object(r) = &mut res {
r.insert("title".to_string(), json!(inner.to_string()));
}
4 changes: 2 additions & 2 deletions engine/baml-lib/schema-ast/src/parser/datamodel.pest
Original file line number Diff line number Diff line change
@@ -56,9 +56,9 @@ base_type_with_attr = { base_type ~ (NEWLINE? ~ field_attribute)* }
base_type = { array_notation | map | identifier | group | tuple | parenthesized_type | literal_type }

array_suffix = { "[]" }
array_notation = { base_type_without_array ~ array_suffix+ }
array_notation = { base_type_without_array ~ array_suffix+ ~ optional_token? }

map = { "map" ~ "<" ~ field_type ~ "," ~ field_type ~ ">" }
map = { "map" ~ "<" ~ field_type ~ "," ~ field_type ~ ">" ~ optional_token? }

openParan = { "(" }
closeParan = { ")" }
144 changes: 130 additions & 14 deletions engine/baml-lib/schema-ast/src/parser/parse_types.rs
Original file line number Diff line number Diff line change
@@ -238,57 +238,105 @@ fn parse_literal_type(pair: Pair<'_>, diagnostics: &mut Diagnostics) -> Option<F
))
}

/// Parses an `array_notation` pair into a [`FieldType::List`].
///
/// Handles both required and optional arrays, e.g. `string[]` and `string[]?`.
///
/// # Arguments
/// * `pair` - the `Rule::array_notation` pair to parse.
/// * `diagnostics` - used to build the source span and passed on to `parse_base_type`.
///
/// # Returns
/// `Some(FieldType::List(..))` carrying the arity, the element type, the
/// dimension count and the source span. No path in this function returns `None`.
///
/// # Panics
/// Hits `unreachable!` if no base type was produced while walking the inner
/// pairs (including when `parse_base_type` yields `None`).
///
/// # Accepted input
/// Multiple dimensions and a trailing `?` are supported:
/// `string[]`, `int[]?`, `MyClass[][]?`.
fn parse_array(pair: Pair<'_>, diagnostics: &mut Diagnostics) -> Option<FieldType> {
assert_correct_parser!(pair, Rule::array_notation);

let mut dims = 0_u32;
let mut field = None;
// Track whether this array is optional (e.g., string[]?).
// Defaults to Required; switched to Optional if a `?` token is found.
let mut arity = FieldArity::Required;
let span = diagnostics.span(pair.as_span());

for current in pair.into_inner() {
match current.as_rule() {
// Parse the base type of the array (e.g., 'string' in string[])
Rule::base_type_without_array => field = parse_base_type(current, diagnostics),
// Count array dimensions (number of [] pairs)
Rule::array_suffix => dims += 1,
// Handle optional marker (?) for arrays like string[]?
// This makes the entire array optional, not its elements
Rule::optional_token => arity = FieldArity::Optional,
// NOTE(review): the rule named here is `Rule::map`, but this function
// parses `Rule::array_notation` — looks like a copy/paste slip; confirm
// what `unreachable_rule!` reports and whether it should name the array rule.
_ => unreachable_rule!(current, Rule::map),
}
}

match field {
Some(field) => Some(FieldType::List(
FieldArity::Required,
Box::new(field),
dims,
span,
None,
arity, // Whether the array itself is optional
Box::new(field), // The type of elements in the array
dims, // Number of dimensions (e.g., 2 for string[][])
span, // Source location for error reporting
None, // No attributes initially
)),
// Reached only when the loop never assigned a `Some` base type to `field`.
_ => unreachable!("Field must have been defined"),
}
}

/// Parses a map type notation from the input pair into a `FieldType::Map`.
/// Handles both required and optional maps (e.g., `map<string, int>` and `map<string, int>?`).
///
/// # Arguments
/// * `pair` - the input pair containing map notation tokens
/// * `diagnostics` - mutable reference to the diagnostics collector for error reporting
///
/// # Returns
/// * `Some(FieldType::Map(..))` - when both a key type and a value type were parsed; carries the arity
/// * `None` - when fewer than two type arguments were parsed successfully
///
/// # Implementation details
/// - Keys are expected to be strings per the BAML specification; this function does not check
///   that itself, it parses both key and value with `parse_field_type`
/// - Supports optional maps with the `?` suffix
/// - Preserves source span information for error reporting
/// - Example valid inputs: `map<string, int>`, `map<string, MyClass>?`
fn parse_map(pair: Pair<'_>, diagnostics: &mut Diagnostics) -> Option<FieldType> {
assert_correct_parser!(pair, Rule::map);

let mut fields = Vec::new();
// Track whether this map is optional (e.g., map<string, int>?)
// Default to Required, will be updated to Optional if ? token is found
let mut arity = FieldArity::Required;
let span = diagnostics.span(pair.as_span());

for current in pair.into_inner() {
match current.as_rule() {
// Parse both key and value types of the map
Rule::field_type => {
if let Some(f) = parse_field_type(current, diagnostics) {
fields.push(f)
}
}
// Handle optional marker (?) for maps like map<string, int>?
// This makes the entire map optional, not its values
Rule::optional_token => arity = FieldArity::Optional,
_ => unreachable_rule!(current, Rule::map),
}
}

match fields.len() {
0 => None,
1 => None,
0 => None, // Invalid: no types specified
1 => None, // Invalid: only key type specified
2 => Some(FieldType::Map(
FieldArity::Required,
Box::new((fields[0].to_owned(), fields[1].to_owned())),
span,
None,
arity, // Whether the map itself is optional
Box::new((fields[0].to_owned(), fields[1].to_owned())), // Key and value types
span, // Source location for error reporting
None, // No attributes initially
)),
_ => unreachable!("Maps must specify a key type and value type"),
}
@@ -352,13 +400,13 @@ fn parse_tuple(pair: Pair<'_>, diagnostics: &mut Diagnostics) -> Option<FieldTyp
}
}

/// For the last variant of a Union, remove the attributes from that variant
/// for the last variant of a union, here we remove the attributes from that variant
/// and attach them to the union, unless the attribute was tagged with the
/// `parenthesized` field.
///
/// This is done because `field_foo int | string @description("d")`
/// this is done because `field_foo int | string @description("d")`
/// is naturally parsed as a field with a union whose second variant has
/// a description. But the correct BAML interpretation is a union with a
/// a description. but the correct baml interpretation is a union with a
/// description.
pub fn reassociate_union_attributes(field_type: &mut FieldType) {
match field_type {
@@ -412,4 +460,72 @@ mod tests {
]
}
}

/// Tests the parsing of optional array and map types.
///
/// Ensures the grammar attaches the optional token (`?`) to the array or map
/// rule itself when it follows `[]` or `>`.
///
/// # Test Cases
/// 1. Optional arrays:
/// - Parses `string[]?` starting from `Rule::field_type`
/// - Checks the exact token positions and nesting
/// - Expects `optional_token` as a child of `array_notation`
///
/// 2. Optional maps:
/// - Parses `map<string, int>?` starting from `Rule::field_type`
/// - Checks the exact token positions and nesting
/// - Expects `optional_token` as a child of `map`
///
/// The numbers in each token are (start, end) offsets into the input; the
/// outermost rule spans the whole input in both cases.
///
/// These test cases verify the implementation of issue #948,
/// which requested support for optional lists and maps in BAML.
#[test]
fn optional_types() {
// Test Case 1: Optional Arrays
// `string[]?` is 9 characters: `string` = 0..6, `[]` = 6..8, `?` = 8..9.
parses_to! {
parser: BAMLParser,
input: r#"string[]?"#,
rule: Rule::field_type,
tokens: [field_type(0,9,[
non_union(0,9,[
array_notation(0,9,[
base_type_without_array(0,6,[
identifier(0,6,[
single_word(0,6)
])
]),
array_suffix(6,8),
optional_token(8,9)
])
])
])]
};

// Test Case 2: Optional Maps
// `map<string, int>?` is 17 characters: key `string` = 4..10,
// value `int` = 12..15, `?` = 16..17.
parses_to! {
parser: BAMLParser,
input: r#"map<string, int>?"#,
rule: Rule::field_type,
tokens: [field_type(0,17,[
non_union(0,17,[
map(0,17,[
field_type(4,10,[
non_union(4,10,[
identifier(4,10,[
single_word(4,10)
])
])
]),
field_type(12,15,[
non_union(12,15,[
identifier(12,15,[
single_word(12,15)
])
])
]),
optional_token(16,17)
])
])
])]
}
}
}

0 comments on commit 8a31ab9

Please sign in to comment.