Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow optional lists and maps #1251

Merged
merged 6 commits into from
Dec 22, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 37 additions & 11 deletions engine/baml-lib/baml-core/src/ir/json_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,16 +186,40 @@ impl WithJsonSchema for FieldType {
"required": ["url"],
}),
},
FieldType::List(item) => json!({
"type": "array",
"items": (*item).json_schema()
}),
FieldType::Map(_k, v) => json!({
"type": "object",
"additionalProperties": {
"type": v.json_schema(),
// Handle list types (arrays) with optional support
// For example: string[]? generates a schema that allows both array and null
FieldType::List(item) => {
let mut schema = json!({
"type": "array",
"items": (*item).json_schema()
});
// If the list itself is optional (marked with ?),
// modify the schema to accept either an array or null
if self.is_optional() {
schema["type"] = json!(["array", "null"]);
// Add default null value for optional arrays
schema["default"] = serde_json::Value::Null;
}
}),
schema
},
// Handle map types with optional support
// For example: map<string, int>? generates a schema that allows both object and null
FieldType::Map(_k, v) => {
let mut schema = json!({
"type": "object",
"additionalProperties": {
"type": v.json_schema(),
}
});
// If the map itself is optional (marked with ?),
// modify the schema to accept either an object or null
if self.is_optional() {
schema["type"] = json!(["object", "null"]);
// Add default null value for optional maps
schema["default"] = serde_json::Value::Null;
}
schema
},
FieldType::Union(options) => json!({
"anyOf": options.iter().map(|t| {
let mut res = t.json_schema();
Expand All @@ -211,18 +235,20 @@ impl WithJsonSchema for FieldType {
"type": "array",
"prefixItems": options.iter().map(|t| t.json_schema()).collect::<Vec<_>>(),
}),
// The caller object is responsible for adding the "null" type
// Handle optional types (marked with ?) that aren't lists or maps
FieldType::Optional(inner) => {
match **inner {
// For primitive types, we can simply add "null" to the allowed types
FieldType::Primitive(_) => {
let mut res = inner.json_schema();
res["type"] = json!([res["type"], "null"]);
res["default"] = serde_json::Value::Null;
res
}
// For complex types, we need to use anyOf to allow either the type or null
_ => {
let mut res = inner.json_schema();
// if res is a map, add a "title" field
// Add a title for better schema documentation
if let serde_json::Value::Object(r) = &mut res {
r.insert("title".to_string(), json!(inner.to_string()));
}
Expand Down
4 changes: 2 additions & 2 deletions engine/baml-lib/schema-ast/src/parser/datamodel.pest
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ base_type_with_attr = { base_type ~ (NEWLINE? ~ field_attribute)* }
base_type = { array_notation | map | identifier | group | tuple | parenthesized_type | literal_type }

array_suffix = { "[]" }
array_notation = { base_type_without_array ~ array_suffix+ }
array_notation = { base_type_without_array ~ array_suffix+ ~ optional_token? }

map = { "map" ~ "<" ~ field_type ~ "," ~ field_type ~ ">" }
map = { "map" ~ "<" ~ field_type ~ "," ~ field_type ~ ">" ~ optional_token? }

openParan = { "(" }
closeParan = { ")" }
Expand Down
144 changes: 130 additions & 14 deletions engine/baml-lib/schema-ast/src/parser/parse_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,57 +238,105 @@ fn parse_literal_type(pair: Pair<'_>, diagnostics: &mut Diagnostics) -> Option<F
))
}

/// Parses array type notation such as `string[]`, `int[]?` or `MyClass[][]?`.
///
/// # Arguments
/// * `pair` - the `array_notation` pair to parse.
/// * `diagnostics` - diagnostics collector; used to build the source span and
///   passed on to `parse_base_type` for the element type.
///
/// # Returns
/// `Some(FieldType::List(..))` carrying, in order: the arity of the array
/// itself, the boxed element type, the number of `[]` suffixes, the source
/// span, and `None` for attributes.
///
/// # Panics
/// This function never returns `None` itself: if no element type was produced
/// by `parse_base_type`, or if an unexpected child rule is encountered, it hits
/// an `unreachable!` and panics.
/// NOTE(review): `assert_correct_parser!` presumably also panics when `pair`
/// is not an `array_notation` - confirm against the macro definition.
fn parse_array(pair: Pair<'_>, diagnostics: &mut Diagnostics) -> Option<FieldType> {
    assert_correct_parser!(pair, Rule::array_notation);

    let mut dims = 0_u32;
    let mut field = None;
    // Arity of the array itself (e.g. `string[]?`), not of its elements.
    // Stays `Required` unless an `optional_token` child is seen.
    let mut arity = FieldArity::Required;
    let span = diagnostics.span(pair.as_span());

    for current in pair.into_inner() {
        match current.as_rule() {
            // Element type of the array (e.g. `string` in `string[]`).
            Rule::base_type_without_array => field = parse_base_type(current, diagnostics),
            // Each `[]` suffix adds one dimension.
            Rule::array_suffix => dims += 1,
            // A trailing `?` makes the whole array optional.
            Rule::optional_token => arity = FieldArity::Optional,
            // Report the rule this function actually parses (was `Rule::map`,
            // a copy-paste from `parse_map`).
            _ => unreachable_rule!(current, Rule::array_notation),
        }
    }

    match field {
        Some(field) => Some(FieldType::List(
            arity,           // Whether the array itself is optional
            Box::new(field), // The type of elements in the array
            dims,            // Number of dimensions (e.g., 2 for string[][])
            span,            // Source location for error reporting
            None,            // No attributes initially
        )),
        _ => unreachable!("Field must have been defined"),
    }
}

/// Parses a map type notation from the input pair.
/// Handles both required and optional maps (e.g., `map<string, int>` and `map<string, int>?`).
///
/// # Arguments
/// * `pair` - the input pair containing map notation tokens
/// * `diagnostics` - mutable reference to the diagnostics collector for error reporting
///
/// # Returns
/// * `Some(FieldType::Map)` - successfully parsed map type with appropriate arity
/// * `None` - if parsing fails (fewer than two field types could be parsed)
///
/// # Implementation details
/// - BAML maps are expected to use string keys as per the BAML specification
///   (NOTE(review): the key is parsed here as an arbitrary `field_type` and is
///   not checked in this function - confirm where the restriction is enforced)
/// - supports optional maps with the `?` suffix
/// - preserves source span information for error reporting
/// - example valid inputs: `map<string, int>`, `map<string, MyClass>?`
fn parse_map(pair: Pair<'_>, diagnostics: &mut Diagnostics) -> Option<FieldType> {
assert_correct_parser!(pair, Rule::map);

let mut fields = Vec::new();
// Track whether this map is optional (e.g., map<string, int>?)
// Default to Required, will be updated to Optional if ? token is found
let mut arity = FieldArity::Required;
let span = diagnostics.span(pair.as_span());

for current in pair.into_inner() {
match current.as_rule() {
// Parse both key and value types of the map
Rule::field_type => {
if let Some(f) = parse_field_type(current, diagnostics) {
fields.push(f)
}
}
// Handle optional marker (?) for maps like map<string, int>?
// This makes the entire map optional, not its values
Rule::optional_token => arity = FieldArity::Optional,
_ => unreachable_rule!(current, Rule::map),
}
}

match fields.len() {
0 => None,
1 => None,
0 => None, // Invalid: no types specified
1 => None, // Invalid: only key type specified
2 => Some(FieldType::Map(
FieldArity::Required,
Box::new((fields[0].to_owned(), fields[1].to_owned())),
span,
None,
arity, // Whether the map itself is optional
Box::new((fields[0].to_owned(), fields[1].to_owned())), // Key and value types
span, // Source location for error reporting
None, // No attributes initially
)),
_ => unreachable!("Maps must specify a key type and value type"),
}
Expand Down Expand Up @@ -352,13 +400,13 @@ fn parse_tuple(pair: Pair<'_>, diagnostics: &mut Diagnostics) -> Option<FieldTyp
}
}

/// For the last variant of a Union, remove the attributes from that variant
/// For the last variant of a union, remove the attributes from that variant
/// and attach them to the union, unless the attribute was tagged with the
/// `parenthesized` field.
///
/// This is done because `field_foo int | string @description("d")`
/// This is done because `field_foo int | string @description("d")`
/// is naturally parsed as a field with a union whose second variant has
/// a description. But the correct BAML interpretation is a union with a
/// a description. But the correct BAML interpretation is a union with a
/// description.
pub fn reassociate_union_attributes(field_type: &mut FieldType) {
match field_type {
Expand Down Expand Up @@ -412,4 +460,72 @@ mod tests {
]
}
}

/// Tests the parsing of optional array and map types.
/// This test ensures that the parser correctly handles the optional token (?)
/// when applied to arrays and maps.
///
/// Each `name(start, end, [children])` entry in the expected token trees gives
/// a rule name followed by the start and end offsets of the matched text in
/// the input string, so the offsets below can be checked by counting
/// characters in the `input` literal.
///
/// # Test Cases
/// 1. Optional Arrays:
/// - Tests `string[]?` syntax
/// - Verifies correct token positions and nesting
/// - Ensures optional token is properly associated with array type
///   (it is a child of `array_notation`, not a sibling of it)
///
/// 2. Optional Maps:
/// - Tests `map<string, int>?` syntax
/// - Verifies correct token positions and nesting
/// - Ensures optional token is properly associated with map type
///   (it is a child of `map`, not a sibling of it)
///
/// These test cases verify the implementation of issue #948,
/// which requested support for optional lists and maps in BAML.
#[test]
fn optional_types() {
// Test Case 1: Optional Arrays
// Input is 9 characters: `string` = 0..6, `[]` = 6..8, `?` = 8..9.
parses_to! {
parser: BAMLParser,
input: r#"string[]?"#,
rule: Rule::field_type,
tokens: [field_type(0,9,[
non_union(0,9,[
array_notation(0,9,[
base_type_without_array(0,6,[
identifier(0,6,[
single_word(0,6)
])
]),
array_suffix(6,8),
optional_token(8,9)
])
])
])]
};

// Test Case 2: Optional Maps
// Input is 17 characters: key `string` = 4..10, value `int` = 12..15,
// `?` = 16..17. The `map<`, `,` and `>` literals produce no tokens of their own.
parses_to! {
parser: BAMLParser,
input: r#"map<string, int>?"#,
rule: Rule::field_type,
tokens: [field_type(0,17,[
non_union(0,17,[
map(0,17,[
field_type(4,10,[
non_union(4,10,[
identifier(4,10,[
single_word(4,10)
])
])
]),
field_type(12,15,[
non_union(12,15,[
identifier(12,15,[
single_word(12,15)
])
])
]),
optional_token(16,17)
])
])
])]
}
}
}
Loading