Skip to content

Commit

Permalink
Extract infinite cycle finding into generic function
Browse files Browse the repository at this point in the history
  • Loading branch information
antoniosarosi committed Nov 27, 2024
1 parent 273f13d commit dc8d994
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 26 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
use std::collections::{HashMap, HashSet};
use std::{
collections::{HashMap, HashSet},
hash::Hash,
ops::Index,
};

use internal_baml_diagnostics::DatamodelError;
use internal_baml_parser_database::{Tarjan, TypeWalker};
use internal_baml_schema_ast::ast::{FieldType, TypeExpId, WithName, WithSpan};
use internal_baml_schema_ast::ast::{FieldType, SchemaAst, TypeExpId, WithName, WithSpan};

use crate::validate::validation_pipeline::context::Context;

Expand Down Expand Up @@ -37,24 +41,36 @@ pub(super) fn validate(ctx: &mut Context<'_>) {
(class.id, dependencies)
}));

for component in Tarjan::components(&dependency_graph) {
let cycle = component
.iter()
.map(|id| ctx.db.ast()[*id].name().to_string())
.collect::<Vec<_>>()
.join(" -> ");

// TODO: We can push an error for every sinlge class here (that's what
// Rust does), for now it's an error for every cycle found.
ctx.push_error(DatamodelError::new_validation_error(
&format!("These classes form a dependency cycle: {}", cycle),
ctx.db.ast()[component[0]].span().clone(),
));
}
report_infinite_cycles(
&dependency_graph,
ctx,
"These classes form a dependency cycle",
);

// The graph for aliases is already built when visiting each alias. Maybe
// we can use the same logic for the required dependencies graph above.
report_infinite_cycles(
&ctx.db.type_alias_dependencies(),
ctx,
"These aliases form a dependency cycle",
);
}

// TODO: Extract this into some generic function.
eprintln!("Type aliases: {:?}", ctx.db.type_aliases());
for component in Tarjan::components(&ctx.db.type_aliases()) {
/// Finds and reports all the infinite cycles in the given graph.
///
/// It prints errors like this:
///
/// "Error validating: These classes form a dependency cycle: A -> B -> C"
fn report_infinite_cycles<V: Ord + Eq + Hash + Copy>(
graph: &HashMap<V, HashSet<V>>,
ctx: &mut Context<'_>,
message: &str,
) where
SchemaAst: Index<V>,
<SchemaAst as Index<V>>::Output: WithName,
<SchemaAst as Index<V>>::Output: WithSpan,
{
for component in Tarjan::components(graph) {
let cycle = component
.iter()
.map(|id| ctx.db.ast()[*id].name().to_string())
Expand All @@ -64,7 +80,7 @@ pub(super) fn validate(ctx: &mut Context<'_>) {
// TODO: We can push an error for every sinlge class here (that's what
// Rust does), for now it's an error for every cycle found.
ctx.push_error(DatamodelError::new_validation_error(
&format!("These aliases form a dependency cycle: {}", cycle),
&format!("{message}: {cycle}"),
ctx.db.ast()[component[0]].span().clone(),
));
}
Expand Down
6 changes: 3 additions & 3 deletions engine/baml-lib/parser-database/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ impl ParserDatabase {
// Resolve type aliases.
// Cycles are already validated so this should not stack overflow and
// it should find the final type.
for alias_id in self.types.type_aliases.keys() {
for alias_id in self.types.type_alias_dependencies.keys() {
let resolved = resolve_type_alias(&self.ast[*alias_id].value, &self);
self.types.resolved_type_aliases.insert(*alias_id, resolved);
}
Expand Down Expand Up @@ -240,8 +240,8 @@ impl ParserDatabase {
///
/// Each vertex is a type alias and each edge is a reference to another type
/// alias.
pub fn type_aliases(&self) -> &HashMap<ast::TypeAliasId, HashSet<ast::TypeAliasId>> {
&self.types.type_aliases
pub fn type_alias_dependencies(&self) -> &HashMap<ast::TypeAliasId, HashSet<ast::TypeAliasId>> {
&self.types.type_alias_dependencies
}

/// The total number of enums in the schema. This is O(1).
Expand Down
2 changes: 1 addition & 1 deletion engine/baml-lib/parser-database/src/tarjan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ pub struct Tarjan<'g, V> {
}

// V is Copy because we mostly use opaque identifiers for class or alias IDs.
// In practice T ends up being a u32, but if for some reason this needs to
// In practice V ends up being a u32, but if for some reason this needs to
// be used with strings then we can make V Clone instead of Copy and refactor
// the code below.
impl<'g, V: Eq + Ord + Hash + Copy> Tarjan<'g, V> {
Expand Down
8 changes: 6 additions & 2 deletions engine/baml-lib/parser-database/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ pub(super) struct Types {
/// Graph of type aliases.
///
/// This graph is only used to detect infinite cycles in type aliases.
pub(crate) type_aliases: HashMap<ast::TypeAliasId, HashSet<ast::TypeAliasId>>,
pub(crate) type_alias_dependencies: HashMap<ast::TypeAliasId, HashSet<ast::TypeAliasId>>,

/// Fully resolved type aliases.
///
Expand Down Expand Up @@ -454,7 +454,11 @@ fn visit_type_alias<'db>(
// Insert the entry as soon as we get here then if we find something we'll
// add edges to the graph. Otherwise no edges but we still need the Vertex
// in order for the cycles algorithm to work.
let alias_refs = ctx.types.type_aliases.entry(alias_id).or_default();
let alias_refs = ctx
.types
.type_alias_dependencies
.entry(alias_id)
.or_default();

let mut stack = vec![&assignment.value];

Expand Down

0 comments on commit dc8d994

Please sign in to comment.