pass more constraints info to the frontend

BoundaryML · Nov 21, 2024 · 29d6200 · 29d6200
1 parent bd68b27
commit 29d6200
Show file tree

Hide file tree

Showing 5 changed files with 172 additions and 51 deletions.
diff --git a/engine/baml-runtime/src/constraints.rs b/engine/baml-runtime/src/constraints.rs
@@ -34,19 +34,54 @@ pub fn evaluate_test_constraints(
 /// The result of running a series of block-level constraints within a test.
 #[derive(Clone, Debug, PartialEq)]
 pub enum TestConstraintsResult {
-    /// All checks and asserts passed.
-    Passed,
-
-    /// An assert failed.
-    Failed { reason: String },
-
-    /// At least one check failed to evaluate to true.
-    Partial { failed_checks: Vec<String> },
+    /// Constraint testing finished with the following check
+    /// results, and optionally a failing assert.
+    Completed {
+        checks: Vec<(String, bool)>,
+        failed_assert: Option<String>,
+    },
 
     /// There was a problem evaluating a constraint.
     InternalError { details: String },
 }
 
+/// State update helper functions.
+impl TestConstraintsResult {
+    pub fn empty() -> Self {
+        TestConstraintsResult::Completed {
+            checks: Vec::new(),
+            failed_assert: None,
+        }
+    }
+    fn checks(self) -> Vec<(String, bool)> {
+        match self {
+            TestConstraintsResult::Completed { checks, .. } => checks,
+            _ => Vec::new(),
+        }
+    }
+    fn add_check_result(mut self, name: String, result: bool) -> Self {
+        match self {
+            TestConstraintsResult::Completed { mut checks, .. } => {
+                checks.push((name, result));
+                TestConstraintsResult::Completed {
+                    checks,
+                    failed_assert: None,
+                }
+            }
+            _ => self,
+        }
+    }
+    fn fail_assert(self, name: Option<String>) -> Self {
+        match self {
+            TestConstraintsResult::Completed { checks, .. } => TestConstraintsResult::Completed {
+                checks,
+                failed_assert: Some(name.unwrap_or("".to_string())),
+            },
+            _ => self,
+        }
+    }
+}
+
 /// The state that we track as we iterate over constraints in the test block.
 struct Accumulator {
     pub result: TestConstraintsResult,
@@ -56,7 +91,10 @@ struct Accumulator {
 impl Accumulator {
     pub fn new() -> Self {
         Accumulator {
-            result: TestConstraintsResult::Passed,
+            result: TestConstraintsResult::Completed {
+                checks: Vec::new(),
+                failed_assert: None,
+            },
             check_results: Vec::new(),
         }
     }
@@ -74,8 +112,13 @@ fn step_constraints(
     // Short-circuit if we have already had a hard failure. We can skip
     // the work in the rest of this function if we have already encountered
     // a hard failure.
-    let already_failed = matches!(acc.result, TestConstraintsResult::Failed { .. })
-        || matches!(acc.result, TestConstraintsResult::InternalError { .. });
+    let already_failed = matches!(
+        acc.result,
+        TestConstraintsResult::Completed {
+            failed_assert: Some(_),
+            ..
+        }
+    ) || matches!(acc.result, TestConstraintsResult::InternalError { .. });
     if already_failed {
         return acc;
     }
@@ -134,21 +177,18 @@ fn step_constraints(
         bool_result_or_internal_error,
     ) {
         // A check ran to completion and succeeded or failed
-        // (i.e. returned a bool). This updates both the checks context
+        // (i.e. returned a bool). This updates both the checks jinja context
         // and the status.
         (Check, Some(check_name), Ok(check_passed)) => {
             check_results.push((check_name.clone(), check_passed.into()));
-            let result = if check_passed {
-                acc.result
-            } else {
-                let mut new_failed_checks = match acc.result {
-                    TestConstraintsResult::Partial { failed_checks } => failed_checks,
-                    _ => Vec::new(),
-                };
-                new_failed_checks.push(check_name);
-                TestConstraintsResult::Partial {
-                    failed_checks: new_failed_checks,
-                }
+            let mut new_checks = match acc.result {
+                TestConstraintsResult::Completed { checks, .. } => checks,
+                _ => Vec::new(),
+            };
+            new_checks.push((check_name, check_passed));
+            let result = TestConstraintsResult::Completed {
+                checks: new_checks,
+                failed_assert: None,
             };
             return Accumulator {
                 result,
@@ -181,11 +221,7 @@ fn step_constraints(
 
         // A failing assert is a hard error.
         (Assert, maybe_name, Ok(false)) => {
-            let reason = match maybe_name {
-                Some(name) => format!("Failed assert {name}."),
-                None => "Failed assert.".to_string(),
-            };
-            let result = TestConstraintsResult::Failed { reason };
+            let result = acc.result.fail_assert(maybe_name);
             return Accumulator {
                 result,
                 check_results,
@@ -334,7 +370,13 @@ mod tests {
     #[test]
     fn basic_test_constraints() {
         let res = run_pipeline(&[mk_assert("has_kids", "_.result.kids|length > 0")]);
-        assert_eq!(res, TestConstraintsResult::Passed);
+        assert_eq!(
+            res,
+            TestConstraintsResult::Completed {
+                checks: vec![("has_kids".to_string(), true)],
+                failed_assert: None,
+            }
+        );
     }
 
     #[test]
@@ -344,7 +386,17 @@ mod tests {
             mk_check("not_too_many", "this.kids.length < 100"),
             mk_assert("both_pass", "_.checks.has_kids and _.checks.not_too_many"),
         ]);
-        assert_eq!(res, TestConstraintsResult::Passed);
+        assert_eq!(
+            res,
+            TestConstraintsResult::Completed {
+                checks: vec![
+                    ("has_kids".to_string(), true),
+                    ("not_too_many".to_string(), true),
+                    ("both_pass".to_string(), true),
+                ],
+                failed_assert: None
+            }
+        );
     }
 
     #[test]
@@ -358,8 +410,13 @@ mod tests {
         // a check, therefore it doesn't get a field in `checks`.
         assert_eq!(
             res,
-            TestConstraintsResult::Failed {
-                reason: "Failed assert both_pass.".to_string()
+            TestConstraintsResult::Completed {
+                checks: vec![
+                    ("has_kids".to_string(), true),
+                    ("not_too_many".to_string(), true),
+                    ("both_pass".to_string(), true),
+                ],
+                failed_assert: Some("both_pass.".to_string())
             }
         );
     }
@@ -372,7 +429,18 @@ mod tests {
             mk_check("both_pass", "_.checks.has_kids and _.checks.not_too_many"),
             mk_assert("either_or", "_.checks.both_pass or _.latency_ms < 1000"),
         ]);
-        assert_eq!(res, TestConstraintsResult::Passed);
+        assert_eq!(
+            res,
+            TestConstraintsResult::Completed {
+                checks: vec![
+                    ("has_kids".to_string(), true),
+                    ("not_too_many".to_string(), true),
+                    ("both_pass".to_string(), true),
+                    ("either_or".to_string(), true)
+                ],
+                failed_assert: None
+            }
+        );
     }
 
     #[test]
@@ -386,15 +454,23 @@ mod tests {
         ]);
         assert_eq!(
             res,
-            TestConstraintsResult::Partial {
-                failed_checks: vec!["no_kids".to_string(), "way_too_many".to_string()]
+            TestConstraintsResult::Completed {
+                checks: vec![
+                    ("has_kids".to_string(), true),
+                    ("not_too_many".to_string(), true),
+                    ("both_pass".to_string(), true),
+                    ("no_kids".to_string(), false),
+                    ("way_too_many".to_string(), false)
+                ],
+                failed_assert: None
             }
         );
     }
 
     #[test]
     fn test_internal_error() {
         let res = run_pipeline(&[mk_check("faulty", "__.result.kids|length > 0")]);
+        // The constraint fails to evaluate because of a typo: `__` (double underscore).
         assert!(matches!(res, TestConstraintsResult::InternalError { .. }));
     }
 }
diff --git a/engine/baml-runtime/src/lib.rs b/engine/baml-runtime/src/lib.rs
@@ -250,15 +250,14 @@ impl BamlRuntime {
                 LLMResponse::Success(complete_llm_response) => Ok(complete_llm_response),
                 _ => Err(anyhow::anyhow!("LLM Response was not successful")),
             }?;
-            // web_sys::console::log_1(&format!("constraints: {constraints:?}").into());
             let test_constraints_result = if constraints.is_empty() {
-                TestConstraintsResult::Passed
+                TestConstraintsResult::empty()
             } else {
                 match val {
                     Some(Ok(value)) => {
                         evaluate_test_constraints(&params, &value, &complete_resp, constraints)
                     }
-                    _ => TestConstraintsResult::Passed,
+                    _ => TestConstraintsResult::empty(),
                 }
             };
             // web_sys::console::log_1(&format!("test_constraints_result: {test_constraints_result:?}").into());

diff --git a/engine/baml-runtime/src/types/response.rs b/engine/baml-runtime/src/types/response.rs
@@ -1,8 +1,8 @@
 pub use crate::internal::llm_client::LLMResponse;
 use crate::{
+    constraints::TestConstraintsResult,
     errors::ExposedError,
     internal::llm_client::{orchestrator::OrchestrationScope, ResponseBamlValue},
-    constraints::TestConstraintsResult,
 };
 use anyhow::Result;
 use colored::*;
@@ -207,18 +207,25 @@ impl From<TestStatus<'_>> for BamlValue {
     fn from(status: TestStatus) -> Self {
         match status {
             TestStatus::Pass => BamlValue::String("pass".to_string()),
-            TestStatus::NeedsHumanEval(checks) => BamlValue::String(format!("checks need human evaluation: {:?}", checks)),
+            TestStatus::NeedsHumanEval(checks) => {
+                BamlValue::String(format!("checks need human evaluation: {:?}", checks))
+            }
             TestStatus::Fail(r) => BamlValue::String(format!("failed! {:?}", r)),
         }
     }
 }
 
 #[derive(Debug)]
 pub enum TestFailReason<'a> {
-    TestUnspecified(&'a anyhow::Error),
+    TestUnspecified(anyhow::Error),
     TestLLMFailure(&'a LLMResponse),
     TestParseFailure(&'a anyhow::Error),
-    TestConstraintFailure(anyhow::Error),
+    TestConstraintsFailure {
+        checks: Vec<(String, bool)>,
+        failed_assert: Option<String>,
+    },
+    // TestCheckFailures(Vec<String>),
+    // TestAssertFailure(String),
 }
 
 impl PartialEq for TestFailReason<'_> {
@@ -242,10 +249,24 @@ impl TestResponse {
         if let Some(parsed) = func_res.result_with_constraints() {
             if parsed.is_ok() {
                 match self.constraints_result.clone() {
-                    TestConstraintsResult::Passed => TestStatus::Pass,
-                    TestConstraintsResult::Failed { reason } => TestStatus::Fail( TestFailReason::TestConstraintFailure(anyhow::anyhow!(reason))),
-                    TestConstraintsResult::Partial { failed_checks } => TestStatus::NeedsHumanEval( failed_checks ),
-                    TestConstraintsResult::InternalError { details } => TestStatus::Fail( TestFailReason::TestConstraintFailure(anyhow::anyhow!(details)))
+                    TestConstraintsResult::InternalError { details } => {
+                        TestStatus::Fail(TestFailReason::TestUnspecified(anyhow::anyhow!(details)))
+                    }
+                    TestConstraintsResult::Completed {
+                        checks,
+                        failed_assert,
+                    } => {
+                        let n_failed_checks: usize =
+                            checks.iter().filter(|(_, pass)| !pass).count();
+                        if failed_assert.is_some() || n_failed_checks > 0 {
+                            TestStatus::Fail(TestFailReason::TestConstraintsFailure {
+                                checks,
+                                failed_assert,
+                            })
+                        } else {
+                            TestStatus::Pass
+                        }
+                    }
                 }
             } else {
                 TestStatus::Fail(TestFailReason::TestParseFailure(

diff --git a/engine/baml-schema-wasm/src/runtime_wasm/mod.rs b/engine/baml-schema-wasm/src/runtime_wasm/mod.rs
@@ -12,8 +12,7 @@ use baml_runtime::RenderCurlSettings;
 use baml_runtime::{
     internal::llm_client::LLMResponse, BamlRuntime, DiagnosticsError, IRHelper, RenderedPrompt,
 };
-use baml_types::ResponseCheck;
-use baml_types::{BamlMediaType, BamlValue, BamlValueWithMeta, GeneratorOutputType, TypeValue};
+use baml_types::{BamlMediaType, BamlValue, GeneratorOutputType, TypeValue};
 use indexmap::IndexMap;
 use internal_baml_codegen::version_check::GeneratorType;
 use internal_baml_codegen::version_check::{check_version, VersionCheckMode};
@@ -597,7 +596,7 @@ impl WasmTestResponse {
                     baml_runtime::TestFailReason::TestUnspecified(_) => TestStatus::UnableToRun,
                     baml_runtime::TestFailReason::TestLLMFailure(_) => TestStatus::LLMFailure,
                     baml_runtime::TestFailReason::TestParseFailure(_) => TestStatus::ParseFailure,
-                    baml_runtime::TestFailReason::TestConstraintFailure(_) => {
+                    baml_runtime::TestFailReason::TestConstraintsFailure { .. } => {
                         TestStatus::ConstraintsFailed
                     }
                 },
@@ -779,7 +778,23 @@ impl WithRenderError for baml_runtime::TestFailReason<'_> {
             baml_runtime::TestFailReason::TestUnspecified(e) => Some(format!("{e:#}")),
             baml_runtime::TestFailReason::TestLLMFailure(f) => f.render_error(),
             baml_runtime::TestFailReason::TestParseFailure(e) => Some(format!("{e:#}")),
-            baml_runtime::TestFailReason::TestConstraintFailure(e) => Some(format!("{e:#}")),
+            baml_runtime::TestFailReason::TestConstraintsFailure {
+                checks,
+                failed_assert,
+            } => {
+                let checks_msg = if checks.len() > 0 {
+                    let check_msgs = checks.into_iter().map(|(name, pass)| {
+                        format!("{name}: {}", if *pass { "Passed" } else { "Failed" })
+                    });
+                    format!("Check results:\n{}", join(check_msgs, "\n"))
+                } else {
+                    String::new()
+                };
+                let assert_msg = failed_assert
+                    .as_ref()
+                    .map_or("".to_string(), |name| format!("\nFailed assert: {name}"));
+                Some(format!("{checks_msg}{assert_msg}"))
+            }
         }
     }
 }

diff --git a/typescript/playground-common/src/baml_wasm_web/test_uis/testHooks.ts b/typescript/playground-common/src/baml_wasm_web/test_uis/testHooks.ts
@@ -50,6 +50,15 @@ export const statusCountAtom = atom({
   error: 0,
 })
 
+/// This atom will track the state of the full test suite.
+/// `unknown` means tests haven't been run yet. `pass` means
+/// they have all run to completion.
+/// `warn` means at least one check has failed, and `fail`
+/// means at least one assert has failed, or an internal error
+/// occurred.
+export type TestSuiteSummary = 'pass' | 'warn' | 'fail' | 'unknown'
+export const testSuiteSummary = atom<TestSuiteSummary>('unknown')
+
 export const useRunHooks = () => {
   const isRunning = useAtomValue(isRunningAtom)
 
@@ -69,6 +78,7 @@ export const useRunHooks = () => {
         }
         set(isRunningAtom, true)
         set(showTestsAtom, true)
+        set(testSuiteSummary,'unknown')
 
         vscode.postMessage({
           command: 'telemetry',
@@ -146,7 +156,7 @@ export const useRunHooks = () => {
               const { res, elapsed } = result.value
               // console.log('result', i, result.value.res.llm_response(), 'batch[i]', batch[i])
 
-              let status = res.status()
+              let status: Number = res.status()
               let response_status: DoneTestStatusType = 'error'
               if (status === 0) {
                 response_status = 'passed'