Skip to content

Commit

Permalink
pass more constraints info to the frontend
Browse files Browse the repository at this point in the history
  • Loading branch information
imalsogreg committed Nov 21, 2024
1 parent bd68b27 commit 29d6200
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 51 deletions.
146 changes: 111 additions & 35 deletions engine/baml-runtime/src/constraints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,54 @@ pub fn evaluate_test_constraints(
/// The result of running a series of block-level constraints within a test.
#[derive(Clone, Debug, PartialEq)]
pub enum TestConstraintsResult {
    /// All checks and asserts passed.
    Passed,

    /// An assert failed.
    Failed { reason: String },

    /// At least one check failed to evaluate to true.
    Partial { failed_checks: Vec<String> },
    /// Constraint testing finished with the following check
    /// results, and optionally a failing assert.
    Completed {
        /// One `(name, passed)` entry per recorded result, in recording order.
        checks: Vec<(String, bool)>,
        /// Name of the assert that failed, if any. A failing assert without
        /// a name is stored as the empty string.
        failed_assert: Option<String>,
    },

    /// There was a problem evaluating a constraint.
    InternalError { details: String },
}

/// State update helper functions.
///
/// Every helper consumes `self` and returns the updated value. Only the
/// `Completed` variant carries check/assert state; any other variant is
/// handed back unchanged by the updating helpers.
impl TestConstraintsResult {
    /// Returns a `Completed` result with no recorded checks and no failed
    /// assert.
    pub fn empty() -> Self {
        TestConstraintsResult::Completed {
            checks: Vec::new(),
            failed_assert: None,
        }
    }

    /// Returns the recorded `(name, passed)` pairs, or an empty list when the
    /// result is not `Completed`.
    fn checks(self) -> Vec<(String, bool)> {
        match self {
            TestConstraintsResult::Completed { checks, .. } => checks,
            _ => Vec::new(),
        }
    }

    /// Appends `(name, result)` to the recorded checks.
    ///
    /// An already-recorded `failed_assert` is carried over as-is: recording
    /// a check must never erase an assert failure.
    fn add_check_result(self, name: String, result: bool) -> Self {
        match self {
            TestConstraintsResult::Completed {
                mut checks,
                failed_assert,
            } => {
                checks.push((name, result));
                TestConstraintsResult::Completed {
                    checks,
                    failed_assert,
                }
            }
            other => other,
        }
    }

    /// Marks the result as having a failed assert, keeping the recorded
    /// checks. A `None` name is stored as the empty string. Any previously
    /// recorded failed assert is replaced.
    fn fail_assert(self, name: Option<String>) -> Self {
        match self {
            TestConstraintsResult::Completed { checks, .. } => TestConstraintsResult::Completed {
                checks,
                failed_assert: Some(name.unwrap_or_default()),
            },
            other => other,
        }
    }
}

/// The state that we track as we iterate over constraints in the test block.
struct Accumulator {
pub result: TestConstraintsResult,
Expand All @@ -56,7 +91,10 @@ struct Accumulator {
impl Accumulator {
pub fn new() -> Self {
Accumulator {
result: TestConstraintsResult::Passed,
result: TestConstraintsResult::Completed {
checks: Vec::new(),
failed_assert: None,
},
check_results: Vec::new(),
}
}
Expand All @@ -74,8 +112,13 @@ fn step_constraints(
// Short-circuit if we have already had a hard failure. We can skip
// the work in the rest of this function if we have already encountered
// a hard failure.
let already_failed = matches!(acc.result, TestConstraintsResult::Failed { .. })
|| matches!(acc.result, TestConstraintsResult::InternalError { .. });
let already_failed = matches!(
acc.result,
TestConstraintsResult::Completed {
failed_assert: Some(_),
..
}
) || matches!(acc.result, TestConstraintsResult::InternalError { .. });
if already_failed {
return acc;
}
Expand Down Expand Up @@ -134,21 +177,18 @@ fn step_constraints(
bool_result_or_internal_error,
) {
// A check ran to completion and succeeded or failed
// (i.e. returned a bool). This updates both the checks context
// (i.e. returned a bool). This updates both the checks jinja context
// and the status.
(Check, Some(check_name), Ok(check_passed)) => {
check_results.push((check_name.clone(), check_passed.into()));
let result = if check_passed {
acc.result
} else {
let mut new_failed_checks = match acc.result {
TestConstraintsResult::Partial { failed_checks } => failed_checks,
_ => Vec::new(),
};
new_failed_checks.push(check_name);
TestConstraintsResult::Partial {
failed_checks: new_failed_checks,
}
let mut new_checks = match acc.result {
TestConstraintsResult::Completed { checks, .. } => checks,
_ => Vec::new(),
};
new_checks.push((check_name, check_passed));
let result = TestConstraintsResult::Completed {
checks: new_checks,
failed_assert: None,
};
return Accumulator {
result,
Expand Down Expand Up @@ -181,11 +221,7 @@ fn step_constraints(

// A failing assert is a hard error.
(Assert, maybe_name, Ok(false)) => {
let reason = match maybe_name {
Some(name) => format!("Failed assert {name}."),
None => "Failed assert.".to_string(),
};
let result = TestConstraintsResult::Failed { reason };
let result = acc.result.fail_assert(maybe_name);
return Accumulator {
result,
check_results,
Expand Down Expand Up @@ -334,7 +370,13 @@ mod tests {
#[test]
fn basic_test_constraints() {
let res = run_pipeline(&[mk_assert("has_kids", "_.result.kids|length > 0")]);
assert_eq!(res, TestConstraintsResult::Passed);
assert_eq!(
res,
TestConstraintsResult::Completed {
checks: vec![("has_kids".to_string(), true)],
failed_assert: None,
}
);
}

#[test]
Expand All @@ -344,7 +386,17 @@ mod tests {
mk_check("not_too_many", "this.kids.length < 100"),
mk_assert("both_pass", "_.checks.has_kids and _.checks.not_too_many"),
]);
assert_eq!(res, TestConstraintsResult::Passed);
assert_eq!(
res,
TestConstraintsResult::Completed {
checks: vec![
("has_kids".to_string(), true),
("not_too_many".to_string(), true),
("both_pass".to_string(), true),
],
failed_assert: None
}
);
}

#[test]
Expand All @@ -358,8 +410,13 @@ mod tests {
// a check, therefore it doesn't get a field in `checks`.
assert_eq!(
res,
TestConstraintsResult::Failed {
reason: "Failed assert both_pass.".to_string()
TestConstraintsResult::Completed {
checks: vec![
("has_kids".to_string(), true),
("not_too_many".to_string(), true),
("both_pass".to_string(), true),
],
failed_assert: Some("both_pass.".to_string())
}
);
}
Expand All @@ -372,7 +429,18 @@ mod tests {
mk_check("both_pass", "_.checks.has_kids and _.checks.not_too_many"),
mk_assert("either_or", "_.checks.both_pass or _.latency_ms < 1000"),
]);
assert_eq!(res, TestConstraintsResult::Passed);
assert_eq!(
res,
TestConstraintsResult::Completed {
checks: vec![
("has_kids".to_string(), true),
("not_too_many".to_string(), true),
("both_pass".to_string(), true),
("either_or".to_string(), true)
],
failed_assert: None
}
);
}

#[test]
Expand All @@ -386,15 +454,23 @@ mod tests {
]);
assert_eq!(
res,
TestConstraintsResult::Partial {
failed_checks: vec!["no_kids".to_string(), "way_too_many".to_string()]
TestConstraintsResult::Completed {
checks: vec![
("has_kids".to_string(), true),
("not_too_many".to_string(), true),
("both_pass".to_string(), true),
("no_kids".to_string(), false),
("way_too_many".to_string(), false)
],
failed_assert: None
}
);
}

/// A check whose expression cannot be evaluated should yield `InternalError`.
#[test]
fn test_internal_error() {
let res = run_pipeline(&[mk_check("faulty", "__.result.kids|length > 0")]);
// The expression deliberately uses `__` (double underscore) where the other
// tests use `_`, so evaluating the check is expected to fail and be reported
// as an `InternalError` rather than as a check result.
assert!(matches!(res, TestConstraintsResult::InternalError { .. }));
}
}
5 changes: 2 additions & 3 deletions engine/baml-runtime/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,15 +250,14 @@ impl BamlRuntime {
LLMResponse::Success(complete_llm_response) => Ok(complete_llm_response),
_ => Err(anyhow::anyhow!("LLM Response was not successful")),
}?;
// web_sys::console::log_1(&format!("constraints: {constraints:?}").into());
let test_constraints_result = if constraints.is_empty() {
TestConstraintsResult::Passed
TestConstraintsResult::empty()
} else {
match val {
Some(Ok(value)) => {
evaluate_test_constraints(&params, &value, &complete_resp, constraints)
}
_ => TestConstraintsResult::Passed,
_ => TestConstraintsResult::empty(),
}
};
// web_sys::console::log_1(&format!("test_constraints_result: {test_constraints_result:?}").into());
Expand Down
37 changes: 29 additions & 8 deletions engine/baml-runtime/src/types/response.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
pub use crate::internal::llm_client::LLMResponse;
use crate::{
constraints::TestConstraintsResult,
errors::ExposedError,
internal::llm_client::{orchestrator::OrchestrationScope, ResponseBamlValue},
constraints::TestConstraintsResult,
};
use anyhow::Result;
use colored::*;
Expand Down Expand Up @@ -207,18 +207,25 @@ impl From<TestStatus<'_>> for BamlValue {
fn from(status: TestStatus) -> Self {
match status {
TestStatus::Pass => BamlValue::String("pass".to_string()),
TestStatus::NeedsHumanEval(checks) => BamlValue::String(format!("checks need human evaluation: {:?}", checks)),
TestStatus::NeedsHumanEval(checks) => {
BamlValue::String(format!("checks need human evaluation: {:?}", checks))
}
TestStatus::Fail(r) => BamlValue::String(format!("failed! {:?}", r)),
}
}
}

#[derive(Debug)]
pub enum TestFailReason<'a> {
TestUnspecified(&'a anyhow::Error),
TestUnspecified(anyhow::Error),
TestLLMFailure(&'a LLMResponse),
TestParseFailure(&'a anyhow::Error),
TestConstraintFailure(anyhow::Error),
TestConstraintsFailure {
checks: Vec<(String, bool)>,
failed_assert: Option<String>,
},
// TestCheckFailures(Vec<String>),
// TestAssertFailure(String),
}

impl PartialEq for TestFailReason<'_> {
Expand All @@ -242,10 +249,24 @@ impl TestResponse {
if let Some(parsed) = func_res.result_with_constraints() {
if parsed.is_ok() {
match self.constraints_result.clone() {
TestConstraintsResult::Passed => TestStatus::Pass,
TestConstraintsResult::Failed { reason } => TestStatus::Fail( TestFailReason::TestConstraintFailure(anyhow::anyhow!(reason))),
TestConstraintsResult::Partial { failed_checks } => TestStatus::NeedsHumanEval( failed_checks ),
TestConstraintsResult::InternalError { details } => TestStatus::Fail( TestFailReason::TestConstraintFailure(anyhow::anyhow!(details)))
TestConstraintsResult::InternalError { details } => {
TestStatus::Fail(TestFailReason::TestUnspecified(anyhow::anyhow!(details)))
}
TestConstraintsResult::Completed {
checks,
failed_assert,
} => {
let n_failed_checks: usize =
checks.iter().filter(|(_, pass)| !pass).count();
if failed_assert.is_some() || n_failed_checks > 0 {
TestStatus::Fail(TestFailReason::TestConstraintsFailure {
checks,
failed_assert,
})
} else {
TestStatus::Pass
}
}
}
} else {
TestStatus::Fail(TestFailReason::TestParseFailure(
Expand Down
23 changes: 19 additions & 4 deletions engine/baml-schema-wasm/src/runtime_wasm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ use baml_runtime::RenderCurlSettings;
use baml_runtime::{
internal::llm_client::LLMResponse, BamlRuntime, DiagnosticsError, IRHelper, RenderedPrompt,
};
use baml_types::ResponseCheck;
use baml_types::{BamlMediaType, BamlValue, BamlValueWithMeta, GeneratorOutputType, TypeValue};
use baml_types::{BamlMediaType, BamlValue, GeneratorOutputType, TypeValue};
use indexmap::IndexMap;
use internal_baml_codegen::version_check::GeneratorType;
use internal_baml_codegen::version_check::{check_version, VersionCheckMode};
Expand Down Expand Up @@ -597,7 +596,7 @@ impl WasmTestResponse {
baml_runtime::TestFailReason::TestUnspecified(_) => TestStatus::UnableToRun,
baml_runtime::TestFailReason::TestLLMFailure(_) => TestStatus::LLMFailure,
baml_runtime::TestFailReason::TestParseFailure(_) => TestStatus::ParseFailure,
baml_runtime::TestFailReason::TestConstraintFailure(_) => {
baml_runtime::TestFailReason::TestConstraintsFailure { .. } => {
TestStatus::ConstraintsFailed
}
},
Expand Down Expand Up @@ -779,7 +778,23 @@ impl WithRenderError for baml_runtime::TestFailReason<'_> {
baml_runtime::TestFailReason::TestUnspecified(e) => Some(format!("{e:#}")),
baml_runtime::TestFailReason::TestLLMFailure(f) => f.render_error(),
baml_runtime::TestFailReason::TestParseFailure(e) => Some(format!("{e:#}")),
baml_runtime::TestFailReason::TestConstraintFailure(e) => Some(format!("{e:#}")),
baml_runtime::TestFailReason::TestConstraintsFailure {
checks,
failed_assert,
} => {
let checks_msg = if checks.len() > 0 {
let check_msgs = checks.into_iter().map(|(name, pass)| {
format!("{name}: {}", if *pass { "Passed" } else { "Failed" })
});
format!("Check results:\n{}", join(check_msgs, "\n"))
} else {
String::new()
};
let assert_msg = failed_assert
.as_ref()
.map_or("".to_string(), |name| format!("\nFailed assert: {name}"));
Some(format!("{checks_msg}{assert_msg}"))
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ export const statusCountAtom = atom({
error: 0,
})

/// This atom tracks the state of the full test suite.
/// `unknown` means tests haven't been run yet. `pass` means
/// they have all run to completion.
/// `warn` means at least one check has failed, and `fail`
/// means at least one assert has failed, or an internal error
/// occurred.
export type TestSuiteSummary = 'pass' | 'warn' | 'fail' | 'unknown'
export const testSuiteSummary = atom<TestSuiteSummary>('unknown')

export const useRunHooks = () => {
const isRunning = useAtomValue(isRunningAtom)

Expand All @@ -69,6 +78,7 @@ export const useRunHooks = () => {
}
set(isRunningAtom, true)
set(showTestsAtom, true)
set(testSuiteSummary,'unknown')

vscode.postMessage({
command: 'telemetry',
Expand Down Expand Up @@ -146,7 +156,7 @@ export const useRunHooks = () => {
const { res, elapsed } = result.value
// console.log('result', i, result.value.res.llm_response(), 'batch[i]', batch[i])

let status = res.status()
let status: Number = res.status()
let response_status: DoneTestStatusType = 'error'
if (status === 0) {
response_status = 'passed'
Expand Down

0 comments on commit 29d6200

Please sign in to comment.