diff --git a/engine/baml-lib/jinja/src/output_format/types.rs b/engine/baml-lib/jinja/src/output_format/types.rs index 5c0764502..612bde2c0 100644 --- a/engine/baml-lib/jinja/src/output_format/types.rs +++ b/engine/baml-lib/jinja/src/output_format/types.rs @@ -68,10 +68,10 @@ impl Default for RenderSetting { } pub struct RenderOptions { - pub prefix: RenderSetting, - pub or_splitter: String, - pub enum_value_prefix: RenderSetting, - pub always_hoist_enums: RenderSetting, + prefix: RenderSetting, + or_splitter: String, + enum_value_prefix: RenderSetting, + always_hoist_enums: RenderSetting, } impl Default for RenderOptions { diff --git a/engine/baml-runtime/src/lib.rs b/engine/baml-runtime/src/lib.rs index 7548de77c..5dfca44b6 100644 --- a/engine/baml-runtime/src/lib.rs +++ b/engine/baml-runtime/src/lib.rs @@ -17,6 +17,7 @@ mod macros; mod request; mod runtime; pub mod runtime_interface; +pub mod schema; pub mod tracing; pub mod type_builder; mod types; diff --git a/engine/baml-runtime/src/schema/mod.rs b/engine/baml-runtime/src/schema/mod.rs new file mode 100644 index 000000000..6adcd9334 --- /dev/null +++ b/engine/baml-runtime/src/schema/mod.rs @@ -0,0 +1,367 @@ +use anyhow::Context; +use anyhow::Result; +use baml_types::FieldType; +use baml_types::TypeValue; +use internal_baml_jinja::types as jt; +use internal_baml_jinja::types::{OutputFormatContent, RenderOptions}; +use serde::Deserialize; +use std::collections::HashMap; + +pub enum OutputFormatMode { + JsonSchema, + TsInterface, +} + +// can you model a list directly in pydantic? + +// a dict is modelled as "additionalProperties" wtf? +// - humans don't understand this, why would an LLM? + +// TODO: +// - maps, unions, tuples +mod json_schema { + + use super::*; + + #[derive(Debug, Deserialize)] + pub struct Schema { + #[serde(rename = "$defs")] + defs: HashMap, + + #[serde(default)] + properties: HashMap, + + #[serde(default)] + required: Vec, + + r#type: String, + } + + #[derive(Debug, Deserialize)] + pub struct TypeSpecWithMeta { + /// Pydantic includes this by default. + #[serde(rename = "title")] + _title: Option, + + #[serde(flatten)] + type_spec: TypeSpec, + } + + #[derive(Debug, Deserialize)] + #[serde(untagged)] + pub enum TypeSpec { + #[serde(rename = "string")] + Ref(TypeRef), + Inline(TypeDef), + Union(UnionRef), + } + + #[derive(Debug, Deserialize)] + pub struct UnionRef { + #[serde(rename = "anyOf")] + any_of: Vec, + } + + #[derive(Debug, Deserialize)] + pub struct TypeRef { + #[serde(rename = "$ref")] + r#ref: String, + } + + #[derive(Debug, Deserialize)] + #[serde(tag = "type")] + pub enum TypeDef { + #[serde(rename = "string")] + StringOrEnum(StringOrEnumDef), + + #[serde(rename = "object")] + Class(ClassDef), + + #[serde(rename = "array")] + Array(Box), + + #[serde(rename = "integer")] + Int, + + #[serde(rename = "number")] + Float, + + #[serde(rename = "boolean")] + Bool, + + #[serde(rename = "null")] + Null, + } + + #[derive(Debug, Deserialize)] + pub struct StringOrEnumDef { + r#enum: Option>, + } + + #[derive(Debug, Deserialize)] + pub struct ClassDef { + #[serde(default)] + properties: HashMap, + + #[serde(default)] + required: Vec, + } + + #[derive(Debug, Deserialize)] + pub struct ArrayDef { + items: TypeSpecWithMeta, + } + + impl Into for &Schema { + fn into(self) -> OutputFormatContent { + let mut enums = vec![]; + let mut classes = vec![]; + + for (name, type_def) in self.defs.iter() { + match type_def { + TypeDef::StringOrEnum(string_or_enum_def) => { + if let Some(enum_values) = &string_or_enum_def.r#enum { + enums.push(jt::Enum { + name: jt::Name::new(name.clone()), + values: enum_values + .iter() + .map(|v| (jt::Name::new(v.clone()), None)) + .collect(), + }); + } + } + TypeDef::Class(class_def) => { + classes.push(jt::Class { + name: jt::Name::new(name.clone()), + fields: class_def + .properties + .iter() + .map(|(field_name, field_type)| { + (jt::Name::new(field_name.clone()), field_type.into(), None) + }) + .collect(), + }); + } + _ => {} + } + } + todo!() + } + } + + impl Into for &TypeSpecWithMeta { + fn into(self) -> FieldType { + match &self.type_spec { + TypeSpec::Inline(type_def) => match type_def { + TypeDef::StringOrEnum(StringOrEnumDef { r#enum: None }) => { + FieldType::Primitive(TypeValue::String) + } + TypeDef::StringOrEnum(StringOrEnumDef { r#enum: Some(_) }) => { + // todo + FieldType::Enum("".to_string()) + } + TypeDef::Int => FieldType::Primitive(TypeValue::Int), + TypeDef::Float => FieldType::Primitive(TypeValue::Float), + TypeDef::Bool => FieldType::Primitive(TypeValue::Bool), + TypeDef::Null => FieldType::Primitive(TypeValue::Null), + TypeDef::Array(array_def) => { + FieldType::List(Box::new((&array_def.items).into())) + } + TypeDef::Class(class_def) => FieldType::Class("".to_string()), + }, + TypeSpec::Ref(TypeRef { r#ref }) => todo!(), + TypeSpec::Union(UnionRef { any_of }) => { + FieldType::Union(any_of.iter().map(|t| t.into()).collect()) + } + } + } + } +} + +pub fn create_output_format( + from_schema: OutputFormatContent, + mode: OutputFormatMode, +) -> Result { + let rendered = from_schema + .render(RenderOptions::default()) + .context("Failed to render output format")?; + Ok("".to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_output_format() -> Result<()> { + let model_json_schema = serde_json::json!({ + "$defs": { + "Role": { + "enum": [ + "admin", + "user", + "guest" + ], + "title": "Role", + "type": "string" + }, + "__main____Address": { + "properties": { + "street": { + "title": "Street", + "type": "string" + }, + "city": { + "title": "City", + "type": "string" + }, + "postal_code": { + "title": "Postal Code", + "type": "string" + } + }, + "required": [ + "street", + "city", + "postal_code" + ], + "title": "Address", + "type": "object" + }, + "other_demo__Address": { + "properties": { + "street": { + "title": "Street", + "type": "string" + }, + "city": { + "title": "City", + "type": "string" + }, + "postal_code": { + "title": "Postal Code", + "type": "string" + } + }, + "required": [ + "street", + "city", + "postal_code" + ], + "title": "Address", + "type": "object" + }, + "zebra__Address": { + "properties": { + "street": { + "title": "Street", + "type": "string" + }, + "city": { + "title": "City", + "type": "string" + }, + "postal_code": { + "title": "Postal Code", + "type": "string" + }, + "continent": { + "title": "Continent", + "type": "string" + } + }, + "required": [ + "street", + "city", + "postal_code", + "continent" + ], + "title": "Address", + "type": "object" + } + }, + "properties": { + "name": { + "title": "Name", + "type": "string" + }, + "age": { + "title": "Age", + "type": "integer" + }, + "roles": { + "items": { + "$ref": "#/$defs/Role" + }, + "title": "Roles", + "type": "array" + }, + "primary_address": { + "$ref": "#/$defs/__main____Address" + }, + "secondary_addresses": { + "items": { + "$ref": "#/$defs/other_demo__Address" + }, + "title": "Secondary Addresses", + "type": "array" + }, + "zebra_addresses": { + "items": { + "$ref": "#/$defs/zebra__Address" + }, + "title": "Zebra Addresses", + "type": "array" + }, + "tertiary_address": { + "anyOf": [ + { + "$ref": "#/$defs/other_demo__Address" + }, + { + "items": { + "$ref": "#/$defs/other_demo__Address" + }, + "type": "array" + } + ], + "title": "Tertiary Addresses" + }, + "gpa": { + "title": "Gpa", + "type": "number" + }, + "alive": { + "title": "Alive", + "type": "boolean" + }, + "nope": { + "title": "Nope", + "type": "null" + }, + //"tricky": { + // "additionalProperties": { + // "type": "string" + // }, + // "title": "Tricky", + // "type": "object" + //} + }, + "required": [ + "name", + "age", + "roles", + "primary_address", + "secondary_addresses", + "zebra_addresses" + ], + "title": "User", + "type": "object" + }); + + let schema = json_schema::Schema::deserialize(&model_json_schema)?; + println!("{:#?}", schema); + + Ok(()) + } +} diff --git a/root.code-workspace b/root.code-workspace index a0f0058c6..f3c75eaef 100644 --- a/root.code-workspace +++ b/root.code-workspace @@ -21,13 +21,16 @@ "[typescriptreact]": { "editor.defaultFormatter": "biomejs.biome" }, + "biome.lspBin": "typescript/node_modules/@biomejs/biome/bin/biome", "editor.colorDecoratorsLimit": 2000, "editor.formatOnSaveMode": "file", "editor.formatOnSave": true, "editor.tabSize": 2, "mypy.enabled": false, "git.openDiffOnClick": true, + "python.analysis.autoImportCompletions": true, "python.analysis.typeCheckingMode": "strict", + "python.testing.pytestEnabled": true, "rust-analyzer.procMacro.ignored": { "napi-derive": [ "napi" @@ -55,7 +58,6 @@ "titleBar.activeBackground": "#8B0002", "titleBar.activeForeground": "#FFFBFB" }, - "biome.lspBin": "typescript/node_modules/@biomejs/biome/bin/biome", }, "extensions": { "recommendations": [ diff --git a/tools/build b/tools/build index b66d41103..e4c211cd0 100755 --- a/tools/build +++ b/tools/build @@ -126,7 +126,8 @@ case "$_path" in if [ "$_test_mode" -eq 1 ]; then #command="wasm-pack test --chrome . --features=wasm" #command="wasm-pack test --node . --features=wasm" - command="cargo test --features=no_wasm,internal internal --verbose -- --nocapture" + #command="cargo test --features=no_wasm,internal internal --verbose -- --nocapture" + command="cargo test --features=internal schema -- --nocapture" else command="cargo build --features=wasm --target=wasm32-unknown-unknown" fi @@ -136,7 +137,7 @@ case "$_path" in npx nodemon \ --ext rs,hb,hbs,j2,toml,baml \ --watch "${_repo_root}/engine" \ - --ignore 'target' \ + --ignore 'target/**' \ --exec "${command}" else eval "${command}" @@ -318,6 +319,19 @@ case "$_path" in fi ;; + */.sandbox/instructor-demo | /.sandbox/instructor-demo/* ) + command="OPENAI_LOG=debug poetry run python demo.py; date" + if [ "$_watch_mode" -eq 1 ]; then + npx nodemon \ + --ext py \ + --watch . \ + --exec "${command}" + else + eval "${command}" + date + fi + ;; + *) echo "Nothing to build in repo root" ;; diff --git a/tools/ppy b/tools/ppy new file mode 100755 index 000000000..de1bf02af --- /dev/null +++ b/tools/ppy @@ -0,0 +1,2 @@ +#!/usr/bin/env /bin/bash +poetry run python "$@"