diff --git a/docs/concepts.md b/docs/concepts.md new file mode 100644 index 0000000..b010f1e --- /dev/null +++ b/docs/concepts.md @@ -0,0 +1,76 @@ +# Response Model + +Defining LLM Output Schemas in Instructor is done via our `InstructMacro` macro which generates the underlying JSON Schema from your Rust Struct. We support a variety of different types out of the box like Enums, Vecs and Option types; read more about how to use them [here](/types). + +## What happens behind the scenes? + +At compile time, Rust lays out your structs as blocks of memory with specific offsets for each field. + +Accessing fields involves using these offsets, similar to accessing array elements with indices. This approach ensures that structs are efficient and have no additional runtime overhead compared to manual memory management techniques. + +```rust +struct Example { + a: u32, + b: u64, + c: u8, +} +``` + +This means that, at runtime, we lose a significant amount of information about the types and fields that you use in your code. When we use the `InstructMacro`, we rewrite your struct under the hood to expose a `get_info()` method which returns information on your struct. + +```rust +#[derive(InstructMacro, Debug)] +#[allow(dead_code)] +#[description("This is a struct")] +struct TestStruct { + #[description( + "This is a sample example \ + that spans across \ + three lines" + )] + pub field1: String, + #[description("This is a test field")] + pub field2: str, +} +``` + +We add this code under the hood (you can view all the expanded code using `cargo-expand` with the command `cargo expand`). 
+ +```rust +impl instruct_macros_types::InstructMacro for TestStruct { + fn get_info() -> instruct_macros_types::InstructMacroResult { + let mut parameters = Vec::new(); + parameters + .push( + Parameter::Field(ParameterInfo { + name: "field1".to_string(), + r#type: "String".to_string(), + comment: "This is a sample example that spans across three lines" + .to_string(), + is_optional: false, + is_list: false, + }), + ); + parameters + .push( + Parameter::Field(ParameterInfo { + name: "field2".to_string(), + r#type: "str".to_string(), + comment: "This is a test field".to_string(), + is_optional: false, + is_list: false, + }), + ); + instruct_macros_types::InstructMacroResult::Struct(StructInfo { + name: "TestStruct".to_string(), + description: "This is a struct".to_string(), + parameters, + is_optional: false, + is_list: false, + }) + } + fn validate(&self) -> Result<(), String> { + Ok(()) + } +} +``` diff --git a/docs/index.md b/docs/index.md index 4205d92..8e38899 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,8 +1,8 @@ -# What is instructor-ai? +# Instructor Instructor makes it easy to get structured data like JSON from LLMs like GPT-3.5, GPT-4, GPT-4-Vision, and open-source models including Mistral/Mixtral, Anyscale, Ollama, and llama-cpp-python. -> Instructor's Rust Client is in active developement. This means that the api and package might change moving forward. We are looking for active contributors to the repository in the meantime to help flesh out more of the features. +> Instructor's Rust Client is in active development. This means that the API and package might change moving forward. We are looking for active contributors to the repository in the meantime to help flesh out more of the features. 
## Roadmap @@ -11,11 +11,11 @@ Here is a rough roadmap of features we'd like to implement **Struct -> JSON parsing** - [x] Strings -- [ ] Handle Booleans -- [ ] Integers -- [ ] Handle String Enums -- [ ] Lists -- [ ] Nested Structs +- [x] Handle Booleans +- [x] Integers +- [x] Handle String Enums +- [x] Lists +- [x] Nested Structs - [ ] Union Types (Eg. Struct1 | Struct 2 ) **Validators** @@ -43,12 +43,12 @@ Here is a rough roadmap of features we'd like to implement To install `instructor-ai`, you'll need to add the following to your cargo.toml file ```toml -instructor-ai = "0.1.0" -instruct-macros = "0.1.1" +instructor-ai = "0.1.8" +instruct-macros = "0.1.8" openai-api-rs = "4.1.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -instruct-macros-types = "0.1.2" +instruct-macros-types = "0.1.8" ``` Getting started with structured extraction is then as simple as declaring a new struct with the `InstructMacro` and importing the `ParamterInfo` and `StructInfo` types. diff --git a/docs/types.md b/docs/types.md new file mode 100644 index 0000000..07a7d39 --- /dev/null +++ b/docs/types.md @@ -0,0 +1,315 @@ +# Support for Simple Types + +We support most basic types out of the box with our `InstructMacro` macro now. Here's a quick rundown of how to use our macro. Note that the struct name and the description you provide are fed into the OpenAI call - what we show below is just the function parameter that we provide. 
+ +## Importing the Macro + +You'll need the following imports to use the macro: + +```rust +use instruct_macros::{InstructMacro}; +use instruct_macros_types::{ + InstructMacro, InstructMacroResult, Parameter, ParameterInfo, StructInfo, +}; + +#[derive(InstructMacro, Debug)] +struct User { + pub name: String, + pub age: String +} + +/* +{ + "type": "object", + "properties": { + "age": { + "type": "string", + "description": "" + }, + "name": { + "type": "string", + "description": "" + } + }, + "required": [ + "name", + "age" + ] +} +*/ +``` + +### Adding a Description + +We provide a `#[description( Description goes here )]` annotation that you can add to your struct. This will be included in the function call which we will send over to OpenAI/other inference providers. + +This works the same way for individual fields and for the entire struct, and multi-line descriptions are relatively easy to write. + +```rust +#[derive(InstructMacro, Debug)] +#[description("This is a user object")] +struct User { + #[description("This is the name of the user")] + pub name: String, + #[description( + "This is\ + a multi-line description\ + which can be used" + )] + pub age: String, +} + +/* +{ + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "This is the name of the user" + }, + "age": { + "type": "string", + "description": "This isa multi-line descriptionwhich can be used" + } + }, + "required": [ + "name", + "age" + ] +} +*/ +``` + +## Advanced Types + +### Enums + +Enums are supported in the same way. Just declare them as you would a normal `Serde` object and they'll work out of the box seamlessly. 
+ +```rust +#[derive(InstructMacro, Debug)] +#[description("This is an enum representing the status of a person")] +pub enum Status { + Active, + Inactive, + Pending, +} + +#[derive(InstructMacro, Debug)] +pub struct User { + name: String, + status: Status, +} + +/* +{ + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "" + }, + "status": { + "type": "string", + "description": "This is an enum representing the status of a person", + "enum_values": [ + "Active", + "Inactive", + "Pending" + ] + } + }, + "required": [ + "name", + "status" + ] +} +*/ +``` + +If you'd like to provide a custom description for your enum field in your struct, just use the `description` annotation and we'll override the default description of the enum when we generate the function parameter. + +```rust +#[derive(InstructMacro, Debug)] +#[description("This is an enum representing the status of a person")] +pub enum Status { + Active, + Inactive, + Pending, +} + +#[derive(InstructMacro, Debug)] +pub struct User { + name: String, + #[description("This is the person's status")] + status: Status, +} + +/* +{ + "type": "object", + "properties": { + "status": { + "type": "string", + "description": "This is the person's status", + "enum_values": [ + "Active", + "Inactive", + "Pending" + ] + }, + "name": { + "type": "string", + "description": "" + } + }, + "required": [ + "name", + "status" + ] +} +*/ +``` + +### Vectors + +Sometimes you might want to extract a list of objects (e.g. Users). To do so, you can just use a simple `Vec` object. 
+ +```rust +#[derive(InstructMacro, Debug)] +#[description("This is a struct with Option types")] +struct Numbers { + #[description("This is a list of numbers")] + pub numbers: Vec<i32>, +} + +/* +{ + "type": "object", + "properties": { + "users": { + "type": "array", + "description": "A list of users", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "" + } + } + } + } + }, + "required": [ + "users" + ] +} +*/ +``` + +### Options + +We also support Option types. This is most useful with a `Maybe` pattern, where we have some form of data that we might want to extract. + +```rust +#[derive(InstructMacro, Debug)] +#[allow(dead_code)] +#[description("This is a user struct")] +struct User { + #[description("This is the user's name")] + pub name: String, + #[description("This is the user's age")] + pub age: i32, +} + +#[derive(InstructMacro, Debug)] +#[allow(dead_code)] +#[description("This is a struct with Option type")] +struct MaybeUser { + #[description("This is an optional user field")] + pub user: Option<User>, + error_message: Option<String> +} + +/* +{ + "type": "object", + "properties": { + "user": { + "type": "object", + "description": "This is an optional user field. 
If the user is not present, the field will be null", + "properties": { + "age": { + "type": "number", + "description": "" + }, + "name": { + "type": "string", + "description": "" + } + } + }, + "error_message": { + "type": "string", + "description": "" + } + }, + "required": [] +} +*/ +``` + +### Nested Structs + +We also support Nested Structs out of the box - see example below + +```rust +#[derive(InstructMacro, Debug, Serialize, Deserialize)] +struct Address { + location: String, + distance: i32, +} + +#[derive(InstructMacro, Debug, Serialize, Deserialize)] +struct User { + name: String, + age: u8, + address: Address, +} + +/* +{ + "type": "object", + "properties": { + "address": { + "type": "object", + "description": "", + "properties": { + "location": { + "type": "string", + "description": "" + }, + "distance": { + "type": "number", + "description": "" + } + } + }, + "name": { + "type": "string", + "description": "" + }, + "age": { + "type": "number", + "description": "" + } + }, + "required": [ + "name", + "age", + "address" + ] +} +*/ +``` diff --git a/mkdocs.yml b/mkdocs.yml index 465917e..5a3463b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -90,3 +90,7 @@ markdown_extensions: nav: - Introduction: - Welcome To Instructor: "index.md" + + - Concepts: + - Models: "concepts.md" + - Types: "types.md"