diff --git a/Cargo.toml b/Cargo.toml index df59d18f..34b7baff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,12 +68,17 @@ logos-derive = {version = "0.14.2", path = "./logos-derive", optional = true} [dev-dependencies] ariadne = {version = "0.4", features = ["auto-color"]} +chumsky = {version = "0.9.3" } [[example]] doc-scrape-examples = true # Only needed once, because requires dev-dependencies name = "brainfuck" path = "examples/brainfuck.rs" +[[example]] +name = "calculator" +path = "examples/calculator.rs" + [[example]] name = "custom_error" path = "examples/custom_error.rs" diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index abbde79c..9005896c 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -14,6 +14,7 @@ + [Unsafe Code](./unsafe.md) + [Examples](./examples.md) + [Brainfuck interpreter](./examples/brainfuck.md) + + [Simple calculator](./examples/calculator.md) + [JSON parser](./examples/json.md) + [JSON-borrowed parser](./examples/json_borrowed.md) + [Contributing](./contributing.md) diff --git a/book/src/assets/calculator_example_flow.png b/book/src/assets/calculator_example_flow.png new file mode 100644 index 00000000..c0102867 Binary files /dev/null and b/book/src/assets/calculator_example_flow.png differ diff --git a/book/src/assets/calculator_example_how_evaluator_works.png b/book/src/assets/calculator_example_how_evaluator_works.png new file mode 100644 index 00000000..b29b7a9d Binary files /dev/null and b/book/src/assets/calculator_example_how_evaluator_works.png differ diff --git a/book/src/examples.md b/book/src/examples.md index e705b039..738adc2d 100644 --- a/book/src/examples.md +++ b/book/src/examples.md @@ -4,6 +4,8 @@ The following examples are ordered by increasing level of complexity. **[Brainfuck interpreter](./examples/brainfuck.md)**: Lexers are very powerful tools for parsing code programs into meaningful instructions. 
We show you how you can build an interpreter for the Brainfuck programming language under 100 lines of code! +**[Simple calculator](./examples/calculator.md)**: For a relatively large domain-specific language (DSL), or any programming language, implementing an interpreter typically involves converting the tokens generated by a lexer into an abstract syntax tree (AST) via a parser, and then evaluating it. We show you how you can build a simple calculator that evaluates arithmetic expressions by combining Logos and a parser generator library. + **[JSON parser](./examples/json.md)**: We present a JSON parser written with Logos that does nice error reporting when invalid values are encountered. **[JSON-borrowed parser](./examples/json_borrowed.md)**: A variant of the previous parser, but that does not own its data. diff --git a/book/src/examples/brainfuck.md b/book/src/examples/brainfuck.md index 5f289719..ad871018 100644 --- a/book/src/examples/brainfuck.md +++ b/book/src/examples/brainfuck.md @@ -24,7 +24,7 @@ Finally, we provide you the full code that you should be able to run with[^2]: cargo run --example brainfuck examples/hello_word.bf ``` -[^2] You first need to clone [this repository](https://github.com/maciejhirsz/logos). +[^2]: You first need to clone [this repository](https://github.com/maciejhirsz/logos). ```rust,no_run,noplayground {{#include ../../../examples/brainfuck.rs:all}} diff --git a/book/src/examples/calculator.md b/book/src/examples/calculator.md new file mode 100644 index 00000000..b7cc450c --- /dev/null +++ b/book/src/examples/calculator.md @@ -0,0 +1,143 @@ +# Simple calculator + +This page (including the images) was contributed by [ynn](https://github.com/your-diary). + +When you implement an interpreter for a [domain-specific language (DSL)](https://en.wikipedia.org/wiki/Domain-specific_language), or any programming language, the process typically involves the following steps: + +1. 
**Lexing**: Splitting the input stream (i.e., source code string) into tokens via a lexer. + +2. **Parsing**: Converting the tokens into an [abstract syntax tree (AST)](https://en.wikipedia.org/wiki/Abstract_syntax_tree) via a parser. + +3. **Evaluation**: Evaluating the AST to produce the result. + +In this example, we implement a simple calculator that evaluates arithmetic expressions such as `1 + 2 * 3` or `((1 + 2) * 3 + 4) * 2 + 4 / 3`. + +We use `logos` as the lexer generator and [`chumsky`](https://github.com/zesterer/chumsky) as the parser generator. + +![flow chart](/assets/calculator_example_flow.png) + +## 1. Try It + +Before diving into the implementation details, let's play with it[^1]. + +```bash +$ cargo run --example calculator '1 + 7 * (3 - 4) / 2' +``` + +[^1]: You first need to clone [this repository](https://github.com/maciejhirsz/logos). + +**Output**: + +``` +[AST] +Add( + Int( + 1, + ), + Div( + Mul( + Int( + 7, + ), + Sub( + Int( + 3, + ), + Int( + 4, + ), + ), + ), + Int( + 2, + ), + ), +) + +[result] +-2 +``` + +~~~admonish note title="Full Code" collapsible=true + +```rust,no_run,noplayground +{{#include ../../../examples/calculator.rs:all}} +``` + +~~~ + +## 2. Lexer + +Our calculator supports the following tokens: + +- Integer literals: `0`, `1`, `15`, etc.; + +- Unary operator: `-`; + +- Binary operators: `+`, `-`, `*`, `/`; + +- Parenthesized expressions: `(3 + 5) * 2`, `((1 + 2) * 3 + 4) * 2 + 3 / 2`, etc. + +```rust,no_run,noplayground +{{#include ../../../examples/calculator.rs:tokens}} +``` + +## 3. Parser + +While it is easy enough to manually implement a parser in this case (e.g., [Pratt parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser#Pratt_parsing)), let's just use the [`chumsky`](https://github.com/zesterer/chumsky) crate, which is one of the most popular parser generator libraries in Rust. + +### 3.1 AST Definition + +First, we define the AST. 
+ +```rust,no_run,noplayground +{{#include ../../../examples/calculator.rs:ast}} +``` + +Note that + +- We name the enum not `AST` but `Expr` because an AST is just nested expressions. + +- There is no `Parenthesized` variant because parentheses only affect the order of operations (i.e., precedence), which is reflected in the AST structure. + +- `Box` is used because [a recursive enum without indirection is not allowed in Rust](https://stackoverflow.com/questions/25296195/why-are-recursive-struct-types-illegal-in-rust). + +### 3.2 Parser Implementation + +Next, we define the parser. The code may look a bit complicated if you are not familiar with parser combinator libraries, but it is actually quite simple. See [Chumsky's official tutorial](https://github.com/zesterer/chumsky/blob/main/tutorial.md) for the details. + +```rust,no_run,noplayground +{{#include ../../../examples/calculator.rs:parser}} +``` + +## 4. Evaluator + +Evaluating the AST is straightforward. We just implement it using [depth-first search (DFS)](https://en.wikipedia.org/wiki/Depth-first_search) such that the mathematical operations are processed in the correct order. + +```rust,no_run,noplayground +{{#include ../../../examples/calculator.rs:evaluator}} +``` + +**Example** + +Evaluating `1 + 3 * 12` will proceed as below. + +![how evaluator works](/assets/calculator_example_how_evaluator_works.png) + +## 5. `main()` Function + +Finally, we put everything together in the `main()` function. + +```rust,no_run,noplayground +{{#include ../../../examples/calculator.rs:main}} +``` + +## 6. Extend the Calculator + +Now that you've implemented a basic calculator, try extending its functionality with the following tasks: + +- **Handle zero-division gracefully**: The current evaluator panics when zero-division occurs. Change the return type of the evaluator from `isize` to `Result<isize, String>`, making it possible to return an error message. 
+
+- **Add support for the modulo operator (`%`)**: Update the lexer, parser, and evaluator to handle expressions like `10 % 3`.
+
+- **Add support for built-in functions**: Implement built-in functions such as `abs(x)`, `pow(x, y)` or `rand()`.
diff --git a/examples/calculator.rs b/examples/calculator.rs
new file mode 100644
index 00000000..af23093c
--- /dev/null
+++ b/examples/calculator.rs
@@ -0,0 +1,191 @@
+//! Simple calculator.
+//!
+//! Usage:
+//!     cargo run --example calculator <arithmetic expression>
+//!
+//! Example:
+//!     cargo run --example calculator '1 + 7 * (3 - 4) / 2'
+//!
+//! The following constructs are supported:
+//! - integer literals: `0`, `1`, `15`, etc.
+//! - unary operator: `-`
+//! - binary operators: `+`, `-`, `*`, `/`
+//! - parentheses: `(`, `)`
+
+/* ANCHOR: all */
+use std::env;
+use std::process;
+
+use chumsky::prelude::*;
+use logos::Logos;
+
+/* ANCHOR: tokens */
+/// Tokens recognized by the lexer.
+///
+/// Whitespace is skipped and lexing errors are reported as `String` messages.
+#[derive(Logos, Debug, PartialEq, Eq, Hash, Clone)]
+#[logos(skip r"[ \t\n]+")]
+#[logos(error = String)]
+enum Token {
+    #[token("+")]
+    Plus,
+
+    #[token("-")]
+    Minus,
+
+    #[token("*")]
+    Multiply,
+
+    #[token("/")]
+    Divide,
+
+    #[token("(")]
+    LParen,
+
+    #[token(")")]
+    RParen,
+
+    // A literal that does not fit in `isize` becomes a lexer error, not a panic.
+    #[regex("[0-9]+", |lex| lex.slice().parse::<isize>().map_err(|e| e.to_string()))]
+    Integer(isize),
+}
+/* ANCHOR_END: tokens */
+
+/* ANCHOR: ast */
+/// Abstract syntax tree of an arithmetic expression.
+#[derive(Debug)]
+enum Expr {
+    // Integer literal.
+    Int(isize),
+
+    // Unary minus.
+    Neg(Box<Expr>),
+
+    // Binary operators.
+    Add(Box<Expr>, Box<Expr>),
+    Sub(Box<Expr>, Box<Expr>),
+    Mul(Box<Expr>, Box<Expr>),
+    Div(Box<Expr>, Box<Expr>),
+}
+/* ANCHOR_END: ast */
+
+/* ANCHOR: evaluator */
+impl Expr {
+    /// Evaluates the expression recursively, operands before operators.
+    ///
+    /// # Panics
+    ///
+    /// Panics on division by zero.
+    fn eval(&self) -> isize {
+        match self {
+            Expr::Int(n) => *n,
+            Expr::Neg(rhs) => -rhs.eval(),
+            Expr::Add(lhs, rhs) => lhs.eval() + rhs.eval(),
+            Expr::Sub(lhs, rhs) => lhs.eval() - rhs.eval(),
+            Expr::Mul(lhs, rhs) => lhs.eval() * rhs.eval(),
+            Expr::Div(lhs, rhs) => lhs.eval() / rhs.eval(),
+        }
+    }
+}
+/* ANCHOR_END: evaluator */
+
+// `binary_2` is bound to a name before being returned so that every
+// precedence level in the parser is labelled.
+#[allow(clippy::let_and_return)]
+/* ANCHOR: parser */
+/// Builds a parser that turns a token sequence into an [`Expr`].
+///
+/// Precedence, tightest first: atoms (integers, parenthesized expressions),
+/// unary minus, `*` and `/`, then `+` and `-`. Binary operators fold to the left.
+fn parser() -> impl Parser<Token, Expr, Error = Simple<Token>> {
+    recursive(|p| {
+        let atom = {
+            let parenthesized = p
+                .clone()
+                .delimited_by(just(Token::LParen), just(Token::RParen));
+
+            let integer = select! {
+                Token::Integer(n) => Expr::Int(n),
+            };
+
+            parenthesized.or(integer)
+        };
+
+        let unary = just(Token::Minus)
+            .repeated()
+            .then(atom)
+            .foldr(|_op, rhs| Expr::Neg(Box::new(rhs)));
+
+        let binary_1 = unary
+            .clone()
+            .then(
+                just(Token::Multiply)
+                    .or(just(Token::Divide))
+                    .then(unary)
+                    .repeated(),
+            )
+            .foldl(|lhs, (op, rhs)| match op {
+                Token::Multiply => Expr::Mul(Box::new(lhs), Box::new(rhs)),
+                Token::Divide => Expr::Div(Box::new(lhs), Box::new(rhs)),
+                _ => unreachable!(),
+            });
+
+        let binary_2 = binary_1
+            .clone()
+            .then(
+                just(Token::Plus)
+                    .or(just(Token::Minus))
+                    .then(binary_1)
+                    .repeated(),
+            )
+            .foldl(|lhs, (op, rhs)| match op {
+                Token::Plus => Expr::Add(Box::new(lhs), Box::new(rhs)),
+                Token::Minus => Expr::Sub(Box::new(lhs), Box::new(rhs)),
+                _ => unreachable!(),
+            });
+
+        binary_2
+    })
+    .then_ignore(end())
+}
+/* ANCHOR_END: parser */
+
+/* ANCHOR: main */
+fn main() {
+    //reads the input expression from the command line
+    let input = env::args()
+        .nth(1)
+        .expect("Expected expression argument (e.g. `1 + 7 * (3 - 4) / 5`)");
+
+    //creates a lexer instance from the input
+    let lexer = Token::lexer(&input);
+
+    //splits the input into tokens, using the lexer
+    let mut tokens = vec![];
+    for (token, span) in lexer.spanned() {
+        match token {
+            Ok(token) => tokens.push(token),
+            Err(e) => {
+                eprintln!("lexer error at {:?}: {}", span, e);
+                process::exit(1);
+            }
+        }
+    }
+
+    //parses the tokens to construct an AST
+    let ast = match parser().parse(tokens) {
+        Ok(expr) => {
+            println!("[AST]\n{:#?}", expr);
+            expr
+        }
+        Err(e) => {
+            eprintln!("parse error: {:#?}", e);
+            process::exit(1);
+        }
+    };
+
+    //evaluates the AST to get the result
+    println!("\n[result]\n{}", ast.eval());
+}
+/* ANCHOR_END: main */
+/* ANCHOR_END: all */