Skip to content

Commit

Permalink
Merge pull request #6 from sybila/feat/expressions
Browse files Browse the repository at this point in the history
Initial Tokenizing and parsing into Expressions
  • Loading branch information
daemontus authored Mar 8, 2024
2 parents 84ee20f + ea6f94e commit 5ad39f6
Show file tree
Hide file tree
Showing 18 changed files with 1,689 additions and 6 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
thiserror = "1.0.50"
itertools = "0.12.0"
regex = "1.10.3"
lazy_static = "1.4.0"
19 changes: 13 additions & 6 deletions src/expressions/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,16 @@ use std::fmt::Debug;
use std::hash::Hash;

use crate::expressions::Expression::{self, And, Constant, Literal, Not, Or};
use crate::traits::{Evaluate, GatherLiterals, PowerSet, SemanticEq};
use crate::parser::{parse_tokens, tokenize, ParseError};
use crate::traits::{Evaluate, GatherLiterals, Parse, PowerSet, SemanticEq};

impl<TLiteral: Debug + Clone + Eq + Hash> SemanticEq<TLiteral> for Expression<TLiteral> {
fn semantic_eq(&self, other: &Self) -> bool {
let self_literals = self.gather_literals();
let other_literals = other.gather_literals();
let literals_union = HashSet::from_iter(self_literals.union(&other_literals).cloned());

if self_literals != other_literals {
return false;
}

let all_options = Self::generate_power_set(self_literals);
let all_options = Self::generate_power_set(literals_union);

all_options.into_iter().all(|literal_settings| {
self.evaluate(&literal_settings) == other.evaluate(&literal_settings)
Expand Down Expand Up @@ -75,3 +73,12 @@ impl<TLiteral: Debug + Clone + Eq + Hash> Evaluate<TLiteral> for Expression<TLit
}
}
}

/// Parsing of `Expression`s with `String` literals from their textual form.
impl Parse for Expression<String> {
    /// Tokenize `input`, then assemble the token stream into an expression
    /// tree. Either stage's error is converted into [`ParseError`] via `?`.
    fn from_str(input: &str) -> Result<Self, ParseError> {
        let tokens = tokenize(input)?;

        Ok(parse_tokens(&tokens)?)
    }
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod expressions;
pub mod parser;
pub mod traits;
36 changes: 36 additions & 0 deletions src/parser/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#[derive(Debug, thiserror::Error, PartialEq)]
pub enum TokenizeError {
#[error("Unexpected `)` encountered on position {position} near '{vicinity}'")]
UnexpectedClosingParenthesis { position: usize, vicinity: String },
#[error("Missing `)` on position {position} near '{vicinity}'")]
MissingClosingParenthesis { position: usize, vicinity: String },
#[error("Unexpected `}}` encountered on position {position} near '{vicinity}'")]
UnexpectedClosingCurlyBrace { position: usize, vicinity: String },
#[error("Missing `}}` on position {position} near '{vicinity}'")]
MissingClosingCurlyBrace { position: usize, vicinity: String },
#[error("No name literal `{{}}` encountered on position {position} near '{vicinity}'")]
EmptyLiteralName { position: usize, vicinity: String },
#[error("Unknown symbol {symbol} encountered on position {position}'")]
UnknownSymbolError { position: usize, symbol: String },
#[error("Unexpected whitespace encountered in the middle of operator")]
UnexpectedWhitespace,
}

// TODO extend enum variants to carry position of error, code, impl position from errors
/// Errors produced while assembling a token stream into an `Expression` tree.
#[derive(Debug, thiserror::Error, PartialEq)]
pub enum ParseTokensError {
    /// An operator (or the whole input) had an empty operand slot,
    /// e.g. `a & ` or the empty string.
    #[error("Operator had nothing as its operand")]
    EmptySideOfOperator,
    /// Two or more terminals appeared in sequence with no operator between
    /// them, e.g. `a & b c`.
    #[error("Unexpected multiple consecutive literals, maybe you are missing an operator?")]
    UnexpectedLiteralsGroup,
}

/// Top-level parsing error: either the tokenizing stage or the token-parsing
/// stage failed. Both inner errors convert via `#[from]`, so `?` works across
/// stage boundaries; `#[error(transparent)]` forwards their messages verbatim.
#[derive(thiserror::Error, Debug, PartialEq)]
pub enum ParseError {
    /// The raw input could not be split into tokens.
    #[error(transparent)]
    TokenizingError(#[from] TokenizeError),
    /// The token stream did not form a valid expression.
    #[error(transparent)]
    ParsingError(#[from] ParseTokensError),
}

// Placeholder "vicinity" text — presumably used when an error position falls
// at the end of input, so there is no surrounding text to excerpt.
// NOTE(review): confirm against the tokenizer's error-construction sites.
pub const EOL_VICINITY: &str = "EOL";
14 changes: 14 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use crate::parser::structs::PositionTracker;
pub use error::ParseError;
use itertools::MultiPeek;
pub use parse::parse_tokens;
use std::str::Chars;
pub use tokenize::tokenize;

mod error;
mod parse;
mod structs;
mod tokenize;
mod utils;

// Character stream consumed by the tokenizer: a multi-peekable `Chars`
// iterator wrapped in `PositionTracker` — presumably so errors can report the
// current input position (see `TokenizeError`'s `position` fields).
type TokenizerInput<'a> = PositionTracker<MultiPeek<Chars<'a>>>;
228 changes: 228 additions & 0 deletions src/parser/parse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
use itertools::Itertools;

use crate::expressions::Expression;
use crate::parser::error::ParseTokensError;
use crate::parser::structs::FinalToken;

/// Parse a complete token stream into an `Expression` over `String` literals.
///
/// Operator precedence is encoded in the helper cascade: `|` is split first
/// (binds loosest, priority 0), then `&` (priority 1), then negation,
/// constants, literals and parenthesized groups (priority 2).
pub fn parse_tokens(input: &[FinalToken]) -> Result<Expression<String>, ParseTokensError> {
    priority_0_parse_or(input)
}

fn priority_0_parse_or(data: &[FinalToken]) -> Result<Expression<String>, ParseTokensError> {
data.split(|t| t == &FinalToken::Or)
.map(priority_1_parse_and)
.fold_ok(None::<Expression<String>>, |acc, item| match acc {
None => Some(item),
Some(Expression::Or(mut es)) => {
es.push(item);
Some(Expression::n_ary_or(es))
}
Some(previous) => Some(Expression::n_ary_or(vec![previous, item])),
})?
.ok_or(ParseTokensError::EmptySideOfOperator)
}

fn priority_1_parse_and(data: &[FinalToken]) -> Result<Expression<String>, ParseTokensError> {
data.split(|t| t == &FinalToken::And)
.map(priority_2_terminal)
.fold_ok(None::<Expression<String>>, |acc, item| match acc {
None => Some(item),
Some(Expression::And(mut es)) => {
es.push(item);
Some(Expression::n_ary_and(es))
}
Some(previous) => Some(Expression::n_ary_and(vec![previous, item])),
})?
.ok_or(ParseTokensError::EmptySideOfOperator)
}

/// Highest-precedence pass: negation, constants, literals and parenthesized
/// groups. At this point `&`/`|` have been split away by the earlier passes.
fn priority_2_terminal(data: &[FinalToken]) -> Result<Expression<String>, ParseTokensError> {
    match data.split_first() {
        // Nothing between two operators (or the whole input was empty).
        None => Err(ParseTokensError::EmptySideOfOperator),
        // `!` applies to everything that follows it.
        Some((FinalToken::Not, rest)) => Ok(Expression::negate(priority_2_terminal(rest)?)),
        // Exactly one token remains: it must be a terminal.
        Some((single, rest)) if rest.is_empty() => match single {
            FinalToken::ConstantTrue => Ok(Expression::Constant(true)),
            FinalToken::ConstantFalse => Ok(Expression::Constant(false)),
            FinalToken::Literal(name) => Ok(Expression::Literal(name.clone())),
            // Groups were nested during tokenization; recurse from the top.
            FinalToken::Parentheses(inner) => Ok(parse_tokens(inner)?),
            _ => unreachable!(
                "Other tokens are matched by remaining functions, nothing else should remain."
            ),
        },
        // Two or more non-operator tokens in a row: an operator is missing.
        Some(_) => Err(ParseTokensError::UnexpectedLiteralsGroup),
    }
}

#[cfg(test)]
mod tests {
    use crate::expressions::Expression::{Constant, Literal};
    use crate::parser::error::ParseTokensError::EmptySideOfOperator;
    use crate::parser::{tokenize, ParseError};
    use crate::traits::SemanticEq;

    use super::*;

    /// Shorthand: build a `String`-named literal expression.
    fn lit(name: &str) -> Expression<String> {
        Literal(name.to_string())
    }

    /// Tokenize and parse `input`, then check the result against `expected`
    /// both semantically and structurally.
    fn assert_parses_to(input: &str, expected: Expression<String>) -> Result<(), ParseError> {
        let actual = parse_tokens(&tokenize(input)?)?;

        assert!(actual.semantic_eq(&expected));
        assert_eq!(actual, expected);

        Ok(())
    }

    #[test]
    fn test_empty_nok() -> Result<(), ParseError> {
        let result = parse_tokens(&tokenize("")?);

        assert_eq!(result, Err(EmptySideOfOperator));

        Ok(())
    }

    #[test]
    fn test_binaryand_ok() -> Result<(), ParseError> {
        assert_parses_to("a & b", Expression::binary_and(lit("a"), lit("b")))
    }

    #[test]
    fn test_naryand_ok() -> Result<(), ParseError> {
        assert_parses_to(
            "a & b & c",
            Expression::n_ary_and(vec![lit("a"), lit("b"), lit("c")]),
        )
    }

    #[test]
    fn test_binaryor_ok() -> Result<(), ParseError> {
        assert_parses_to("a | b", Expression::binary_or(lit("a"), lit("b")))
    }

    #[test]
    fn test_naryor_ok() -> Result<(), ParseError> {
        assert_parses_to(
            "a | b | c",
            Expression::n_ary_or(vec![lit("a"), lit("b"), lit("c")]),
        )
    }

    #[test]
    fn test_parentheses_toplevel_ok() -> Result<(), ParseError> {
        assert_parses_to("(a)", lit("a"))
    }

    #[test]
    fn test_parentheses_naryor_naryand_ok() -> Result<(), ParseError> {
        assert_parses_to(
            "a | b | (a & b & !c)",
            Expression::n_ary_or(vec![
                lit("a"),
                lit("b"),
                Expression::n_ary_and(vec![lit("a"), lit("b"), Expression::negate(lit("c"))]),
            ]),
        )
    }

    #[test]
    fn test_parentheses_naryor_naryand_constants_ok() -> Result<(), ParseError> {
        assert_parses_to(
            "F | 0 | False | (T & 1 & True)",
            Expression::n_ary_or(vec![
                Constant(false),
                Constant(false),
                Constant(false),
                Expression::n_ary_and(vec![Constant(true), Constant(true), Constant(true)]),
            ]),
        )
    }

    #[test]
    fn test_priorities_naryor_naryand_ok() -> Result<(), ParseError> {
        // Same expected tree as the parenthesized variant:
        // `&` binds tighter than `|` even without parentheses.
        assert_parses_to(
            "a | b | a & b & !c",
            Expression::n_ary_or(vec![
                lit("a"),
                lit("b"),
                Expression::n_ary_and(vec![lit("a"), lit("b"), Expression::negate(lit("c"))]),
            ]),
        )
    }

    #[test]
    fn test_terminal_and_emptyside_nok() -> Result<(), ParseError> {
        let result = parse_tokens(&tokenize("a & ")?);

        assert_eq!(result, Err(ParseTokensError::EmptySideOfOperator));

        Ok(())
    }

    #[test]
    fn test_terminal_and_nok() -> Result<(), ParseError> {
        let result = parse_tokens(&tokenize("a & b c")?);

        assert_eq!(result, Err(ParseTokensError::UnexpectedLiteralsGroup));

        Ok(())
    }
}
10 changes: 10 additions & 0 deletions src/parser/structs/final_token.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/// Fully resolved token produced by the tokenizer and consumed by the parser.
///
/// Parenthesized groups are already nested at tokenization time (see
/// `Parentheses`), so the parser never sees raw `(` / `)` tokens.
#[derive(PartialEq, Debug)]
pub enum FinalToken {
    // Conjunction operator (spelled `&` in the parser tests).
    And,
    // Disjunction operator (spelled `|` in the parser tests).
    Or,
    // Negation operator (spelled `!` in the parser tests).
    Not,
    // Boolean constant true (tests show `T`, `1`, `True` all map here).
    ConstantTrue,
    // Boolean constant false (tests show `F`, `0`, `False` all map here).
    ConstantFalse,
    // A named variable.
    Literal(String),
    // A `(`...`)` group, with its contents recursively tokenized.
    Parentheses(Vec<FinalToken>),
}
Loading

0 comments on commit 5ad39f6

Please sign in to comment.