Skip to content

Commit

Permalink
Prototype tokenizer+parser for update function expressions.
Browse files Browse the repository at this point in the history
  • Loading branch information
ondrej33 committed Sep 5, 2024
1 parent 41eeb37 commit b5aeb00
Show file tree
Hide file tree
Showing 6 changed files with 598 additions and 13 deletions.
8 changes: 6 additions & 2 deletions src/_impl_bma_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ use crate::bma_model::*;
use crate::enums::VariableType;
use crate::json_model::JsonBmaModel;
use crate::traits::{JsonSerde, XmlSerde};
use crate::update_fn::bma_fn_tree::BmaFnNode;
use crate::update_fn::parser::parse_bma_formula;
use crate::xml_model::XmlBmaModel;
use biodivine_lib_param_bn::{BooleanNetwork, RegulatoryGraph};
use std::collections::HashMap;
Expand Down Expand Up @@ -74,7 +76,8 @@ impl From<JsonBmaModel> for BmaModel {
.unwrap_or(VariableType::Default), // Use the type from layout if available
range_from: var.range_from,
range_to: var.range_to,
formula: var.formula,
// todo: handle the failures and empty formulas
formula: parse_bma_formula(&var.formula).unwrap_or(BmaFnNode::mk_constant(0)),
})
.collect(),
relationships: json_model
Expand Down Expand Up @@ -157,7 +160,8 @@ impl From<XmlBmaModel> for BmaModel {
variable_type: var.r#type,
range_from: var.range_from,
range_to: var.range_to,
formula: var.formula,
// todo: handle the failures and empty formulas
formula: parse_bma_formula(&var.formula).unwrap_or(BmaFnNode::mk_constant(0)),
})
.collect(),
relationships: xml_model
Expand Down
3 changes: 2 additions & 1 deletion src/bma_model.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::enums::{RelationshipType, VariableType};
use crate::update_fn::bma_fn_tree::BmaFnNode;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

Expand Down Expand Up @@ -26,7 +27,7 @@ pub struct Variable {
pub variable_type: VariableType, // Corresponds to "Type" in JSON/XML
pub range_from: u32,
pub range_to: u32,
pub formula: String,
pub formula: BmaFnNode,
}

#[derive(Serialize, Deserialize, Debug, Clone)]
Expand Down
10 changes: 6 additions & 4 deletions src/update_fn/bma_fn_tree.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use crate::update_fn::enums::{AggregateOp, ArithOp, Literal, UnaryOp};
use crate::update_fn::parser::parse_bma_fn_tokens;
use crate::update_fn::tokenizer::BmaFnToken;
use serde::{Deserialize, Serialize};
use std::cmp;
use std::fmt;

Expand All @@ -10,7 +12,7 @@ use std::fmt;
/// - A "unary" node with a `UnaryOp` and a sub-expression.
/// - A binary "arithmetic" node, with a `BinaryOp` and two sub-expressions.
/// - An "aggregation" node with a `AggregateOp` op and a list of sub-expressions.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
pub enum Expression {
Terminal(Literal),
Unary(UnaryOp, Box<BmaFnNode>),
Expand All @@ -24,7 +26,7 @@ pub enum Expression {
/// - `height`; A non-negative integer, starting from 0 (for terminal nodes).
/// - `expression_tree`; A parse tree for the expression.
/// - `function_str`; A canonical string representation of the expression.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
pub struct BmaFnNode {
pub function_str: String,
pub height: u32,
Expand All @@ -33,8 +35,8 @@ pub struct BmaFnNode {

impl BmaFnNode {
/// "Parse" a new [BmaFnNode] from a list of [BmaFnToken] objects.
pub fn from_tokens(_tokens: &[BmaFnToken]) -> Result<BmaFnNode, String> {
todo!()
pub fn from_tokens(tokens: &[BmaFnToken]) -> Result<BmaFnNode, String> {
parse_bma_fn_tokens(tokens)
}

/// Create a "unary" [BmaFnNode] from the given arguments.
Expand Down
6 changes: 3 additions & 3 deletions src/update_fn/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pub mod bma_fn_tree;
mod enums;
mod parser;
mod tokenizer;
pub mod enums;
pub mod parser;
pub mod tokenizer;
269 changes: 268 additions & 1 deletion src/update_fn/parser.rs
Original file line number Diff line number Diff line change
@@ -1 +1,268 @@
// todo
use crate::update_fn::bma_fn_tree::*;
use crate::update_fn::enums::*;
use crate::update_fn::tokenizer::{try_tokenize_bma_formula, BmaFnToken};

/// Turn the textual form of a BMA update function into its expression tree.
///
/// This is a thin convenience wrapper that first tokenizes `formula` and then feeds the
/// resulting tokens to [parse_bma_fn_tokens]. It is handy for tests and debugging.
///
/// # Errors
///
/// Any error reported by the tokenizer or by the parser is propagated unchanged.
///
/// NOTE(review): the previous documentation said a `validate_props` step must be called
/// afterwards to finish preprocessing; no such item is visible in this module, so
/// confirm whether that requirement still applies.
pub fn parse_bma_formula(formula: &str) -> Result<BmaFnNode, String> {
    let token_stream = try_tokenize_bma_formula(String::from(formula))?;
    parse_bma_fn_tokens(&token_stream)
}

/// Utility method to find the first occurrence of a specific token in the token tree.
fn index_of_first(tokens: &[BmaFnToken], token: BmaFnToken) -> Option<usize> {
return tokens.iter().position(|t| *t == token);
}

/// Parse `tokens` of BMA update fn formula into an abstract syntax tree using recursive steps.
pub fn parse_bma_fn_tokens(tokens: &[BmaFnToken]) -> Result<BmaFnNode, String> {
parse_1_div(tokens)
}

/// Recursive parsing step 1: extract `/` operators.
fn parse_1_div(tokens: &[BmaFnToken]) -> Result<BmaFnNode, String> {
let div_token = index_of_first(tokens, BmaFnToken::Binary(ArithOp::Div));
Ok(if let Some(i) = div_token {
BmaFnNode::mk_arithmetic(
parse_2_mul(&tokens[..i])?,
parse_1_div(&tokens[(i + 1)..])?,
ArithOp::Div,
)
} else {
parse_2_mul(tokens)?
})
}

/// Recursive parsing step 2: extract `*` operators.
fn parse_2_mul(tokens: &[BmaFnToken]) -> Result<BmaFnNode, String> {
let mul_token = index_of_first(tokens, BmaFnToken::Binary(ArithOp::Times));
Ok(if let Some(i) = mul_token {
BmaFnNode::mk_arithmetic(
parse_3_minus(&tokens[..i])?,
parse_2_mul(&tokens[(i + 1)..])?,
ArithOp::Times,
)
} else {
parse_3_minus(tokens)?
})
}

/// Recursive parsing step 3: extract `-` operators.
fn parse_3_minus(tokens: &[BmaFnToken]) -> Result<BmaFnNode, String> {
let minus_token = index_of_first(tokens, BmaFnToken::Binary(ArithOp::Minus));
Ok(if let Some(i) = minus_token {
BmaFnNode::mk_arithmetic(
parse_4_plus(&tokens[..i])?,
parse_3_minus(&tokens[(i + 1)..])?,
ArithOp::Minus,
)
} else {
parse_4_plus(tokens)?
})
}

/// Recursive parsing step 4: extract `+` operators.
fn parse_4_plus(tokens: &[BmaFnToken]) -> Result<BmaFnNode, String> {
let minus_token = index_of_first(tokens, BmaFnToken::Binary(ArithOp::Add));
Ok(if let Some(i) = minus_token {
BmaFnNode::mk_arithmetic(
parse_5_others(&tokens[..i])?,
parse_4_plus(&tokens[(i + 1)..])?,
ArithOp::Add,
)
} else {
parse_5_others(tokens)?
})
}

/// Recursive parsing step 5: extract literals and recursively solve sub-formulae in
/// parentheses and in functions.
///
/// At this point `tokens` is expected to hold exactly one token: a variable name, an
/// integer constant, an aggregate or unary function application, or a parenthesised
/// group. Function arguments and groups are parsed by recursing into
/// [parse_bma_fn_tokens].
///
/// # Errors
///
/// Returns an error message when `tokens` is empty, when a function argument is not a
/// `BmaFnToken::TokenList`, when a nested formula fails to parse, or when `tokens` is
/// anything other than a single operand token.
fn parse_5_others(tokens: &[BmaFnToken]) -> Result<BmaFnNode, String> {
    const NOT_A_TOKEN_LIST: &str = "Function must be applied on `BmaFnToken::TokenList` args.";

    match tokens {
        [] => Err("Expected formula, found nothing.".to_string()),
        [BmaFnToken::Atomic(Literal::Str(name))] => Ok(BmaFnNode::mk_variable(name.as_str())),
        [BmaFnToken::Atomic(Literal::Int(num))] => Ok(BmaFnNode::mk_constant(*num)),
        [BmaFnToken::Aggregate(operator, arguments)] => {
            // Every argument must be a token list; parsing stops at the first failure.
            let arg_expression_nodes = arguments
                .iter()
                .map(|argument| match argument {
                    BmaFnToken::TokenList(inner) => parse_bma_fn_tokens(inner),
                    _ => Err(NOT_A_TOKEN_LIST.to_string()),
                })
                .collect::<Result<Vec<_>, String>>()?;
            Ok(BmaFnNode::mk_aggregation(
                operator.clone(),
                arg_expression_nodes,
            ))
        }
        // Borrow the boxed argument instead of cloning the whole token subtree.
        [BmaFnToken::Unary(operator, argument)] => match argument.as_ref() {
            BmaFnToken::TokenList(inner) => Ok(BmaFnNode::mk_unary(
                parse_bma_fn_tokens(inner)?,
                operator.clone(),
            )),
            _ => Err(NOT_A_TOKEN_LIST.to_string()),
        },
        // Recursively solve sub-formulae in parentheses.
        [BmaFnToken::TokenList(inner)] => parse_bma_fn_tokens(inner),
        // Several tokens without an operator between them, or a lone token that
        // cannot act as an operand.
        _ => Err(format!("Unexpected: {tokens:?}. Expecting formula.")),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::update_fn::bma_fn_tree::BmaFnNode;
    use crate::update_fn::enums::{AggregateOp, ArithOp, UnaryOp};

    /// Assert that `formula` parses successfully into exactly `expected`.
    #[track_caller]
    fn assert_parses_to(formula: &str, expected: BmaFnNode) {
        assert_eq!(parse_bma_formula(formula), Ok(expected));
    }

    /// Assert that parsing `formula` fails with exactly the error `message`.
    #[track_caller]
    fn assert_fails_with(formula: &str, message: &str) {
        let outcome = parse_bma_formula(formula);
        assert!(outcome.is_err());
        assert_eq!(outcome, Err(message.to_string()));
    }

    #[test]
    fn test_parse_simple_addition() {
        assert_parses_to(
            "3 + 5",
            BmaFnNode::mk_arithmetic(
                BmaFnNode::mk_constant(3),
                BmaFnNode::mk_constant(5),
                ArithOp::Add,
            ),
        );
    }

    #[test]
    fn test_parse_simple_subtraction() {
        assert_parses_to(
            "10 - 7",
            BmaFnNode::mk_arithmetic(
                BmaFnNode::mk_constant(10),
                BmaFnNode::mk_constant(7),
                ArithOp::Minus,
            ),
        );
    }

    #[test]
    fn test_parse_multiplication_and_division() {
        let product = BmaFnNode::mk_arithmetic(
            BmaFnNode::mk_constant(8),
            BmaFnNode::mk_constant(4),
            ArithOp::Times,
        );
        assert_parses_to(
            "8 * 4 / 2",
            BmaFnNode::mk_arithmetic(product, BmaFnNode::mk_constant(2), ArithOp::Div),
        );
    }

    #[test]
    fn test_parse_nested_arithmetic() {
        let parenthesised = BmaFnNode::mk_arithmetic(
            BmaFnNode::mk_constant(5),
            BmaFnNode::mk_constant(2),
            ArithOp::Times,
        );
        assert_parses_to(
            "3 + (5 * 2)",
            BmaFnNode::mk_arithmetic(BmaFnNode::mk_constant(3), parenthesised, ArithOp::Add),
        );
    }

    #[test]
    fn test_parse_abs_function() {
        assert_parses_to(
            "abs(5)",
            BmaFnNode::mk_unary(BmaFnNode::mk_constant(5), UnaryOp::Abs),
        );
    }

    #[test]
    fn test_parse_aggregate_min() {
        let sum_argument = BmaFnNode::mk_arithmetic(
            BmaFnNode::mk_constant(5),
            BmaFnNode::mk_variable("variable"),
            ArithOp::Add,
        );
        assert_parses_to(
            "min(3, 5, 5 + variable)",
            BmaFnNode::mk_aggregation(
                AggregateOp::Min,
                vec![
                    BmaFnNode::mk_constant(3),
                    BmaFnNode::mk_constant(5),
                    sum_argument,
                ],
            ),
        );
    }

    #[test]
    fn test_parse_unmatched_parentheses() {
        assert_fails_with(
            "3 + (5 * 2",
            "Expected ')' to previously encountered opening counterpart.",
        );
    }

    #[test]
    fn test_parse_invalid_token() {
        assert_fails_with("5 + @", "Unexpected character: '@'");
    }

    #[test]
    fn test_parse_function_with_multiple_arguments() {
        assert_parses_to(
            "max(3, 5, 10)",
            BmaFnNode::mk_aggregation(
                AggregateOp::Max,
                vec![
                    BmaFnNode::mk_constant(3),
                    BmaFnNode::mk_constant(5),
                    BmaFnNode::mk_constant(10),
                ],
            ),
        );
    }

    #[test]
    fn test_parse_empty_formula() {
        assert_fails_with("", "Expected formula, found nothing.");
    }
}
Loading

0 comments on commit b5aeb00

Please sign in to comment.