diff --git a/Cargo.toml b/Cargo.toml index 4328d3d..80142a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,4 +30,5 @@ rand = "0.8.5" regex = "1.10.2" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -serde-xml-rs = "0.6.0" \ No newline at end of file +serde-xml-rs = "0.6.0" +num-rational = "0.4.2" diff --git a/src/lib.rs b/src/lib.rs index 363d529..0254b2f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,7 @@ pub mod bma_model; pub mod json_model; pub mod traits; +pub mod update_fn; pub mod xml_model; mod _impl_bma_model; diff --git a/src/update_fn/bma_fn_tree.rs b/src/update_fn/bma_fn_tree.rs new file mode 100644 index 0000000..8ebeddc --- /dev/null +++ b/src/update_fn/bma_fn_tree.rs @@ -0,0 +1,120 @@ +use crate::update_fn::enums::{AggregateOp, ArithOp, Literal, UnaryOp}; +use crate::update_fn::tokenizer::BmaFnToken; +use std::cmp; +use std::fmt; + +/// Enum of possible node types in a BMA expression syntax tree. +/// +/// In particular, a node type can be: +/// - A "terminal" node containing a literal (variable, constant). +/// - A "unary" node with a `UnaryOp` and a sub-expression. +/// - A binary "arithmetic" node, with an `ArithOp` and two sub-expressions. +/// - An "aggregation" node with an `AggregateOp` and a list of sub-expressions. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum Expression { + Terminal(Literal), + Unary(UnaryOp, Box), + Arithmetic(ArithOp, Box, Box), + Aggregation(AggregateOp, Vec>), +} + +/// A single node in a syntax tree of a BMA function expression. +/// +/// Each node tracks its: +/// - `height`; A non-negative integer starting from 0 (for terminal nodes). +/// - `expression_tree`; A parse tree for the expression. +/// - `function_str`; A canonical string representation of the expression. 
+#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct BmaFnNode { + pub function_str: String, + pub height: u32, + pub expression_tree: Expression, +} + +impl BmaFnNode { + /// "Parse" a new [BmaFnNode] from a list of [BmaFnToken] objects. + pub fn from_tokens(_tokens: &[BmaFnToken]) -> Result { + todo!() + } + + /// Create a "unary" [BmaFnNode] from the given arguments. + /// + /// See also [Expression::Unary]. + pub fn mk_unary(child: BmaFnNode, op: UnaryOp) -> BmaFnNode { + let subform_str = format!("{op}({child})"); + BmaFnNode { + function_str: subform_str, + height: child.height + 1, + expression_tree: Expression::Unary(op, Box::new(child)), + } + } + + /// Create a "binary" arithmetic [BmaFnNode] from the given arguments. + /// + /// See also [Expression::Arithmetic]. + pub fn mk_arithmetic(left: BmaFnNode, right: BmaFnNode, op: ArithOp) -> BmaFnNode { + BmaFnNode { + function_str: format!("({left} {op} {right})"), + height: cmp::max(left.height, right.height) + 1, + expression_tree: Expression::Arithmetic(op, Box::new(left), Box::new(right)), + } + } + + /// Create a [BmaFnNode] representing an integer constant. + /// + /// See also [Expression::Terminal] and [Literal::Int]. + pub fn mk_constant(constant_val: i32) -> BmaFnNode { + Self::mk_literal(Literal::Int(constant_val)) + } + + /// Create a [BmaFnNode] representing a variable. + /// + /// See also [Expression::Terminal] and [Literal::Str]. + pub fn mk_variable(var_name: &str) -> BmaFnNode { + Self::mk_literal(Literal::Str(var_name.to_string())) + } + + /// A helper function which creates a new [BmaFnNode] for the given [Literal] value. + fn mk_literal(literal: Literal) -> BmaFnNode { + BmaFnNode { + function_str: literal.to_string(), + height: 0, + expression_tree: Expression::Terminal(literal), + } + } + + /// Create a [BmaFnNode] representing an aggregation operator applied to given arguments. 
+ pub fn mk_aggregation(op: AggregateOp, inner_nodes: Vec) -> BmaFnNode { + let max_height = inner_nodes + .iter() + .map(|node| node.height) + .max() + .unwrap_or(0); + let child_expressions: Vec = inner_nodes + .iter() + .map(|child| child.function_str.clone()) + .collect(); + let args_str = child_expressions.join(", "); + let function_str = format!("{}({})", op, args_str); + + let inner_boxed_nodes = inner_nodes.into_iter().map(Box::new).collect(); + + BmaFnNode { + function_str, + height: max_height + 1, + expression_tree: Expression::Aggregation(op, inner_boxed_nodes), + } + } +} + +impl BmaFnNode { + pub fn as_str(&self) -> &str { + self.function_str.as_str() + } +} + +impl fmt::Display for BmaFnNode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.function_str) + } +} diff --git a/src/update_fn/enums.rs b/src/update_fn/enums.rs new file mode 100644 index 0000000..849bcb5 --- /dev/null +++ b/src/update_fn/enums.rs @@ -0,0 +1,70 @@ +use serde::{Deserialize, Serialize}; +use std::fmt; + +#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)] +pub enum Literal { + Int(i32), + Str(String), +} + +impl fmt::Display for Literal { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Literal::Int(value) => write!(f, "{}", value), + Literal::Str(value) => write!(f, "{}", value), + } + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)] +pub enum ArithOp { + Add, + Minus, + Times, + Div, +} + +impl fmt::Display for ArithOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ArithOp::Add => write!(f, "+"), + ArithOp::Minus => write!(f, "-"), + ArithOp::Times => write!(f, "*"), + ArithOp::Div => write!(f, "/"), + } + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)] +pub enum UnaryOp { + Ceil, + Floor, + Abs, +} + +impl fmt::Display for UnaryOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + 
UnaryOp::Ceil => write!(f, "ceil"), + UnaryOp::Floor => write!(f, "floor"), + UnaryOp::Abs => write!(f, "abs"), + } + } +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)] +pub enum AggregateOp { + Min, + Max, + Avg, +} + +impl fmt::Display for AggregateOp { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + AggregateOp::Min => write!(f, "min"), + AggregateOp::Max => write!(f, "max"), + AggregateOp::Avg => write!(f, "avg"), + } + } +} diff --git a/src/update_fn/mod.rs b/src/update_fn/mod.rs new file mode 100644 index 0000000..185e2ea --- /dev/null +++ b/src/update_fn/mod.rs @@ -0,0 +1,4 @@ +pub mod bma_fn_tree; +mod enums; +mod parser; +mod tokenizer; diff --git a/src/update_fn/parser.rs b/src/update_fn/parser.rs new file mode 100644 index 0000000..65b3dba --- /dev/null +++ b/src/update_fn/parser.rs @@ -0,0 +1 @@ +// todo diff --git a/src/update_fn/tokenizer.rs b/src/update_fn/tokenizer.rs new file mode 100644 index 0000000..6ca52b5 --- /dev/null +++ b/src/update_fn/tokenizer.rs @@ -0,0 +1,13 @@ +use crate::update_fn::enums::{AggregateOp, ArithOp, Literal, UnaryOp}; + +/// Enum of all possible tokens occurring in a BMA function string. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum BmaFnToken { + Atomic(Literal), + Unary(UnaryOp), + Binary(ArithOp), + Aggregate(AggregateOp, Vec), + TokenList(Vec), +} + +// todo