From bf745cf888d8c5fc592bdc6dc8ae85ba4393e824 Mon Sep 17 00:00:00 2001 From: Mario Carneiro Date: Mon, 28 Oct 2024 13:58:46 +0100 Subject: [PATCH] put parse trees in database order fixes #170 --- metamath-rs/src/formula.rs | 71 +++++++++++++++-------- metamath-rs/src/grammar.rs | 116 ++++++++++++++++++++++++++++++++----- metamath-rs/src/tree.rs | 2 +- 3 files changed, 149 insertions(+), 40 deletions(-) diff --git a/metamath-rs/src/formula.rs b/metamath-rs/src/formula.rs index ddb7830..7850518 100644 --- a/metamath-rs/src/formula.rs +++ b/metamath-rs/src/formula.rs @@ -21,7 +21,10 @@ use crate::as_str; use crate::bit_set::Bitset; use crate::nameck::Atom; use crate::nameck::Nameset; +use crate::scopeck::ExprFragment; +use crate::scopeck::Frame; use crate::scopeck::Hyp; +use crate::scopeck::ScopeResult; use crate::segment_set::SegmentSet; use crate::statement::SymbolType; use crate::statement::TokenIter; @@ -359,6 +362,7 @@ impl Formula { children_count, 0, self.is_variable(node_id), + |_| {}, ); } @@ -392,6 +396,7 @@ impl Formula { children_count, 0, self.is_variable(node_id), + |_| {}, ); } } @@ -408,6 +413,7 @@ impl Formula { children_count, 0, self.is_variable(node_id), + |_| {}, ); } } @@ -507,6 +513,7 @@ impl<'a> FormulaRef<'a> { stack: vec![], sset: self.db.parse_result(), nset: self.db.name_result(), + scope: self.db.scope_result(), }; f.step_into(self.root); f @@ -703,28 +710,36 @@ impl<'a> Debug for SubFormulaRef<'a> { #[derive(Debug)] pub struct Flatten<'a> { formula: &'a Formula, - stack: Vec<(TokenIter<'a>, Option>)>, + #[allow(clippy::type_complexity)] + stack: Vec<( + TokenIter<'a>, + Option<( + &'a Frame, + std::slice::Iter<'a, ExprFragment>, + SiblingIter<'a, Label>, + )>, + )>, + scope: &'a ScopeResult, sset: &'a SegmentSet, nset: &'a Nameset, } impl<'a> Flatten<'a> { fn step_into(&mut self, node_id: NodeId) { - let label = self.formula.tree[node_id]; - let sref = self.sset.statement( - self.nset - .lookup_label(self.nset.atom_name(label)) - .unwrap() - .address, - ); + let atom = self.nset.atom_name(self.formula.tree[node_id]); + let sref = self + .sset + .statement(self.nset.lookup_label(atom).unwrap().address); let mut math_iter = sref.math_iter(); + let frame = self.scope.get(atom).unwrap(); + let var_iter = frame.target.tail.iter(); math_iter.next(); // Always skip the typecode token. - if self.formula.tree.has_children(node_id) { - self.stack - .push((math_iter, Some(self.formula.tree.children_iter(node_id)))); + let children_iter = if self.formula.tree.has_children(node_id) { + Some((frame, var_iter, self.formula.tree.children_iter(node_id))) } else { - self.stack.push((math_iter, None)); + None }; + self.stack.push((math_iter, children_iter)); } } @@ -732,24 +747,22 @@ impl<'a> Iterator for Flatten<'a> { type Item = Symbol; fn next(&mut self) -> Option { - if self.stack.is_empty() { - return None; - } - let stack_end = self.stack.len() - 1; - let (ref mut math_iter, ref mut sibling_iter) = self.stack[stack_end]; + let (math_iter, children) = self.stack.last_mut()?; if let Some(token) = math_iter.next() { // Continue with next token of this syntax let symbol = self.nset.lookup_symbol(token.slice).unwrap(); - match (sibling_iter, symbol.stype) { + match (children, symbol.stype) { (_, SymbolType::Constant) | (None, SymbolType::Variable) => Some(symbol.atom), - (Some(ref mut iter), SymbolType::Variable) => { + (Some((frame, var_iter, children_iter)), SymbolType::Variable) => { // Variable : push into the next child - if let Some(next_child_id) = iter.next() { - self.step_into(next_child_id); - self.next() - } else { - panic!("Empty formula!"); + let var = var_iter.next().expect("not enough variables").var; + for (child, hyp) in children_iter.clone().zip(&frame.hypotheses) { + if matches!(*hyp, Hyp::Floating(_, i, _) if i == var) { + self.step_into(child); + return self.next(); + } } + panic!("Empty formula!"); } } } else { @@ -805,11 +818,19 @@ impl FormulaBuilder { /// Every REDUCE pops `var_count` subformula items on the stack, /// and pushes one single new item, with the popped subformulas as children - pub(crate) fn reduce(&mut self, label: Label, var_count: u8, offset: u8, is_variable: bool) { + pub(crate) fn reduce( + &mut self, + label: Label, + var_count: u8, + offset: u8, + is_variable: bool, + reorder: impl FnOnce(&mut [NodeId]), + ) { assert!(self.stack.len() >= (var_count + offset).into()); let reduce_start = self.stack.len().saturating_sub((var_count + offset).into()); let reduce_end = self.stack.len().saturating_sub(offset.into()); let new_node_id = { + reorder(&mut self.stack[reduce_start..reduce_end]); let children = self.stack.drain(reduce_start..reduce_end); self.tree.add_node(label, children.as_slice()) }; diff --git a/metamath-rs/src/grammar.rs b/metamath-rs/src/grammar.rs index 8d7f3a5..f0c0b65 100644 --- a/metamath-rs/src/grammar.rs +++ b/metamath-rs/src/grammar.rs @@ -6,6 +6,7 @@ use crate::diag::{Diagnostic, StmtParseError}; use crate::formula::{Formula, FormulaBuilder, Label, Symbol, TypeCode}; use crate::nameck::{Atom, NameReader, Nameset}; +use crate::scopeck::{Hyp, ScopeResult}; use crate::segment::Segment; use crate::segment_set::SegmentSet; use crate::statement::{CommandToken, SegmentId, StatementAddress, SymbolType, TokenRef}; @@ -158,12 +159,27 @@ pub struct Grammar { logic_type: TypeCode, typecodes: Vec, type_conversions: Vec<(TypeCode, TypeCode, Label)>, + reorder_cache: HashMap>>, nodes: GrammarTree, root: NodeId, // The root of the Grammar tree diagnostics: HashMap, debug: bool, } +/// We precalculate several common swapping patterns to reduce space usage +/// and also to speed up tree construction. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub(crate) enum Reorder { + None, + Swap10, + Swap021, + Swap210, + Swap120, + Swap201, + #[default] + Other, +} + /// A `Reduce` step applies a completed grammar rule to some of the recent parse trees, /// joining them together as one tree with a new root symbol. /// @@ -176,15 +192,17 @@ struct Reduce { var_count: u8, offset: u8, is_variable: bool, + reorder: Reorder, } impl Reduce { - const fn new(label: Label, var_count: u8) -> Self { + const fn new(label: Label, reorder: Reorder, var_count: u8) -> Self { Reduce { label, var_count, offset: 0, is_variable: false, + reorder, } } @@ -194,6 +212,7 @@ impl Reduce { var_count: 0, offset: 0, is_variable: true, + reorder: Reorder::None, } } } @@ -393,6 +412,7 @@ impl Default for Grammar { logic_type: TypeCode::default(), typecodes: Vec::new(), type_conversions: Vec::new(), + reorder_cache: HashMap::default(), nodes: GrammarTree(Vec::new()), root: 0, diagnostics: HashMap::default(), @@ -482,6 +502,48 @@ impl Grammar { ) } + fn get_reorder(&mut self, scope: &ScopeResult, nset: &Nameset, atom: Label) -> Reorder { + if let Some(reorder) = self.reorder_cache.get(&atom) { + return match *reorder { + Ok(reorder) => reorder, + Err(_) => Reorder::Other, + }; + } + let reorder_value = (|| -> Option<_> { + let frame = scope.get(nset.atom_name(atom))?; + if frame.hypotheses.len() != frame.target.tail.len() { + return None; + } + + let reord = (frame.target.tail.iter()) + .map(|frag| { + let i = (frame.hypotheses.iter()) + .position(|hyp| matches!(*hyp, Hyp::Floating(_, i, _) if i == frag.var))?; + u8::try_from(i).ok() + }) + .collect::>>()?; + let diff_bound = (reord.iter().enumerate()) + .rposition(|(i, &j)| i != usize::from(j)) + .map_or(0, |n| n + 1); + Some(match (diff_bound, &*reord) { + (0, _) => Ok(Reorder::None), + (2, [1, 0, ..]) => Ok(Reorder::Swap10), + (3, [0, 2, 1, ..]) => Ok(Reorder::Swap021), + (3, [2, 1, 0, ..]) => Ok(Reorder::Swap210), + (3, [1, 2, 0, ..]) => Ok(Reorder::Swap120), + (3, [2, 0, 1, ..]) => Ok(Reorder::Swap201), + _ => Err(reord), + }) + })() + .unwrap_or(Ok(Reorder::None)); + let reorder = match reorder_value { + Ok(reorder) => reorder, + Err(_) => Reorder::Other, + }; + self.reorder_cache.insert(atom, reorder_value); + reorder + } + /// Gets the map of a branch fn get_branch(&self, node_id: NodeId) -> &HashMap<(SymbolType, Atom), NextNode> { if let GrammarNode::Branch { map } = &self.nodes.get(node_id) { @@ -545,6 +607,7 @@ impl Grammar { /// Build the parse tree, marking variables with their types fn add_axiom( &mut self, + scope: &ScopeResult, sref: &StatementRef<'_>, nset: &Nameset, names: &mut NameReader<'_>, @@ -580,6 +643,7 @@ impl Grammar { } // We will add this syntax axiom to the grammar tree + let reorder = self.get_reorder(scope, nset, this_label); let mut node = self.root; let mut var_count = 0; while let Some(token) = tokens.next() { @@ -603,7 +667,7 @@ impl Grammar { } else { let leaf_node_id = self .nodes - .create_leaf(Reduce::new(this_label, var_count), this_typecode); + .create_leaf(Reduce::new(this_label, reorder, var_count), this_typecode); self.add_branch(node, atom, symbol.stype, &NextNode::new(leaf_node_id)) } { Ok(next_node) => { @@ -692,7 +756,7 @@ impl Grammar { debug!("Type Conv adding to {} node id {}", node_id, next_node_id); debug!("{:?}", self.node_id(db, node_id)); let mut leaf_label = ref_next_node.leaf_label; - leaf_label.insert(0, Reduce::new(label, 1)); + leaf_label.insert(0, Reduce::new(label, Reorder::None, 1)); self.add_branch( node_id, from_typecode, @@ -716,7 +780,7 @@ impl Grammar { self.nodes.copy_branches( next_node_id, existing_next_node_id, - Reduce::new(label, 1), + Reduce::new(label, Reorder::None, 1), )?; } } @@ -1154,13 +1218,36 @@ impl Grammar { Ok(()) } - fn do_reduce(formula_builder: &mut FormulaBuilder, reduce: Reduce, nset: &Nameset) { + fn do_reduce(&self, formula_builder: &mut FormulaBuilder, reduce: Reduce, nset: &Nameset) { debug!(" REDUCE {:?}", as_str(nset.atom_name(reduce.label))); formula_builder.reduce( reduce.label, reduce.var_count, reduce.offset, reduce.is_variable, + |args| match reduce.reorder { + Reorder::None => {} + Reorder::Swap10 => args.swap(0, 1), + Reorder::Swap021 => args.swap(1, 2), + Reorder::Swap210 => args.swap(0, 2), + Reorder::Swap120 => { + args.swap(0, 1); + args.swap(0, 2) + } + Reorder::Swap201 => { + args.swap(0, 1); + args.swap(1, 2) + } + Reorder::Other => { + if let Some(Err(reorder)) = self.reorder_cache.get(&reduce.label) { + let mut buff = vec![0; args.len()]; + for (&i, &arg) in reorder.iter().zip(&*args) { + buff[i as usize] = arg + } + args.copy_from_slice(&buff); + } + } + }, ); //formula_builder.dump(nset); debug!( @@ -1204,7 +1291,7 @@ impl Grammar { mut typecode, } => { // We found a leaf: REDUCE - Self::do_reduce(&mut formula_builder, reduce, nset); + self.do_reduce(&mut formula_builder, reduce, nset); if e.expected_typecodes.contains(&typecode) { // We found an expected typecode, pop from the stack and continue @@ -1223,7 +1310,7 @@ impl Grammar { }) => { // Found a sub-formula: First optionally REDUCE and continue for &reduce in leaf_label { - Self::do_reduce(&mut formula_builder, reduce, nset); + self.do_reduce(&mut formula_builder, reduce, nset); } e.node_id = *next_node_id; @@ -1245,7 +1332,7 @@ impl Grammar { .next_var_node(self.root, typecode) .ok_or(StmtParseError::UnparseableStatement(last_token.span))?; for &reduce in leaf_label { - Self::do_reduce(&mut formula_builder, reduce, nset); + self.do_reduce(&mut formula_builder, reduce, nset); } e.node_id = next_node_id; } @@ -1256,8 +1343,8 @@ impl Grammar { if *from_typecode == typecode && e.expected_typecodes.contains(to_typecode) { - let reduce = Reduce::new(*label, 1); - Self::do_reduce(&mut formula_builder, reduce, nset); + let reduce = Reduce::new(*label, Reorder::None, 1); + self.do_reduce(&mut formula_builder, reduce, nset); let typecode = if *to_typecode == self.logic_type && convert_to_provable { self.provable_type @@ -1274,7 +1361,7 @@ impl Grammar { .next_var_node(self.root, typecode) .ok_or(StmtParseError::UnparseableStatement(last_token.span))?; for &reduce in leaf_label { - Self::do_reduce(&mut formula_builder, reduce, nset); + self.do_reduce(&mut formula_builder, reduce, nset); } e.node_id = next_node_id; } @@ -1291,7 +1378,7 @@ impl Grammar { { // Found an atom matching one of our next nodes: First optionally REDUCE and continue for &reduce in leaf_label { - Self::do_reduce(&mut formula_builder, reduce, nset); + self.do_reduce(&mut formula_builder, reduce, nset); } // Found an atom matching one of our next nodes: SHIFT, to the next node @@ -1527,7 +1614,7 @@ impl Grammar { .find(|(from_tc, to_tc, _)| *from_tc == source_tc && *to_tc == target_tc) .map(|(_, _, label)| { let mut builder = FormulaBuilder::from_formula(fmla); - builder.reduce(*label, 1, 0, false); + builder.reduce(*label, 1, 0, false, |_| {}); builder.build(target_tc) }) } @@ -1634,6 +1721,7 @@ impl Grammar { let mut grammar = Grammar::default(); let sset = db.parse_result(); let nset = db.name_result(); + let scope = db.scope_result(); // Read information about the grammar from the parser commands grammar.initialize(sset, nset); grammar.root = grammar.nodes.create_branch(); @@ -1649,7 +1737,7 @@ impl Grammar { for segment in segments { for sref in segment { if let Err(diag) = match sref.statement_type() { - StatementType::Axiom => grammar.add_axiom(&sref, nset, &mut names), + StatementType::Axiom => grammar.add_axiom(scope, &sref, nset, &mut names), StatementType::Floating => grammar.add_floating(&sref, nset, &mut names), _ => Ok(()), } { diff --git a/metamath-rs/src/tree.rs b/metamath-rs/src/tree.rs index bf19412..26d9202 100644 --- a/metamath-rs/src/tree.rs +++ b/metamath-rs/src/tree.rs @@ -157,7 +157,7 @@ impl Clone for Tree { } /// An iterator through sibling nodes -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct SiblingIter<'a, TreeItem> { tree: &'a Tree, current_id: Option,