From 7cd3b108572a796dc109cd8e45f0c792bd4dc1e5 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 02:38:30 +0800 Subject: [PATCH 01/19] Simplify push_{lhs,rhs}_delimiter --- src/diff/graph.rs | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 23f9c30633..a599e1f5e7 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -211,44 +211,24 @@ fn push_lhs_delimiter<'a>( entered: &Stack>, delimiter: &'a Syntax<'a>, ) -> Stack> { - let mut modifying_head = false; - let (mut lhs_delims, rhs_delims) = match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => { - modifying_head = true; - (lhs_delims.clone(), rhs_delims.clone()) - } - _ => (Stack::new(), Stack::new()), - }; - lhs_delims = lhs_delims.push(delimiter); - - let entered = if modifying_head { - entered.pop().unwrap() - } else { - entered.clone() - }; - entered.push(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) + match entered.peek() { + Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( + EnteredDelimiter::PopEither((lhs_delims.push(delimiter), rhs_delims.clone())), + ), + _ => entered.push(EnteredDelimiter::PopEither((Stack::new().push(delimiter), Stack::new()))), + } } fn push_rhs_delimiter<'a>( entered: &Stack>, delimiter: &'a Syntax<'a>, ) -> Stack> { - let mut modifying_head = false; - let (lhs_delims, mut rhs_delims) = match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => { - modifying_head = true; - (lhs_delims.clone(), rhs_delims.clone()) - } - _ => (Stack::new(), Stack::new()), - }; - rhs_delims = rhs_delims.push(delimiter); - - let entered = if modifying_head { - entered.pop().unwrap() - } else { - entered.clone() - }; - entered.push(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) + match entered.peek() { + Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( + EnteredDelimiter::PopEither((lhs_delims.clone(), rhs_delims.push(delimiter))), + ), + _ => entered.push(EnteredDelimiter::PopEither((Stack::new(), Stack::new().push(delimiter)))), + } } impl<'a, 'b> Vertex<'a, 'b> { From 97f9a525bd9907d7f3df4bb2b8924fad4d926ab1 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 05:00:20 +0800 Subject: [PATCH 02/19] Remove field can_pop_either from Vertex --- src/diff/graph.rs | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index a599e1f5e7..bb47cd355d 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -56,7 +56,6 @@ pub struct Vertex<'a, 'b> { parents: Stack>, lhs_parent_id: Option, rhs_parent_id: Option, - can_pop_either: bool, } impl<'a, 'b> PartialEq for Vertex<'a, 'b> { @@ -86,7 +85,7 @@ impl<'a, 'b> PartialEq for Vertex<'a, 'b> { // where we can pop sides together, we don't consider the // case where we get a better diff by popping each side // separately. - && self.can_pop_either == other.can_pop_either + && can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents) } } impl<'a, 'b> Eq for Vertex<'a, 'b> {} @@ -98,7 +97,7 @@ impl<'a, 'b> Hash for Vertex<'a, 'b> { self.lhs_parent_id.hash(state); self.rhs_parent_id.hash(state); - self.can_pop_either.hash(state); + can_pop_either_parent(&self.parents).hash(state); } } @@ -215,7 +214,10 @@ fn push_lhs_delimiter<'a>( Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( EnteredDelimiter::PopEither((lhs_delims.push(delimiter), rhs_delims.clone())), ), - _ => entered.push(EnteredDelimiter::PopEither((Stack::new().push(delimiter), Stack::new()))), + _ => entered.push(EnteredDelimiter::PopEither(( + Stack::new().push(delimiter), + Stack::new(), + ))), } } @@ -227,7 +229,10 @@ fn push_rhs_delimiter<'a>( Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( EnteredDelimiter::PopEither((lhs_delims.clone(), rhs_delims.push(delimiter))), ), - _ => entered.push(EnteredDelimiter::PopEither((Stack::new(), Stack::new().push(delimiter)))), + _ => entered.push(EnteredDelimiter::PopEither(( + Stack::new(), + Stack::new().push(delimiter), + ))), } } @@ -246,7 +251,6 @@ impl<'a, 'b> Vertex<'a, 'b> { parents, lhs_parent_id: None, rhs_parent_id: None, - can_pop_either: false, } } } @@ -427,7 +431,6 @@ pub fn get_set_neighbours<'syn, 'b>( predecessor: Cell::new(None), lhs_syntax: lhs_parent.next_sibling(), rhs_syntax: rhs_parent.next_sibling(), - can_pop_either: can_pop_either_parent(&parents_next), parents: parents_next, lhs_parent_id: lhs_parent.parent().map(Syntax::id), rhs_parent_id: rhs_parent.parent().map(Syntax::id), @@ -452,7 +455,6 @@ pub fn get_set_neighbours<'syn, 'b>( predecessor: Cell::new(None), lhs_syntax: lhs_parent.next_sibling(), rhs_syntax: v.rhs_syntax, - can_pop_either: can_pop_either_parent(&parents_next), parents: parents_next, lhs_parent_id: lhs_parent.parent().map(Syntax::id), rhs_parent_id: v.rhs_parent_id, @@ -477,7 +479,6 @@ pub fn get_set_neighbours<'syn, 'b>( predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, rhs_syntax: rhs_parent.next_sibling(), - can_pop_either: can_pop_either_parent(&parents_next), parents: parents_next, lhs_parent_id: v.lhs_parent_id, rhs_parent_id: rhs_parent.parent().map(Syntax::id), @@ -507,7 +508,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: v.parents.clone(), lhs_parent_id: v.lhs_parent_id, rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, }, alloc, seen, @@ -553,7 +553,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: parents_next, lhs_parent_id: Some(lhs_syntax.id()), rhs_parent_id: Some(rhs_syntax.id()), - can_pop_either: false, }, alloc, seen, @@ -591,7 +590,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: v.parents.clone(), lhs_parent_id: v.lhs_parent_id, rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, }, alloc, seen, @@ -621,7 +619,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: v.parents.clone(), lhs_parent_id: v.lhs_parent_id, rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, }, alloc, seen, @@ -647,7 +644,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: parents_next, lhs_parent_id: Some(lhs_syntax.id()), rhs_parent_id: v.rhs_parent_id, - can_pop_either: true, }, alloc, seen, @@ -675,7 +671,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: v.parents.clone(), lhs_parent_id: v.lhs_parent_id, rhs_parent_id: v.rhs_parent_id, - can_pop_either: v.can_pop_either, }, alloc, seen, @@ -701,7 +696,6 @@ pub fn get_set_neighbours<'syn, 'b>( parents: parents_next, lhs_parent_id: v.lhs_parent_id, rhs_parent_id: Some(rhs_syntax.id()), - can_pop_either: true, }, alloc, seen, From 9f1a0ab1e606e7f32c22d42b6652555426c7b1fc Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 05:15:45 +0800 Subject: [PATCH 03/19] Reduce number of branches of Vertex::eq --- src/diff/graph.rs | 60 +++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index bb47cd355d..26365e1f97 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -60,32 +60,40 @@ pub struct Vertex<'a, 'b> { impl<'a, 'b> PartialEq for Vertex<'a, 'b> { fn eq(&self, other: &Self) -> bool { - self.lhs_syntax.map(|node| node.id()) == other.lhs_syntax.map(|node| node.id()) - && self.rhs_syntax.map(|node| node.id()) == other.rhs_syntax.map(|node| node.id()) - // Strictly speaking, we should compare the whole - // EnteredDelimiter stack, not just the immediate - // parents. By taking the immediate parent, we have - // vertices with different stacks that are 'equal'. - // - // This makes the graph traversal path dependent: the - // first vertex we see 'wins', and we use it for deciding - // how we can pop. - // - // In practice this seems to work well. The first vertex - // has the lowest cost, so has the most PopBoth - // occurrences, which is the best outcome. - // - // Handling this properly would require considering many - // more vertices to be distinct, exponentially increasing - // the graph size relative to tree depth. - && self.lhs_parent_id == other.lhs_parent_id - && self.rhs_parent_id == other.rhs_parent_id - // We do want to distinguish whether we can pop each side - // independently though. Without this, if we find a case - // where we can pop sides together, we don't consider the - // case where we get a better diff by popping each side - // separately. - && can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents) + // Strictly speaking, we should compare the whole + // EnteredDelimiter stack, not just the immediate + // parents. By taking the immediate parent, we have + // vertices with different stacks that are 'equal'. + // + // This makes the graph traversal path dependent: the + // first vertex we see 'wins', and we use it for deciding + // how we can pop. + // + // In practice this seems to work well. The first vertex + // has the lowest cost, so has the most PopBoth + // occurrences, which is the best outcome. + // + // Handling this properly would require considering many + // more vertices to be distinct, exponentially increasing + // the graph size relative to tree depth. + let b0 = match (self.lhs_syntax, other.lhs_syntax) { + (Some(s0), Some(s1)) => s0.id() == s1.id(), + (None, None) => self.lhs_parent_id == other.lhs_parent_id, + _ => false, + }; + let b1 = match (self.rhs_syntax, other.rhs_syntax) { + (Some(s0), Some(s1)) => s0.id() == s1.id(), + (None, None) => self.rhs_parent_id == other.rhs_parent_id, + _ => false, + }; + // We do want to distinguish whether we can pop each side + // independently though. Without this, if we find a case + // where we can pop sides together, we don't consider the + // case where we get a better diff by popping each side + // separately. + let b2 = can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents); + + b0 && b1 && b2 } } impl<'a, 'b> Eq for Vertex<'a, 'b> {} From d2f5e996b60465ee866ae13677930328740a14b3 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 07:19:47 +0800 Subject: [PATCH 04/19] Compress &Syntax and SyntaxId into a usize --- src/diff/dijkstra.rs | 4 +- src/diff/graph.rs | 170 ++++++++++++++++++++++++------------------- 2 files changed, 98 insertions(+), 76 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index f9313c12f6..936dda7815 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -219,9 +219,9 @@ pub fn mark_syntax<'a>( .map(|x| { format!( "{:20} {:20} --- {:3} {:?}", - x.1.lhs_syntax + x.1.lhs_syntax.get_ref() .map_or_else(|| "None".into(), Syntax::dbg_content), - x.1.rhs_syntax + x.1.rhs_syntax.get_ref() .map_or_else(|| "None".into(), Syntax::dbg_content), x.0.cost(), x.0, diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 26365e1f97..52cd9ff196 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -7,6 +7,8 @@ use std::{ cmp::min, fmt, hash::{Hash, Hasher}, + marker::PhantomData, + mem::transmute_copy, }; use strsim::normalized_levenshtein; @@ -19,6 +21,61 @@ use crate::{ }; use Edge::*; +/// Compress `&Syntax` and `SyntaxId` into a usize. +/// +/// Utilize the LSB as flag since `Syntax` is aligned. +/// +/// ```text +/// LSB = 0 -> &Syntax +/// LSB = 1 -> SyntaxId * 2 + 1 +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SyntaxRefOrId<'a> { + data: usize, + phantom: PhantomData<&'a Syntax<'a>>, +} + +impl SyntaxRefOrId<'_> { + pub fn is_ref(&self) -> bool { + self.data & 1 == 0 + } + + pub fn is_id(&self) -> bool { + self.data & 1 == 1 + } + + pub fn get_ref<'a>(&self) -> Option<&'a Syntax<'a>> { + if self.is_ref() { + Some(unsafe { transmute_copy(&self.data) }) + } else { + None + } + } +} + +impl<'a> From<&'a Syntax<'a>> for SyntaxRefOrId<'a> { + fn from(s: &'a Syntax<'a>) -> Self { + Self { + data: unsafe { transmute_copy(&s) }, + phantom: PhantomData, + } + } +} + +impl<'a> From> for SyntaxRefOrId<'a> { + fn from(s: Option) -> Self { + Self { + data: s.map_or(0, |n| n.get() as usize) * 2 + 1, + phantom: PhantomData, + } + } +} + +fn next_syntax<'a>(syntax: &'a Syntax<'a>) -> SyntaxRefOrId<'a> { + let parent_id = syntax.parent().map(Syntax::id); + syntax.next_sibling().map_or(parent_id.into(), |n| n.into()) +} + /// A vertex in a directed acyclic graph that represents a diff. /// /// Each vertex represents two pointers: one to the next unmatched LHS @@ -51,11 +108,9 @@ use Edge::*; pub struct Vertex<'a, 'b> { pub neighbours: RefCell)>>>, pub predecessor: Cell)>>, - pub lhs_syntax: Option<&'a Syntax<'a>>, - pub rhs_syntax: Option<&'a Syntax<'a>>, + pub lhs_syntax: SyntaxRefOrId<'a>, + pub rhs_syntax: SyntaxRefOrId<'a>, parents: Stack>, - lhs_parent_id: Option, - rhs_parent_id: Option, } impl<'a, 'b> PartialEq for Vertex<'a, 'b> { @@ -76,35 +131,24 @@ impl<'a, 'b> PartialEq for Vertex<'a, 'b> { // Handling this properly would require considering many // more vertices to be distinct, exponentially increasing // the graph size relative to tree depth. - let b0 = match (self.lhs_syntax, other.lhs_syntax) { - (Some(s0), Some(s1)) => s0.id() == s1.id(), - (None, None) => self.lhs_parent_id == other.lhs_parent_id, - _ => false, - }; - let b1 = match (self.rhs_syntax, other.rhs_syntax) { - (Some(s0), Some(s1)) => s0.id() == s1.id(), - (None, None) => self.rhs_parent_id == other.rhs_parent_id, - _ => false, - }; + // We do want to distinguish whether we can pop each side // independently though. Without this, if we find a case // where we can pop sides together, we don't consider the // case where we get a better diff by popping each side // separately. - let b2 = can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents); - b0 && b1 && b2 + self.lhs_syntax == other.lhs_syntax + && self.rhs_syntax == other.rhs_syntax + && can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents) } } impl<'a, 'b> Eq for Vertex<'a, 'b> {} impl<'a, 'b> Hash for Vertex<'a, 'b> { fn hash(&self, state: &mut H) { - self.lhs_syntax.map(|node| node.id()).hash(state); - self.rhs_syntax.map(|node| node.id()).hash(state); - - self.lhs_parent_id.hash(state); - self.rhs_parent_id.hash(state); + self.lhs_syntax.hash(state); + self.rhs_syntax.hash(state); can_pop_either_parent(&self.parents).hash(state); } } @@ -246,7 +290,7 @@ fn push_rhs_delimiter<'a>( impl<'a, 'b> Vertex<'a, 'b> { pub fn is_end(&self) -> bool { - self.lhs_syntax.is_none() && self.rhs_syntax.is_none() && self.parents.is_empty() + self.lhs_syntax.is_id() && self.rhs_syntax.is_id() && self.parents.is_empty() } pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { @@ -254,11 +298,9 @@ impl<'a, 'b> Vertex<'a, 'b> { Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax, - rhs_syntax, + lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), + rhs_syntax: rhs_syntax.map_or(None.into(), |s| s.into()), parents, - lhs_parent_id: None, - rhs_parent_id: None, } } } @@ -425,7 +467,7 @@ pub fn get_set_neighbours<'syn, 'b>( let mut res: Vec<(Edge, &Vertex)> = vec![]; - if v.lhs_syntax.is_none() && v.rhs_syntax.is_none() { + if v.lhs_syntax.is_id() && v.rhs_syntax.is_id() { if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&v.parents) { // We have exhausted all the nodes on both lists, so we can // move up to the parent node. @@ -437,11 +479,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_parent.next_sibling(), - rhs_syntax: rhs_parent.next_sibling(), + lhs_syntax: next_syntax(lhs_parent), + rhs_syntax: next_syntax(rhs_parent), parents: parents_next, - lhs_parent_id: lhs_parent.parent().map(Syntax::id), - rhs_parent_id: rhs_parent.parent().map(Syntax::id), }, alloc, seen, @@ -450,7 +490,7 @@ pub fn get_set_neighbours<'syn, 'b>( } } - if v.lhs_syntax.is_none() { + if v.lhs_syntax.is_id() { if let Some((lhs_parent, parents_next)) = try_pop_lhs(&v.parents) { // Move to next after LHS parent. @@ -461,11 +501,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_parent.next_sibling(), + lhs_syntax: next_syntax(lhs_parent), rhs_syntax: v.rhs_syntax, parents: parents_next, - lhs_parent_id: lhs_parent.parent().map(Syntax::id), - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -474,7 +512,7 @@ pub fn get_set_neighbours<'syn, 'b>( } } - if v.rhs_syntax.is_none() { + if v.rhs_syntax.is_id() { if let Some((rhs_parent, parents_next)) = try_pop_rhs(&v.parents) { // Move to next after RHS parent. @@ -486,10 +524,8 @@ pub fn get_set_neighbours<'syn, 'b>( neighbours: RefCell::new(None), predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, - rhs_syntax: rhs_parent.next_sibling(), + rhs_syntax: next_syntax(rhs_parent), parents: parents_next, - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: rhs_parent.parent().map(Syntax::id), }, alloc, seen, @@ -498,7 +534,7 @@ pub fn get_set_neighbours<'syn, 'b>( } } - if let (Some(lhs_syntax), Some(rhs_syntax)) = (&v.lhs_syntax, &v.rhs_syntax) { + if let (Some(lhs_syntax), Some(rhs_syntax)) = (v.lhs_syntax.get_ref(), v.rhs_syntax.get_ref()) { if lhs_syntax == rhs_syntax { let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) @@ -511,11 +547,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.next_sibling(), - rhs_syntax: rhs_syntax.next_sibling(), + lhs_syntax: next_syntax(lhs_syntax), + rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -556,11 +590,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_next, - rhs_syntax: rhs_next, + lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), + rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), parents: parents_next, - lhs_parent_id: Some(lhs_syntax.id()), - rhs_parent_id: Some(rhs_syntax.id()), }, alloc, seen, @@ -593,11 +625,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.next_sibling(), - rhs_syntax: rhs_syntax.next_sibling(), + lhs_syntax: next_syntax(lhs_syntax), + rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -607,7 +637,7 @@ pub fn get_set_neighbours<'syn, 'b>( } } - if let Some(lhs_syntax) = &v.lhs_syntax { + if let Some(lhs_syntax) = v.lhs_syntax.get_ref() { match lhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { @@ -622,11 +652,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.next_sibling(), + lhs_syntax: next_syntax(lhs_syntax), rhs_syntax: v.rhs_syntax, parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -647,11 +675,9 @@ pub fn get_set_neighbours<'syn, 'b>( Vertex { neighbours: RefCell::new(None), predecessor: Cell::new(None), - lhs_syntax: lhs_next, + lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), rhs_syntax: v.rhs_syntax, parents: parents_next, - lhs_parent_id: Some(lhs_syntax.id()), - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -661,7 +687,7 @@ pub fn get_set_neighbours<'syn, 'b>( } } - if let Some(rhs_syntax) = &v.rhs_syntax { + if let Some(rhs_syntax) = v.rhs_syntax.get_ref() { match rhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { @@ -675,10 +701,8 @@ pub fn get_set_neighbours<'syn, 'b>( neighbours: RefCell::new(None), predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, - rhs_syntax: rhs_syntax.next_sibling(), + rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: v.rhs_parent_id, }, alloc, seen, @@ -700,10 +724,8 @@ pub fn get_set_neighbours<'syn, 'b>( neighbours: RefCell::new(None), predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, - rhs_syntax: rhs_next, + rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), parents: parents_next, - lhs_parent_id: v.lhs_parent_id, - rhs_parent_id: Some(rhs_syntax.id()), }, alloc, seen, @@ -732,8 +754,8 @@ pub fn populate_change_map<'a, 'b>( } UnchangedNode { .. } => { // No change on this node or its children. - let lhs = v.lhs_syntax.unwrap(); - let rhs = v.rhs_syntax.unwrap(); + let lhs = v.lhs_syntax.get_ref().unwrap(); + let rhs = v.rhs_syntax.get_ref().unwrap(); insert_deep_unchanged(lhs, rhs, change_map); insert_deep_unchanged(rhs, lhs, change_map); @@ -741,14 +763,14 @@ pub fn populate_change_map<'a, 'b>( EnterUnchangedDelimiter { .. } => { // No change on the outer delimiter, but children may // have changed. - let lhs = v.lhs_syntax.unwrap(); - let rhs = v.rhs_syntax.unwrap(); + let lhs = v.lhs_syntax.get_ref().unwrap(); + let rhs = v.rhs_syntax.get_ref().unwrap(); change_map.insert(lhs, ChangeKind::Unchanged(rhs)); change_map.insert(rhs, ChangeKind::Unchanged(lhs)); } ReplacedComment { levenshtein_pct } => { - let lhs = v.lhs_syntax.unwrap(); - let rhs = v.rhs_syntax.unwrap(); + let lhs = v.lhs_syntax.get_ref().unwrap(); + let rhs = v.rhs_syntax.get_ref().unwrap(); if *levenshtein_pct > 40 { change_map.insert(lhs, ChangeKind::ReplacedComment(lhs, rhs)); @@ -759,11 +781,11 @@ pub fn populate_change_map<'a, 'b>( } } NovelAtomLHS { .. } | EnterNovelDelimiterLHS { .. } => { - let lhs = v.lhs_syntax.unwrap(); + let lhs = v.lhs_syntax.get_ref().unwrap(); change_map.insert(lhs, ChangeKind::Novel); } NovelAtomRHS { .. } | EnterNovelDelimiterRHS { .. } => { - let rhs = v.rhs_syntax.unwrap(); + let rhs = v.rhs_syntax.get_ref().unwrap(); change_map.insert(rhs, ChangeKind::Novel); } } From b635df4538471276aff175df204904b502a99334 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 07:57:25 +0800 Subject: [PATCH 05/19] Simplify get_set_neighbours using Default::default --- src/diff/graph.rs | 75 ++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 52cd9ff196..f720da4f68 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -104,7 +104,7 @@ fn next_syntax<'a>(syntax: &'a Syntax<'a>) -> SyntaxRefOrId<'a> { /// LHS: X A RHS: A /// ^ ^ /// ``` -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq)] pub struct Vertex<'a, 'b> { pub neighbours: RefCell)>>>, pub predecessor: Cell)>>, @@ -113,6 +113,23 @@ pub struct Vertex<'a, 'b> { parents: Stack>, } +impl<'a, 'b> Vertex<'a, 'b> { + pub fn is_end(&self) -> bool { + self.lhs_syntax.is_id() && self.rhs_syntax.is_id() && self.parents.is_empty() + } + + pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { + let parents = Stack::new(); + Vertex { + neighbours: RefCell::new(None), + predecessor: Cell::new(None), + lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), + rhs_syntax: rhs_syntax.map_or(None.into(), |s| s.into()), + parents, + } + } +} + impl<'a, 'b> PartialEq for Vertex<'a, 'b> { fn eq(&self, other: &Self) -> bool { // Strictly speaking, we should compare the whole @@ -143,7 +160,6 @@ impl<'a, 'b> PartialEq for Vertex<'a, 'b> { && can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents) } } -impl<'a, 'b> Eq for Vertex<'a, 'b> {} impl<'a, 'b> Hash for Vertex<'a, 'b> { fn hash(&self, state: &mut H) { @@ -153,8 +169,14 @@ impl<'a, 'b> Hash for Vertex<'a, 'b> { } } +impl<'a, 'b> Default for Vertex<'a, 'b> { + fn default() -> Self { + Self::new(None, None) + } +} + /// Tracks entering syntax List nodes. -#[derive(Clone, PartialEq)] +#[derive(Clone, PartialEq, Eq)] enum EnteredDelimiter<'a> { /// If we've entered the LHS or RHS separately, we can pop either /// side independently. @@ -288,23 +310,6 @@ fn push_rhs_delimiter<'a>( } } -impl<'a, 'b> Vertex<'a, 'b> { - pub fn is_end(&self) -> bool { - self.lhs_syntax.is_id() && self.rhs_syntax.is_id() && self.parents.is_empty() - } - - pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { - let parents = Stack::new(); - Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), - lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), - rhs_syntax: rhs_syntax.map_or(None.into(), |s| s.into()), - parents, - } - } -} - /// An edge in our graph, with an associated [`cost`](Edge::cost). /// /// A syntax node can always be marked as novel, so a vertex will have @@ -477,11 +482,10 @@ pub fn get_set_neighbours<'syn, 'b>( ExitDelimiterBoth, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: next_syntax(lhs_parent), rhs_syntax: next_syntax(rhs_parent), parents: parents_next, + ..Vertex::default() }, alloc, seen, @@ -499,11 +503,10 @@ pub fn get_set_neighbours<'syn, 'b>( ExitDelimiterLHS, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: next_syntax(lhs_parent), rhs_syntax: v.rhs_syntax, parents: parents_next, + ..Vertex::default() }, alloc, seen, @@ -521,11 +524,10 @@ pub fn get_set_neighbours<'syn, 'b>( ExitDelimiterRHS, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, rhs_syntax: next_syntax(rhs_parent), parents: parents_next, + ..Vertex::default() }, alloc, seen, @@ -545,11 +547,10 @@ pub fn get_set_neighbours<'syn, 'b>( UnchangedNode { depth_difference }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: next_syntax(lhs_syntax), rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), + ..Vertex::default() }, alloc, seen, @@ -588,11 +589,10 @@ pub fn get_set_neighbours<'syn, 'b>( EnterUnchangedDelimiter { depth_difference }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), parents: parents_next, + ..Vertex::default() }, alloc, seen, @@ -623,11 +623,10 @@ pub fn get_set_neighbours<'syn, 'b>( ReplacedComment { levenshtein_pct }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: next_syntax(lhs_syntax), rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), + ..Vertex::default() }, alloc, seen, @@ -650,11 +649,10 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: next_syntax(lhs_syntax), rhs_syntax: v.rhs_syntax, parents: v.parents.clone(), + ..Vertex::default() }, alloc, seen, @@ -673,11 +671,10 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), rhs_syntax: v.rhs_syntax, parents: parents_next, + ..Vertex::default() }, alloc, seen, @@ -698,11 +695,10 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, rhs_syntax: next_syntax(rhs_syntax), parents: v.parents.clone(), + ..Vertex::default() }, alloc, seen, @@ -721,11 +717,10 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - neighbours: RefCell::new(None), - predecessor: Cell::new(None), lhs_syntax: v.lhs_syntax, rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), parents: parents_next, + ..Vertex::default() }, alloc, seen, From 69b9214a01b705cd5faaaefe9e905d278f498265 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 08:11:17 +0800 Subject: [PATCH 06/19] Simplify get_set_neighbours by extracting functions --- src/diff/graph.rs | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index f720da4f68..7558a7df47 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -71,11 +71,18 @@ impl<'a> From> for SyntaxRefOrId<'a> { } } -fn next_syntax<'a>(syntax: &'a Syntax<'a>) -> SyntaxRefOrId<'a> { +fn next_sibling_syntax<'a>(syntax: &'a Syntax<'a>) -> SyntaxRefOrId<'a> { let parent_id = syntax.parent().map(Syntax::id); syntax.next_sibling().map_or(parent_id.into(), |n| n.into()) } +fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SyntaxRefOrId<'a> { + children + .get(0) + .copied() + .map_or(Some(syntax.id()).into(), |s| s.into()) +} + /// A vertex in a directed acyclic graph that represents a diff. /// /// Each vertex represents two pointers: one to the next unmatched LHS @@ -482,8 +489,8 @@ pub fn get_set_neighbours<'syn, 'b>( ExitDelimiterBoth, allocate_if_new( Vertex { - lhs_syntax: next_syntax(lhs_parent), - rhs_syntax: next_syntax(rhs_parent), + lhs_syntax: next_sibling_syntax(lhs_parent), + rhs_syntax: next_sibling_syntax(rhs_parent), parents: parents_next, ..Vertex::default() }, @@ -503,7 +510,7 @@ pub fn get_set_neighbours<'syn, 'b>( ExitDelimiterLHS, allocate_if_new( Vertex { - lhs_syntax: next_syntax(lhs_parent), + lhs_syntax: next_sibling_syntax(lhs_parent), rhs_syntax: v.rhs_syntax, parents: parents_next, ..Vertex::default() @@ -525,7 +532,7 @@ pub fn get_set_neighbours<'syn, 'b>( allocate_if_new( Vertex { lhs_syntax: v.lhs_syntax, - rhs_syntax: next_syntax(rhs_parent), + rhs_syntax: next_sibling_syntax(rhs_parent), parents: parents_next, ..Vertex::default() }, @@ -547,8 +554,8 @@ pub fn get_set_neighbours<'syn, 'b>( UnchangedNode { depth_difference }, allocate_if_new( Vertex { - lhs_syntax: next_syntax(lhs_syntax), - rhs_syntax: next_syntax(rhs_syntax), + lhs_syntax: next_sibling_syntax(lhs_syntax), + rhs_syntax: next_sibling_syntax(rhs_syntax), parents: v.parents.clone(), ..Vertex::default() }, @@ -575,9 +582,6 @@ pub fn get_set_neighbours<'syn, 'b>( { // The list delimiters are equal, but children may not be. if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content { - let lhs_next = lhs_children.get(0).copied(); - let rhs_next = rhs_children.get(0).copied(); - // TODO: be consistent between parents_next and next_parents. let parents_next = push_both_delimiters(&v.parents, lhs_syntax, rhs_syntax); @@ -589,8 +593,8 @@ pub fn get_set_neighbours<'syn, 'b>( EnterUnchangedDelimiter { depth_difference }, allocate_if_new( Vertex { - lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), - rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), + lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), + rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), parents: parents_next, ..Vertex::default() }, @@ -623,8 +627,8 @@ pub fn get_set_neighbours<'syn, 'b>( ReplacedComment { levenshtein_pct }, allocate_if_new( Vertex { - lhs_syntax: next_syntax(lhs_syntax), - rhs_syntax: next_syntax(rhs_syntax), + lhs_syntax: next_sibling_syntax(lhs_syntax), + rhs_syntax: next_sibling_syntax(rhs_syntax), parents: v.parents.clone(), ..Vertex::default() }, @@ -649,7 +653,7 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - lhs_syntax: next_syntax(lhs_syntax), + lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: v.rhs_syntax, parents: v.parents.clone(), ..Vertex::default() @@ -661,8 +665,6 @@ pub fn get_set_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - let lhs_next = children.get(0).copied(); - let parents_next = push_lhs_delimiter(&v.parents, lhs_syntax); res.push(( @@ -671,7 +673,7 @@ pub fn get_set_neighbours<'syn, 'b>( }, allocate_if_new( Vertex { - lhs_syntax: lhs_next.map_or(Some(lhs_syntax.id()).into(), |s| s.into()), + lhs_syntax: next_child_syntax(lhs_syntax, children), rhs_syntax: v.rhs_syntax, parents: parents_next, ..Vertex::default() @@ -696,7 +698,7 @@ pub fn get_set_neighbours<'syn, 'b>( allocate_if_new( Vertex { lhs_syntax: v.lhs_syntax, - rhs_syntax: next_syntax(rhs_syntax), + rhs_syntax: next_sibling_syntax(rhs_syntax), parents: v.parents.clone(), ..Vertex::default() }, @@ -707,8 +709,6 @@ pub fn get_set_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - let rhs_next = children.get(0).copied(); - let parents_next = push_rhs_delimiter(&v.parents, rhs_syntax); res.push(( @@ -718,7 +718,7 @@ pub fn get_set_neighbours<'syn, 'b>( allocate_if_new( Vertex { lhs_syntax: v.lhs_syntax, - rhs_syntax: rhs_next.map_or(Some(rhs_syntax.id()).into(), |s| s.into()), + rhs_syntax: next_child_syntax(rhs_syntax, children), parents: parents_next, ..Vertex::default() }, From 827e26d2fc0077ceb359a191ae946592482e0e59 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 08:32:20 +0800 Subject: [PATCH 07/19] Eliminate some vec clones --- src/diff/dijkstra.rs | 32 +++++++++++++++++++------------- src/diff/graph.rs | 12 +++++------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 936dda7815..0c6b59e787 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -5,7 +5,7 @@ use std::{cmp::Reverse, env}; use crate::{ diff::changes::ChangeMap, - diff::graph::{get_set_neighbours, populate_change_map, Edge, Vertex}, + diff::graph::{populate_change_map, set_neighbours, Edge, Vertex}, parse::syntax::Syntax, }; use bumpalo::Bump; @@ -40,18 +40,22 @@ fn shortest_vertex_path<'a, 'b>( break current; } - for neighbour in &get_set_neighbours(current, vertex_arena, &mut seen) { - let (edge, next) = neighbour; - let distance_to_next = distance + edge.cost(); + set_neighbours(current, vertex_arena, &mut seen); - let found_shorter_route = match next.predecessor.get() { - Some((prev_shortest, _)) => distance_to_next < prev_shortest, - None => true, - }; + if let Some(neighbors) = &*current.neighbours.borrow() { + for neighbour in neighbors { + let (edge, next) = neighbour; + let distance_to_next = distance + edge.cost(); - if found_shorter_route { - next.predecessor.replace(Some((distance_to_next, current))); - heap.push(Reverse(distance_to_next), next); + let found_shorter_route = match next.predecessor.get() { + Some((prev_shortest, _)) => distance_to_next < prev_shortest, + None => true, + }; + + if found_shorter_route { + next.predecessor.replace(Some((distance_to_next, current))); + heap.push(Reverse(distance_to_next), next); + } } } @@ -219,9 +223,11 @@ pub fn mark_syntax<'a>( .map(|x| { format!( "{:20} {:20} --- {:3} {:?}", - x.1.lhs_syntax.get_ref() + x.1.lhs_syntax + .get_ref() .map_or_else(|| "None".into(), Syntax::dbg_content), - x.1.rhs_syntax.get_ref() + x.1.rhs_syntax + .get_ref() .map_or_else(|| "None".into(), Syntax::dbg_content), x.0.cost(), x.0, diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 7558a7df47..9a2529698e 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -467,14 +467,13 @@ fn looks_like_punctuation(content: &str) -> bool { /// Compute the neighbours of `v` if we haven't previously done so, /// write them to the .neighbours cell inside `v`, and return them. -pub fn get_set_neighbours<'syn, 'b>( +pub fn set_neighbours<'syn, 'b>( v: &Vertex<'syn, 'b>, alloc: &'b Bump, seen: &mut FxHashMap<&Vertex<'syn, 'b>, Vec<&'b Vertex<'syn, 'b>>>, -) -> Vec<(Edge, &'b Vertex<'syn, 'b>)> { - match &*v.neighbours.borrow() { - Some(neighbours) => return neighbours.clone(), - None => {} +) { + if v.neighbours.borrow().is_some() { + return; } let mut res: Vec<(Edge, &Vertex)> = vec![]; @@ -734,8 +733,7 @@ pub fn get_set_neighbours<'syn, 'b>( "Must always find some next steps if node is not the end" ); - v.neighbours.replace(Some(res.clone())); - res + v.neighbours.replace(Some(res)); } pub fn populate_change_map<'a, 'b>( From 1631eb220cc8c71e7643b7ff3cd5224661e65389 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 08:39:03 +0800 Subject: [PATCH 08/19] Split predecessor to two parts --- src/diff/dijkstra.rs | 17 ++++++----------- src/diff/graph.rs | 4 +++- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 0c6b59e787..7c936410c6 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -43,17 +43,12 @@ fn shortest_vertex_path<'a, 'b>( set_neighbours(current, vertex_arena, &mut seen); if let Some(neighbors) = &*current.neighbours.borrow() { - for neighbour in neighbors { - let (edge, next) = neighbour; + for &(edge, next) in neighbors { let distance_to_next = distance + edge.cost(); - let found_shorter_route = match next.predecessor.get() { - Some((prev_shortest, _)) => distance_to_next < prev_shortest, - None => true, - }; - - if found_shorter_route { - next.predecessor.replace(Some((distance_to_next, current))); + if distance_to_next < next.shortest_distance.get() { + next.shortest_distance.set(distance_to_next); + next.predecessor.replace(Some(current)); heap.push(Reverse(distance_to_next), next); } } @@ -74,9 +69,9 @@ fn shortest_vertex_path<'a, 'b>( heap.len(), ); - let mut current = Some((0, end)); + let mut current = Some(end); let mut vertex_route: Vec<&'b Vertex<'a, 'b>> = vec![]; - while let Some((_, node)) = current { + while let Some(node) = current { vertex_route.push(node); current = node.predecessor.get(); } diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 9a2529698e..59c96171d5 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -114,7 +114,8 @@ fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> #[derive(Debug, Clone, Eq)] pub struct Vertex<'a, 'b> { pub neighbours: RefCell)>>>, - pub predecessor: Cell)>>, + pub shortest_distance: Cell, + pub predecessor: Cell>>, pub lhs_syntax: SyntaxRefOrId<'a>, pub rhs_syntax: SyntaxRefOrId<'a>, parents: Stack>, @@ -129,6 +130,7 @@ impl<'a, 'b> Vertex<'a, 'b> { let parents = Stack::new(); Vertex { neighbours: RefCell::new(None), + shortest_distance: Cell::new(u64::MAX), predecessor: Cell::new(None), lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), rhs_syntax: rhs_syntax.map_or(None.into(), |s| s.into()), From 9c551abe6876a1514d6505076257faa7ffe1e2db Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 08:59:16 +0800 Subject: [PATCH 09/19] Change a RefCell in Vertex to UnsafeCell --- src/diff/dijkstra.rs | 9 ++++----- src/diff/graph.rs | 23 +++++++++++++---------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 7c936410c6..2c2b911968 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -42,7 +42,7 @@ fn shortest_vertex_path<'a, 'b>( set_neighbours(current, vertex_arena, &mut seen); - if let Some(neighbors) = &*current.neighbours.borrow() { + if let Some(neighbors) = unsafe { &*current.neighbours.get() } { for &(edge, next) in neighbors { let distance_to_next = distance + edge.cost(); @@ -110,7 +110,7 @@ fn shortest_path<'a, 'b>( size_hint: usize, graph_limit: usize, ) -> Result)>, ExceededGraphLimit> { - let start: &'b Vertex<'a, 'b> = vertex_arena.alloc(start.clone()); + let start: &'b Vertex<'a, 'b> = vertex_arena.alloc(start); let vertex_path = shortest_vertex_path(start, vertex_arena, size_hint, graph_limit)?; Ok(shortest_path_with_edges(&vertex_path)) } @@ -119,9 +119,8 @@ fn edge_between<'a, 'b>(before: &Vertex<'a, 'b>, after: &Vertex<'a, 'b>) -> Edge assert_ne!(before, after); let mut shortest_edge: Option = None; - if let Some(neighbours) = &*before.neighbours.borrow() { - for neighbour in neighbours { - let (edge, next) = *neighbour; + if let Some(neighbours) = unsafe { &*before.neighbours.get() } { + for &(edge, next) in neighbours { // If there are multiple edges that can take us to `next`, // prefer the shortest. if *next == *after { diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 59c96171d5..7879071f7f 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -3,7 +3,7 @@ use bumpalo::Bump; use rustc_hash::FxHashMap; use std::{ - cell::{Cell, RefCell}, + cell::{Cell, UnsafeCell}, cmp::min, fmt, hash::{Hash, Hasher}, @@ -62,7 +62,7 @@ impl<'a> From<&'a Syntax<'a>> for SyntaxRefOrId<'a> { } } -impl<'a> From> for SyntaxRefOrId<'a> { +impl From> for SyntaxRefOrId<'_> { fn from(s: Option) -> Self { Self { data: s.map_or(0, |n| n.get() as usize) * 2 + 1, @@ -111,9 +111,10 @@ fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> /// LHS: X A RHS: A /// ^ ^ /// ``` -#[derive(Debug, Clone, Eq)] +#[derive(Debug)] pub struct Vertex<'a, 'b> { - pub neighbours: RefCell)>>>, + // TODO: how to design the boundary of unsafe? + pub neighbours: UnsafeCell)>>>, pub shortest_distance: Cell, pub predecessor: Cell>>, pub lhs_syntax: SyntaxRefOrId<'a>, @@ -129,7 +130,7 @@ impl<'a, 'b> Vertex<'a, 'b> { pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { let parents = Stack::new(); Vertex { - neighbours: RefCell::new(None), + neighbours: UnsafeCell::new(None), shortest_distance: Cell::new(u64::MAX), predecessor: Cell::new(None), lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), @@ -139,7 +140,7 @@ impl<'a, 'b> Vertex<'a, 'b> { } } -impl<'a, 'b> PartialEq for Vertex<'a, 'b> { +impl PartialEq for Vertex<'_, '_> { fn eq(&self, other: &Self) -> bool { // Strictly speaking, we should compare the whole // EnteredDelimiter stack, not just the immediate @@ -170,7 +171,9 @@ impl<'a, 'b> PartialEq for Vertex<'a, 'b> { } } -impl<'a, 'b> Hash for Vertex<'a, 'b> { +impl Eq for Vertex<'_, '_> {} + +impl Hash for Vertex<'_, '_> { fn hash(&self, state: &mut H) { self.lhs_syntax.hash(state); self.rhs_syntax.hash(state); @@ -178,7 +181,7 @@ impl<'a, 'b> Hash for Vertex<'a, 'b> { } } -impl<'a, 'b> Default for Vertex<'a, 'b> { +impl Default for Vertex<'_, '_> { fn default() -> Self { Self::new(None, None) } @@ -474,7 +477,7 @@ pub fn set_neighbours<'syn, 'b>( alloc: &'b Bump, seen: &mut FxHashMap<&Vertex<'syn, 'b>, Vec<&'b Vertex<'syn, 'b>>>, ) { - if v.neighbours.borrow().is_some() { + if unsafe { (&*v.neighbours.get()).is_some() } { return; } @@ -735,7 +738,7 @@ pub fn set_neighbours<'syn, 'b>( "Must always find some next steps if node is not the end" ); - v.neighbours.replace(Some(res)); + unsafe { *v.neighbours.get() = Some(res) }; } pub fn populate_change_map<'a, 'b>( From 7d36c5f807e9bf361f77a2ac0cdccca63ae7bf1d Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 11:36:55 +0800 Subject: [PATCH 10/19] Refactor seen map --- src/diff/graph.rs | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 7879071f7f..136ad38faf 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -425,38 +425,28 @@ impl Edge { } } +type SeenMap<'syn, 'b> = + FxHashMap<&'b Vertex<'syn, 'b>, (Option<&'b Vertex<'syn, 'b>>, Option<&'b Vertex<'syn, 'b>>)>; + fn allocate_if_new<'syn, 'b>( v: Vertex<'syn, 'b>, alloc: &'b Bump, - seen: &mut FxHashMap<&Vertex<'syn, 'b>, Vec<&'b Vertex<'syn, 'b>>>, + seen: &mut SeenMap<'syn, 'b>, ) -> &'b Vertex<'syn, 'b> { match seen.get_mut(&v) { - Some(existing) => { - // Don't explore more than two possible parenthesis - // nestings for each syntax node pair. - if let Some(allocated) = existing.last() { - if existing.len() >= 2 { - return *allocated; - } - } - - // If we have seen exactly this graph node before, even - // considering parenthesis matching, return it. - for existing_node in existing.iter() { - if existing_node.parents == v.parents { - return existing_node; - } + Some(existing) => match existing { + (Some(_), Some(v2)) => v2, + (Some(v1), None) if v1.parents == v.parents => v1, + (Some(_), sv2) => { + let allocated = alloc.alloc(v); + *sv2 = Some(allocated); + allocated } - - // We haven't reached the graph node limit yet, allocate a - // new one. - let allocated = alloc.alloc(v); - existing.push(allocated); - allocated - } + _ => unreachable!(), + }, None => { let allocated = alloc.alloc(v); - seen.insert(allocated, vec![allocated]); + seen.insert(allocated, (Some(allocated), None)); allocated } } @@ -475,7 +465,7 @@ fn looks_like_punctuation(content: &str) -> bool { pub fn set_neighbours<'syn, 'b>( v: &Vertex<'syn, 'b>, alloc: &'b Bump, - seen: &mut FxHashMap<&Vertex<'syn, 'b>, Vec<&'b Vertex<'syn, 'b>>>, + seen: &mut SeenMap<'syn, 'b>, ) { if unsafe { (&*v.neighbours.get()).is_some() } { return; From 2293886263222a8a7487345060eb23cb9512991c Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 12:51:07 +0800 Subject: [PATCH 11/19] Minor fixes --- src/diff/graph.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 136ad38faf..300b0b82c5 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -35,7 +35,7 @@ pub struct SyntaxRefOrId<'a> { phantom: PhantomData<&'a Syntax<'a>>, } -impl SyntaxRefOrId<'_> { +impl<'a> SyntaxRefOrId<'a> { pub fn is_ref(&self) -> bool { self.data & 1 == 0 } @@ -44,7 +44,7 @@ impl SyntaxRefOrId<'_> { self.data & 1 == 1 } - pub fn get_ref<'a>(&self) -> Option<&'a Syntax<'a>> { + pub fn get_ref(&self) -> Option<&'a Syntax<'a>> { if self.is_ref() { Some(unsafe { transmute_copy(&self.data) }) } else { @@ -56,7 +56,7 @@ impl SyntaxRefOrId<'_> { impl<'a> From<&'a Syntax<'a>> for SyntaxRefOrId<'a> { fn from(s: &'a Syntax<'a>) -> Self { Self { - data: unsafe { transmute_copy(&s) }, + data: s as *const _ as _, phantom: PhantomData, } } @@ -77,10 +77,8 @@ fn next_sibling_syntax<'a>(syntax: &'a Syntax<'a>) -> SyntaxRefOrId<'a> { } fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> SyntaxRefOrId<'a> { - children - .get(0) - .copied() - .map_or(Some(syntax.id()).into(), |s| s.into()) + let child = children.get(0).copied(); + child.map_or(Some(syntax.id()).into(), |s| s.into()) } /// A vertex in a directed acyclic graph that represents a diff. From 2c6b7060a35a088e4a194fde4c51b18004f64c7f Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 19:20:22 +0800 Subject: [PATCH 12/19] Refactor parents representation --- src/diff/graph.rs | 215 ++++++++++++++++++---------------------------- src/diff/stack.rs | 20 ++--- 2 files changed, 94 insertions(+), 141 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 300b0b82c5..21255ef77f 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -5,7 +5,6 @@ use rustc_hash::FxHashMap; use std::{ cell::{Cell, UnsafeCell}, cmp::min, - fmt, hash::{Hash, Hasher}, marker::PhantomData, mem::transmute_copy, @@ -117,25 +116,34 @@ pub struct Vertex<'a, 'b> { pub predecessor: Cell>>, pub lhs_syntax: SyntaxRefOrId<'a>, pub rhs_syntax: SyntaxRefOrId<'a>, - parents: Stack>, + lhs_parents: Stack>, + rhs_parents: Stack>, } impl<'a, 'b> Vertex<'a, 'b> { pub fn is_end(&self) -> bool { - self.lhs_syntax.is_id() && self.rhs_syntax.is_id() && self.parents.is_empty() + self.lhs_syntax.is_id() + && self.rhs_syntax.is_id() + && self.lhs_parents.is_empty() + && self.rhs_parents.is_empty() } pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { - let parents = Stack::new(); Vertex { neighbours: UnsafeCell::new(None), shortest_distance: Cell::new(u64::MAX), predecessor: Cell::new(None), lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), rhs_syntax: rhs_syntax.map_or(None.into(), |s| s.into()), - parents, + lhs_parents: Stack::new(), + rhs_parents: Stack::new(), } } + + fn can_pop_either_parent(&self) -> bool { + matches!(self.lhs_parents.peek(), Some(PopEither(_))) + || matches!(self.rhs_parents.peek(), Some(PopEither(_))) + } } impl PartialEq for Vertex<'_, '_> { @@ -165,7 +173,7 @@ impl PartialEq for Vertex<'_, '_> { self.lhs_syntax == other.lhs_syntax && self.rhs_syntax == other.rhs_syntax - && can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents) + && self.can_pop_either_parent() == other.can_pop_either_parent() } } @@ -175,7 +183,7 @@ impl Hash for Vertex<'_, '_> { fn hash(&self, state: &mut H) { self.lhs_syntax.hash(state); self.rhs_syntax.hash(state); - can_pop_either_parent(&self.parents).hash(state); + self.can_pop_either_parent().hash(state); } } @@ -186,13 +194,13 @@ impl Default for Vertex<'_, '_> { } /// Tracks entering syntax List nodes. -#[derive(Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] enum EnteredDelimiter<'a> { /// If we've entered the LHS or RHS separately, we can pop either /// side independently. /// /// Assumes that at least one stack is non-empty. - PopEither((Stack<&'a Syntax<'a>>, Stack<&'a Syntax<'a>>)), + PopEither(&'a Syntax<'a>), /// If we've entered the LHS and RHS together, we must pop both /// sides together too. Otherwise we'd consider the following case to have no changes. /// @@ -200,126 +208,59 @@ enum EnteredDelimiter<'a> { /// Old: (a b c) /// New: (a b) c /// ``` - PopBoth((&'a Syntax<'a>, &'a Syntax<'a>)), + PopBoth(&'a Syntax<'a>), } -impl<'a> fmt::Debug for EnteredDelimiter<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let desc = match self { - EnteredDelimiter::PopEither((lhs_delims, rhs_delims)) => { - format!( - "PopEither(lhs count: {}, rhs count: {})", - lhs_delims.size(), - rhs_delims.size() - ) - } - EnteredDelimiter::PopBoth(_) => "PopBoth".to_string(), - }; - f.write_str(&desc) - } -} +use EnteredDelimiter::*; -fn push_both_delimiters<'a>( - entered: &Stack>, +fn push_both<'a>( + lhs_entered: &Stack>, + rhs_entered: &Stack>, lhs_delim: &'a Syntax<'a>, rhs_delim: &'a Syntax<'a>, -) -> Stack> { - entered.push(EnteredDelimiter::PopBoth((lhs_delim, rhs_delim))) -} - -fn can_pop_either_parent(entered: &Stack) -> bool { - matches!(entered.peek(), Some(EnteredDelimiter::PopEither(_))) +) -> (Stack>, Stack>) { + ( + lhs_entered.push(PopBoth(lhs_delim)), + rhs_entered.push(PopBoth(rhs_delim)), + ) } -fn try_pop_both<'a>( +fn push_either<'a>( entered: &Stack>, -) -> Option<(&'a Syntax<'a>, &'a Syntax<'a>, Stack>)> { - match entered.peek() { - Some(EnteredDelimiter::PopBoth((lhs_delim, rhs_delim))) => { - Some((lhs_delim, rhs_delim, entered.pop().unwrap())) - } - _ => None, - } + delim: &'a Syntax<'a>, +) -> Stack> { + entered.push(PopEither(delim)) } -fn try_pop_lhs<'a>( - entered: &Stack>, -) -> Option<(&'a Syntax<'a>, Stack>)> { - match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => match lhs_delims.peek() { - Some(lhs_delim) => { - let mut entered = entered.pop().unwrap(); - let new_lhs_delims = lhs_delims.pop().unwrap(); - - if !new_lhs_delims.is_empty() || !rhs_delims.is_empty() { - entered = entered.push(EnteredDelimiter::PopEither(( - new_lhs_delims, - rhs_delims.clone(), - ))); - } - - Some((lhs_delim, entered)) - } - None => None, - }, +fn try_pop_both<'a>( + lhs_entered: &Stack>, + rhs_entered: &Stack>, +) -> Option<( + &'a Syntax<'a>, + &'a Syntax<'a>, + Stack>, + Stack>, +)> { + match (lhs_entered.peek(), rhs_entered.peek()) { + (Some(PopBoth(lhs_delim)), Some(PopBoth(rhs_delim))) => Some(( + lhs_delim, + rhs_delim, + lhs_entered.pop().unwrap(), + rhs_entered.pop().unwrap(), + )), _ => None, } } -fn try_pop_rhs<'a>( +fn try_pop_either<'a>( entered: &Stack>, ) -> Option<(&'a Syntax<'a>, Stack>)> { match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => match rhs_delims.peek() { - Some(rhs_delim) => { - let mut entered = entered.pop().unwrap(); - let new_rhs_delims = rhs_delims.pop().unwrap(); - - if !lhs_delims.is_empty() || !new_rhs_delims.is_empty() { - entered = entered.push(EnteredDelimiter::PopEither(( - lhs_delims.clone(), - new_rhs_delims, - ))); - } - - Some((rhs_delim, entered)) - } - None => None, - }, + Some(PopEither(delim)) => Some((delim, entered.pop().unwrap())), _ => None, } } -fn push_lhs_delimiter<'a>( - entered: &Stack>, - delimiter: &'a Syntax<'a>, -) -> Stack> { - match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( - EnteredDelimiter::PopEither((lhs_delims.push(delimiter), rhs_delims.clone())), - ), - _ => entered.push(EnteredDelimiter::PopEither(( - Stack::new().push(delimiter), - Stack::new(), - ))), - } -} - -fn push_rhs_delimiter<'a>( - entered: &Stack>, - delimiter: &'a Syntax<'a>, -) -> Stack> { - match entered.peek() { - Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push( - EnteredDelimiter::PopEither((lhs_delims.clone(), rhs_delims.push(delimiter))), - ), - _ => entered.push(EnteredDelimiter::PopEither(( - Stack::new(), - Stack::new().push(delimiter), - ))), - } -} - /// An edge in our graph, with an associated [`cost`](Edge::cost). /// /// A syntax node can always be marked as novel, so a vertex will have @@ -434,11 +375,14 @@ fn allocate_if_new<'syn, 'b>( match seen.get_mut(&v) { Some(existing) => match existing { (Some(_), Some(v2)) => v2, - (Some(v1), None) if v1.parents == v.parents => v1, - (Some(_), sv2) => { - let allocated = alloc.alloc(v); - *sv2 = Some(allocated); - allocated + (Some(v1), sv2) => { + if v1.lhs_parents == v.lhs_parents && v1.rhs_parents == v.rhs_parents { + v1 + } else { + let allocated = alloc.alloc(v); + *sv2 = Some(allocated); + allocated + } } _ => unreachable!(), }, @@ -472,7 +416,9 @@ pub fn set_neighbours<'syn, 'b>( let mut res: Vec<(Edge, &Vertex)> = vec![]; if v.lhs_syntax.is_id() && v.rhs_syntax.is_id() { - if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&v.parents) { + if let Some((lhs_parent, rhs_parent, lhs_parents_next, rhs_parents_next)) = + try_pop_both(&v.lhs_parents, &v.rhs_parents) + { // We have exhausted all the nodes on both lists, so we can // move up to the parent node. @@ -483,7 +429,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_parent), rhs_syntax: next_sibling_syntax(rhs_parent), - parents: parents_next, + lhs_parents: lhs_parents_next, + rhs_parents: rhs_parents_next, ..Vertex::default() }, alloc, @@ -494,7 +441,7 @@ pub fn set_neighbours<'syn, 'b>( } if v.lhs_syntax.is_id() { - if let Some((lhs_parent, parents_next)) = try_pop_lhs(&v.parents) { + if let Some((lhs_parent, lhs_parents_next)) = try_pop_either(&v.lhs_parents) { // Move to next after LHS parent. // Continue from sibling of parent. @@ -504,7 +451,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_parent), rhs_syntax: v.rhs_syntax, - parents: parents_next, + lhs_parents: lhs_parents_next, + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -515,7 +463,7 @@ pub fn set_neighbours<'syn, 'b>( } if v.rhs_syntax.is_id() { - if let Some((rhs_parent, parents_next)) = try_pop_rhs(&v.parents) { + if let Some((rhs_parent, rhs_parents_next)) = try_pop_either(&v.rhs_parents) { // Move to next after RHS parent. // Continue from sibling of parent. @@ -525,7 +473,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_sibling_syntax(rhs_parent), - parents: parents_next, + lhs_parents: v.lhs_parents.clone(), + rhs_parents: rhs_parents_next, ..Vertex::default() }, alloc, @@ -548,7 +497,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: next_sibling_syntax(rhs_syntax), - parents: v.parents.clone(), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -575,7 +525,8 @@ pub fn set_neighbours<'syn, 'b>( // The list delimiters are equal, but children may not be. if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content { // TODO: be consistent between parents_next and next_parents. - let parents_next = push_both_delimiters(&v.parents, lhs_syntax, rhs_syntax); + let (lhs_parents_next, rhs_parents_next) = + push_both(&v.lhs_parents, &v.rhs_parents, lhs_syntax, rhs_syntax); let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) @@ -587,7 +538,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), - parents: parents_next, + lhs_parents: lhs_parents_next, + rhs_parents: rhs_parents_next, ..Vertex::default() }, alloc, @@ -621,7 +573,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: next_sibling_syntax(rhs_syntax), - parents: v.parents.clone(), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -647,7 +600,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_sibling_syntax(lhs_syntax), rhs_syntax: v.rhs_syntax, - parents: v.parents.clone(), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -657,8 +611,6 @@ pub fn set_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - let parents_next = push_lhs_delimiter(&v.parents, lhs_syntax); - res.push(( EnterNovelDelimiterLHS { contiguous: lhs_syntax.prev_is_contiguous(), @@ -667,7 +619,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: next_child_syntax(lhs_syntax, children), rhs_syntax: v.rhs_syntax, - parents: parents_next, + lhs_parents: push_either(&v.lhs_parents, lhs_syntax), + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -691,7 +644,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_sibling_syntax(rhs_syntax), - parents: v.parents.clone(), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), ..Vertex::default() }, alloc, @@ -701,8 +655,6 @@ pub fn set_neighbours<'syn, 'b>( } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - let parents_next = push_rhs_delimiter(&v.parents, rhs_syntax); - res.push(( EnterNovelDelimiterRHS { contiguous: rhs_syntax.prev_is_contiguous(), @@ -711,7 +663,8 @@ pub fn set_neighbours<'syn, 'b>( Vertex { lhs_syntax: v.lhs_syntax, rhs_syntax: next_child_syntax(rhs_syntax, children), - parents: parents_next, + lhs_parents: v.lhs_parents.clone(), + rhs_parents: push_either(&v.rhs_parents, rhs_syntax), ..Vertex::default() }, alloc, diff --git a/src/diff/stack.rs b/src/diff/stack.rs index b366937a4e..58dad1ca9c 100644 --- a/src/diff/stack.rs +++ b/src/diff/stack.rs @@ -35,16 +35,16 @@ impl Stack { } } - // O(n) - pub fn size(&self) -> usize { - let mut res = 0; - let mut node = &self.head; - while let Some(next) = node { - res += 1; - node = &next.next; - } - res - } + // // O(n) + // pub fn size(&self) -> usize { + // let mut res = 0; + // let mut node = &self.head; + // while let Some(next) = node { + // res += 1; + // node = &next.next; + // } + // res + // } pub fn is_empty(&self) -> bool { self.head.is_none() From cb1c3e0ea38d31735b3f5433fe6582c137b22a2f Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 20:08:48 +0800 Subject: [PATCH 13/19] Compress EnteredDelimiter --- src/diff/graph.rs | 58 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 21255ef77f..abe79003c9 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -141,8 +141,8 @@ impl<'a, 'b> Vertex<'a, 'b> { } fn can_pop_either_parent(&self) -> bool { - matches!(self.lhs_parents.peek(), Some(PopEither(_))) - || matches!(self.rhs_parents.peek(), Some(PopEither(_))) + matches!(self.lhs_parents.peek().map(Into::into), Some(PopEither(_))) + || matches!(self.rhs_parents.peek().map(Into::into), Some(PopEither(_))) } } @@ -195,7 +195,7 @@ impl Default for Vertex<'_, '_> { /// Tracks entering syntax List nodes. #[derive(Debug, Clone, PartialEq, Eq)] -enum EnteredDelimiter<'a> { +enum EnteredDelimiterEnum<'a> { /// If we've entered the LHS or RHS separately, we can pop either /// side independently. /// @@ -211,7 +211,44 @@ enum EnteredDelimiter<'a> { PopBoth(&'a Syntax<'a>), } -use EnteredDelimiter::*; +use EnteredDelimiterEnum::*; + +/// Compress `EnteredDelimiterEnum` into a usize. +/// +/// Utilize the LSB as flag since `Syntax` is aligned. +/// +/// ```text +/// LSB = 0 -> PopEither +/// LSB = 1 -> PopBoth +/// ``` +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct EnteredDelimiter<'a> { + data: usize, + phantom: PhantomData<&'a Syntax<'a>>, +} + +impl<'a> From<&EnteredDelimiter<'a>> for EnteredDelimiterEnum<'a> { + fn from(delim: &EnteredDelimiter<'a>) -> Self { + if delim.data & 1 == 0 { + PopEither(unsafe { transmute_copy(&delim.data) }) + } else { + PopBoth(unsafe { transmute_copy(&(delim.data ^ 1)) }) + } + } +} + +impl<'a> From<&EnteredDelimiterEnum<'a>> for EnteredDelimiter<'a> { + fn from(delim: &EnteredDelimiterEnum<'a>) -> Self { + let data = match *delim { + PopEither(s) => s as *const _ as usize, + PopBoth(s) => s as *const _ as usize | 1, + }; + EnteredDelimiter { + data, + phantom: PhantomData, + } + } +} fn push_both<'a>( lhs_entered: &Stack>, @@ -220,8 +257,8 @@ fn push_both<'a>( rhs_delim: &'a Syntax<'a>, ) -> (Stack>, Stack>) { ( - lhs_entered.push(PopBoth(lhs_delim)), - rhs_entered.push(PopBoth(rhs_delim)), + lhs_entered.push((&PopBoth(lhs_delim)).into()), + rhs_entered.push((&PopBoth(rhs_delim)).into()), ) } @@ -229,7 +266,7 @@ fn push_either<'a>( entered: &Stack>, delim: &'a Syntax<'a>, ) -> Stack> { - entered.push(PopEither(delim)) + entered.push((&PopEither(delim)).into()) } fn try_pop_both<'a>( @@ -241,7 +278,10 @@ fn try_pop_both<'a>( Stack>, Stack>, )> { - match (lhs_entered.peek(), rhs_entered.peek()) { + match ( + lhs_entered.peek().map(Into::into), + rhs_entered.peek().map(Into::into), + ) { (Some(PopBoth(lhs_delim)), Some(PopBoth(rhs_delim))) => Some(( lhs_delim, rhs_delim, @@ -255,7 +295,7 @@ fn try_pop_both<'a>( fn try_pop_either<'a>( entered: &Stack>, ) -> Option<(&'a Syntax<'a>, Stack>)> { - match entered.peek() { + match entered.peek().map(Into::into) { Some(PopEither(delim)) => Some((delim, entered.pop().unwrap())), _ => None, } From 4e450cb4c1cf88b03ac00ab26129fb7d3f6f4c48 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 20:23:28 +0800 Subject: [PATCH 14/19] Simplify set_neighbours by extracting a lambda --- src/diff/graph.rs | 229 ++++++++++++++++++++-------------------------- 1 file changed, 98 insertions(+), 131 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index abe79003c9..227aed8789 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -407,6 +407,7 @@ impl Edge { type SeenMap<'syn, 'b> = FxHashMap<&'b Vertex<'syn, 'b>, (Option<&'b Vertex<'syn, 'b>>, Option<&'b Vertex<'syn, 'b>>)>; +#[inline(never)] fn allocate_if_new<'syn, 'b>( v: Vertex<'syn, 'b>, alloc: &'b Bump, @@ -453,7 +454,12 @@ pub fn set_neighbours<'syn, 'b>( return; } - let mut res: Vec<(Edge, &Vertex)> = vec![]; + let mut res = vec![]; + + let mut add_neighbor = std::convert::identity( + #[inline(always)] + |edge, vertex| res.push((edge, allocate_if_new(vertex, alloc, seen))), + ); if v.lhs_syntax.is_id() && v.rhs_syntax.is_id() { if let Some((lhs_parent, rhs_parent, lhs_parents_next, rhs_parents_next)) = @@ -463,20 +469,16 @@ pub fn set_neighbours<'syn, 'b>( // move up to the parent node. // Continue from sibling of parent. - res.push(( + add_neighbor( ExitDelimiterBoth, - allocate_if_new( - Vertex { - lhs_syntax: next_sibling_syntax(lhs_parent), - rhs_syntax: next_sibling_syntax(rhs_parent), - lhs_parents: lhs_parents_next, - rhs_parents: rhs_parents_next, - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_sibling_syntax(lhs_parent), + rhs_syntax: next_sibling_syntax(rhs_parent), + lhs_parents: lhs_parents_next, + rhs_parents: rhs_parents_next, + ..Vertex::default() + }, + ); } } @@ -485,20 +487,16 @@ pub fn set_neighbours<'syn, 'b>( // Move to next after LHS parent. // Continue from sibling of parent. - res.push(( + add_neighbor( ExitDelimiterLHS, - allocate_if_new( - Vertex { - lhs_syntax: next_sibling_syntax(lhs_parent), - rhs_syntax: v.rhs_syntax, - lhs_parents: lhs_parents_next, - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_sibling_syntax(lhs_parent), + rhs_syntax: v.rhs_syntax, + lhs_parents: lhs_parents_next, + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } } @@ -507,20 +505,16 @@ pub fn set_neighbours<'syn, 'b>( // Move to next after RHS parent. // Continue from sibling of parent. - res.push(( + add_neighbor( ExitDelimiterRHS, - allocate_if_new( - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling_syntax(rhs_parent), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: rhs_parents_next, - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: v.lhs_syntax, + rhs_syntax: next_sibling_syntax(rhs_parent), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: rhs_parents_next, + ..Vertex::default() + }, + ); } } @@ -531,20 +525,16 @@ pub fn set_neighbours<'syn, 'b>( .unsigned_abs(); // Both nodes are equal, the happy case. - res.push(( + add_neighbor( UnchangedNode { depth_difference }, - allocate_if_new( - Vertex { - lhs_syntax: next_sibling_syntax(lhs_syntax), - rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_sibling_syntax(lhs_syntax), + rhs_syntax: next_sibling_syntax(rhs_syntax), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } if let ( @@ -572,20 +562,16 @@ pub fn set_neighbours<'syn, 'b>( - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); - res.push(( + add_neighbor( EnterUnchangedDelimiter { depth_difference }, - allocate_if_new( - Vertex { - lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), - rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), - lhs_parents: lhs_parents_next, - rhs_parents: rhs_parents_next, - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_child_syntax(lhs_syntax, lhs_children), + rhs_syntax: next_child_syntax(rhs_syntax, rhs_children), + lhs_parents: lhs_parents_next, + rhs_parents: rhs_parents_next, + ..Vertex::default() + }, + ); } } @@ -607,20 +593,17 @@ pub fn set_neighbours<'syn, 'b>( if lhs_content != rhs_content { let levenshtein_pct = (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8; - res.push(( + + add_neighbor( ReplacedComment { levenshtein_pct }, - allocate_if_new( - Vertex { - lhs_syntax: next_sibling_syntax(lhs_syntax), - rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_sibling_syntax(lhs_syntax), + rhs_syntax: next_sibling_syntax(rhs_syntax), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } } } @@ -629,44 +612,36 @@ pub fn set_neighbours<'syn, 'b>( match lhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { - res.push(( + add_neighbor( NovelAtomLHS { // TODO: should this apply if prev is a parent // node rather than a sibling? contiguous: lhs_syntax.prev_is_contiguous(), probably_punctuation: looks_like_punctuation(content), }, - allocate_if_new( - Vertex { - lhs_syntax: next_sibling_syntax(lhs_syntax), - rhs_syntax: v.rhs_syntax, - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_sibling_syntax(lhs_syntax), + rhs_syntax: v.rhs_syntax, + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - res.push(( + add_neighbor( EnterNovelDelimiterLHS { contiguous: lhs_syntax.prev_is_contiguous(), }, - allocate_if_new( - Vertex { - lhs_syntax: next_child_syntax(lhs_syntax, children), - rhs_syntax: v.rhs_syntax, - lhs_parents: push_either(&v.lhs_parents, lhs_syntax), - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: next_child_syntax(lhs_syntax, children), + rhs_syntax: v.rhs_syntax, + lhs_parents: push_either(&v.lhs_parents, lhs_syntax), + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } } } @@ -675,42 +650,34 @@ pub fn set_neighbours<'syn, 'b>( match rhs_syntax { // Step over this novel atom. Syntax::Atom { content, .. } => { - res.push(( + add_neighbor( NovelAtomRHS { contiguous: rhs_syntax.prev_is_contiguous(), probably_punctuation: looks_like_punctuation(content), }, - allocate_if_new( - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_sibling_syntax(rhs_syntax), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: v.rhs_parents.clone(), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: v.lhs_syntax, + rhs_syntax: next_sibling_syntax(rhs_syntax), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: v.rhs_parents.clone(), + ..Vertex::default() + }, + ); } // Step into this partially/fully novel list. Syntax::List { children, .. } => { - res.push(( + add_neighbor( EnterNovelDelimiterRHS { contiguous: rhs_syntax.prev_is_contiguous(), }, - allocate_if_new( - Vertex { - lhs_syntax: v.lhs_syntax, - rhs_syntax: next_child_syntax(rhs_syntax, children), - lhs_parents: v.lhs_parents.clone(), - rhs_parents: push_either(&v.rhs_parents, rhs_syntax), - ..Vertex::default() - }, - alloc, - seen, - ), - )); + Vertex { + lhs_syntax: v.lhs_syntax, + rhs_syntax: next_child_syntax(rhs_syntax, children), + lhs_parents: v.lhs_parents.clone(), + rhs_parents: push_either(&v.rhs_parents, rhs_syntax), + ..Vertex::default() + }, + ); } } } From b0ab6c899a369cab73d79304002563fe08bae10d Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 20:30:20 +0800 Subject: [PATCH 15/19] Reserve vec capacity --- src/diff/graph.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 227aed8789..cb26973a2e 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -454,7 +454,7 @@ pub fn set_neighbours<'syn, 'b>( return; } - let mut res = vec![]; + let mut res = Vec::with_capacity(4); let mut add_neighbor = std::convert::identity( #[inline(always)] From 5043858b87229da889e20a8923357457fb32c766 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Fri, 30 Sep 2022 22:33:43 +0800 Subject: [PATCH 16/19] Remove transmute_copy --- src/diff/graph.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/diff/graph.rs b/src/diff/graph.rs index cb26973a2e..c34e58999f 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -7,7 +7,6 @@ use std::{ cmp::min, hash::{Hash, Hasher}, marker::PhantomData, - mem::transmute_copy, }; use strsim::normalized_levenshtein; @@ -45,7 +44,7 @@ impl<'a> SyntaxRefOrId<'a> { pub fn get_ref(&self) -> Option<&'a Syntax<'a>> { if self.is_ref() { - Some(unsafe { transmute_copy(&self.data) }) + Some(unsafe { &*(self.data as *const _) }) } else { None } @@ -230,9 +229,9 @@ pub struct EnteredDelimiter<'a> { impl<'a> From<&EnteredDelimiter<'a>> for EnteredDelimiterEnum<'a> { fn from(delim: &EnteredDelimiter<'a>) -> Self { if delim.data & 1 == 0 { - PopEither(unsafe { transmute_copy(&delim.data) }) + PopEither(unsafe { &*(delim.data as *const _) }) } else { - PopBoth(unsafe { transmute_copy(&(delim.data ^ 1)) }) + PopBoth(unsafe { &*((delim.data ^ 1) as *const _) }) } } } From 8327c99e7ba93af8d52c030f90bc873fa2b2adf2 Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sat, 1 Oct 2022 00:47:13 +0800 Subject: [PATCH 17/19] Compress seen map --- Cargo.lock | 40 +++++++++++++++++++++++++++++++++++++++- Cargo.toml | 1 + src/diff/dijkstra.rs | 5 ++--- src/diff/graph.rs | 37 +++++++++++++++++++------------------ 4 files changed, 61 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 794b75ed02..7f45c53f2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.18" @@ -183,6 +194,7 @@ dependencies = [ "cc", "clap", "const_format", + "hashbrown 0.12.3", "itertools", "lazy_static", "libc", @@ -260,12 +272,32 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "getrandom" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + [[package]] name = "hermit-abi" version = "0.1.19" @@ -291,7 +323,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.11.2", ] [[package]] @@ -690,6 +722,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index b3db13bf4a..e64d630edb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ clap = { version = "3.1.8", features = ["cargo", "env", "wrap_help"] } itertools = "0.10.1" typed-arena = "2.0.1" rustc-hash = "1.1.0" +hashbrown = "0.12.3" strsim = "0.10.0" lazy_static = "1.4.0" atty = "0.2.14" diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 2c2b911968..2eb95913ff 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -5,13 +5,12 @@ use std::{cmp::Reverse, env}; use crate::{ diff::changes::ChangeMap, - diff::graph::{populate_change_map, set_neighbours, Edge, Vertex}, + diff::graph::{populate_change_map, set_neighbours, Edge, SeenMap, Vertex}, parse::syntax::Syntax, }; use bumpalo::Bump; use itertools::Itertools; use radix_heap::RadixHeapMap; -use rustc_hash::FxHashMap; #[derive(Debug)] pub struct ExceededGraphLimit {} @@ -30,7 +29,7 @@ fn shortest_vertex_path<'a, 'b>( heap.push(Reverse(0), start); - let mut seen = FxHashMap::default(); + let mut seen = SeenMap::default(); seen.reserve(size_hint); let end: &'b Vertex<'a, 'b> = loop { diff --git a/src/diff/graph.rs b/src/diff/graph.rs index c34e58999f..bf3c4a4cd3 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -1,11 +1,11 @@ //! A graph representation for computing tree diffs. use bumpalo::Bump; -use rustc_hash::FxHashMap; +use rustc_hash::FxHasher; use std::{ cell::{Cell, UnsafeCell}, cmp::min, - hash::{Hash, Hasher}, + hash::{BuildHasherDefault, Hash, Hasher}, marker::PhantomData, }; use strsim::normalized_levenshtein; @@ -403,8 +403,11 @@ impl Edge { } } -type SeenMap<'syn, 'b> = - FxHashMap<&'b Vertex<'syn, 'b>, (Option<&'b Vertex<'syn, 'b>>, Option<&'b Vertex<'syn, 'b>>)>; +pub type SeenMap<'syn, 'b> = hashbrown::HashMap< + &'b Vertex<'syn, 'b>, + Option<&'b Vertex<'syn, 'b>>, + BuildHasherDefault, +>; #[inline(never)] fn allocate_if_new<'syn, 'b>( @@ -412,23 +415,21 @@ fn allocate_if_new<'syn, 'b>( alloc: &'b Bump, seen: &mut SeenMap<'syn, 'b>, ) -> &'b Vertex<'syn, 'b> { - match seen.get_mut(&v) { - Some(existing) => match existing { - (Some(_), Some(v2)) => v2, - (Some(v1), sv2) => { - if v1.lhs_parents == v.lhs_parents && v1.rhs_parents == v.rhs_parents { - v1 - } else { - let allocated = alloc.alloc(v); - *sv2 = Some(allocated); - allocated - } + match seen.get_key_value_mut(&v) { + Some((key, value)) => { + if let Some(existing) = value { + return existing; + } + if key.lhs_parents == v.lhs_parents && key.rhs_parents == v.rhs_parents { + return key; } - _ => unreachable!(), - }, + let allocated = alloc.alloc(v); + *value = Some(allocated); + allocated + } None => { let allocated = alloc.alloc(v); - seen.insert(allocated, (Some(allocated), None)); + seen.insert(allocated, None); allocated } } From 3612d0844af5928416c2237e57a0c9ad000a0ceb Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sat, 1 Oct 2022 01:14:02 +0800 Subject: [PATCH 18/19] Skip visited vertices --- src/diff/dijkstra.rs | 4 +++- src/diff/graph.rs | 12 ++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 2eb95913ff..0b517b4ec1 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -38,7 +38,9 @@ fn shortest_vertex_path<'a, 'b>( if current.is_end() { break current; } - + if unsafe { (&*current.neighbours.get()).is_some() } { + continue; // visited + } set_neighbours(current, vertex_arena, &mut seen); if let Some(neighbors) = unsafe { &*current.neighbours.get() } { diff --git a/src/diff/graph.rs b/src/diff/graph.rs index bf3c4a4cd3..1cd6973187 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -450,15 +450,11 @@ pub fn set_neighbours<'syn, 'b>( alloc: &'b Bump, seen: &mut SeenMap<'syn, 'b>, ) { - if unsafe { (&*v.neighbours.get()).is_some() } { - return; - } - - let mut res = Vec::with_capacity(4); + let mut neighbors = Vec::with_capacity(4); let mut add_neighbor = std::convert::identity( #[inline(always)] - |edge, vertex| res.push((edge, allocate_if_new(vertex, alloc, seen))), + |edge, vertex| neighbors.push((edge, allocate_if_new(vertex, alloc, seen))), ); if v.lhs_syntax.is_id() && v.rhs_syntax.is_id() { @@ -682,11 +678,11 @@ pub fn set_neighbours<'syn, 'b>( } } assert!( - !res.is_empty(), + !neighbors.is_empty(), "Must always find some next steps if node is not the end" ); - unsafe { *v.neighbours.get() = Some(res) }; + unsafe { *v.neighbours.get() = Some(neighbors) }; } pub fn populate_change_map<'a, 'b>( From 9e11a2213ff9168500389145f34a6784dea9e89b Mon Sep 17 00:00:00 2001 From: QuarticCat Date: Sat, 1 Oct 2022 02:21:38 +0800 Subject: [PATCH 19/19] Refactor shortest path algorithm --- src/diff/dijkstra.rs | 89 +++++++++----------------------------------- src/diff/graph.rs | 15 ++++---- 2 files changed, 25 insertions(+), 79 deletions(-) diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 0b517b4ec1..00bdec4f0a 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -5,7 +5,7 @@ use std::{cmp::Reverse, env}; use crate::{ diff::changes::ChangeMap, - diff::graph::{populate_change_map, set_neighbours, Edge, SeenMap, Vertex}, + diff::graph::{get_neighbours, populate_change_map, Edge, SeenMap, Vertex}, parse::syntax::Syntax, }; use bumpalo::Bump; @@ -21,7 +21,7 @@ fn shortest_vertex_path<'a, 'b>( vertex_arena: &'b Bump, size_hint: usize, graph_limit: usize, -) -> Result>, ExceededGraphLimit> { +) -> Result)>, ExceededGraphLimit> { // We want to visit nodes with the shortest distance first, but // RadixHeapMap is a max-heap. Ensure nodes are wrapped with // Reverse to flip comparisons. @@ -35,23 +35,22 @@ fn shortest_vertex_path<'a, 'b>( let end: &'b Vertex<'a, 'b> = loop { match heap.pop() { Some((Reverse(distance), current)) => { + if current.is_visited.get() { + continue; + } + current.is_visited.set(true); + if current.is_end() { break current; } - if unsafe { (&*current.neighbours.get()).is_some() } { - continue; // visited - } - set_neighbours(current, vertex_arena, &mut seen); - if let Some(neighbors) = unsafe { &*current.neighbours.get() } { - for &(edge, next) in neighbors { - let distance_to_next = distance + edge.cost(); + for (edge, next) in get_neighbours(current, vertex_arena, &mut seen) { + let distance_to_next = distance + edge.cost(); - if distance_to_next < next.shortest_distance.get() { - next.shortest_distance.set(distance_to_next); - next.predecessor.replace(Some(current)); - heap.push(Reverse(distance_to_next), next); - } + if distance_to_next < next.shortest_distance.get() { + next.shortest_distance.set(distance_to_next); + next.predecessor.set(Some((edge, current))); + heap.push(Reverse(distance_to_next), next); } } @@ -70,10 +69,10 @@ fn shortest_vertex_path<'a, 'b>( heap.len(), ); - let mut current = Some(end); - let mut vertex_route: Vec<&'b Vertex<'a, 'b>> = vec![]; - while let Some(node) = current { - vertex_route.push(node); + let mut current = end.predecessor.get(); + let mut vertex_route = vec![]; + while let Some((edge, node)) = current { + vertex_route.push((edge, node)); current = node.predecessor.get(); } @@ -81,26 +80,6 @@ fn shortest_vertex_path<'a, 'b>( Ok(vertex_route) } -fn shortest_path_with_edges<'a, 'b>( - route: &[&'b Vertex<'a, 'b>], -) -> Vec<(Edge, &'b Vertex<'a, 'b>)> { - let mut prev = route.first().expect("Expected non-empty route"); - - let mut cost = 0; - let mut res = vec![]; - - for vertex in route.iter().skip(1) { - let edge = edge_between(prev, vertex); - res.push((edge, *prev)); - cost += edge.cost(); - - prev = vertex; - } - debug!("Found a path of {} with cost {}.", route.len(), cost); - - res -} - /// Return the shortest route from the `start` to the end vertex. /// /// The vec returned does not return the very last vertex. This is @@ -112,39 +91,7 @@ fn shortest_path<'a, 'b>( graph_limit: usize, ) -> Result)>, ExceededGraphLimit> { let start: &'b Vertex<'a, 'b> = vertex_arena.alloc(start); - let vertex_path = shortest_vertex_path(start, vertex_arena, size_hint, graph_limit)?; - Ok(shortest_path_with_edges(&vertex_path)) -} - -fn edge_between<'a, 'b>(before: &Vertex<'a, 'b>, after: &Vertex<'a, 'b>) -> Edge { - assert_ne!(before, after); - - let mut shortest_edge: Option = None; - if let Some(neighbours) = unsafe { &*before.neighbours.get() } { - for &(edge, next) in neighbours { - // If there are multiple edges that can take us to `next`, - // prefer the shortest. - if *next == *after { - let is_shorter = match shortest_edge { - Some(prev_edge) => edge.cost() < prev_edge.cost(), - None => true, - }; - - if is_shorter { - shortest_edge = Some(edge); - } - } - } - } - - if let Some(edge) = shortest_edge { - return edge; - } - - panic!( - "Expected a route between the two vertices {:#?} and {:#?}", - before, after - ); + shortest_vertex_path(start, vertex_arena, size_hint, graph_limit) } /// What is the total number of AST nodes? diff --git a/src/diff/graph.rs b/src/diff/graph.rs index 1cd6973187..a43b8b0432 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -3,7 +3,7 @@ use bumpalo::Bump; use rustc_hash::FxHasher; use std::{ - cell::{Cell, UnsafeCell}, + cell::Cell, cmp::min, hash::{BuildHasherDefault, Hash, Hasher}, marker::PhantomData, @@ -109,10 +109,9 @@ fn next_child_syntax<'a>(syntax: &'a Syntax<'a>, children: &[&'a Syntax<'a>]) -> /// ``` #[derive(Debug)] pub struct Vertex<'a, 'b> { - // TODO: how to design the boundary of unsafe? - pub neighbours: UnsafeCell)>>>, + pub is_visited: Cell, pub shortest_distance: Cell, - pub predecessor: Cell>>, + pub predecessor: Cell)>>, pub lhs_syntax: SyntaxRefOrId<'a>, pub rhs_syntax: SyntaxRefOrId<'a>, lhs_parents: Stack>, @@ -129,7 +128,7 @@ impl<'a, 'b> Vertex<'a, 'b> { pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self { Vertex { - neighbours: UnsafeCell::new(None), + is_visited: Cell::new(false), shortest_distance: Cell::new(u64::MAX), predecessor: Cell::new(None), lhs_syntax: lhs_syntax.map_or(None.into(), |s| s.into()), @@ -445,11 +444,11 @@ fn looks_like_punctuation(content: &str) -> bool { /// Compute the neighbours of `v` if we haven't previously done so, /// write them to the .neighbours cell inside `v`, and return them. -pub fn set_neighbours<'syn, 'b>( +pub fn get_neighbours<'syn, 'b>( v: &Vertex<'syn, 'b>, alloc: &'b Bump, seen: &mut SeenMap<'syn, 'b>, -) { +) -> Vec<(Edge, &'b Vertex<'syn, 'b>)> { let mut neighbors = Vec::with_capacity(4); let mut add_neighbor = std::convert::identity( @@ -682,7 +681,7 @@ pub fn set_neighbours<'syn, 'b>( "Must always find some next steps if node is not the end" ); - unsafe { *v.neighbours.get() = Some(neighbors) }; + neighbors } pub fn populate_change_map<'a, 'b>(