Skip to content

Commit

Permalink
perf: Introduce an auxiliary table for efficient matching existence c…
Browse files Browse the repository at this point in the history
…heck (#67)

Sure, here's the revised version with the term "auxiliary HashMap":

This work follows up on #66. Currently, the Matchings API provides a
quick way to retrieve a matching entry for two arbitrary nodes. However,
a common use case during the merge process involves finding a matching
entry for a node without prior knowledge of which other node it matches
with. While we have an API for this scenario, it is inefficient as it
may iterate over all matchings in the worst case.

This PR introduces an enhancement to address this inefficiency. The
approach involves storing an auxiliary HashMap where both the key and
value are nodes, say A and B. This way, there is an entry in the HashMap
only if there is a matching between node A and B. This allows us to
quickly check if a matching for node A exists by simply checking if an
entry is present in the HashMap. This change reduces the lookup time to
O(1), at the cost of additional memory usage and some performance
overhead in building and updating this auxiliary HashMap, which were
found to be negligible compared to the performance gains achieved.
  • Loading branch information
jpedroh authored Jul 27, 2024
1 parent 9ddddf8 commit 33433bb
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 41 deletions.
61 changes: 28 additions & 33 deletions matching/src/matchings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,58 +8,46 @@ use crate::matching_entry::MatchingEntry;

#[derive(Debug, Clone)]
pub struct Matchings<'a> {
pub matching_entries: HashMap<UnorderedPair<&'a CSTNode<'a>>, MatchingEntry>,
matching_entries: HashMap<UnorderedPair<&'a CSTNode<'a>>, MatchingEntry>,
individual_matchings: HashMap<&'a CSTNode<'a>, &'a CSTNode<'a>>,
}

impl<'a> Matchings<'a> {
pub fn empty() -> Self {
Matchings {
matching_entries: HashMap::new(),
individual_matchings: HashMap::new(),
}
}

pub fn from_single(key: UnorderedPair<&'a CSTNode>, value: MatchingEntry) -> Self {
Matchings {
matching_entries: HashMap::from([(key, value)]),
individual_matchings: HashMap::from([(key.0, key.1), (key.1, key.0)]),
}
}

pub fn new(matching_entries: HashMap<UnorderedPair<&'a CSTNode<'a>>, MatchingEntry>) -> Self {
Matchings { matching_entries }
Matchings {
individual_matchings: {
matching_entries
.keys()
.flat_map(|key| [(key.0, key.1), (key.1, key.0)])
.collect::<HashMap<&'a CSTNode<'a>, &'a CSTNode<'a>>>()
},
matching_entries,
}
}

pub fn find_matching_for(&self, a_node: &'a CSTNode) -> Option<Matching> {
self.matching_entries
.iter()
.find(|(UnorderedPair(left, right), ..)| {
left.id() == a_node.id() || right.id() == a_node.id()
})
.map(|(UnorderedPair(left, right), matching)| {
let matching_node = if left.id() == a_node.id() {
right
} else {
left
};
Matching {
matching_node,
score: matching.score,
is_perfect_match: matching.is_perfect_match,
}
})
}

pub fn find_matching_node_in_children(
&'a self,
a_node: &'a CSTNode<'a>,
children: &'a [CSTNode<'a>],
) -> Option<Matching> {
children.iter().find_map(|child| {
let matching_entry = self.get_matching_entry(child, a_node)?;
Some(Matching {
matching_node: child,
score: matching_entry.score,
is_perfect_match: matching_entry.is_perfect_match,
})
let matching_node = self.individual_matchings.get(a_node)?;
let matching_entry = self
.matching_entries
.get(&UnorderedPair(a_node, matching_node))?;
Some(Matching {
matching_node,
score: matching_entry.score,
is_perfect_match: matching_entry.is_perfect_match,
})
}

Expand All @@ -72,6 +60,13 @@ impl<'a> Matchings<'a> {
}

pub fn extend(&mut self, matchings: Matchings<'a>) {
self.individual_matchings.extend(
matchings
.matching_entries
.keys()
.flat_map(|key| [(key.0, key.1), (key.1, key.0)])
.collect::<HashMap<&'a CSTNode<'a>, &'a CSTNode<'a>>>(),
);
self.matching_entries.extend(matchings);
}
}
Expand Down
6 changes: 2 additions & 4 deletions merge/src/ordered_merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,8 @@ pub fn ordered_merge<'a>(
while let (Some(cur_left), Some(cur_right)) = (cur_left_option, cur_right_option) {
let matching_base_left = base_left_matchings.find_matching_for(cur_left);
let matching_base_right = base_right_matchings.find_matching_for(cur_right);
let left_matching_in_right =
left_right_matchings.find_matching_node_in_children(cur_left, right.get_children());
let right_matching_in_left =
left_right_matchings.find_matching_node_in_children(cur_right, left.get_children());
let left_matching_in_right = left_right_matchings.find_matching_for(cur_left);
let right_matching_in_left = left_right_matchings.find_matching_for(cur_right);
let has_bidirectional_matching_left_right =
left_matching_in_right.is_some() && right_matching_in_left.is_some();

Expand Down
6 changes: 2 additions & 4 deletions merge/src/unordered_merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ pub fn unordered_merge<'a>(
}

let matching_base_left = base_left_matchings.find_matching_for(left_child);
let matching_left_right =
left_right_matchings.find_matching_node_in_children(left_child, right.get_children());
let matching_left_right = left_right_matchings.find_matching_for(left_child);

match (matching_base_left, matching_left_right) {
// Added only by left
Expand Down Expand Up @@ -92,8 +91,7 @@ pub fn unordered_merge<'a>(
.filter(|node| !processed_nodes.contains(&node.id()))
{
let matching_base_right = base_right_matchings.find_matching_for(right_child);
let matching_left_right =
left_right_matchings.find_matching_node_in_children(right_child, left.get_children());
let matching_left_right = left_right_matchings.find_matching_for(right_child);

match (matching_base_right, matching_left_right) {
// Added only by right
Expand Down

0 comments on commit 33433bb

Please sign in to comment.