-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(matching): use the Hungarian Algorithm for unordered matching (#50)
Our current approach to unordered node matching relies on a naive assumption: that all nodes possess an identifier. While this holds true for most nodes we've encountered thus far, such as method and property declarations within a Java class, it proves insufficient when attempting to match nodes that lack a label, like static blocks in Java. In such cases, the matching calculation may yield incorrect results, which in turn leads to erroneous merges. This pull request introduces a solution that treats the matching of unordered nodes as an instance of the Assignment Problem and uses the Hungarian Algorithm to solve it. This approach mirrors the one used in jDime. Given the widespread recognition of the Hungarian Algorithm, we rely on the implementation provided by the [pathfinding](https://github.com/evenfurther/pathfinding) crate. This simplifies our implementation effort, as we only need to provide the weight matrix and extract the matching information from the solution. A workaround had to be implemented because pathfinding expects the input weight matrix to have the same number of rows and columns, which might not always be true in our case since nodes can have a different number of children. The solution is to pad the matrix to a square shape by initializing the remaining rows/columns with 0. For now, our naive label implementation has been bypassed and is not being used. In a future pull request, the idea is to resort to the Hungarian Algorithm only if the nodes are unlabeled, as it is significantly more expensive than merely matching identifiers.
- Loading branch information
Showing
9 changed files
with
254 additions
and
7 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
public class Main { | ||
static { | ||
System.out.println("I'm a static block"); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
public class Main { | ||
static { | ||
int x = 0; | ||
} | ||
|
||
static { | ||
System.out.println("I'm a static block"); | ||
} | ||
|
||
public Main() { | ||
System.out.println("I'm a constructor"); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
public class Main { static { int <<<<<<<<< x ========= y >>>>>>>>> = <<<<<<<<< 0 ========= 2 >>>>>>>>> ; } static { System . out . println ( "I'm a static block" ) ; } public Main ( ) { System . out . println ( "I'm a constructor" ) ; int y = 3 ; } static { System . out . println ( "I don't know what's going on" ) ; } } |
18 changes: 18 additions & 0 deletions
18
bin/tests/scenarios/unordered_with_non_labelled/right.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
public class Main { | ||
static { | ||
System.out.println("I'm a static block"); | ||
} | ||
|
||
static { | ||
int y = 2; | ||
} | ||
|
||
static { | ||
System.out.println("I don't know what's going on"); | ||
} | ||
|
||
public Main() { | ||
System.out.println("I'm a constructor"); | ||
int y = 3; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
use std::cmp::max; | ||
|
||
use matching_handlers::MatchingHandlers; | ||
use model::{cst_node::NonTerminal, CSTNode}; | ||
use pathfinding::{kuhn_munkres::Weights, matrix}; | ||
use unordered_pair::UnorderedPair; | ||
|
||
use crate::{MatchingEntry, Matchings}; | ||
|
||
pub fn calculate_matchings<'a>( | ||
left: &'a CSTNode, | ||
right: &'a CSTNode, | ||
matching_handlers: &'a MatchingHandlers<'a>, | ||
) -> crate::Matchings<'a> { | ||
match (left, right) { | ||
( | ||
CSTNode::NonTerminal(NonTerminal { | ||
kind: kind_left, | ||
children: children_left, | ||
.. | ||
}), | ||
CSTNode::NonTerminal(NonTerminal { | ||
kind: kind_right, | ||
children: children_right, | ||
.. | ||
}), | ||
) => { | ||
if kind_left != kind_right { | ||
return Matchings::empty(); | ||
} | ||
|
||
let children_matchings = children_left | ||
.iter() | ||
.map(|left_child| { | ||
children_right | ||
.iter() | ||
.map(|right_child| { | ||
let w = crate::calculate_matchings(left_child, right_child, matching_handlers); | ||
let matching = w | ||
.get_matching_entry(left_child, right_child) | ||
.unwrap_or_default(); | ||
(matching.score, w) | ||
}) | ||
.collect() | ||
}) | ||
.collect(); | ||
|
||
solve_assignment_problem(left, right, children_matchings) | ||
} | ||
(_, _) => unreachable!( | ||
"Unordered matching must never be called if the nodes are not NonTerminals." | ||
), | ||
} | ||
} | ||
|
||
fn solve_assignment_problem<'a>( | ||
left: &'a CSTNode, | ||
right: &'a CSTNode, | ||
children_matchings: Vec<Vec<(usize, Matchings<'a>)>> | ||
) -> Matchings<'a> { | ||
let m = children_matchings.len(); | ||
let n = children_matchings[0].len(); | ||
let max_size = max(m, n); | ||
|
||
let mut matrix: Vec<Vec<i32>> = vec![vec![0; max_size]; max_size]; | ||
for i in 0..m { | ||
for j in 0..n { | ||
matrix[i][j] = children_matchings[i][j].0.try_into().unwrap(); | ||
} | ||
} | ||
|
||
let weights_matrix = matrix::Matrix::from_rows(matrix) | ||
.expect("Could not build weights matrix for assignment problem."); | ||
let (max_matching, best_matches) = pathfinding::kuhn_munkres::kuhn_munkres(&weights_matrix); | ||
|
||
let mut result = Matchings::empty(); | ||
|
||
for i in 0..best_matches.len() { | ||
let j = best_matches[i]; | ||
let cur_matching = weights_matrix.at(i, j); | ||
if cur_matching > 0 { | ||
result.extend(children_matchings[i][j].1.clone()); | ||
} | ||
} | ||
|
||
result.extend(Matchings::from_single( | ||
UnorderedPair(left, right), | ||
MatchingEntry { | ||
score: max_matching as usize + 1, | ||
is_perfect_match: left.contents() == right.contents(), | ||
}, | ||
)); | ||
|
||
result | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pub mod assignment_problem; |