Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Introduce custom identifier extraction mecanism #62

Merged
merged 14 commits into from
Jul 21, 2024
Merged
23 changes: 15 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 5 additions & 32 deletions matching/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,50 +1,23 @@
mod matches;
mod matching;
pub mod matching_configuration;
mod matching_entry;
mod matchings;
pub mod ordered;
pub mod unordered;

use matches::Matches;
use matching_configuration::MatchingConfiguration;
pub use matching_entry::MatchingEntry;
pub use matchings::Matchings;
use model::cst_node::Terminal;
use unordered_pair::UnorderedPair;

/**
* TODO: This probably belongs on the node declaration itself, but for that we
* need to move the identifiers extraction into there which would be a pain now.
* Furthermore, in the future, we want to move the extraction of identifiers
* from programmatic code to use Tree Sitter query syntax.
*/
fn are_nodes_matching_representations_equal<'a>(
left: &'a model::CSTNode,
right: &'a model::CSTNode,
config: &'a MatchingConfiguration<'a>,
) -> bool {
config
.handlers
.compute_matching_score(left, right)
.map(|score| score == 1)
.unwrap_or(match (left, right) {
(
model::CSTNode::NonTerminal(left_non_terminal),
model::CSTNode::NonTerminal(right_non_terminal),
) => left_non_terminal.kind == right_non_terminal.kind,
(model::CSTNode::Terminal(left_terminal), model::CSTNode::Terminal(right_terminal)) => {
left_terminal.kind == right_terminal.kind
&& left_terminal.value == right_terminal.value
}
(_, _) => false,
})
}

pub fn calculate_matchings<'a>(
left: &'a model::CSTNode,
right: &'a model::CSTNode,
config: &'a MatchingConfiguration<'a>,
) -> Matchings<'a> {
if !are_nodes_matching_representations_equal(left, right, config) {
if !left.matches(right) {
return Matchings::empty();
}

Expand All @@ -61,12 +34,12 @@ pub fn calculate_matchings<'a>(
}
}
(
model::CSTNode::Terminal(Terminal {
model::CSTNode::Terminal(model::cst_node::Terminal {
kind: kind_left,
value: value_left,
..
}),
model::CSTNode::Terminal(Terminal {
model::CSTNode::Terminal(model::cst_node::Terminal {
kind: kind_right,
value: value_right,
..
Expand Down
19 changes: 19 additions & 0 deletions matching/src/matches.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use model::CSTNode;

pub trait Matches {
fn matches(&self, right: &CSTNode) -> bool;
}

impl Matches for CSTNode<'_> {
fn matches(&self, right: &CSTNode) -> bool {
match (self, right) {
(CSTNode::Terminal(left), CSTNode::Terminal(right)) => {
left.get_identifier() == right.get_identifier()
}
(CSTNode::NonTerminal(left), CSTNode::NonTerminal(right)) => {
left.kind == right.kind && left.get_identifier() == right.get_identifier()
}
(_, _) => false,
}
}
}
16 changes: 1 addition & 15 deletions matching/src/matching_configuration.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
use matching_handlers::MatchingHandlers;
use model::Language;
use std::collections::HashSet;

pub struct MatchingConfiguration<'a> {
pub(crate) delimiters: HashSet<&'static str>,
pub(crate) kinds_with_label: HashSet<&'static str>,
#[allow(dead_code)]
pub(crate) handlers: MatchingHandlers<'a>,
}

Expand All @@ -18,18 +16,6 @@ impl From<Language> for MatchingConfiguration<'_> {
fn from(language: Language) -> Self {
match language {
Language::Java => MatchingConfiguration {
delimiters: ["{", "}", ";"].into(),
kinds_with_label: [
"compact_constructor_declaration",
"constructor_declaration",
"field_declaration",
"method_declaration",
"import_declaration",
"class_declaration",
"interface_declaration",
"enum_declaration",
]
.into(),
handlers: MatchingHandlers::from(Language::Java),
},
}
Expand Down
20 changes: 15 additions & 5 deletions matching/src/ordered/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::{
matching_configuration::MatchingConfiguration, matching_entry::MatchingEntry, Matchings,
matches::Matches, matching_configuration::MatchingConfiguration, matching_entry::MatchingEntry,
Matchings,
};
use model::{cst_node::NonTerminal, CSTNode};
use unordered_pair::UnorderedPair;
Expand Down Expand Up @@ -36,10 +37,7 @@ pub fn calculate_matchings<'a>(
..
}),
) => {
let root_matching: usize = config
.handlers
.compute_matching_score(left, right)
.unwrap_or((left.kind() == right.kind()).into());
let root_matching: usize = (left.matches(right)).into();

let m = children_left.len();
let n = children_right.len();
Expand Down Expand Up @@ -128,6 +126,7 @@ mod tests {
start_position: Point { row: 0, column: 0 },
end_position: Point { row: 1, column: 7 },
children: vec![child.clone()],
..Default::default()
});
let right = CSTNode::NonTerminal(NonTerminal {
id: uuid::Uuid::new_v4(),
Expand All @@ -136,6 +135,7 @@ mod tests {
start_position: Point { row: 0, column: 0 },
end_position: Point { row: 1, column: 7 },
children: vec![child.clone()],
..Default::default()
});

let matching_configuration = MatchingConfiguration::default();
Expand Down Expand Up @@ -173,6 +173,7 @@ mod tests {
children: vec![left_child.clone()],
start_position: Point { row: 1, column: 0 },
end_position: Point { row: 0, column: 7 },
..Default::default()
});
let right = CSTNode::NonTerminal(NonTerminal {
id: uuid::Uuid::new_v4(),
Expand All @@ -181,6 +182,7 @@ mod tests {
children: vec![right_child.clone()],
start_position: Point { row: 1, column: 0 },
end_position: Point { row: 0, column: 7 },
..Default::default()
});

let matching_configuration = MatchingConfiguration::from(Language::Java);
Expand Down Expand Up @@ -216,6 +218,7 @@ mod tests {
start_position: Point { row: 0, column: 0 },
end_position: Point { row: 0, column: 7 },
children: vec![common_child.clone()],
..Default::default()
});
let right = CSTNode::NonTerminal(NonTerminal {
id: uuid::Uuid::new_v4(),
Expand All @@ -224,6 +227,7 @@ mod tests {
start_position: Point { row: 0, column: 0 },
end_position: Point { row: 0, column: 7 },
children: vec![common_child.clone(), unique_right_child],
..Default::default()
});

let matching_configuration = MatchingConfiguration::from(language::Language::Java);
Expand Down Expand Up @@ -252,6 +256,7 @@ mod tests {
start_position: Point { row: 0, column: 0 },
end_position: Point { row: 0, column: 7 },
children: vec![common_child.clone()],
..Default::default()
});
let right = CSTNode::NonTerminal(NonTerminal {
id: uuid::Uuid::new_v4(),
Expand All @@ -260,6 +265,7 @@ mod tests {
start_position: Point { row: 0, column: 0 },
end_position: Point { row: 0, column: 7 },
children: vec![common_child.clone()],
..Default::default()
});

let matching_configuration = MatchingConfiguration::from(language::Language::Java);
Expand All @@ -279,6 +285,7 @@ mod tests {
end_position: Point { row: 0, column: 7 },
value: "value_b",
is_block_end_delimiter: false,
..Default::default()
});

let intermediate = CSTNode::NonTerminal(NonTerminal {
Expand All @@ -288,6 +295,7 @@ mod tests {
start_position: Point { row: 0, column: 0 },
end_position: Point { row: 0, column: 7 },
children: vec![leaf],
..Default::default()
});

let left = CSTNode::NonTerminal(NonTerminal {
Expand All @@ -297,6 +305,7 @@ mod tests {
start_position: Point { row: 0, column: 0 },
end_position: Point { row: 0, column: 7 },
children: vec![intermediate.clone()],
..Default::default()
});
let right = CSTNode::NonTerminal(NonTerminal {
id: uuid::Uuid::new_v4(),
Expand All @@ -305,6 +314,7 @@ mod tests {
start_position: Point { row: 0, column: 0 },
end_position: Point { row: 0, column: 7 },
children: vec![intermediate.clone()],
..Default::default()
});

let matching_configuration = MatchingConfiguration::default();
Expand Down
9 changes: 3 additions & 6 deletions matching/src/unordered/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub fn calculate_matchings<'a>(
) -> crate::Matchings<'a> {
match (left, right) {
(model::CSTNode::NonTerminal(left_nt), model::CSTNode::NonTerminal(right_nt)) => {
if all_children_labeled(left_nt, config) && all_children_labeled(right_nt, config) {
if all_children_labeled(left_nt) && all_children_labeled(right_nt) {
log::debug!(
"Matching children of \"{}\" with \"{}\" using unique label matching.",
left.kind(),
Expand All @@ -31,9 +31,6 @@ pub fn calculate_matchings<'a>(
}
}

fn all_children_labeled(node: &NonTerminal, config: &MatchingConfiguration) -> bool {
node.children
.iter()
.filter(|child| !config.delimiters.contains(child.kind()))
.all(|child| config.kinds_with_label.contains(child.kind()))
fn all_children_labeled(node: &NonTerminal) -> bool {
node.children.iter().all(|child| child.has_identifier())
}
15 changes: 10 additions & 5 deletions matching/src/unordered/unique_label.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,17 @@ pub fn calculate_matchings<'a>(

for child_left in children_left {
for child_right in children_right {
let is_same_identifier = config
.handlers
.compute_matching_score(child_left, child_right)
.unwrap_or_else(|| (child_left.kind() == child_right.kind()).into());
let is_same_identifier = match (child_left, child_right) {
(CSTNode::Terminal(left), CSTNode::Terminal(right)) => {
left.get_identifier() == right.get_identifier()
}
(CSTNode::NonTerminal(left), CSTNode::NonTerminal(right)) => {
left.get_identifier() == right.get_identifier()
}
(_, _) => false,
};

if is_same_identifier == 1 {
if is_same_identifier {
let child_matchings =
crate::calculate_matchings(child_left, child_right, config);

Expand Down
Loading
Loading