diff --git a/Cargo.lock b/Cargo.lock index 46ab217..a3292bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -415,6 +415,7 @@ dependencies = [ "log", "model", "parsing_handlers", + "regex", "tree-sitter", "tree-sitter-java", "uuid", @@ -492,26 +493,32 @@ dependencies = [ [[package]] name = "regex" -version = "1.7.3" +version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ "aho-corasick", "memchr", + "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] [[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "rustc-hash" diff --git a/matching/src/lib.rs b/matching/src/lib.rs index a72f05c..a83ca78 100644 --- a/matching/src/lib.rs +++ b/matching/src/lib.rs @@ -1,3 +1,4 @@ +mod matches; mod matching; pub mod matching_configuration; mod matching_entry; @@ -5,46 +6,18 @@ mod matchings; pub mod ordered; pub mod unordered; +use matches::Matches; use matching_configuration::MatchingConfiguration; pub use matching_entry::MatchingEntry; pub use matchings::Matchings; -use model::cst_node::Terminal; use unordered_pair::UnorderedPair; -/** - * TODO: This probably belongs on the node declaration itself, but for that we - * need to move the identifiers extraction into there which would be a pain now. - * Furthermore, in the future, we want to move the extraction of identifiers - * from programmatic code to use Tree Sitter query syntax. - */ -fn are_nodes_matching_representations_equal<'a>( - left: &'a model::CSTNode, - right: &'a model::CSTNode, - config: &'a MatchingConfiguration<'a>, -) -> bool { - config - .handlers - .compute_matching_score(left, right) - .map(|score| score == 1) - .unwrap_or(match (left, right) { - ( - model::CSTNode::NonTerminal(left_non_terminal), - model::CSTNode::NonTerminal(right_non_terminal), - ) => left_non_terminal.kind == right_non_terminal.kind, - (model::CSTNode::Terminal(left_terminal), model::CSTNode::Terminal(right_terminal)) => { - left_terminal.kind == right_terminal.kind - && left_terminal.value == right_terminal.value - } - (_, _) => false, - }) -} - pub fn calculate_matchings<'a>( left: &'a model::CSTNode, right: &'a model::CSTNode, config: &'a MatchingConfiguration<'a>, ) -> Matchings<'a> { - if !are_nodes_matching_representations_equal(left, right, config) { + if !left.matches(right) { return Matchings::empty(); } @@ -61,12 +34,12 @@ pub fn calculate_matchings<'a>( } } ( - model::CSTNode::Terminal(Terminal { + model::CSTNode::Terminal(model::cst_node::Terminal { kind: kind_left, value: value_left, .. }), - model::CSTNode::Terminal(Terminal { + model::CSTNode::Terminal(model::cst_node::Terminal { kind: kind_right, value: value_right, .. diff --git a/matching/src/matches.rs b/matching/src/matches.rs new file mode 100644 index 0000000..bbec656 --- /dev/null +++ b/matching/src/matches.rs @@ -0,0 +1,19 @@ +use model::CSTNode; + +pub trait Matches { + fn matches(&self, right: &CSTNode) -> bool; +} + +impl Matches for CSTNode<'_> { + fn matches(&self, right: &CSTNode) -> bool { + match (self, right) { + (CSTNode::Terminal(left), CSTNode::Terminal(right)) => { + left.get_identifier() == right.get_identifier() + } + (CSTNode::NonTerminal(left), CSTNode::NonTerminal(right)) => { + left.kind == right.kind && left.get_identifier() == right.get_identifier() + } + (_, _) => false, + } + } +} diff --git a/matching/src/matching_configuration.rs b/matching/src/matching_configuration.rs index fc1d31c..5c2d170 100644 --- a/matching/src/matching_configuration.rs +++ b/matching/src/matching_configuration.rs @@ -1,10 +1,8 @@ use matching_handlers::MatchingHandlers; use model::Language; -use std::collections::HashSet; pub struct MatchingConfiguration<'a> { - pub(crate) delimiters: HashSet<&'static str>, - pub(crate) kinds_with_label: HashSet<&'static str>, + #[allow(dead_code)] pub(crate) handlers: MatchingHandlers<'a>, } @@ -18,18 +16,6 @@ impl From for MatchingConfiguration<'_> { fn from(language: Language) -> Self { match language { Language::Java => MatchingConfiguration { - delimiters: ["{", "}", ";"].into(), - kinds_with_label: [ - "compact_constructor_declaration", - "constructor_declaration", - "field_declaration", - "method_declaration", - "import_declaration", - "class_declaration", - "interface_declaration", - "enum_declaration", - ] - .into(), handlers: MatchingHandlers::from(Language::Java), }, } diff --git a/matching/src/ordered/mod.rs b/matching/src/ordered/mod.rs index cbe27bf..23a5bb2 100644 --- a/matching/src/ordered/mod.rs +++ b/matching/src/ordered/mod.rs @@ -1,5 +1,6 @@ use crate::{ - matching_configuration::MatchingConfiguration, matching_entry::MatchingEntry, Matchings, + matches::Matches, matching_configuration::MatchingConfiguration, matching_entry::MatchingEntry, + Matchings, }; use model::{cst_node::NonTerminal, CSTNode}; use unordered_pair::UnorderedPair; @@ -36,10 +37,7 @@ pub fn calculate_matchings<'a>( .. }), ) => { - let root_matching: usize = config - .handlers - .compute_matching_score(left, right) - .unwrap_or((left.kind() == right.kind()).into()); + let root_matching: usize = (left.matches(right)).into(); let m = children_left.len(); let n = children_right.len(); @@ -128,6 +126,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 1, column: 7 }, children: vec![child.clone()], + ..Default::default() }); let right = CSTNode::NonTerminal(NonTerminal { id: uuid::Uuid::new_v4(), @@ -136,6 +135,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 1, column: 7 }, children: vec![child.clone()], + ..Default::default() }); let matching_configuration = MatchingConfiguration::default(); @@ -173,6 +173,7 @@ mod tests { children: vec![left_child.clone()], start_position: Point { row: 1, column: 0 }, end_position: Point { row: 0, column: 7 }, + ..Default::default() }); let right = CSTNode::NonTerminal(NonTerminal { id: uuid::Uuid::new_v4(), @@ -181,6 +182,7 @@ mod tests { children: vec![right_child.clone()], start_position: Point { row: 1, column: 0 }, end_position: Point { row: 0, column: 7 }, + ..Default::default() }); let matching_configuration = MatchingConfiguration::from(Language::Java); @@ -216,6 +218,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![common_child.clone()], + ..Default::default() }); let right = CSTNode::NonTerminal(NonTerminal { id: uuid::Uuid::new_v4(), @@ -224,6 +227,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![common_child.clone(), unique_right_child], + ..Default::default() }); let matching_configuration = MatchingConfiguration::from(language::Language::Java); @@ -252,6 +256,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![common_child.clone()], + ..Default::default() }); let right = CSTNode::NonTerminal(NonTerminal { id: uuid::Uuid::new_v4(), @@ -260,6 +265,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![common_child.clone()], + ..Default::default() }); let matching_configuration = MatchingConfiguration::from(language::Language::Java); @@ -279,6 +285,7 @@ mod tests { end_position: Point { row: 0, column: 7 }, value: "value_b", is_block_end_delimiter: false, + ..Default::default() }); let intermediate = CSTNode::NonTerminal(NonTerminal { @@ -288,6 +295,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![leaf], + ..Default::default() }); let left = CSTNode::NonTerminal(NonTerminal { @@ -297,6 +305,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![intermediate.clone()], + ..Default::default() }); let right = CSTNode::NonTerminal(NonTerminal { id: uuid::Uuid::new_v4(), @@ -305,6 +314,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![intermediate.clone()], + ..Default::default() }); let matching_configuration = MatchingConfiguration::default(); diff --git a/matching/src/unordered/mod.rs b/matching/src/unordered/mod.rs index 7b790f6..b434d8f 100644 --- a/matching/src/unordered/mod.rs +++ b/matching/src/unordered/mod.rs @@ -11,7 +11,7 @@ pub fn calculate_matchings<'a>( ) -> crate::Matchings<'a> { match (left, right) { (model::CSTNode::NonTerminal(left_nt), model::CSTNode::NonTerminal(right_nt)) => { - if all_children_labeled(left_nt, config) && all_children_labeled(right_nt, config) { + if all_children_labeled(left_nt) && all_children_labeled(right_nt) { log::debug!( "Matching children of \"{}\" with \"{}\" using unique label matching.", left.kind(), @@ -31,9 +31,6 @@ pub fn calculate_matchings<'a>( } } -fn all_children_labeled(node: &NonTerminal, config: &MatchingConfiguration) -> bool { - node.children - .iter() - .filter(|child| !config.delimiters.contains(child.kind())) - .all(|child| config.kinds_with_label.contains(child.kind())) +fn all_children_labeled(node: &NonTerminal) -> bool { + node.children.iter().all(|child| child.has_identifier()) } diff --git a/matching/src/unordered/unique_label.rs b/matching/src/unordered/unique_label.rs index 5c961f8..917826f 100644 --- a/matching/src/unordered/unique_label.rs +++ b/matching/src/unordered/unique_label.rs @@ -28,12 +28,17 @@ pub fn calculate_matchings<'a>( for child_left in children_left { for child_right in children_right { - let is_same_identifier = config - .handlers - .compute_matching_score(child_left, child_right) - .unwrap_or_else(|| (child_left.kind() == child_right.kind()).into()); + let is_same_identifier = match (child_left, child_right) { + (CSTNode::Terminal(left), CSTNode::Terminal(right)) => { + left.get_identifier() == right.get_identifier() + } + (CSTNode::NonTerminal(left), CSTNode::NonTerminal(right)) => { + left.get_identifier() == right.get_identifier() + } + (_, _) => false, + }; - if is_same_identifier == 1 { + if is_same_identifier { let child_matchings = crate::calculate_matchings(child_left, child_right, config); diff --git a/matching_handlers/src/java/class_like_declaration.rs b/matching_handlers/src/java/class_like_declaration.rs deleted file mode 100644 index 37d3fc9..0000000 --- a/matching_handlers/src/java/class_like_declaration.rs +++ /dev/null @@ -1,61 +0,0 @@ -use super::utils::find_child_of_kind; -use model::{cst_node::NonTerminal, CSTNode}; - -pub fn compute_matching_score_for_class_like_declaration<'a>( - left: &'a CSTNode, - right: &'a CSTNode, -) -> usize { - match (left, right) { - ( - CSTNode::NonTerminal(NonTerminal { - children: children_left, - .. - }), - CSTNode::NonTerminal(NonTerminal { - children: children_right, - .. - }), - ) => { - let identifier_left = - find_child_of_kind(children_left, "identifier").map(|node| node.contents()); - let identifier_right = - find_child_of_kind(children_right, "identifier").map(|node| node.contents()); - - (identifier_left.is_some() && identifier_left == identifier_right).into() - } - (_, _) => 0, - } -} - -#[cfg(test)] -mod tests { - #[test] - fn classes_with_the_same_name_match_with_score_one() { - let result = super::compute_matching_score_for_class_like_declaration( - &make_class_like_declaration("ABC"), - &make_class_like_declaration("ABC"), - ); - assert_eq!(1, result); - } - - #[test] - fn classes_of_different_names_do_not_match() { - let result = super::compute_matching_score_for_class_like_declaration( - &make_class_like_declaration("ABC"), - &make_class_like_declaration("DEF"), - ); - assert_eq!(0, result); - } - - fn make_class_like_declaration(identifier: &str) -> model::CSTNode { - model::CSTNode::NonTerminal(model::cst_node::NonTerminal { - kind: "class_declaration", - children: vec![model::CSTNode::Terminal(model::cst_node::Terminal { - kind: "identifier", - value: identifier, - ..Default::default() - })], - ..Default::default() - }) - } -} diff --git a/matching_handlers/src/java/field_declaration.rs b/matching_handlers/src/java/field_declaration.rs deleted file mode 100644 index 96686eb..0000000 --- a/matching_handlers/src/java/field_declaration.rs +++ /dev/null @@ -1,104 +0,0 @@ -use super::utils::find_identifier; -use model::{cst_node::NonTerminal, CSTNode}; - -fn find_variable_declarator<'a>(node_children: &'a [CSTNode<'a>]) -> Option<&'a NonTerminal<'a>> { - node_children - .iter() - .find(|node| node.kind() == "variable_declarator") - .and_then(|node| match node { - CSTNode::NonTerminal(non_terminal) => Some(non_terminal), - CSTNode::Terminal(_) => None, - }) -} - -pub fn compute_matching_score_for_field_declaration<'a>( - left: &'a CSTNode, - right: &'a CSTNode, -) -> usize { - match (left, right) { - ( - CSTNode::NonTerminal(NonTerminal { - children: children_left, - .. - }), - CSTNode::NonTerminal(NonTerminal { - children: children_right, - .. - }), - ) => { - // Try to find an identifier on children, and compare them - let identifier_left = find_variable_declarator(children_left) - .and_then(|node| find_identifier(&node.children)) - .map(|node| node.value); - let identifier_right = find_variable_declarator(children_right) - .and_then(|node| find_identifier(&node.children)) - .map(|node| node.value); - - (identifier_left.is_some() && identifier_left == identifier_right).into() - } - (_, _) => 0, - } -} - -#[cfg(test)] -mod tests { - use model::{ - cst_node::{NonTerminal, Terminal}, - CSTNode, - }; - - use crate::java::field_declaration::compute_matching_score_for_field_declaration; - - #[test] - fn it_returns_one_if_nodes_have_the_same_identifier() { - let left = make_field_declarator_node_with_identifier("an_identifier"); - let right = make_field_declarator_node_with_identifier("an_identifier"); - let matching_score = compute_matching_score_for_field_declaration(&left, &right); - assert_eq!(1, matching_score); - } - - #[test] - fn it_returns_zero_if_nodes_have_different_identifiers() { - let left = make_field_declarator_node_with_identifier("an_identifier_a"); - let right = make_field_declarator_node_with_identifier("an_identifier_b"); - let matching_score = compute_matching_score_for_field_declaration(&left, &right); - assert_eq!(0, matching_score); - } - - fn make_field_declarator_node_with_identifier(identifier: &str) -> CSTNode { - return CSTNode::NonTerminal(NonTerminal { - kind: "field_declaration", - children: vec![ - CSTNode::NonTerminal(NonTerminal { - kind: "modifiers", - children: vec![CSTNode::Terminal(Terminal { - kind: "private", - value: "private", - ..Default::default() - })], - ..Default::default() - }), - CSTNode::Terminal(Terminal { - kind: "type_identifier", - value: "String", - ..Default::default() - }), - CSTNode::NonTerminal(NonTerminal { - kind: "variable_declarator", - children: vec![CSTNode::Terminal(Terminal { - kind: "identifier", - value: identifier, - ..Default::default() - })], - ..Default::default() - }), - CSTNode::Terminal(Terminal { - kind: ";", - value: ";", - ..Default::default() - }), - ], - ..Default::default() - }); - } -} diff --git a/matching_handlers/src/java/import_declaration.rs b/matching_handlers/src/java/import_declaration.rs deleted file mode 100644 index 1aad2ce..0000000 --- a/matching_handlers/src/java/import_declaration.rs +++ /dev/null @@ -1,79 +0,0 @@ -use model::CSTNode; - -pub fn compute_matching_score_for_import_declaration<'a>( - left: &'a CSTNode<'a>, - right: &'a CSTNode<'a>, -) -> usize { - (left.contents() == right.contents()).into() -} - -#[cfg(test)] -mod tests { - #[test] - fn imports_of_the_same_resource_matches_with_one() { - let result = super::compute_matching_score_for_import_declaration( - &make_import_of_resource("java.util.array"), - &make_import_of_resource("java.util.array"), - ); - assert_eq!(1, result); - } - - #[test] - fn imports_of_different_resources_matches_with_zero() { - let result = super::compute_matching_score_for_import_declaration( - &make_import_of_resource("java.util.array"), - &make_import_of_resource("java.util.list"), - ); - assert_eq!(0, result); - } - - #[test] - fn imports_with_asterisks_do_match_if_they_are_equal() { - let node = model::CSTNode::NonTerminal(model::cst_node::NonTerminal { - kind: "import_declaration", - children: vec![ - model::CSTNode::Terminal(model::cst_node::Terminal { - kind: "identifier", - value: "AST", - ..Default::default() - }), - model::CSTNode::Terminal(model::cst_node::Terminal { - kind: ".", - value: ".", - ..Default::default() - }), - model::CSTNode::Terminal(model::cst_node::Terminal { - kind: "asterisk", - value: "*", - ..Default::default() - }), - ], - ..Default::default() - }); - - let result = super::compute_matching_score_for_import_declaration(&node, &node); - - assert_eq!(1, result); - } - - fn make_import_of_resource(resource: &str) -> model::CSTNode { - model::CSTNode::NonTerminal(model::cst_node::NonTerminal { - kind: "import_declaration", - children: vec![model::CSTNode::NonTerminal(model::cst_node::NonTerminal { - kind: "scoped_identifier", - children: resource - .split('.') - .map(|part| { - model::CSTNode::Terminal(model::cst_node::Terminal { - kind: "identifier", - value: part, - ..Default::default() - }) - }) - .collect(), - ..Default::default() - })], - ..Default::default() - }) - } -} diff --git a/matching_handlers/src/java/method_declaration.rs b/matching_handlers/src/java/method_declaration.rs deleted file mode 100644 index a2262d7..0000000 --- a/matching_handlers/src/java/method_declaration.rs +++ /dev/null @@ -1,300 +0,0 @@ -use super::utils::find_child_of_kind; -use model::{cst_node::NonTerminal, CSTNode}; - -pub fn compute_matching_score_for_method_declaration<'a>( - left: &'a CSTNode, - right: &'a CSTNode, -) -> usize { - match (left, right) { - ( - CSTNode::NonTerminal(NonTerminal { - children: children_left, - .. - }), - CSTNode::NonTerminal(NonTerminal { - children: children_right, - .. - }), - ) => { - // Try to find an identifier on children, and compare them - let identifier_left = - find_child_of_kind(children_left, "identifier").map(|node| node.contents()); - let identifier_right = - find_child_of_kind(children_right, "identifier").map(|node| node.contents()); - - // We also need to take method arguments into account because of overloading - let type_of_left_arguments = find_child_of_kind(children_left, "formal_parameters") - .map(|node| extract_argument_types_from_formal_parameters(node)); - let type_of_right_arguments = find_child_of_kind(children_right, "formal_parameters") - .map(|node| extract_argument_types_from_formal_parameters(node)); - - let identifiers_are_equal = - identifier_left.is_some() && identifier_left == identifier_right; - let arguments_are_equal = type_of_left_arguments.is_some() - && type_of_left_arguments == type_of_right_arguments; - - (identifiers_are_equal && arguments_are_equal).into() - } - (_, _) => 0, - } -} - -fn extract_argument_types_from_formal_parameters(node: &CSTNode) -> Vec { - match node { - CSTNode::Terminal(_) => vec![], - CSTNode::NonTerminal(non_terminal) => non_terminal - .children - .iter() - .filter(|inner_node| { - inner_node.kind() == "formal_parameter" || inner_node.kind() == "spread_parameter" - }) - .filter_map(|inner_node| match inner_node { - CSTNode::Terminal(_) => None, - CSTNode::NonTerminal(non_terminal) => Some( - non_terminal - .children - .iter() - .filter(|node| node.kind() != "modifiers" && node.kind() != "identifier") - .fold(String::new(), |acc, cur| { - format!("{} {}", acc, cur.contents()) - }), - ), - }) - .collect(), - } -} - -#[cfg(test)] -mod tests { - use model::{ - cst_node::{NonTerminal, Terminal}, - CSTNode, - }; - - use crate::java::method_declaration::compute_matching_score_for_method_declaration; - - #[test] - fn it_returns_one_if_methods_have_the_same_identifier() { - let left = - make_method_declaration_node("an_identifier", make_method_parameter("String", "name")); - let right = - make_method_declaration_node("an_identifier", make_method_parameter("String", "name")); - let matching_score = compute_matching_score_for_method_declaration(&left, &right); - assert_eq!(1, matching_score); - } - - #[test] - fn it_returns_zero_if_methods_have_different_identifiers() { - let left = make_method_declaration_node( - "an_identifier_a", - make_method_parameter("String", "name"), - ); - let right = make_method_declaration_node( - "an_identifier_b", - make_method_parameter("String", "name"), - ); - let matching_score = compute_matching_score_for_method_declaration(&left, &right); - assert_eq!(0, matching_score); - } - - #[test] - fn it_returns_one_if_methods_have_equal_identifiers_and_equal_parameters_list() { - let left = - make_method_declaration_node("an_identifier", make_method_parameter("String", "name")); - let right = make_method_declaration_node( - "an_identifier", - make_method_parameter("String", "another_name"), - ); - let matching_score = compute_matching_score_for_method_declaration(&left, &right); - assert_eq!(1, matching_score); - } - - #[test] - fn it_returns_zero_if_methods_have_equal_identifiers_but_different_parameters_list() { - let parameter_left = make_method_parameter("String", "name"); - let parameter_right = make_method_parameter("int", "another_name"); - - let left = make_method_declaration_node("an_identifier", parameter_left); - let right = make_method_declaration_node("an_identifier", parameter_right); - let matching_score = compute_matching_score_for_method_declaration(&left, &right); - assert_eq!(0, matching_score); - } - - #[test] - fn for_matching_formal_parameters_it_takes_into_consideration_all_children_except_identifier() { - let node_a = make_method_declaration_node( - "ASTNodeArtifact", - CSTNode::NonTerminal(NonTerminal { - kind: "formal_parameter", - children: vec![ - CSTNode::NonTerminal(NonTerminal { - kind: "modifiers", - children: vec![CSTNode::Terminal(Terminal { - kind: "final", - value: "final", - ..Default::default() - })], - ..Default::default() - }), - CSTNode::NonTerminal(NonTerminal { - kind: "generic_type", - children: vec![ - CSTNode::Terminal(Terminal { - kind: "type_identifier", - value: "ASTNode", - ..Default::default() - }), - CSTNode::NonTerminal(NonTerminal { - kind: "type_arguments", - children: vec![ - CSTNode::Terminal(Terminal { - kind: "<", - value: "<", - ..Default::default() - }), - CSTNode::NonTerminal(NonTerminal { - kind: "wildcard", - children: vec![CSTNode::Terminal(Terminal { - kind: "?", - value: "?", - ..Default::default() - })], - ..Default::default() - }), - CSTNode::Terminal(Terminal { - kind: ">", - value: ">", - ..Default::default() - }), - ], - ..Default::default() - }), - ], - ..Default::default() - }), - CSTNode::Terminal(Terminal { - kind: "identifier", - value: "astnode", - ..Default::default() - }), - ], - ..Default::default() - }), - ); - - let node_b = make_method_declaration_node( - "ASTNodeArtifact", - CSTNode::NonTerminal(NonTerminal { - kind: "formal_parameter", - children: vec![ - CSTNode::NonTerminal(NonTerminal { - kind: "modifiers", - children: vec![CSTNode::Terminal(Terminal { - kind: "final", - value: "final", - ..Default::default() - })], - ..Default::default() - }), - CSTNode::Terminal(Terminal { - kind: "type_identifier", - value: "FileArtifact", - ..Default::default() - }), - CSTNode::Terminal(Terminal { - kind: "identifier", - value: "astnode", - ..Default::default() - }), - ], - ..Default::default() - }), - ); - - let result = compute_matching_score_for_method_declaration(&node_a, &node_b); - assert_eq!(0, result); - } - - fn make_method_declaration_node<'a>( - identifier: &'a str, - parameter: CSTNode<'a>, - ) -> CSTNode<'a> { - CSTNode::NonTerminal(NonTerminal { - kind: "method_declaration", - children: vec![ - CSTNode::NonTerminal(NonTerminal { - kind: "modifiers", - children: vec![CSTNode::Terminal(Terminal { - kind: "public", - value: "public", - ..Default::default() - })], - ..Default::default() - }), - CSTNode::Terminal(Terminal { - kind: "void_type", - value: "void", - ..Default::default() - }), - CSTNode::Terminal(Terminal { - kind: "identifier", - value: identifier, - ..Default::default() - }), - CSTNode::NonTerminal(NonTerminal { - kind: "formal_parameters", - children: vec![ - CSTNode::Terminal(Terminal { - kind: "(", - value: "(", - ..Default::default() - }), - parameter, - CSTNode::Terminal(Terminal { - kind: ")", - value: ")", - ..Default::default() - }), - ], - ..Default::default() - }), - CSTNode::NonTerminal(NonTerminal { - kind: "block", - children: vec![ - CSTNode::Terminal(Terminal { - kind: "{", - value: "{", - ..Default::default() - }), - CSTNode::Terminal(Terminal { - kind: "}", - value: "}", - ..Default::default() - }), - ], - ..Default::default() - }), - ], - ..Default::default() - }) - } - - fn make_method_parameter<'a>(a_type: &'a str, identifier: &'a str) -> CSTNode<'a> { - CSTNode::NonTerminal(NonTerminal { - kind: "formal_parameter", - children: vec![ - CSTNode::Terminal(Terminal { - kind: "type_identifier", - value: a_type, - ..Default::default() - }), - CSTNode::Terminal(Terminal { - kind: "identifier", - value: identifier, - ..Default::default() - }), - ], - ..Default::default() - }) - } -} diff --git a/matching_handlers/src/java/mod.rs b/matching_handlers/src/java/mod.rs index 4fd7f06..0b880af 100644 --- a/matching_handlers/src/java/mod.rs +++ b/matching_handlers/src/java/mod.rs @@ -1,47 +1,6 @@ -mod class_like_declaration; -mod field_declaration; -mod import_declaration; -mod method_declaration; -mod utils; - use crate::MatchingHandlers; -use self::{ - class_like_declaration::compute_matching_score_for_class_like_declaration, - field_declaration::compute_matching_score_for_field_declaration, - import_declaration::compute_matching_score_for_import_declaration, - method_declaration::compute_matching_score_for_method_declaration, -}; - pub fn get_default_java_matching_handlers<'a>() -> MatchingHandlers<'a> { - let mut matching_handlers: MatchingHandlers<'a> = MatchingHandlers::new(); - matching_handlers.register( - "field_declaration", - compute_matching_score_for_field_declaration, - ); - matching_handlers.register( - "method_declaration", - compute_matching_score_for_method_declaration, - ); - matching_handlers.register( - "constructor_declaration", - compute_matching_score_for_method_declaration, - ); - matching_handlers.register( - "import_declaration", - compute_matching_score_for_import_declaration, - ); - matching_handlers.register( - "class_declaration", - compute_matching_score_for_class_like_declaration, - ); - matching_handlers.register( - "enum_declaration", - compute_matching_score_for_class_like_declaration, - ); - matching_handlers.register( - "interface_declaration", - compute_matching_score_for_class_like_declaration, - ); + let matching_handlers: MatchingHandlers<'a> = MatchingHandlers::new(); matching_handlers } diff --git a/matching_handlers/src/java/utils.rs b/matching_handlers/src/java/utils.rs deleted file mode 100644 index 33b3349..0000000 --- a/matching_handlers/src/java/utils.rs +++ /dev/null @@ -1,18 +0,0 @@ -use model::{cst_node::Terminal, CSTNode}; - -pub fn find_identifier<'a>(node_children: &'a [CSTNode<'a>]) -> Option<&'a Terminal<'a>> { - node_children - .iter() - .find(|node| node.kind() == "identifier") - .and_then(|node| match node { - CSTNode::Terminal(terminal) => Some(terminal), - CSTNode::NonTerminal(_) => None, - }) -} - -pub fn find_child_of_kind<'a>( - node_children: &'a [CSTNode<'a>], - kind: &'a str, -) -> Option<&'a CSTNode<'a>> { - node_children.iter().find(|node| node.kind() == kind) -} diff --git a/merge/src/merge.rs b/merge/src/merge.rs index 6f12709..1b06b62 100644 --- a/merge/src/merge.rs +++ b/merge/src/merge.rs @@ -98,6 +98,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![], + ..Default::default() }), &Matchings::empty(), &Matchings::empty(), diff --git a/merge/src/ordered_merge.rs b/merge/src/ordered_merge.rs index eff9d01..24bd2a3 100644 --- a/merge/src/ordered_merge.rs +++ b/merge/src/ordered_merge.rs @@ -308,6 +308,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); assert_merge_is_correct_and_idempotent_with_respect_to_parent_side( @@ -328,6 +329,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![], + ..Default::default() }); let parent = CSTNode::NonTerminal(NonTerminal { id: uuid::Uuid::new_v4(), @@ -353,6 +355,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); assert_merge_is_correct_and_idempotent_with_respect_to_parent_side( @@ -373,6 +376,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![], + ..Default::default() }); let initially_empty_parent = CSTNode::NonTerminal(NonTerminal { @@ -382,6 +386,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![], + ..Default::default() }); let parent_that_added = CSTNode::NonTerminal(NonTerminal { @@ -398,6 +403,7 @@ mod tests { value: "value_a", is_block_end_delimiter: false, })], + ..Default::default() }); let expected_merge = MergedCSTNode::NonTerminal { @@ -433,6 +439,7 @@ mod tests { value: "value_a", is_block_end_delimiter: false, })], + ..Default::default() }); let unchanged_parent = CSTNode::NonTerminal(NonTerminal { @@ -449,6 +456,7 @@ mod tests { value: "value_a", is_block_end_delimiter: false, })], + ..Default::default() }); let parent_that_added = CSTNode::NonTerminal(NonTerminal { @@ -475,6 +483,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); let merge = MergedCSTNode::NonTerminal { @@ -517,6 +526,7 @@ mod tests { value: "value_a", is_block_end_delimiter: false, })], + ..Default::default() }); let changed_parent = CSTNode::NonTerminal(NonTerminal { @@ -533,6 +543,7 @@ mod tests { value: "value_b", is_block_end_delimiter: false, })], + ..Default::default() }); let unchanged_parent = CSTNode::NonTerminal(NonTerminal { @@ -549,6 +560,7 @@ mod tests { value: "value_a", is_block_end_delimiter: false, })], + ..Default::default() }); let expected_merge = MergedCSTNode::NonTerminal { @@ -591,7 +603,9 @@ mod tests { value: "value_a", is_block_end_delimiter: false, })], + ..Default::default() })], + ..Default::default() }); let parent_a = CSTNode::NonTerminal(NonTerminal { @@ -614,7 +628,9 @@ mod tests { value: "value_b", is_block_end_delimiter: false, })], + ..Default::default() })], + ..Default::default() }); let parent_b = CSTNode::NonTerminal(NonTerminal { @@ -637,7 +653,9 @@ mod tests { value: "value_c", is_block_end_delimiter: false, })], + ..Default::default() })], + ..Default::default() }); let matching_configuration = MatchingConfiguration::from(Language::Java); @@ -727,6 +745,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![], + ..Default::default() }); let left = CSTNode::NonTerminal(NonTerminal { @@ -743,6 +762,7 @@ mod tests { value: "value_a", is_block_end_delimiter: false, })], + ..Default::default() }); let right = CSTNode::NonTerminal(NonTerminal { @@ -759,6 +779,7 @@ mod tests { value: "value_b", is_block_end_delimiter: false, })], + ..Default::default() }); assert_merge_output_is( @@ -808,6 +829,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); let left = CSTNode::NonTerminal(NonTerminal { @@ -834,6 +856,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); let right = CSTNode::NonTerminal(NonTerminal { @@ -850,6 +873,7 @@ mod tests { value: "value_b", is_block_end_delimiter: false, })], + ..Default::default() }); let expected_merge = MergedCSTNode::NonTerminal { @@ -888,6 +912,7 @@ mod tests { value: "value_b", is_block_end_delimiter: false, })], + ..Default::default() }), CSTNode::Terminal(Terminal { id: uuid::Uuid::new_v4(), @@ -898,6 +923,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); let left = CSTNode::NonTerminal(NonTerminal { @@ -921,6 +947,7 @@ mod tests { value: "value_c", is_block_end_delimiter: false, })], + ..Default::default() }), CSTNode::Terminal(Terminal { id: uuid::Uuid::new_v4(), @@ -931,6 +958,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); let right = CSTNode::NonTerminal(NonTerminal { @@ -947,6 +975,7 @@ mod tests { value: "value_a", is_block_end_delimiter: false, })], + ..Default::default() }); assert_merge_output_is( @@ -1026,6 +1055,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); let unchanged_parent = CSTNode::NonTerminal(NonTerminal { @@ -1052,6 +1082,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); let changed_parent = CSTNode::NonTerminal(NonTerminal { @@ -1086,6 +1117,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); let expected_merge = MergedCSTNode::NonTerminal { @@ -1131,6 +1163,7 @@ mod tests { value: "value_b", is_block_end_delimiter: false, })], + ..Default::default() }); let parent_a = CSTNode::NonTerminal(NonTerminal { @@ -1147,6 +1180,7 @@ mod tests { value: "value_a", is_block_end_delimiter: false, })], + ..Default::default() }); let parent_b = CSTNode::NonTerminal(NonTerminal { @@ -1173,6 +1207,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); let expected_merge = MergedCSTNode::NonTerminal { @@ -1214,7 +1249,9 @@ mod tests { value: "value_b", is_block_end_delimiter: false, })], + ..Default::default() })], + ..Default::default() }); let parent_a = CSTNode::NonTerminal(NonTerminal { @@ -1231,6 +1268,7 @@ mod tests { value: "value_a", is_block_end_delimiter: false, })], + ..Default::default() }); let parent_b = CSTNode::NonTerminal(NonTerminal { @@ -1254,6 +1292,7 @@ mod tests { value: "value_c", is_block_end_delimiter: false, })], + ..Default::default() }), CSTNode::Terminal(Terminal { id: uuid::Uuid::new_v4(), @@ -1264,6 +1303,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); assert_merge_output_is( @@ -1325,6 +1365,7 @@ mod tests { start_position: Point { row: 0, column: 0 }, end_position: Point { row: 0, column: 7 }, children: vec![], + ..Default::default() }); let parent_a = CSTNode::NonTerminal(NonTerminal { @@ -1341,6 +1382,7 @@ mod tests { value: "value_a", is_block_end_delimiter: false, })], + ..Default::default() }); let parent_b = CSTNode::NonTerminal(NonTerminal { @@ -1367,6 +1409,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); let expected_merge = MergedCSTNode::NonTerminal { @@ -1419,6 +1462,7 @@ mod tests { is_block_end_delimiter: false, }), ], + ..Default::default() }); let parent_a = CSTNode::NonTerminal(NonTerminal { @@ -1435,6 +1479,7 @@ mod tests { value: "value_b", is_block_end_delimiter: false, })], + ..Default::default() }); let parent_b = CSTNode::NonTerminal(NonTerminal { @@ -1451,6 +1496,7 @@ mod tests { value: "value_c", is_block_end_delimiter: false, })], + ..Default::default() }); let expected_merge = MergedCSTNode::NonTerminal { @@ -1491,6 +1537,7 @@ mod tests { value: "value_b", is_block_end_delimiter: false, })], + ..Default::default() }), CSTNode::NonTerminal(NonTerminal { id: uuid::Uuid::new_v4(), @@ -1506,8 +1553,10 @@ mod tests { value: "value_c", is_block_end_delimiter: false, })], + ..Default::default() }), ], + ..Default::default() }); let parent_a = CSTNode::NonTerminal(NonTerminal { @@ -1530,7 +1579,9 @@ mod tests { value: "value_c", is_block_end_delimiter: false, })], + ..Default::default() })], + ..Default::default() }); let parent_b = CSTNode::NonTerminal(NonTerminal { @@ -1553,7 +1604,9 @@ mod tests { value: "value_c", is_block_end_delimiter: false, })], + ..Default::default() })], + ..Default::default() }); assert_merge_output_is( @@ -1603,6 +1656,7 @@ mod tests { end_position: Point { row: 0, column: 7 }, children: vec![], are_children_unordered: true, + ..Default::default() }; let kind_b = NonTerminal { id: uuid::Uuid::new_v4(), @@ -1611,6 +1665,7 @@ mod tests { end_position: Point { row: 0, column: 7 }, children: vec![], are_children_unordered: true, + ..Default::default() }; let matchings = Matchings::empty(); diff --git a/merge/src/unordered_merge.rs b/merge/src/unordered_merge.rs index 7df782f..4bcf505 100644 --- a/merge/src/unordered_merge.rs +++ b/merge/src/unordered_merge.rs @@ -234,6 +234,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); let parent_a = CSTNode::NonTerminal(NonTerminal { @@ -268,6 +269,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); let parent_b = CSTNode::NonTerminal(NonTerminal { @@ -294,6 +296,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); let merge = MergedCSTNode::NonTerminal { @@ -345,6 +348,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); let parent_a = CSTNode::NonTerminal(NonTerminal { @@ -376,6 +380,7 @@ mod tests { end_position: model::Point { row: 0, column: 1 }, is_block_end_delimiter: false, })], + ..Default::default() }), CSTNode::Terminal(Terminal { id: uuid::Uuid::new_v4(), @@ -386,6 +391,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); let parent_b = CSTNode::NonTerminal(NonTerminal { @@ -417,6 +423,7 @@ mod tests { end_position: model::Point { row: 0, column: 1 }, is_block_end_delimiter: false, })], + ..Default::default() }), CSTNode::Terminal(Terminal { id: uuid::Uuid::new_v4(), @@ -427,6 +434,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); let expected_merge = MergedCSTNode::NonTerminal { @@ -500,6 +508,8 @@ mod tests { is_block_end_delimiter: false, }), ], + identifier: Some(vec!["main"]), + ..Default::default() }), CSTNode::Terminal(Terminal { id: uuid::Uuid::new_v4(), @@ -510,6 +520,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); let parent_a = CSTNode::NonTerminal(NonTerminal { @@ -551,6 +562,8 @@ mod tests { is_block_end_delimiter: false, }), ], + identifier: Some(vec!["main"]), + ..Default::default() }), CSTNode::Terminal(Terminal { id: uuid::Uuid::new_v4(), @@ -561,6 +574,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); let parent_b = CSTNode::NonTerminal(NonTerminal { @@ -587,6 +601,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); let expected_merge = MergedCSTNode::NonTerminal { @@ -668,6 +683,8 @@ mod tests { is_block_end_delimiter: false, }), ], + identifier: Some(vec!["method"]), + ..Default::default() }), CSTNode::Terminal(Terminal { id: uuid::Uuid::new_v4(), @@ -678,6 +695,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); let parent_a = CSTNode::NonTerminal(NonTerminal { @@ -735,6 +753,8 @@ mod tests { is_block_end_delimiter: false, }), ], + identifier: Some(vec!["method"]), + ..Default::default() }), CSTNode::Terminal(Terminal { id: uuid::Uuid::new_v4(), @@ -745,6 +765,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); let parent_b = CSTNode::NonTerminal(NonTerminal { @@ -771,6 +792,7 @@ mod tests { is_block_end_delimiter: true, }), ], + ..Default::default() }); assert_merge_output_is( @@ -868,6 +890,7 @@ mod tests { end_position: Point { row: 0, column: 7 }, children: vec![], are_children_unordered: true, + ..Default::default() }; let kind_b = NonTerminal { id: uuid::Uuid::new_v4(), @@ -876,6 +899,7 @@ mod tests { end_position: Point { row: 0, column: 7 }, children: vec![], are_children_unordered: true, + ..Default::default() }; let matchings = Matchings::empty(); diff --git a/model/src/cst_node.rs b/model/src/cst_node.rs index b615fb7..0734cff 100644 --- a/model/src/cst_node.rs +++ b/model/src/cst_node.rs @@ -64,6 +64,13 @@ impl CSTNode<'_> { pub fn get_tree_size(&self) -> usize { self.get_subtree_size() + 1 } + + pub fn has_identifier(&self) -> bool { + match self { + CSTNode::Terminal(_) => true, + CSTNode::NonTerminal(node) => node.get_identifier().is_some(), + } + } } #[derive(Debug, Default, Clone)] @@ -74,6 +81,7 @@ pub struct NonTerminal<'a> { pub start_position: Point, pub end_position: Point, pub are_children_unordered: bool, + pub identifier: Option>, } impl<'a> PartialEq for NonTerminal<'a> { @@ -108,6 +116,10 @@ impl NonTerminal<'_> { format!("{} {}", acc, node.contents()) }) } + + pub fn get_identifier(&self) -> Option<&Vec<&str>> { + self.identifier.as_ref() + } } impl<'a> TryFrom<&'a CSTNode<'a>> for &'a NonTerminal<'a> { @@ -161,4 +173,8 @@ impl Terminal<'_> { pub fn contents(&self) -> String { String::from(self.value) } + + pub fn get_identifier(&self) -> (&str, &str) { + (self.kind, self.value) + } } diff --git a/parsing/Cargo.toml b/parsing/Cargo.toml index c4470f2..f100d45 100644 --- a/parsing/Cargo.toml +++ b/parsing/Cargo.toml @@ -8,6 +8,7 @@ edition = "2021" [dependencies] tree-sitter = "0.20.9" tree-sitter-java = "0.20.0" +regex = "1.10.5" model = { path = "../model" } parsing_handlers = { path = "../parsing_handlers" } log = { workspace = true } diff --git a/parsing/src/identifier_extractor.rs b/parsing/src/identifier_extractor.rs new file mode 100644 index 0000000..6194a56 --- /dev/null +++ b/parsing/src/identifier_extractor.rs @@ -0,0 +1,59 @@ +use regex::Regex; +use tree_sitter::{Language, Node, Query, QueryCapture, QueryCursor}; + +pub trait IdentifierExtractor { + fn extract_identifier_from_node<'a>(&self, node: Node, src: &'a str) -> Option>; +} + +pub struct RegularExpression(Regex); + +impl RegularExpression { + pub fn new(regex: &'static str) -> Self { + Self( + regex::Regex::new(regex) + .expect("Invalid regex provided for building RegularExpression"), + ) + } +} + +impl IdentifierExtractor for RegularExpression { + fn extract_identifier_from_node<'a>(&self, node: Node, src: &'a str) -> Option> { + self.0 + .find(node.utf8_text(src.as_bytes()).ok()?) + .map(|m| vec![m.as_str()]) + } +} + +pub struct TreeSitterQuery(Query); + +impl TreeSitterQuery { + pub fn new(query: &'static str, language: Language) -> Self { + Self( + Query::new(language, query) + .expect("Invalid Query provided for building TreeSitterQuery"), + ) + } +} + +impl IdentifierExtractor for TreeSitterQuery { + fn extract_identifier_from_node<'a>(&self, node: Node, src: &'a str) -> Option> { + let mut cursor = QueryCursor::new(); + let identifier = cursor + .matches(&self.0, node, src.as_bytes()) + .flat_map(|a_match| { + a_match.captures.iter().filter_map(|capture| { + if capture_is_within_node_bounds(capture, &node) { + capture.node.utf8_text(src.as_bytes()).ok() + } else { + None + } + }) + }) + .collect(); + Some(identifier) + } +} + +fn capture_is_within_node_bounds(capture: &QueryCapture, node: &Node) -> bool { + capture.node.start_byte() >= node.start_byte() && capture.node.end_byte() <= node.end_byte() +} diff --git a/parsing/src/lib.rs b/parsing/src/lib.rs index 41818ba..f81730d 100644 --- a/parsing/src/lib.rs +++ b/parsing/src/lib.rs @@ -1,3 +1,4 @@ +mod identifier_extractor; mod parse; mod tree_sitter_parser; diff --git a/parsing/src/parse.rs b/parsing/src/parse.rs index dd7b94e..57270e0 100644 --- a/parsing/src/parse.rs +++ b/parsing/src/parse.rs @@ -39,6 +39,10 @@ fn explore_node<'a>(node: Node, src: &'a str, config: &'a ParserConfiguration) - .map(|child| explore_node(child, src, config)) .collect(), are_children_unordered: config.kinds_with_unordered_children.contains(node.kind()), + identifier: config + .identifier_extractors + .get(node.kind()) + .and_then(|extractor| extractor.extract_identifier_from_node(node, src)), }) } } diff --git a/parsing/src/tree_sitter_parser.rs b/parsing/src/tree_sitter_parser.rs index 0e8b6ae..89b085d 100644 --- a/parsing/src/tree_sitter_parser.rs +++ b/parsing/src/tree_sitter_parser.rs @@ -1,6 +1,8 @@ use model::Language; use parsing_handlers::ParsingHandlers; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; + +use crate::identifier_extractor::{IdentifierExtractor, RegularExpression, TreeSitterQuery}; pub struct ParserConfiguration { pub(crate) language: tree_sitter::Language, @@ -8,6 +10,7 @@ pub struct ParserConfiguration { pub(crate) kinds_with_unordered_children: HashSet<&'static str>, pub(crate) block_end_delimiters: HashSet<&'static str>, pub(crate) handlers: ParsingHandlers, + pub(crate) identifier_extractors: HashMap<&'static str, Box>, } impl From for ParserConfiguration { @@ -24,6 +27,44 @@ impl From for ParserConfiguration { .into(), block_end_delimiters: ["}"].into(), handlers: ParsingHandlers::from(Language::Java), + identifier_extractors: { + let mut map: HashMap<&'static str, Box> = + HashMap::new(); + map.insert("constructor_declaration", Box::new(TreeSitterQuery::new(r#"(constructor_declaration name: (identifier) @method_name [parameters: (formal_parameters [ (formal_parameter type: (_) @argument_type) (spread_parameter (type_identifier) @spread_parameter "..." @spread_indicator) ]) _ ])"#, tree_sitter_java::language()))); + map.insert("method_declaration", Box::new(TreeSitterQuery::new(r#"(method_declaration name: (identifier) @method_name [parameters: (formal_parameters [ (formal_parameter type: (_) @argument_type) (spread_parameter (type_identifier) @spread_parameter "..." @spread_indicator) ]) _ ])"#, tree_sitter_java::language()))); + map.insert( + "field_declaration", + Box::new(TreeSitterQuery::new( + r#"(variable_declarator name: _ @name)"#, + tree_sitter_java::language(), + )), + ); + map.insert( + "import_declaration", + Box::new(TreeSitterQuery::new( + r#"(import_declaration "import" _ @resource)"#, + tree_sitter_java::language(), + )), + ); + + map.insert( + "class_declaration", + Box::new(RegularExpression::new(r#"class [A-Za-z_][A-Za-z0-9_]*"#)), + ); + + map.insert( + "enum_declaration", + Box::new(RegularExpression::new(r#"enum [A-Za-z_][A-Za-z0-9_]*"#)), + ); + + map.insert( + "interface_declaration", + Box::new(RegularExpression::new( + r#"interface [A-Za-z_][A-Za-z0-9_]*"#, + )), + ); + map + }, }, } } diff --git a/parsing_handlers/src/java/remove_block_comments.rs b/parsing_handlers/src/java/remove_block_comments.rs index 2d12a41..6654331 100644 --- a/parsing_handlers/src/java/remove_block_comments.rs +++ b/parsing_handlers/src/java/remove_block_comments.rs @@ -15,6 +15,7 @@ pub fn remove_block_comments(root: CSTNode<'_>) -> CSTNode<'_> { .map(|node| remove_block_comments(node)) .collect(), are_children_unordered: non_terminal.are_children_unordered, + identifier: non_terminal.identifier, }), } } diff --git a/parsing_handlers/src/java/tweak_import_declarations.rs b/parsing_handlers/src/java/tweak_import_declarations.rs index d5c0a33..8b8be69 100644 --- a/parsing_handlers/src/java/tweak_import_declarations.rs +++ b/parsing_handlers/src/java/tweak_import_declarations.rs @@ -34,6 +34,7 @@ pub fn tweak_import_declarations(root: CSTNode<'_>) -> CSTNode<'_> { start_position: import_declarations_start, end_position: import_declarations_end, are_children_unordered: true, + identifier: None, }); let first_import_declaration_index = program @@ -63,6 +64,7 @@ pub fn tweak_import_declarations(root: CSTNode<'_>) -> CSTNode<'_> { end_position: program.end_position, children: new_program_children, are_children_unordered: program.are_children_unordered, + identifier: program.identifier, }) } }