From ad2fe09f4f0bb5d88b19cff4357830a31eb33d79 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Sat, 6 Apr 2024 16:04:50 -0300 Subject: [PATCH 1/7] feat: add enhancing of import declarations node to fix import issue --- .../merge.java | 2 +- .../base.java | 14 ++++ .../left.java | 13 ++++ .../merge.java | 1 + .../right.java | 12 +++ .../imports_are_merged_correctly/merge.java | 2 +- .../scenarios/jdime_matching_issue/merge.java | 2 +- .../merge.java | 2 +- model/src/cst_node.rs | 14 ++++ parsing/src/parse.rs | 77 +++++++++++++++++-- 10 files changed, 130 insertions(+), 9 deletions(-) create mode 100644 bin/tests/scenarios/import_declarations_grouping_node/base.java create mode 100644 bin/tests/scenarios/import_declarations_grouping_node/left.java create mode 100644 bin/tests/scenarios/import_declarations_grouping_node/merge.java create mode 100644 bin/tests/scenarios/import_declarations_grouping_node/right.java diff --git a/bin/tests/scenarios/fancy_argument_types_matching_issue/merge.java b/bin/tests/scenarios/fancy_argument_types_matching_issue/merge.java index fcd3bc4..229dfeb 100644 --- a/bin/tests/scenarios/fancy_argument_types_matching_issue/merge.java +++ b/bin/tests/scenarios/fancy_argument_types_matching_issue/merge.java @@ -1 +1 @@ -package de . fosd . jdime . common ; import de . fosd . jdime . common . operations . AddOperation ; import AST . * ; import de . fosd . jdime . common . operations . ConflictOperation ; public class ASTNodeArtifact extends Artifact < ASTNodeArtifact > { private ASTNodeArtifact ( final ASTNode < ? > astnode ) { assert ( astnode != null ) ; this . astnode = astnode ; this . initializeChildren ( ) ; } public ASTNodeArtifact ( final FileArtifact artifact ) { assert ( artifact != null ) ; setRevision ( artifact . getRevision ( ) ) ; ASTNode < ? > astnode ; if ( artifact . isEmpty ( ) ) { astnode = new ASTNode < > ( ) ; } else { Program p = initProgram ( ) ; p . addSourceFile ( artifact . getPath ( ) ) ; astnode = p ; } this . astnode = astnode ; this . initializeChildren ( ) ; renumberTree ( ) ; } } \ No newline at end of file +package de . fosd . jdime . common ; import AST . * ; import de . fosd . jdime . common . operations . ConflictOperation ; import de . fosd . jdime . common . operations . AddOperation ; public class ASTNodeArtifact extends Artifact < ASTNodeArtifact > { private ASTNodeArtifact ( final ASTNode < ? > astnode ) { assert ( astnode != null ) ; this . astnode = astnode ; this . initializeChildren ( ) ; } public ASTNodeArtifact ( final FileArtifact artifact ) { assert ( artifact != null ) ; setRevision ( artifact . getRevision ( ) ) ; ASTNode < ? > astnode ; if ( artifact . isEmpty ( ) ) { astnode = new ASTNode < > ( ) ; } else { Program p = initProgram ( ) ; p . addSourceFile ( artifact . getPath ( ) ) ; astnode = p ; } this . astnode = astnode ; this . initializeChildren ( ) ; renumberTree ( ) ; } } \ No newline at end of file diff --git a/bin/tests/scenarios/import_declarations_grouping_node/base.java b/bin/tests/scenarios/import_declarations_grouping_node/base.java new file mode 100644 index 0000000..dca1ec7 --- /dev/null +++ b/bin/tests/scenarios/import_declarations_grouping_node/base.java @@ -0,0 +1,14 @@ +package br.fosd.jdime.stats; + +import java.text.DecimalFormat; +import java.util.HashMap; +import java.util.TreeSet; + +import org.apache.commons.lang3.ClassUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.log4j.Logger; + +import de.fosd.jdime.common.LangElem; + +public class ASTStats { +} diff --git a/bin/tests/scenarios/import_declarations_grouping_node/left.java b/bin/tests/scenarios/import_declarations_grouping_node/left.java new file mode 100644 index 0000000..94f43dc --- /dev/null +++ b/bin/tests/scenarios/import_declarations_grouping_node/left.java @@ -0,0 +1,13 @@ +package br.fosd.jdime.stats; + +import java.text.DecimalFormat; +import java.util.HashMap; +import java.util.TreeSet; + +import de.fosd.jdime.common.LangElem; +import org.apache.commons.lang3.ClassUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.log4j.Logger; + +public class ASTStats { +} diff --git a/bin/tests/scenarios/import_declarations_grouping_node/merge.java b/bin/tests/scenarios/import_declarations_grouping_node/merge.java new file mode 100644 index 0000000..1fa543b --- /dev/null +++ b/bin/tests/scenarios/import_declarations_grouping_node/merge.java @@ -0,0 +1 @@ +package br . fosd . jdime . stats ; import java . text . DecimalFormat ; import java . util . HashMap ; import java . util . TreeSet ; import de . fosd . jdime . common . LangElem ; import java . util . logging . Level ; import java . util . logging . Logger ; public class ASTStats { } \ No newline at end of file diff --git a/bin/tests/scenarios/import_declarations_grouping_node/right.java b/bin/tests/scenarios/import_declarations_grouping_node/right.java new file mode 100644 index 0000000..0ad5c79 --- /dev/null +++ b/bin/tests/scenarios/import_declarations_grouping_node/right.java @@ -0,0 +1,12 @@ +package br.fosd.jdime.stats; + +import java.text.DecimalFormat; +import java.util.HashMap; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +import de.fosd.jdime.common.LangElem; + +public class ASTStats { +} diff --git a/bin/tests/scenarios/imports_are_merged_correctly/merge.java b/bin/tests/scenarios/imports_are_merged_correctly/merge.java index bf12df3..51b0a11 100644 --- a/bin/tests/scenarios/imports_are_merged_correctly/merge.java +++ b/bin/tests/scenarios/imports_are_merged_correctly/merge.java @@ -1 +1 @@ -package de . fosd . jdime . merge ; import java . util . List ; import AST . * ; import de . fosd . jdime . operations . AddOperation ; import de . fosd . jdime . operations . ConflictOperation ; import de . fosd . jdime . operations . MergeOperation ; import static de . fosd . jdime . artifact . Artifacts . root ; import static de . fosd . jdime . strdump . DumpMode . PLAINTEXT_TREE ; \ No newline at end of file +package de . fosd . jdime . merge ; import java . util . List ; import AST . * ; import de . fosd . jdime . operations . AddOperation ; import de . fosd . jdime . operations . ConflictOperation ; import de . fosd . jdime . operations . MergeOperation ; import static de . fosd . jdime . artifact . Artifacts . root ; import static de . fosd . jdime . strdump . DumpMode . PLAINTEXT_TREE ; \ No newline at end of file diff --git a/bin/tests/scenarios/jdime_matching_issue/merge.java b/bin/tests/scenarios/jdime_matching_issue/merge.java index fca08b5..fece629 100644 --- a/bin/tests/scenarios/jdime_matching_issue/merge.java +++ b/bin/tests/scenarios/jdime_matching_issue/merge.java @@ -1 +1 @@ - package de . fosd . jdime . artifact ; import java . security . MessageDigest ; public abstract class Artifact < T extends Artifact < T > > implements Comparable < T > , StatisticsInterface { public boolean hasChanges ( Revision revision ) { if ( this . revision . equals ( revision ) ) { return false ; } if ( ! hasMatching ( revision ) ) { return true ; } T match = getMatching ( revision ) . getMatchingArtifact ( this ) ; return getTreeSize ( ) != match . getTreeSize ( ) || ! getTreeHash ( ) . equals ( match . getTreeHash ( ) ) ; } } \ No newline at end of file +package de . fosd . jdime . artifact ; import java . security . MessageDigest ; public abstract class Artifact < T extends Artifact < T > > implements Comparable < T > , StatisticsInterface { public boolean hasChanges ( Revision revision ) { if ( this . revision . equals ( revision ) ) { return false ; } if ( ! hasMatching ( revision ) ) { return true ; } T match = getMatching ( revision ) . getMatchingArtifact ( this ) ; return getTreeSize ( ) != match . getTreeSize ( ) || ! getTreeHash ( ) . equals ( match . getTreeHash ( ) ) ; } } \ No newline at end of file diff --git a/bin/tests/scenarios/overload_methods_with_spread_parameter/merge.java b/bin/tests/scenarios/overload_methods_with_spread_parameter/merge.java index 9e34e90..0ec2e12 100644 --- a/bin/tests/scenarios/overload_methods_with_spread_parameter/merge.java +++ b/bin/tests/scenarios/overload_methods_with_spread_parameter/merge.java @@ -1 +1 @@ - package de . fosd . jdime ; import java . io . File ; import java . net . URISyntaxException ; import java . net . URL ; import java . util . Arrays ; import org . junit . BeforeClass ; import static org . junit . Assert . assertNotNull ; import static org . junit . Assert . assertTrue ; import static org . junit . Assert . fail ; public class JDimeTest { protected static File file ( File parent , String child ) { File f = new File ( parent , child ) ; assertTrue ( f + " does not exist." , f . exists ( ) ) ; return f ; } protected static File file ( File parent , String name , String ... names ) { if ( names != null ) { String path = String . format ( "%s/%s" , name , String . join ( "/" , names ) ) ; return file ( parent , path ) ; } else { return file ( parent , name ) ; } } protected static File file ( String path ) { URL res = JDimeTest . class . getResource ( path ) ; assertNotNull ( "The file " + path + " was not found." , res ) ; try { return new File ( res . toURI ( ) ) ; } catch ( URISyntaxException e ) { fail ( e . getMessage ( ) ) ; return null ; } } protected static File file ( String name , String ... names ) { if ( names != null ) { String path = String . format ( "/%s/%s" , name , String . join ( "/" , names ) ) ; return file ( path ) ; } else { return file ( "/" + name ) ; } } } \ No newline at end of file +package de . fosd . jdime ; import java . io . File ; import java . net . URISyntaxException ; import java . net . URL ; import java . util . Arrays ; import org . junit . BeforeClass ; import static org . junit . Assert . assertNotNull ; import static org . junit . Assert . assertTrue ; import static org . junit . Assert . fail ; public class JDimeTest { protected static File file ( File parent , String child ) { File f = new File ( parent , child ) ; assertTrue ( f + " does not exist." , f . exists ( ) ) ; return f ; } protected static File file ( File parent , String name , String ... names ) { if ( names != null ) { String path = String . format ( "%s/%s" , name , String . join ( "/" , names ) ) ; return file ( parent , path ) ; } else { return file ( parent , name ) ; } } protected static File file ( String path ) { URL res = JDimeTest . class . getResource ( path ) ; assertNotNull ( "The file " + path + " was not found." , res ) ; try { return new File ( res . toURI ( ) ) ; } catch ( URISyntaxException e ) { fail ( e . getMessage ( ) ) ; return null ; } } protected static File file ( String name , String ... names ) { if ( names != null ) { String path = String . format ( "/%s/%s" , name , String . join ( "/" , names ) ) ; return file ( path ) ; } else { return file ( "/" + name ) ; } } } \ No newline at end of file diff --git a/model/src/cst_node.rs b/model/src/cst_node.rs index f6d850a..e8c99f5 100644 --- a/model/src/cst_node.rs +++ b/model/src/cst_node.rs @@ -34,6 +34,20 @@ impl CSTNode<'_> { CSTNode::NonTerminal(node) => node.contents(), } } + + pub fn start_position(&self) -> Point { + match self { + CSTNode::Terminal(node) => node.start_position, + CSTNode::NonTerminal(node) => node.start_position, + } + } + + pub fn end_position(&self) -> Point { + match self { + CSTNode::Terminal(node) => node.end_position, + CSTNode::NonTerminal(node) => node.end_position, + } + } } #[derive(Debug, Default, Clone)] diff --git a/parsing/src/parse.rs b/parsing/src/parse.rs index 4ca0f0d..1768994 100644 --- a/parsing/src/parse.rs +++ b/parsing/src/parse.rs @@ -43,6 +43,73 @@ fn explore_node<'a>(node: Node, src: &'a str, config: &'a ParserConfiguration) - } } +fn tweak_import_declarations(root: CSTNode<'_>) -> CSTNode<'_> { + match root.kind() { + "program" => match root { + CSTNode::Terminal(_) => root.to_owned(), + CSTNode::NonTerminal(program) => { + let import_declaration_children: Vec = program + .children + .iter() + .filter(|node| node.kind() == "import_declaration") + .cloned() + .collect(); + + if import_declaration_children.is_empty() { + return CSTNode::NonTerminal(program); + } + + let import_declarations_start = import_declaration_children + .first() + .unwrap() + .start_position(); + + let import_declarations_end = + import_declaration_children.last().unwrap().end_position(); + + let import_declarations = CSTNode::NonTerminal(NonTerminal { + id: uuid::Uuid::new_v4(), + kind: "import_declarations", + children: import_declaration_children, + start_position: import_declarations_start, + end_position: import_declarations_end, + are_children_unordered: true, + }); + + let first_import_declaration_index = program + .children + .iter() + .position(|node| node.kind() == "import_declaration") + .unwrap(); + let last_import_declaration_index = program + .children + .iter() + .rposition(|node| node.kind() == "import_declaration") + .unwrap(); + + let mut new_program_children: Vec> = vec![]; + new_program_children.extend_from_slice( + &program.children.iter().as_slice()[..first_import_declaration_index], + ); + new_program_children.push(import_declarations); + new_program_children.extend_from_slice( + &program.children.iter().as_slice()[last_import_declaration_index + 1..], + ); + + CSTNode::NonTerminal(NonTerminal { + id: program.id, + kind: program.kind, + start_position: program.start_position, + end_position: program.end_position, + children: new_program_children, + are_children_unordered: false, + }) + } + }, + _ => root.to_owned(), + } +} + pub fn parse_string<'a>( src: &'a str, config: &'a ParserConfiguration, @@ -52,9 +119,9 @@ pub fn parse_string<'a>( .set_language(config.language) .map_err(|_| "There was an error while setting the parser language")?; - let parsed = parser.parse(src, None); - match parsed { - Some(parsed) => Result::Ok(explore_node(parsed.root_node(), src, config)), - None => Result::Err("It was not possible to parse the tree."), - } + let parsed = parser + .parse(src, None) + .ok_or("It was not possible to parse the tree.")?; + let root = explore_node(parsed.root_node(), src, config); + Ok(tweak_import_declarations(root)) } From 002937cb6ae44c47a77df6a14ba9e53cd311a03b Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Wed, 1 May 2024 21:54:35 -0300 Subject: [PATCH 2/7] feat: tweak configuration --- matching/src/matching_configuration.rs | 1 + matching/src/unordered/mod.rs | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/matching/src/matching_configuration.rs b/matching/src/matching_configuration.rs index 91d2c32..e4921b0 100644 --- a/matching/src/matching_configuration.rs +++ b/matching/src/matching_configuration.rs @@ -24,6 +24,7 @@ impl From for MatchingConfiguration<'_> { "constructor_declaration", "field_declaration", "method_declaration", + "import_declaration", ] .into(), handlers: MatchingHandlers::from(Language::Java), diff --git a/matching/src/unordered/mod.rs b/matching/src/unordered/mod.rs index 95107b7..7b790f6 100644 --- a/matching/src/unordered/mod.rs +++ b/matching/src/unordered/mod.rs @@ -12,10 +12,18 @@ pub fn calculate_matchings<'a>( match (left, right) { (model::CSTNode::NonTerminal(left_nt), model::CSTNode::NonTerminal(right_nt)) => { if all_children_labeled(left_nt, config) && all_children_labeled(right_nt, config) { - log::debug!("Using unique label matching."); + log::debug!( + "Matching children of \"{}\" with \"{}\" using unique label matching.", + left.kind(), + right.kind() + ); unique_label::calculate_matchings(left, right, config) } else { - log::debug!("Using assignment problem matching."); + log::debug!( + "Matching children of \"{}\" with \"{}\" using assignment problem matching.", + left.kind(), + right.kind() + ); assignment_problem::calculate_matchings(left, right, config) } } From 764369411978695f1ddaebb033c6e8ca86526237 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Wed, 1 May 2024 22:05:53 -0300 Subject: [PATCH 3/7] refactor: initial generalization of parsing handler --- parsing/src/parse.rs | 69 +----------------------- parsing/src/tree_sitter_parser.rs | 88 ++++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 69 deletions(-) diff --git a/parsing/src/parse.rs b/parsing/src/parse.rs index 1768994..dd7b94e 100644 --- a/parsing/src/parse.rs +++ b/parsing/src/parse.rs @@ -43,73 +43,6 @@ fn explore_node<'a>(node: Node, src: &'a str, config: &'a ParserConfiguration) - } } -fn tweak_import_declarations(root: CSTNode<'_>) -> CSTNode<'_> { - match root.kind() { - "program" => match root { - CSTNode::Terminal(_) => root.to_owned(), - CSTNode::NonTerminal(program) => { - let import_declaration_children: Vec = program - .children - .iter() - .filter(|node| node.kind() == "import_declaration") - .cloned() - .collect(); - - if import_declaration_children.is_empty() { - return CSTNode::NonTerminal(program); - } - - let import_declarations_start = import_declaration_children - .first() - .unwrap() - .start_position(); - - let import_declarations_end = - import_declaration_children.last().unwrap().end_position(); - - let import_declarations = CSTNode::NonTerminal(NonTerminal { - id: uuid::Uuid::new_v4(), - kind: "import_declarations", - children: import_declaration_children, - start_position: import_declarations_start, - end_position: import_declarations_end, - are_children_unordered: true, - }); - - let first_import_declaration_index = program - .children - .iter() - .position(|node| node.kind() == "import_declaration") - .unwrap(); - let last_import_declaration_index = program - .children - .iter() - .rposition(|node| node.kind() == "import_declaration") - .unwrap(); - - let mut new_program_children: Vec> = vec![]; - new_program_children.extend_from_slice( - &program.children.iter().as_slice()[..first_import_declaration_index], - ); - new_program_children.push(import_declarations); - new_program_children.extend_from_slice( - &program.children.iter().as_slice()[last_import_declaration_index + 1..], - ); - - CSTNode::NonTerminal(NonTerminal { - id: program.id, - kind: program.kind, - start_position: program.start_position, - end_position: program.end_position, - children: new_program_children, - are_children_unordered: false, - }) - } - }, - _ => root.to_owned(), - } -} - pub fn parse_string<'a>( src: &'a str, config: &'a ParserConfiguration, @@ -123,5 +56,5 @@ pub fn parse_string<'a>( .parse(src, None) .ok_or("It was not possible to parse the tree.")?; let root = explore_node(parsed.root_node(), src, config); - Ok(tweak_import_declarations(root)) + Ok(config.handlers.run(root)) } diff --git a/parsing/src/tree_sitter_parser.rs b/parsing/src/tree_sitter_parser.rs index a112d6c..f14531b 100644 --- a/parsing/src/tree_sitter_parser.rs +++ b/parsing/src/tree_sitter_parser.rs @@ -1,11 +1,29 @@ -use model::Language; +use model::{cst_node::NonTerminal, CSTNode, Language}; use std::collections::HashSet; +pub type ParsingHandler = fn(root: CSTNode) -> CSTNode; + +pub struct ParsingHandlers { + handlers: Vec, +} + +impl ParsingHandlers { + pub fn new(handlers: Vec) -> Self { + Self { handlers } + } + + pub fn run<'a>(&'a self, root: CSTNode<'a>) -> CSTNode<'a> { + self.handlers.iter().fold(root, |acc, handler| handler(acc)) + } +} + + pub struct ParserConfiguration { pub(crate) language: tree_sitter::Language, pub(crate) stop_compilation_at: HashSet<&'static str>, pub(crate) kinds_with_unordered_children: HashSet<&'static str>, pub(crate) block_end_delimiters: HashSet<&'static str>, + pub(crate) handlers: ParsingHandlers, } impl From for ParserConfiguration { @@ -21,7 +39,75 @@ impl From for ParserConfiguration { ] .into(), block_end_delimiters: ["}"].into(), + handlers: ParsingHandlers::new(vec![tweak_import_declarations]), }, } } } + +fn tweak_import_declarations(root: CSTNode<'_>) -> CSTNode<'_> { + match root.kind() { + "program" => match root { + CSTNode::Terminal(_) => root.to_owned(), + CSTNode::NonTerminal(program) => { + let import_declaration_children: Vec = program + .children + .iter() + .filter(|node| node.kind() == "import_declaration") + .cloned() + .collect(); + + if import_declaration_children.is_empty() { + return CSTNode::NonTerminal(program); + } + + let import_declarations_start = import_declaration_children + .first() + .unwrap() + .start_position(); + + let import_declarations_end = + import_declaration_children.last().unwrap().end_position(); + + let import_declarations = CSTNode::NonTerminal(NonTerminal { + id: uuid::Uuid::new_v4(), + kind: "import_declarations", + children: import_declaration_children, + start_position: import_declarations_start, + end_position: import_declarations_end, + are_children_unordered: true, + }); + + let first_import_declaration_index = program + .children + .iter() + .position(|node| node.kind() == "import_declaration") + .unwrap(); + let last_import_declaration_index = program + .children + .iter() + .rposition(|node| node.kind() == "import_declaration") + .unwrap(); + + let mut new_program_children: Vec> = vec![]; + new_program_children.extend_from_slice( + &program.children.iter().as_slice()[..first_import_declaration_index], + ); + new_program_children.push(import_declarations); + new_program_children.extend_from_slice( + &program.children.iter().as_slice()[last_import_declaration_index + 1..], + ); + + CSTNode::NonTerminal(NonTerminal { + id: program.id, + kind: program.kind, + start_position: program.start_position, + end_position: program.end_position, + children: new_program_children, + are_children_unordered: false, + }) + } + }, + _ => root.to_owned(), + } +} \ No newline at end of file From c5bda9f53bbace62986a9f22f43359f9f6d5a187 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Wed, 1 May 2024 22:11:27 -0300 Subject: [PATCH 4/7] refactor: move parsing handlers to a separate crate --- Cargo.lock | 10 +++ parsing/Cargo.toml | 1 + parsing/src/tree_sitter_parser.rs | 89 +------------------ parsing_handlers/Cargo.toml | 11 +++ parsing_handlers/src/java/mod.rs | 7 ++ .../src/java/tweak_import_declarations.rs | 68 ++++++++++++++ parsing_handlers/src/lib.rs | 28 ++++++ 7 files changed, 128 insertions(+), 86 deletions(-) create mode 100644 parsing_handlers/Cargo.toml create mode 100644 parsing_handlers/src/java/mod.rs create mode 100644 parsing_handlers/src/java/tweak_import_declarations.rs create mode 100644 parsing_handlers/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index b3feb0c..12af7ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -413,11 +413,21 @@ version = "0.1.0" dependencies = [ "log", "model", + "parsing_handlers", "tree-sitter", "tree-sitter-java", "uuid", ] +[[package]] +name = "parsing_handlers" +version = "0.1.0" +dependencies = [ + "log", + "model", + "uuid", +] + [[package]] name = "pathfinding" version = "4.9.1" diff --git a/parsing/Cargo.toml b/parsing/Cargo.toml index 56c5e48..c4470f2 100644 --- a/parsing/Cargo.toml +++ b/parsing/Cargo.toml @@ -9,5 +9,6 @@ edition = "2021" tree-sitter = "0.20.9" tree-sitter-java = "0.20.0" model = { path = "../model" } +parsing_handlers = { path = "../parsing_handlers" } log = { workspace = true } uuid = { workspace = true } diff --git a/parsing/src/tree_sitter_parser.rs b/parsing/src/tree_sitter_parser.rs index f14531b..0e8b6ae 100644 --- a/parsing/src/tree_sitter_parser.rs +++ b/parsing/src/tree_sitter_parser.rs @@ -1,23 +1,7 @@ -use model::{cst_node::NonTerminal, CSTNode, Language}; +use model::Language; +use parsing_handlers::ParsingHandlers; use std::collections::HashSet; -pub type ParsingHandler = fn(root: CSTNode) -> CSTNode; - -pub struct ParsingHandlers { - handlers: Vec, -} - -impl ParsingHandlers { - pub fn new(handlers: Vec) -> Self { - Self { handlers } - } - - pub fn run<'a>(&'a self, root: CSTNode<'a>) -> CSTNode<'a> { - self.handlers.iter().fold(root, |acc, handler| handler(acc)) - } -} - - pub struct ParserConfiguration { pub(crate) language: tree_sitter::Language, pub(crate) stop_compilation_at: HashSet<&'static str>, @@ -39,75 +23,8 @@ impl From for ParserConfiguration { ] .into(), block_end_delimiters: ["}"].into(), - handlers: ParsingHandlers::new(vec![tweak_import_declarations]), + handlers: ParsingHandlers::from(Language::Java), }, } } } - -fn tweak_import_declarations(root: CSTNode<'_>) -> CSTNode<'_> { - match root.kind() { - "program" => match root { - CSTNode::Terminal(_) => root.to_owned(), - CSTNode::NonTerminal(program) => { - let import_declaration_children: Vec = program - .children - .iter() - .filter(|node| node.kind() == "import_declaration") - .cloned() - .collect(); - - if import_declaration_children.is_empty() { - return CSTNode::NonTerminal(program); - } - - let import_declarations_start = import_declaration_children - .first() - .unwrap() - .start_position(); - - let import_declarations_end = - import_declaration_children.last().unwrap().end_position(); - - let import_declarations = CSTNode::NonTerminal(NonTerminal { - id: uuid::Uuid::new_v4(), - kind: "import_declarations", - children: import_declaration_children, - start_position: import_declarations_start, - end_position: import_declarations_end, - are_children_unordered: true, - }); - - let first_import_declaration_index = program - .children - .iter() - .position(|node| node.kind() == "import_declaration") - .unwrap(); - let last_import_declaration_index = program - .children - .iter() - .rposition(|node| node.kind() == "import_declaration") - .unwrap(); - - let mut new_program_children: Vec> = vec![]; - new_program_children.extend_from_slice( - &program.children.iter().as_slice()[..first_import_declaration_index], - ); - new_program_children.push(import_declarations); - new_program_children.extend_from_slice( - &program.children.iter().as_slice()[last_import_declaration_index + 1..], - ); - - CSTNode::NonTerminal(NonTerminal { - id: program.id, - kind: program.kind, - start_position: program.start_position, - end_position: program.end_position, - children: new_program_children, - are_children_unordered: false, - }) - } - }, - _ => root.to_owned(), - } -} \ No newline at end of file diff --git a/parsing_handlers/Cargo.toml b/parsing_handlers/Cargo.toml new file mode 100644 index 0000000..a6e772a --- /dev/null +++ b/parsing_handlers/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "parsing_handlers" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +model = { path = "../model" } +log = { workspace = true } +uuid = { workspace = true } diff --git a/parsing_handlers/src/java/mod.rs b/parsing_handlers/src/java/mod.rs new file mode 100644 index 0000000..311f7fa --- /dev/null +++ b/parsing_handlers/src/java/mod.rs @@ -0,0 +1,7 @@ +mod tweak_import_declarations; + +use crate::ParsingHandlers; + +pub fn get_default_java_parsing_handlers() -> ParsingHandlers { + ParsingHandlers::new(vec![tweak_import_declarations::tweak_import_declarations]) +} diff --git a/parsing_handlers/src/java/tweak_import_declarations.rs b/parsing_handlers/src/java/tweak_import_declarations.rs new file mode 100644 index 0000000..124e520 --- /dev/null +++ b/parsing_handlers/src/java/tweak_import_declarations.rs @@ -0,0 +1,68 @@ +use model::{cst_node::NonTerminal, CSTNode}; + +pub fn tweak_import_declarations(root: CSTNode<'_>) -> CSTNode<'_> { + match root.kind() { + "program" => match root { + CSTNode::Terminal(_) => root.to_owned(), + CSTNode::NonTerminal(program) => { + let import_declaration_children: Vec = program + .children + .iter() + .filter(|node| node.kind() == "import_declaration") + .cloned() + .collect(); + + if import_declaration_children.is_empty() { + return CSTNode::NonTerminal(program); + } + + let import_declarations_start = import_declaration_children + .first() + .unwrap() + .start_position(); + + let import_declarations_end = + import_declaration_children.last().unwrap().end_position(); + + let import_declarations = CSTNode::NonTerminal(NonTerminal { + id: uuid::Uuid::new_v4(), + kind: "import_declarations", + children: import_declaration_children, + start_position: import_declarations_start, + end_position: import_declarations_end, + are_children_unordered: true, + }); + + let first_import_declaration_index = program + .children + .iter() + .position(|node| node.kind() == "import_declaration") + .unwrap(); + let last_import_declaration_index = program + .children + .iter() + .rposition(|node| node.kind() == "import_declaration") + .unwrap(); + + let mut new_program_children: Vec> = vec![]; + new_program_children.extend_from_slice( + &program.children.iter().as_slice()[..first_import_declaration_index], + ); + new_program_children.push(import_declarations); + new_program_children.extend_from_slice( + &program.children.iter().as_slice()[last_import_declaration_index + 1..], + ); + + CSTNode::NonTerminal(NonTerminal { + id: program.id, + kind: program.kind, + start_position: program.start_position, + end_position: program.end_position, + children: new_program_children, + are_children_unordered: false, + }) + } + }, + _ => root.to_owned(), + } +} diff --git a/parsing_handlers/src/lib.rs b/parsing_handlers/src/lib.rs new file mode 100644 index 0000000..d47c234 --- /dev/null +++ b/parsing_handlers/src/lib.rs @@ -0,0 +1,28 @@ +mod java; + +use java::get_default_java_parsing_handlers; +use model::Language; + +pub type ParsingHandler = fn(root: model::CSTNode) -> model::CSTNode; + +pub struct ParsingHandlers { + handlers: Vec, +} + +impl ParsingHandlers { + pub fn new(handlers: Vec) -> Self { + Self { handlers } + } + + pub fn run<'a>(&'a self, root: model::CSTNode<'a>) -> model::CSTNode<'a> { + self.handlers.iter().fold(root, |acc, handler| handler(acc)) + } +} + +impl From for ParsingHandlers { + fn from(language: Language) -> Self { + match language { + Language::Java => get_default_java_parsing_handlers(), + } + } +} From 0dfab92e6878f9f9b2953418ea4f15d3cecccff7 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Wed, 1 May 2024 22:14:56 -0300 Subject: [PATCH 5/7] refactor: use early return --- .../src/java/tweak_import_declarations.rs | 113 +++++++++--------- 1 file changed, 57 insertions(+), 56 deletions(-) diff --git a/parsing_handlers/src/java/tweak_import_declarations.rs b/parsing_handlers/src/java/tweak_import_declarations.rs index 124e520..7e14bc5 100644 --- a/parsing_handlers/src/java/tweak_import_declarations.rs +++ b/parsing_handlers/src/java/tweak_import_declarations.rs @@ -1,68 +1,69 @@ use model::{cst_node::NonTerminal, CSTNode}; pub fn tweak_import_declarations(root: CSTNode<'_>) -> CSTNode<'_> { - match root.kind() { - "program" => match root { - CSTNode::Terminal(_) => root.to_owned(), - CSTNode::NonTerminal(program) => { - let import_declaration_children: Vec = program - .children - .iter() - .filter(|node| node.kind() == "import_declaration") - .cloned() - .collect(); + if root.kind() != "program" { + return root.to_owned(); + } + + match root { + CSTNode::Terminal(_) => root, + CSTNode::NonTerminal(program) => { + let import_declaration_children: Vec = program + .children + .iter() + .filter(|node| node.kind() == "import_declaration") + .cloned() + .collect(); - if import_declaration_children.is_empty() { - return CSTNode::NonTerminal(program); - } + if import_declaration_children.is_empty() { + return CSTNode::NonTerminal(program); + } - let import_declarations_start = import_declaration_children - .first() - .unwrap() - .start_position(); + let import_declarations_start = import_declaration_children + .first() + .unwrap() + .start_position(); - let import_declarations_end = - import_declaration_children.last().unwrap().end_position(); + let import_declarations_end = + import_declaration_children.last().unwrap().end_position(); - let import_declarations = CSTNode::NonTerminal(NonTerminal { - id: uuid::Uuid::new_v4(), - kind: "import_declarations", - children: import_declaration_children, - start_position: import_declarations_start, - end_position: import_declarations_end, - are_children_unordered: true, - }); + let import_declarations = CSTNode::NonTerminal(NonTerminal { + id: uuid::Uuid::new_v4(), + kind: "import_declarations", + children: import_declaration_children, + start_position: import_declarations_start, + end_position: import_declarations_end, + are_children_unordered: true, + }); - let first_import_declaration_index = program - .children - .iter() - .position(|node| node.kind() == "import_declaration") - .unwrap(); - let last_import_declaration_index = program - .children - .iter() - .rposition(|node| node.kind() == "import_declaration") - .unwrap(); + let first_import_declaration_index = program + .children + .iter() + .position(|node| node.kind() == "import_declaration") + .unwrap(); + let last_import_declaration_index = program + .children + .iter() + .rposition(|node| node.kind() == "import_declaration") + .unwrap(); - let mut new_program_children: Vec> = vec![]; - new_program_children.extend_from_slice( - &program.children.iter().as_slice()[..first_import_declaration_index], - ); - new_program_children.push(import_declarations); - new_program_children.extend_from_slice( - &program.children.iter().as_slice()[last_import_declaration_index + 1..], - ); + let mut new_program_children: Vec> = vec![]; + new_program_children.extend_from_slice( + &program.children.iter().as_slice()[..first_import_declaration_index], + ); + new_program_children.push(import_declarations); + new_program_children.extend_from_slice( + &program.children.iter().as_slice()[last_import_declaration_index + 1..], + ); - CSTNode::NonTerminal(NonTerminal { - id: program.id, - kind: program.kind, - start_position: program.start_position, - end_position: program.end_position, - children: new_program_children, - are_children_unordered: false, - }) - } - }, - _ => root.to_owned(), + CSTNode::NonTerminal(NonTerminal { + id: program.id, + kind: program.kind, + start_position: program.start_position, + end_position: program.end_position, + children: new_program_children, + are_children_unordered: program.are_children_unordered, + }) + } } } From 4f2404b4875e79c49fee2af34457c48426c1abd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Pedro=20Henrique?= Date: Wed, 1 May 2024 22:29:52 -0300 Subject: [PATCH 6/7] feat(parsing,parsing_handlers): add API to enhance parsing result (#49) This PR introduces an API that enables users to augment Tree Sitter's output, enriching the tree structure with additional information. --- parsing_handlers/src/language.rs | 10 ++++++++ parsing_handlers/src/lib.rs | 29 +++--------------------- parsing_handlers/src/parsing_handlers.rs | 17 ++++++++++++++ 3 files changed, 30 insertions(+), 26 deletions(-) create mode 100644 parsing_handlers/src/language.rs create mode 100644 parsing_handlers/src/parsing_handlers.rs diff --git a/parsing_handlers/src/language.rs b/parsing_handlers/src/language.rs new file mode 100644 index 0000000..8ed4c62 --- /dev/null +++ b/parsing_handlers/src/language.rs @@ -0,0 +1,10 @@ +use crate::{java::get_default_java_parsing_handlers, ParsingHandlers}; +use model::Language; + +impl From for ParsingHandlers { + fn from(language: Language) -> Self { + match language { + Language::Java => get_default_java_parsing_handlers(), + } + } +} diff --git a/parsing_handlers/src/lib.rs b/parsing_handlers/src/lib.rs index d47c234..d7a6388 100644 --- a/parsing_handlers/src/lib.rs +++ b/parsing_handlers/src/lib.rs @@ -1,28 +1,5 @@ mod java; +mod language; +mod parsing_handlers; -use java::get_default_java_parsing_handlers; -use model::Language; - -pub type ParsingHandler = fn(root: model::CSTNode) -> model::CSTNode; - -pub struct ParsingHandlers { - handlers: Vec, -} - -impl ParsingHandlers { - pub fn new(handlers: Vec) -> Self { - Self { handlers } - } - - pub fn run<'a>(&'a self, root: model::CSTNode<'a>) -> model::CSTNode<'a> { - self.handlers.iter().fold(root, |acc, handler| handler(acc)) - } -} - -impl From for ParsingHandlers { - fn from(language: Language) -> Self { - match language { - Language::Java => get_default_java_parsing_handlers(), - } - } -} +pub use parsing_handlers::{ParsingHandler, ParsingHandlers}; diff --git a/parsing_handlers/src/parsing_handlers.rs b/parsing_handlers/src/parsing_handlers.rs new file mode 100644 index 0000000..442c42a --- /dev/null +++ b/parsing_handlers/src/parsing_handlers.rs @@ -0,0 +1,17 @@ +use model::CSTNode; + +pub type ParsingHandler = fn(root: CSTNode) -> CSTNode; + +pub struct ParsingHandlers { + handlers: Vec, +} + +impl ParsingHandlers { + pub fn new(handlers: Vec) -> Self { + Self { handlers } + } + + pub fn run<'a>(&'a self, root: CSTNode<'a>) -> CSTNode<'a> { + self.handlers.iter().fold(root, |acc, handler| handler(acc)) + } +} From c8decac09d921e29ea0724df99a2db006aa3e6f8 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Wed, 1 May 2024 22:55:35 -0300 Subject: [PATCH 7/7] test: cover edge cases --- .../src/java/tweak_import_declarations.rs | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/parsing_handlers/src/java/tweak_import_declarations.rs b/parsing_handlers/src/java/tweak_import_declarations.rs index 7e14bc5..d5c0a33 100644 --- a/parsing_handlers/src/java/tweak_import_declarations.rs +++ b/parsing_handlers/src/java/tweak_import_declarations.rs @@ -67,3 +67,29 @@ pub fn tweak_import_declarations(root: CSTNode<'_>) -> CSTNode<'_> { } } } + +#[cfg(test)] +mod tests { + use model::{cst_node::Terminal, CSTNode}; + + #[test] + fn if_the_root_is_not_a_program_we_just_return_it() { + let root = CSTNode::Terminal(Terminal { + kind: "terminal", + value: "not_a_program", + ..Default::default() + }); + + assert_eq!(super::tweak_import_declarations(root.clone()), root); + } + + #[test] + fn if_somehow_the_root_is_a_terminal_node_we_just_return_it() { + let root = CSTNode::Terminal(Terminal { + kind: "program", + ..Default::default() + }); + + assert_eq!(super::tweak_import_declarations(root.clone()), root); + } +}