Skip to content

Commit

Permalink
feat: add regex matching as well
Browse files: browse the repository at this point in the history
  • Loading branch information
jpedroh committed Jul 20, 2024
1 parent 40a066e commit fb64495
Show file tree
Hide file tree
Showing 13 changed files with 91 additions and 53 deletions.
23 changes: 15 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 0 additions & 6 deletions bin/tests/node_reordering/merge.output.java

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 4 additions & 2 deletions matching/src/ordered/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::{
matching_configuration::MatchingConfiguration, matching_entry::MatchingEntry, MatchingRepresentation, Matchings
matching_configuration::MatchingConfiguration, matching_entry::MatchingEntry,
MatchingRepresentation, Matchings,
};
use model::{cst_node::NonTerminal, CSTNode};
use unordered_pair::UnorderedPair;
Expand Down Expand Up @@ -36,7 +37,8 @@ pub fn calculate_matchings<'a>(
..
}),
) => {
let root_matching: usize = (left.get_matching_representation() == right.get_matching_representation()).into();
let root_matching: usize =
(left.get_matching_representation() == right.get_matching_representation()).into();

let m = children_left.len();
let n = children_right.len();
Expand Down
4 changes: 3 additions & 1 deletion matching/src/unordered/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,7 @@ pub fn calculate_matchings<'a>(
}

/// Returns `true` when every child of `node` yields an identifier, i.e.
/// `get_identifier()` is `Some` for each entry in `node.children`.
///
/// Because this is built on `Iterator::all`, a node without children also
/// returns `true`.
///
/// NOTE(review): `config` is not read anywhere in this body — confirm whether
/// the parameter is still needed.
fn all_children_labeled(node: &NonTerminal, config: &MatchingConfiguration) -> bool {
node.children.iter().all(|child| child.get_identifier().is_some())
node.children
.iter()
.all(|child| child.get_identifier().is_some())
}
5 changes: 3 additions & 2 deletions matching/src/unordered/unique_label.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ pub fn calculate_matchings<'a>(

for child_left in children_left {
for child_right in children_right {
let is_same_identifier = child_left.get_identifier().is_some() && child_left.get_identifier() == child_right.get_identifier();

let is_same_identifier = child_left.get_identifier().is_some()
&& child_left.get_identifier() == child_right.get_identifier();

if is_same_identifier {
let child_matchings =
crate::calculate_matchings(child_left, child_right, config);
Expand Down
2 changes: 1 addition & 1 deletion matching_handlers/src/java/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use self::{

pub fn get_default_java_matching_handlers<'a>() -> MatchingHandlers<'a> {
let mut matching_handlers: MatchingHandlers<'a> = MatchingHandlers::new();

matching_handlers.register(
"constructor_declaration",
compute_matching_score_for_method_declaration,
Expand Down
2 changes: 1 addition & 1 deletion model/src/cst_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ impl CSTNode<'_> {
/// Returns the identifier of this node, if it has one.
///
/// * Terminal nodes always have an identifier: a two-element vector holding
///   the node's `kind` followed by its `value`.
/// * Non-terminal nodes delegate to the inner node's own `get_identifier`,
///   so the result may be `None`.
pub fn get_identifier(&self) -> Option<Vec<&str>> {
match self {
CSTNode::Terminal(node) => Some(vec![node.kind, node.value]),
CSTNode::NonTerminal(node) => node.get_identifier()
CSTNode::NonTerminal(node) => node.get_identifier(),
}
}
}
Expand Down
1 change: 1 addition & 0 deletions parsing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ edition = "2021"
[dependencies]
tree-sitter = "0.20.9"
tree-sitter-java = "0.20.0"
regex = "1.10.5"
model = { path = "../model" }
parsing_handlers = { path = "../parsing_handlers" }
log = { workspace = true }
Expand Down
95 changes: 63 additions & 32 deletions parsing/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ use model::{
};
use tree_sitter::{Node, Query, QueryCursor};

/// Strategy used by `extract_identifier_from_node` to derive a node's
/// identifier, selected by node kind.
enum IdentifierExtractor {
// Regex pattern run against the node's source text; the first match
// becomes the single-element identifier.
RegularExpression(&'static str),
// Tree-sitter query source compiled for the configured language; the text
// of every capture lying inside the node's byte range forms the identifier.
TreeSitterQuery(&'static str),
}

fn extract_identifier_from_node<'a>(
node: Node,
src: &'a str,
Expand All @@ -15,7 +20,8 @@ fn extract_identifier_from_node<'a>(
let queries = HashMap::from([
(
"constructor_declaration",
r#"
IdentifierExtractor::TreeSitterQuery(
r#"
(
constructor_declaration
name:
Expand All @@ -35,7 +41,7 @@ fn extract_identifier_from_node<'a>(
@argument_type
)
(
spread_parameter (type_identifier) @spread
spread_parameter (type_identifier) @spread_parameter "..." @spread_indicator
)
]
)
Expand All @@ -45,10 +51,12 @@ fn extract_identifier_from_node<'a>(
"#,
),
),
(
"method_declaration",
r#"
IdentifierExtractor::TreeSitterQuery(
r#"
(
method_declaration
name:
Expand All @@ -68,7 +76,7 @@ fn extract_identifier_from_node<'a>(
@argument_type
)
(
spread_parameter (type_identifier) @spread
spread_parameter (type_identifier) @spread_parameter "..." @spread_indicator
)
]
)
Expand All @@ -78,55 +86,78 @@ fn extract_identifier_from_node<'a>(
"#,
),
),
(
"field_declaration",
r#"(variable_declarator name: _ @name)"#,
IdentifierExtractor::TreeSitterQuery(r#"(variable_declarator name: _ @name)"#),
),
(
"import_declaration",
r#"(import_declaration ( scoped_identifier ) @namespace)"#,
IdentifierExtractor::TreeSitterQuery(
r#"(import_declaration ( scoped_identifier ) @namespace)"#,
),
),
(
"class_declaration",
r#"(class_declaration name: (identifier) @name)"#,
IdentifierExtractor::RegularExpression(
r#"class [A-Za-z_][A-Za-z0-9_]*"#,
),
),
(
"enum_declaration",
r#"(enum_declaration name: (identifier) @name)"#,
IdentifierExtractor::RegularExpression(
r#"enum [A-Za-z_][A-Za-z0-9_]*"#,
),
),
(
"interface_declaration",
r#"(interface_declaration name: (identifier) @name)"#,
IdentifierExtractor::RegularExpression(
r#"interface [A-Za-z_][A-Za-z0-9_]*"#,
),
),
]);

let query_string = queries.get(node.kind())?;
log::debug!("Using {:?} as query_string", query_string);
let query = Query::new(config.language, &query_string).ok()?;
let mut cursor = QueryCursor::new();
let identifier = cursor
.matches(&query, node, src.as_bytes())
.into_iter()
.flat_map(|a_match| {
a_match
.captures
.iter()
.filter(|capture| {
capture.node.start_byte() >= node.start_byte()
&& capture.node.end_byte() <= node.end_byte()

let identifier_extractor = queries.get(node.kind())?;

let identifier = match identifier_extractor {
IdentifierExtractor::RegularExpression(regex) => {
let identifier = regex::Regex::new(regex)
.unwrap()
.find(node.utf8_text(src.as_bytes()).ok()?)
.map(|m| m.as_str())?;
Some(vec![identifier])
}
IdentifierExtractor::TreeSitterQuery(query_string) => {
let query = Query::new(config.language, query_string).ok()?;
let mut cursor = QueryCursor::new();
let identifier = cursor
.matches(&query, node, src.as_bytes())
.into_iter()
.flat_map(|a_match| {
a_match
.captures
.iter()
.filter(|capture| {
capture.node.start_byte() >= node.start_byte()
&& capture.node.end_byte() <= node.end_byte()
})
.filter_map(|capture_index| {
capture_index.node.utf8_text(src.as_bytes()).ok()
})
})
.collect();
Some(identifier)
}
};

})
.filter_map(|capture_index| capture_index.node.utf8_text(src.as_bytes()).ok())
})
.collect();
log::debug!(
"Found {:?} as identifier for node {:?}",
identifier,
node.utf8_text(src.as_bytes()).ok()?
);

log::debug!("Found {:?} as identifier for node {:?}", identifier, node.utf8_text(src.as_bytes()).unwrap());

Some(identifier
)
identifier
}

fn explore_node<'a>(node: Node, src: &'a str, config: &'a ParserConfiguration) -> CSTNode<'a> {
Expand Down

0 comments on commit fb64495

Please sign in to comment.