From 9c282fbb9ea0542edb08fc0cb484781773b255dd Mon Sep 17 00:00:00 2001 From: williamfzc <178894043@qq.com> Date: Tue, 3 Sep 2024 21:11:07 +0800 Subject: [PATCH 1/6] chore: basic path calc --- src/graph.rs | 16 ++++++++++++++++ src/symbol.rs | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/src/graph.rs b/src/graph.rs index 9e97807..a0c0230 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -545,6 +545,10 @@ impl Graph { .collect(); FileMetadata { symbols } } + + pub fn file_paths(&self, src_file: &String, dst_file: &String) { + self.symbol_graph.file_paths(src_file, dst_file); + } } #[derive(Serialize, Deserialize, Clone)] @@ -700,4 +704,16 @@ mod tests { info!("{:?}: {}", item.symbol, item.weight); }); } + + #[test] + fn paths() { + tracing_subscriber::fmt::init(); + let mut config = GraphConfig::default(); + config.project_path = String::from("."); + let g = Graph::from(config); + let symbols = g.file_paths( + &String::from("src/extractor.rs"), + &String::from("src/graph.rs"), + ); + } } diff --git a/src/symbol.rs b/src/symbol.rs index 05aa02c..d4520e3 100644 --- a/src/symbol.rs +++ b/src/symbol.rs @@ -1,9 +1,11 @@ +use petgraph::algo::all_simple_paths; use petgraph::graph::{NodeIndex, UnGraph}; use petgraph::prelude::EdgeRef; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::hash::{Hash, Hasher}; use std::sync::Arc; +use tracing::info; use tree_sitter::Range; #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] @@ -248,4 +250,39 @@ impl SymbolGraph { let ref_index = self.symbol_mapping.get(symbol_id).unwrap(); self.neighbor_symbols(*ref_index) } + + pub fn file_paths(&self, src_file: &String, dst_file: &String) -> Vec { + if let (Some(src_index), Some(dst_index)) = ( + self.file_mapping.get(src_file), + self.file_mapping.get(dst_file), + ) { + // file -> symbol -> symbol -> file + // so at most 2 + let pairs: Vec<_> = + all_simple_paths::, _>(&self.g, *src_index, *dst_index, 1, Some(2)) + .filter(|each| each.len() > 0) + .map(|each| DefRefPair { + src_symbol: self.g[each[1]].get_symbol().unwrap().clone(), + dst_symbol: self.g[each[2]].get_symbol().unwrap().clone(), + }) + .collect(); + pairs.iter().for_each(|pair| { + info!( + "{} {} -> {} {}", + pair.src_symbol.file, + pair.src_symbol.name, + pair.dst_symbol.file, + pair.dst_symbol.name + ); + }); + } + + // fallback + vec![] + } +} + +pub struct DefRefPair { + pub src_symbol: Symbol, + pub dst_symbol: Symbol, } From 63f319e04a7b08afdfec130002a3e0906bdc3b16 Mon Sep 17 00:00:00 2001 From: williamfzc <178894043@qq.com> Date: Fri, 6 Sep 2024 23:01:48 +0800 Subject: [PATCH 2/6] chore: better rust rule --- src/rule.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/rule.rs b/src/rule.rs index e6c0a9f..620836e 100644 --- a/src/rule.rs +++ b/src/rule.rs @@ -16,9 +16,24 @@ pub fn get_rule(extractor_type: &Extractor) -> Rule { Extractor::Rust => Rule { import_grammar: r#" (identifier) @variable_name +(call_expression + function: (identifier) @function) +(call_expression + function: (field_expression + field: (field_identifier) @function.method)) +(call_expression + function: (scoped_identifier + "::" + name: (identifier) @function)) "#, export_grammar: r#" (function_item name: (identifier) @exported_symbol) +(function_signature_item name: (identifier) @exported_symbol) +(generic_function + function: (identifier) @exported_symbol) +(generic_function + function: (scoped_identifier + name: (identifier) @exported_symbol)) "#, }, From a22caba48d362a8fb65e6f9da9b6326bdd15123e Mon Sep 17 00:00:00 2001 From: williamfzc <178894043@qq.com> Date: Fri, 6 Sep 2024 23:09:25 +0800 Subject: [PATCH 3/6] chore: path api rename --- src/graph.rs | 22 +++++++++++++++++----- src/symbol.rs | 14 +++----------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/graph.rs b/src/graph.rs index a0c0230..98c9999 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -1,5 +1,5 @@ use crate::extractor::Extractor; -use crate::symbol::{Symbol, SymbolGraph, SymbolKind}; +use crate::symbol::{DefRefPair, Symbol, SymbolGraph, SymbolKind}; use cupido::collector::config::Collect; use cupido::collector::config::{get_collector, Config}; use cupido::relation::graph::RelationGraph as CupidoRelationGraph; @@ -12,7 +12,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::fs; use std::path::Path; use std::time::Instant; -use tracing::{debug, info, warn}; +use tracing::{debug, info}; pub struct FileContext { pub path: String, @@ -546,8 +546,8 @@ impl Graph { FileMetadata { symbols } } - pub fn file_paths(&self, src_file: &String, dst_file: &String) { - self.symbol_graph.file_paths(src_file, dst_file); + pub fn pairs_between_files(&self, src_file: &String, dst_file: &String) -> Vec { + self.symbol_graph.pairs_between_files(src_file, dst_file) } } @@ -611,6 +611,7 @@ impl GraphConfig { #[cfg(test)] mod tests { use crate::graph::{Graph, GraphConfig}; + use crate::symbol::DefRefPair; use petgraph::visit::EdgeRef; use tracing::{debug, info}; @@ -711,9 +712,20 @@ mod tests { let mut config = GraphConfig::default(); config.project_path = String::from("."); let g = Graph::from(config); - let symbols = g.file_paths( + let symbols: Vec = g.pairs_between_files( &String::from("src/extractor.rs"), &String::from("src/graph.rs"), ); + symbols.iter().for_each(|pair| { + info!( + "{} {} {} -> {} {} {}", + pair.src_symbol.file, + pair.src_symbol.name, + pair.src_symbol.range.start_point.row, + pair.dst_symbol.file, + pair.dst_symbol.name, + pair.dst_symbol.range.start_point.row + ); + }); } } diff --git a/src/symbol.rs b/src/symbol.rs index d4520e3..11d9e4f 100644 --- a/src/symbol.rs +++ b/src/symbol.rs @@ -5,7 +5,6 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::hash::{Hash, Hasher}; use std::sync::Arc; -use tracing::info; use tree_sitter::Range; #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] @@ -251,7 +250,7 @@ impl SymbolGraph { self.neighbor_symbols(*ref_index) } - pub fn file_paths(&self, src_file: &String, dst_file: &String) -> Vec { + pub fn pairs_between_files(&self, src_file: &String, dst_file: &String) -> Vec { if let (Some(src_index), Some(dst_index)) = ( self.file_mapping.get(src_file), self.file_mapping.get(dst_file), @@ -265,16 +264,9 @@ impl SymbolGraph { src_symbol: self.g[each[1]].get_symbol().unwrap().clone(), dst_symbol: self.g[each[2]].get_symbol().unwrap().clone(), }) + .filter(|each| each.src_symbol.kind == SymbolKind::DEF) .collect(); - pairs.iter().for_each(|pair| { - info!( - "{} {} -> {} {}", - pair.src_symbol.file, - pair.src_symbol.name, - pair.dst_symbol.file, - pair.dst_symbol.name - ); - }); + return pairs; } // fallback From 39a979b16f77748491bc82def1f72dd2c9d72e43 Mon Sep 17 00:00:00 2001 From: williamfzc <178894043@qq.com> Date: Sun, 8 Sep 2024 11:03:48 +0800 Subject: [PATCH 4/6] feat: symbol_csv command in cmd --- src/graph.rs | 47 +++-------------------------------------------- src/main.rs | 49 ++++++++++++++++++++++++++++++++++++++++++++----- src/symbol.rs | 2 +- 3 files changed, 48 insertions(+), 50 deletions(-) diff --git a/src/graph.rs b/src/graph.rs index 98c9999..4e1b0f8 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -491,36 +491,6 @@ impl Graph { contexts } - pub fn symbols_between_files(&self, src: &String, dst: &String) -> Vec { - if !self.files().contains(src) || !self.files().contains(dst) { - return Vec::new(); - } - - let mut related_symbols: Vec = vec![]; - - // other files -> this file - let definitions_in_file = self.symbol_graph.list_definitions(src); - let definition_count = definitions_in_file.len(); - - definitions_in_file.iter().for_each(|def| { - self.symbol_graph - .list_references_by_definition(&def.id()) - .iter() - .filter(|(each, _)| { - return each.file.eq(dst); - }) - .for_each(|(each_ref, weight)| { - let real_weight = std::cmp::max(weight / definition_count, 1); - related_symbols.push(RelatedSymbol { - symbol: each_ref.clone(), - weight: real_weight, - }) - }); - }); - - related_symbols - } - pub fn related_symbols(&self, symbol: &Symbol) -> HashMap { match symbol.kind { SymbolKind::DEF => self @@ -547,6 +517,9 @@ impl Graph { } pub fn pairs_between_files(&self, src_file: &String, dst_file: &String) -> Vec { + if !self.files().contains(src_file) || !self.files().contains(dst_file) { + return Vec::new(); + } self.symbol_graph.pairs_between_files(src_file, dst_file) } } @@ -692,20 +665,6 @@ mod tests { }); } - #[test] - fn between_files() { - let mut config = GraphConfig::default(); - config.project_path = String::from("."); - let g = Graph::from(config); - let symbols = g.symbols_between_files( - &String::from("src/rule.rs"), - &String::from("src/extractor.rs"), - ); - symbols.iter().for_each(|item| { - info!("{:?}: {}", item.symbol, item.weight); - }); - } - #[test] fn paths() { tracing_subscriber::fmt::init(); diff --git a/src/main.rs b/src/main.rs index 0cf530a..56fe2f4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,7 +6,7 @@ use gossiphs::graph::{Graph, GraphConfig, RelatedFileContext}; use gossiphs::server::{server_main, ServerConfig}; use inquire::Text; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fs; use std::fs::File; use std::io::Write; @@ -104,6 +104,10 @@ struct RelationCommand { #[clap(long)] #[clap(default_value = "output.csv")] csv: String, + + #[clap(long)] + #[clap(default_value = "")] + symbol_csv: String, } #[derive(Parser, Debug)] @@ -245,7 +249,6 @@ fn handle_relation(relation_cmd: RelationCommand) { Ok(writer) => writer, Err(e) => panic!("Failed to create CSV writer: {}", e), }; - // Write the header row let mut header = vec!["".to_string()]; header.extend(files.clone()); @@ -253,11 +256,28 @@ fn handle_relation(relation_cmd: RelationCommand) { panic!("Failed to write CSV header: {}", e); } + let mut symbol_wtr_opts = None; + if !relation_cmd.symbol_csv.is_empty() { + let symbol_wtr_result = Writer::from_path(relation_cmd.symbol_csv); + symbol_wtr_opts = match symbol_wtr_result { + Ok(writer) => Some(writer), + Err(e) => panic!("Failed to create CSV writer: {}", e), + }; + let mut header = vec!["".to_string()]; + header.extend(files.clone()); + if let Some(symbol_wtr) = symbol_wtr_opts.as_mut() { + symbol_wtr + .write_record(&header) + .expect("Failed to write header to symbol_wtr"); + } + } + // Write each row for file in &files { let mut row = vec![file.clone()]; - let related_files = g.related_files(file); - let related_files_map: HashMap<_, _> = related_files + let mut pair_row = vec![file.clone()]; + let related_files_map: HashMap<_, _> = g + .related_files(file) .into_iter() .map(|rf| (rf.name, rf.score)) .collect(); @@ -268,8 +288,24 @@ fn handle_relation(relation_cmd: RelationCommand) { .unwrap_or(&0) .to_string(); row.push(score); + + if symbol_wtr_opts.is_some() { + let pairs = g + .pairs_between_files(&file, &related_file) + .iter() + .map(|each| each.src_symbol.name.clone()) + .collect::>() + .into_iter() + .collect::>(); + pair_row.push(pairs.join("|")); + } } wtr.write_record(&row).expect("Failed to write record"); + if let Some(symbol_wtr) = symbol_wtr_opts.as_mut() { + symbol_wtr + .write_record(&pair_row) + .expect("Failed to write pair_row to symbol_wtr"); + } } // Flush the writer to ensure all data is written @@ -665,8 +701,11 @@ fn diff_test() { #[test] fn relation_test() { + let mut config = CommonOptions::default(); + config.project_path = "../gin".parse().unwrap(); handle_relation(RelationCommand { - common_options: CommonOptions::default(), + common_options: config, csv: "ok.csv".to_string(), + symbol_csv: "ok1.csv".to_string(), }) } diff --git a/src/symbol.rs b/src/symbol.rs index 11d9e4f..c0011b0 100644 --- a/src/symbol.rs +++ b/src/symbol.rs @@ -259,7 +259,7 @@ impl SymbolGraph { // so at most 2 let pairs: Vec<_> = all_simple_paths::, _>(&self.g, *src_index, *dst_index, 1, Some(2)) - .filter(|each| each.len() > 0) + .filter(|each| each.len() == 4) .map(|each| DefRefPair { src_symbol: self.g[each[1]].get_symbol().unwrap().clone(), dst_symbol: self.g[each[2]].get_symbol().unwrap().clone(), From e3be40de2636074cff4ada8b2a87867de094134f Mon Sep 17 00:00:00 2001 From: williamfzc <178894043@qq.com> Date: Sun, 8 Sep 2024 11:28:17 +0800 Subject: [PATCH 5/6] chore: path calc cost improvement --- src/main.rs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/main.rs b/src/main.rs index 56fe2f4..390cf7f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -283,21 +283,22 @@ fn handle_relation(relation_cmd: RelationCommand) { .collect(); for related_file in &files { - let score = related_files_map - .get(related_file) - .unwrap_or(&0) - .to_string(); - row.push(score); + let score = related_files_map.get(related_file).unwrap_or(&0); + row.push(score.to_string()); if symbol_wtr_opts.is_some() { - let pairs = g - .pairs_between_files(&file, &related_file) - .iter() - .map(|each| each.src_symbol.name.clone()) - .collect::>() - .into_iter() - .collect::>(); - pair_row.push(pairs.join("|")); + if score > &0 { + let pairs = g + .pairs_between_files(&file, &related_file) + .iter() + .map(|each| each.src_symbol.name.clone()) + .collect::>() + .into_iter() + .collect::>(); + pair_row.push(pairs.join("|")); + } else { + pair_row.push(String::new()); + } } } wtr.write_record(&row).expect("Failed to write record"); @@ -702,7 +703,7 @@ fn diff_test() { #[test] fn relation_test() { let mut config = CommonOptions::default(); - config.project_path = "../gin".parse().unwrap(); + config.project_path = ".".parse().unwrap(); handle_relation(RelationCommand { common_options: config, csv: "ok.csv".to_string(), From 852b164526019610e3dd1303eba34444a71fc74c Mon Sep 17 00:00:00 2001 From: williamfzc <178894043@qq.com> Date: Sun, 8 Sep 2024 11:43:09 +0800 Subject: [PATCH 6/6] fix(ci): wrong ref --- .github/workflows/cargo-test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cargo-test.yml b/.github/workflows/cargo-test.yml index 44a3764..42021b1 100644 --- a/.github/workflows/cargo-test.yml +++ b/.github/workflows/cargo-test.yml @@ -18,6 +18,7 @@ jobs: - uses: actions/checkout@v3 with: fetch-depth: '0' + ref: ${{ github.event.pull_request.head.ref }} - name: Cache Cargo dependencies uses: actions/cache@v2 with: @@ -52,7 +53,7 @@ jobs: cp ./target/debug/gossiphs ./gossiphs cd tree-sitter - time ../gossiphs relation + time ../gossiphs relation --symbol-csv test-symbol.csv cd .. cd typescript-eslint