From 040047cce5c390182766d67068a262f208e3480d Mon Sep 17 00:00:00 2001 From: sriram98v Date: Fri, 13 Oct 2023 17:55:28 -0500 Subject: [PATCH] added export edges with labels --- src/bin/bin.rs | 37 ++++++++++++++++++++++++++++++++----- src/suffix_tree.rs | 29 ++++++++++++++++++++++++----- src/suffix_tree/tree.rs | 3 ++- 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/src/bin/bin.rs b/src/bin/bin.rs index 5808ebf..0f61cee 100644 --- a/src/bin/bin.rs +++ b/src/bin/bin.rs @@ -2,8 +2,11 @@ extern crate clap; use clap::{arg, Command}; use bio::io::fasta; +use generalized_suffix_tree::data::tree_item::TreeItem; use generalized_suffix_tree::suffix_tree::KGST; +use generalized_suffix_tree::suffix_tree::tree::SuffixTree; use indicatif::{ProgressBar, ProgressStyle}; +use itertools::Itertools; use std::fs::File; use std::io::Write; @@ -48,13 +51,37 @@ fn build_tree(file:&str, num_seq: &usize, max_depth: &usize)->KGST tree } -fn save_tree_edges(tree: &mut KGST, output_path: String){ +fn save_tree(tree: KGST, output_path: String){ println!("Saving tree nodes to {}.", &output_path); let edge_iter = tree.iter_edges_post(); - println!("Writing nodes"); let mut f = File::create(output_path).expect("Unable to create file"); + writeln!(f, "start kgst").expect("Write failed"); + writeln!(f, "start edges").expect("Write failed"); for (n1, n2) in edge_iter{ - writeln!(f, "{} {}", n1, n2).expect("Write failed"); + writeln!(f, "{}->{}; {}", n1, n2, tree.get_node_label(&n2).iter().map(|x| format!("{}", x)).collect::()).expect("Write failed"); + } + writeln!(f, "end").expect("Write failed"); + println!("Saved"); +} + +fn _node_sim(tree: KGST, output_path: String){ + println!("Saving tree strings to {}.", &output_path); + let string_iter = tree.iter_strings(); + let pb = ProgressBar::new(string_iter.len() as u64); + pb.set_style(ProgressStyle::with_template("{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({eta})") + .unwrap() + .progress_chars("#>-")); + let mut f = File::create(output_path).expect("Unable to create file"); + writeln!(f, "ID,node_values").expect("Write failed"); + for (_itemid, (item, _depth)) in string_iter{ + let mut node_values: Vec = vec![0; tree.num_nodes()]; + for node_id in item.get_nodes().iter(){ + for path_node in tree.get_node_path_pre(node_id).iter(){ + node_values[*path_node] = 1; + } + } + writeln!(f, "{},{}", item.get_id(), node_values.iter().map(|i| format!("{}", i)).collect::()).expect("Write failed"); + pb.inc(1); } println!("Saved"); } @@ -87,12 +114,12 @@ fn main(){ match matches.subcommand(){ Some(("build", sub_m)) => { - let mut tree: KGST = build_tree( + let tree: KGST = build_tree( sub_m.get_one::("source").expect("required").as_str(), sub_m.get_one::("num").expect("required"), sub_m.get_one::("depth").expect("required") ); - save_tree_edges(&mut tree, sub_m.get_one::("out").expect("required").to_string()); + save_tree(tree, sub_m.get_one::("out").expect("required").to_string()); }, _ => { println!("Either build a tree or query an existing tree. Refer help page (-h flag)"); diff --git a/src/suffix_tree.rs b/src/suffix_tree.rs index 4a3c1b5..5bb7c6e 100644 --- a/src/suffix_tree.rs +++ b/src/suffix_tree.rs @@ -289,7 +289,7 @@ where } } - fn get_node_data(&self, node_id: &NodeID)->&HashMap>{ + pub fn get_node_data(&self, node_id: &NodeID)->&HashMap>{ self.node_data.get(node_id).expect("Node ID does not exist!") } @@ -444,8 +444,14 @@ where PostOrdNodes::new(&self.root, &self.nodes) } + /// Returns the nodes in a path in preorder pub fn iter_path_pre(&self, node_id: &NodeID)->std::collections::linked_list::IntoIter{ - self.get_node_path(node_id).into_iter() + self.get_node_path_pre(node_id).into_iter() + } + + /// Returns the nodes in a path in postorder + pub fn iter_path_post(&self, node_id: &NodeID)->std::collections::linked_list::IntoIter{ + self.get_node_path_post(node_id).into_iter() } /// Returns a postorder edge iterator of the tree @@ -483,14 +489,16 @@ where fn get_suffix_link(&self, node_id: &NodeID) -> &usize{ self.suffix_links.get(node_id).expect("Node id does not exist!") } - fn get_node_label(&self, _node_id: &NodeID)->&[T]{ - todo!(); + fn get_node_label(&self, node_id: &NodeID)->&[T]{ + let node_edge_length = self.get_node_edge_length(node_id); + let node_start = self.get_node_start(node_id).clone(); + &self.get_string_by_treeitem_id(self.get_node_string_id(node_id))[node_start..node_start+node_edge_length] } fn get_node_path_label(&self, _node_id: &NodeID)->&[T]{ todo!(); } - fn get_node_path(&self, node_id: &NodeID)->LinkedList{ + fn get_node_path_pre(&self, node_id: &NodeID)->LinkedList{ let mut node_path: LinkedList = LinkedList::new(); let mut curr_node_id: usize = node_id.clone(); while self.get_node_parent(&curr_node_id).expect("Invalid NodeID! Path is broken")!=&0{ @@ -500,7 +508,18 @@ where node_path.push_front(curr_node_id); node_path.push_front(0); node_path + } + fn get_node_path_post(&self, node_id: &NodeID)->LinkedList{ + let mut node_path: LinkedList = LinkedList::new(); + let mut curr_node_id: usize = node_id.clone(); + while self.get_node_parent(&curr_node_id).expect("Invalid NodeID! Path is broken")!=&0{ + node_path.push_front(curr_node_id.clone()); + curr_node_id = self.get_node_parent(&curr_node_id).cloned().expect("Invalid NodeID! Path is broken"); + } + node_path.push_back(curr_node_id); + node_path.push_back(0); + node_path } fn is_suffix(&self, s:&[T])->bool{ diff --git a/src/suffix_tree/tree.rs b/src/suffix_tree/tree.rs index f632694..a1647ac 100644 --- a/src/suffix_tree/tree.rs +++ b/src/suffix_tree/tree.rs @@ -10,7 +10,8 @@ pub trait SuffixTree{ fn get_suffix_link(&self, node_id: &NodeID) -> &usize; fn get_node_label(&self, node_id: &NodeID)->&[T]; fn get_node_path_label(&self, node_id: &NodeID)->&[T]; - fn get_node_path(&self, node_id: &NodeID)->LinkedList; + fn get_node_path_pre(&self, node_id: &NodeID)->LinkedList; + fn get_node_path_post(&self, node_id: &NodeID)->LinkedList; /// Checks if the input slice is a suffix of any of the strings present in the tree. fn is_suffix(&self, s:&[T])->bool; } \ No newline at end of file