From 65ab5aeaab93be9f4e29fa96980babc40ff30fc9 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sat, 14 Oct 2023 17:23:48 -0500 Subject: [PATCH 01/40] swtiched the outputs of get methods to refs --- src/tree/simple_rtree.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 4a3f022..2629ffc 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -7,12 +7,12 @@ use crate::iter::edge_iter::*; pub type EdgeWeight = f64; pub trait SimpleRTree { - fn get_root(&self)->NodeID; - fn get_nodes(&self)->HashMap; - fn get_children(&self, node_id: &NodeID)->HashMap; + fn get_root(&self)->&NodeID; + fn get_nodes(&self)->&HashMap; + fn get_children(&self, node_id: &NodeID)->Option<&HashSet<(Option, NodeID)>>; fn get_leaves(&self, node_id: &NodeID)->HashSet; - fn get_descendents(&self, node_id: &NodeID)->Vec; - fn get_subtree(&self, node_id: &NodeID)->Box; + fn get_descendents(&self, node_id: &NodeID)->&Vec; + fn get_subtree(&self, node_id: &NodeID)->&Box; fn get_mrca(&self, node_id_list: Vec<&NodeID>)->&NodeID; fn is_leaf(&self, node_id: &NodeID)->bool; From e153a1b99cf8a16cb4305100e0e9dfec7e25fac0 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sat, 14 Oct 2023 17:25:26 -0500 Subject: [PATCH 02/40] renamed rooted tree struct, implemented get_root, get_nodes, get_children, get_leaves --- src/tree.rs | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 2 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index d31afc2..86bd261 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -4,12 +4,125 @@ use std::{collections::{HashMap, HashSet}, fmt::Display}; use crate::node::*; use crate::tree::simple_rtree::*; +use crate::iter::{node_iter::*, edge_iter::*}; -pub struct SimpleRTree{ +pub struct RootedPhyloTree{ root: NodeID, nodes: HashMap, children: HashMap, NodeID)>>, - parents: HashMap, + parents: HashMap>, data: HashMap>, leaves: HashSet, +} + +impl RootedPhyloTree{ + pub fn new()->Self{ + RootedPhyloTree { + root: 0, + nodes: HashMap::from([(0, false)]), + children: HashMap::new(), + parents: HashMap::from([(0, None)]), + data: HashMap::new(), + leaves: HashSet::new() + } + } + + fn leaves_of_node(&self, node_id:&NodeID, leaves:&mut Vec){ + if self.get_children(node_id).expect("Invalid NodeID!").is_empty(){ + leaves.push(*node_id); + } + + for (_edge_weight, child_node_id) in self.get_children(node_id).expect("Invalid NodeID").iter(){ + self.leaves_of_node(child_node_id, leaves); + } + } +} + +impl SimpleRTree for RootedPhyloTree{ + fn get_root(&self)->&NodeID{ + &self.root + } + + fn get_nodes(&self)->&HashMap{ + &self.nodes + } + + fn get_children(&self, node_id: &NodeID)->Option<&HashSet<(Option, NodeID)>>{ + self.children.get(node_id) + } + + fn get_leaves(&self, node_id: &NodeID)->HashSet{ + let mut leaf_vec: Vec = Vec::new(); + self.leaves_of_node(node_id, &mut leaf_vec); + leaf_vec.into_iter().collect::>() + } + + fn get_descendents(&self, node_id: &NodeID)->&Vec{ + todo!() + } + + fn get_subtree(&self, node_id: &NodeID)->&Box{ + todo!() + } + + fn get_mrca(&self, node_id_list: Vec<&NodeID>)->&NodeID{ + todo!() + } + + fn is_leaf(&self, node_id: &NodeID)->bool{ + todo!() + } + + fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID)){ + todo!() + } + + fn extract_subtree(&mut self, node_id: &NodeID)-> Box{ + todo!() + } + + fn prune_subtree(&self, node_id: &NodeID)-> Box{ + todo!() + } + + fn iter_node_pre(&self, start_node_id: &NodeID)->PreOrdNodes{ + todo!() + } + + fn iter_node_post(&self, start_node_id: &NodeID)->PostOrdNodes{ + todo!() + } + + fn iter_edges_pre(&self, start_node_id: &NodeID)->PreOrdEdges{ + todo!() + } + + fn iter_edges_post(&self, start_node_id: &NodeID)->PostOrdEdges{ + todo!() + } + + fn get_ancestors(&self, node_id: &NodeID)->Vec<&NodeID>{ + todo!() + } + + fn phylogenetic_distance_matrix(&self)->Vec>{ + todo!() + } + + fn reroot_at_node(&mut self, node_id: &NodeID){ + todo!() + } + + fn insert_internal_node(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)){ + todo!() + } + + fn distance_from_root(&self, weighted: bool)->f64{ + todo!() + } + + fn get_bipartition(&self, edge: (&NodeID, &NodeID)){ + todo!() + } + } \ No newline at end of file From 17fb6f3ecbac62fec89906fd7f2aa45ea373b1f1 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sun, 15 Oct 2023 17:13:01 -0500 Subject: [PATCH 03/40] added comments to traits, renamed distance matrix method --- src/tree.rs | 2 +- src/tree/simple_rtree.rs | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index 86bd261..6a33065 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -105,7 +105,7 @@ impl SimpleRTree for RootedPhyloTree{ todo!() } - fn phylogenetic_distance_matrix(&self)->Vec>{ + fn leaf_distance_matrix(&self, weighted: bool)->Vec>{ todo!() } diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 2629ffc..a4df2bb 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -7,29 +7,67 @@ use crate::iter::edge_iter::*; pub type EdgeWeight = f64; pub trait SimpleRTree { + + /// Returns root node id fn get_root(&self)->&NodeID; + + /// Returns all node ids fn get_nodes(&self)->&HashMap; + + /// Returns children node ids for given node id fn get_children(&self, node_id: &NodeID)->Option<&HashSet<(Option, NodeID)>>; + + /// Returns all leaf node ids fn get_leaves(&self, node_id: &NodeID)->HashSet; + + /// Returns all descendant nodes of give node fn get_descendents(&self, node_id: &NodeID)->&Vec; + + /// Returns full subtree rooted at given node fn get_subtree(&self, node_id: &NodeID)->&Box; + + /// Returns most recent common ancestor of give node set fn get_mrca(&self, node_id_list: Vec<&NodeID>)->&NodeID; + + /// Checks if the given node is a leaf node fn is_leaf(&self, node_id: &NodeID)->bool; + /// Attaches input tree to self by spliting an edge fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID)); + + /// Returns subtree starting at given node, while corresponding nodes from self. fn extract_subtree(&mut self, node_id: &NodeID)-> Box; + + /// Returns subtree starting at given node, without removing corresponding nodes from self fn prune_subtree(&self, node_id: &NodeID)-> Box; + ///Returns an iterator that iterates over the nodes in Pre-order fn iter_node_pre(&self, start_node_id: &NodeID)->PreOrdNodes; + + ///Returns an iterator that iterates over the nodes in Post-order fn iter_node_post(&self, start_node_id: &NodeID)->PostOrdNodes; + + ///Returns an iterator that iterates over the edges in Pre-order fn iter_edges_pre(&self, start_node_id: &NodeID)->PreOrdEdges; + + ///Returns an iterator that iterates over the edges in Post-order fn iter_edges_post(&self, start_node_id: &NodeID)->PostOrdEdges; + + /// Returns all node ids in path from root to given node fn get_ancestors(&self, node_id: &NodeID)->Vec<&NodeID>; - fn phylogenetic_distance_matrix(&self)->Vec>; + /// Returns pairwise distance matrix of the taxa. If weighted is true, then returns sum of edge weights along paths connecting leaves of tree + fn leaf_distance_matrix(&self, weighted: bool)->Vec>; + + /// Rerootes tree at given node. fn reroot_at_node(&mut self, node_id: &NodeID); + + /// Inserts node in the middle of edge given by pair of node ids fn insert_internal_node(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)); + /// Returns distance of node from root. If weighted is true, it returns sum of edges from root to self. fn distance_from_root(&self, weighted: bool)->f64; + + /// Returns bipartition induced by edge fn get_bipartition(&self, edge: (&NodeID, &NodeID)); } \ No newline at end of file From 0dcfb6aa844de9c48ee095e6d97ff6b1c5692ef5 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sun, 15 Oct 2023 18:14:53 -0500 Subject: [PATCH 04/40] implemented node iterators, removed data field in tree struct, implemented node iter methods, get subtree and extract subtree --- src/iter/node_iter.rs | 39 +++++++++++++++++++---- src/tree.rs | 69 +++++++++++++++++++++++++++++----------- src/tree/simple_rtree.rs | 8 +---- 3 files changed, 85 insertions(+), 31 deletions(-) diff --git a/src/iter/node_iter.rs b/src/iter/node_iter.rs index 7c48f9b..3190d8d 100644 --- a/src/iter/node_iter.rs +++ b/src/iter/node_iter.rs @@ -12,8 +12,11 @@ pub struct PreOrdNodes impl PreOrdNodes { - pub fn new(start_node_id: &NodeID, _tree: &HashMap)->Self{ - Self { stack:vec![*start_node_id], nodes: HashMap::new()} + pub fn new(start_node_id: &NodeID, children: &HashMap, NodeID)>>)->Self{ + Self { stack:vec![*start_node_id], nodes: children.iter() + .map(|(k, v)| (k.clone(), v.iter() + .map(|(_ew, ni)| ni.clone()).collect::>())) + .collect()} } } @@ -22,7 +25,16 @@ impl Iterator for PreOrdNodes type Item = NodeID; fn next(&mut self)->Option{ - todo!() + match self.stack.pop() { + Some(node_id) => { + let children_ids:HashSet = self.nodes.get(&node_id).cloned().expect("Invalid Node ID!"); + for child_node_id in children_ids.into_iter().sorted(){ + self.stack.push(child_node_id) + } + Some(node_id) + } + None => None, + } } } @@ -34,8 +46,11 @@ pub struct PostOrdNodes impl PostOrdNodes { - pub fn new(start_node_id: &NodeID, _tree: &HashMap)->Self{ - Self { stack:vec![*start_node_id], nodes: HashMap::new()} + pub fn new(start_node_id: &NodeID, children: &HashMap, NodeID)>>)->Self{ + Self { stack:vec![*start_node_id], nodes: children.iter() + .map(|(k, v)| (k.clone(), v.iter() + .map(|(_ew, ni)| ni.clone()).collect::>())) + .collect()} } } @@ -44,6 +59,18 @@ impl Iterator for PostOrdNodes type Item = NodeID; fn next(&mut self)->Option{ - todo!() + while let Some(node_id) = self.stack.pop() { + if self.nodes.contains_key(&node_id){ + self.stack.push(node_id); + let children = self.nodes.remove(&node_id).unwrap(); + for child_id in children.into_iter().sorted(){ + self.stack.push(child_id) + } + } + else{ + return Some(node_id) + } + } + None } } \ No newline at end of file diff --git a/src/tree.rs b/src/tree.rs index 6a33065..2f6642d 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,6 +1,6 @@ pub mod simple_rtree; -use std::{collections::{HashMap, HashSet}, fmt::Display}; +use std::{collections::{HashMap, HashSet}}; use crate::node::*; use crate::tree::simple_rtree::*; @@ -11,7 +11,6 @@ pub struct RootedPhyloTree{ nodes: HashMap, children: HashMap, NodeID)>>, parents: HashMap>, - data: HashMap>, leaves: HashSet, } @@ -22,7 +21,6 @@ impl RootedPhyloTree{ nodes: HashMap::from([(0, false)]), children: HashMap::new(), parents: HashMap::from([(0, None)]), - data: HashMap::new(), leaves: HashSet::new() } } @@ -57,12 +55,29 @@ impl SimpleRTree for RootedPhyloTree{ leaf_vec.into_iter().collect::>() } - fn get_descendents(&self, node_id: &NodeID)->&Vec{ - todo!() - } - - fn get_subtree(&self, node_id: &NodeID)->&Box{ - todo!() + fn get_subtree(&self, node_id: &NodeID)->Box{ + let root= node_id.clone(); + let mut nodes: HashMap= HashMap::new(); + let mut children: HashMap, NodeID)>> = HashMap::new(); + let mut parents: HashMap> = HashMap::new(); + let mut leaves: HashSet = HashSet::new(); + for decsendant_node_id in self.iter_node_pre(node_id){ + nodes.insert(decsendant_node_id.clone(), self.nodes.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); + children.insert(decsendant_node_id.clone(), self.children.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); + parents.insert(decsendant_node_id.clone(), self.parents.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); + if self.is_leaf(&decsendant_node_id){ + leaves.insert(decsendant_node_id.clone()); + } + } + Box::new( + RootedPhyloTree{ + root: root, + nodes: nodes, + children: children, + parents: parents, + leaves: leaves, + } + ) } fn get_mrca(&self, node_id_list: Vec<&NodeID>)->&NodeID{ @@ -70,7 +85,7 @@ impl SimpleRTree for RootedPhyloTree{ } fn is_leaf(&self, node_id: &NodeID)->bool{ - todo!() + self.nodes.get(node_id).expect("Invalid NodeID").clone() } fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID)){ @@ -78,19 +93,37 @@ impl SimpleRTree for RootedPhyloTree{ } fn extract_subtree(&mut self, node_id: &NodeID)-> Box{ - todo!() - } - - fn prune_subtree(&self, node_id: &NodeID)-> Box{ - todo!() + let root= node_id.clone(); + let mut nodes: HashMap= HashMap::new(); + let mut children: HashMap, NodeID)>> = HashMap::new(); + let mut parents: HashMap> = HashMap::new(); + let mut leaves: HashSet = HashSet::new(); + for decsendant_node_id in self.iter_node_pre(node_id){ + nodes.insert(decsendant_node_id.clone(), self.nodes.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); + children.insert(decsendant_node_id.clone(), self.children.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); + parents.insert(decsendant_node_id.clone(), self.parents.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); + match self.leaves.take(&decsendant_node_id){ + Some(leaf_id) => {leaves.insert(leaf_id.clone());}, + None => {}, + } + } + Box::new( + RootedPhyloTree{ + root: root, + nodes: nodes, + children: children, + parents: parents, + leaves: leaves, + } + ) } fn iter_node_pre(&self, start_node_id: &NodeID)->PreOrdNodes{ - todo!() + PreOrdNodes::new(start_node_id, &self.children) } fn iter_node_post(&self, start_node_id: &NodeID)->PostOrdNodes{ - todo!() + PostOrdNodes::new(start_node_id, &self.children) } fn iter_edges_pre(&self, start_node_id: &NodeID)->PreOrdEdges{ @@ -110,7 +143,7 @@ impl SimpleRTree for RootedPhyloTree{ } fn reroot_at_node(&mut self, node_id: &NodeID){ - todo!() + self.root = node_id.clone(); } fn insert_internal_node(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)){ diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index a4df2bb..8b22509 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -20,11 +20,8 @@ pub trait SimpleRTree { /// Returns all leaf node ids fn get_leaves(&self, node_id: &NodeID)->HashSet; - /// Returns all descendant nodes of give node - fn get_descendents(&self, node_id: &NodeID)->&Vec; - /// Returns full subtree rooted at given node - fn get_subtree(&self, node_id: &NodeID)->&Box; + fn get_subtree(&self, node_id: &NodeID)->Box; /// Returns most recent common ancestor of give node set fn get_mrca(&self, node_id_list: Vec<&NodeID>)->&NodeID; @@ -37,9 +34,6 @@ pub trait SimpleRTree { /// Returns subtree starting at given node, while corresponding nodes from self. fn extract_subtree(&mut self, node_id: &NodeID)-> Box; - - /// Returns subtree starting at given node, without removing corresponding nodes from self - fn prune_subtree(&self, node_id: &NodeID)-> Box; ///Returns an iterator that iterates over the nodes in Pre-order fn iter_node_pre(&self, start_node_id: &NodeID)->PreOrdNodes; From 315f61cebfc8b436e20eb2ed8d69f8dc71c12ab3 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sun, 15 Oct 2023 18:25:36 -0500 Subject: [PATCH 05/40] added get_cluster trait, implemented get_cluster and get_bipartition --- src/tree.rs | 10 ++++++++-- src/tree/simple_rtree.rs | 5 ++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index 2f6642d..3b392e2 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -154,8 +154,14 @@ impl SimpleRTree for RootedPhyloTree{ todo!() } - fn get_bipartition(&self, edge: (&NodeID, &NodeID)){ - todo!() + fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(HashSet, HashSet){ + (self.get_cluster(edge.0), self.get_cluster(edge.1)) + } + + fn get_cluster(&self, node_id: &NodeID)-> HashSet{ + let mut leaves: Vec = Vec::new(); + self.leaves_of_node(node_id, &mut leaves); + HashSet::from_iter(leaves) } } \ No newline at end of file diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 8b22509..0b8da7a 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -63,5 +63,8 @@ pub trait SimpleRTree { fn distance_from_root(&self, weighted: bool)->f64; /// Returns bipartition induced by edge - fn get_bipartition(&self, edge: (&NodeID, &NodeID)); + fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(HashSet, HashSet); + + /// Returns cluster of node + fn get_cluster(&self, node_id: &NodeID)-> HashSet; } \ No newline at end of file From 60ebcf87677f6279097dd95aee013d3d5034d205 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sun, 15 Oct 2023 18:39:58 -0500 Subject: [PATCH 06/40] fixed bipartition implementation --- src/tree.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index 3b392e2..1b7d1f3 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,6 +1,6 @@ pub mod simple_rtree; -use std::{collections::{HashMap, HashSet}}; +use std::collections::{HashMap, HashSet}; use crate::node::*; use crate::tree::simple_rtree::*; @@ -155,7 +155,8 @@ impl SimpleRTree for RootedPhyloTree{ } fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(HashSet, HashSet){ - (self.get_cluster(edge.0), self.get_cluster(edge.1)) + let c2 = self.get_cluster(edge.1); + (self.leaves.difference(&c2).map(|x| x.clone()).collect(), c2) } fn get_cluster(&self, node_id: &NodeID)-> HashSet{ From d16aa150c2a71fffef6807d82d0a89976ba1c131 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sun, 15 Oct 2023 19:31:25 -0500 Subject: [PATCH 07/40] added method to get tree from newick string --- src/tree.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/tree.rs b/src/tree.rs index 1b7d1f3..5d42a9d 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -25,6 +25,11 @@ impl RootedPhyloTree{ } } + pub fn from_newick(newick_string: String)->Self{ + todo!() + } + + fn leaves_of_node(&self, node_id:&NodeID, leaves:&mut Vec){ if self.get_children(node_id).expect("Invalid NodeID!").is_empty(){ leaves.push(*node_id); From 66ca88a806bed8498d6b9ba6aac40f682a0ee39b Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sun, 15 Oct 2023 20:35:10 -0500 Subject: [PATCH 08/40] added unrooted tree struct --- src/tree.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/tree.rs b/src/tree.rs index 5d42a9d..39163ec 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -6,6 +6,12 @@ use crate::node::*; use crate::tree::simple_rtree::*; use crate::iter::{node_iter::*, edge_iter::*}; +pub struct UnrootedPhyloTree{ + nodes: HashMap, + neighbours: HashMap, NodeID)>>, + leaves: HashSet, +} + pub struct RootedPhyloTree{ root: NodeID, nodes: HashMap, From 9ab0ee332ef6e694541eeb1799fa34ff829423de Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 16 Oct 2023 17:35:13 -0500 Subject: [PATCH 09/40] moved leaves from HashSet to HashMap --- src/tree.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index 39163ec..b24f60e 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -9,7 +9,7 @@ use crate::iter::{node_iter::*, edge_iter::*}; pub struct UnrootedPhyloTree{ nodes: HashMap, neighbours: HashMap, NodeID)>>, - leaves: HashSet, + leaves: HashMap, } pub struct RootedPhyloTree{ @@ -17,7 +17,7 @@ pub struct RootedPhyloTree{ nodes: HashMap, children: HashMap, NodeID)>>, parents: HashMap>, - leaves: HashSet, + leaves: HashMap, } impl RootedPhyloTree{ @@ -27,7 +27,7 @@ impl RootedPhyloTree{ nodes: HashMap::from([(0, false)]), children: HashMap::new(), parents: HashMap::from([(0, None)]), - leaves: HashSet::new() + leaves: HashMap::new() } } @@ -71,13 +71,13 @@ impl SimpleRTree for RootedPhyloTree{ let mut nodes: HashMap= HashMap::new(); let mut children: HashMap, NodeID)>> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); - let mut leaves: HashSet = HashSet::new(); + let mut leaves: HashMap = HashMap::new(); for decsendant_node_id in self.iter_node_pre(node_id){ nodes.insert(decsendant_node_id.clone(), self.nodes.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); children.insert(decsendant_node_id.clone(), self.children.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); parents.insert(decsendant_node_id.clone(), self.parents.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); if self.is_leaf(&decsendant_node_id){ - leaves.insert(decsendant_node_id.clone()); + leaves.insert(decsendant_node_id.clone(), self.leaves.get(&decsendant_node_id).cloned().unwrap()); } } Box::new( @@ -108,13 +108,13 @@ impl SimpleRTree for RootedPhyloTree{ let mut nodes: HashMap= HashMap::new(); let mut children: HashMap, NodeID)>> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); - let mut leaves: HashSet = HashSet::new(); + let mut leaves: HashMap = HashMap::new(); for decsendant_node_id in self.iter_node_pre(node_id){ nodes.insert(decsendant_node_id.clone(), self.nodes.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); children.insert(decsendant_node_id.clone(), self.children.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); parents.insert(decsendant_node_id.clone(), self.parents.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); - match self.leaves.take(&decsendant_node_id){ - Some(leaf_id) => {leaves.insert(leaf_id.clone());}, + match self.leaves.remove(&decsendant_node_id){ + Some(taxa_id) => {leaves.insert(decsendant_node_id.clone(), taxa_id);}, None => {}, } } @@ -167,7 +167,7 @@ impl SimpleRTree for RootedPhyloTree{ fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(HashSet, HashSet){ let c2 = self.get_cluster(edge.1); - (self.leaves.difference(&c2).map(|x| x.clone()).collect(), c2) + (self.leaves.keys().map(|x| x.clone()).collect::>().difference(&c2).map(|x| x.clone()).collect(), c2) } fn get_cluster(&self, node_id: &NodeID)-> HashSet{ From b473d2b82958864a11ff96da7ef772a1e12f10e5 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 16 Oct 2023 19:18:39 -0500 Subject: [PATCH 10/40] added preorder ancestor iterator --- src/iter/node_iter.rs | 1 - src/tree.rs | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/iter/node_iter.rs b/src/iter/node_iter.rs index 3190d8d..3754a0c 100644 --- a/src/iter/node_iter.rs +++ b/src/iter/node_iter.rs @@ -3,7 +3,6 @@ use crate::tree::simple_rtree::*; use std::collections::{HashMap, HashSet}; use itertools::Itertools; - pub struct PreOrdNodes { stack: Vec, diff --git a/src/tree.rs b/src/tree.rs index b24f60e..184c5a2 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -45,6 +45,24 @@ impl RootedPhyloTree{ self.leaves_of_node(child_node_id, leaves); } } + + pub fn iter_node_ancestors_pre(&self, node_id:&NodeID)->Vec{ + let mut node_iter: Vec = Vec::new(); + let mut curr_node = node_id; + while self.parents.get(curr_node) != None { + match self.parents.get(curr_node).expect("Invalid NodeID!") { + Some(node) => { + node_iter.push(node.clone()); + curr_node = node; + }, + None => { + node_iter.push(self.get_root().clone()); + break; + }, + } + } + node_iter + } } impl SimpleRTree for RootedPhyloTree{ From fbce887a52679ba64de3a61d17ed2a8deadb5590 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 16 Oct 2023 19:39:13 -0500 Subject: [PATCH 11/40] simple mrca implemented --- src/tree.rs | 23 +++++++++++++++++++++-- src/tree/simple_rtree.rs | 2 +- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index 184c5a2..e7d124f 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -109,8 +109,27 @@ impl SimpleRTree for RootedPhyloTree{ ) } - fn get_mrca(&self, node_id_list: Vec<&NodeID>)->&NodeID{ - todo!() + fn get_mrca(&self, node_id_list: Vec<&NodeID>)->NodeID{ + let ancestor_iter_vec: Vec> = node_id_list.iter().map(|x| self.iter_node_ancestors_pre(x).into_iter()).collect(); + let mut mrca: NodeID = 0; + for mut iterator in ancestor_iter_vec{ + let temp: HashSet = HashSet::new(); + match iterator.next(){ + Some(x) => { + match temp.contains(&x){ + true => {mrca = x.clone()}, + false => { + match temp.len()==0{ + true => {}, + false => {return mrca} + } + } + } + }, + None => {} + } + } + mrca } fn is_leaf(&self, node_id: &NodeID)->bool{ diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 0b8da7a..f122aff 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -24,7 +24,7 @@ pub trait SimpleRTree { fn get_subtree(&self, node_id: &NodeID)->Box; /// Returns most recent common ancestor of give node set - fn get_mrca(&self, node_id_list: Vec<&NodeID>)->&NodeID; + fn get_mrca(&self, node_id_list: Vec<&NodeID>)->NodeID; /// Checks if the given node is a leaf node fn is_leaf(&self, node_id: &NodeID)->bool; From 20dc05447d69c8e5bac010785cc4961ed6288d8a Mon Sep 17 00:00:00 2001 From: sriram98v Date: Tue, 17 Oct 2023 09:09:59 -0500 Subject: [PATCH 12/40] switched to Vec instead of Hashset for children, added method to insert child node and leaf --- src/iter/node_iter.rs | 4 ++-- src/tree.rs | 33 +++++++++++++++++++++++++++++---- src/tree/simple_rtree.rs | 2 +- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/src/iter/node_iter.rs b/src/iter/node_iter.rs index 3754a0c..5240356 100644 --- a/src/iter/node_iter.rs +++ b/src/iter/node_iter.rs @@ -11,7 +11,7 @@ pub struct PreOrdNodes impl PreOrdNodes { - pub fn new(start_node_id: &NodeID, children: &HashMap, NodeID)>>)->Self{ + pub fn new(start_node_id: &NodeID, children: &HashMap, NodeID)>>)->Self{ Self { stack:vec![*start_node_id], nodes: children.iter() .map(|(k, v)| (k.clone(), v.iter() .map(|(_ew, ni)| ni.clone()).collect::>())) @@ -45,7 +45,7 @@ pub struct PostOrdNodes impl PostOrdNodes { - pub fn new(start_node_id: &NodeID, children: &HashMap, NodeID)>>)->Self{ + pub fn new(start_node_id: &NodeID, children: &HashMap, NodeID)>>)->Self{ Self { stack:vec![*start_node_id], nodes: children.iter() .map(|(k, v)| (k.clone(), v.iter() .map(|(_ew, ni)| ni.clone()).collect::>())) diff --git a/src/tree.rs b/src/tree.rs index e7d124f..1c17bcc 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -15,7 +15,7 @@ pub struct UnrootedPhyloTree{ pub struct RootedPhyloTree{ root: NodeID, nodes: HashMap, - children: HashMap, NodeID)>>, + children: HashMap, NodeID)>>, parents: HashMap>, leaves: HashMap, } @@ -46,6 +46,31 @@ impl RootedPhyloTree{ } } + fn new_node(&mut self, children: Vec<(Option, NodeID)>, parent:Option, leaf_id:Option, parent_edge_weight: Option){ + let node_id = self.nodes.len(); + match leaf_id { + Some(taxa_id) => { + self.leaves.insert(node_id.clone(), taxa_id); + self.nodes.insert(node_id.clone(), true); + } + None =>{ + self.nodes.insert(node_id.clone(), false); + } + }; + self.children.insert(node_id.clone(), children); + self.parents.insert(node_id.clone(), parent); + self.children.entry(node_id).or_default().push((parent_edge_weight, node_id)); + } + + pub fn add_child_to_node(&mut self, parent:NodeID, edge_weight: Option){ + self.new_node(Vec::new(), Some(parent), None, edge_weight); + + } + + pub fn add_leaf_to_node(&mut self, parent: NodeID, leaf_label: String, edge_weight: Option){ + self.new_node(Vec::new(), Some(parent), Some(leaf_label), edge_weight); + } + pub fn iter_node_ancestors_pre(&self, node_id:&NodeID)->Vec{ let mut node_iter: Vec = Vec::new(); let mut curr_node = node_id; @@ -74,7 +99,7 @@ impl SimpleRTree for RootedPhyloTree{ &self.nodes } - fn get_children(&self, node_id: &NodeID)->Option<&HashSet<(Option, NodeID)>>{ + fn get_children(&self, node_id: &NodeID)->Option<&Vec<(Option, NodeID)>>{ self.children.get(node_id) } @@ -87,7 +112,7 @@ impl SimpleRTree for RootedPhyloTree{ fn get_subtree(&self, node_id: &NodeID)->Box{ let root= node_id.clone(); let mut nodes: HashMap= HashMap::new(); - let mut children: HashMap, NodeID)>> = HashMap::new(); + let mut children: HashMap, NodeID)>> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); let mut leaves: HashMap = HashMap::new(); for decsendant_node_id in self.iter_node_pre(node_id){ @@ -143,7 +168,7 @@ impl SimpleRTree for RootedPhyloTree{ fn extract_subtree(&mut self, node_id: &NodeID)-> Box{ let root= node_id.clone(); let mut nodes: HashMap= HashMap::new(); - let mut children: HashMap, NodeID)>> = HashMap::new(); + let mut children: HashMap, NodeID)>> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); let mut leaves: HashMap = HashMap::new(); for decsendant_node_id in self.iter_node_pre(node_id){ diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index f122aff..4341ecc 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -15,7 +15,7 @@ pub trait SimpleRTree { fn get_nodes(&self)->&HashMap; /// Returns children node ids for given node id - fn get_children(&self, node_id: &NodeID)->Option<&HashSet<(Option, NodeID)>>; + fn get_children(&self, node_id: &NodeID)->Option<&Vec<(Option, NodeID)>>; /// Returns all leaf node ids fn get_leaves(&self, node_id: &NodeID)->HashSet; From 6ddc242fb6d42df99efa8d3743f37ffa78221521 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Tue, 17 Oct 2023 09:12:24 -0500 Subject: [PATCH 13/40] added reroot at edge --- src/tree.rs | 6 +++++- src/tree/simple_rtree.rs | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/tree.rs b/src/tree.rs index 1c17bcc..d1d5066 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -216,7 +216,11 @@ impl SimpleRTree for RootedPhyloTree{ } fn reroot_at_node(&mut self, node_id: &NodeID){ - self.root = node_id.clone(); + todo!() + } + + fn reroot_at_edge(&mut self, edge: (&NodeID, &NodeID)) { + todo!() } fn insert_internal_node(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)){ diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 4341ecc..021120d 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -55,6 +55,9 @@ pub trait SimpleRTree { /// Rerootes tree at given node. fn reroot_at_node(&mut self, node_id: &NodeID); + + /// Rerootes tree at edge. + fn reroot_at_edge(&mut self, edge: (&NodeID, &NodeID)); /// Inserts node in the middle of edge given by pair of node ids fn insert_internal_node(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)); From 3610dba82e71f63518f1d1be09a16324894eec2a Mon Sep 17 00:00:00 2001 From: sriram98v Date: Tue, 17 Oct 2023 09:13:47 -0500 Subject: [PATCH 14/40] removed empty line --- src/tree/simple_rtree.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 021120d..94f13f0 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -7,7 +7,6 @@ use crate::iter::edge_iter::*; pub type EdgeWeight = f64; pub trait SimpleRTree { - /// Returns root node id fn get_root(&self)->&NodeID; From 788b6364abd7fef6d468c7273dd7542fe7fd25d5 Mon Sep 17 00:00:00 2001 From: Sanket Wagle <17229623+swagle8987@users.noreply.github.com> Date: Tue, 17 Oct 2023 12:44:03 -0500 Subject: [PATCH 15/40] Updated the tree structure to have HashSet(Node) and distance matrix --- src/iter/node_iter.rs | 10 +++---- src/tree.rs | 57 ++++++++++++++++++++++++++++++---------- src/tree/simple_rtree.rs | 13 +++++++-- 3 files changed, 59 insertions(+), 21 deletions(-) diff --git a/src/iter/node_iter.rs b/src/iter/node_iter.rs index 3190d8d..61abfb1 100644 --- a/src/iter/node_iter.rs +++ b/src/iter/node_iter.rs @@ -12,10 +12,10 @@ pub struct PreOrdNodes impl PreOrdNodes { - pub fn new(start_node_id: &NodeID, children: &HashMap, NodeID)>>)->Self{ + pub fn new(start_node_id: &NodeID, children: &HashMap>)->Self{ Self { stack:vec![*start_node_id], nodes: children.iter() .map(|(k, v)| (k.clone(), v.iter() - .map(|(_ew, ni)| ni.clone()).collect::>())) + .map(|ni| ni.clone()).collect::>())) .collect()} } } @@ -46,10 +46,10 @@ pub struct PostOrdNodes impl PostOrdNodes { - pub fn new(start_node_id: &NodeID, children: &HashMap, NodeID)>>)->Self{ + pub fn new(start_node_id: &NodeID, children: &HashMap>)->Self{ Self { stack:vec![*start_node_id], nodes: children.iter() .map(|(k, v)| (k.clone(), v.iter() - .map(|(_ew, ni)| ni.clone()).collect::>())) + .map(|ni| ni.clone()).collect::>())) .collect()} } } @@ -73,4 +73,4 @@ impl Iterator for PostOrdNodes } None } -} \ No newline at end of file +} diff --git a/src/tree.rs b/src/tree.rs index b24f60e..94aa6dd 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -2,7 +2,7 @@ pub mod simple_rtree; use std::collections::{HashMap, HashSet}; -use crate::node::*; +use crate::{node::*, taxa}; use crate::tree::simple_rtree::*; use crate::iter::{node_iter::*, edge_iter::*}; @@ -12,20 +12,24 @@ pub struct UnrootedPhyloTree{ leaves: HashMap, } +#[derive(Debug)] pub struct RootedPhyloTree{ root: NodeID, nodes: HashMap, - children: HashMap, NodeID)>>, + children: HashMap>, + distance_matrix: HashMap<(NodeID,NodeID),EdgeWeight>, parents: HashMap>, leaves: HashMap, } + impl RootedPhyloTree{ pub fn new()->Self{ RootedPhyloTree { root: 0, nodes: HashMap::from([(0, false)]), children: HashMap::new(), + distance_matrix: HashMap::new(), parents: HashMap::from([(0, None)]), leaves: HashMap::new() } @@ -35,19 +39,41 @@ impl RootedPhyloTree{ todo!() } - fn leaves_of_node(&self, node_id:&NodeID, leaves:&mut Vec){ if self.get_children(node_id).expect("Invalid NodeID!").is_empty(){ leaves.push(*node_id); } - for (_edge_weight, child_node_id) in self.get_children(node_id).expect("Invalid NodeID").iter(){ + for child_node_id in self.get_children(node_id).expect("Invalid NodeID").iter(){ self.leaves_of_node(child_node_id, leaves); } } } impl SimpleRTree for RootedPhyloTree{ + fn add_node(&mut self,is_leaf:bool)->NodeID { + let n : NodeID = self.nodes.len(); + self.nodes.insert(n, is_leaf); + return n + } + + fn assign_taxa(&mut self,node:&NodeID, taxa:&str) { + self.leaves.insert(*node, String::from(taxa)); + } + + fn add_child(&mut self,parent:&NodeID, child:NodeID,distance:EdgeWeight) { + let hs = self.children.get_mut(parent); + match hs{ + None => { + let mut h:HashSet = HashSet::new(); + h.insert(child); + self.children.insert(*parent, h); + }, + Some(x) =>{ + x.insert(child); + } + } + } fn get_root(&self)->&NodeID{ &self.root } @@ -56,7 +82,7 @@ impl SimpleRTree for RootedPhyloTree{ &self.nodes } - fn get_children(&self, node_id: &NodeID)->Option<&HashSet<(Option, NodeID)>>{ + fn get_children(&self, node_id: &NodeID)->Option<&HashSet>{ self.children.get(node_id) } @@ -69,15 +95,15 @@ impl SimpleRTree for RootedPhyloTree{ fn get_subtree(&self, node_id: &NodeID)->Box{ let root= node_id.clone(); let mut nodes: HashMap= HashMap::new(); - let mut children: HashMap, NodeID)>> = HashMap::new(); + let mut children: HashMap> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); let mut leaves: HashMap = HashMap::new(); - for decsendant_node_id in self.iter_node_pre(node_id){ - nodes.insert(decsendant_node_id.clone(), self.nodes.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); - children.insert(decsendant_node_id.clone(), self.children.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); - parents.insert(decsendant_node_id.clone(), self.parents.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); - if self.is_leaf(&decsendant_node_id){ - leaves.insert(decsendant_node_id.clone(), self.leaves.get(&decsendant_node_id).cloned().unwrap()); + for descendant_node_id in self.iter_node_pre(node_id){ + nodes.insert(descendant_node_id.clone(), self.nodes.get(&descendant_node_id).expect("Invalid NodeID!").clone()); + children.insert(descendant_node_id.clone(), self.children.get(&descendant_node_id).expect("Invalid NodeID!").clone()); + parents.insert(descendant_node_id.clone(), self.parents.get(&descendant_node_id).expect("Invalid NodeID!").clone()); + if self.is_leaf(&descendant_node_id){ + leaves.insert(descendant_node_id.clone(), self.leaves.get(&descendant_node_id).cloned().unwrap()); } } Box::new( @@ -85,6 +111,7 @@ impl SimpleRTree for RootedPhyloTree{ root: root, nodes: nodes, children: children, + distance_matrix:self.distance_matrix.clone(), parents: parents, leaves: leaves, } @@ -106,7 +133,7 @@ impl SimpleRTree for RootedPhyloTree{ fn extract_subtree(&mut self, node_id: &NodeID)-> Box{ let root= node_id.clone(); let mut nodes: HashMap= HashMap::new(); - let mut children: HashMap, NodeID)>> = HashMap::new(); + let mut children: HashMap> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); let mut leaves: HashMap = HashMap::new(); for decsendant_node_id in self.iter_node_pre(node_id){ @@ -123,6 +150,7 @@ impl SimpleRTree for RootedPhyloTree{ root: root, nodes: nodes, children: children, + distance_matrix: self.distance_matrix.clone(), parents: parents, leaves: leaves, } @@ -176,4 +204,5 @@ impl SimpleRTree for RootedPhyloTree{ HashSet::from_iter(leaves) } -} \ No newline at end of file +} + diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 0b8da7a..779126e 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -7,6 +7,15 @@ use crate::iter::edge_iter::*; pub type EdgeWeight = f64; pub trait SimpleRTree { + + + //Add node to tree + fn add_node(&mut self,is_leaf:bool)->NodeID; + + //Add child to node + fn add_child(&mut self,parent:&NodeID, child:NodeID,distance:EdgeWeight); + + fn assign_taxa(&mut self,node:&NodeID, taxa:&str); /// Returns root node id fn get_root(&self)->&NodeID; @@ -15,7 +24,7 @@ pub trait SimpleRTree { fn get_nodes(&self)->&HashMap; /// Returns children node ids for given node id - fn get_children(&self, node_id: &NodeID)->Option<&HashSet<(Option, NodeID)>>; + fn get_children(&self, node_id: &NodeID)->Option<&HashSet>; /// Returns all leaf node ids fn get_leaves(&self, node_id: &NodeID)->HashSet; @@ -67,4 +76,4 @@ pub trait SimpleRTree { /// Returns cluster of node fn get_cluster(&self, node_id: &NodeID)-> HashSet; -} \ No newline at end of file +} From c7175c8f50aab642951a01e3f6b518cf09460e02 Mon Sep 17 00:00:00 2001 From: Sanket Wagle <17229623+swagle8987@users.noreply.github.com> Date: Tue, 17 Oct 2023 12:44:18 -0500 Subject: [PATCH 16/40] Added newick reader --- src/lib.rs | 18 +++++++++++++++++- src/tree.rs | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9c49740..02b6afe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,20 @@ pub mod node; pub mod tree; pub mod taxa; -pub mod iter; \ No newline at end of file +pub mod iter; + + +#[cfg(test)] +mod tests { + use crate::tree::RootedPhyloTree; + #[test] + fn read_a_tree() { + let input_str = String::from("((A,B),(C,D));"); + println!("{:?}",RootedPhyloTree::from_newick(input_str)); + } + #[test] + fn read_big_tree() { + let input_str = String::from("(0,(1,(2,(3,(4,(5,(6,(7,(8,(9,(10,(11,(12,(13,(14,(15,(16,(17,(18,(19,(20,(21,(22,(23,(24,(25,(26,(27,(28,(29,(30,(31,(32,(33,(34,(35,(36,(37,(38,(39,(40,(41,(42,(43,(44,(45,(46,(47,(48,(49,(50,(51,(52,(53,(54,(55,(56,(57,(58,(59,(60,(61,(62,(63,(64,(65,(66,(67,(68,(69,(70,(71,(72,(73,(74,(75,(76,(77,(78,(79,(80,(81,(82,(83,(84,(85,(86,(87,(88,(89,(90,(91,(92,(93,(94,(95,(96, (97,98))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"); + println!("{:?}",RootedPhyloTree::from_newick(input_str)); + } +} diff --git a/src/tree.rs b/src/tree.rs index 94aa6dd..e665df3 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -36,7 +36,46 @@ impl RootedPhyloTree{ } pub fn from_newick(newick_string: String)->Self{ - todo!() + let mut tree = RootedPhyloTree { + root: 0, + nodes: HashMap::new(), + children: HashMap::new(), + distance_matrix: HashMap::new(), + parents: HashMap::new(), + leaves: HashMap::new() + }; + let mut stack : Vec = Vec::new(); + let mut context : NodeID = 0; + let mut completed : NodeID = 0; + let mut taxa_str = String::from(""); + let mut chars = newick_string.chars(); + loop { + let next = chars.next(); + if next == None{ + break; + } + let mut c = next.unwrap(); + if c == '(' { + if context >= 0{ + stack.push(context); + } + context = tree.add_node(false); + } else if c == ')' || c == ',' { + completed = context; + context = stack.pop().unwrap(); + tree.add_child(&context,completed,0.0); + } else if c.is_alphanumeric() { + stack.push(context); + context = tree.add_node(true); + while c.is_alphanumeric() { + taxa_str.push(c); + c = chars.next().unwrap(); + } + tree.assign_taxa(&context, &taxa_str); + taxa_str = String::from(""); + } + } + return tree } fn leaves_of_node(&self, node_id:&NodeID, leaves:&mut Vec){ From d5a9b7c504c147c47d35e76395f148addfa3c7ce Mon Sep 17 00:00:00 2001 From: sriram98v Date: Tue, 17 Oct 2023 22:17:18 -0500 Subject: [PATCH 17/40] switched from Hashset to Vec for children --- src/iter/edge_iter.rs | 2 -- src/iter/node_iter.rs | 8 ++++---- src/tree.rs | 44 ++++++++++++++-------------------------- src/tree/simple_rtree.rs | 11 +++++----- 4 files changed, 24 insertions(+), 41 deletions(-) diff --git a/src/iter/edge_iter.rs b/src/iter/edge_iter.rs index 059947e..07182c5 100644 --- a/src/iter/edge_iter.rs +++ b/src/iter/edge_iter.rs @@ -1,8 +1,6 @@ use crate::node::*; use crate::tree::simple_rtree::*; use std::collections::{HashMap, HashSet}; -use itertools::Itertools; - pub struct PreOrdEdges { diff --git a/src/iter/node_iter.rs b/src/iter/node_iter.rs index 5e48a59..97a57a5 100644 --- a/src/iter/node_iter.rs +++ b/src/iter/node_iter.rs @@ -11,10 +11,10 @@ pub struct PreOrdNodes impl PreOrdNodes { - pub fn new(start_node_id: &NodeID, children: &HashMap>)->Self{ + pub fn new(start_node_id: &NodeID, children: &HashMap)>>)->Self{ Self { stack:vec![*start_node_id], nodes: children.iter() .map(|(k, v)| (k.clone(), v.iter() - .map(|ni| ni.clone()).collect::>())) + .map(|ni| ni.0.clone()).collect::>())) .collect()} } } @@ -45,10 +45,10 @@ pub struct PostOrdNodes impl PostOrdNodes { - pub fn new(start_node_id: &NodeID, children: &HashMap>)->Self{ + pub fn new(start_node_id: &NodeID, children: &HashMap)>>)->Self{ Self { stack:vec![*start_node_id], nodes: children.iter() .map(|(k, v)| (k.clone(), v.iter() - .map(|ni| ni.clone()).collect::>())) + .map(|ni| ni.0.clone()).collect::>())) .collect()} } } diff --git a/src/tree.rs b/src/tree.rs index 7157d61..3f69e67 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -2,7 +2,7 @@ pub mod simple_rtree; use std::collections::{HashMap, HashSet}; -use crate::{node::*, taxa}; +use crate::node::*; use crate::tree::simple_rtree::*; use crate::iter::{node_iter::*, edge_iter::*}; @@ -16,8 +16,7 @@ pub struct UnrootedPhyloTree{ pub struct RootedPhyloTree{ root: NodeID, nodes: HashMap, - children: HashMap>, - distance_matrix: HashMap<(NodeID,NodeID),EdgeWeight>, + children: HashMap)>>, parents: HashMap>, leaves: HashMap, } @@ -29,7 +28,6 @@ impl RootedPhyloTree{ root: 0, nodes: HashMap::from([(0, false)]), children: HashMap::new(), - distance_matrix: HashMap::new(), parents: HashMap::from([(0, None)]), leaves: HashMap::new() } @@ -40,13 +38,12 @@ impl RootedPhyloTree{ root: 0, nodes: HashMap::new(), children: HashMap::new(), - distance_matrix: HashMap::new(), parents: HashMap::new(), leaves: HashMap::new() }; let mut stack : Vec = Vec::new(); let mut context : NodeID = 0; - let mut completed : NodeID = 0; + let mut completed : NodeID; let mut taxa_str = String::from(""); let mut chars = newick_string.chars(); loop { @@ -56,14 +53,14 @@ impl RootedPhyloTree{ } let mut c = next.unwrap(); if c == '(' { - if context >= 0{ + if context > 0{ stack.push(context); } context = tree.add_node(false); } else if c == ')' || c == ',' { completed = context; context = stack.pop().unwrap(); - tree.add_child(&context,completed,0.0); + tree.add_child(&context,completed,None); } else if c.is_alphanumeric() { stack.push(context); context = tree.add_node(true); @@ -84,11 +81,11 @@ impl RootedPhyloTree{ } for child_node_id in self.get_children(node_id).expect("Invalid NodeID").iter(){ - self.leaves_of_node(child_node_id, leaves); + self.leaves_of_node(&child_node_id.0, leaves); } } - fn new_node(&mut self, children: Vec<(Option, NodeID)>, parent:Option, leaf_id:Option, parent_edge_weight: Option){ + fn new_node(&mut self, children: Vec<(NodeID, Option)>, parent:Option, leaf_id:Option, parent_edge_weight: Option){ let node_id = self.nodes.len(); match leaf_id { Some(taxa_id) => { @@ -101,7 +98,7 @@ impl RootedPhyloTree{ }; self.children.insert(node_id.clone(), children); self.parents.insert(node_id.clone(), parent); - self.children.entry(node_id).or_default().push((parent_edge_weight, node_id)); + self.children.entry(node_id).or_default().push((node_id, parent_edge_weight)); } pub fn add_child_to_node(&mut self, parent:NodeID, edge_weight: Option){ @@ -133,6 +130,7 @@ impl RootedPhyloTree{ } impl SimpleRTree for RootedPhyloTree{ + fn add_node(&mut self,is_leaf:bool)->NodeID { let n : NodeID = self.nodes.len(); self.nodes.insert(n, is_leaf); @@ -143,18 +141,8 @@ impl SimpleRTree for RootedPhyloTree{ self.leaves.insert(*node, String::from(taxa)); } - fn add_child(&mut self,parent:&NodeID, child:NodeID,distance:EdgeWeight) { - let hs = self.children.get_mut(parent); - match hs{ - None => { - let mut h:HashSet = HashSet::new(); - h.insert(child); - self.children.insert(*parent, h); - }, - Some(x) =>{ - x.insert(child); - } - } + fn add_child(&mut self,parent:&NodeID, child:NodeID, distance: Option) { + self.children.entry(*parent).or_default().insert(*parent, (child, distance)); } fn get_root(&self)->&NodeID{ &self.root @@ -164,7 +152,7 @@ impl SimpleRTree for RootedPhyloTree{ &self.nodes } - fn get_children(&self, node_id: &NodeID)->Option<&HashSet>{ + fn get_children(&self, node_id: &NodeID)->Option<&Vec<(NodeID, Option)>>{ self.children.get(node_id) } @@ -177,8 +165,8 @@ impl SimpleRTree for RootedPhyloTree{ fn get_subtree(&self, node_id: &NodeID)->Box{ let root= node_id.clone(); let mut nodes: HashMap= HashMap::new(); - let mut children: HashMap> = HashMap::new(); - let mut parents: HashMap> = HashMap::new(); + let mut children: HashMap)>> = HashMap::new(); + let mut parents: HashMap> = HashMap::new(); let mut leaves: HashMap = HashMap::new(); for descendant_node_id in self.iter_node_pre(node_id){ nodes.insert(descendant_node_id.clone(), self.nodes.get(&descendant_node_id).expect("Invalid NodeID!").clone()); @@ -193,7 +181,6 @@ impl SimpleRTree for RootedPhyloTree{ root: root, nodes: nodes, children: children, - distance_matrix:self.distance_matrix.clone(), parents: parents, leaves: leaves, } @@ -234,7 +221,7 @@ impl SimpleRTree for RootedPhyloTree{ fn extract_subtree(&mut self, node_id: &NodeID)-> Box{ let root= node_id.clone(); let mut nodes: HashMap= HashMap::new(); - let mut children: HashMap> = HashMap::new(); + let mut children: HashMap)>> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); let mut leaves: HashMap = HashMap::new(); for decsendant_node_id in self.iter_node_pre(node_id){ @@ -251,7 +238,6 @@ impl SimpleRTree for RootedPhyloTree{ root: root, nodes: nodes, children: children, - distance_matrix: self.distance_matrix.clone(), parents: parents, leaves: leaves, } diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 2b7cf38..46cc2a7 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -7,14 +7,13 @@ use crate::iter::edge_iter::*; pub type EdgeWeight = f64; pub trait SimpleRTree { - - - //Add node to tree + /// Add node to tree fn add_node(&mut self,is_leaf:bool)->NodeID; - //Add child to node - fn add_child(&mut self,parent:&NodeID, child:NodeID,distance:EdgeWeight); + /// Add child to node + fn add_child(&mut self,parent:&NodeID, child:NodeID, distance:Option); + /// Assign taxa to leaf node fn assign_taxa(&mut self,node:&NodeID, taxa:&str); /// Returns root node id @@ -24,7 +23,7 @@ pub trait SimpleRTree { fn get_nodes(&self)->&HashMap; /// Returns children node ids for given node id - fn get_children(&self, node_id: &NodeID)->Option<&HashSet>; + fn get_children(&self, node_id: &NodeID)->Option<&Vec<(NodeID, Option)>>; /// Returns all leaf node ids fn get_leaves(&self, node_id: &NodeID)->HashSet; From f38db88c81b33813c208bed5afcc7f6bfb6df154 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Wed, 18 Oct 2023 18:46:41 -0500 Subject: [PATCH 18/40] switched to dbg! macro, modified add_node and from_newick --- src/lib.rs | 4 +- src/tree.rs | 138 +++++++++++++++++---------------------- src/tree/simple_rtree.rs | 4 +- 3 files changed, 65 insertions(+), 81 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 02b6afe..7a6d869 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,11 +10,11 @@ mod tests { #[test] fn read_a_tree() { let input_str = String::from("((A,B),(C,D));"); - println!("{:?}",RootedPhyloTree::from_newick(input_str)); + dbg!(RootedPhyloTree::from_newick(input_str)); } #[test] fn read_big_tree() { let input_str = String::from("(0,(1,(2,(3,(4,(5,(6,(7,(8,(9,(10,(11,(12,(13,(14,(15,(16,(17,(18,(19,(20,(21,(22,(23,(24,(25,(26,(27,(28,(29,(30,(31,(32,(33,(34,(35,(36,(37,(38,(39,(40,(41,(42,(43,(44,(45,(46,(47,(48,(49,(50,(51,(52,(53,(54,(55,(56,(57,(58,(59,(60,(61,(62,(63,(64,(65,(66,(67,(68,(69,(70,(71,(72,(73,(74,(75,(76,(77,(78,(79,(80,(81,(82,(83,(84,(85,(86,(87,(88,(89,(90,(91,(92,(93,(94,(95,(96, (97,98))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"); - println!("{:?}",RootedPhyloTree::from_newick(input_str)); + dbg!(RootedPhyloTree::from_newick(input_str)); } } diff --git a/src/tree.rs b/src/tree.rs index 3f69e67..b8a24a1 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -21,95 +21,68 @@ pub struct RootedPhyloTree{ leaves: HashMap, } - impl RootedPhyloTree{ pub fn new()->Self{ RootedPhyloTree { root: 0, nodes: HashMap::from([(0, false)]), - children: HashMap::new(), + children: HashMap::from([(0, Vec::new())]), parents: HashMap::from([(0, None)]), leaves: HashMap::new() } } pub fn from_newick(newick_string: String)->Self{ - let mut tree = RootedPhyloTree { - root: 0, - nodes: HashMap::new(), - children: HashMap::new(), - parents: HashMap::new(), - leaves: HashMap::new() - }; + let mut tree = RootedPhyloTree::new(); let mut stack : Vec = Vec::new(); let mut context : NodeID = 0; - let mut completed : NodeID; - let mut taxa_str = String::from(""); - let mut chars = newick_string.chars(); - loop { - let next = chars.next(); - if next == None{ + let mut str_ptr: usize = 0; + let newick_string = newick_string.chars().collect::>(); + loop{ + if str_ptr>=newick_string.len(){ break; } - let mut c = next.unwrap(); - if c == '(' { - if context > 0{ + match newick_string[str_ptr]{ + '(' => { + stack.push(context); - } - context = tree.add_node(false); - } else if c == ')' || c == ',' { - completed = context; - context = stack.pop().unwrap(); - tree.add_child(&context,completed,None); - } else if c.is_alphanumeric() { - stack.push(context); - context = tree.add_node(true); - while c.is_alphanumeric() { - taxa_str.push(c); - c = chars.next().unwrap(); - } - tree.assign_taxa(&context, &taxa_str); - taxa_str = String::from(""); + context = tree.add_node(Vec::new(), None, None, None); + }, + ')'|',' => { + let completed = context; + context = stack.pop().unwrap(); + tree.add_child(&context,&completed,None); + }, + _ => { + if newick_string[str_ptr].is_alphanumeric(){ + let mut taxa_str = String::from(""); + stack.push(context); + while newick_string[str_ptr].is_alphanumeric(){ + taxa_str.push(newick_string[str_ptr]); + str_ptr+=1; + } + context = tree.add_node(Vec::new(), None, Some(taxa_str), None); + continue; + } + }, } + str_ptr +=1; } - return tree + dbg!(stack); + return tree; } + fn leaves_of_node(&self, node_id:&NodeID, leaves:&mut Vec){ if self.get_children(node_id).expect("Invalid NodeID!").is_empty(){ leaves.push(*node_id); } - for child_node_id in self.get_children(node_id).expect("Invalid NodeID").iter(){ - self.leaves_of_node(&child_node_id.0, leaves); + for (child_node_id, _edge_weight) in self.get_children(node_id).expect("Invalid NodeID").iter(){ + self.leaves_of_node(child_node_id, leaves); } } - fn new_node(&mut self, children: Vec<(NodeID, Option)>, parent:Option, leaf_id:Option, parent_edge_weight: Option){ - let node_id = self.nodes.len(); - match leaf_id { - Some(taxa_id) => { - self.leaves.insert(node_id.clone(), taxa_id); - self.nodes.insert(node_id.clone(), true); - } - None =>{ - self.nodes.insert(node_id.clone(), false); - } - }; - self.children.insert(node_id.clone(), children); - self.parents.insert(node_id.clone(), parent); - self.children.entry(node_id).or_default().push((node_id, parent_edge_weight)); - } - - pub fn add_child_to_node(&mut self, parent:NodeID, edge_weight: Option){ - self.new_node(Vec::new(), Some(parent), None, edge_weight); - - } - - pub fn add_leaf_to_node(&mut self, parent: NodeID, leaf_label: String, edge_weight: Option){ - self.new_node(Vec::new(), Some(parent), Some(leaf_label), edge_weight); - } - pub fn iter_node_ancestors_pre(&self, node_id:&NodeID)->Vec{ let mut node_iter: Vec = Vec::new(); let mut curr_node = node_id; @@ -130,20 +103,32 @@ impl RootedPhyloTree{ } impl SimpleRTree for RootedPhyloTree{ - - fn add_node(&mut self,is_leaf:bool)->NodeID { - let n : NodeID = self.nodes.len(); - self.nodes.insert(n, is_leaf); - return n + fn add_node(&mut self, children: Vec<(NodeID, Option)>, parent:Option, leaf_id:Option, parent_edge_weight: Option)->NodeID{ + let node_id = self.nodes.len(); + match leaf_id { + Some(taxa_id) => { + self.leaves.insert(node_id.clone(), taxa_id); + self.nodes.insert(node_id.clone(), true); + } + None =>{ + self.nodes.insert(node_id.clone(), false); + } + }; + self.children.insert(node_id.clone(), children); + self.parents.insert(node_id.clone(), parent); + self.children.entry(node_id).or_default().push((node_id, parent_edge_weight)); + node_id } fn assign_taxa(&mut self,node:&NodeID, taxa:&str) { - self.leaves.insert(*node, String::from(taxa)); + *self.leaves.entry(*node).or_insert(String::new()) = String::from(taxa); } - fn add_child(&mut self,parent:&NodeID, child:NodeID, distance: Option) { - self.children.entry(*parent).or_default().insert(*parent, (child, distance)); + fn add_child(&mut self,parent:&NodeID, child: &NodeID, distance: Option) { + self.children.entry(parent.clone()).or_default().push((child.clone(), distance)); + self.parents.entry(child.clone()).and_modify(|e| *e=Some(child.clone())); } + fn get_root(&self)->&NodeID{ &self.root } @@ -168,12 +153,12 @@ impl SimpleRTree for RootedPhyloTree{ let mut children: HashMap)>> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); let mut leaves: HashMap = HashMap::new(); - for descendant_node_id in self.iter_node_pre(node_id){ - nodes.insert(descendant_node_id.clone(), self.nodes.get(&descendant_node_id).expect("Invalid NodeID!").clone()); - children.insert(descendant_node_id.clone(), self.children.get(&descendant_node_id).expect("Invalid NodeID!").clone()); - parents.insert(descendant_node_id.clone(), self.parents.get(&descendant_node_id).expect("Invalid NodeID!").clone()); - if self.is_leaf(&descendant_node_id){ - leaves.insert(descendant_node_id.clone(), self.leaves.get(&descendant_node_id).cloned().unwrap()); + for decsendant_node_id in self.iter_node_pre(node_id){ + nodes.insert(decsendant_node_id.clone(), self.nodes.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); + children.insert(decsendant_node_id.clone(), self.children.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); + parents.insert(decsendant_node_id.clone(), self.parents.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); + if self.is_leaf(&decsendant_node_id){ + leaves.insert(decsendant_node_id.clone(), self.leaves.get(&decsendant_node_id).cloned().unwrap()); } } Box::new( @@ -295,5 +280,4 @@ impl SimpleRTree for RootedPhyloTree{ HashSet::from_iter(leaves) } -} - +} \ No newline at end of file diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 46cc2a7..bfa3f6c 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -8,10 +8,10 @@ pub type EdgeWeight = f64; pub trait SimpleRTree { /// Add node to tree - fn add_node(&mut self,is_leaf:bool)->NodeID; + fn add_node(&mut self, children: Vec<(NodeID, Option)>, parent:Option, leaf_id:Option, parent_edge_weight: Option)->NodeID; /// Add child to node - fn add_child(&mut self,parent:&NodeID, child:NodeID, distance:Option); + fn add_child(&mut self,parent:&NodeID, child:&NodeID, distance:Option); /// Assign taxa to leaf node fn assign_taxa(&mut self,node:&NodeID, taxa:&str); From 7b6a3d4cadc5f2669172ba389c5ebc0a2c23f64b Mon Sep 17 00:00:00 2001 From: sriram98v Date: Thu, 19 Oct 2023 17:53:54 -0500 Subject: [PATCH 19/40] added new methods with default implementations to tree trait (get_node_degree, is_weighted, add_children) and generic clean tree method, fixed from_newick method --- src/tree.rs | 63 ++++++++++++++++++++++++++++++++-------- src/tree/simple_rtree.rs | 33 ++++++++++++++++++++- 2 files changed, 83 insertions(+), 13 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index b8a24a1..7561650 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -25,9 +25,9 @@ impl RootedPhyloTree{ pub fn new()->Self{ RootedPhyloTree { root: 0, - nodes: HashMap::from([(0, false)]), - children: HashMap::from([(0, Vec::new())]), - parents: HashMap::from([(0, None)]), + nodes: HashMap::new(), + children: HashMap::new(), + parents: HashMap::new(), leaves: HashMap::new() } } @@ -39,12 +39,11 @@ impl RootedPhyloTree{ let mut str_ptr: usize = 0; let newick_string = newick_string.chars().collect::>(); loop{ - if str_ptr>=newick_string.len(){ + if str_ptr==newick_string.len(){ break; } match newick_string[str_ptr]{ '(' => { - stack.push(context); context = tree.add_node(Vec::new(), None, None, None); }, @@ -68,17 +67,17 @@ impl RootedPhyloTree{ } str_ptr +=1; } - dbg!(stack); + tree.remove_self_loops(); return tree; } fn leaves_of_node(&self, node_id:&NodeID, leaves:&mut Vec){ - if self.get_children(node_id).expect("Invalid NodeID!").is_empty(){ + if self.get_node_children(node_id).is_empty(){ leaves.push(*node_id); } - for (child_node_id, _edge_weight) in self.get_children(node_id).expect("Invalid NodeID").iter(){ + for (child_node_id, _edge_weight) in self.get_node_children(node_id).iter(){ self.leaves_of_node(child_node_id, leaves); } } @@ -100,6 +99,12 @@ impl RootedPhyloTree{ } node_iter } + + fn remove_self_loops(&mut self){ + for (node_id, children) in self.children.iter_mut(){ + children.retain(|(child_id, _edge_weight)| child_id!=node_id); + } + } } impl SimpleRTree for RootedPhyloTree{ @@ -125,8 +130,14 @@ impl SimpleRTree for RootedPhyloTree{ } fn add_child(&mut self,parent:&NodeID, child: &NodeID, distance: Option) { - self.children.entry(parent.clone()).or_default().push((child.clone(), distance)); - self.parents.entry(child.clone()).and_modify(|e| *e=Some(child.clone())); + if parent!=child{ + self.children.entry(parent.clone()).or_default().push((child.clone(), distance)); + } + self.parents.entry(child.clone()).and_modify(|e| *e=Some(parent.clone())); + } + + fn add_children(&mut self, parent:NodeID, children: Vec<(NodeID, Option)>) { + self.children.entry(parent).and_modify(|x| x.extend(children)); } fn get_root(&self)->&NodeID{ @@ -137,8 +148,12 @@ impl SimpleRTree for RootedPhyloTree{ &self.nodes } - fn get_children(&self, node_id: &NodeID)->Option<&Vec<(NodeID, Option)>>{ - self.children.get(node_id) + fn get_node_children(&self, node_id: &NodeID)->&Vec<(NodeID, Option)>{ + self.children.get(node_id).expect("Invalid NodeID!") + } + + fn get_node_parent(&self, node_id:&NodeID)->Option<&NodeID>{ + self.parents.get(node_id).expect("Invalid NodeID!").as_ref() } fn get_leaves(&self, node_id: &NodeID)->HashSet{ @@ -148,6 +163,9 @@ impl SimpleRTree for RootedPhyloTree{ } fn get_subtree(&self, node_id: &NodeID)->Box{ + if self.is_leaf(node_id){ + panic!("NodeID is a leaf"); + } let root= node_id.clone(); let mut nodes: HashMap= HashMap::new(); let mut children: HashMap)>> = HashMap::new(); @@ -280,4 +298,25 @@ impl SimpleRTree for RootedPhyloTree{ HashSet::from_iter(leaves) } + fn clean(&mut self) { + let mut remove_list: Vec<&NodeID> = Vec::new(); + for (node_id, is_leaf) in self.nodes.clone().iter(){ + // remove root with only one child + if node_id==self.get_root() && self.get_node_degree(node_id)<2{ + let new_root = self.get_node_children(self.get_root())[0].0; + self.root = new_root; + self.parents.entry(new_root).and_modify(|x| *x = None); + remove_list.push(node_id); + } + // remove nodes with only one child + else if !is_leaf && self.get_node_degree(node_id)<3{ + let parent = self.get_node_parent(node_id).cloned(); + let children = self.get_node_children(node_id).clone(); + for (child_id, _edge_weight) in children.clone().into_iter(){ + self.parents.entry(child_id.clone()).and_modify(|x| *x = parent); + } + self.add_children(parent.unwrap(), children); + } + } + } } \ No newline at end of file diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index bfa3f6c..ed1be1f 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -13,6 +13,13 @@ pub trait SimpleRTree { /// Add child to node fn add_child(&mut self,parent:&NodeID, child:&NodeID, distance:Option); + /// Add children to node + fn add_children(&mut self, parent:NodeID, children: Vec<(NodeID, Option)>){ + for (child_id, edge_weight) in children.iter(){ + self.add_child(&parent, child_id, edge_weight.clone()); + } + } + /// Assign taxa to leaf node fn assign_taxa(&mut self,node:&NodeID, taxa:&str); @@ -21,9 +28,30 @@ pub trait SimpleRTree { /// Returns all node ids fn get_nodes(&self)->&HashMap; + + /// Returns node degree + fn get_node_degree(&self, node_id:&NodeID)->usize{ + self.get_node_children(node_id).len() + match self.get_node_parent(node_id) { + Some(_) => 1, + None => 0 + } + } + + /// Check if tree is weighted + fn is_weighted(&self)->bool{ + for (_, _, edge_weight) in self.iter_edges_post(self.get_root()){ + if edge_weight!=None{ + return true; + } + } + false + } /// Returns children node ids for given node id - fn get_children(&self, node_id: &NodeID)->Option<&Vec<(NodeID, Option)>>; + fn get_node_children(&self, node_id: &NodeID)->&Vec<(NodeID, Option)>; + + /// Returns node parent + fn get_node_parent(&self, node_id:&NodeID)->Option<&NodeID>; /// Returns all leaf node ids fn get_leaves(&self, node_id: &NodeID)->HashSet; @@ -78,4 +106,7 @@ pub trait SimpleRTree { /// Returns cluster of node fn get_cluster(&self, node_id: &NodeID)-> HashSet; + + /// Cleans self by removing 1) internal nodes (other than root) with degree 2, 2) Floating root nodes, 3) self loops + fn clean(&mut self); } From d5e9686d12d05611083b4ada5b36394c9e7c6df8 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Thu, 19 Oct 2023 17:54:08 -0500 Subject: [PATCH 20/40] updated output type of edge iterator --- src/iter/edge_iter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/iter/edge_iter.rs b/src/iter/edge_iter.rs index 07182c5..ac2ed0f 100644 --- a/src/iter/edge_iter.rs +++ b/src/iter/edge_iter.rs @@ -17,7 +17,7 @@ impl PreOrdEdges impl Iterator for PreOrdEdges { - type Item = (NodeID, NodeID); + type Item = (NodeID, NodeID, Option); fn next(&mut self)->Option{ todo!(); @@ -39,7 +39,7 @@ impl PostOrdEdges impl Iterator for PostOrdEdges { - type Item = (NodeID, NodeID); + type Item = (NodeID, NodeID, Option); fn next(&mut self)->Option{ todo!(); From 5c6d4c32d9c9aeae1f985f7aefd8c336f9a8f8cc Mon Sep 17 00:00:00 2001 From: sriram98v Date: Fri, 20 Oct 2023 15:57:06 -0500 Subject: [PATCH 21/40] adding weighted tree parsing --- src/lib.rs | 24 +++++++++---- src/tree.rs | 78 ++++++++++++++++++++++++++++++++++------ src/tree/simple_rtree.rs | 10 ++++++ 3 files changed, 94 insertions(+), 18 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7a6d869..81dae74 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,14 +7,24 @@ pub mod iter; #[cfg(test)] mod tests { use crate::tree::RootedPhyloTree; + // #[test] + // fn read_small_tree() { + // let input_str = String::from("((A,B),(C,D));"); + // dbg!(RootedPhyloTree::from_newick(input_str)); + // } + // #[test] + // fn read_big_tree() { + // let input_str = String::from("(0,(1,(2,(3,(4,(5,(6,(7,(8,(9,(10,(11,(12,(13,(14,(15,(16,(17,(18,(19,(20,(21,(22,(23,(24,(25,(26,(27,(28,(29,(30,(31,(32,(33,(34,(35,(36,(37,(38,(39,(40,(41,(42,(43,(44,(45,(46,(47,(48,(49,(50,(51,(52,(53,(54,(55,(56,(57,(58,(59,(60,(61,(62,(63,(64,(65,(66,(67,(68,(69,(70,(71,(72,(73,(74,(75,(76,(77,(78,(79,(80,(81,(82,(83,(84,(85,(86,(87,(88,(89,(90,(91,(92,(93,(94,(95,(96, (97,98))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"); + // dbg!(RootedPhyloTree::from_newick(input_str)); + // } + // #[test] + // fn read_smalllw_tree() { + // let input_str = String::from("((A:0.12,B:12),(C:10,D:0.001));"); + // dbg!(RootedPhyloTree::from_newick(input_str)); + // } #[test] - fn read_a_tree() { - let input_str = String::from("((A,B),(C,D));"); - dbg!(RootedPhyloTree::from_newick(input_str)); - } - #[test] - fn read_big_tree() { - let input_str = String::from("(0,(1,(2,(3,(4,(5,(6,(7,(8,(9,(10,(11,(12,(13,(14,(15,(16,(17,(18,(19,(20,(21,(22,(23,(24,(25,(26,(27,(28,(29,(30,(31,(32,(33,(34,(35,(36,(37,(38,(39,(40,(41,(42,(43,(44,(45,(46,(47,(48,(49,(50,(51,(52,(53,(54,(55,(56,(57,(58,(59,(60,(61,(62,(63,(64,(65,(66,(67,(68,(69,(70,(71,(72,(73,(74,(75,(76,(77,(78,(79,(80,(81,(82,(83,(84,(85,(86,(87,(88,(89,(90,(91,(92,(93,(94,(95,(96, (97,98))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"); + fn read_smallfw_tree() { + let input_str = String::from("((A:0.12,B:12):10,(C:15,D:0.001):20);"); dbg!(RootedPhyloTree::from_newick(input_str)); } } diff --git a/src/tree.rs b/src/tree.rs index 7561650..abb3d38 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -2,6 +2,8 @@ pub mod simple_rtree; use std::collections::{HashMap, HashSet}; +use itertools::Itertools; + use crate::node::*; use crate::tree::simple_rtree::*; use crate::iter::{node_iter::*, edge_iter::*}; @@ -47,31 +49,58 @@ impl RootedPhyloTree{ stack.push(context); context = tree.add_node(Vec::new(), None, None, None); }, - ')'|',' => { + ')' => { + let mut decimal_str: String = String::from(""); let completed = context; - context = stack.pop().unwrap(); - tree.add_child(&context,&completed,None); + context = stack.pop().unwrap(); + str_ptr += 1; + if newick_string[str_ptr]==':'{ + str_ptr+=1; + while newick_string[str_ptr].is_ascii_digit() || newick_string[str_ptr]=='.'{ + decimal_str.push(dbg!(newick_string[str_ptr])); + str_ptr+=1; + } + } + dbg!(&decimal_str.parse::().ok()); + // if !tree.node_is_child_of(&context, &completed){ + tree.add_child(dbg!(&context), dbg!(&completed), dbg!(decimal_str.parse::().ok())); + // } + continue; }, + ',' => { + let completed = context; + context = stack.pop().unwrap(); + if !tree.node_is_child_of(&context, &completed){ + tree.add_child(&context,&completed,None); + } + } _ => { if newick_string[str_ptr].is_alphanumeric(){ - let mut taxa_str = String::from(""); + let mut taxa_str = String::new(); + let mut decimal_str: String = String::new(); + let last_context = context.clone(); stack.push(context); while newick_string[str_ptr].is_alphanumeric(){ taxa_str.push(newick_string[str_ptr]); str_ptr+=1; } - context = tree.add_node(Vec::new(), None, Some(taxa_str), None); + if newick_string[str_ptr]==':'{ + str_ptr+=1; + while newick_string[str_ptr].is_ascii_digit() || newick_string[str_ptr]=='.'{ + decimal_str.push(newick_string[str_ptr]); + str_ptr+=1; + } + } + context = tree.add_node(Vec::new(), Some(last_context), Some(taxa_str), decimal_str.parse::().ok()); continue; } }, } str_ptr +=1; } - tree.remove_self_loops(); return tree; } - fn leaves_of_node(&self, node_id:&NodeID, leaves:&mut Vec){ if self.get_node_children(node_id).is_empty(){ leaves.push(*node_id); @@ -110,6 +139,7 @@ impl RootedPhyloTree{ impl SimpleRTree for RootedPhyloTree{ fn add_node(&mut self, children: Vec<(NodeID, Option)>, parent:Option, leaf_id:Option, parent_edge_weight: Option)->NodeID{ let node_id = self.nodes.len(); + match leaf_id { Some(taxa_id) => { self.leaves.insert(node_id.clone(), taxa_id); @@ -117,11 +147,20 @@ impl SimpleRTree for RootedPhyloTree{ } None =>{ self.nodes.insert(node_id.clone(), false); + self.children.insert(node_id.clone(), Vec::new()); } }; - self.children.insert(node_id.clone(), children); + self.add_children(node_id.clone(), children); self.parents.insert(node_id.clone(), parent); - self.children.entry(node_id).or_default().push((node_id, parent_edge_weight)); + match parent{ + Some(parent_id) => { + self.children.entry(parent_id).or_default().push((node_id, parent_edge_weight)); + self.parents.entry(node_id).or_insert(Some(parent_id)); + }, + None => { + self.parents.insert(node_id, None); + } + } node_id } @@ -129,11 +168,28 @@ impl SimpleRTree for RootedPhyloTree{ *self.leaves.entry(*node).or_insert(String::new()) = String::from(taxa); } + fn set_edge_weight(&mut self, parent:&NodeID, child:&NodeID, edge_weight:Option){ + self.children.entry(parent.clone()) + .and_modify(|children| *children = children.clone().iter() + .map(|(id, w)| { + match id==child{ + true => {(id.clone(), edge_weight)}, + false => {(id.clone(), w.clone())}, + } + }) + .collect() + ); + } + + fn add_child(&mut self,parent:&NodeID, child: &NodeID, distance: Option) { - if parent!=child{ + if parent == child{ + panic!("Can't set node parent to self!"); + } + if !(self.node_is_child_of(parent, child)){ self.children.entry(parent.clone()).or_default().push((child.clone(), distance)); + self.parents.entry(child.clone()).and_modify(|e| *e=Some(parent.clone())); } - self.parents.entry(child.clone()).and_modify(|e| *e=Some(parent.clone())); } fn add_children(&mut self, parent:NodeID, children: Vec<(NodeID, Option)>) { diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index ed1be1f..93f66f9 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -1,5 +1,7 @@ use std::collections::{HashMap, HashSet}; +use itertools::Itertools; + use crate::node::*; use crate::iter::node_iter::*; use crate::iter::edge_iter::*; @@ -20,6 +22,14 @@ pub trait SimpleRTree { } } + /// Sets the edge weight between two nodes (None to unweight the edge) + fn set_edge_weight(&mut self, parent:&NodeID, child:&NodeID, edge_weights:Option); + + /// Returns true of node is child of parent. + fn node_is_child_of(&self, parent:&NodeID, node:&NodeID)->bool{ + self.get_node_children(parent).iter().map(|(id, _weight)| id).contains(node) + } + /// Assign taxa to leaf node fn assign_taxa(&mut self,node:&NodeID, taxa:&str); From a3bd6a6f47bb9fb740e7435b0b58e0c5072e9682 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Fri, 20 Oct 2023 17:17:10 -0500 Subject: [PATCH 22/40] changing newick string iterator --- src/tree.rs | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index abb3d38..ef90bc3 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -27,9 +27,9 @@ impl RootedPhyloTree{ pub fn new()->Self{ RootedPhyloTree { root: 0, - nodes: HashMap::new(), - children: HashMap::new(), - parents: HashMap::new(), + nodes: HashMap::from([(0, false)]), + children: HashMap::from([(0, Vec::new())]), + parents: HashMap::from([(0, None)]), leaves: HashMap::new() } } @@ -37,17 +37,15 @@ impl RootedPhyloTree{ pub fn from_newick(newick_string: String)->Self{ let mut tree = RootedPhyloTree::new(); let mut stack : Vec = Vec::new(); - let mut context : NodeID = 0; + let mut context : NodeID = tree.get_root().clone(); let mut str_ptr: usize = 0; let newick_string = newick_string.chars().collect::>(); - loop{ - if str_ptr==newick_string.len(){ - break; - } + while str_ptr { stack.push(context); context = tree.add_node(Vec::new(), None, None, None); + str_ptr +=1; }, ')' => { let mut decimal_str: String = String::from(""); @@ -65,14 +63,9 @@ impl RootedPhyloTree{ // if !tree.node_is_child_of(&context, &completed){ tree.add_child(dbg!(&context), dbg!(&completed), dbg!(decimal_str.parse::().ok())); // } - continue; }, ',' => { - let completed = context; - context = stack.pop().unwrap(); - if !tree.node_is_child_of(&context, &completed){ - tree.add_child(&context,&completed,None); - } + str_ptr+=1; } _ => { if newick_string[str_ptr].is_alphanumeric(){ @@ -92,11 +85,9 @@ impl RootedPhyloTree{ } } context = tree.add_node(Vec::new(), Some(last_context), Some(taxa_str), decimal_str.parse::().ok()); - continue; } }, } - str_ptr +=1; } return tree; } From b13faee359ab766e5e2b95d57849bdab03002268 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sat, 21 Oct 2023 12:32:37 -0500 Subject: [PATCH 23/40] Revert "updated output type of edge iterator" This reverts commit d5e9686d12d05611083b4ada5b36394c9e7c6df8. --- src/iter/edge_iter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/iter/edge_iter.rs b/src/iter/edge_iter.rs index ac2ed0f..07182c5 100644 --- a/src/iter/edge_iter.rs +++ b/src/iter/edge_iter.rs @@ -17,7 +17,7 @@ impl PreOrdEdges impl Iterator for PreOrdEdges { - type Item = (NodeID, NodeID, Option); + type Item = (NodeID, NodeID); fn next(&mut self)->Option{ todo!(); @@ -39,7 +39,7 @@ impl PostOrdEdges impl Iterator for PostOrdEdges { - type Item = (NodeID, NodeID, Option); + type Item = (NodeID, NodeID); fn next(&mut self)->Option{ todo!(); From a79b073c3a8c5b2bfc26d90d772a9a8a58b99731 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sat, 21 Oct 2023 12:39:20 -0500 Subject: [PATCH 24/40] Revert "added new methods with default implementations to tree trait (get_node_degree, is_weighted, add_children) and generic clean tree method, fixed from_newick method" This reverts commit 7b6a3d4cadc5f2669172ba389c5ebc0a2c23f64b. --- src/tree.rs | 73 +++++++++------------------------------- src/tree/simple_rtree.rs | 28 ++------------- 2 files changed, 17 insertions(+), 84 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index ef90bc3..a000ecd 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -40,9 +40,13 @@ impl RootedPhyloTree{ let mut context : NodeID = tree.get_root().clone(); let mut str_ptr: usize = 0; let newick_string = newick_string.chars().collect::>(); - while str_ptr=newick_string.len(){ + break; + } match newick_string[str_ptr]{ '(' => { + stack.push(context); context = tree.add_node(Vec::new(), None, None, None); str_ptr +=1; @@ -77,27 +81,23 @@ impl RootedPhyloTree{ taxa_str.push(newick_string[str_ptr]); str_ptr+=1; } - if newick_string[str_ptr]==':'{ - str_ptr+=1; - while newick_string[str_ptr].is_ascii_digit() || newick_string[str_ptr]=='.'{ - decimal_str.push(newick_string[str_ptr]); - str_ptr+=1; - } - } - context = tree.add_node(Vec::new(), Some(last_context), Some(taxa_str), decimal_str.parse::().ok()); + context = tree.add_node(Vec::new(), None, Some(taxa_str), None); + continue; } }, } + str_ptr +=1; } + dbg!(stack); return tree; } fn leaves_of_node(&self, node_id:&NodeID, leaves:&mut Vec){ - if self.get_node_children(node_id).is_empty(){ + if self.get_node_children(node_id).expect("Invalid NodeID!").is_empty(){ leaves.push(*node_id); } - for (child_node_id, _edge_weight) in self.get_node_children(node_id).iter(){ + for (child_node_id, _edge_weight) in self.get_node_children(node_id).expect("Invalid NodeID").iter(){ self.leaves_of_node(child_node_id, leaves); } } @@ -119,12 +119,6 @@ impl RootedPhyloTree{ } node_iter } - - fn remove_self_loops(&mut self){ - for (node_id, children) in self.children.iter_mut(){ - children.retain(|(child_id, _edge_weight)| child_id!=node_id); - } - } } impl SimpleRTree for RootedPhyloTree{ @@ -174,17 +168,8 @@ impl SimpleRTree for RootedPhyloTree{ fn add_child(&mut self,parent:&NodeID, child: &NodeID, distance: Option) { - if parent == child{ - panic!("Can't set node parent to self!"); - } - if !(self.node_is_child_of(parent, child)){ - self.children.entry(parent.clone()).or_default().push((child.clone(), distance)); - self.parents.entry(child.clone()).and_modify(|e| *e=Some(parent.clone())); - } - } - - fn add_children(&mut self, parent:NodeID, children: Vec<(NodeID, Option)>) { - self.children.entry(parent).and_modify(|x| x.extend(children)); + self.children.entry(parent.clone()).or_default().push((child.clone(), distance)); + self.parents.entry(child.clone()).and_modify(|e| *e=Some(child.clone())); } fn get_root(&self)->&NodeID{ @@ -195,12 +180,8 @@ impl SimpleRTree for RootedPhyloTree{ &self.nodes } - fn get_node_children(&self, node_id: &NodeID)->&Vec<(NodeID, Option)>{ - self.children.get(node_id).expect("Invalid NodeID!") - } - - fn get_node_parent(&self, node_id:&NodeID)->Option<&NodeID>{ - self.parents.get(node_id).expect("Invalid NodeID!").as_ref() + fn get_node_children(&self, node_id: &NodeID)->Option<&Vec<(NodeID, Option)>>{ + self.children.get(node_id) } fn get_leaves(&self, node_id: &NodeID)->HashSet{ @@ -210,9 +191,6 @@ impl SimpleRTree for RootedPhyloTree{ } fn get_subtree(&self, node_id: &NodeID)->Box{ - if self.is_leaf(node_id){ - panic!("NodeID is a leaf"); - } let root= node_id.clone(); let mut nodes: HashMap= HashMap::new(); let mut children: HashMap)>> = HashMap::new(); @@ -345,25 +323,4 @@ impl SimpleRTree for RootedPhyloTree{ HashSet::from_iter(leaves) } - fn clean(&mut self) { - let mut remove_list: Vec<&NodeID> = Vec::new(); - for (node_id, is_leaf) in self.nodes.clone().iter(){ - // remove root with only one child - if node_id==self.get_root() && self.get_node_degree(node_id)<2{ - let new_root = self.get_node_children(self.get_root())[0].0; - self.root = new_root; - self.parents.entry(new_root).and_modify(|x| *x = None); - remove_list.push(node_id); - } - // remove nodes with only one child - else if !is_leaf && self.get_node_degree(node_id)<3{ - let parent = self.get_node_parent(node_id).cloned(); - let children = self.get_node_children(node_id).clone(); - for (child_id, _edge_weight) in children.clone().into_iter(){ - self.parents.entry(child_id.clone()).and_modify(|x| *x = parent); - } - self.add_children(parent.unwrap(), children); - } - } - } } \ No newline at end of file diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 93f66f9..3d01b96 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -27,7 +27,7 @@ pub trait SimpleRTree { /// Returns true of node is child of parent. fn node_is_child_of(&self, parent:&NodeID, node:&NodeID)->bool{ - self.get_node_children(parent).iter().map(|(id, _weight)| id).contains(node) + self.get_node_children(parent).expect("Invalid NodeID!").iter().map(|(id, _weight)| id).contains(node) } /// Assign taxa to leaf node @@ -38,30 +38,9 @@ pub trait SimpleRTree { /// Returns all node ids fn get_nodes(&self)->&HashMap; - - /// Returns node degree - fn get_node_degree(&self, node_id:&NodeID)->usize{ - self.get_node_children(node_id).len() + match self.get_node_parent(node_id) { - Some(_) => 1, - None => 0 - } - } - - /// Check if tree is weighted - fn is_weighted(&self)->bool{ - for (_, _, edge_weight) in self.iter_edges_post(self.get_root()){ - if edge_weight!=None{ - return true; - } - } - false - } /// Returns children node ids for given node id - fn get_node_children(&self, node_id: &NodeID)->&Vec<(NodeID, Option)>; - - /// Returns node parent - fn get_node_parent(&self, node_id:&NodeID)->Option<&NodeID>; + fn get_node_children(&self, node_id: &NodeID)->Option<&Vec<(NodeID, Option)>>; /// Returns all leaf node ids fn get_leaves(&self, node_id: &NodeID)->HashSet; @@ -116,7 +95,4 @@ pub trait SimpleRTree { /// Returns cluster of node fn get_cluster(&self, node_id: &NodeID)-> HashSet; - - /// Cleans self by removing 1) internal nodes (other than root) with degree 2, 2) Floating root nodes, 3) self loops - fn clean(&mut self); } From 0f7435b07ba0aa31719cee5a3a505b17d9c317ba Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sat, 21 Oct 2023 16:42:01 -0500 Subject: [PATCH 25/40] updated trait methods for tree, added from newick weighted tree support --- src/iter/edge_iter.rs | 4 +- src/lib.rs | 30 ++++---- src/tree.rs | 156 ++++++++++++++++++++++----------------- src/tree/simple_rtree.rs | 40 ++++++++-- 4 files changed, 138 insertions(+), 92 deletions(-) diff --git a/src/iter/edge_iter.rs b/src/iter/edge_iter.rs index 07182c5..ac2ed0f 100644 --- a/src/iter/edge_iter.rs +++ b/src/iter/edge_iter.rs @@ -17,7 +17,7 @@ impl PreOrdEdges impl Iterator for PreOrdEdges { - type Item = (NodeID, NodeID); + type Item = (NodeID, NodeID, Option); fn next(&mut self)->Option{ todo!(); @@ -39,7 +39,7 @@ impl PostOrdEdges impl Iterator for PostOrdEdges { - type Item = (NodeID, NodeID); + type Item = (NodeID, NodeID, Option); fn next(&mut self)->Option{ todo!(); diff --git a/src/lib.rs b/src/lib.rs index 81dae74..ed7e254 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,21 +7,21 @@ pub mod iter; #[cfg(test)] mod tests { use crate::tree::RootedPhyloTree; - // #[test] - // fn read_small_tree() { - // let input_str = String::from("((A,B),(C,D));"); - // dbg!(RootedPhyloTree::from_newick(input_str)); - // } - // #[test] - // fn read_big_tree() { - // let input_str = String::from("(0,(1,(2,(3,(4,(5,(6,(7,(8,(9,(10,(11,(12,(13,(14,(15,(16,(17,(18,(19,(20,(21,(22,(23,(24,(25,(26,(27,(28,(29,(30,(31,(32,(33,(34,(35,(36,(37,(38,(39,(40,(41,(42,(43,(44,(45,(46,(47,(48,(49,(50,(51,(52,(53,(54,(55,(56,(57,(58,(59,(60,(61,(62,(63,(64,(65,(66,(67,(68,(69,(70,(71,(72,(73,(74,(75,(76,(77,(78,(79,(80,(81,(82,(83,(84,(85,(86,(87,(88,(89,(90,(91,(92,(93,(94,(95,(96, (97,98))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"); - // dbg!(RootedPhyloTree::from_newick(input_str)); - // } - // #[test] - // fn read_smalllw_tree() { - // let input_str = String::from("((A:0.12,B:12),(C:10,D:0.001));"); - // dbg!(RootedPhyloTree::from_newick(input_str)); - // } + #[test] + fn read_small_tree() { + let input_str = String::from("((A,B),(C,D));"); + dbg!(RootedPhyloTree::from_newick(input_str)); + } + #[test] + fn read_big_tree() { + let input_str = String::from("(0,(1,(2,(3,(4,(5,(6,(7,(8,(9,(10,(11,(12,(13,(14,(15,(16,(17,(18,(19,(20,(21,(22,(23,(24,(25,(26,(27,(28,(29,(30,(31,(32,(33,(34,(35,(36,(37,(38,(39,(40,(41,(42,(43,(44,(45,(46,(47,(48,(49,(50,(51,(52,(53,(54,(55,(56,(57,(58,(59,(60,(61,(62,(63,(64,(65,(66,(67,(68,(69,(70,(71,(72,(73,(74,(75,(76,(77,(78,(79,(80,(81,(82,(83,(84,(85,(86,(87,(88,(89,(90,(91,(92,(93,(94,(95,(96, (97,98))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"); + dbg!(RootedPhyloTree::from_newick(input_str)); + } + #[test] + fn read_smalllw_tree() { + let input_str = String::from("((A:0.12,B:12),(C:10,D:0.001));"); + dbg!(RootedPhyloTree::from_newick(input_str)); + } #[test] fn read_smallfw_tree() { let input_str = String::from("((A:0.12,B:12):10,(C:15,D:0.001):20);"); diff --git a/src/tree.rs b/src/tree.rs index a000ecd..27f68bd 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -38,66 +38,68 @@ impl RootedPhyloTree{ let mut tree = RootedPhyloTree::new(); let mut stack : Vec = Vec::new(); let mut context : NodeID = tree.get_root().clone(); + let mut taxa_str = String::new(); + let mut decimal_str: String = String::new(); let mut str_ptr: usize = 0; - let newick_string = newick_string.chars().collect::>(); - loop{ - if str_ptr>=newick_string.len(){ - break; - } - match newick_string[str_ptr]{ + let newick_string = newick_string.chars().filter(|c| !c.is_whitespace()).collect::>(); + while str_ptr { - stack.push(context); - context = tree.add_node(Vec::new(), None, None, None); + context = tree.add_node(); str_ptr +=1; }, - ')' => { - let mut decimal_str: String = String::from(""); - let completed = context; - context = stack.pop().unwrap(); - str_ptr += 1; + ')'|',' => { + // last context id + let last_context = stack.last().expect("Newick string ended abruptly!"); + // add current context as a child to last context + tree.set_child(&context, last_context, decimal_str.parse::().ok(), taxa_str.clone()); + // we clear the strings + taxa_str.clear(); + decimal_str.clear(); + + if newick_string[str_ptr]==','{ + // create next child of last context + context = tree.add_node(); + str_ptr += 1; + } + else{ + context = stack.pop().expect("Newick string ended abruptly!"); + str_ptr += 1; + } + }, + ';'=>{ + break; + } + ':' => { + // if the current context had a weight if newick_string[str_ptr]==':'{ str_ptr+=1; while newick_string[str_ptr].is_ascii_digit() || newick_string[str_ptr]=='.'{ - decimal_str.push(dbg!(newick_string[str_ptr])); + decimal_str.push(newick_string[str_ptr]); str_ptr+=1; } } - dbg!(&decimal_str.parse::().ok()); - // if !tree.node_is_child_of(&context, &completed){ - tree.add_child(dbg!(&context), dbg!(&completed), dbg!(decimal_str.parse::().ok())); - // } - }, - ',' => { - str_ptr+=1; } _ => { - if newick_string[str_ptr].is_alphanumeric(){ - let mut taxa_str = String::new(); - let mut decimal_str: String = String::new(); - let last_context = context.clone(); - stack.push(context); - while newick_string[str_ptr].is_alphanumeric(){ - taxa_str.push(newick_string[str_ptr]); - str_ptr+=1; - } - context = tree.add_node(Vec::new(), None, Some(taxa_str), None); - continue; + // push taxa characters into taxa string + while newick_string[str_ptr]!=':'&&newick_string[str_ptr]!=')'&&newick_string[str_ptr]!=','&&newick_string[str_ptr]!='('{ + taxa_str.push(dbg!(newick_string[str_ptr])); + str_ptr+=1; } }, } - str_ptr +=1; } - dbg!(stack); return tree; } fn leaves_of_node(&self, node_id:&NodeID, leaves:&mut Vec){ - if self.get_node_children(node_id).expect("Invalid NodeID!").is_empty(){ + if self.get_node_children(node_id).is_empty(){ leaves.push(*node_id); } - for (child_node_id, _edge_weight) in self.get_node_children(node_id).expect("Invalid NodeID").iter(){ + for (child_node_id, _edge_weight) in self.get_node_children(node_id).iter(){ self.leaves_of_node(child_node_id, leaves); } } @@ -119,36 +121,34 @@ impl RootedPhyloTree{ } node_iter } + + fn remove_self_loops(&mut self){ + for (node_id, children) in self.children.iter_mut(){ + children.retain(|(child_id, _edge_weight)| child_id!=node_id); + } + } } impl SimpleRTree for RootedPhyloTree{ - fn add_node(&mut self, children: Vec<(NodeID, Option)>, parent:Option, leaf_id:Option, parent_edge_weight: Option)->NodeID{ + fn add_node(&mut self)->NodeID{ + // New node id let node_id = self.nodes.len(); - - match leaf_id { - Some(taxa_id) => { - self.leaves.insert(node_id.clone(), taxa_id); - self.nodes.insert(node_id.clone(), true); - } - None =>{ - self.nodes.insert(node_id.clone(), false); - self.children.insert(node_id.clone(), Vec::new()); - } - }; - self.add_children(node_id.clone(), children); - self.parents.insert(node_id.clone(), parent); - match parent{ - Some(parent_id) => { - self.children.entry(parent_id).or_default().push((node_id, parent_edge_weight)); - self.parents.entry(node_id).or_insert(Some(parent_id)); - }, - None => { - self.parents.insert(node_id, None); - } - } + // add entry of node in parents and children fields + self.nodes.insert(node_id.clone(), false); + self.parents.insert(node_id.clone(), None); + self.children.insert(node_id.clone(), Vec::new()); node_id } + fn set_child(&mut self, node_id:&NodeID, parent_id:&NodeID, distance:Option, taxa:String){ + self.parents.insert(node_id.clone(), Some(parent_id.clone())); + self.children.entry(parent_id.clone()).or_default().push((node_id.clone(), distance)); + if taxa.len()>0{ + self.leaves.insert(node_id.clone(), taxa); + self.nodes.insert(node_id.clone(), true); + } + } + fn assign_taxa(&mut self,node:&NodeID, taxa:&str) { *self.leaves.entry(*node).or_insert(String::new()) = String::from(taxa); } @@ -166,12 +166,6 @@ impl SimpleRTree for RootedPhyloTree{ ); } - - fn add_child(&mut self,parent:&NodeID, child: &NodeID, distance: Option) { - self.children.entry(parent.clone()).or_default().push((child.clone(), distance)); - self.parents.entry(child.clone()).and_modify(|e| *e=Some(child.clone())); - } - fn get_root(&self)->&NodeID{ &self.root } @@ -180,8 +174,12 @@ impl SimpleRTree for RootedPhyloTree{ &self.nodes } - fn get_node_children(&self, node_id: &NodeID)->Option<&Vec<(NodeID, Option)>>{ - self.children.get(node_id) + fn get_node_children(&self, node_id: &NodeID)->&Vec<(NodeID, Option)>{ + self.children.get(node_id).expect("Invalid NodeID!") + } + + fn get_node_parent(&self, node_id:&NodeID)->Option<&NodeID>{ + self.parents.get(node_id).expect("Invalid NodeID!").as_ref() } fn get_leaves(&self, node_id: &NodeID)->HashSet{ @@ -191,6 +189,9 @@ impl SimpleRTree for RootedPhyloTree{ } fn get_subtree(&self, node_id: &NodeID)->Box{ + if self.is_leaf(node_id){ + panic!("NodeID is a leaf"); + } let root= node_id.clone(); let mut nodes: HashMap= HashMap::new(); let mut children: HashMap)>> = HashMap::new(); @@ -323,4 +324,25 @@ impl SimpleRTree for RootedPhyloTree{ HashSet::from_iter(leaves) } + fn clean(&mut self) { + let mut remove_list: Vec<&NodeID> = Vec::new(); + for (node_id, is_leaf) in self.nodes.clone().iter(){ + // remove root with only one child + if node_id==self.get_root() && self.get_node_degree(node_id)<2{ + let new_root = self.get_node_children(self.get_root())[0].0; + self.root = new_root; + self.parents.entry(new_root).and_modify(|x| *x = None); + remove_list.push(node_id); + } + // remove nodes with only one child + else if !is_leaf && self.get_node_degree(node_id)<3{ + let parent = self.get_node_parent(node_id).cloned(); + let children = self.get_node_children(node_id).clone(); + for (child_id, _edge_weight) in children.clone().into_iter(){ + self.parents.entry(child_id.clone()).and_modify(|x| *x = parent); + } + self.set_children(parent.as_ref().unwrap(), &children); + } + } + } } \ No newline at end of file diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 3d01b96..ed1ebda 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -10,15 +10,15 @@ pub type EdgeWeight = f64; pub trait SimpleRTree { /// Add node to tree - fn add_node(&mut self, children: Vec<(NodeID, Option)>, parent:Option, leaf_id:Option, parent_edge_weight: Option)->NodeID; + fn add_node(&mut self)->NodeID; - /// Add child to node - fn add_child(&mut self,parent:&NodeID, child:&NodeID, distance:Option); + /// Sets node_id as child to parent. + fn set_child(&mut self, node_id:&NodeID, parent_id:&NodeID, distance:Option, taxa: String); - /// Add children to node - fn add_children(&mut self, parent:NodeID, children: Vec<(NodeID, Option)>){ + /// Sets iterable of node_ids as children to parent + fn set_children(&mut self, parent: &NodeID, children: &Vec<(NodeID, Option)>){ for (child_id, edge_weight) in children.iter(){ - self.add_child(&parent, child_id, edge_weight.clone()); + self.set_child(child_id, &parent, edge_weight.clone(), String::new()); } } @@ -27,7 +27,7 @@ pub trait SimpleRTree { /// Returns true of node is child of parent. fn node_is_child_of(&self, parent:&NodeID, node:&NodeID)->bool{ - self.get_node_children(parent).expect("Invalid NodeID!").iter().map(|(id, _weight)| id).contains(node) + self.get_node_children(parent).iter().map(|(id, _weight)| id).contains(node) } /// Assign taxa to leaf node @@ -38,9 +38,30 @@ pub trait SimpleRTree { /// Returns all node ids fn get_nodes(&self)->&HashMap; + + /// Returns node degree + fn get_node_degree(&self, node_id:&NodeID)->usize{ + self.get_node_children(node_id).len() + match self.get_node_parent(node_id) { + Some(_) => 1, + None => 0 + } + } + + /// Check if tree is weighted + fn is_weighted(&self)->bool{ + for (_, _, edge_weight) in self.iter_edges_post(self.get_root()){ + if edge_weight!=None{ + return true; + } + } + false + } /// Returns children node ids for given node id - fn get_node_children(&self, node_id: &NodeID)->Option<&Vec<(NodeID, Option)>>; + fn get_node_children(&self, node_id: &NodeID)->&Vec<(NodeID, Option)>; + + /// Returns node parent + fn get_node_parent(&self, node_id:&NodeID)->Option<&NodeID>; /// Returns all leaf node ids fn get_leaves(&self, node_id: &NodeID)->HashSet; @@ -95,4 +116,7 @@ pub trait SimpleRTree { /// Returns cluster of node fn get_cluster(&self, node_id: &NodeID)-> HashSet; + + /// Cleans self by removing 1) internal nodes (other than root) with degree 2, 2) Floating root nodes, 3) self loops + fn clean(&mut self); } From 0ba768b6a8aba75bd3029dc693137e40e53d5c7d Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sat, 21 Oct 2023 18:28:35 -0500 Subject: [PATCH 26/40] implemented to_newick --- src/lib.rs | 20 +++++++++++-------- src/tree.rs | 9 ++++++--- src/tree/simple_rtree.rs | 42 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ed7e254..6da537b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,25 +6,29 @@ pub mod iter; #[cfg(test)] mod tests { - use crate::tree::RootedPhyloTree; + use crate::tree::{RootedPhyloTree, simple_rtree::SimpleRTree}; #[test] fn read_small_tree() { let input_str = String::from("((A,B),(C,D));"); - dbg!(RootedPhyloTree::from_newick(input_str)); - } + let tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + } #[test] fn read_big_tree() { let input_str = String::from("(0,(1,(2,(3,(4,(5,(6,(7,(8,(9,(10,(11,(12,(13,(14,(15,(16,(17,(18,(19,(20,(21,(22,(23,(24,(25,(26,(27,(28,(29,(30,(31,(32,(33,(34,(35,(36,(37,(38,(39,(40,(41,(42,(43,(44,(45,(46,(47,(48,(49,(50,(51,(52,(53,(54,(55,(56,(57,(58,(59,(60,(61,(62,(63,(64,(65,(66,(67,(68,(69,(70,(71,(72,(73,(74,(75,(76,(77,(78,(79,(80,(81,(82,(83,(84,(85,(86,(87,(88,(89,(90,(91,(92,(93,(94,(95,(96, (97,98))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"); - dbg!(RootedPhyloTree::from_newick(input_str)); + let tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); } #[test] fn read_smalllw_tree() { let input_str = String::from("((A:0.12,B:12),(C:10,D:0.001));"); - dbg!(RootedPhyloTree::from_newick(input_str)); - } + let tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + } #[test] fn read_smallfw_tree() { let input_str = String::from("((A:0.12,B:12):10,(C:15,D:0.001):20);"); - dbg!(RootedPhyloTree::from_newick(input_str)); - } + let tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + } } diff --git a/src/tree.rs b/src/tree.rs index 27f68bd..efccff6 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -43,8 +43,7 @@ impl RootedPhyloTree{ let mut str_ptr: usize = 0; let newick_string = newick_string.chars().filter(|c| !c.is_whitespace()).collect::>(); while str_ptr { stack.push(context); context = tree.add_node(); @@ -85,7 +84,7 @@ impl RootedPhyloTree{ _ => { // push taxa characters into taxa string while newick_string[str_ptr]!=':'&&newick_string[str_ptr]!=')'&&newick_string[str_ptr]!=','&&newick_string[str_ptr]!='('{ - taxa_str.push(dbg!(newick_string[str_ptr])); + taxa_str.push(newick_string[str_ptr]); str_ptr+=1; } }, @@ -345,4 +344,8 @@ impl SimpleRTree for RootedPhyloTree{ } } } + + fn get_taxa(&self, node_id:&NodeID)->&String { + self.leaves.get(node_id).expect("Node has not associated taxa!") + } } \ No newline at end of file diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index ed1ebda..b29dd22 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -1,5 +1,4 @@ use std::collections::{HashMap, HashSet}; - use itertools::Itertools; use crate::node::*; @@ -119,4 +118,45 @@ pub trait SimpleRTree { /// Cleans self by removing 1) internal nodes (other than root) with degree 2, 2) Floating root nodes, 3) self loops fn clean(&mut self); + + /// Get node taxa + fn get_taxa(&self, node_id:&NodeID)->&String; + + /// Get edge weight + fn get_edge_weight(&self, parent_id: &NodeID, child_id:&NodeID)->Option<&EdgeWeight>{ + for node_id in self.get_node_children(parent_id).iter(){ + if node_id.0==*child_id{ + return node_id.1.as_ref(); + } + } + return None; + } + + /// return subtree as newick string + fn subtree_to_newick(&self, node_id:&NodeID, edge_weight:Option)->String{ + match self.is_leaf(node_id){ + true => { + match edge_weight { + Some(w) => {format!("{}:{}", self.get_taxa(node_id), w)}, + _ => {format!("{}", self.get_taxa(node_id))} + } + } + false => { + let mut tmp = String::new(); + tmp.push('('); + for (child_id, w) in self.get_node_children(node_id){ + let child_str = format!("{},", self.subtree_to_newick(child_id, *w)); + tmp.push_str(&child_str); + } + tmp.pop(); + tmp.push(')'); + return tmp; + } + } + } + + /// writes full tree in newick format + fn to_newick(&self)->String{ + format!("{};", self.subtree_to_newick(self.get_root(), None)) + } } From 02c762f775d8cd09a1d25f54a4130c93d5811ee6 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Sun, 22 Oct 2023 20:00:43 -0500 Subject: [PATCH 27/40] added method to split edge, distance from root --- src/tree.rs | 76 ++++++++++++++++++++++++++-------------- src/tree/simple_rtree.rs | 6 ++-- 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index efccff6..80eb6de 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -103,24 +103,6 @@ impl RootedPhyloTree{ } } - pub fn iter_node_ancestors_pre(&self, node_id:&NodeID)->Vec{ - let mut node_iter: Vec = Vec::new(); - let mut curr_node = node_id; - while self.parents.get(curr_node) != None { - match self.parents.get(curr_node).expect("Invalid NodeID!") { - Some(node) => { - node_iter.push(node.clone()); - curr_node = node; - }, - None => { - node_iter.push(self.get_root().clone()); - break; - }, - } - } - node_iter - } - fn remove_self_loops(&mut self){ for (node_id, children) in self.children.iter_mut(){ children.retain(|(child_id, _edge_weight)| child_id!=node_id); @@ -216,7 +198,7 @@ impl SimpleRTree for RootedPhyloTree{ } fn get_mrca(&self, node_id_list: Vec<&NodeID>)->NodeID{ - let ancestor_iter_vec: Vec> = node_id_list.iter().map(|x| self.iter_node_ancestors_pre(x).into_iter()).collect(); + let ancestor_iter_vec: Vec> = node_id_list.iter().map(|x| self.get_ancestors_pre(x).into_iter()).collect(); let mut mrca: NodeID = 0; for mut iterator in ancestor_iter_vec{ let temp: HashSet = HashSet::new(); @@ -288,8 +270,22 @@ impl SimpleRTree for RootedPhyloTree{ todo!() } - fn get_ancestors(&self, node_id: &NodeID)->Vec<&NodeID>{ - todo!() + fn get_ancestors_pre(&self, node_id: &NodeID)->Vec{ + let mut node_iter: Vec = Vec::new(); + let mut curr_node = node_id; + while self.parents.get(curr_node) != None { + match self.parents.get(curr_node).expect("Invalid NodeID!") { + Some(node) => { + node_iter.push(node.clone()); + curr_node = node; + }, + None => { + node_iter.push(self.get_root().clone()); + break; + }, + } + } + node_iter } fn leaf_distance_matrix(&self, weighted: bool)->Vec>{ @@ -304,12 +300,38 @@ impl SimpleRTree for RootedPhyloTree{ todo!() } - fn insert_internal_node(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)){ - todo!() - } - - fn distance_from_root(&self, weighted: bool)->f64{ - todo!() + fn split_edge(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)){ + let new_node_id = self.add_node(); + self.parents.insert(new_node_id, Some(edge.0)); + self.children.entry(new_node_id).or_default().push((edge.1, edge_weights.1)); + self.parents.insert(edge.1, Some(new_node_id)); + } + + fn distance_from_root(&self, node: &NodeID, weighted: bool)->f64{ + let binding = self.get_ancestors_pre(node); + let mut node_ancestor_pre = binding.iter(); + let mut curr_parent = node_ancestor_pre.next().unwrap(); + let mut distance = 0 as f64; + loop{ + match node_ancestor_pre.next(){ + Some(node_id) => { + let curr_parent_children = self.get_node_children(curr_parent); + for (child_id, w) in curr_parent_children{ + if child_id==node_id{ + match weighted { + true => {distance += w.unwrap_or(0 as f64);} + false => {distance += 1 as f64;} + } + curr_parent = node_id; + continue; + } + panic!("Ancestor chain is broken! Clean tree before moving forward...") + } + }, + None => {break;} + } + }; + distance } fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(HashSet, HashSet){ diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index b29dd22..a68cfd4 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -93,7 +93,7 @@ pub trait SimpleRTree { fn iter_edges_post(&self, start_node_id: &NodeID)->PostOrdEdges; /// Returns all node ids in path from root to given node - fn get_ancestors(&self, node_id: &NodeID)->Vec<&NodeID>; + fn get_ancestors_pre(&self, node_id: &NodeID)->Vec; /// Returns pairwise distance matrix of the taxa. If weighted is true, then returns sum of edge weights along paths connecting leaves of tree fn leaf_distance_matrix(&self, weighted: bool)->Vec>; @@ -105,10 +105,10 @@ pub trait SimpleRTree { fn reroot_at_edge(&mut self, edge: (&NodeID, &NodeID)); /// Inserts node in the middle of edge given by pair of node ids - fn insert_internal_node(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)); + fn split_edge(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)); /// Returns distance of node from root. If weighted is true, it returns sum of edges from root to self. - fn distance_from_root(&self, weighted: bool)->f64; + fn distance_from_root(&self, node: &NodeID, weighted: bool)->f64; /// Returns bipartition induced by edge fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(HashSet, HashSet); From b57e58550b529fb446557d955acb1a9e9c5d58d0 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 10:34:04 -0500 Subject: [PATCH 28/40] removed unused import --- src/tree.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index 80eb6de..8c4ddf0 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,9 +1,6 @@ pub mod simple_rtree; use std::collections::{HashMap, HashSet}; - -use itertools::Itertools; - use crate::node::*; use crate::tree::simple_rtree::*; use crate::iter::{node_iter::*, edge_iter::*}; From 01a5e3b519ffdd6f3c1101abc0aeb41d718c63c1 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 11:34:23 -0500 Subject: [PATCH 29/40] implemented methods distance_from ancestor, distance from rootm distance_from_node, leaf_distance_matrix, and node_distance_matrix. --- src/tree.rs | 23 +++++++++++------------ src/tree/simple_rtree.rs | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index 8c4ddf0..a933ef7 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -100,10 +100,12 @@ impl RootedPhyloTree{ } } - fn remove_self_loops(&mut self){ - for (node_id, children) in self.children.iter_mut(){ - children.retain(|(child_id, _edge_weight)| child_id!=node_id); - } + pub fn get_children(&self)->&HashMap)>>{ + &self.children + } + + pub fn get_parents(&self)->&HashMap>{ + &self.parents } } @@ -260,11 +262,11 @@ impl SimpleRTree for RootedPhyloTree{ } fn iter_edges_pre(&self, start_node_id: &NodeID)->PreOrdEdges{ - todo!() + PreOrdEdges::new(&self, start_node_id) } fn iter_edges_post(&self, start_node_id: &NodeID)->PostOrdEdges{ - todo!() + PostOrdEdges::new(&self, start_node_id) } fn get_ancestors_pre(&self, node_id: &NodeID)->Vec{ @@ -285,10 +287,6 @@ impl SimpleRTree for RootedPhyloTree{ node_iter } - fn leaf_distance_matrix(&self, weighted: bool)->Vec>{ - todo!() - } - fn reroot_at_node(&mut self, node_id: &NodeID){ todo!() } @@ -304,9 +302,10 @@ impl SimpleRTree for RootedPhyloTree{ self.parents.insert(edge.1, Some(new_node_id)); } - fn distance_from_root(&self, node: &NodeID, weighted: bool)->f64{ + fn distance_from_ancestor(&self, node: &NodeID, ancestor: &NodeID, weighted: bool)->f64{ let binding = self.get_ancestors_pre(node); - let mut node_ancestor_pre = binding.iter(); + let start_idx = binding.iter().position(|&x| x==*ancestor).expect("Provided ancestor is not an ancestor of node!"); + let mut node_ancestor_pre = binding[start_idx..].iter(); let mut curr_parent = node_ancestor_pre.next().unwrap(); let mut distance = 0 as f64; loop{ diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index a68cfd4..9691c24 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -96,7 +96,28 @@ pub trait SimpleRTree { fn get_ancestors_pre(&self, node_id: &NodeID)->Vec; /// Returns pairwise distance matrix of the taxa. If weighted is true, then returns sum of edge weights along paths connecting leaves of tree - fn leaf_distance_matrix(&self, weighted: bool)->Vec>; + fn leaf_distance_matrix(&self, weighted: bool)->HashMap<(NodeID, NodeID), EdgeWeight>{ + let binding = self.get_leaves(self.get_root()); + let leaves = binding.iter().combinations(2); + let mut dist_mat: HashMap<(NodeID, NodeID), EdgeWeight> = HashMap::new(); + for node_pair in leaves{ + let w = self.distance_from_node(node_pair[0], node_pair[1], weighted); + dist_mat.insert((node_pair[0].clone(), node_pair[1].clone()), w); + } + dist_mat + } + + /// Returns pairwise distance matrix of all nodes. If weighted is true, then returns sum of edge weights along paths connecting leaves of tree + fn node_distance_matrix(&self, weighted: bool)->HashMap<(NodeID, NodeID), EdgeWeight>{ + let binding = self.get_nodes(); + let leaves = binding.keys().combinations(2); + let mut dist_mat: HashMap<(NodeID, NodeID), EdgeWeight> = HashMap::new(); + for node_pair in leaves{ + let w = self.distance_from_node(node_pair[0], node_pair[1], weighted); + dist_mat.insert((node_pair[0].clone(), node_pair[1].clone()), w); + } + dist_mat + } /// Rerootes tree at given node. fn reroot_at_node(&mut self, node_id: &NodeID); @@ -107,8 +128,19 @@ pub trait SimpleRTree { /// Inserts node in the middle of edge given by pair of node ids fn split_edge(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)); + /// Returns distance of node from some ancestor of node. If weighted is true, it returns sum of edges from root to self. + fn distance_from_ancestor(&self, node: &NodeID, ancestor: &NodeID, weighted: bool)->f64; + /// Returns distance of node from root. If weighted is true, it returns sum of edges from root to self. - fn distance_from_root(&self, node: &NodeID, weighted: bool)->f64; + fn distance_from_root(&self, node: &NodeID, weighted: bool)->EdgeWeight{ + self.distance_from_ancestor(node, self.get_root(), weighted) + } + + /// Returns distance of node from root. If weighted is true, it returns sum of edges from root to self. + fn distance_from_node(&self, node1: &NodeID, node2: &NodeID, weighted: bool)->f64{ + let mrca = self.get_mrca(vec![node1, node2]); + self.distance_from_ancestor(node1, &mrca, weighted) + self.distance_from_ancestor(node2, &mrca, weighted) + } /// Returns bipartition induced by edge fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(HashSet, HashSet); From 89265959bcc9f0af669d5ea2e8c9a7bd417adcc5 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 11:34:42 -0500 Subject: [PATCH 30/40] implemented post order edges --- src/iter/edge_iter.rs | 61 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/src/iter/edge_iter.rs b/src/iter/edge_iter.rs index ac2ed0f..607492d 100644 --- a/src/iter/edge_iter.rs +++ b/src/iter/edge_iter.rs @@ -1,17 +1,27 @@ use crate::node::*; use crate::tree::simple_rtree::*; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; +use crate::iter::node_iter::PostOrdNodes; +use crate::tree::RootedPhyloTree; pub struct PreOrdEdges { stack: Vec<(NodeID, NodeID)>, - nodes: HashMap> + nodes_iter: PostOrdNodes, + children: HashMap)>>, } impl PreOrdEdges { - pub fn new(_tree: &HashMap)->Self{ - Self { stack:vec![], nodes: HashMap::new()} + pub fn new(tree: &RootedPhyloTree, start_node: &NodeID)->Self{ + Self { + stack:vec![], + nodes_iter: PostOrdNodes::new( + start_node, + tree.get_children(), + ), + children: tree.get_children().clone(), + } } } @@ -26,14 +36,24 @@ impl Iterator for PreOrdEdges pub struct PostOrdEdges { - stack: Vec<(NodeID, NodeID)>, - nodes: HashMap> + stack: Vec<(NodeID, NodeID, Option)>, + node_iter: PostOrdNodes, + children: HashMap)>>, + parents: HashMap>, } impl PostOrdEdges { - pub fn new(_tree: &HashMap)->Self{ - Self { stack:vec![], nodes: HashMap::new()} + pub fn new(tree: &RootedPhyloTree, start_node: &NodeID)->Self{ + Self { + stack:vec![], + node_iter: PostOrdNodes::new( + start_node, + tree.get_children(), + ), + children: tree.get_children().clone(), + parents: tree.get_parents().clone(), + } } } @@ -42,6 +62,27 @@ impl Iterator for PostOrdEdges type Item = (NodeID, NodeID, Option); fn next(&mut self)->Option{ - todo!(); - } + match self.stack.pop(){ + Some((n1, n2, w)) => Some((n1, n2, w)), + None => { + match self.node_iter.next(){ + Some(node_id) => { + let node_id_parent = self.parents.get(&node_id).unwrap(); + match node_id_parent { + Some(parent_id) => { + let mut w: Option = None; + for (child_node_id, weight) in self.children.get(parent_id).unwrap(){ + if child_node_id==&node_id{ + w = *weight; + } + } + Some((*parent_id, node_id, w)) + }, + None => return None, + } + }, + None => None + } + } + } } } \ No newline at end of file From 6b28751e9726b6ee9c9a9b8de2841e1457a457ca Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 12:38:10 -0500 Subject: [PATCH 31/40] added methods to get each field of tree --- src/tree/simple_rtree.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 9691c24..c13ab7d 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -55,6 +55,12 @@ pub trait SimpleRTree { } false } + + /// Get all node-child relationships + fn get_children(&self)->&HashMap)>>; + + /// Get all node-parent relationships + fn get_parents(&self)->&HashMap>; /// Returns children node ids for given node id fn get_node_children(&self, node_id: &NodeID)->&Vec<(NodeID, Option)>; @@ -63,7 +69,7 @@ pub trait SimpleRTree { fn get_node_parent(&self, node_id:&NodeID)->Option<&NodeID>; /// Returns all leaf node ids - fn get_leaves(&self, node_id: &NodeID)->HashSet; + fn get_leaves(&self, node_id: &NodeID)->HashSet<(NodeID, String)>; /// Returns full subtree rooted at given node fn get_subtree(&self, node_id: &NodeID)->Box; @@ -78,7 +84,7 @@ pub trait SimpleRTree { fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID)); /// Returns subtree starting at given node, while corresponding nodes from self. - fn extract_subtree(&mut self, node_id: &NodeID)-> Box; + fn prune(&mut self, node_id: &NodeID)-> Box; ///Returns an iterator that iterates over the nodes in Pre-order fn iter_node_pre(&self, start_node_id: &NodeID)->PreOrdNodes; @@ -98,7 +104,7 @@ pub trait SimpleRTree { /// Returns pairwise distance matrix of the taxa. If weighted is true, then returns sum of edge weights along paths connecting leaves of tree fn leaf_distance_matrix(&self, weighted: bool)->HashMap<(NodeID, NodeID), EdgeWeight>{ let binding = self.get_leaves(self.get_root()); - let leaves = binding.iter().combinations(2); + let leaves = binding.iter().map(|(leaf_id, taxa)| leaf_id).combinations(2); let mut dist_mat: HashMap<(NodeID, NodeID), EdgeWeight> = HashMap::new(); for node_pair in leaves{ let w = self.distance_from_node(node_pair[0], node_pair[1], weighted); @@ -191,4 +197,7 @@ pub trait SimpleRTree { fn to_newick(&self)->String{ format!("{};", self.subtree_to_newick(self.get_root(), None)) } + + /// Increment all node_ids + fn incerement_ids(&mut self, value: &usize); } From 97478aa00b1cd156d31b2ef73169042670d72c80 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 12:38:51 -0500 Subject: [PATCH 32/40] added method to increment node_ids, renamed extract_subtree to prune --- src/tree.rs | 50 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index a933ef7..1d7be25 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -99,14 +99,6 @@ impl RootedPhyloTree{ self.leaves_of_node(child_node_id, leaves); } } - - pub fn get_children(&self)->&HashMap)>>{ - &self.children - } - - pub fn get_parents(&self)->&HashMap>{ - &self.parents - } } impl SimpleRTree for RootedPhyloTree{ @@ -154,6 +146,15 @@ impl SimpleRTree for RootedPhyloTree{ &self.nodes } + fn get_children(&self)->&HashMap)>>{ + &self.children + } + + fn get_parents(&self)->&HashMap>{ + &self.parents + } + + fn get_node_children(&self, node_id: &NodeID)->&Vec<(NodeID, Option)>{ self.children.get(node_id).expect("Invalid NodeID!") } @@ -162,10 +163,10 @@ impl SimpleRTree for RootedPhyloTree{ self.parents.get(node_id).expect("Invalid NodeID!").as_ref() } - fn get_leaves(&self, node_id: &NodeID)->HashSet{ + fn get_leaves(&self, node_id: &NodeID)->HashSet<(NodeID, String)>{ let mut leaf_vec: Vec = Vec::new(); self.leaves_of_node(node_id, &mut leaf_vec); - leaf_vec.into_iter().collect::>() + leaf_vec.into_iter().map(|leaf_id| (leaf_id, self.leaves.get(&leaf_id).cloned().expect("NodeID is not a leaf!"))).collect::>() } fn get_subtree(&self, node_id: &NodeID)->Box{ @@ -227,7 +228,7 @@ impl SimpleRTree for RootedPhyloTree{ todo!() } - fn extract_subtree(&mut self, node_id: &NodeID)-> Box{ + fn prune(&mut self, node_id: &NodeID)-> Box{ let root= node_id.clone(); let mut nodes: HashMap= HashMap::new(); let mut children: HashMap)>> = HashMap::new(); @@ -366,4 +367,31 @@ impl SimpleRTree for RootedPhyloTree{ fn get_taxa(&self, node_id:&NodeID)->&String { self.leaves.get(node_id).expect("Node has not associated taxa!") } + + fn incerement_ids(&mut self, value: &usize){ + self.nodes = self.nodes.clone().into_iter().map(|(node_id, node_type)| (node_id+value, node_type)).collect(); + self.parents = self.parents.clone().into_iter().map(|(node_id, parent_id)| { + ( + node_id+value, + match parent_id{ + Some(id) => Some(id+value), + None => None, + } + ) + }).collect(); + self.children = self.children.clone().into_iter().map(|(node_id, children_vec)| { + ( + node_id+value, + children_vec.into_iter().map(|(child_id, w)| { + ( + child_id+value, + w + ) + }) + .collect() + ) + }).collect(); + self.leaves = self.leaves.clone().into_iter().map(|(leaf_id, taxa)| (leaf_id+value, taxa)).collect(); + } + } \ No newline at end of file From 53245eb2144f214995f3ff4d358cc950643fcaf8 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 14:33:38 -0500 Subject: [PATCH 33/40] switched node from type to enum, updated all dependent methods and structs, added support for fully weighted+labelled newick tree string --- src/lib.rs | 6 +++ src/node.rs | 59 +++++++++++++++++++++--- src/tree.rs | 96 ++++++++++++++++++++++------------------ src/tree/simple_rtree.rs | 56 +++++++++++++---------- 4 files changed, 144 insertions(+), 73 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6da537b..5de88be 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,4 +31,10 @@ mod tests { let tree = RootedPhyloTree::from_newick(input_str); dbg!(tree.to_newick()); } + #[test] + fn read_smallfwfl_tree() { + let input_str = String::from("((A:0.12,B:12)E:10,(C:15,D:0.001)F:20)G;"); + let tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + } } diff --git a/src/node.rs b/src/node.rs index 9190661..e7f1748 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,12 +1,59 @@ +use std::fmt::{Debug, Display}; + pub type NodeID = usize; -pub type NodeType = bool; // True for leaves, false for internal nodes +// pub type NodeType = bool; // True for leaves, false for internal nodes + +#[derive(Clone, PartialEq, Eq)] +pub enum NodeType{ + Internal(Option), + Leaf(Option), +} + +impl NodeType{ + pub fn new(is_leaf: bool, taxa: Option)->Self{ + match is_leaf { + true => Self::Leaf(taxa), + false => Self::Internal(taxa), + } + } + + pub fn is_leaf(&self)->bool{ + match self { + NodeType::Internal(_taxa) => false, + NodeType::Leaf(_taxa) => true, + } + } -pub trait Node { - fn is_leaf(&self)->bool; + pub fn flip(&mut self){ + match self { + NodeType::Internal(taxa) => {*self = NodeType::Leaf(taxa.clone())}, + NodeType::Leaf(taxa) => {*self = NodeType::Internal(taxa.clone())}, + } + } + + pub fn taxa(&self)->String{ + match self { + NodeType::Internal(taxa) => taxa.clone().unwrap_or("".to_string()), + NodeType::Leaf(taxa) => taxa.clone().unwrap_or("".to_string()), + } + } + + pub fn node_type(&self)->String{ + match self.is_leaf() { + false => format!("Internal"), + true => format!("Leaf"), + } + } +} + +impl Debug for NodeType{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}", self.node_type(), self.taxa()) + } } -impl Node for NodeType{ - fn is_leaf(&self)->bool { - *self +impl Display for NodeType{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}", self.node_type(), self.taxa()) } } \ No newline at end of file diff --git a/src/tree.rs b/src/tree.rs index 1d7be25..59a624c 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,6 +1,8 @@ pub mod simple_rtree; use std::collections::{HashMap, HashSet}; +use itertools::Itertools; + use crate::node::*; use crate::tree::simple_rtree::*; use crate::iter::{node_iter::*, edge_iter::*}; @@ -17,17 +19,15 @@ pub struct RootedPhyloTree{ nodes: HashMap, children: HashMap)>>, parents: HashMap>, - leaves: HashMap, } impl RootedPhyloTree{ pub fn new()->Self{ RootedPhyloTree { root: 0, - nodes: HashMap::from([(0, false)]), + nodes: HashMap::from([(0, NodeType::Internal(None))]), children: HashMap::from([(0, Vec::new())]), parents: HashMap::from([(0, None)]), - leaves: HashMap::new() } } @@ -50,22 +50,34 @@ impl RootedPhyloTree{ // last context id let last_context = stack.last().expect("Newick string ended abruptly!"); // add current context as a child to last context - tree.set_child(&context, last_context, decimal_str.parse::().ok(), taxa_str.clone()); + tree.set_child( + &context, + last_context, + decimal_str.parse::().ok(), + match taxa_str.is_empty(){ + true => None, + false => Some(taxa_str.to_string()) + } + ); // we clear the strings taxa_str.clear(); decimal_str.clear(); - if newick_string[str_ptr]==','{ - // create next child of last context - context = tree.add_node(); - str_ptr += 1; - } - else{ - context = stack.pop().expect("Newick string ended abruptly!"); - str_ptr += 1; + match newick_string[str_ptr] { + ',' => { + context = tree.add_node(); + str_ptr += 1; + } + _ => { + context = stack.pop().expect("Newick string ended abruptly!"); + str_ptr += 1; + } } }, ';'=>{ + if !taxa_str.is_empty(){ + tree.assign_taxa(&context, &taxa_str); + } break; } ':' => { @@ -80,13 +92,18 @@ impl RootedPhyloTree{ } _ => { // push taxa characters into taxa string - while newick_string[str_ptr]!=':'&&newick_string[str_ptr]!=')'&&newick_string[str_ptr]!=','&&newick_string[str_ptr]!='('{ + while newick_string[str_ptr]!=':'&&newick_string[str_ptr]!=')'&&newick_string[str_ptr]!=','&&newick_string[str_ptr]!='('&&newick_string[str_ptr]!=';'{ taxa_str.push(newick_string[str_ptr]); str_ptr+=1; } }, } } + let mut leaf_ids = Vec::new(); + tree.leaves_of_node(tree.get_root(), &mut leaf_ids); + for leaf_id in leaf_ids{ + tree.set_leaf(&leaf_id); + } return tree; } @@ -106,23 +123,24 @@ impl SimpleRTree for RootedPhyloTree{ // New node id let node_id = self.nodes.len(); // add entry of node in parents and children fields - self.nodes.insert(node_id.clone(), false); + self.nodes.insert(node_id.clone(), NodeType::Internal(None)); self.parents.insert(node_id.clone(), None); self.children.insert(node_id.clone(), Vec::new()); node_id } - fn set_child(&mut self, node_id:&NodeID, parent_id:&NodeID, distance:Option, taxa:String){ + fn set_child(&mut self, node_id:&NodeID, parent_id:&NodeID, distance:Option, taxa:Option){ self.parents.insert(node_id.clone(), Some(parent_id.clone())); self.children.entry(parent_id.clone()).or_default().push((node_id.clone(), distance)); - if taxa.len()>0{ - self.leaves.insert(node_id.clone(), taxa); - self.nodes.insert(node_id.clone(), true); - } + self.nodes.insert(node_id.clone(), NodeType::Internal(taxa)); + } + + fn set_leaf(&mut self, node_id: &NodeID) { + self.nodes.entry(node_id.clone()).and_modify(|node| node.flip()); } fn assign_taxa(&mut self,node:&NodeID, taxa:&str) { - *self.leaves.entry(*node).or_insert(String::new()) = String::from(taxa); + self.nodes.insert(node.clone(), NodeType::Internal(Some(taxa.to_string()))); } fn set_edge_weight(&mut self, parent:&NodeID, child:&NodeID, edge_weight:Option){ @@ -142,6 +160,10 @@ impl SimpleRTree for RootedPhyloTree{ &self.root } + fn get_node(&self, node_id: &NodeID)->&NodeType{ + self.nodes.get(node_id).expect("Invalid NodeID") + } + fn get_nodes(&self)->&HashMap{ &self.nodes } @@ -163,10 +185,10 @@ impl SimpleRTree for RootedPhyloTree{ self.parents.get(node_id).expect("Invalid NodeID!").as_ref() } - fn get_leaves(&self, node_id: &NodeID)->HashSet<(NodeID, String)>{ + fn get_leaves(&self, node_id: &NodeID)->Vec<(NodeID, NodeType)>{ let mut leaf_vec: Vec = Vec::new(); self.leaves_of_node(node_id, &mut leaf_vec); - leaf_vec.into_iter().map(|leaf_id| (leaf_id, self.leaves.get(&leaf_id).cloned().expect("NodeID is not a leaf!"))).collect::>() + leaf_vec.into_iter().map(|leaf_id| (leaf_id, self.nodes.get(&leaf_id).cloned().expect("Invalid NodeID!"))).collect::>() } fn get_subtree(&self, node_id: &NodeID)->Box{ @@ -177,14 +199,10 @@ impl SimpleRTree for RootedPhyloTree{ let mut nodes: HashMap= HashMap::new(); let mut children: HashMap)>> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); - let mut leaves: HashMap = HashMap::new(); for decsendant_node_id in self.iter_node_pre(node_id){ nodes.insert(decsendant_node_id.clone(), self.nodes.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); children.insert(decsendant_node_id.clone(), self.children.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); parents.insert(decsendant_node_id.clone(), self.parents.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); - if self.is_leaf(&decsendant_node_id){ - leaves.insert(decsendant_node_id.clone(), self.leaves.get(&decsendant_node_id).cloned().unwrap()); - } } Box::new( RootedPhyloTree{ @@ -192,7 +210,6 @@ impl SimpleRTree for RootedPhyloTree{ nodes: nodes, children: children, parents: parents, - leaves: leaves, } ) } @@ -221,7 +238,7 @@ impl SimpleRTree for RootedPhyloTree{ } fn is_leaf(&self, node_id: &NodeID)->bool{ - self.nodes.get(node_id).expect("Invalid NodeID").clone() + self.nodes.get(node_id).expect("Invalid NodeID").is_leaf() } fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID)){ @@ -233,23 +250,17 @@ impl SimpleRTree for RootedPhyloTree{ let mut nodes: HashMap= HashMap::new(); let mut children: HashMap)>> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); - let mut leaves: HashMap = HashMap::new(); for decsendant_node_id in self.iter_node_pre(node_id){ nodes.insert(decsendant_node_id.clone(), self.nodes.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); children.insert(decsendant_node_id.clone(), self.children.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); parents.insert(decsendant_node_id.clone(), self.parents.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); - match self.leaves.remove(&decsendant_node_id){ - Some(taxa_id) => {leaves.insert(decsendant_node_id.clone(), taxa_id);}, - None => {}, } - } Box::new( RootedPhyloTree{ root: root, nodes: nodes, children: children, parents: parents, - leaves: leaves, } ) } @@ -331,20 +342,20 @@ impl SimpleRTree for RootedPhyloTree{ distance } - fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(HashSet, HashSet){ + fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(Vec<(NodeID, NodeType)>, Vec<(NodeID, NodeType)>){ let c2 = self.get_cluster(edge.1); - (self.leaves.keys().map(|x| x.clone()).collect::>().difference(&c2).map(|x| x.clone()).collect(), c2) + (self.nodes.clone().into_iter().filter(|x| !c2.contains(x)).collect_vec(), c2) } - fn get_cluster(&self, node_id: &NodeID)-> HashSet{ + fn get_cluster(&self, node_id: &NodeID)-> Vec<(NodeID, NodeType)>{ let mut leaves: Vec = Vec::new(); self.leaves_of_node(node_id, &mut leaves); - HashSet::from_iter(leaves) + leaves.into_iter().map(|leaf_id| (leaf_id, self.get_node(&leaf_id).clone())).collect_vec() } fn clean(&mut self) { let mut remove_list: Vec<&NodeID> = Vec::new(); - for (node_id, is_leaf) in self.nodes.clone().iter(){ + for (node_id, node) in self.nodes.clone().iter(){ // remove root with only one child if node_id==self.get_root() && self.get_node_degree(node_id)<2{ let new_root = self.get_node_children(self.get_root())[0].0; @@ -353,7 +364,7 @@ impl SimpleRTree for RootedPhyloTree{ remove_list.push(node_id); } // remove nodes with only one child - else if !is_leaf && self.get_node_degree(node_id)<3{ + else if !node.is_leaf() && self.get_node_degree(node_id)<3{ let parent = self.get_node_parent(node_id).cloned(); let children = self.get_node_children(node_id).clone(); for (child_id, _edge_weight) in children.clone().into_iter(){ @@ -364,8 +375,8 @@ impl SimpleRTree for RootedPhyloTree{ } } - fn get_taxa(&self, node_id:&NodeID)->&String { - self.leaves.get(node_id).expect("Node has not associated taxa!") + fn get_taxa(&self, node_id:&NodeID)->String { + self.get_node(node_id).taxa() } fn incerement_ids(&mut self, value: &usize){ @@ -391,7 +402,6 @@ impl SimpleRTree for RootedPhyloTree{ .collect() ) }).collect(); - self.leaves = self.leaves.clone().into_iter().map(|(leaf_id, taxa)| (leaf_id+value, taxa)).collect(); } } \ No newline at end of file diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index c13ab7d..4795dd3 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -1,4 +1,5 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; +use std::fmt::format; use itertools::Itertools; use crate::node::*; @@ -12,15 +13,18 @@ pub trait SimpleRTree { fn add_node(&mut self)->NodeID; /// Sets node_id as child to parent. - fn set_child(&mut self, node_id:&NodeID, parent_id:&NodeID, distance:Option, taxa: String); + fn set_child(&mut self, node_id:&NodeID, parent_id:&NodeID, distance:Option, taxa: Option); /// Sets iterable of node_ids as children to parent fn set_children(&mut self, parent: &NodeID, children: &Vec<(NodeID, Option)>){ for (child_id, edge_weight) in children.iter(){ - self.set_child(child_id, &parent, edge_weight.clone(), String::new()); + self.set_child(child_id, &parent, edge_weight.clone(), None); } } + /// Converts internal node to leaf_node + fn set_leaf(&mut self, node_id: &NodeID); + /// Sets the edge weight between two nodes (None to unweight the edge) fn set_edge_weight(&mut self, parent:&NodeID, child:&NodeID, edge_weights:Option); @@ -38,6 +42,9 @@ pub trait SimpleRTree { /// Returns all node ids fn get_nodes(&self)->&HashMap; + /// Returns node by ID + fn get_node(&self, node_id: &NodeID)->&NodeType; + /// Returns node degree fn get_node_degree(&self, node_id:&NodeID)->usize{ self.get_node_children(node_id).len() + match self.get_node_parent(node_id) { @@ -69,7 +76,7 @@ pub trait SimpleRTree { fn get_node_parent(&self, node_id:&NodeID)->Option<&NodeID>; /// Returns all leaf node ids - fn get_leaves(&self, node_id: &NodeID)->HashSet<(NodeID, String)>; + fn get_leaves(&self, node_id: &NodeID)->Vec<(NodeID, NodeType)>; /// Returns full subtree rooted at given node fn get_subtree(&self, node_id: &NodeID)->Box; @@ -104,7 +111,7 @@ pub trait SimpleRTree { /// Returns pairwise distance matrix of the taxa. If weighted is true, then returns sum of edge weights along paths connecting leaves of tree fn leaf_distance_matrix(&self, weighted: bool)->HashMap<(NodeID, NodeID), EdgeWeight>{ let binding = self.get_leaves(self.get_root()); - let leaves = binding.iter().map(|(leaf_id, taxa)| leaf_id).combinations(2); + let leaves = binding.iter().map(|(leaf_id, _taxa)| leaf_id).combinations(2); let mut dist_mat: HashMap<(NodeID, NodeID), EdgeWeight> = HashMap::new(); for node_pair in leaves{ let w = self.distance_from_node(node_pair[0], node_pair[1], weighted); @@ -149,16 +156,16 @@ pub trait SimpleRTree { } /// Returns bipartition induced by edge - fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(HashSet, HashSet); + fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(Vec<(NodeID, NodeType)>, Vec<(NodeID, NodeType)>); /// Returns cluster of node - fn get_cluster(&self, node_id: &NodeID)-> HashSet; + fn get_cluster(&self, node_id: &NodeID)-> Vec<(NodeID, NodeType)>; /// Cleans self by removing 1) internal nodes (other than root) with degree 2, 2) Floating root nodes, 3) self loops fn clean(&mut self); /// Get node taxa - fn get_taxa(&self, node_id:&NodeID)->&String; + fn get_taxa(&self, node_id:&NodeID)->String; /// Get edge weight fn get_edge_weight(&self, parent_id: &NodeID, child_id:&NodeID)->Option<&EdgeWeight>{ @@ -172,25 +179,26 @@ pub trait SimpleRTree { /// return subtree as newick string fn subtree_to_newick(&self, node_id:&NodeID, edge_weight:Option)->String{ - match self.is_leaf(node_id){ - true => { - match edge_weight { - Some(w) => {format!("{}:{}", self.get_taxa(node_id), w)}, - _ => {format!("{}", self.get_taxa(node_id))} - } + fn print_node(node: &NodeType, weight: Option)->String{ + match weight { + Some(w) => format!("{}:{}", node.taxa(), w), + None => node.taxa() } - false => { - let mut tmp = String::new(); - tmp.push('('); - for (child_id, w) in self.get_node_children(node_id){ - let child_str = format!("{},", self.subtree_to_newick(child_id, *w)); - tmp.push_str(&child_str); - } - tmp.pop(); - tmp.push(')'); - return tmp; + } + + let node = self.get_node(node_id); + let mut tmp = String::new(); + if self.get_node_children(node_id).len()>0{ + tmp.push('('); + for (child_id, w) in self.get_node_children(node_id){ + let child_str = format!("{},", self.subtree_to_newick(child_id, *w)); + tmp.push_str(&child_str); } + tmp.pop(); + tmp.push(')'); } + tmp.push_str(&print_node(node, edge_weight)); + return tmp; } /// writes full tree in newick format From 61a7a9242b3a693b19478f7bf8e5e80eecdc57c4 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 14:37:29 -0500 Subject: [PATCH 34/40] cargo warnings --- src/iter/edge_iter.rs | 10 ++++++---- src/tree.rs | 12 ++++++------ src/tree/simple_rtree.rs | 13 ++++++------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/iter/edge_iter.rs b/src/iter/edge_iter.rs index 607492d..fa2babe 100644 --- a/src/iter/edge_iter.rs +++ b/src/iter/edge_iter.rs @@ -6,9 +6,10 @@ use crate::tree::RootedPhyloTree; pub struct PreOrdEdges { - stack: Vec<(NodeID, NodeID)>, - nodes_iter: PostOrdNodes, + stack: Vec<(NodeID, NodeID, Option)>, + node_iter: PostOrdNodes, children: HashMap)>>, + parents: HashMap>, } impl PreOrdEdges @@ -16,11 +17,12 @@ impl PreOrdEdges pub fn new(tree: &RootedPhyloTree, start_node: &NodeID)->Self{ Self { stack:vec![], - nodes_iter: PostOrdNodes::new( + node_iter: PostOrdNodes::new( start_node, tree.get_children(), ), children: tree.get_children().clone(), + parents: tree.get_parents().clone(), } } } @@ -78,7 +80,7 @@ impl Iterator for PostOrdEdges } Some((*parent_id, node_id, w)) }, - None => return None, + None => None, } }, None => None diff --git a/src/tree.rs b/src/tree.rs index 59a624c..58d7c03 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -8,9 +8,9 @@ use crate::tree::simple_rtree::*; use crate::iter::{node_iter::*, edge_iter::*}; pub struct UnrootedPhyloTree{ - nodes: HashMap, - neighbours: HashMap, NodeID)>>, - leaves: HashMap, + _nodes: HashMap, + _neighbours: HashMap, NodeID)>>, + _leaves: HashMap, } #[derive(Debug)] @@ -241,7 +241,7 @@ impl SimpleRTree for RootedPhyloTree{ self.nodes.get(node_id).expect("Invalid NodeID").is_leaf() } - fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID)){ + fn graft_subtree(&mut self, _tree: Box, _edge: (&NodeID, &NodeID)){ todo!() } @@ -299,11 +299,11 @@ impl SimpleRTree for RootedPhyloTree{ node_iter } - fn reroot_at_node(&mut self, node_id: &NodeID){ + fn reroot_at_node(&mut self, _node_id: &NodeID){ todo!() } - fn reroot_at_edge(&mut self, edge: (&NodeID, &NodeID)) { + fn reroot_at_edge(&mut self, _edge: (&NodeID, &NodeID)) { todo!() } diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 4795dd3..7ffad79 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -1,5 +1,4 @@ use std::collections::HashMap; -use std::fmt::format; use itertools::Itertools; use crate::node::*; @@ -29,12 +28,12 @@ pub trait SimpleRTree { fn set_edge_weight(&mut self, parent:&NodeID, child:&NodeID, edge_weights:Option); /// Returns true of node is child of parent. - fn node_is_child_of(&self, parent:&NodeID, node:&NodeID)->bool{ - self.get_node_children(parent).iter().map(|(id, _weight)| id).contains(node) + fn node_is_child_of(&self, parent:&NodeID, node_id:&NodeID)->bool{ + self.get_node_children(parent).iter().map(|(id, _weight)| id).contains(node_id) } /// Assign taxa to leaf node - fn assign_taxa(&mut self,node:&NodeID, taxa:&str); + fn assign_taxa(&mut self,node_id:&NodeID, taxa:&str); /// Returns root node id fn get_root(&self)->&NodeID; @@ -142,11 +141,11 @@ pub trait SimpleRTree { fn split_edge(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)); /// Returns distance of node from some ancestor of node. If weighted is true, it returns sum of edges from root to self. - fn distance_from_ancestor(&self, node: &NodeID, ancestor: &NodeID, weighted: bool)->f64; + fn distance_from_ancestor(&self, node_id: &NodeID, ancestor: &NodeID, weighted: bool)->f64; /// Returns distance of node from root. If weighted is true, it returns sum of edges from root to self. - fn distance_from_root(&self, node: &NodeID, weighted: bool)->EdgeWeight{ - self.distance_from_ancestor(node, self.get_root(), weighted) + fn distance_from_root(&self, node_id: &NodeID, weighted: bool)->EdgeWeight{ + self.distance_from_ancestor(node_id, self.get_root(), weighted) } /// Returns distance of node from root. If weighted is true, it returns sum of edges from root to self. From 2be296e7a5b9393e19cc91457ab2133701d6f8b0 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 14:51:30 -0500 Subject: [PATCH 35/40] cargo clippy --- src/iter/node_iter.rs | 8 +-- src/node.rs | 4 +- src/tree.rs | 129 +++++++++++++++++++-------------------- src/tree/simple_rtree.rs | 16 ++--- 4 files changed, 76 insertions(+), 81 deletions(-) diff --git a/src/iter/node_iter.rs b/src/iter/node_iter.rs index 97a57a5..64fe605 100644 --- a/src/iter/node_iter.rs +++ b/src/iter/node_iter.rs @@ -13,8 +13,8 @@ impl PreOrdNodes { pub fn new(start_node_id: &NodeID, children: &HashMap)>>)->Self{ Self { stack:vec![*start_node_id], nodes: children.iter() - .map(|(k, v)| (k.clone(), v.iter() - .map(|ni| ni.0.clone()).collect::>())) + .map(|(k, v)| (*k, v.iter() + .map(|ni| ni.0).collect::>())) .collect()} } } @@ -47,8 +47,8 @@ impl PostOrdNodes { pub fn new(start_node_id: &NodeID, children: &HashMap)>>)->Self{ Self { stack:vec![*start_node_id], nodes: children.iter() - .map(|(k, v)| (k.clone(), v.iter() - .map(|ni| ni.0.clone()).collect::>())) + .map(|(k, v)| (*k, v.iter() + .map(|ni| ni.0).collect::>())) .collect()} } } diff --git a/src/node.rs b/src/node.rs index e7f1748..87e0b9e 100644 --- a/src/node.rs +++ b/src/node.rs @@ -40,8 +40,8 @@ impl NodeType{ pub fn node_type(&self)->String{ match self.is_leaf() { - false => format!("Internal"), - true => format!("Leaf"), + false => "Internal".to_string(), + true => "Leaf".to_string(), } } } diff --git a/src/tree.rs b/src/tree.rs index 58d7c03..19d1769 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -21,6 +21,12 @@ pub struct RootedPhyloTree{ parents: HashMap>, } +impl Default for RootedPhyloTree { + fn default() -> Self { + Self::new() + } +} + impl RootedPhyloTree{ pub fn new()->Self{ RootedPhyloTree { @@ -34,7 +40,7 @@ impl RootedPhyloTree{ pub fn from_newick(newick_string: String)->Self{ let mut tree = RootedPhyloTree::new(); let mut stack : Vec = Vec::new(); - let mut context : NodeID = tree.get_root().clone(); + let mut context : NodeID = *tree.get_root(); let mut taxa_str = String::new(); let mut decimal_str: String = String::new(); let mut str_ptr: usize = 0; @@ -104,7 +110,7 @@ impl RootedPhyloTree{ for leaf_id in leaf_ids{ tree.set_leaf(&leaf_id); } - return tree; + tree } fn leaves_of_node(&self, node_id:&NodeID, leaves:&mut Vec){ @@ -123,33 +129,33 @@ impl SimpleRTree for RootedPhyloTree{ // New node id let node_id = self.nodes.len(); // add entry of node in parents and children fields - self.nodes.insert(node_id.clone(), NodeType::Internal(None)); - self.parents.insert(node_id.clone(), None); - self.children.insert(node_id.clone(), Vec::new()); + self.nodes.insert(node_id, NodeType::Internal(None)); + self.parents.insert(node_id, None); + self.children.insert(node_id, Vec::new()); node_id } fn set_child(&mut self, node_id:&NodeID, parent_id:&NodeID, distance:Option, taxa:Option){ - self.parents.insert(node_id.clone(), Some(parent_id.clone())); - self.children.entry(parent_id.clone()).or_default().push((node_id.clone(), distance)); - self.nodes.insert(node_id.clone(), NodeType::Internal(taxa)); + self.parents.insert(*node_id, Some(*parent_id)); + self.children.entry(*parent_id).or_default().push((*node_id, distance)); + self.nodes.insert(*node_id, NodeType::Internal(taxa)); } fn set_leaf(&mut self, node_id: &NodeID) { - self.nodes.entry(node_id.clone()).and_modify(|node| node.flip()); + self.nodes.entry(*node_id).and_modify(|node| node.flip()); } fn assign_taxa(&mut self,node:&NodeID, taxa:&str) { - self.nodes.insert(node.clone(), NodeType::Internal(Some(taxa.to_string()))); + self.nodes.insert(*node, NodeType::Internal(Some(taxa.to_string()))); } fn set_edge_weight(&mut self, parent:&NodeID, child:&NodeID, edge_weight:Option){ - self.children.entry(parent.clone()) + self.children.entry(*parent) .and_modify(|children| *children = children.clone().iter() .map(|(id, w)| { match id==child{ - true => {(id.clone(), edge_weight)}, - false => {(id.clone(), w.clone())}, + true => {(*id, edge_weight)}, + false => {(*id, *w)}, } }) .collect() @@ -195,21 +201,21 @@ impl SimpleRTree for RootedPhyloTree{ if self.is_leaf(node_id){ panic!("NodeID is a leaf"); } - let root= node_id.clone(); + let root= *node_id; let mut nodes: HashMap= HashMap::new(); let mut children: HashMap)>> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); for decsendant_node_id in self.iter_node_pre(node_id){ - nodes.insert(decsendant_node_id.clone(), self.nodes.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); - children.insert(decsendant_node_id.clone(), self.children.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); - parents.insert(decsendant_node_id.clone(), self.parents.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); + nodes.insert(decsendant_node_id, self.nodes.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); + children.insert(decsendant_node_id, self.children.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); + parents.insert(decsendant_node_id, *self.parents.get(&decsendant_node_id).expect("Invalid NodeID!")); } Box::new( RootedPhyloTree{ - root: root, - nodes: nodes, - children: children, - parents: parents, + root, + nodes, + children, + parents, } ) } @@ -219,19 +225,16 @@ impl SimpleRTree for RootedPhyloTree{ let mut mrca: NodeID = 0; for mut iterator in ancestor_iter_vec{ let temp: HashSet = HashSet::new(); - match iterator.next(){ - Some(x) => { - match temp.contains(&x){ - true => {mrca = x.clone()}, - false => { - match temp.len()==0{ - true => {}, - false => {return mrca} - } + if let Some(x) = iterator.next() { + match temp.contains(&x){ + true => {mrca = x}, + false => { + match temp.is_empty(){ + true => {}, + false => {return mrca} } } - }, - None => {} + } } } mrca @@ -246,21 +249,21 @@ impl SimpleRTree for RootedPhyloTree{ } fn prune(&mut self, node_id: &NodeID)-> Box{ - let root= node_id.clone(); + let root= *node_id; let mut nodes: HashMap= HashMap::new(); let mut children: HashMap)>> = HashMap::new(); let mut parents: HashMap> = HashMap::new(); for decsendant_node_id in self.iter_node_pre(node_id){ - nodes.insert(decsendant_node_id.clone(), self.nodes.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); - children.insert(decsendant_node_id.clone(), self.children.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); - parents.insert(decsendant_node_id.clone(), self.parents.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); + nodes.insert(decsendant_node_id, self.nodes.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); + children.insert(decsendant_node_id, self.children.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); + parents.insert(decsendant_node_id, self.parents.remove(&decsendant_node_id).expect("Invalid NodeID!")); } Box::new( RootedPhyloTree{ - root: root, - nodes: nodes, - children: children, - parents: parents, + root, + nodes, + children, + parents, } ) } @@ -274,24 +277,24 @@ impl SimpleRTree for RootedPhyloTree{ } fn iter_edges_pre(&self, start_node_id: &NodeID)->PreOrdEdges{ - PreOrdEdges::new(&self, start_node_id) + PreOrdEdges::new(self, start_node_id) } fn iter_edges_post(&self, start_node_id: &NodeID)->PostOrdEdges{ - PostOrdEdges::new(&self, start_node_id) + PostOrdEdges::new(self, start_node_id) } fn get_ancestors_pre(&self, node_id: &NodeID)->Vec{ let mut node_iter: Vec = Vec::new(); let mut curr_node = node_id; - while self.parents.get(curr_node) != None { + while self.parents.get(curr_node).is_some() { match self.parents.get(curr_node).expect("Invalid NodeID!") { Some(node) => { - node_iter.push(node.clone()); + node_iter.push(*node); curr_node = node; }, None => { - node_iter.push(self.get_root().clone()); + node_iter.push(*self.get_root()); break; }, } @@ -320,23 +323,18 @@ impl SimpleRTree for RootedPhyloTree{ let mut node_ancestor_pre = binding[start_idx..].iter(); let mut curr_parent = node_ancestor_pre.next().unwrap(); let mut distance = 0 as f64; - loop{ - match node_ancestor_pre.next(){ - Some(node_id) => { - let curr_parent_children = self.get_node_children(curr_parent); - for (child_id, w) in curr_parent_children{ - if child_id==node_id{ - match weighted { - true => {distance += w.unwrap_or(0 as f64);} - false => {distance += 1 as f64;} - } - curr_parent = node_id; - continue; - } - panic!("Ancestor chain is broken! Clean tree before moving forward...") - } - }, - None => {break;} + while let Some(node_id) = node_ancestor_pre.next() { + let curr_parent_children = self.get_node_children(curr_parent); + for (child_id, w) in curr_parent_children{ + if child_id==node_id{ + match weighted { + true => {distance += w.unwrap_or(0 as f64);} + false => {distance += 1_f64;} + } + curr_parent = node_id; + continue; + } + panic!("Ancestor chain is broken! Clean tree before moving forward...") } }; distance @@ -368,7 +366,7 @@ impl SimpleRTree for RootedPhyloTree{ let parent = self.get_node_parent(node_id).cloned(); let children = self.get_node_children(node_id).clone(); for (child_id, _edge_weight) in children.clone().into_iter(){ - self.parents.entry(child_id.clone()).and_modify(|x| *x = parent); + self.parents.entry(child_id).and_modify(|x| *x = parent); } self.set_children(parent.as_ref().unwrap(), &children); } @@ -384,10 +382,7 @@ impl SimpleRTree for RootedPhyloTree{ self.parents = self.parents.clone().into_iter().map(|(node_id, parent_id)| { ( node_id+value, - match parent_id{ - Some(id) => Some(id+value), - None => None, - } + parent_id.map(|id| id + value) ) }).collect(); self.children = self.children.clone().into_iter().map(|(node_id, children_vec)| { diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 7ffad79..57c6a4c 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -15,9 +15,9 @@ pub trait SimpleRTree { fn set_child(&mut self, node_id:&NodeID, parent_id:&NodeID, distance:Option, taxa: Option); /// Sets iterable of node_ids as children to parent - fn set_children(&mut self, parent: &NodeID, children: &Vec<(NodeID, Option)>){ + fn set_children(&mut self, parent: &NodeID, children: &[(NodeID, Option)]){ for (child_id, edge_weight) in children.iter(){ - self.set_child(child_id, &parent, edge_weight.clone(), None); + self.set_child(child_id, parent, *edge_weight, None); } } @@ -55,7 +55,7 @@ pub trait SimpleRTree { /// Check if tree is weighted fn is_weighted(&self)->bool{ for (_, _, edge_weight) in self.iter_edges_post(self.get_root()){ - if edge_weight!=None{ + if edge_weight.is_some(){ return true; } } @@ -114,7 +114,7 @@ pub trait SimpleRTree { let mut dist_mat: HashMap<(NodeID, NodeID), EdgeWeight> = HashMap::new(); for node_pair in leaves{ let w = self.distance_from_node(node_pair[0], node_pair[1], weighted); - dist_mat.insert((node_pair[0].clone(), node_pair[1].clone()), w); + dist_mat.insert((*node_pair[0], *node_pair[1]), w); } dist_mat } @@ -126,7 +126,7 @@ pub trait SimpleRTree { let mut dist_mat: HashMap<(NodeID, NodeID), EdgeWeight> = HashMap::new(); for node_pair in leaves{ let w = self.distance_from_node(node_pair[0], node_pair[1], weighted); - dist_mat.insert((node_pair[0].clone(), node_pair[1].clone()), w); + dist_mat.insert((*node_pair[0], *node_pair[1]), w); } dist_mat } @@ -173,7 +173,7 @@ pub trait SimpleRTree { return node_id.1.as_ref(); } } - return None; + None } /// return subtree as newick string @@ -187,7 +187,7 @@ pub trait SimpleRTree { let node = self.get_node(node_id); let mut tmp = String::new(); - if self.get_node_children(node_id).len()>0{ + if !self.get_node_children(node_id).is_empty(){ tmp.push('('); for (child_id, w) in self.get_node_children(node_id){ let child_str = format!("{},", self.subtree_to_newick(child_id, *w)); @@ -197,7 +197,7 @@ pub trait SimpleRTree { tmp.push(')'); } tmp.push_str(&print_node(node, edge_weight)); - return tmp; + tmp } /// writes full tree in newick format From 5356840fafcdd5a270fa72965d5e179e31013172 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 14:54:43 -0500 Subject: [PATCH 36/40] switched to using preOrderNodeiterator for preOrderEdgeIterator --- src/iter/edge_iter.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/iter/edge_iter.rs b/src/iter/edge_iter.rs index fa2babe..cfba4de 100644 --- a/src/iter/edge_iter.rs +++ b/src/iter/edge_iter.rs @@ -1,13 +1,13 @@ use crate::node::*; use crate::tree::simple_rtree::*; use std::collections::HashMap; -use crate::iter::node_iter::PostOrdNodes; +use crate::iter::node_iter::{PostOrdNodes, PreOrdNodes}; use crate::tree::RootedPhyloTree; pub struct PreOrdEdges { stack: Vec<(NodeID, NodeID, Option)>, - node_iter: PostOrdNodes, + node_iter: PreOrdNodes, children: HashMap)>>, parents: HashMap>, } @@ -17,7 +17,7 @@ impl PreOrdEdges pub fn new(tree: &RootedPhyloTree, start_node: &NodeID)->Self{ Self { stack:vec![], - node_iter: PostOrdNodes::new( + node_iter: PreOrdNodes::new( start_node, tree.get_children(), ), From ec4cf09cb092b1e10ff24c23c9b9edf25f0a9295 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 18:00:53 -0500 Subject: [PATCH 37/40] implemented graft_subtree method --- src/tree.rs | 24 ++++++++++++++++++------ src/tree/simple_rtree.rs | 6 +++--- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index 19d1769..2923a9d 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -244,8 +244,19 @@ impl SimpleRTree for RootedPhyloTree{ self.nodes.get(node_id).expect("Invalid NodeID").is_leaf() } - fn graft_subtree(&mut self, _tree: Box, _edge: (&NodeID, &NodeID)){ - todo!() + fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID), edge_weights:(Option, Option), graft_edge_weight: Option){ + let graft_node = self.split_edge(edge, edge_weights); + let input_root_id = tree.get_root(); + for input_node in tree.get_nodes().keys(){ + if self.get_nodes().contains_key(input_node){ + panic!("The NodeIDs in the input tree are already present in the current tree!"); + } + } + + self.children.extend(tree.get_children().clone().into_iter()); + self.parents.extend(tree.get_parents().clone().iter()); + self.nodes.extend(tree.get_nodes().clone().into_iter()); + self.set_child(input_root_id, &graft_node, graft_edge_weight, Some(tree.get_taxa(input_root_id))) } fn prune(&mut self, node_id: &NodeID)-> Box{ @@ -310,11 +321,12 @@ impl SimpleRTree for RootedPhyloTree{ todo!() } - fn split_edge(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)){ + fn split_edge(&mut self, edge: (&NodeID, &NodeID), edge_weights:(Option, Option))->NodeID{ let new_node_id = self.add_node(); - self.parents.insert(new_node_id, Some(edge.0)); - self.children.entry(new_node_id).or_default().push((edge.1, edge_weights.1)); - self.parents.insert(edge.1, Some(new_node_id)); + self.parents.insert(new_node_id, Some(edge.0.clone())); + self.children.entry(new_node_id).or_default().push((edge.1.clone(), edge_weights.1)); + self.parents.insert(edge.1.clone(), Some(new_node_id)); + new_node_id } fn distance_from_ancestor(&self, node: &NodeID, ancestor: &NodeID, weighted: bool)->f64{ diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 57c6a4c..f91df7c 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -87,7 +87,7 @@ pub trait SimpleRTree { fn is_leaf(&self, node_id: &NodeID)->bool; /// Attaches input tree to self by spliting an edge - fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID)); + fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID), edge_weights:(Option, Option), graft_edge_weight: Option); /// Returns subtree starting at given node, while corresponding nodes from self. fn prune(&mut self, node_id: &NodeID)-> Box; @@ -137,8 +137,8 @@ pub trait SimpleRTree { /// Rerootes tree at edge. fn reroot_at_edge(&mut self, edge: (&NodeID, &NodeID)); - /// Inserts node in the middle of edge given by pair of node ids - fn split_edge(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)); + /// Inserts node in the middle of edge given by pair of node ids, and returns the new node id + fn split_edge(&mut self, edge: (&NodeID, &NodeID), edge_weights:(Option, Option))->NodeID; /// Returns distance of node from some ancestor of node. If weighted is true, it returns sum of edges from root to self. fn distance_from_ancestor(&self, node_id: &NodeID, ancestor: &NodeID, weighted: bool)->f64; From 8615f6d39650f89c2f53ae10fa756e5e1a03576e Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 18:42:42 -0500 Subject: [PATCH 38/40] implemented partial reroot_at_node (dfs) --- src/tree.rs | 26 +++++++++++++++++++++----- src/tree/simple_rtree.rs | 5 ++++- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/tree.rs b/src/tree.rs index 2923a9d..24f0bd3 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -313,12 +313,28 @@ impl SimpleRTree for RootedPhyloTree{ node_iter } - fn reroot_at_node(&mut self, _node_id: &NodeID){ - todo!() - } + fn reroot_at_node(&mut self, node_id: &NodeID){ + let mut stack: Vec = vec![node_id.clone()]; + let mut completed_stack: Vec = Vec::new(); + let mut neighbours: HashMap)>> = self.children.clone(); + neighbours.extend( + self.parents.clone().into_iter() + .filter(|(_child_id, parent_id)| parent_id!=&None) + .map(|(child_id, parent_id)| (child_id, vec![(parent_id.unwrap(), self.get_edge_weight(parent_id.as_ref().unwrap(), &child_id).cloned())])) + ); + let mut new_children: HashMap)>> = HashMap::new(); + let mut new_parents: HashMap> = HashMap::from([(node_id.clone(), None)]); + + while !stack.is_empty(){ + let curr_node = stack.pop().unwrap(); + if let Some(child) = neighbours.get(&curr_node){ + let curr_node_children = child.into_iter().filter(|(id, _w)| !completed_stack.contains(id)); + new_children.entry(curr_node).or_default().extend(curr_node_children); + stack.extend(child.into_iter().map(|(id, _w)| id.clone())); + } + completed_stack.push(curr_node); + } - fn reroot_at_edge(&mut self, _edge: (&NodeID, &NodeID)) { - todo!() } fn split_edge(&mut self, edge: (&NodeID, &NodeID), edge_weights:(Option, Option))->NodeID{ diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index f91df7c..1f5df82 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -135,7 +135,10 @@ pub trait SimpleRTree { fn reroot_at_node(&mut self, node_id: &NodeID); /// Rerootes tree at edge. - fn reroot_at_edge(&mut self, edge: (&NodeID, &NodeID)); + fn reroot_at_edge(&mut self, edge: (&NodeID, &NodeID), edge_weights: (Option, Option)){ + let split_node_id = self.split_edge(edge, edge_weights); + self.reroot_at_node(&split_node_id); + } /// Inserts node in the middle of edge given by pair of node ids, and returns the new node id fn split_edge(&mut self, edge: (&NodeID, &NodeID), edge_weights:(Option, Option))->NodeID; From 86e8fe3d3037b0e3a4ca18689179e5e287f63091 Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 21:55:02 -0500 Subject: [PATCH 39/40] renamed graft_subtree to graft --- src/tree/simple_rtree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 1f5df82..5325400 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -87,7 +87,7 @@ pub trait SimpleRTree { fn is_leaf(&self, node_id: &NodeID)->bool; /// Attaches input tree to self by spliting an edge - fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID), edge_weights:(Option, Option), graft_edge_weight: Option); + fn graft(&mut self, tree: Box, edge: (&NodeID, &NodeID), edge_weights:(Option, Option), graft_edge_weight: Option); /// Returns subtree starting at given node, while corresponding nodes from self. fn prune(&mut self, node_id: &NodeID)-> Box; From 2ccd142c7c57fef0fa268bb916a1b94ce9ff578d Mon Sep 17 00:00:00 2001 From: sriram98v Date: Mon, 23 Oct 2023 21:56:08 -0500 Subject: [PATCH 40/40] implemented iter_edges_pre and reroot at node --- src/iter/edge_iter.rs | 23 ++++++++++++++++------- src/lib.rs | 35 +++++++++++++++++++++++++++-------- src/tree.rs | 29 +++++++++++++++++------------ 3 files changed, 60 insertions(+), 27 deletions(-) diff --git a/src/iter/edge_iter.rs b/src/iter/edge_iter.rs index cfba4de..2f830d9 100644 --- a/src/iter/edge_iter.rs +++ b/src/iter/edge_iter.rs @@ -6,23 +6,21 @@ use crate::tree::RootedPhyloTree; pub struct PreOrdEdges { - stack: Vec<(NodeID, NodeID, Option)>, node_iter: PreOrdNodes, - children: HashMap)>>, - parents: HashMap>, + parents: HashMap, Option)>, } impl PreOrdEdges { pub fn new(tree: &RootedPhyloTree, start_node: &NodeID)->Self{ Self { - stack:vec![], node_iter: PreOrdNodes::new( start_node, tree.get_children(), ), - children: tree.get_children().clone(), - parents: tree.get_parents().clone(), + parents: tree.get_parents().into_iter() + .filter(|(_child_id, parent_id)| parent_id!=&&None) + .map(|(child_id, parent_id)| (child_id.clone(), (parent_id.clone(), tree.get_edge_weight(parent_id.as_ref().unwrap(), child_id).cloned()))).collect(), } } } @@ -32,7 +30,18 @@ impl Iterator for PreOrdEdges type Item = (NodeID, NodeID, Option); fn next(&mut self)->Option{ - todo!(); + while let Some(next_node) = self.node_iter.next() { + match next_node { + 0 => { + continue; + } + _ => { + let parents = self.parents.get(&next_node).unwrap(); + return Some((parents.0.unwrap(), next_node, parents.1)); + } + } + } + None } } diff --git a/src/lib.rs b/src/lib.rs index 5de88be..b104f5e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,29 +12,48 @@ mod tests { let input_str = String::from("((A,B),(C,D));"); let tree = RootedPhyloTree::from_newick(input_str); dbg!(tree.to_newick()); - } + } #[test] fn read_big_tree() { - let input_str = String::from("(0,(1,(2,(3,(4,(5,(6,(7,(8,(9,(10,(11,(12,(13,(14,(15,(16,(17,(18,(19,(20,(21,(22,(23,(24,(25,(26,(27,(28,(29,(30,(31,(32,(33,(34,(35,(36,(37,(38,(39,(40,(41,(42,(43,(44,(45,(46,(47,(48,(49,(50,(51,(52,(53,(54,(55,(56,(57,(58,(59,(60,(61,(62,(63,(64,(65,(66,(67,(68,(69,(70,(71,(72,(73,(74,(75,(76,(77,(78,(79,(80,(81,(82,(83,(84,(85,(86,(87,(88,(89,(90,(91,(92,(93,(94,(95,(96, (97,98))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"); - let tree = RootedPhyloTree::from_newick(input_str); - dbg!(tree.to_newick()); + let input_str = String::from("(0,(1,(2,(3,(4,(5,(6,(7,(8,(9,(10,(11,(12,(13,(14,(15,(16,(17,(18,(19,(20,(21,(22,(23,(24,(25,(26,(27,(28,(29,(30,(31,(32,(33,(34,(35,(36,(37,(38,(39,(40,(41,(42,(43,(44,(45,(46,(47,(48,(49,(50,(51,(52,(53,(54,(55,(56,(57,(58,(59,(60,(61,(62,(63,(64,(65,(66,(67,(68,(69,(70,(71,(72,(73,(74,(75,(76,(77,(78,(79,(80,(81,(82,(83,(84,(85,(86,(87,(88,(89,(90,(91,(92,(93,(94,(95,(96, (97,98))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"); + let tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); } #[test] fn read_smalllw_tree() { let input_str = String::from("((A:0.12,B:12),(C:10,D:0.001));"); let tree = RootedPhyloTree::from_newick(input_str); dbg!(tree.to_newick()); - } + } #[test] fn read_smallfw_tree() { let input_str = String::from("((A:0.12,B:12):10,(C:15,D:0.001):20);"); let tree = RootedPhyloTree::from_newick(input_str); dbg!(tree.to_newick()); - } + } #[test] fn read_smallfwfl_tree() { let input_str = String::from("((A:0.12,B:12)E:10,(C:15,D:0.001)F:20)G;"); - let tree = RootedPhyloTree::from_newick(input_str); + let mut tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + tree.reroot_at_node(&1); dbg!(tree.to_newick()); - } + } + #[test] + fn reroot_node_smallfwfl_tree() { + let input_str = String::from("((A:0.12,B:12)E:10,(C:15,D:0.001)F:20)G;"); + let mut tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + tree.reroot_at_node(&1); + dbg!(tree.to_newick()); + } + #[test] + fn reroot_edge_smallfwfl_tree() { + let input_str = String::from("((A:0.12,B:12)E:10,(C:15,D:0.001)F:20)G;"); + let mut tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + tree.reroot_at_edge((&1, &0), (None, None)); + dbg!(tree.to_newick()); + } + } diff --git a/src/tree.rs b/src/tree.rs index 24f0bd3..b000b6d 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -244,7 +244,7 @@ impl SimpleRTree for RootedPhyloTree{ self.nodes.get(node_id).expect("Invalid NodeID").is_leaf() } - fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID), edge_weights:(Option, Option), graft_edge_weight: Option){ + fn graft(&mut self, tree: Box, edge: (&NodeID, &NodeID), edge_weights:(Option, Option), graft_edge_weight: Option){ let graft_node = self.split_edge(edge, edge_weights); let input_root_id = tree.get_root(); for input_node in tree.get_nodes().keys(){ @@ -315,26 +315,31 @@ impl SimpleRTree for RootedPhyloTree{ fn reroot_at_node(&mut self, node_id: &NodeID){ let mut stack: Vec = vec![node_id.clone()]; - let mut completed_stack: Vec = Vec::new(); let mut neighbours: HashMap)>> = self.children.clone(); - neighbours.extend( - self.parents.clone().into_iter() - .filter(|(_child_id, parent_id)| parent_id!=&None) - .map(|(child_id, parent_id)| (child_id, vec![(parent_id.unwrap(), self.get_edge_weight(parent_id.as_ref().unwrap(), &child_id).cloned())])) - ); + let parent_as_edge = self.parents.clone().into_iter() + .filter(|(_child_id, parent_id)| parent_id!=&None) + .map(|(child_id, parent_id)| (child_id, vec![(parent_id.unwrap(), self.get_edge_weight(parent_id.as_ref().unwrap(), &child_id).cloned())])); + for (id, edges) in parent_as_edge{ + neighbours.entry(id).or_default().extend(edges); + } let mut new_children: HashMap)>> = HashMap::new(); let mut new_parents: HashMap> = HashMap::from([(node_id.clone(), None)]); while !stack.is_empty(){ let curr_node = stack.pop().unwrap(); - if let Some(child) = neighbours.get(&curr_node){ - let curr_node_children = child.into_iter().filter(|(id, _w)| !completed_stack.contains(id)); - new_children.entry(curr_node).or_default().extend(curr_node_children); - stack.extend(child.into_iter().map(|(id, _w)| id.clone())); + if let Some(child) = neighbours.remove(&curr_node){ + let curr_node_children = &child.iter().filter(|(id, _w)| !new_parents.keys().contains(id)); + new_children.entry(curr_node).or_default().extend(curr_node_children.clone()); + for (id, _w) in &child{ + new_parents.insert(id.clone(), Some(curr_node.clone())); + } + stack.extend(child.iter().map(|(id, _w)| id.clone())) } - completed_stack.push(curr_node); } + self.children = dbg!(new_children); + self.parents = dbg!(new_parents); + self.root = *dbg!(node_id); } fn split_edge(&mut self, edge: (&NodeID, &NodeID), edge_weights:(Option, Option))->NodeID{