diff --git a/src/iter/edge_iter.rs b/src/iter/edge_iter.rs index 059947e..2f830d9 100644 --- a/src/iter/edge_iter.rs +++ b/src/iter/edge_iter.rs @@ -1,49 +1,99 @@ use crate::node::*; use crate::tree::simple_rtree::*; -use std::collections::{HashMap, HashSet}; -use itertools::Itertools; - +use std::collections::HashMap; +use crate::iter::node_iter::{PostOrdNodes, PreOrdNodes}; +use crate::tree::RootedPhyloTree; pub struct PreOrdEdges { - stack: Vec<(NodeID, NodeID)>, - nodes: HashMap> + node_iter: PreOrdNodes, + parents: HashMap, Option)>, } impl PreOrdEdges { - pub fn new(_tree: &HashMap)->Self{ - Self { stack:vec![], nodes: HashMap::new()} + pub fn new(tree: &RootedPhyloTree, start_node: &NodeID)->Self{ + Self { + node_iter: PreOrdNodes::new( + start_node, + tree.get_children(), + ), + parents: tree.get_parents().into_iter() + .filter(|(_child_id, parent_id)| parent_id!=&&None) + .map(|(child_id, parent_id)| (child_id.clone(), (parent_id.clone(), tree.get_edge_weight(parent_id.as_ref().unwrap(), child_id).cloned()))).collect(), + } } } impl Iterator for PreOrdEdges { - type Item = (NodeID, NodeID); + type Item = (NodeID, NodeID, Option); fn next(&mut self)->Option{ - todo!(); + while let Some(next_node) = self.node_iter.next() { + match next_node { + 0 => { + continue; + } + _ => { + let parents = self.parents.get(&next_node).unwrap(); + return Some((parents.0.unwrap(), next_node, parents.1)); + } + } + } + None } } pub struct PostOrdEdges { - stack: Vec<(NodeID, NodeID)>, - nodes: HashMap> + stack: Vec<(NodeID, NodeID, Option)>, + node_iter: PostOrdNodes, + children: HashMap)>>, + parents: HashMap>, } impl PostOrdEdges { - pub fn new(_tree: &HashMap)->Self{ - Self { stack:vec![], nodes: HashMap::new()} + pub fn new(tree: &RootedPhyloTree, start_node: &NodeID)->Self{ + Self { + stack:vec![], + node_iter: PostOrdNodes::new( + start_node, + tree.get_children(), + ), + children: tree.get_children().clone(), + parents: tree.get_parents().clone(), + } } } impl Iterator for PostOrdEdges { - type Item = (NodeID, NodeID); + type Item = (NodeID, NodeID, Option); fn next(&mut self)->Option{ - todo!(); - } + match self.stack.pop(){ + Some((n1, n2, w)) => Some((n1, n2, w)), + None => { + match self.node_iter.next(){ + Some(node_id) => { + let node_id_parent = self.parents.get(&node_id).unwrap(); + match node_id_parent { + Some(parent_id) => { + let mut w: Option = None; + for (child_node_id, weight) in self.children.get(parent_id).unwrap(){ + if child_node_id==&node_id{ + w = *weight; + } + } + Some((*parent_id, node_id, w)) + }, + None => None, + } + }, + None => None + } + } + } } } \ No newline at end of file diff --git a/src/iter/node_iter.rs b/src/iter/node_iter.rs index 7c48f9b..64fe605 100644 --- a/src/iter/node_iter.rs +++ b/src/iter/node_iter.rs @@ -3,7 +3,6 @@ use crate::tree::simple_rtree::*; use std::collections::{HashMap, HashSet}; use itertools::Itertools; - pub struct PreOrdNodes { stack: Vec, @@ -12,8 +11,11 @@ pub struct PreOrdNodes impl PreOrdNodes { - pub fn new(start_node_id: &NodeID, _tree: &HashMap)->Self{ - Self { stack:vec![*start_node_id], nodes: HashMap::new()} + pub fn new(start_node_id: &NodeID, children: &HashMap)>>)->Self{ + Self { stack:vec![*start_node_id], nodes: children.iter() + .map(|(k, v)| (*k, v.iter() + .map(|ni| ni.0).collect::>())) + .collect()} } } @@ -22,7 +24,16 @@ impl Iterator for PreOrdNodes type Item = NodeID; fn next(&mut self)->Option{ - todo!() + match self.stack.pop() { + Some(node_id) => { + let children_ids:HashSet = self.nodes.get(&node_id).cloned().expect("Invalid Node ID!"); + for child_node_id in children_ids.into_iter().sorted(){ + self.stack.push(child_node_id) + } + Some(node_id) + } + None => None, + } } } @@ -34,8 +45,11 @@ pub struct PostOrdNodes impl PostOrdNodes { - pub fn new(start_node_id: &NodeID, _tree: &HashMap)->Self{ - Self { stack:vec![*start_node_id], nodes: HashMap::new()} + pub fn new(start_node_id: &NodeID, children: &HashMap)>>)->Self{ + Self { stack:vec![*start_node_id], nodes: children.iter() + .map(|(k, v)| (*k, v.iter() + .map(|ni| ni.0).collect::>())) + .collect()} } } @@ -44,6 +58,18 @@ impl Iterator for PostOrdNodes type Item = NodeID; fn next(&mut self)->Option{ - todo!() + while let Some(node_id) = self.stack.pop() { + if self.nodes.contains_key(&node_id){ + self.stack.push(node_id); + let children = self.nodes.remove(&node_id).unwrap(); + for child_id in children.into_iter().sorted(){ + self.stack.push(child_id) + } + } + else{ + return Some(node_id) + } + } + None } -} \ No newline at end of file +} diff --git a/src/lib.rs b/src/lib.rs index 9c49740..b104f5e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,59 @@ pub mod node; pub mod tree; pub mod taxa; -pub mod iter; \ No newline at end of file +pub mod iter; + + +#[cfg(test)] +mod tests { + use crate::tree::{RootedPhyloTree, simple_rtree::SimpleRTree}; + #[test] + fn read_small_tree() { + let input_str = String::from("((A,B),(C,D));"); + let tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + } + #[test] + fn read_big_tree() { + let input_str = String::from("(0,(1,(2,(3,(4,(5,(6,(7,(8,(9,(10,(11,(12,(13,(14,(15,(16,(17,(18,(19,(20,(21,(22,(23,(24,(25,(26,(27,(28,(29,(30,(31,(32,(33,(34,(35,(36,(37,(38,(39,(40,(41,(42,(43,(44,(45,(46,(47,(48,(49,(50,(51,(52,(53,(54,(55,(56,(57,(58,(59,(60,(61,(62,(63,(64,(65,(66,(67,(68,(69,(70,(71,(72,(73,(74,(75,(76,(77,(78,(79,(80,(81,(82,(83,(84,(85,(86,(87,(88,(89,(90,(91,(92,(93,(94,(95,(96, (97,98))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"); + let tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + } + #[test] + fn read_smalllw_tree() { + let input_str = String::from("((A:0.12,B:12),(C:10,D:0.001));"); + let tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + } + #[test] + fn read_smallfw_tree() { + let input_str = String::from("((A:0.12,B:12):10,(C:15,D:0.001):20);"); + let tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + } + #[test] + fn read_smallfwfl_tree() { + let input_str = String::from("((A:0.12,B:12)E:10,(C:15,D:0.001)F:20)G;"); + let mut tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + tree.reroot_at_node(&1); + dbg!(tree.to_newick()); + } + #[test] + fn reroot_node_smallfwfl_tree() { + let input_str = String::from("((A:0.12,B:12)E:10,(C:15,D:0.001)F:20)G;"); + let mut tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + tree.reroot_at_node(&1); + dbg!(tree.to_newick()); + } + #[test] + fn reroot_edge_smallfwfl_tree() { + let input_str = String::from("((A:0.12,B:12)E:10,(C:15,D:0.001)F:20)G;"); + let mut tree = RootedPhyloTree::from_newick(input_str); + dbg!(tree.to_newick()); + tree.reroot_at_edge((&1, &0), (None, None)); + dbg!(tree.to_newick()); + } + +} diff --git a/src/node.rs b/src/node.rs index 9190661..87e0b9e 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,12 +1,59 @@ +use std::fmt::{Debug, Display}; + pub type NodeID = usize; -pub type NodeType = bool; // True for leaves, false for internal nodes +// pub type NodeType = bool; // True for leaves, false for internal nodes + +#[derive(Clone, PartialEq, Eq)] +pub enum NodeType{ + Internal(Option), + Leaf(Option), +} + +impl NodeType{ + pub fn new(is_leaf: bool, taxa: Option)->Self{ + match is_leaf { + true => Self::Leaf(taxa), + false => Self::Internal(taxa), + } + } + + pub fn is_leaf(&self)->bool{ + match self { + NodeType::Internal(_taxa) => false, + NodeType::Leaf(_taxa) => true, + } + } -pub trait Node { - fn is_leaf(&self)->bool; + pub fn flip(&mut self){ + match self { + NodeType::Internal(taxa) => {*self = NodeType::Leaf(taxa.clone())}, + NodeType::Leaf(taxa) => {*self = NodeType::Internal(taxa.clone())}, + } + } + + pub fn taxa(&self)->String{ + match self { + NodeType::Internal(taxa) => taxa.clone().unwrap_or("".to_string()), + NodeType::Leaf(taxa) => taxa.clone().unwrap_or("".to_string()), + } + } + + pub fn node_type(&self)->String{ + match self.is_leaf() { + false => "Internal".to_string(), + true => "Leaf".to_string(), + } + } +} + +impl Debug for NodeType{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}", self.node_type(), self.taxa()) + } } -impl Node for NodeType{ - fn is_leaf(&self)->bool { - *self +impl Display for NodeType{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}", self.node_type(), self.taxa()) } } \ No newline at end of file diff --git a/src/tree.rs b/src/tree.rs index d31afc2..b000b6d 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,15 +1,435 @@ pub mod simple_rtree; -use std::{collections::{HashMap, HashSet}, fmt::Display}; +use std::collections::{HashMap, HashSet}; +use itertools::Itertools; use crate::node::*; use crate::tree::simple_rtree::*; +use crate::iter::{node_iter::*, edge_iter::*}; -pub struct SimpleRTree{ +pub struct UnrootedPhyloTree{ + _nodes: HashMap, + _neighbours: HashMap, NodeID)>>, + _leaves: HashMap, +} + +#[derive(Debug)] +pub struct RootedPhyloTree{ root: NodeID, nodes: HashMap, - children: HashMap, NodeID)>>, - parents: HashMap, - data: HashMap>, - leaves: HashSet, + children: HashMap)>>, + parents: HashMap>, +} + +impl Default for RootedPhyloTree { + fn default() -> Self { + Self::new() + } +} + +impl RootedPhyloTree{ + pub fn new()->Self{ + RootedPhyloTree { + root: 0, + nodes: HashMap::from([(0, NodeType::Internal(None))]), + children: HashMap::from([(0, Vec::new())]), + parents: HashMap::from([(0, None)]), + } + } + + pub fn from_newick(newick_string: String)->Self{ + let mut tree = RootedPhyloTree::new(); + let mut stack : Vec = Vec::new(); + let mut context : NodeID = *tree.get_root(); + let mut taxa_str = String::new(); + let mut decimal_str: String = String::new(); + let mut str_ptr: usize = 0; + let newick_string = newick_string.chars().filter(|c| !c.is_whitespace()).collect::>(); + while str_ptr { + stack.push(context); + context = tree.add_node(); + str_ptr +=1; + }, + ')'|',' => { + // last context id + let last_context = stack.last().expect("Newick string ended abruptly!"); + // add current context as a child to last context + tree.set_child( + &context, + last_context, + decimal_str.parse::().ok(), + match taxa_str.is_empty(){ + true => None, + false => Some(taxa_str.to_string()) + } + ); + // we clear the strings + taxa_str.clear(); + decimal_str.clear(); + + match newick_string[str_ptr] { + ',' => { + context = tree.add_node(); + str_ptr += 1; + } + _ => { + context = stack.pop().expect("Newick string ended abruptly!"); + str_ptr += 1; + } + } + }, + ';'=>{ + if !taxa_str.is_empty(){ + tree.assign_taxa(&context, &taxa_str); + } + break; + } + ':' => { + // if the current context had a weight + if newick_string[str_ptr]==':'{ + str_ptr+=1; + while newick_string[str_ptr].is_ascii_digit() || newick_string[str_ptr]=='.'{ + decimal_str.push(newick_string[str_ptr]); + str_ptr+=1; + } + } + } + _ => { + // push taxa characters into taxa string + while newick_string[str_ptr]!=':'&&newick_string[str_ptr]!=')'&&newick_string[str_ptr]!=','&&newick_string[str_ptr]!='('&&newick_string[str_ptr]!=';'{ + taxa_str.push(newick_string[str_ptr]); + str_ptr+=1; + } + }, + } + } + let mut leaf_ids = Vec::new(); + tree.leaves_of_node(tree.get_root(), &mut leaf_ids); + for leaf_id in leaf_ids{ + tree.set_leaf(&leaf_id); + } + tree + } + + fn leaves_of_node(&self, node_id:&NodeID, leaves:&mut Vec){ + if self.get_node_children(node_id).is_empty(){ + leaves.push(*node_id); + } + + for (child_node_id, _edge_weight) in self.get_node_children(node_id).iter(){ + self.leaves_of_node(child_node_id, leaves); + } + } +} + +impl SimpleRTree for RootedPhyloTree{ + fn add_node(&mut self)->NodeID{ + // New node id + let node_id = self.nodes.len(); + // add entry of node in parents and children fields + self.nodes.insert(node_id, NodeType::Internal(None)); + self.parents.insert(node_id, None); + self.children.insert(node_id, Vec::new()); + node_id + } + + fn set_child(&mut self, node_id:&NodeID, parent_id:&NodeID, distance:Option, taxa:Option){ + self.parents.insert(*node_id, Some(*parent_id)); + self.children.entry(*parent_id).or_default().push((*node_id, distance)); + self.nodes.insert(*node_id, NodeType::Internal(taxa)); + } + + fn set_leaf(&mut self, node_id: &NodeID) { + self.nodes.entry(*node_id).and_modify(|node| node.flip()); + } + + fn assign_taxa(&mut self,node:&NodeID, taxa:&str) { + self.nodes.insert(*node, NodeType::Internal(Some(taxa.to_string()))); + } + + fn set_edge_weight(&mut self, parent:&NodeID, child:&NodeID, edge_weight:Option){ + self.children.entry(*parent) + .and_modify(|children| *children = children.clone().iter() + .map(|(id, w)| { + match id==child{ + true => {(*id, edge_weight)}, + false => {(*id, *w)}, + } + }) + .collect() + ); + } + + fn get_root(&self)->&NodeID{ + &self.root + } + + fn get_node(&self, node_id: &NodeID)->&NodeType{ + self.nodes.get(node_id).expect("Invalid NodeID") + } + + fn get_nodes(&self)->&HashMap{ + &self.nodes + } + + fn get_children(&self)->&HashMap)>>{ + &self.children + } + + fn get_parents(&self)->&HashMap>{ + &self.parents + } + + + fn get_node_children(&self, node_id: &NodeID)->&Vec<(NodeID, Option)>{ + self.children.get(node_id).expect("Invalid NodeID!") + } + + fn get_node_parent(&self, node_id:&NodeID)->Option<&NodeID>{ + self.parents.get(node_id).expect("Invalid NodeID!").as_ref() + } + + fn get_leaves(&self, node_id: &NodeID)->Vec<(NodeID, NodeType)>{ + let mut leaf_vec: Vec = Vec::new(); + self.leaves_of_node(node_id, &mut leaf_vec); + leaf_vec.into_iter().map(|leaf_id| (leaf_id, self.nodes.get(&leaf_id).cloned().expect("Invalid NodeID!"))).collect::>() + } + + fn get_subtree(&self, node_id: &NodeID)->Box{ + if self.is_leaf(node_id){ + panic!("NodeID is a leaf"); + } + let root= *node_id; + let mut nodes: HashMap= HashMap::new(); + let mut children: HashMap)>> = HashMap::new(); + let mut parents: HashMap> = HashMap::new(); + for decsendant_node_id in self.iter_node_pre(node_id){ + nodes.insert(decsendant_node_id, self.nodes.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); + children.insert(decsendant_node_id, self.children.get(&decsendant_node_id).expect("Invalid NodeID!").clone()); + parents.insert(decsendant_node_id, *self.parents.get(&decsendant_node_id).expect("Invalid NodeID!")); + } + Box::new( + RootedPhyloTree{ + root, + nodes, + children, + parents, + } + ) + } + + fn get_mrca(&self, node_id_list: Vec<&NodeID>)->NodeID{ + let ancestor_iter_vec: Vec> = node_id_list.iter().map(|x| self.get_ancestors_pre(x).into_iter()).collect(); + let mut mrca: NodeID = 0; + for mut iterator in ancestor_iter_vec{ + let temp: HashSet = HashSet::new(); + if let Some(x) = iterator.next() { + match temp.contains(&x){ + true => {mrca = x}, + false => { + match temp.is_empty(){ + true => {}, + false => {return mrca} + } + } + } + } + } + mrca + } + + fn is_leaf(&self, node_id: &NodeID)->bool{ + self.nodes.get(node_id).expect("Invalid NodeID").is_leaf() + } + + fn graft(&mut self, tree: Box, edge: (&NodeID, &NodeID), edge_weights:(Option, Option), graft_edge_weight: Option){ + let graft_node = self.split_edge(edge, edge_weights); + let input_root_id = tree.get_root(); + for input_node in tree.get_nodes().keys(){ + if self.get_nodes().contains_key(input_node){ + panic!("The NodeIDs in the input tree are already present in the current tree!"); + } + } + + self.children.extend(tree.get_children().clone().into_iter()); + self.parents.extend(tree.get_parents().clone().iter()); + self.nodes.extend(tree.get_nodes().clone().into_iter()); + self.set_child(input_root_id, &graft_node, graft_edge_weight, Some(tree.get_taxa(input_root_id))) + } + + fn prune(&mut self, node_id: &NodeID)-> Box{ + let root= *node_id; + let mut nodes: HashMap= HashMap::new(); + let mut children: HashMap)>> = HashMap::new(); + let mut parents: HashMap> = HashMap::new(); + for decsendant_node_id in self.iter_node_pre(node_id){ + nodes.insert(decsendant_node_id, self.nodes.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); + children.insert(decsendant_node_id, self.children.remove(&decsendant_node_id).expect("Invalid NodeID!").clone()); + parents.insert(decsendant_node_id, self.parents.remove(&decsendant_node_id).expect("Invalid NodeID!")); + } + Box::new( + RootedPhyloTree{ + root, + nodes, + children, + parents, + } + ) + } + + fn iter_node_pre(&self, start_node_id: &NodeID)->PreOrdNodes{ + PreOrdNodes::new(start_node_id, &self.children) + } + + fn iter_node_post(&self, start_node_id: &NodeID)->PostOrdNodes{ + PostOrdNodes::new(start_node_id, &self.children) + } + + fn iter_edges_pre(&self, start_node_id: &NodeID)->PreOrdEdges{ + PreOrdEdges::new(self, start_node_id) + } + + fn iter_edges_post(&self, start_node_id: &NodeID)->PostOrdEdges{ + PostOrdEdges::new(self, start_node_id) + } + + fn get_ancestors_pre(&self, node_id: &NodeID)->Vec{ + let mut node_iter: Vec = Vec::new(); + let mut curr_node = node_id; + while self.parents.get(curr_node).is_some() { + match self.parents.get(curr_node).expect("Invalid NodeID!") { + Some(node) => { + node_iter.push(*node); + curr_node = node; + }, + None => { + node_iter.push(*self.get_root()); + break; + }, + } + } + node_iter + } + + fn reroot_at_node(&mut self, node_id: &NodeID){ + let mut stack: Vec = vec![node_id.clone()]; + let mut neighbours: HashMap)>> = self.children.clone(); + let parent_as_edge = self.parents.clone().into_iter() + .filter(|(_child_id, parent_id)| parent_id!=&None) + .map(|(child_id, parent_id)| (child_id, vec![(parent_id.unwrap(), self.get_edge_weight(parent_id.as_ref().unwrap(), &child_id).cloned())])); + for (id, edges) in parent_as_edge{ + neighbours.entry(id).or_default().extend(edges); + } + let mut new_children: HashMap)>> = HashMap::new(); + let mut new_parents: HashMap> = HashMap::from([(node_id.clone(), None)]); + + while !stack.is_empty(){ + let curr_node = stack.pop().unwrap(); + if let Some(child) = neighbours.remove(&curr_node){ + let curr_node_children = &child.iter().filter(|(id, _w)| !new_parents.keys().contains(id)); + new_children.entry(curr_node).or_default().extend(curr_node_children.clone()); + for (id, _w) in &child{ + new_parents.insert(id.clone(), Some(curr_node.clone())); + } + stack.extend(child.iter().map(|(id, _w)| id.clone())) + } + } + + self.children = dbg!(new_children); + self.parents = dbg!(new_parents); + self.root = *dbg!(node_id); + } + + fn split_edge(&mut self, edge: (&NodeID, &NodeID), edge_weights:(Option, Option))->NodeID{ + let new_node_id = self.add_node(); + self.parents.insert(new_node_id, Some(edge.0.clone())); + self.children.entry(new_node_id).or_default().push((edge.1.clone(), edge_weights.1)); + self.parents.insert(edge.1.clone(), Some(new_node_id)); + new_node_id + } + + fn distance_from_ancestor(&self, node: &NodeID, ancestor: &NodeID, weighted: bool)->f64{ + let binding = self.get_ancestors_pre(node); + let start_idx = binding.iter().position(|&x| x==*ancestor).expect("Provided ancestor is not an ancestor of node!"); + let mut node_ancestor_pre = binding[start_idx..].iter(); + let mut curr_parent = node_ancestor_pre.next().unwrap(); + let mut distance = 0 as f64; + while let Some(node_id) = node_ancestor_pre.next() { + let curr_parent_children = self.get_node_children(curr_parent); + for (child_id, w) in curr_parent_children{ + if child_id==node_id{ + match weighted { + true => {distance += w.unwrap_or(0 as f64);} + false => {distance += 1_f64;} + } + curr_parent = node_id; + continue; + } + panic!("Ancestor chain is broken! Clean tree before moving forward...") + } + }; + distance + } + + fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(Vec<(NodeID, NodeType)>, Vec<(NodeID, NodeType)>){ + let c2 = self.get_cluster(edge.1); + (self.nodes.clone().into_iter().filter(|x| !c2.contains(x)).collect_vec(), c2) + } + + fn get_cluster(&self, node_id: &NodeID)-> Vec<(NodeID, NodeType)>{ + let mut leaves: Vec = Vec::new(); + self.leaves_of_node(node_id, &mut leaves); + leaves.into_iter().map(|leaf_id| (leaf_id, self.get_node(&leaf_id).clone())).collect_vec() + } + + fn clean(&mut self) { + let mut remove_list: Vec<&NodeID> = Vec::new(); + for (node_id, node) in self.nodes.clone().iter(){ + // remove root with only one child + if node_id==self.get_root() && self.get_node_degree(node_id)<2{ + let new_root = self.get_node_children(self.get_root())[0].0; + self.root = new_root; + self.parents.entry(new_root).and_modify(|x| *x = None); + remove_list.push(node_id); + } + // remove nodes with only one child + else if !node.is_leaf() && self.get_node_degree(node_id)<3{ + let parent = self.get_node_parent(node_id).cloned(); + let children = self.get_node_children(node_id).clone(); + for (child_id, _edge_weight) in children.clone().into_iter(){ + self.parents.entry(child_id).and_modify(|x| *x = parent); + } + self.set_children(parent.as_ref().unwrap(), &children); + } + } + } + + fn get_taxa(&self, node_id:&NodeID)->String { + self.get_node(node_id).taxa() + } + + fn incerement_ids(&mut self, value: &usize){ + self.nodes = self.nodes.clone().into_iter().map(|(node_id, node_type)| (node_id+value, node_type)).collect(); + self.parents = self.parents.clone().into_iter().map(|(node_id, parent_id)| { + ( + node_id+value, + parent_id.map(|id| id + value) + ) + }).collect(); + self.children = self.children.clone().into_iter().map(|(node_id, children_vec)| { + ( + node_id+value, + children_vec.into_iter().map(|(child_id, w)| { + ( + child_id+value, + w + ) + }) + .collect() + ) + }).collect(); + } + } \ No newline at end of file diff --git a/src/tree/simple_rtree.rs b/src/tree/simple_rtree.rs index 4a3f022..5325400 100644 --- a/src/tree/simple_rtree.rs +++ b/src/tree/simple_rtree.rs @@ -1,4 +1,5 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; +use itertools::Itertools; use crate::node::*; use crate::iter::node_iter::*; @@ -7,29 +8,206 @@ use crate::iter::edge_iter::*; pub type EdgeWeight = f64; pub trait SimpleRTree { - fn get_root(&self)->NodeID; - fn get_nodes(&self)->HashMap; - fn get_children(&self, node_id: &NodeID)->HashMap; - fn get_leaves(&self, node_id: &NodeID)->HashSet; - fn get_descendents(&self, node_id: &NodeID)->Vec; - fn get_subtree(&self, node_id: &NodeID)->Box; - fn get_mrca(&self, node_id_list: Vec<&NodeID>)->&NodeID; + /// Add node to tree + fn add_node(&mut self)->NodeID; + + /// Sets node_id as child to parent. + fn set_child(&mut self, node_id:&NodeID, parent_id:&NodeID, distance:Option, taxa: Option); + + /// Sets iterable of node_ids as children to parent + fn set_children(&mut self, parent: &NodeID, children: &[(NodeID, Option)]){ + for (child_id, edge_weight) in children.iter(){ + self.set_child(child_id, parent, *edge_weight, None); + } + } + + /// Converts internal node to leaf_node + fn set_leaf(&mut self, node_id: &NodeID); + + /// Sets the edge weight between two nodes (None to unweight the edge) + fn set_edge_weight(&mut self, parent:&NodeID, child:&NodeID, edge_weights:Option); + + /// Returns true of node is child of parent. + fn node_is_child_of(&self, parent:&NodeID, node_id:&NodeID)->bool{ + self.get_node_children(parent).iter().map(|(id, _weight)| id).contains(node_id) + } + + /// Assign taxa to leaf node + fn assign_taxa(&mut self,node_id:&NodeID, taxa:&str); + + /// Returns root node id + fn get_root(&self)->&NodeID; + + /// Returns all node ids + fn get_nodes(&self)->&HashMap; + + /// Returns node by ID + fn get_node(&self, node_id: &NodeID)->&NodeType; + + /// Returns node degree + fn get_node_degree(&self, node_id:&NodeID)->usize{ + self.get_node_children(node_id).len() + match self.get_node_parent(node_id) { + Some(_) => 1, + None => 0 + } + } + + /// Check if tree is weighted + fn is_weighted(&self)->bool{ + for (_, _, edge_weight) in self.iter_edges_post(self.get_root()){ + if edge_weight.is_some(){ + return true; + } + } + false + } + + /// Get all node-child relationships + fn get_children(&self)->&HashMap)>>; + + /// Get all node-parent relationships + fn get_parents(&self)->&HashMap>; + + /// Returns children node ids for given node id + fn get_node_children(&self, node_id: &NodeID)->&Vec<(NodeID, Option)>; + + /// Returns node parent + fn get_node_parent(&self, node_id:&NodeID)->Option<&NodeID>; + + /// Returns all leaf node ids + fn get_leaves(&self, node_id: &NodeID)->Vec<(NodeID, NodeType)>; + + /// Returns full subtree rooted at given node + fn get_subtree(&self, node_id: &NodeID)->Box; + + /// Returns most recent common ancestor of give node set + fn get_mrca(&self, node_id_list: Vec<&NodeID>)->NodeID; + + /// Checks if the given node is a leaf node fn is_leaf(&self, node_id: &NodeID)->bool; - fn graft_subtree(&mut self, tree: Box, edge: (&NodeID, &NodeID)); - fn extract_subtree(&mut self, node_id: &NodeID)-> Box; - fn prune_subtree(&self, node_id: &NodeID)-> Box; + /// Attaches input tree to self by spliting an edge + fn graft(&mut self, tree: Box, edge: (&NodeID, &NodeID), edge_weights:(Option, Option), graft_edge_weight: Option); + + /// Returns subtree starting at given node, while corresponding nodes from self. + fn prune(&mut self, node_id: &NodeID)-> Box; + ///Returns an iterator that iterates over the nodes in Pre-order fn iter_node_pre(&self, start_node_id: &NodeID)->PreOrdNodes; + + ///Returns an iterator that iterates over the nodes in Post-order fn iter_node_post(&self, start_node_id: &NodeID)->PostOrdNodes; + + ///Returns an iterator that iterates over the edges in Pre-order fn iter_edges_pre(&self, start_node_id: &NodeID)->PreOrdEdges; + + ///Returns an iterator that iterates over the edges in Post-order fn iter_edges_post(&self, start_node_id: &NodeID)->PostOrdEdges; - fn get_ancestors(&self, node_id: &NodeID)->Vec<&NodeID>; - fn phylogenetic_distance_matrix(&self)->Vec>; + /// Returns all node ids in path from root to given node + fn get_ancestors_pre(&self, node_id: &NodeID)->Vec; + + /// Returns pairwise distance matrix of the taxa. If weighted is true, then returns sum of edge weights along paths connecting leaves of tree + fn leaf_distance_matrix(&self, weighted: bool)->HashMap<(NodeID, NodeID), EdgeWeight>{ + let binding = self.get_leaves(self.get_root()); + let leaves = binding.iter().map(|(leaf_id, _taxa)| leaf_id).combinations(2); + let mut dist_mat: HashMap<(NodeID, NodeID), EdgeWeight> = HashMap::new(); + for node_pair in leaves{ + let w = self.distance_from_node(node_pair[0], node_pair[1], weighted); + dist_mat.insert((*node_pair[0], *node_pair[1]), w); + } + dist_mat + } + + /// Returns pairwise distance matrix of all nodes. If weighted is true, then returns sum of edge weights along paths connecting leaves of tree + fn node_distance_matrix(&self, weighted: bool)->HashMap<(NodeID, NodeID), EdgeWeight>{ + let binding = self.get_nodes(); + let leaves = binding.keys().combinations(2); + let mut dist_mat: HashMap<(NodeID, NodeID), EdgeWeight> = HashMap::new(); + for node_pair in leaves{ + let w = self.distance_from_node(node_pair[0], node_pair[1], weighted); + dist_mat.insert((*node_pair[0], *node_pair[1]), w); + } + dist_mat + } + + /// Rerootes tree at given node. fn reroot_at_node(&mut self, node_id: &NodeID); - fn insert_internal_node(&mut self, edge: (NodeID, NodeID), edge_weights:(Option, Option)); + + /// Rerootes tree at edge. + fn reroot_at_edge(&mut self, edge: (&NodeID, &NodeID), edge_weights: (Option, Option)){ + let split_node_id = self.split_edge(edge, edge_weights); + self.reroot_at_node(&split_node_id); + } + + /// Inserts node in the middle of edge given by pair of node ids, and returns the new node id + fn split_edge(&mut self, edge: (&NodeID, &NodeID), edge_weights:(Option, Option))->NodeID; + + /// Returns distance of node from some ancestor of node. If weighted is true, it returns sum of edges from root to self. + fn distance_from_ancestor(&self, node_id: &NodeID, ancestor: &NodeID, weighted: bool)->f64; + + /// Returns distance of node from root. If weighted is true, it returns sum of edges from root to self. + fn distance_from_root(&self, node_id: &NodeID, weighted: bool)->EdgeWeight{ + self.distance_from_ancestor(node_id, self.get_root(), weighted) + } + + /// Returns distance of node from root. If weighted is true, it returns sum of edges from root to self. + fn distance_from_node(&self, node1: &NodeID, node2: &NodeID, weighted: bool)->f64{ + let mrca = self.get_mrca(vec![node1, node2]); + self.distance_from_ancestor(node1, &mrca, weighted) + self.distance_from_ancestor(node2, &mrca, weighted) + } + + /// Returns bipartition induced by edge + fn get_bipartition(&self, edge: (&NodeID, &NodeID))->(Vec<(NodeID, NodeType)>, Vec<(NodeID, NodeType)>); + + /// Returns cluster of node + fn get_cluster(&self, node_id: &NodeID)-> Vec<(NodeID, NodeType)>; + + /// Cleans self by removing 1) internal nodes (other than root) with degree 2, 2) Floating root nodes, 3) self loops + fn clean(&mut self); + + /// Get node taxa + fn get_taxa(&self, node_id:&NodeID)->String; + + /// Get edge weight + fn get_edge_weight(&self, parent_id: &NodeID, child_id:&NodeID)->Option<&EdgeWeight>{ + for node_id in self.get_node_children(parent_id).iter(){ + if node_id.0==*child_id{ + return node_id.1.as_ref(); + } + } + None + } + + /// return subtree as newick string + fn subtree_to_newick(&self, node_id:&NodeID, edge_weight:Option)->String{ + fn print_node(node: &NodeType, weight: Option)->String{ + match weight { + Some(w) => format!("{}:{}", node.taxa(), w), + None => node.taxa() + } + } + + let node = self.get_node(node_id); + let mut tmp = String::new(); + if !self.get_node_children(node_id).is_empty(){ + tmp.push('('); + for (child_id, w) in self.get_node_children(node_id){ + let child_str = format!("{},", self.subtree_to_newick(child_id, *w)); + tmp.push_str(&child_str); + } + tmp.pop(); + tmp.push(')'); + } + tmp.push_str(&print_node(node, edge_weight)); + tmp + } + + /// writes full tree in newick format + fn to_newick(&self)->String{ + format!("{};", self.subtree_to_newick(self.get_root(), None)) + } - fn distance_from_root(&self, weighted: bool)->f64; - fn get_bipartition(&self, edge: (&NodeID, &NodeID)); -} \ No newline at end of file + /// Increment all node_ids + fn incerement_ids(&mut self, value: &usize); +}