diff --git a/phylo2vec/benches/benchmarks/get_ancestry_dtype.rs b/phylo2vec/benches/benchmarks/get_ancestry_dtype.rs index 836bd78..976c298 100644 --- a/phylo2vec/benches/benchmarks/get_ancestry_dtype.rs +++ b/phylo2vec/benches/benchmarks/get_ancestry_dtype.rs @@ -3,6 +3,7 @@ use std::time::Duration; use criterion::{criterion_group, BenchmarkId, Criterion}; use phylo2vec::tree_vec::ops; +use phylo2vec::tree_vec::types::PairsVec; use phylo2vec::utils::{is_unordered, sample}; pub type AncestryTuple = Vec<(usize, usize, usize)>; @@ -37,7 +38,7 @@ fn compare_get_ancestry_datatypes(c: &mut Criterion) { } pub fn get_ancestry_tuple(v: &Vec<usize>) -> AncestryTuple { - let pairs: ops::vector::PairsVec; + let pairs: PairsVec; // Determine the implementation to use // based on whether this is an ordered @@ -83,7 +84,7 @@ pub fn get_ancestry_tuple(v: &Vec<usize>) -> AncestryTuple { } pub fn get_ancestry_vec(v: &Vec<usize>) -> AncestryVec { - let pairs: ops::vector::PairsVec; + let pairs: PairsVec; // Determine the implementation to use // based on whether this is an ordered @@ -129,7 +130,7 @@ pub fn get_ancestry_vec(v: &Vec<usize>) -> AncestryVec { } pub fn get_ancestry_ndarray(v: &Vec<usize>) -> AncestryNDArray { - let pairs: ops::vector::PairsVec; + let pairs: PairsVec; // Determine the implementation to use // based on whether this is an ordered diff --git a/phylo2vec/src/tree_vec/mod.rs b/phylo2vec/src/tree_vec/mod.rs index bb707e8..c1fb0de 100644 --- a/phylo2vec/src/tree_vec/mod.rs +++ b/phylo2vec/src/tree_vec/mod.rs @@ -1,9 +1,10 @@ use crate::utils::sample; +// Import the types module +pub mod types; + +// Import the operations modules pub mod ops; -use ops::{ - build_vector, find_coords_of_first_leaf, order_cherries, order_cherries_no_parents, Ancestry, -}; /// A vector representation of a phylogenetic tree /// @@ -68,7 +69,7 @@ impl TreeVec { /// /// # Returns /// An `Ancestry` type containing parent-child relationships - pub fn get_ancestry(&self) -> Ancestry { + pub fn get_ancestry(&self) -> 
types::Ancestry { return ops::get_ancestry(&self.data); } @@ -102,9 +103,9 @@ impl TreeVec { // ancestry_add[leaf_coords][leaf_col] = leaf as isize; // let ancestry_add_ref = &mut ancestry_add; - order_cherries(&mut ancestry_add); - order_cherries_no_parents(&mut ancestry_add); - self.data = build_vector(ancestry_add); + ops::order_cherries(&mut ancestry_add); + ops::order_cherries_no_parents(&mut ancestry_add); + self.data = ops::build_vector(ancestry_add); } /// Removes a leaf from the tree @@ -119,7 +120,7 @@ impl TreeVec { /// Modifies the tree structure by removing the leaf and updating indices pub fn remove_leaf(&mut self, leaf: usize) -> usize { let ancestry = self.get_ancestry(); - let leaf_coords = find_coords_of_first_leaf(&ancestry, leaf); + let leaf_coords = ops::find_coords_of_first_leaf(&ancestry, leaf); let leaf_row = leaf_coords.0; let leaf_col = leaf_coords.1; @@ -159,9 +160,9 @@ impl TreeVec { ancestry_rm.push(new_row); } - order_cherries(&mut ancestry_rm); - order_cherries_no_parents(&mut ancestry_rm); - self.data = build_vector(ancestry_rm); + ops::order_cherries(&mut ancestry_rm); + ops::order_cherries_no_parents(&mut ancestry_rm); + self.data = ops::build_vector(ancestry_rm); return sister; } @@ -232,7 +233,7 @@ mod tests { #[case(vec![0, 0, 1], vec![[1, 3, 4], [0, 2, 5], [5, 4, 6]])] - fn test_get_ancestry(#[case] v: Vec<usize>, #[case] expected: Ancestry) { + fn test_get_ancestry(#[case] v: Vec<usize>, #[case] expected: types::Ancestry) { let tree = TreeVec::new(v, None, None); let ancestry = tree.get_ancestry(); assert_eq!(ancestry, expected); diff --git a/phylo2vec/src/tree_vec/ops/avl.rs b/phylo2vec/src/tree_vec/ops/avl.rs index 6d0ee0b..bab9b88 100644 --- a/phylo2vec/src/tree_vec/ops/avl.rs +++ b/phylo2vec/src/tree_vec/ops/avl.rs @@ -1,5 +1,4 @@ -// Node definition -pub type Pair = (usize, usize); +use crate::tree_vec::types::Pair; pub struct Node { value: Pair, diff --git a/phylo2vec/src/tree_vec/ops/mod.rs b/phylo2vec/src/tree_vec/ops/mod.rs index 
94e9c27..2e535d6 100644 --- a/phylo2vec/src/tree_vec/ops/mod.rs +++ b/phylo2vec/src/tree_vec/ops/mod.rs @@ -1,8 +1,18 @@ pub mod avl; +pub mod newick; pub mod vector; -#[allow(unused_imports)] +use crate::tree_vec::types::Ancestry; + pub use vector::{ - build_newick, build_vector, find_coords_of_first_leaf, get_ancestry, get_pairs, get_pairs_avl, - order_cherries, order_cherries_no_parents, to_newick, Ancestry, + build_vector, find_coords_of_first_leaf, get_ancestry, get_pairs, get_pairs_avl, + order_cherries, order_cherries_no_parents, }; + +pub use newick::build_newick; + +/// Recover a rooted tree (in Newick format) from a Phylo2Vec vector +pub fn to_newick(v: &Vec<usize>) -> String { + let ancestry: Ancestry = get_ancestry(&v); + build_newick(&ancestry) +} diff --git a/phylo2vec/src/tree_vec/ops/newick.rs b/phylo2vec/src/tree_vec/ops/newick.rs new file mode 100644 index 0000000..7ac3ce7 --- /dev/null +++ b/phylo2vec/src/tree_vec/ops/newick.rs @@ -0,0 +1,34 @@ +use crate::tree_vec::types::Ancestry; + +// The recursive function that builds the Newick string +fn _build_newick_recursive_inner(p: usize, ancestry: &Ancestry) -> String { + let leaf_max = ancestry.len(); + + // Extract the children (c1, c2) and ignore the parent from the ancestry tuple + let [c1, c2, _] = ancestry[p - leaf_max - 1]; + + // Recursive calls for left and right children, checking if they are leaves or internal nodes + let left = if c1 > leaf_max { + _build_newick_recursive_inner(c1, ancestry) + } else { + c1.to_string() // It's a leaf node, just convert to string + }; + + let right = if c2 > leaf_max { + _build_newick_recursive_inner(c2, ancestry) + } else { + c2.to_string() // It's a leaf node, just convert to string + }; + + // Create the Newick string in the form (left, right)p + format!("({},{}){}", left, right, p) +} + +/// Build newick string from the ancestry matrix +pub fn build_newick(ancestry: &Ancestry) -> String { + // Get the root node, which is the parent value of the last ancestry 
element + let root = ancestry.last().unwrap()[2]; + + // Build the Newick string starting from the root, and append a semicolon + format!("{};", _build_newick_recursive_inner(root, ancestry)) +} diff --git a/phylo2vec/src/tree_vec/ops/vector.rs b/phylo2vec/src/tree_vec/ops/vector.rs index 49f0518..420e1a2 100644 --- a/phylo2vec/src/tree_vec/ops/vector.rs +++ b/phylo2vec/src/tree_vec/ops/vector.rs @@ -1,13 +1,8 @@ -use crate::tree_vec::ops::avl::{AVLTree, Pair}; +use crate::tree_vec::ops::avl::AVLTree; +use crate::tree_vec::types::{Ancestry, Pair, PairsVec}; use crate::utils::is_unordered; use std::usize; -/// A type alias for the Ancestry type, which is a vector of vectors representing [child1, child2, parent] -pub type Ancestry = Vec<[usize; 3]>; - -/// A type alias for the PairsVec type, which is a vector of tuples representing (child1, child2) -pub type PairsVec = Vec<Pair>; - /// Get the pair of nodes from the Phylo2Vec vector /// using a vector data structure and for loops /// implementation. 
@@ -150,45 +145,6 @@ pub fn get_ancestry(v: &Vec<usize>) -> Ancestry { ancestry } -// The recursive function that builds the Newick string -fn _build_newick_recursive_inner(p: usize, ancestry: &Ancestry) -> String { - let leaf_max = ancestry.len(); - - // Extract the children (c1, c2) and ignore the parent from the ancestry tuple - let [c1, c2, _] = ancestry[p - leaf_max - 1]; - - // Recursive calls for left and right children, checking if they are leaves or internal nodes - let left = if c1 > leaf_max { - _build_newick_recursive_inner(c1, ancestry) - } else { - c1.to_string() // It's a leaf node, just convert to string - }; - - let right = if c2 > leaf_max { - _build_newick_recursive_inner(c2, ancestry) - } else { - c2.to_string() // It's a leaf node, just convert to string - }; - - // Create the Newick string in the form (left, right)p - format!("({},{}){}", left, right, p) -} - -/// Build newick string from the ancestry matrix -pub fn build_newick(ancestry: &Ancestry) -> String { - // Get the root node, which is the parent value of the last ancestry element - let root = ancestry.last().unwrap()[2]; - - // Build the Newick string starting from the root, and append a semicolon - format!("{};", _build_newick_recursive_inner(root, ancestry)) -} - -/// Recover a rooted tree (in Newick format) from a Phylo2Vec vector -pub fn to_newick(v: &Vec<usize>) -> String { - let ancestry: Ancestry = get_ancestry(&v); - build_newick(&ancestry) -} - pub fn find_coords_of_first_leaf(ancestry: &Ancestry, leaf: usize) -> (usize, usize) { for r in 0..ancestry.len() { for c in 0..3 { diff --git a/phylo2vec/src/tree_vec/types.rs b/phylo2vec/src/tree_vec/types.rs new file mode 100644 index 0000000..5b78761 --- /dev/null +++ b/phylo2vec/src/tree_vec/types.rs @@ -0,0 +1,8 @@ +// A type alias for the Pair type, which is a tuple representing (child1, child2) +pub type Pair = (usize, usize); + +/// A type alias for the Ancestry type, which is a vector of vectors representing [child1, child2, parent] +pub 
type Ancestry = Vec<[usize; 3]>; + +/// A type alias for the PairsVec type, which is a vector of tuples representing (child1, child2) +pub type PairsVec = Vec<Pair>; diff --git a/py-phylo2vec/src/lib.rs b/py-phylo2vec/src/lib.rs index 431183c..9fee47d 100644 --- a/py-phylo2vec/src/lib.rs +++ b/py-phylo2vec/src/lib.rs @@ -32,7 +32,7 @@ fn get_pairs_avl(input_vector: Vec<usize>) -> Vec<(usize, usize)> { #[pyfunction] fn build_newick(input_ancestry: Vec<[usize; 3]>) -> String { - let newick_string: String = ops::vector::build_newick(&input_ancestry); + let newick_string: String = ops::newick::build_newick(&input_ancestry); newick_string }