Skip to content

Commit

Permalink
feat: Add TreeVec struct and perform refactoring (#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
lsetiawan authored Nov 15, 2024
1 parent f577d15 commit 0600ab4
Show file tree
Hide file tree
Showing 11 changed files with 7,058 additions and 820 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,9 @@ drafts.ipynb
trees/# pixi environments
.pixi
*.egg-info

# MacOS
.DS_Store

# Web
*.html
6 changes: 3 additions & 3 deletions phylo2vec/benches/benchmarks/get_pairs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::ops::Range;
use std::time::Duration;

use criterion::{criterion_group, BenchmarkId, Criterion};
use phylo2vec::to_newick;
use phylo2vec::tree_vec::ops;
use phylo2vec::utils::sample;

const GET_PAIRS: &str = "get_pairs";
Expand All @@ -13,8 +13,8 @@ const RANGE: Range<u32> = 8..18;
fn run_get_pairs(func: &str, n_leaves: usize, ordering: bool) {
let v = sample(n_leaves, ordering);
let _ = match func {
GET_PAIRS => to_newick::_get_pairs(&v),
GET_PAIRS_AVL => to_newick::_get_pairs_avl(&v),
GET_PAIRS => ops::get_pairs(&v),
GET_PAIRS_AVL => ops::get_pairs_avl(&v),
_ => panic!("Invalid function name"),
};
}
Expand Down
3 changes: 1 addition & 2 deletions phylo2vec/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
pub mod avl;
pub mod to_newick;
pub mod tree_vec;
pub mod utils;
10 changes: 1 addition & 9 deletions phylo2vec/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1 @@
mod avl;
mod to_newick;

fn main() {
// Currently a small testing routine to ensure things are working as intended
let v = vec![0, 2, 2, 5, 2];
let newick_string = to_newick::to_newick(v);
print!("{}", newick_string);
}
fn main() {}
113 changes: 113 additions & 0 deletions phylo2vec/src/tree_vec/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
use crate::utils::sample;

pub mod ops;

/// A tree held as a Phylo2Vec vector together with optional metadata.
///
/// All fields are private; instances are created through `TreeVec::new`
/// or `TreeVec::from_sample`.
#[derive(Debug, PartialEq, Clone)]
pub struct TreeVec {
/// Length of `data`; `TreeVec::new` records `data.len()` here.
n_leaf: usize,
/// The Phylo2Vec vector; it is what `to_newick` and `get_ancestry` hand to `ops`.
data: Vec<usize>,
/// Optional branch lengths, stored exactly as passed to `new`.
/// NOTE(review): the meaning of each `(f64, f64)` pair is not shown here — confirm.
branch_lengths: Option<Vec<(f64, f64)>>,
/// Optional taxa, stored exactly as passed to `new`.
/// NOTE(review): presumably one label per leaf — confirm against callers.
taxa: Option<Vec<String>>,
/// Rooted flag; `TreeVec::new` always sets it to `true`.
is_rooted: bool,
}

impl TreeVec {
    /// Creates a `TreeVec` from a Phylo2Vec vector and optional metadata.
    ///
    /// `n_leaf` is recorded as `data.len()` and the tree is always flagged
    /// as rooted. `branch_lengths` and `taxa` are stored as given; no
    /// validation of any argument is performed here.
    pub fn new(
        data: Vec<usize>,
        branch_lengths: Option<Vec<(f64, f64)>>,
        taxa: Option<Vec<String>>,
    ) -> Self {
        // Read the length before `data` is moved into the struct.
        let n_leaf = data.len();
        TreeVec {
            n_leaf,
            data,
            branch_lengths,
            taxa,
            is_rooted: true,
        }
    }

    /// Creates a `TreeVec` from the vector returned by
    /// `sample(n_leaves, ordering)`, with no branch lengths and no taxa.
    ///
    /// NOTE(review): the accompanying test expects the sampled vector to
    /// hold `n_leaves - 1` entries — confirm against `utils::sample`.
    pub fn from_sample(n_leaves: usize, ordering: bool) -> Self {
        TreeVec::new(sample(n_leaves, ordering), None, None)
    }

    /// Returns the Newick string produced by `ops::to_newick` for this
    /// tree's vector.
    pub fn to_newick(&self) -> String {
        ops::to_newick(&self.data)
    }

    /// Returns the ancestry produced by `ops::get_ancestry` for this tree's
    /// vector, as a list of `(usize, usize, usize)` triples.
    pub fn get_ancestry(&self) -> Vec<(usize, usize, usize)> {
        ops::get_ancestry(&self.data)
    }

    /// Placeholder for adding a leaf; not implemented yet.
    ///
    /// # Panics
    ///
    /// Always panics. The arguments are echoed in the panic message so a
    /// premature call is easy to trace.
    pub fn add_leaf(leaf: usize, branch: usize) -> Self {
        unimplemented!(
            "TreeVec::add_leaf(leaf = {}, branch = {})",
            leaf,
            branch
        );
    }

    /// Placeholder for removing a leaf; not implemented yet.
    ///
    /// # Panics
    ///
    /// Always panics. The argument is echoed in the panic message so a
    /// premature call is easy to trace.
    pub fn remove_leaf(leaf: usize) -> Self {
        unimplemented!("TreeVec::remove_leaf(leaf = {})", leaf);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use rstest::*;

    /// Test the creation of a new tree.
    ///
    /// Each case is a vector with 9 entries, built without branch lengths
    /// or taxa, so `n_leaf` (the vector length) must be 9.
    #[rstest]
    #[case(vec![0, 0, 0, 1, 3, 3, 1, 4, 4])]
    #[case(vec![0, 0, 0, 3, 2, 9, 4, 1, 12])]
    fn test_new_tree(#[case] v: Vec<usize>) {
        // Keep a copy for comparison: `v` is moved into the tree below.
        let expected_v = v.clone();
        let tree = TreeVec::new(v, None, None);

        assert_eq!(tree.data, expected_v);
        assert_eq!(tree.n_leaf, 9);
        assert!(tree.is_rooted);
        assert_eq!(tree.branch_lengths, None);
        assert_eq!(tree.taxa, None);
    }

    /// Test building a tree from a sampled vector, for both values of
    /// `ordering`. The stored length is expected to be `n_leaves - 1`.
    #[rstest]
    #[case(50, true)]
    #[case(50, false)]
    fn test_new_tree_from_sample(#[case] n_leaves: usize, #[case] ordering: bool) {
        let tree = TreeVec::from_sample(n_leaves, ordering);
        assert_eq!(tree.n_leaf, n_leaves - 1);
        assert!(tree.is_rooted);
        assert_eq!(tree.branch_lengths, None);
        assert_eq!(tree.taxa, None);
    }

    /// Test the Newick string produced for a few small vectors.
    #[rstest]
    #[case(vec![0, 0, 0, 1, 3], "(((0,(3,5)6)8,2)9,(1,4)7)10;")]
    #[case(vec![0, 1, 2, 3, 4], "(0,(1,(2,(3,(4,5)6)7)8)9)10;")]
    #[case(vec![0, 0, 1], "((0,2)5,(1,3)4)6;")]
    fn test_to_newick(#[case] v: Vec<usize>, #[case] expected: &str) {
        let tree = TreeVec::new(v, None, None);
        let newick = tree.to_newick();
        assert_eq!(newick, expected);
    }

    /// Test the ancestry triples produced for a few small vectors.
    #[rstest]
    #[case(vec![0, 0, 0, 1, 3], vec![(3, 5, 6),
                                     (1, 4, 7),
                                     (0, 6, 8),
                                     (8, 2, 9),
                                     (9, 7, 10)])]
    #[case(vec![0, 1, 2, 3], vec![(3, 4, 5),
                                  (2, 5, 6),
                                  (1, 6, 7),
                                  (0, 7, 8)])]
    #[case(vec![0, 0, 1], vec![(1, 3, 4),
                               (0, 2, 5),
                               (5, 4, 6)])]
    fn test_get_ancestry(#[case] v: Vec<usize>, #[case] expected: Vec<(usize, usize, usize)>) {
        let tree = TreeVec::new(v, None, None);
        let ancestry = tree.get_ancestry();
        assert_eq!(ancestry, expected);
    }
}
File renamed without changes.
5 changes: 5 additions & 0 deletions phylo2vec/src/tree_vec/ops/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pub mod avl;
pub mod vector;

#[allow(unused_imports)]
pub use vector::{build_newick, get_ancestry, get_pairs, get_pairs_avl, to_newick, Ancestry};
90 changes: 71 additions & 19 deletions phylo2vec/src/to_newick.rs → phylo2vec/src/tree_vec/ops/vector.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@
use crate::avl::{AVLTree, Pair};

// A type alias for the Ancestry type, which is a vector of tuples representing (child1, child2, parent)
type Ancestry = Vec<(usize, usize, usize)>;

pub fn _get_pairs(v: &Vec<usize>) -> Vec<(usize, usize)> {
use crate::tree_vec::ops::avl::{AVLTree, Pair};
use crate::utils::is_unordered;

/// A type alias for the Ancestry type, which is a vector of tuples representing (child1, child2, parent)
pub type Ancestry = Vec<(usize, usize, usize)>;

/// A type alias for the PairsVec type, which is a vector of tuples representing (child1, child2)
pub type PairsVec = Vec<Pair>;

/// Get the pairs of nodes from the Phylo2Vec vector
/// using an implementation based on a vector data
/// structure and `for` loops.
///
/// # Example
/// ```
/// use phylo2vec::tree_vec::ops::vector::get_pairs;
///
/// let v = vec![0, 0, 0, 1, 3, 3, 1, 4, 4];
/// let pairs = get_pairs(&v);
/// ```
pub fn get_pairs(v: &Vec<usize>) -> PairsVec {
let num_of_leaves: usize = v.len();
let mut pairs: Vec<(usize, usize)> = Vec::with_capacity(num_of_leaves);
let mut pairs: PairsVec = Vec::with_capacity(num_of_leaves);

// First loop (reverse iteration)
for i in (0..num_of_leaves).rev() {
Expand All @@ -15,7 +30,7 @@ pub fn _get_pairs(v: &Vec<usize>) -> Vec<(usize, usize)> {
gives birth to next_leaf.
*/
let next_leaf: usize = i + 1;
let pair = (v[i], next_leaf);
let pair: Pair = (v[i], next_leaf);
if v[i] <= i {
pairs.push(pair);
}
Expand All @@ -26,23 +41,33 @@ pub fn _get_pairs(v: &Vec<usize>) -> Vec<(usize, usize)> {
let next_leaf = j + 1;
if v[j] == 2 * j {
// 2 * j = extra root ==> pairing = (0, next_leaf)
let pair = (0, next_leaf);
let pair: Pair = (0, next_leaf);
pairs.push(pair);
} else if v[j] > j {
/*
If v[j] > j, it's not the branch leading to v[j] that gives birth,
but an internal branch. Insert at the calculated index.
*/
let index = pairs.len() + v[j] - 2 * j;
let new_pair = (pairs[index - 1].0, next_leaf);
let index: usize = pairs.len() + v[j] - 2 * j;
let new_pair: Pair = (pairs[index - 1].0, next_leaf);
pairs.insert(index, new_pair);
}
}

pairs
}

pub fn _get_pairs_avl(v: &Vec<usize>) -> Vec<Pair> {
/// Get the pair of nodes from the Phylo2Vec vector
/// using an AVL tree data structure implementation.
///
/// # Example
/// ```
/// use phylo2vec::tree_vec::ops::vector::get_pairs_avl;
///
/// let v = vec![0, 0, 0, 1, 3, 3, 1, 4, 4];
/// let pairs = get_pairs_avl(&v);
/// ```
pub fn get_pairs_avl(v: &Vec<usize>) -> PairsVec {
// AVL tree implementation of get_pairs
let k = v.len();
let mut avl_tree = AVLTree::new();
Expand All @@ -63,8 +88,35 @@ pub fn _get_pairs_avl(v: &Vec<usize>) -> Vec<Pair> {
}

/// Get the ancestry of the Phylo2Vec vector
pub fn _get_ancestry(v: &Vec<usize>) -> Ancestry {
let pairs = _get_pairs(&v);
/// v[i] = which BRANCH we do the pairing from
///
/// The initial situation looks like this:
/// R
/// |
/// | --> branch 2
/// // \\
/// branch 0 <-- // \\ --> branch 1
/// 0 1
///
/// For v[1], we have 3 possible branches to choose from.
/// v[1] = 0 or 1 indicates that we branch out from branch 0 or 1, respectively.
/// The new branch yields leaf 2 (like in ordered trees)
///
/// v[1] = 2 is somewhat similar: we create a new branch from R that yields leaf 2
pub fn get_ancestry(v: &Vec<usize>) -> Ancestry {
let pairs: PairsVec;

// Determine the implementation to use
// based on whether this is an ordered
// or unordered tree vector
match is_unordered(&v) {
true => {
pairs = get_pairs_avl(&v);
}
false => {
pairs = get_pairs(&v);
}
}
let num_of_leaves = v.len();
// Initialize Ancestry with capacity `k`
let mut ancestry: Ancestry = Vec::with_capacity(num_of_leaves);
Expand Down Expand Up @@ -121,8 +173,8 @@ fn _build_newick_recursive_inner(p: usize, ancestry: &Ancestry) -> String {
format!("({},{}){}", left, right, p)
}

// The main function to build the Newick string from the ancestry
pub fn _build_newick(ancestry: &Ancestry) -> String {
/// Build newick string from the ancestry matrix
pub fn build_newick(ancestry: &Ancestry) -> String {
// Get the root node, which is the parent value of the last ancestry element
let root = ancestry.last().unwrap().2;

Expand All @@ -131,7 +183,7 @@ pub fn _build_newick(ancestry: &Ancestry) -> String {
}

/// Recover a rooted tree (in Newick format) from a Phylo2Vec vector
pub fn to_newick(v: Vec<usize>) -> String {
let ancestry = _get_ancestry(&v);
_build_newick(&ancestry)
pub fn to_newick(v: &Vec<usize>) -> String {
let ancestry: Ancestry = get_ancestry(&v);
build_newick(&ancestry)
}
Loading

0 comments on commit 0600ab4

Please sign in to comment.