Skip to content

Commit

Permalink
refactor: Perform codebase refactor to separate appropriate functions
Browse files Browse the repository at this point in the history
  • Loading branch information
lsetiawan committed Dec 11, 2024
1 parent ab5a1d1 commit 5efb6e3
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 67 deletions.
7 changes: 4 additions & 3 deletions phylo2vec/benches/benchmarks/get_ancestry_dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::time::Duration;

use criterion::{criterion_group, BenchmarkId, Criterion};
use phylo2vec::tree_vec::ops;
use phylo2vec::tree_vec::types::PairsVec;
use phylo2vec::utils::{is_unordered, sample};

pub type AncestryTuple = Vec<(usize, usize, usize)>;
Expand Down Expand Up @@ -37,7 +38,7 @@ fn compare_get_ancestry_datatypes(c: &mut Criterion) {
}

pub fn get_ancestry_tuple(v: &Vec<usize>) -> AncestryTuple {
let pairs: ops::vector::PairsVec;
let pairs: PairsVec;

// Determine the implementation to use
// based on whether this is an ordered
Expand Down Expand Up @@ -83,7 +84,7 @@ pub fn get_ancestry_tuple(v: &Vec<usize>) -> AncestryTuple {
}

pub fn get_ancestry_vec(v: &Vec<usize>) -> AncestryVec {
let pairs: ops::vector::PairsVec;
let pairs: PairsVec;

// Determine the implementation to use
// based on whether this is an ordered
Expand Down Expand Up @@ -129,7 +130,7 @@ pub fn get_ancestry_vec(v: &Vec<usize>) -> AncestryVec {
}

pub fn get_ancestry_ndarray(v: &Vec<usize>) -> AncestryNDArray {
let pairs: ops::vector::PairsVec;
let pairs: PairsVec;

// Determine the implementation to use
// based on whether this is an ordered
Expand Down
25 changes: 13 additions & 12 deletions phylo2vec/src/tree_vec/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use crate::utils::sample;

// Import the types module
pub mod types;

// Import the operations modules
pub mod ops;
use ops::{
build_vector, find_coords_of_first_leaf, order_cherries, order_cherries_no_parents, Ancestry,
};

/// A vector representation of a phylogenetic tree
///
Expand Down Expand Up @@ -68,7 +69,7 @@ impl TreeVec {
///
/// # Returns
/// An `Ancestry` type containing parent-child relationships
pub fn get_ancestry(&self) -> Ancestry {
pub fn get_ancestry(&self) -> types::Ancestry {
return ops::get_ancestry(&self.data);
}

Expand Down Expand Up @@ -102,9 +103,9 @@ impl TreeVec {

// ancestry_add[leaf_coords][leaf_col] = leaf as isize;
// let ancestry_add_ref = &mut ancestry_add;
order_cherries(&mut ancestry_add);
order_cherries_no_parents(&mut ancestry_add);
self.data = build_vector(ancestry_add);
ops::order_cherries(&mut ancestry_add);
ops::order_cherries_no_parents(&mut ancestry_add);
self.data = ops::build_vector(ancestry_add);
}

/// Removes a leaf from the tree
Expand All @@ -119,7 +120,7 @@ impl TreeVec {
/// Modifies the tree structure by removing the leaf and updating indices
pub fn remove_leaf(&mut self, leaf: usize) -> usize {
let ancestry = self.get_ancestry();
let leaf_coords = find_coords_of_first_leaf(&ancestry, leaf);
let leaf_coords = ops::find_coords_of_first_leaf(&ancestry, leaf);
let leaf_row = leaf_coords.0;
let leaf_col = leaf_coords.1;

Expand Down Expand Up @@ -159,9 +160,9 @@ impl TreeVec {
ancestry_rm.push(new_row);
}

order_cherries(&mut ancestry_rm);
order_cherries_no_parents(&mut ancestry_rm);
self.data = build_vector(ancestry_rm);
ops::order_cherries(&mut ancestry_rm);
ops::order_cherries_no_parents(&mut ancestry_rm);
self.data = ops::build_vector(ancestry_rm);

return sister;
}
Expand Down Expand Up @@ -232,7 +233,7 @@ mod tests {
#[case(vec![0, 0, 1], vec![[1, 3, 4],
[0, 2, 5],
[5, 4, 6]])]
fn test_get_ancestry(#[case] v: Vec<usize>, #[case] expected: Ancestry) {
fn test_get_ancestry(#[case] v: Vec<usize>, #[case] expected: types::Ancestry) {
let tree = TreeVec::new(v, None, None);
let ancestry = tree.get_ancestry();
assert_eq!(ancestry, expected);
Expand Down
3 changes: 1 addition & 2 deletions phylo2vec/src/tree_vec/ops/avl.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// Node definition
pub type Pair = (usize, usize);
use crate::tree_vec::types::Pair;

pub struct Node {
value: Pair,
Expand Down
16 changes: 13 additions & 3 deletions phylo2vec/src/tree_vec/ops/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
pub mod avl;
pub mod newick;
pub mod vector;

#[allow(unused_imports)]
use crate::tree_vec::types::Ancestry;

pub use vector::{
build_newick, build_vector, find_coords_of_first_leaf, get_ancestry, get_pairs, get_pairs_avl,
order_cherries, order_cherries_no_parents, to_newick, Ancestry,
build_vector, find_coords_of_first_leaf, get_ancestry, get_pairs, get_pairs_avl,
order_cherries, order_cherries_no_parents,
};

pub use newick::build_newick;

/// Convert a Phylo2Vec vector into the Newick string of the rooted tree it encodes.
///
/// The vector is first expanded into its ancestry matrix with `get_ancestry`,
/// and that matrix is then serialised by `build_newick`.
pub fn to_newick(v: &Vec<usize>) -> String {
    // `v` is already a reference, so it is handed on as-is.
    let matrix: Ancestry = get_ancestry(v);
    build_newick(&matrix)
}
34 changes: 34 additions & 0 deletions phylo2vec/src/tree_vec/ops/newick.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use crate::tree_vec::types::Ancestry;

// The recursive function that builds the Newick string
fn _build_newick_recursive_inner(p: usize, ancestry: &Ancestry) -> String {
let leaf_max = ancestry.len();

// Extract the children (c1, c2) and ignore the parent from the ancestry tuple
let [c1, c2, _] = ancestry[p - leaf_max - 1];

// Recursive calls for left and right children, checking if they are leaves or internal nodes
let left = if c1 > leaf_max {
_build_newick_recursive_inner(c1, ancestry)
} else {
c1.to_string() // It's a leaf node, just convert to string
};

let right = if c2 > leaf_max {
_build_newick_recursive_inner(c2, ancestry)
} else {
c2.to_string() // It's a leaf node, just convert to string
};

// Create the Newick string in the form (left, right)p
format!("({},{}){}", left, right, p)
}

/// Serialise an ancestry matrix as a Newick string.
///
/// The root is taken to be the parent entry (third column) of the last row;
/// the tree is rendered from that node and terminated with a semicolon.
///
/// # Panics
/// Panics if `ancestry` is empty.
pub fn build_newick(ancestry: &Ancestry) -> String {
    // The last row's parent is the root of the whole tree.
    let [_, _, root] = *ancestry.last().unwrap();

    // Render from the root, then close the Newick string with ';'.
    let mut newick = _build_newick_recursive_inner(root, ancestry);
    newick.push(';');
    newick
}
48 changes: 2 additions & 46 deletions phylo2vec/src/tree_vec/ops/vector.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
use crate::tree_vec::ops::avl::{AVLTree, Pair};
use crate::tree_vec::ops::avl::AVLTree;
use crate::tree_vec::types::{Ancestry, Pair, PairsVec};
use crate::utils::is_unordered;
use std::usize;

/// A type alias for the Ancestry type, which is a vector of 3-element arrays, each representing [child1, child2, parent]
pub type Ancestry = Vec<[usize; 3]>;

/// A type alias for the PairsVec type, which is a vector of tuples representing (child1, child2)
pub type PairsVec = Vec<Pair>;

/// Get the pair of nodes from the Phylo2Vec vector
/// using a vector data structure and for loops
/// implementation.
Expand Down Expand Up @@ -150,45 +145,6 @@ pub fn get_ancestry(v: &Vec<usize>) -> Ancestry {
ancestry
}

// The recursive function that builds the Newick string
fn _build_newick_recursive_inner(p: usize, ancestry: &Ancestry) -> String {
let leaf_max = ancestry.len();

// Extract the children (c1, c2) and ignore the parent from the ancestry tuple
let [c1, c2, _] = ancestry[p - leaf_max - 1];

// Recursive calls for left and right children, checking if they are leaves or internal nodes
let left = if c1 > leaf_max {
_build_newick_recursive_inner(c1, ancestry)
} else {
c1.to_string() // It's a leaf node, just convert to string
};

let right = if c2 > leaf_max {
_build_newick_recursive_inner(c2, ancestry)
} else {
c2.to_string() // It's a leaf node, just convert to string
};

// Create the Newick string in the form (left, right)p
format!("({},{}){}", left, right, p)
}

/// Serialise an ancestry matrix as a Newick string.
///
/// The root is taken to be the parent entry (third column) of the last row;
/// the tree is rendered from that node and terminated with a semicolon.
///
/// # Panics
/// Panics if `ancestry` is empty.
pub fn build_newick(ancestry: &Ancestry) -> String {
    // The last row's parent is the root of the whole tree.
    let [_, _, root] = *ancestry.last().unwrap();

    // Render from the root, then close the Newick string with ';'.
    let mut newick = _build_newick_recursive_inner(root, ancestry);
    newick.push(';');
    newick
}

/// Convert a Phylo2Vec vector into the Newick string of the rooted tree it encodes.
///
/// The vector is first expanded into its ancestry matrix with `get_ancestry`,
/// and that matrix is then serialised by `build_newick`.
pub fn to_newick(v: &Vec<usize>) -> String {
    // `v` is already a reference, so it is handed on as-is.
    let matrix: Ancestry = get_ancestry(v);
    build_newick(&matrix)
}

pub fn find_coords_of_first_leaf(ancestry: &Ancestry, leaf: usize) -> (usize, usize) {
for r in 0..ancestry.len() {
for c in 0..3 {
Expand Down
8 changes: 8 additions & 0 deletions phylo2vec/src/tree_vec/types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/// A type alias for the Pair type, which is a tuple representing (child1, child2)
pub type Pair = (usize, usize);

/// A type alias for the Ancestry type, which is a vector of 3-element arrays, each representing [child1, child2, parent]
pub type Ancestry = Vec<[usize; 3]>;

/// A type alias for the PairsVec type, which is a vector of tuples representing (child1, child2)
pub type PairsVec = Vec<Pair>;
2 changes: 1 addition & 1 deletion py-phylo2vec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fn get_pairs_avl(input_vector: Vec<usize>) -> Vec<(usize, usize)> {

#[pyfunction]
fn build_newick(input_ancestry: Vec<[usize; 3]>) -> String {
let newick_string: String = ops::vector::build_newick(&input_ancestry);
let newick_string: String = ops::newick::build_newick(&input_ancestry);
newick_string
}

Expand Down

0 comments on commit 5efb6e3

Please sign in to comment.