Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Perform codebase refactor to separate appropriate functions #51

Merged
merged 1 commit into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions phylo2vec/benches/benchmarks/get_ancestry_dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::time::Duration;

use criterion::{criterion_group, BenchmarkId, Criterion};
use phylo2vec::tree_vec::ops;
use phylo2vec::tree_vec::types::PairsVec;
use phylo2vec::utils::{is_unordered, sample};

/// Ancestry container backed by a vector of `(usize, usize, usize)` tuples.
///
/// NOTE(review): presumably each tuple is `(child1, child2, parent)`, mirroring
/// the rows of the crate's `Ancestry` type — confirm against the benchmark body.
pub type AncestryTuple = Vec<(usize, usize, usize)>;
Expand Down Expand Up @@ -37,7 +38,7 @@ fn compare_get_ancestry_datatypes(c: &mut Criterion) {
}

pub fn get_ancestry_tuple(v: &Vec<usize>) -> AncestryTuple {
let pairs: ops::vector::PairsVec;
let pairs: PairsVec;

// Determine the implementation to use
// based on whether this is an ordered
Expand Down Expand Up @@ -83,7 +84,7 @@ pub fn get_ancestry_tuple(v: &Vec<usize>) -> AncestryTuple {
}

pub fn get_ancestry_vec(v: &Vec<usize>) -> AncestryVec {
let pairs: ops::vector::PairsVec;
let pairs: PairsVec;

// Determine the implementation to use
// based on whether this is an ordered
Expand Down Expand Up @@ -129,7 +130,7 @@ pub fn get_ancestry_vec(v: &Vec<usize>) -> AncestryVec {
}

pub fn get_ancestry_ndarray(v: &Vec<usize>) -> AncestryNDArray {
let pairs: ops::vector::PairsVec;
let pairs: PairsVec;

// Determine the implementation to use
// based on whether this is an ordered
Expand Down
25 changes: 13 additions & 12 deletions phylo2vec/src/tree_vec/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use crate::utils::sample;

// Import the types module
pub mod types;

// Import the operations modules
pub mod ops;
use ops::{
build_vector, find_coords_of_first_leaf, order_cherries, order_cherries_no_parents, Ancestry,
};

/// A vector representation of a phylogenetic tree
///
Expand Down Expand Up @@ -68,7 +69,7 @@ impl TreeVec {
///
/// # Returns
/// An `Ancestry` type containing parent-child relationships
pub fn get_ancestry(&self) -> Ancestry {
pub fn get_ancestry(&self) -> types::Ancestry {
return ops::get_ancestry(&self.data);
}

Expand Down Expand Up @@ -102,9 +103,9 @@ impl TreeVec {

// ancestry_add[leaf_coords][leaf_col] = leaf as isize;
// let ancestry_add_ref = &mut ancestry_add;
order_cherries(&mut ancestry_add);
order_cherries_no_parents(&mut ancestry_add);
self.data = build_vector(ancestry_add);
ops::order_cherries(&mut ancestry_add);
ops::order_cherries_no_parents(&mut ancestry_add);
self.data = ops::build_vector(ancestry_add);
}

/// Removes a leaf from the tree
Expand All @@ -119,7 +120,7 @@ impl TreeVec {
/// Modifies the tree structure by removing the leaf and updating indices
pub fn remove_leaf(&mut self, leaf: usize) -> usize {
let ancestry = self.get_ancestry();
let leaf_coords = find_coords_of_first_leaf(&ancestry, leaf);
let leaf_coords = ops::find_coords_of_first_leaf(&ancestry, leaf);
let leaf_row = leaf_coords.0;
let leaf_col = leaf_coords.1;

Expand Down Expand Up @@ -159,9 +160,9 @@ impl TreeVec {
ancestry_rm.push(new_row);
}

order_cherries(&mut ancestry_rm);
order_cherries_no_parents(&mut ancestry_rm);
self.data = build_vector(ancestry_rm);
ops::order_cherries(&mut ancestry_rm);
ops::order_cherries_no_parents(&mut ancestry_rm);
self.data = ops::build_vector(ancestry_rm);

return sister;
}
Expand Down Expand Up @@ -232,7 +233,7 @@ mod tests {
#[case(vec![0, 0, 1], vec![[1, 3, 4],
[0, 2, 5],
[5, 4, 6]])]
fn test_get_ancestry(#[case] v: Vec<usize>, #[case] expected: Ancestry) {
fn test_get_ancestry(#[case] v: Vec<usize>, #[case] expected: types::Ancestry) {
let tree = TreeVec::new(v, None, None);
let ancestry = tree.get_ancestry();
assert_eq!(ancestry, expected);
Expand Down
3 changes: 1 addition & 2 deletions phylo2vec/src/tree_vec/ops/avl.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// Node definition
pub type Pair = (usize, usize);
use crate::tree_vec::types::Pair;

pub struct Node {
value: Pair,
Expand Down
16 changes: 13 additions & 3 deletions phylo2vec/src/tree_vec/ops/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
pub mod avl;
pub mod newick;
pub mod vector;

#[allow(unused_imports)]
use crate::tree_vec::types::Ancestry;

pub use vector::{
build_newick, build_vector, find_coords_of_first_leaf, get_ancestry, get_pairs, get_pairs_avl,
order_cherries, order_cherries_no_parents, to_newick, Ancestry,
build_vector, find_coords_of_first_leaf, get_ancestry, get_pairs, get_pairs_avl,
order_cherries, order_cherries_no_parents,
};

pub use newick::build_newick;

/// Recover a rooted tree (in Newick format) from a Phylo2Vec vector
pub fn to_newick(v: &Vec<usize>) -> String {
let ancestry: Ancestry = get_ancestry(&v);
build_newick(&ancestry)
}
34 changes: 34 additions & 0 deletions phylo2vec/src/tree_vec/ops/newick.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use crate::tree_vec::types::Ancestry;

// The recursive function that builds the Newick string
fn _build_newick_recursive_inner(p: usize, ancestry: &Ancestry) -> String {
let leaf_max = ancestry.len();

// Extract the children (c1, c2) and ignore the parent from the ancestry tuple
let [c1, c2, _] = ancestry[p - leaf_max - 1];

// Recursive calls for left and right children, checking if they are leaves or internal nodes
let left = if c1 > leaf_max {
_build_newick_recursive_inner(c1, ancestry)
} else {
c1.to_string() // It's a leaf node, just convert to string
};

let right = if c2 > leaf_max {
_build_newick_recursive_inner(c2, ancestry)
} else {
c2.to_string() // It's a leaf node, just convert to string
};

// Create the Newick string in the form (left, right)p
format!("({},{}){}", left, right, p)
}

/// Build the Newick string for the tree described by an ancestry matrix.
///
/// The root label is read from the parent entry (index 2) of the last row of
/// `ancestry`. The tree is then serialised from that root and terminated with
/// a semicolon, e.g. `((0,2)5,(1,3)4)6;`.
///
/// # Panics
/// Panics if `ancestry` is empty, since there is then no row to read the
/// root from.
pub fn build_newick(ancestry: &Ancestry) -> String {
    // The last row's parent column is the root of the whole tree.
    let [_, _, root] = *ancestry
        .last()
        .expect("ancestry must contain at least one [child1, child2, parent] row");

    // Serialise from the root and append the Newick terminator.
    format!("{};", _build_newick_recursive_inner(root, ancestry))
}
48 changes: 2 additions & 46 deletions phylo2vec/src/tree_vec/ops/vector.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
use crate::tree_vec::ops::avl::{AVLTree, Pair};
use crate::tree_vec::ops::avl::AVLTree;
use crate::tree_vec::types::{Ancestry, Pair, PairsVec};
use crate::utils::is_unordered;
use std::usize;

/// Ancestry matrix: a vector whose rows are fixed-size arrays of three
/// `usize` values laid out as `[child1, child2, parent]`.
pub type Ancestry = Vec<[usize; 3]>;

/// A list of `Pair` values, each a `(child1, child2)` tuple.
pub type PairsVec = Vec<Pair>;

/// Get the pair of nodes from the Phylo2Vec vector
/// using a vector data structure and for loops
/// implementation.
Expand Down Expand Up @@ -150,45 +145,6 @@ pub fn get_ancestry(v: &Vec<usize>) -> Ancestry {
ancestry
}

// The recursive function that builds the Newick string
fn _build_newick_recursive_inner(p: usize, ancestry: &Ancestry) -> String {
let leaf_max = ancestry.len();

// Extract the children (c1, c2) and ignore the parent from the ancestry tuple
let [c1, c2, _] = ancestry[p - leaf_max - 1];

// Recursive calls for left and right children, checking if they are leaves or internal nodes
let left = if c1 > leaf_max {
_build_newick_recursive_inner(c1, ancestry)
} else {
c1.to_string() // It's a leaf node, just convert to string
};

let right = if c2 > leaf_max {
_build_newick_recursive_inner(c2, ancestry)
} else {
c2.to_string() // It's a leaf node, just convert to string
};

// Create the Newick string in the form (left, right)p
format!("({},{}){}", left, right, p)
}

/// Build the Newick string for the tree described by an ancestry matrix.
///
/// The root label is read from the parent entry (index 2) of the last row of
/// `ancestry`. The tree is then serialised from that root and terminated with
/// a semicolon, e.g. `((0,2)5,(1,3)4)6;`.
///
/// # Panics
/// Panics if `ancestry` is empty, since there is then no row to read the
/// root from.
pub fn build_newick(ancestry: &Ancestry) -> String {
    // The last row's parent column is the root of the whole tree.
    let [_, _, root] = *ancestry
        .last()
        .expect("ancestry must contain at least one [child1, child2, parent] row");

    // Serialise from the root and append the Newick terminator.
    format!("{};", _build_newick_recursive_inner(root, ancestry))
}

/// Recover a rooted tree (in Newick format) from a Phylo2Vec vector
pub fn to_newick(v: &Vec<usize>) -> String {
let ancestry: Ancestry = get_ancestry(&v);
build_newick(&ancestry)
}

pub fn find_coords_of_first_leaf(ancestry: &Ancestry, leaf: usize) -> (usize, usize) {
for r in 0..ancestry.len() {
for c in 0..3 {
Expand Down
8 changes: 8 additions & 0 deletions phylo2vec/src/tree_vec/types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/// A pair of node labels stored as a `(child1, child2)` tuple.
pub type Pair = (usize, usize);

/// Ancestry matrix: a vector whose rows are fixed-size arrays of three
/// `usize` values laid out as `[child1, child2, parent]`.
pub type Ancestry = Vec<[usize; 3]>;

/// A list of `Pair` values, each a `(child1, child2)` tuple.
pub type PairsVec = Vec<Pair>;
2 changes: 1 addition & 1 deletion py-phylo2vec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fn get_pairs_avl(input_vector: Vec<usize>) -> Vec<(usize, usize)> {

#[pyfunction]
fn build_newick(input_ancestry: Vec<[usize; 3]>) -> String {
let newick_string: String = ops::vector::build_newick(&input_ancestry);
let newick_string: String = ops::newick::build_newick(&input_ancestry);
newick_string
}

Expand Down
Loading