diff --git a/py-phylo2vec/phylo2vec/base/to_newick.py b/py-phylo2vec/phylo2vec/base/to_newick.py index 76f5956..fbb5533 100644 --- a/py-phylo2vec/phylo2vec/base/to_newick.py +++ b/py-phylo2vec/phylo2vec/base/to_newick.py @@ -1,50 +1,11 @@ """ Methods to convert a Phylo2Vec vector to a Newick-format string. """ -from phylo2vec import _phylo2vec_core - -import numba as nb import numpy as np -# tuple type -value_type = nb.types.UniTuple(nb.types.int64, 2) - - -@nb.njit(cache=True) -def _get_pairs(v): - pairs = [] - - for i in range(len(v) - 1, -1, -1): - next_leaf = i + 1 - if v[i] <= i: - # If v[i] <= i, it's an easy BD - # We now that the next pair to add now is (v[i], next_leaf) - # (as the branch leading to v[i] gives birth to the next_leaf) - # Why pairs.insert(0)? Let's take an example with [0, 0] - # We initially have (0, 1), but 0 gives birth to 2 afterwards - # So the "shallowest" pair is (0, 2) - pairs.append((v[i], next_leaf)) - - for j in range(1, len(v)): - next_leaf = j + 1 - if v[j] == 2 * j: - # 2*j = extra root ==> pairing = (0, next_leaf) - pairs.append((0, next_leaf)) - elif v[j] > j: - # If v[i] > i, it's not the branch leading v[i] that gives birth but an internal branch - # Remark 1: it will not be the "shallowest" pair, so we do not insert it at position 0 - # len(pairs) = number of pairings we did so far - # So what v[i] - len(pairs) gives us is the depth of the next pairing - # And pairs[v[i] - len(pairs) - 1][0] is a node that we processed beforehand - # which is deeper than the branch v[i] - index = v[j] - 2 * j - pairs.insert(index, (pairs[index - 1][0], next_leaf)) - - return pairs - - -@nb.njit(cache=True) -def _get_ancestry(v): +from phylo2vec import _phylo2vec_core + +def _get_ancestry(v: np.ndarray) -> np.ndarray: """ Get the "ancestry" of v (see "Returns" paragraph) @@ -77,37 +38,11 @@ def _get_ancestry(v): 2nd column: child 2 3rd column: parent node """ - pairs = _get_pairs(v) - - # We have our pairs, we can now build our ancestry - # Matrix with 3 columns: child1, child2, parent - ancestry = np.zeros((len(pairs), 3), dtype=np.int16) - - # Dictionary to keep track of the following relationship: child->highest parent - parents = nb.typed.Dict.empty(key_type=nb.types.int64, value_type=nb.types.int64) - - # Leaves are number 0, 1, ..., n_leaves - 1, so the next parent is n_leaves - next_parent = len(v) + 1 - - for i, pair in enumerate(pairs): - child1, child2 = pair - - parent_child1 = parents.get(child1, child1) - parent_child2 = parents.get(child2, child2) - - ancestry[i, :] = [parent_child1, parent_child2, next_parent] - - # Change the parents of the current children - parents[child1] = next_parent - parents[child2] = next_parent - - next_parent += 1 + ancestry_list = _phylo2vec_core.get_ancestry(v) + return np.asarray(ancestry_list) - return ancestry - -@nb.njit -def _build_newick(ancestry): +def _build_newick(ancestry: np.ndarray) -> str: """Build a Newick string from an "ancestry" array The input should always be 3-dimensional with the following format: @@ -128,35 +63,9 @@ def _build_newick(ancestry): newick : str Newick string """ - root = ancestry[-1][-1] - - newick = _build_newick_inner(root, ancestry) + ";" - - return newick - - -@nb.njit -def _build_newick_inner(node, ancestry): - leaf_max = ancestry.shape[0] - - c1, c2, _ = ancestry[node - leaf_max - 1] + return _phylo2vec_core.build_newick(ancestry) - if c1 > leaf_max: - left = _build_newick_inner(c1, ancestry) - else: - left = f"{c1}" - if c2 > leaf_max: - right = _build_newick_inner(c2, ancestry) - else: - right = f"{c2}" - - newick = f"({left},{right}){node}" - - return newick - - -@nb.njit(cache=True) def to_newick(v): """Recover a rooted tree (in Newick format) from a Phylo2Vec v @@ -170,8 +79,4 @@ def to_newick(v): newick : str Newick tree """ - ancestry = _get_ancestry(v) - - newick = _build_newick(ancestry) - - return newick + return _phylo2vec_core.to_newick(v) diff --git a/py-phylo2vec/phylo2vec/metrics/pairwise.py b/py-phylo2vec/phylo2vec/metrics/pairwise.py index 1afbdce..d2f7a01 100644 --- a/py-phylo2vec/phylo2vec/metrics/pairwise.py +++ b/py-phylo2vec/phylo2vec/metrics/pairwise.py @@ -1,11 +1,9 @@ -import numba as nb import numpy as np from phylo2vec.base.to_newick import _get_ancestry from phylo2vec.utils.validation import check_v -@nb.njit(cache=True) def cophenetic_distances(v, unrooted=False): # Should be very similar to dist_nodes in ape diff --git a/py-phylo2vec/phylo2vec/utils/random.py b/py-phylo2vec/phylo2vec/utils/random.py index 39d25a6..80d2f1a 100644 --- a/py-phylo2vec/phylo2vec/utils/random.py +++ b/py-phylo2vec/phylo2vec/utils/random.py @@ -3,12 +3,11 @@ import os import random -import numba as nb import numpy as np +from phylo2vec import _phylo2vec_core -@nb.njit -def sample(n_leaves, ordered=False): +def sample(n_leaves: int, ordered: bool = False) -> np.ndarray: """Sample a random tree via Phylo2Vec Parameters @@ -30,11 +29,8 @@ def sample(n_leaves, ordered=False): Phylo2Vec vector """ - if ordered: - v_list = [np.random.randint(0, i + 1) for i in range(n_leaves - 1)] - else: - v_list = [np.random.randint(0, 2 * i + 1) for i in range(n_leaves - 1)] - return np.array(v_list, dtype=np.uint16) + v_list = _phylo2vec_core.sample(n_leaves, ordered) + return np.asarray(v_list) def seed_everything(seed): diff --git a/py-phylo2vec/phylo2vec/utils/validation.py b/py-phylo2vec/phylo2vec/utils/validation.py index 35dbd94..e094973 100644 --- a/py-phylo2vec/phylo2vec/utils/validation.py +++ b/py-phylo2vec/phylo2vec/utils/validation.py @@ -2,8 +2,10 @@ import numpy as np +from phylo2vec import _phylo2vec_core -def check_v(v): + +def check_v(v: np.ndarray) -> None: """Input validation of a Phylo2Vec vector The input is checked to satisfy the Phylo2Vec constraints @@ -13,8 +15,4 @@ def check_v(v): v : numpy.ndarray Phylo2Vec vector """ - k = len(v) - - v_max = 2 * np.arange(k) - - assert np.all((0 <= v) & (v <= v_max)), print(v, v >= 0, v <= v_max) + _phylo2vec_core.check_v(v.tolist()) diff --git a/py-phylo2vec/phylo2vec/utils/vector.py b/py-phylo2vec/phylo2vec/utils/vector.py index b4c7721..4a4a378 100644 --- a/py-phylo2vec/phylo2vec/utils/vector.py +++ b/py-phylo2vec/phylo2vec/utils/vector.py @@ -239,7 +239,7 @@ def _find_indices_of_first_leaf(ancestry, leaf): return r, c -@nb.njit(cache=True) + def remove_leaf(v, leaf): """Remove a leaf from a Phylo2Vec v @@ -302,7 +302,7 @@ def remove_leaf(v, leaf): return v_sub, sister -@nb.njit(cache=True) + def add_leaf(v, leaf, pos): """Add a leaf to a Phylo2Vec vector v @@ -355,7 +355,7 @@ def add_leaf(v, leaf, pos): return v_add -@nb.njit(cache=True) + def get_ancestry_paths(v): """ Get the ancestry paths for each node in the Phylo2Vec vector. @@ -386,7 +386,7 @@ def get_ancestry_paths(v): return ancestry_paths -@nb.njit(cache=True) + def get_common_ancestor(v, node1, node2): """Get the first recent common ancestor between two nodes in a Phylo2Vec tree diff --git a/py-phylo2vec/src/lib.rs b/py-phylo2vec/src/lib.rs index 9fee47d..9be537e 100644 --- a/py-phylo2vec/src/lib.rs +++ b/py-phylo2vec/src/lib.rs @@ -6,7 +6,6 @@ use phylo2vec::utils; /// This function takes a Python list and converts it to a Rust vector. #[pyfunction] fn to_newick(input_vector: Vec) -> PyResult { - // TODO: Take in numpy array instead of just simple list? let newick = ops::to_newick(&input_vector); Ok(newick) } @@ -18,18 +17,6 @@ fn get_ancestry(input_vector: Vec) -> Vec<[usize; 3]> { ancestry } -#[pyfunction] -fn get_pairs(input_vector: Vec) -> Vec<(usize, usize)> { - let pairs: Vec<(usize, usize)> = ops::get_pairs(&input_vector); - pairs -} - -#[pyfunction] -fn get_pairs_avl(input_vector: Vec) -> Vec<(usize, usize)> { - let pairs: Vec<(usize, usize)> = ops::get_pairs_avl(&input_vector); - pairs -} - #[pyfunction] fn build_newick(input_ancestry: Vec<[usize; 3]>) -> String { let newick_string: String = ops::newick::build_newick(&input_ancestry); @@ -37,7 +24,6 @@ fn build_newick(input_ancestry: Vec<[usize; 3]>) -> String { } #[pyfunction] -#[pyo3(signature = (n_leaves, ordered=false, /), text_signature = "(n_leaves, ordered=False, /)")] fn sample(n_leaves: usize, ordered: bool) -> Vec { let v = utils::sample(n_leaves, ordered); v @@ -54,21 +40,7 @@ fn _phylo2vec_core(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(to_newick, m)?)?; m.add_function(wrap_pyfunction!(build_newick, m)?)?; m.add_function(wrap_pyfunction!(get_ancestry, m)?)?; - m.add_function(wrap_pyfunction!(get_pairs, m)?)?; - m.add_function(wrap_pyfunction!(get_pairs_avl, m)?)?; m.add_function(wrap_pyfunction!(sample, m)?)?; m.add_function(wrap_pyfunction!(check_v, m)?)?; Ok(()) } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_to_newick() { - let v = vec![0, 0, 1]; - let newick = ops::to_newick(&v); - assert_eq!(newick, "((0,2)5,(1,3)4)6;"); - } -}