Skip to content

Commit

Permalink
Moved Newick C code to new file. Added code that will eventually crea…
Browse files Browse the repository at this point in the history
…te a Tree struct from a rapidNJ Newick string. I do this by modifying the rapidNJ Newick string and passing it to the C code. Currently the C code throws an error.
  • Loading branch information
jhellewell14 committed Apr 5, 2024
1 parent f07b3b6 commit 1e25e95
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 63 deletions.
4 changes: 2 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
fn main() {
cxx_build::bridge("src/phylo2vec.rs")
cxx_build::bridge("src/newick.rs")
.file("src/phylo2vec.cpp")
.std("c++17")
.compile("phylo2vec");

println!("cargo:rerun-if-changed=src/phylo2vec.rs");
println!("cargo:rerun-if-changed=src/newick.rs");
println!("cargo:rerun-if-changed=src/phylo2vec.cpp");
println!("cargo:rerun-if-changed=src/phylo2vec.hpp");
}
49 changes: 5 additions & 44 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod phylo2vec;
mod tests;
mod tree;
mod dspsa;
mod newick;

use cxx::kind;
use cxx::let_cxx_string;
Expand All @@ -16,6 +17,8 @@ use regex::{RegexSet, RegexSetBuilder};

use crate::dspsa::hill_peturb;
use crate::gen_list::*;
use crate::newick::newick_to_vec;
use crate::newick::parse_rapidNJ_newick;
use crate::phylo2vec::*;
use crate::tree::Tree;
use crate::likelihoods::logse;
Expand All @@ -33,51 +36,9 @@ pub fn main() {
let args = cli_args();
// Newick string example
let nstr: String = String::from("(((((('9':0.00066056,'4':0.0012556):0.00030238,('25':0.00042479,'19':0.00030083):0.00039731):0.00012351,'7':0.00039526):8.5797e-05,'18':0.00068249):0.00011977,'20':0.00056004):9.6199e-05,(((((((('17':0.0045802,'8':0.0021951):0.00019116,(((('24':0.0021162,'12':0.0017684):0.00074803,'13':0.0043482):0.00026831,('27':0.00063335,'5':0.00071804):0.0024273):4.8521e-05,((('23':0.0012435,'2':0.0012676):0.00011672,('21':0.00085695,'6':0.00096101):0.001509):0.00046989,'10':0.0020611):0.00092726):0.00022606):0.00019029,'26':0.0027488):0.00030355,('14':0.0021462,'0':0.0018751):0.00061029):0.00035469,'1':0.00096653):0.00015559,'22':0.0013558):7.0943e-05,('16':0.00054383,'15':0.00076666):0.00015841):0.00016779,'11':0.0021712):9.9157e-05,'3':0.00081132);");

// Chops into nodes
let mut v: Vec<&str> = nstr.split_inclusive(|c: char| c.eq(&')') || c.eq(&',')).filter(|s| !s.is_empty() && s.ne(&";")).collect();
// Pop off the semi-colon
// v.pop();
// println!("{:?}", v);

let n_nodes = v.len();
let mut branch_len: Vec<f64> = Vec::with_capacity(n_nodes);
let mut name_dict: HashMap<usize, String> = HashMap::with_capacity(n_nodes);
let internal_index = ((n_nodes - 1) / 2) + 1;
let mut node_index: usize = 0;

// println!("{}", n_nodes);
// println!("{}", internal_index);

// Iterate over nodes
for node in v {
let cleaned_node: String = node.replace(&['(', '\'', ')', ','], "");
// println!("{:?}", cleaned_node);
let split_node: Vec<&str> = cleaned_node.split(':').collect();
// println!("{:?}", split_node);

let node_name: String = match split_node[0].is_empty() {
true => node_index.to_string(),
false => split_node[0].to_string(),
};
name_dict.insert(node_index, node_name);

let blen: f64 = split_node[1].parse().unwrap();
branch_len.push(blen);
node_index += 1;
let x: String = parse_rapidNJ_newick(&nstr);
// println!("{:?}", newick_to_vec(&x, 27));

// Need to reconstruct new newick string using new node labels

// let new_el: Vec<&str> = node.split_inclusive(|c: char| c.eq(&')') || c.eq(&',') || c.eq(&'(') || c.eq(&':') || c.eq(&',')).collect();

// println!("{:?}", node);
}

// for (key, val) in name_dict.iter() {
// println!("key: {key} val: {val}");
// }
// let v = newick_to_vec(&nstr, 27);


// Define rate matrix
let q: na::Matrix4<f64> = na::Matrix4::new(
Expand Down
74 changes: 74 additions & 0 deletions src/newick.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
use cxx::let_cxx_string;
use std::collections::HashMap;

// cxx bridge exposing the C++ Newick conversion routine to Rust.
#[cxx::bridge]
pub mod ffi {
unsafe extern "C++" {
// Header that declares the C++ side of the bridge.
include!("bactrees/include/phylo2vec.hpp");
// Takes a pinned, mutable C++ string holding a Newick tree, the number of
// leaves, and a `with_mapping` flag, and returns an owned C++ vector of
// `i32`.
// NOTE(review): the meaning of `with_mapping` and the layout of the
// returned vector are defined in the C++ source, which is not visible
// here -- confirm against phylo2vec.hpp/.cpp.
fn doToVector(newick: Pin<&mut CxxString>, num_leaves: i32, with_mapping: bool) -> UniquePtr<CxxVector<i32>>;
}
}

pub fn newick_to_vec(nw: &String, n_leaves: usize) -> Vec<usize>{
let_cxx_string!(nw_cpp = nw);
let x = ffi::doToVector(nw_cpp, n_leaves as i32, false);
let y: Vec<usize> = x.iter().map(|el| *el as usize).collect();
y
}

/// Rewrites a rapidNJ-style Newick string so that every node carries an
/// integer label.
///
/// The tree text is cut into one chunk per node (each chunk ends at the `)`
/// or `,` that terminates the node). Chunks are numbered `0, 1, 2, ...` in the
/// order they appear in the string, and each node's label -- quoted, unquoted
/// or absent (internal nodes) -- is replaced by that number. Brackets, commas
/// and branch lengths are copied through unchanged.
///
/// Surrounding whitespace (for example a trailing newline) and the final `;`
/// are ignored on input; the returned string always ends with a single `;`.
/// An empty tree yields `";"`.
///
/// The original labels are not returned. Labels that themselves contain
/// `(`, `)`, `,` or `:` are not supported.
///
/// # Panics
///
/// Panics if a node has no `:<branch length>` part, or if the text between
/// the first `:` and the node terminator is not a valid floating-point
/// number.
// The name is not snake_case, but it is kept because callers already import it.
#[allow(non_snake_case)]
pub fn parse_rapidNJ_newick(nw: &str) -> String {
    let is_terminator = |c: char| c == ')' || c == ',';

    // Drop surrounding whitespace and the closing semicolon so that neither
    // ends up being treated as (part of) a node.
    let trimmed = nw.trim();
    let body = trimmed.strip_suffix(';').unwrap_or(trimmed).trim_end();

    let mut relabelled = String::with_capacity(body.len() + 1);

    for (node_index, node) in body.split_inclusive(is_terminator).enumerate() {
        // A chunk looks like `<opening brackets><label>:<length><terminator>`.
        let (head, tail) = match node.split_once(':') {
            Some(parts) => parts,
            None => panic!("Newick node `{}` has no branch length", node),
        };

        // Keep validating the branch length so malformed input fails loudly
        // here rather than inside the downstream C++ code.
        let length = tail.trim_end_matches(is_terminator).trim();
        if let Err(err) = length.parse::<f64>() {
            panic!(
                "invalid branch length `{}` in Newick node `{}`: {}",
                length, node, err
            );
        }

        // Everything in `head` after the run of opening brackets is the old
        // label (possibly empty); replace it wholesale with the node index.
        let label = head.trim_start_matches('(');
        let opening = &head[..head.len() - label.len()];

        relabelled.push_str(opening);
        relabelled.push_str(&node_index.to_string());
        relabelled.push(':');
        relabelled.push_str(tail);
    }

    relabelled.push(';');
    relabelled
}
16 changes: 0 additions & 16 deletions src/phylo2vec.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::Tree;
use ndarray::*;
use rand::{seq::SliceRandom, thread_rng, Rng};
use cxx::let_cxx_string;

pub fn phylo2vec_quad(v: &[usize]) -> Tree {
let mut tree = Tree::new(v);
Expand Down Expand Up @@ -214,19 +213,4 @@ impl Tree {

}

}

// cxx bridge exposing the C++ Newick conversion routine to Rust.
#[cxx::bridge]
pub mod ffi {
unsafe extern "C++" {
// Header that declares the C++ side of the bridge.
include!("bactrees/include/phylo2vec.hpp");
// Takes a pinned, mutable C++ string holding a Newick tree, the number of
// leaves, and a `with_mapping` flag, and returns an owned C++ vector of
// `i32`.
// NOTE(review): the meaning of `with_mapping` and the layout of the
// returned vector are defined in the C++ source, which is not visible
// here -- confirm against phylo2vec.hpp/.cpp.
fn doToVector(newick: Pin<&mut CxxString>, num_leaves: i32, with_mapping: bool) -> UniquePtr<CxxVector<i32>>;
}
}

pub fn newick_to_vec(nw: &String, n_leaves: usize) -> Vec<usize>{
let_cxx_string!(nw_cpp = nw);
let x = ffi::doToVector(nw_cpp, n_leaves as i32, false);
let y: Vec<usize> = x.iter().map(|el| *el as usize).collect();
y
}
2 changes: 1 addition & 1 deletion src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod tests {
use crate::phylo2vec::phylo2vec_lin;
use crate::phylo2vec::phylo2vec_quad;
use crate::tree::Tree;
use crate::newick_to_vec;
use crate::newick::newick_to_vec;
use crate::random_tree;

#[test]
Expand Down

0 comments on commit 1e25e95

Please sign in to comment.