Skip to content

Commit

Permalink
Merge pull request #173 from an-altosian/dev
Browse files Browse the repository at this point in the history
implement logic to separate RnaChemistry::Other() and CustomChemistry.
  • Loading branch information
rob-p authored Dec 22, 2024
2 parents afae946 + f3daf2d commit f881fa6
Show file tree
Hide file tree
Showing 7 changed files with 338 additions and 247 deletions.
48 changes: 45 additions & 3 deletions resources/chemistries.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
}
},
"10xv2": {
"geometry": "__builtin",
"geometry": "1{b[16]u[10]x:}2{r:}",
"expected_ori": "fw",
"plist_name": "dda0309f511ded5d801081a55c66b9a44cab4edbf0e07a9223f539e248d8e090",
"version": "0.1.0",
Expand All @@ -80,7 +80,7 @@
"remote_url": "https://umd.box.com/shared/static/jbs2wszgbj7k4ic2hass9ts6nhqkwq1p"
},
"10xv3": {
"geometry": "__builtin",
"geometry": "1{b[16]u[12]x:}2{r:}",
"expected_ori": "fw",
"plist_name": "2c9dfb98babe5a57ae763778adb9ebb7bfa531e105823bc26163892089333f8c",
"version": "0.1.0",
Expand All @@ -100,7 +100,7 @@
"remote_url": "https://umd.box.com/shared/static/cbpv1c4zi6ty81nvcgy3pyta2oj7vea1.txt"
},
"10xv4-3p": {
"geometry": "__builtin",
"geometry": "1{b[16]u[12]x:}2{r:}",
"expected_ori": "fw",
"plist_name": "0bfa4a0bea1d636e7ec1908aabb94307c53778185c12fc164da32dd085848131",
"version": "0.1.0",
Expand Down Expand Up @@ -138,5 +138,47 @@
"meta": {
"cr_filename": "737K-arc-v1.txt"
}
},
"dropseq": {
"geometry": "1{b[12]u[8]x:}2{r:}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
},
"indropv2": {
"geometry": "1{r:}2{b[8-11]f[GAGTGATTGCTTGTGACGCCTT]b[8]u[6]x:}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
},
"celseq2": {
"geometry": "1{u[6]b[6]x:}2{r:}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
},
"splitseqv1": {
"geometry": "1{r[66]}2{u[10]b[8]f[CACCGGCTACAAAGCGTAGCCGCATGCTGA]b[8]f[TAGGTGCACGAACTCTCCGGTCTCGTAAGC]b[8]}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
},
"splitseqv2": {
"geometry": "1{r[66]}2{u[10]b[8]f[CACCGGCTACAAAGCGTAGCCGCATGCTGA]b[8]f[TAGGTGCACGAACTCTGACACC]b[8]}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
},
"sciseq3": {
"geometry": "1{b[9-10]f[GTCTCG]u[8]b[10]x:}2{r:}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
}
}
4 changes: 2 additions & 2 deletions src/simpleaf_commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ pub struct ChemistryAddOpts {
/// the geometry to which the chemistry maps, wrapped in quotes.
#[arg(short, long)]
pub geometry: String,
/// the expected orientation to give to the chemistry
/// the expected orientation indicating the direction of biological reads to reference sequences.
#[arg(short, long, value_parser = clap::builder::PossibleValuesParser::new(["fw", "rc", "both"]))]
pub expected_ori: String,
/// the (fully-qualified) path to a local file that will be copied into
Expand All @@ -497,7 +497,7 @@ pub struct ChemistryAddOpts {
/// optionally assign a version number to this chemistry. A chemistry's
/// entry can be updated in the future by adding it again with a higher
/// version number.
#[arg(long)]
#[arg(long, default_value = "0.0.0")]
pub version: Option<String>,
}

Expand Down
17 changes: 7 additions & 10 deletions src/simpleaf_commands/chemistry.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::utils::af_utils::*;
use crate::utils::chem_utils::{
custom_chem_hm_to_json, get_custom_chem_hm, get_single_custom_chem_from_file, CustomChemistry,
custom_chem_hm_into_json, get_custom_chem_hm, get_single_custom_chem_from_file, CustomChemistry,
};
use crate::utils::constants::*;
use crate::utils::prog_utils::{self, download_to_file_compute_hash};
Expand Down Expand Up @@ -56,11 +56,8 @@ pub fn add_chemistry(
let chem_p = af_home_path.join(CHEMISTRIES_PATH);

if let Some(existing_entry) = get_single_custom_chem_from_file(&chem_p, &name)? {
let existing_ver_str = existing_entry
.version()
.clone()
.unwrap_or("0.0.0".to_string());
let existing_ver = Version::parse(existing_ver_str.as_ref()).with_context( || format!("could not parse version {} found in existing chemistries.json file. Please correct this entry", existing_ver_str))?;
let existing_ver_str = existing_entry.version();
let existing_ver = Version::parse(existing_ver_str).with_context( || format!("could not parse version {} found in existing chemistries.json file. Please correct this entry", existing_ver_str))?;
if add_ver <= existing_ver {
info!("Attempting to add chemistry with version {:#} which is <= than the existing version ({:#}) for this chemistry. Skipping addition", add_ver, existing_ver);
return Ok(());
Expand Down Expand Up @@ -155,10 +152,10 @@ pub fn add_chemistry(
let custom_chem = CustomChemistry {
name,
geometry,
expected_ori: Some(ExpectedOri::from_str(&add_opts.expected_ori)?),
expected_ori: ExpectedOri::from_str(&add_opts.expected_ori)?,
plist_name: local_plist,
remote_pl_url: add_opts.remote_url,
version: Some(version),
version,
meta: None,
};

Expand All @@ -180,7 +177,7 @@ pub fn add_chemistry(
}

// convert the custom chemistry hashmap to json
let v = custom_chem_hm_to_json(&chem_hm)?;
let v = custom_chem_hm_into_json(chem_hm)?;

// write out the new custom chemistry file
let mut custom_chem_file = std::fs::File::create(&chem_p)
Expand Down Expand Up @@ -385,7 +382,7 @@ pub fn remove_chemistry(
chem_hm.remove(&name);

// convert the custom chemistry hashmap to json
let v = custom_chem_hm_to_json(&chem_hm)?;
let v = custom_chem_hm_into_json(chem_hm)?;

// write out the new custom chemistry file
let mut custom_chem_file = std::fs::File::create(&chem_p)
Expand Down
4 changes: 2 additions & 2 deletions src/simpleaf_commands/inspect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::atac::commands::AtacChemistry;
use crate::utils::constants::CHEMISTRIES_PATH;
use crate::utils::{
af_utils::RnaChemistry,
chem_utils::{custom_chem_hm_to_json, get_custom_chem_hm},
chem_utils::{custom_chem_hm_into_json, get_custom_chem_hm},
prog_utils::*,
};
use anyhow::Result;
Expand All @@ -19,7 +19,7 @@ pub fn inspect_simpleaf(version: &str, af_home_path: PathBuf) -> Result<()> {
let chem_info_value = if custom_chem_p.is_file() {
// parse the chemistry json file
let custom_chem_hm = get_custom_chem_hm(&custom_chem_p)?;
let v = custom_chem_hm_to_json(&custom_chem_hm)?;
let v = custom_chem_hm_into_json(custom_chem_hm)?;
json!({
"custom_chem_path" : custom_chem_p.display().to_string(),
"custom_geometries" : v
Expand Down
74 changes: 5 additions & 69 deletions src/simpleaf_commands/quant.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::utils::{af_utils::*, chem_utils::*};
use crate::utils::af_utils::*;

use crate::utils::prog_utils;
use crate::utils::prog_utils::{CommandVerbosityLevel, ReqProgs};
Expand Down Expand Up @@ -179,12 +179,6 @@ impl CBListInfo {
}
}

enum IndexType {
Salmon(PathBuf),
Piscem(PathBuf),
NoIndex,
}

fn push_advanced_piscem_options(
piscem_quant_cmd: &mut std::process::Command,
opts: &MapQuantOpts,
Expand Down Expand Up @@ -387,79 +381,21 @@ pub fn map_and_quant(af_home_path: &Path, opts: MapQuantOpts) -> anyhow::Result<
// the chemistries file
let custom_chem_p = af_home_path.join(CHEMISTRIES_PATH);

let chem = match opts.chemistry.as_str() {
"10xv2" => Chemistry::Rna(RnaChemistry::TenxV2),
"10xv2-5p" => Chemistry::Rna(RnaChemistry::TenxV25P),
"10xv3" => Chemistry::Rna(RnaChemistry::TenxV3),
"10xv3-5p" => Chemistry::Rna(RnaChemistry::TenxV35P),
"10xv4-3p" => Chemistry::Rna(RnaChemistry::TenxV43P),
s => {
// we try to extract the single record for the chemistry and ignore the rest
if let Some(chem) =
get_single_custom_chem_from_file(&custom_chem_p, opts.chemistry.as_str())?
{
info!(
"custom chemistry {} maps to geometry {}",
s,
chem.geometry()
);
Chemistry::Custom(chem)
} else {
Chemistry::Custom(CustomChemistry::simple_custom(s).with_context(|| {
format!(
"Could not parse the provided chemistry {}. Please ensure it is a valid chemistry string wrapped by quotes or that it is defined in the custom_chemistries.json file.",
s
)
})?)
}
}
};
let chem = Chemistry::from_str(&index_type, &custom_chem_p, &opts.chemistry)?;

let ori: ExpectedOri;
// if the user set the orientation, then
// use that explicitly
if let Some(o) = opts.expected_ori.clone() {
ori = ExpectedOri::from_str(&o).with_context(|| {
if let Some(o) = &opts.expected_ori {
ori = ExpectedOri::from_str(o).with_context(|| {
format!(
"Could not parse orientation {}. It must be one of the following: {:?}",
o,
ExpectedOri::all_to_str().join(", ")
)
})?;
} else {
// otherwise, this was not set explicitly. In that case
// if we have 10xv2, 10xv3, or 10xv4 (3') chemistry, set ori = "fw"
// if we have 10xv2-5p or 10xv3-5p chemistry, set ori = "fw"
// otherwise set ori = "both"
match &chem {
Chemistry::Rna(RnaChemistry::TenxV2)
| Chemistry::Rna(RnaChemistry::TenxV3)
| Chemistry::Rna(RnaChemistry::TenxV43P) => {
ori = ExpectedOri::Forward;
}
Chemistry::Rna(RnaChemistry::TenxV25P) | Chemistry::Rna(RnaChemistry::TenxV35P) => {
// NOTE: This is because we assume the piscem encoding
// that is, these are treated as potentially paired-end protocols and
// we infer the orientation of the fragment = orientation of read 1.
// So, while the direction we want is the same as the 3' protocols
// above, we separate out the case statement here for clarity.
// Further, we may consider changing this or making it more robust if
// and when we propagate more information about paired-end mappings.
ori = ExpectedOri::Forward;
}
Chemistry::Rna(RnaChemistry::Other(_)) => ori = ExpectedOri::default(),
Chemistry::Custom(cc) => {
// if the custom chemistry has an orientation, use that
if let Some(o) = cc.expected_ori() {
ori = o.clone();
} else {
ori = ExpectedOri::default();
}
}
_ => {
bail!("Encountered non-RNA chemistry in simpleaf quant. This should not happen. Please report this to simpleaf GitHub issues.");
}
}
ori = chem.expected_ori();
}

let mut filter_meth_opt = None;
Expand Down
Loading

0 comments on commit f881fa6

Please sign in to comment.