Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(feat) initial centroid implementation for tdf #156

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 154 additions & 2 deletions crates/sage-cloudpath/src/tdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,108 @@ use sage_core::{
mass::Tolerance,
spectrum::{Precursor, RawSpectrum, Representation},
};
use timsrust::converters::ConvertableDomain;
pub use timsrust::readers::SpectrumReaderConfig as BrukerSpectrumProcessor;

pub struct TdfReader;

use std::cmp::Ordering;

fn squash_frame(mz_array: &[f32], intensity_array: &[f32], tol_ppm: f32) -> (Vec<f32>, Vec<f32>) {
// Make sure the mz array is sorted
assert!(mz_array.windows(2).all(|x| x[0] <= x[1]));

let arr_len = mz_array.len();
let mut touched = vec![false; arr_len];
let mut global_num_touched = 0;

let mut order: Vec<usize> = (0..arr_len).collect();
order.sort_unstable_by(|&a, &b| {
intensity_array[b]
.partial_cmp(&intensity_array[a])
.unwrap_or(Ordering::Equal)
});

let mut agg_mz = vec![0.0; arr_len];
let mut agg_intensity = vec![0.0; arr_len];

let utol = tol_ppm / 1e6;

for &idx in &order {
if touched[idx] {
continue;
}

let mz = mz_array[idx];
let da_tol = mz * utol;
let left_e = mz - da_tol;
let right_e = mz + da_tol;

let ss_start = mz_array.partition_point(|&x| x < left_e);
let ss_end = mz_array.partition_point(|&x| x <= right_e);

let slice_width = ss_end - ss_start;
let local_num_touched = touched[ss_start..ss_end].iter().filter(|&&x| x).count();
let local_num_untouched = slice_width - local_num_touched;

if local_num_touched == slice_width {
continue;
}

let mut curr_intensity = 0.0;
let mut curr_weighted_mz = 0.0;

for i in ss_start..ss_end {
if !touched[i] && intensity_array[i] > 0.0 {
curr_intensity += intensity_array[i];
curr_weighted_mz += mz_array[i] * intensity_array[i];
}
}

if curr_intensity > 0.0 {
curr_weighted_mz /= curr_intensity;

agg_intensity[idx] = curr_intensity;
agg_mz[idx] = curr_weighted_mz;

touched[ss_start..ss_end].iter_mut().for_each(|x| *x = true);
global_num_touched += local_num_untouched;
}

if global_num_touched == arr_len {
break;
}
}

// Drop the zeros and sort
let mut result: Vec<(f32, f32)> = agg_mz
.into_iter()
.zip(agg_intensity.into_iter())
.filter(|&(mz, intensity)| mz > 0.0 && intensity > 0.0)
.collect();

result.sort_unstable_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal));

result.into_iter().unzip()
}

impl TdfReader {
pub fn parse(
&self,
path_name: impl AsRef<str>,
file_id: usize,
bruker_spectrum_processor: BrukerSpectrumProcessor,
) -> Result<Vec<RawSpectrum>, timsrust::TimsRustError> {
let tdf_path = std::path::Path::new(path_name.as_ref()).join("analysis.tdf");
let spectrum_reader = timsrust::readers::SpectrumReader::build()
.with_path(path_name.as_ref())
.with_config(bruker_spectrum_processor)
.finalize()?;
let frame_reader = timsrust::readers::FrameReader::new(path_name.as_ref())?;
let metadata = timsrust::readers::MetadataReader::new(tdf_path)?;
let mz_converter = metadata.mz_converter;

// Get ms2 spectra
let spectra: Vec<RawSpectrum> = (0..spectrum_reader.len())
.into_par_iter()
.filter_map(|index| match spectrum_reader.get(index) {
Expand All @@ -33,6 +120,9 @@ impl TdfReader {
precursors: vec![precursor],
representation: Representation::Centroid,
scan_start_time: dda_precursor.rt as f32 / 60.0,
// I am pretty sure this is wrong ...
// The retention time is most certainly not what we want for the
// injection time.
ion_injection_time: dda_precursor.rt as f32,
total_ion_current: 0.0,
mz: dda_spectrum.mz_values.iter().map(|&x| x as f32).collect(),
Expand All @@ -42,11 +132,73 @@ impl TdfReader {
};
Some(spectrum)
}
None => None,
None => {
log::warn!("No precursor found for spectrum {:?}", dda_spectrum.index);
None
}
},
Err(_) => None,
// Q: should we raise/propagate/log an error here?
Err(x) => {
log::error!("error parsing spectrum: {:?}", x);
None
}
})
.collect();

// Get MS1 spectra
let ms1_spectra = frame_reader
.get_all_ms1()
.into_iter()
.filter_map(|frame| match frame {
Ok(frame) => {
let mz: Vec<f32> = frame
.tof_indices
.iter()
.map(|&x| mz_converter.convert(x as f64) as f32)
.collect();
let intensity: Vec<f32> = frame.intensities.iter().map(|&x| x as f32).collect();

// Sort the mzs and intensities by mz
let mut indices: Vec<usize> = (0..mz.len()).collect();
indices.sort_by(|&i, &j| {
mz[i]
.partial_cmp(&mz[j])
.unwrap_or(std::cmp::Ordering::Equal)
});
let sorted_mz: Vec<f32> = indices.iter().map(|&i| mz[i].clone()).collect();
let sorted_inten: Vec<f32> =
indices.iter().map(|&i| intensity[i].clone()).collect();

// Squash the mobility dimension
let (mz, intensity) = squash_frame(&sorted_mz, &sorted_inten, 20.0);
let scan_start_time = frame.rt as f32 / 60.0;
let ion_injection_time = 100.0;
let total_ion_current = 0.0;
let id = frame.index.to_string();

let spec = RawSpectrum {
file_id,
precursors: vec![],
representation: Representation::Centroid,
scan_start_time,
ion_injection_time,
mz,
ms_level: 1,
id,
intensity,
total_ion_current,
};
Some(spec)
}
Err(x) => {
log::error!("error parsing spectrum: {:?}", x);
None
}
});

// Merge the two
let spectra: Vec<RawSpectrum> = ms1_spectra.chain(spectra.into_iter()).collect();

Ok(spectra)
}

Expand Down
2 changes: 1 addition & 1 deletion crates/sage/src/fdr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ pub fn picked_precursor(
.map(|score| ((score.ix, score.decoy), score.q))
.collect::<FnvHashMap<_, _>>();

peaks.par_iter_mut().for_each(|((ix), (peak, _))| {
peaks.par_iter_mut().for_each(|(ix, (peak, _))| {
peak.q_value = scores[ix];
});
passing
Expand Down
2 changes: 1 addition & 1 deletion crates/sage/src/modification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{
str::FromStr,
};

use serde::{de::Visitor, Deserialize, Serialize};
use serde::Serialize;

use crate::mass::VALID_AA;

Expand Down
Loading