diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index a458ca5..42dcff8 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -1,10 +1,10 @@ -name: Rust +name: Build and test on: push: - branches: [ "main" ] + branches: [ "main", "develop" ] pull_request: - branches: [ "main" ] + branches: [ "main", "develop" ] workflow_dispatch: env: diff --git a/Cargo.lock b/Cargo.lock index bc9a8b4..fa3adf8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -895,6 +895,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", +] + [[package]] name = "thrift" version = "0.17.0" @@ -908,7 +928,7 @@ dependencies = [ [[package]] name = "timsrust" -version = "0.1.7" +version = "0.2.0" dependencies = [ "bytemuck", "byteorder", @@ -916,6 +936,7 @@ dependencies = [ "parquet", "rayon", "rusqlite", + "thiserror", "zstd", ] diff --git a/Cargo.toml b/Cargo.toml index 63a9e05..f606a21 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "timsrust" -version = "0.1.7" +version = "0.2.0" edition = "2021" description = "A crate to read Bruker timsTOF data" license = "Apache-2.0" @@ -21,3 +21,4 @@ rayon = "1.5" linreg = "0.2.0" bytemuck = "1.13.1" parquet = "42.0.0" +thiserror = "1.0.0" diff --git a/README.md b/README.md index e2d0068..73965a4 100644 --- a/README.md +++ b/README.md @@ -9,3 +9,28 @@ Add this crate to your `Cargo.toml`: ```toml [dependencies] timsrust = "x.x.x" +``` + +## Usage + +TimsRust is intended to be used as a library and not as a 
stand-alone application. An example of how to use it is found in e.g. [Sage](https://github.com/lazear/sage). + +### Basics + +Two primary data types are exposed through TimsRust: +* Spectra: A traditional representation that expresses intensities as a function of mz values for a given precursor. +* Frames: All recorded data from a single TIMS elution (i.e. at one specific retention_time). + +### File formats + +Two file formats are supported: +* Bruker .d folder containing: + * analysis.tdf + * analysis.tdf_bin +* Bruker .ms2 folder containing: + * converter.ms2.bin + * converter.MS2Spectra.ms2.parquet + +## Python bindings + +The [timsrust_pyo3](https://github.com/jspaezp/timsrust_pyo3) package is an example of how the performance of TimsRust can be utilized in Python diff --git a/src/acquisition.rs b/src/acquisition.rs new file mode 100644 index 0000000..10b21b9 --- /dev/null +++ b/src/acquisition.rs @@ -0,0 +1,5 @@ +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum AcquisitionType { + DDAPASEF, + DIAPASEF, +} diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 0000000..6935f57 --- /dev/null +++ b/src/errors.rs @@ -0,0 +1,7 @@ +use crate::file_readers; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("FileReaderError: {0}")] + FileReaderError(#[from] file_readers::FileReaderError), +} diff --git a/src/file_readers.rs b/src/file_readers.rs index ee2a53a..7a2d38f 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -16,9 +16,11 @@ pub struct FileReader { } impl FileReader { - pub fn new>(path_name: T) -> Self { - let format: FileFormat = FileFormat::parse(path_name); - Self { format } + pub fn new>( + path_name: T, + ) -> Result { + let format: FileFormat = FileFormat::parse(path_name)?; + Ok(Self { format }) } pub fn read_all_frames(&self) -> Vec { @@ -29,3 +31,9 @@ impl FileReader { self.format.read_all_spectra() } } + +#[derive(thiserror::Error, Debug)] +pub enum FileReaderError { + #[error("FileFormatError: {0}")] +
FileFormatError(#[from] file_formats::FileFormatError), +} diff --git a/src/file_readers/file_formats.rs b/src/file_readers/file_formats.rs index ed2eb34..808ed8b 100644 --- a/src/file_readers/file_formats.rs +++ b/src/file_readers/file_formats.rs @@ -3,12 +3,16 @@ use std::{fs, path::PathBuf}; pub enum FileFormat { DFolder(PathBuf), MS2Folder(PathBuf), - Unknown(PathBuf), } impl FileFormat { - pub fn parse(input: impl AsRef) -> Self { + pub fn parse( + input: impl AsRef, + ) -> Result { let path: PathBuf = input.as_ref().to_path_buf(); + if !path.exists() { + return Err(FileFormatError::DirectoryDoesNotExist); + } let extension: &str = path .extension() .unwrap_or_default() @@ -18,26 +22,39 @@ impl FileFormat { "d" => Self::DFolder(path), "ms2" => Self::MS2Folder(path), _ => { - let parent_path: &std::path::Path = - path.parent().unwrap_or("".as_ref()); - Self::parse(parent_path) + if let Some(path) = path.parent() { + // Only recurse if there is a valid parent section, + // otherwise we'll get a stack overflow + return Self::parse(path); + } + return Err(FileFormatError::NoParentWithBrukerExtension); }, }; - if !format.is_valid() { - let path: PathBuf = input.as_ref().to_path_buf(); - Self::Unknown(path) - } else { - format - } + format.is_valid()?; + Ok(format) } - pub fn is_valid(&self) -> bool { - let result: bool = match &self { - Self::DFolder(path) => folder_contains_extension(path, "tdf"), - Self::MS2Folder(path) => folder_contains_extension(path, "parquet"), - Self::Unknown(_) => false, - }; - result + /// FileFormat is guaranteed to be `valid` if it is constructed + fn is_valid(&self) -> Result<(), FileFormatError> { + match &self { + Self::DFolder(path) => { + if !folder_contains_extension(path, "tdf_bin") { + return Err(FileFormatError::BinaryFilesAreMissing); + } + if !folder_contains_extension(path, "tdf") { + return Err(FileFormatError::MetadataFilesAreMissing); + } + }, + Self::MS2Folder(path) => { + if !folder_contains_extension(path, "bin") { + 
return Err(FileFormatError::BinaryFilesAreMissing); + } + if !folder_contains_extension(path, "parquet") { + return Err(FileFormatError::MetadataFilesAreMissing); + } + }, + } + Ok(()) } } @@ -62,3 +79,15 @@ fn folder_contains_extension( } false } + +#[derive(thiserror::Error, Debug)] +pub enum FileFormatError { + #[error("DirectoryDoesNotExist")] + DirectoryDoesNotExist, + #[error("NoParentWithBrukerExtension")] + NoParentWithBrukerExtension, + #[error("BinaryFilesAreMissing")] + BinaryFilesAreMissing, + #[error("MetadataFilesAreMissing")] + MetadataFilesAreMissing, +} diff --git a/src/file_readers/frame_readers.rs b/src/file_readers/frame_readers.rs index 5b85e4e..b8cdf91 100644 --- a/src/file_readers/frame_readers.rs +++ b/src/file_readers/frame_readers.rs @@ -10,6 +10,10 @@ pub trait ReadableFrames { fn read_single_frame(&self, index: usize) -> Frame; fn read_all_frames(&self) -> Vec; + + fn read_ms1_frames(&self) -> Vec; + + fn read_ms2_frames(&self) -> Vec; } impl FileFormat { @@ -22,10 +26,6 @@ impl FileFormat { "Folder {:} is not frame readable", path.to_str().unwrap_or_default().to_string() ), - Self::Unknown(path) => panic!( - "Folder {:} is not frame readable", - path.to_str().unwrap_or_default().to_string() - ), }; result } @@ -39,4 +39,12 @@ impl ReadableFrames for FileFormat { fn read_all_frames(&self) -> Vec { self.unwrap_frame_reader().read_all_frames() } + + fn read_ms1_frames(&self) -> Vec { + self.unwrap_frame_reader().read_ms1_frames() + } + + fn read_ms2_frames(&self) -> Vec { + self.unwrap_frame_reader().read_ms2_frames() + } } diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index 0d59a4f..7ab9b43 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -1,5 +1,6 @@ use { crate::{ + acquisition::AcquisitionType, converters::{ ConvertableIndex, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, @@ -26,6 +27,7 @@ pub struct 
TDFReader { pub im_converter: Scan2ImConverter, pub mz_converter: Tof2MzConverter, pub frame_table: FrameTable, + frame_types: Vec, } impl TDFReader { @@ -42,6 +44,16 @@ impl TDFReader { String::from(&file_name), frame_table.offsets.clone(), ); + let frame_types: Vec = frame_table + .msms_type + .iter() + .map(|msms_type| match msms_type { + 0 => FrameType::MS1, + 8 => FrameType::MS2(AcquisitionType::DDAPASEF), + 9 => FrameType::MS2(AcquisitionType::DIAPASEF), + _ => FrameType::Unknown, + }) + .collect(); Self { path: path.to_string(), tdf_bin_reader: tdf_bin_reader, @@ -50,6 +62,7 @@ impl TDFReader { mz_converter: Tof2MzConverter::from_sql(&tdf_sql_reader), frame_table: frame_table, tdf_sql_reader: tdf_sql_reader, + frame_types: frame_types, } } @@ -65,13 +78,7 @@ impl ReadableFrames for TDFReader { Frame::read_from_file(&self.tdf_bin_reader, index); frame.rt = self.rt_converter.convert(index as u32); frame.index = self.frame_table.id[index]; - let msms_type = self.frame_table.msms_type[index]; - frame.frame_type = match msms_type { - 0 => FrameType::MS1, - 8 => FrameType::MS2DDA, - 9 => FrameType::MS2DIA, - _ => FrameType::Unknown, - }; + frame.frame_type = self.frame_types[index]; frame } @@ -81,4 +88,24 @@ impl ReadableFrames for TDFReader { .map(|index| self.read_single_frame(index)) .collect() } + + fn read_ms1_frames(&self) -> Vec { + (0..self.tdf_bin_reader.size()) + .into_par_iter() + .map(|index| match self.frame_types[index] { + FrameType::MS1 => self.read_single_frame(index), + _ => Frame::default(), + }) + .collect() + } + + fn read_ms2_frames(&self) -> Vec { + (0..self.tdf_bin_reader.size()) + .into_par_iter() + .map(|index| match self.frame_types[index] { + FrameType::MS2(_) => self.read_single_frame(index), + _ => Frame::default(), + }) + .collect() + } } diff --git a/src/file_readers/spectrum_readers.rs b/src/file_readers/spectrum_readers.rs index ece74bf..24d6ff5 100644 --- a/src/file_readers/spectrum_readers.rs +++ 
b/src/file_readers/spectrum_readers.rs @@ -22,10 +22,6 @@ impl FileFormat { Self::MS2Folder(path) => Box::new(MiniTDFReader::new( path.to_str().unwrap_or_default().to_string(), )) as Box, - Self::Unknown(path) => panic!( - "Folder {:} is not spectrum readable", - path.to_str().unwrap_or_default().to_string() - ), }; result } diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index 43705fc..9c1b9d6 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -26,21 +26,21 @@ pub struct DDASpectrumReader { pub path_name: String, precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, - frames: Vec, + ms2_frames: Vec, } impl DDASpectrumReader { pub fn new(path_name: String) -> Self { let tdf_reader: TDFReader = TDFReader::new(&path_name.to_string()); let mz_reader: Tof2MzConverter = tdf_reader.mz_converter; - let frames: Vec = tdf_reader.read_all_frames(); + let ms2_frames: Vec = tdf_reader.read_ms2_frames(); let precursor_reader: PrecursorReader = PrecursorReader::new(&tdf_reader); Self { path_name, precursor_reader, mz_reader, - frames, + ms2_frames, } } @@ -53,7 +53,7 @@ impl DDASpectrumReader { for &index in selection.iter() { let frame: usize = self.precursor_reader.pasef_frames.frame[index] - 1; - if self.frames[frame].intensities.len() == 0 { + if self.ms2_frames[frame].intensities.len() == 0 { continue; } let scan_start: usize = @@ -61,13 +61,13 @@ impl DDASpectrumReader { let scan_end: usize = self.precursor_reader.pasef_frames.scan_end[index]; let offset_start: usize = - self.frames[frame].scan_offsets[scan_start] as usize; + self.ms2_frames[frame].scan_offsets[scan_start] as usize; let offset_end: usize = - self.frames[frame].scan_offsets[scan_end] as usize; + self.ms2_frames[frame].scan_offsets[scan_end] as usize; let tof_selection: &[u32] = - &self.frames[frame].tof_indices[offset_start..offset_end]; + 
&self.ms2_frames[frame].tof_indices[offset_start..offset_end]; let intensity_selection: &[u32] = - &self.frames[frame].intensities[offset_start..offset_end]; + &self.ms2_frames[frame].intensities[offset_start..offset_end]; tof_indices.extend(tof_selection); intensities.extend(intensity_selection); } diff --git a/src/frames.rs b/src/frames.rs index 1f7c3e8..4027d96 100644 --- a/src/frames.rs +++ b/src/frames.rs @@ -1,3 +1,5 @@ +use crate::acquisition::AcquisitionType; + #[derive(Debug, PartialEq, Default)] pub struct Frame { pub scan_offsets: Vec, @@ -8,11 +10,10 @@ pub struct Frame { pub frame_type: FrameType, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone, Copy)] pub enum FrameType { MS1, - MS2DDA, - MS2DIA, + MS2(AcquisitionType), Unknown, } diff --git a/src/lib.rs b/src/lib.rs index 04a8048..774220b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,8 +18,10 @@ //! * analysis.tdf //! * analysis.tdf_bin +mod acquisition; mod calibration; mod converters; +mod errors; mod file_readers; mod frames; mod precursors; @@ -27,6 +29,8 @@ mod spectra; mod vec_utils; pub use crate::{ + acquisition::AcquisitionType, + errors::*, file_readers::FileReader, frames::{Frame, FrameType}, precursors::{Precursor, PrecursorType}, diff --git a/src/main.rs b/src/main.rs index f3bd032..4efdbe8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,7 +4,7 @@ use timsrust::{FileReader, Spectrum}; fn main() { let args: Vec = env::args().collect(); let d_folder_name: &str = &args[1]; - let x = FileReader::new(d_folder_name.to_string()); + let x = FileReader::new(d_folder_name.to_string()).unwrap(); let dda_spectra: Vec = x.read_all_spectra(); let precursor_index: usize; if args.len() >= 3 { diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 29107d0..aa7de7e 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,5 +1,5 @@ use std::path::Path; -use timsrust::{FileReader, Frame, FrameType}; +use timsrust::{AcquisitionType, FileReader, Frame, FrameType}; 
fn get_local_directory() -> &'static Path { Path::new(std::file!()) @@ -15,7 +15,8 @@ fn tdf_reader_frames() { .to_str() .unwrap() .to_string(); - let frames: Vec = FileReader::new(file_path).read_all_frames(); + let frames: Vec = + FileReader::new(file_path).unwrap().read_all_frames(); let expected: Vec = vec![ Frame { scan_offsets: vec![0, 1, 3, 6, 10], @@ -31,7 +32,7 @@ fn tdf_reader_frames() { intensities: (10..36).map(|x| (x + 1) * 2).collect(), index: 2, rt: 0.2, - frame_type: FrameType::MS2DDA, + frame_type: FrameType::MS2(AcquisitionType::DDAPASEF), }, Frame { scan_offsets: vec![0, 9, 19, 30, 42], @@ -47,7 +48,7 @@ fn tdf_reader_frames() { intensities: (78..136).map(|x| (x + 1) * 2).collect(), index: 4, rt: 0.4, - frame_type: FrameType::MS2DDA, + frame_type: FrameType::MS2(AcquisitionType::DDAPASEF), }, ]; for i in 0..frames.len() { diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index c5fd4f5..d38cae3 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -15,7 +15,7 @@ fn minitdf_reader() { .to_str() .unwrap() .to_string(); - let spectra: Vec = FileReader::new(file_path).read_all_spectra(); + let spectra: Vec = FileReader::new(file_path).unwrap().read_all_spectra(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -59,7 +59,7 @@ fn tdf_reader_dda() { .to_str() .unwrap() .to_string(); - let spectra: Vec = FileReader::new(file_path).read_all_spectra(); + let spectra: Vec = FileReader::new(file_path).unwrap().read_all_spectra(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076],