From 18cb7c248aa6a6d20fdc97e086a56eba087ba7ee Mon Sep 17 00:00:00 2001 From: Michael Lazear Date: Mon, 16 Oct 2023 17:28:15 -0700 Subject: [PATCH 01/13] fix: stack overflow in FileFormat, more idiomatic Rust --- src/file_readers.rs | 5 ++--- src/file_readers/file_formats.rs | 30 +++++++++++++++------------- src/file_readers/frame_readers.rs | 4 ---- src/file_readers/spectrum_readers.rs | 4 ---- src/lib.rs | 15 ++++++++++++++ src/main.rs | 8 +++++--- tests/frame_readers.rs | 2 +- tests/spectrum_readers.rs | 4 ++-- 8 files changed, 41 insertions(+), 31 deletions(-) diff --git a/src/file_readers.rs b/src/file_readers.rs index ee2a53a..e2bb905 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -16,9 +16,8 @@ pub struct FileReader { } impl FileReader { - pub fn new>(path_name: T) -> Self { - let format: FileFormat = FileFormat::parse(path_name); - Self { format } + pub fn new>(path_name: T) -> Result { + FileFormat::parse(path_name).map(|format| Self { format }) } pub fn read_all_frames(&self) -> Vec { diff --git a/src/file_readers/file_formats.rs b/src/file_readers/file_formats.rs index ed2eb34..b8231aa 100644 --- a/src/file_readers/file_formats.rs +++ b/src/file_readers/file_formats.rs @@ -1,13 +1,14 @@ use std::{fs, path::PathBuf}; +use crate::Error; + pub enum FileFormat { DFolder(PathBuf), - MS2Folder(PathBuf), - Unknown(PathBuf), + MS2Folder(PathBuf) } impl FileFormat { - pub fn parse(input: impl AsRef) -> Self { + pub fn parse(input: impl AsRef) -> Result { let path: PathBuf = input.as_ref().to_path_buf(); let extension: &str = path .extension() @@ -18,26 +19,27 @@ impl FileFormat { "d" => Self::DFolder(path), "ms2" => Self::MS2Folder(path), _ => { - let parent_path: &std::path::Path = - path.parent().unwrap_or("".as_ref()); - Self::parse(parent_path) + if let Some(path) = path.parent() { + // Only recurse if there is a valid parent section, + // otherwise we'll get a stack overflow + return Self::parse(path) + } + return Err(Error::UnknownFileFormat) 
}, }; if !format.is_valid() { - let path: PathBuf = input.as_ref().to_path_buf(); - Self::Unknown(path) + Err(Error::UnknownFileFormat) } else { - format + Ok(format) } } - pub fn is_valid(&self) -> bool { - let result: bool = match &self { + /// FileFormat is guaranteed to be `valid` if it is constructed + fn is_valid(&self) -> bool { + match &self { Self::DFolder(path) => folder_contains_extension(path, "tdf"), Self::MS2Folder(path) => folder_contains_extension(path, "parquet"), - Self::Unknown(_) => false, - }; - result + } } } diff --git a/src/file_readers/frame_readers.rs b/src/file_readers/frame_readers.rs index 5b85e4e..b8c4743 100644 --- a/src/file_readers/frame_readers.rs +++ b/src/file_readers/frame_readers.rs @@ -22,10 +22,6 @@ impl FileFormat { "Folder {:} is not frame readable", path.to_str().unwrap_or_default().to_string() ), - Self::Unknown(path) => panic!( - "Folder {:} is not frame readable", - path.to_str().unwrap_or_default().to_string() - ), }; result } diff --git a/src/file_readers/spectrum_readers.rs b/src/file_readers/spectrum_readers.rs index ece74bf..24d6ff5 100644 --- a/src/file_readers/spectrum_readers.rs +++ b/src/file_readers/spectrum_readers.rs @@ -22,10 +22,6 @@ impl FileFormat { Self::MS2Folder(path) => Box::new(MiniTDFReader::new( path.to_str().unwrap_or_default().to_string(), )) as Box, - Self::Unknown(path) => panic!( - "Folder {:} is not spectrum readable", - path.to_str().unwrap_or_default().to_string() - ), }; result } diff --git a/src/lib.rs b/src/lib.rs index 04a8048..c4f3ba9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,3 +32,18 @@ pub use crate::{ precursors::{Precursor, PrecursorType}, spectra::{RawSpectrum, Spectrum}, }; + +#[derive(Debug)] +pub enum Error { + UnknownFileFormat +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::UnknownFileFormat => f.write_str("unknown file format"), + } + } +} + +impl std::error::Error for Error {} \ 
No newline at end of file diff --git a/src/main.rs b/src/main.rs index f3bd032..2168a38 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,10 @@ use std::env; -use timsrust::{FileReader, Spectrum}; +use timsrust::{FileReader, Spectrum, Error}; -fn main() { +fn main() -> Result<(), Error> { let args: Vec = env::args().collect(); let d_folder_name: &str = &args[1]; - let x = FileReader::new(d_folder_name.to_string()); + let x = FileReader::new(d_folder_name.to_string())?; let dda_spectra: Vec = x.read_all_spectra(); let precursor_index: usize; if args.len() >= 3 { @@ -24,4 +24,6 @@ fn main() { ); println!("precursor {:?}", dda_spectra[precursor_index].mz_values); println!("precursor {:?}", dda_spectra[precursor_index].intensities); + + Ok(()) } diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 29107d0..4aaad12 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -15,7 +15,7 @@ fn tdf_reader_frames() { .to_str() .unwrap() .to_string(); - let frames: Vec = FileReader::new(file_path).read_all_frames(); + let frames: Vec = FileReader::new(file_path).unwrap().read_all_frames(); let expected: Vec = vec![ Frame { scan_offsets: vec![0, 1, 3, 6, 10], diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index c5fd4f5..d38cae3 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -15,7 +15,7 @@ fn minitdf_reader() { .to_str() .unwrap() .to_string(); - let spectra: Vec = FileReader::new(file_path).read_all_spectra(); + let spectra: Vec = FileReader::new(file_path).unwrap().read_all_spectra(); let expected: Vec = vec![ Spectrum { mz_values: vec![100.0, 200.002, 300.03, 400.4], @@ -59,7 +59,7 @@ fn tdf_reader_dda() { .to_str() .unwrap() .to_string(); - let spectra: Vec = FileReader::new(file_path).read_all_spectra(); + let spectra: Vec = FileReader::new(file_path).unwrap().read_all_spectra(); let expected: Vec = vec![ Spectrum { mz_values: vec![199.7633445943076], From b003b9b70f8f0d238b10686392eb9d304bd32f5e 
Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 19 Oct 2023 10:40:24 +0200 Subject: [PATCH 02/13] CHORE: added thiserror to cargo.tol --- Cargo.lock | 21 +++++++++++++++++++++ Cargo.toml | 1 + 2 files changed, 22 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index bc9a8b4..88f3be4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -895,6 +895,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", +] + [[package]] name = "thrift" version = "0.17.0" @@ -916,6 +936,7 @@ dependencies = [ "parquet", "rayon", "rusqlite", + "thiserror", "zstd", ] diff --git a/Cargo.toml b/Cargo.toml index 63a9e05..8266631 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,3 +21,4 @@ rayon = "1.5" linreg = "0.2.0" bytemuck = "1.13.1" parquet = "42.0.0" +thiserror = "1.0.0" From aaa165d2985f2cc9a82c67b07a7c1dc90c6ac49e Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 19 Oct 2023 10:41:20 +0200 Subject: [PATCH 03/13] FEAT: cleaned up format class with explicit error types --- src/file_readers/file_formats.rs | 55 ++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/src/file_readers/file_formats.rs b/src/file_readers/file_formats.rs index b8231aa..808ed8b 100644 --- a/src/file_readers/file_formats.rs +++ b/src/file_readers/file_formats.rs @@ -1,15 +1,18 @@ use std::{fs, path::PathBuf}; -use crate::Error; - pub enum FileFormat { DFolder(PathBuf), - MS2Folder(PathBuf) + MS2Folder(PathBuf), } impl FileFormat { - pub fn parse(input: impl AsRef) -> Result { + 
pub fn parse( + input: impl AsRef, + ) -> Result { let path: PathBuf = input.as_ref().to_path_buf(); + if !path.exists() { + return Err(FileFormatError::DirectoryDoesNotExist); + } let extension: &str = path .extension() .unwrap_or_default() @@ -22,24 +25,36 @@ impl FileFormat { if let Some(path) = path.parent() { // Only recurse if there is a valid parent section, // otherwise we'll get a stack overflow - return Self::parse(path) + return Self::parse(path); } - return Err(Error::UnknownFileFormat) + return Err(FileFormatError::NoParentWithBrukerExtension); }, }; - if !format.is_valid() { - Err(Error::UnknownFileFormat) - } else { - Ok(format) - } + format.is_valid()?; + Ok(format) } /// FileFormat is guaranteed to be `valid` if it is constructed - fn is_valid(&self) -> bool { + fn is_valid(&self) -> Result<(), FileFormatError> { match &self { - Self::DFolder(path) => folder_contains_extension(path, "tdf"), - Self::MS2Folder(path) => folder_contains_extension(path, "parquet"), + Self::DFolder(path) => { + if !folder_contains_extension(path, "tdf_bin") { + return Err(FileFormatError::BinaryFilesAreMissing); + } + if !folder_contains_extension(path, "tdf") { + return Err(FileFormatError::MetadataFilesAreMissing); + } + }, + Self::MS2Folder(path) => { + if !folder_contains_extension(path, "bin") { + return Err(FileFormatError::BinaryFilesAreMissing); + } + if !folder_contains_extension(path, "parquet") { + return Err(FileFormatError::MetadataFilesAreMissing); + } + }, } + Ok(()) } } @@ -64,3 +79,15 @@ fn folder_contains_extension( } false } + +#[derive(thiserror::Error, Debug)] +pub enum FileFormatError { + #[error("DirectoryDoesNotExist")] + DirectoryDoesNotExist, + #[error("NoParentWithBrukerExtension")] + NoParentWithBrukerExtension, + #[error("BinaryFilesAreMissing")] + BinaryFilesAreMissing, + #[error("MetadataFilesAreMissing")] + MetadataFilesAreMissing, +} From 815b9822475faba68c97726650359f22794f333f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 19 
Oct 2023 10:42:21 +0200 Subject: [PATCH 04/13] FEAT: Clened up filereader with explicit error message --- src/file_readers.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/file_readers.rs b/src/file_readers.rs index e2bb905..7a2d38f 100644 --- a/src/file_readers.rs +++ b/src/file_readers.rs @@ -16,8 +16,11 @@ pub struct FileReader { } impl FileReader { - pub fn new>(path_name: T) -> Result { - FileFormat::parse(path_name).map(|format| Self { format }) + pub fn new>( + path_name: T, + ) -> Result { + let format: FileFormat = FileFormat::parse(path_name)?; + Ok(Self { format }) } pub fn read_all_frames(&self) -> Vec { @@ -28,3 +31,9 @@ impl FileReader { self.format.read_all_spectra() } } + +#[derive(thiserror::Error, Debug)] +pub enum FileReaderError { + #[error("FileFormatError: {0}")] + FileFormatError(#[from] file_formats::FileFormatError), +} From bc57fba098cf310f9f20d611a5758ba1566cf911 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 19 Oct 2023 10:43:21 +0200 Subject: [PATCH 05/13] FEAT: Reorganized error handling with separate module --- src/errors.rs | 7 +++++++ src/lib.rs | 17 ++--------------- 2 files changed, 9 insertions(+), 15 deletions(-) create mode 100644 src/errors.rs diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 0000000..6935f57 --- /dev/null +++ b/src/errors.rs @@ -0,0 +1,7 @@ +use crate::file_readers; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("FileReaderError: {0}")] + FileReaderError(#[from] file_readers::FileReaderError), +} diff --git a/src/lib.rs b/src/lib.rs index c4f3ba9..a74ae57 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,6 +20,7 @@ mod calibration; mod converters; +mod errors; mod file_readers; mod frames; mod precursors; @@ -27,23 +28,9 @@ mod spectra; mod vec_utils; pub use crate::{ + errors::*, file_readers::FileReader, frames::{Frame, FrameType}, precursors::{Precursor, PrecursorType}, spectra::{RawSpectrum, Spectrum}, }; - 
-#[derive(Debug)] -pub enum Error { - UnknownFileFormat -} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::UnknownFileFormat => f.write_str("unknown file format"), - } - } -} - -impl std::error::Error for Error {} \ No newline at end of file From fedbbb69a8325105e263fd1d5eab89a932c69e1c Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 19 Oct 2023 10:44:41 +0200 Subject: [PATCH 06/13] CHORE: main class does not return anything as timsrust is not meant to be called as application but only used as library --- src/main.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main.rs b/src/main.rs index 2168a38..4efdbe8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,10 @@ use std::env; -use timsrust::{FileReader, Spectrum, Error}; +use timsrust::{FileReader, Spectrum}; -fn main() -> Result<(), Error> { +fn main() { let args: Vec = env::args().collect(); let d_folder_name: &str = &args[1]; - let x = FileReader::new(d_folder_name.to_string())?; + let x = FileReader::new(d_folder_name.to_string()).unwrap(); let dda_spectra: Vec = x.read_all_spectra(); let precursor_index: usize; if args.len() >= 3 { @@ -24,6 +24,4 @@ fn main() -> Result<(), Error> { ); println!("precursor {:?}", dda_spectra[precursor_index].mz_values); println!("precursor {:?}", dda_spectra[precursor_index].intensities); - - Ok(()) } From 918b03dc085fabb04decaf3613ea5331ae998f83 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 19 Oct 2023 10:46:47 +0200 Subject: [PATCH 07/13] CHORE: updated build and test github action --- .github/workflows/rust.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index a458ca5..42dcff8 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -1,10 +1,10 @@ -name: Rust +name: Build and test on: push: - branches: [ "main" ] + branches: [ "main", 
"develop" ] pull_request: - branches: [ "main" ] + branches: [ "main", "develop" ] workflow_dispatch: env: From 81e499959d1aaf46cd34b1841c1b4c1984f05f1e Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 19 Oct 2023 11:08:00 +0200 Subject: [PATCH 08/13] CHORE: minor version update that includes better error handling. --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 88f3be4..fa3adf8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -928,7 +928,7 @@ dependencies = [ [[package]] name = "timsrust" -version = "0.1.7" +version = "0.2.0" dependencies = [ "bytemuck", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 8266631..f606a21 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "timsrust" -version = "0.1.7" +version = "0.2.0" edition = "2021" description = "A crate to read Bruker timsTOF data" license = "Apache-2.0" From 997c36d0c148b6701d11d59b999177d95bec625f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 19 Oct 2023 15:05:51 +0200 Subject: [PATCH 09/13] FEAT: introduced acquistiontype enum --- src/acquisition.rs | 5 +++++ src/lib.rs | 2 ++ 2 files changed, 7 insertions(+) create mode 100644 src/acquisition.rs diff --git a/src/acquisition.rs b/src/acquisition.rs new file mode 100644 index 0000000..10b21b9 --- /dev/null +++ b/src/acquisition.rs @@ -0,0 +1,5 @@ +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum AcquisitionType { + DDAPASEF, + DIAPASEF, +} diff --git a/src/lib.rs b/src/lib.rs index a74ae57..774220b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,7 @@ //! * analysis.tdf //! 
* analysis.tdf_bin +mod acquisition; mod calibration; mod converters; mod errors; @@ -28,6 +29,7 @@ mod spectra; mod vec_utils; pub use crate::{ + acquisition::AcquisitionType, errors::*, file_readers::FileReader, frames::{Frame, FrameType}, From ffa603bf9f65b1e433491c2457e90739e8480d2a Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 19 Oct 2023 15:08:37 +0200 Subject: [PATCH 10/13] FEAT: madde frametype acquisition dependent --- src/file_readers/frame_readers/tdf_reader.rs | 21 +++++++++++++------- src/frames.rs | 7 ++++--- tests/frame_readers.rs | 9 +++++---- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index 0d59a4f..4f31aae 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -1,5 +1,6 @@ use { crate::{ + acquisition::AcquisitionType, converters::{ ConvertableIndex, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, @@ -26,6 +27,7 @@ pub struct TDFReader { pub im_converter: Scan2ImConverter, pub mz_converter: Tof2MzConverter, pub frame_table: FrameTable, + frame_types: Vec, } impl TDFReader { @@ -42,6 +44,16 @@ impl TDFReader { String::from(&file_name), frame_table.offsets.clone(), ); + let frame_types: Vec = frame_table + .msms_type + .iter() + .map(|msms_type| match msms_type { + 0 => FrameType::MS1, + 8 => FrameType::MS2(AcquisitionType::DDAPASEF), + 9 => FrameType::MS2(AcquisitionType::DIAPASEF), + _ => FrameType::Unknown, + }) + .collect(); Self { path: path.to_string(), tdf_bin_reader: tdf_bin_reader, @@ -50,6 +62,7 @@ impl TDFReader { mz_converter: Tof2MzConverter::from_sql(&tdf_sql_reader), frame_table: frame_table, tdf_sql_reader: tdf_sql_reader, + frame_types: frame_types, } } @@ -65,13 +78,7 @@ impl ReadableFrames for TDFReader { Frame::read_from_file(&self.tdf_bin_reader, index); frame.rt = self.rt_converter.convert(index as u32); frame.index = 
self.frame_table.id[index]; - let msms_type = self.frame_table.msms_type[index]; - frame.frame_type = match msms_type { - 0 => FrameType::MS1, - 8 => FrameType::MS2DDA, - 9 => FrameType::MS2DIA, - _ => FrameType::Unknown, - }; + frame.frame_type = self.frame_types[index]; frame } diff --git a/src/frames.rs b/src/frames.rs index 1f7c3e8..4027d96 100644 --- a/src/frames.rs +++ b/src/frames.rs @@ -1,3 +1,5 @@ +use crate::acquisition::AcquisitionType; + #[derive(Debug, PartialEq, Default)] pub struct Frame { pub scan_offsets: Vec, @@ -8,11 +10,10 @@ pub struct Frame { pub frame_type: FrameType, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone, Copy)] pub enum FrameType { MS1, - MS2DDA, - MS2DIA, + MS2(AcquisitionType), Unknown, } diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 4aaad12..aa7de7e 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -1,5 +1,5 @@ use std::path::Path; -use timsrust::{FileReader, Frame, FrameType}; +use timsrust::{AcquisitionType, FileReader, Frame, FrameType}; fn get_local_directory() -> &'static Path { Path::new(std::file!()) @@ -15,7 +15,8 @@ fn tdf_reader_frames() { .to_str() .unwrap() .to_string(); - let frames: Vec = FileReader::new(file_path).unwrap().read_all_frames(); + let frames: Vec = + FileReader::new(file_path).unwrap().read_all_frames(); let expected: Vec = vec![ Frame { scan_offsets: vec![0, 1, 3, 6, 10], @@ -31,7 +32,7 @@ fn tdf_reader_frames() { intensities: (10..36).map(|x| (x + 1) * 2).collect(), index: 2, rt: 0.2, - frame_type: FrameType::MS2DDA, + frame_type: FrameType::MS2(AcquisitionType::DDAPASEF), }, Frame { scan_offsets: vec![0, 9, 19, 30, 42], @@ -47,7 +48,7 @@ fn tdf_reader_frames() { intensities: (78..136).map(|x| (x + 1) * 2).collect(), index: 4, rt: 0.4, - frame_type: FrameType::MS2DDA, + frame_type: FrameType::MS2(AcquisitionType::DDAPASEF), }, ]; for i in 0..frames.len() { From 771e18f41dacf6e36035649774ec12ca783a4457 Mon Sep 17 00:00:00 2001 From: Sander 
Willems Date: Thu, 19 Oct 2023 15:09:26 +0200 Subject: [PATCH 11/13] FEAT: implemented option to only read ms1 or ms2 frames for readableframes trait --- src/file_readers/frame_readers.rs | 12 ++++++++++++ src/file_readers/frame_readers/tdf_reader.rs | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/src/file_readers/frame_readers.rs b/src/file_readers/frame_readers.rs index b8c4743..b8cdf91 100644 --- a/src/file_readers/frame_readers.rs +++ b/src/file_readers/frame_readers.rs @@ -10,6 +10,10 @@ pub trait ReadableFrames { fn read_single_frame(&self, index: usize) -> Frame; fn read_all_frames(&self) -> Vec; + + fn read_ms1_frames(&self) -> Vec; + + fn read_ms2_frames(&self) -> Vec; } impl FileFormat { @@ -35,4 +39,12 @@ impl ReadableFrames for FileFormat { fn read_all_frames(&self) -> Vec { self.unwrap_frame_reader().read_all_frames() } + + fn read_ms1_frames(&self) -> Vec { + self.unwrap_frame_reader().read_ms1_frames() + } + + fn read_ms2_frames(&self) -> Vec { + self.unwrap_frame_reader().read_ms2_frames() + } } diff --git a/src/file_readers/frame_readers/tdf_reader.rs b/src/file_readers/frame_readers/tdf_reader.rs index 4f31aae..7ab9b43 100644 --- a/src/file_readers/frame_readers/tdf_reader.rs +++ b/src/file_readers/frame_readers/tdf_reader.rs @@ -88,4 +88,24 @@ impl ReadableFrames for TDFReader { .map(|index| self.read_single_frame(index)) .collect() } + + fn read_ms1_frames(&self) -> Vec { + (0..self.tdf_bin_reader.size()) + .into_par_iter() + .map(|index| match self.frame_types[index] { + FrameType::MS1 => self.read_single_frame(index), + _ => Frame::default(), + }) + .collect() + } + + fn read_ms2_frames(&self) -> Vec { + (0..self.tdf_bin_reader.size()) + .into_par_iter() + .map(|index| match self.frame_types[index] { + FrameType::MS2(_) => self.read_single_frame(index), + _ => Frame::default(), + }) + .collect() + } } From 1ed2d541237010132eec3ee95559f08c50dd2ebb Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 19 Oct 2023 
15:09:54 +0200 Subject: [PATCH 12/13] FEAT: imporved speed performance of ddareader by only reading ms2 frames --- src/file_readers/spectrum_readers/dda_reader.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/file_readers/spectrum_readers/dda_reader.rs b/src/file_readers/spectrum_readers/dda_reader.rs index 43705fc..9c1b9d6 100644 --- a/src/file_readers/spectrum_readers/dda_reader.rs +++ b/src/file_readers/spectrum_readers/dda_reader.rs @@ -26,21 +26,21 @@ pub struct DDASpectrumReader { pub path_name: String, precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, - frames: Vec, + ms2_frames: Vec, } impl DDASpectrumReader { pub fn new(path_name: String) -> Self { let tdf_reader: TDFReader = TDFReader::new(&path_name.to_string()); let mz_reader: Tof2MzConverter = tdf_reader.mz_converter; - let frames: Vec = tdf_reader.read_all_frames(); + let ms2_frames: Vec = tdf_reader.read_ms2_frames(); let precursor_reader: PrecursorReader = PrecursorReader::new(&tdf_reader); Self { path_name, precursor_reader, mz_reader, - frames, + ms2_frames, } } @@ -53,7 +53,7 @@ impl DDASpectrumReader { for &index in selection.iter() { let frame: usize = self.precursor_reader.pasef_frames.frame[index] - 1; - if self.frames[frame].intensities.len() == 0 { + if self.ms2_frames[frame].intensities.len() == 0 { continue; } let scan_start: usize = @@ -61,13 +61,13 @@ impl DDASpectrumReader { let scan_end: usize = self.precursor_reader.pasef_frames.scan_end[index]; let offset_start: usize = - self.frames[frame].scan_offsets[scan_start] as usize; + self.ms2_frames[frame].scan_offsets[scan_start] as usize; let offset_end: usize = - self.frames[frame].scan_offsets[scan_end] as usize; + self.ms2_frames[frame].scan_offsets[scan_end] as usize; let tof_selection: &[u32] = - &self.frames[frame].tof_indices[offset_start..offset_end]; + &self.ms2_frames[frame].tof_indices[offset_start..offset_end]; let intensity_selection: &[u32] = - 
&self.frames[frame].intensities[offset_start..offset_end]; + &self.ms2_frames[frame].intensities[offset_start..offset_end]; tof_indices.extend(tof_selection); intensities.extend(intensity_selection); } From d6923ec0d70ea5cf92375fe48b842a77ed344b5a Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Thu, 19 Oct 2023 16:14:28 +0200 Subject: [PATCH 13/13] DOCS: readme updates --- README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.md b/README.md index e2d0068..73965a4 100644 --- a/README.md +++ b/README.md @@ -9,3 +9,28 @@ Add this crate to your `Cargo.toml`: ```toml [dependencies] timsrust = "x.x.x" +``` + +## Usage + +TimsRust is intended to be used as a library and not as a stand-alone application. An example of how to use it can be found in, e.g., [Sage](https://github.com/lazear/sage). + +### Basics + +Two primary data types are exposed through TimsRust: +* Spectra: A traditional representation that expresses intensities as a function of m/z values for a given precursor. +* Frames: All recorded data from a single TIMS elution (i.e. at one specific retention_time). + +### File formats + +Two file formats are supported: +* Bruker .d folder containing: + * analysis.tdf + * analysis.tdf_bin +* Bruker .ms2 folder containing: + * converter.ms2.bin + * converter.MS2Spectra.ms2.parquet + +## Python bindings + +The [timsrust_pyo3](https://github.com/jspaezp/timsrust_pyo3) package is an example of how the performance of TimsRust can be utilized in Python