Skip to content

Commit

Permalink
Merge pull request #3 from MannLabs/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
sander-willems-bruker authored Oct 19, 2023
2 parents 73cc748 + d6923ec commit 2986c44
Show file tree
Hide file tree
Showing 17 changed files with 192 additions and 59 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
name: Rust
name: Build and test

on:
push:
branches: [ "main" ]
branches: [ "main", "develop" ]
pull_request:
branches: [ "main" ]
branches: [ "main", "develop" ]
workflow_dispatch:

env:
Expand Down
23 changes: 22 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "timsrust"
version = "0.1.7"
version = "0.2.0"
edition = "2021"
description = "A crate to read Bruker timsTOF data"
license = "Apache-2.0"
Expand All @@ -21,3 +21,4 @@ rayon = "1.5"
linreg = "0.2.0"
bytemuck = "1.13.1"
parquet = "42.0.0"
thiserror = "1.0.0"
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,28 @@ Add this crate to your `Cargo.toml`:
```toml
[dependencies]
timsrust = "x.x.x"
```

## Usage

TimsRust is intended to be used as a library and not as a stand-alone application. An example of how to use it is found in e.g. [Sage](https://github.com/lazear/sage).

### Basics

Two primary data types are exposed through TimsRust:
* Spectra: A traditional representation that expresses intensities as a function of mz values for a given precursor.
* Frames: All recorded data from a single TIMS elution (i.e. at one specific retention_time).

### File formats

Two file formats are supported:
* Bruker .d folder containing:
* analysis.tdf
* analysis.tdf_bin
* Bruker .ms2 folder containing:
* converter.ms2.bin
* converter.MS2Spectra.ms2.parquet

## Python bindings

The [timsrust_pyo3](https://github.com/jspaezp/timsrust_pyo3) package is an example of how the performance of TimsRust can be utilized in Python.
5 changes: 5 additions & 0 deletions src/acquisition.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/// Acquisition scheme carried by an MS2 frame type.
///
/// The TDF reader wraps these in `FrameType::MS2(..)`: frames whose
/// `msms_type` is 8 get `DDAPASEF`, frames whose `msms_type` is 9 get
/// `DIAPASEF`.
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum AcquisitionType {
/// Assigned to frames with `msms_type` 8.
/// NOTE(review): presumably data-dependent acquisition (DDA) PASEF — confirm.
DDAPASEF,
/// Assigned to frames with `msms_type` 9.
/// NOTE(review): presumably data-independent acquisition (DIA) PASEF — confirm.
DIAPASEF,
}
7 changes: 7 additions & 0 deletions src/errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
use crate::file_readers;

/// Crate-level error enum.
///
/// It currently has a single variant that wraps the errors raised by the
/// `file_readers` module.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// A file reader could not be created; the wrapped error is included in
/// the display message. `#[from]` lets `?` convert a
/// `file_readers::FileReaderError` into this variant.
#[error("FileReaderError: {0}")]
FileReaderError(#[from] file_readers::FileReaderError),
}
14 changes: 11 additions & 3 deletions src/file_readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ pub struct FileReader {
}

impl FileReader {
pub fn new<T: AsRef<std::path::Path>>(path_name: T) -> Self {
let format: FileFormat = FileFormat::parse(path_name);
Self { format }
/// Creates a reader for the Bruker data located at `path_name`.
///
/// The file format is detected by `FileFormat::parse`.
///
/// # Errors
///
/// Returns a `FileReaderError` (converted from the underlying
/// `FileFormatError`) when `FileFormat::parse` rejects the path.
pub fn new<T: AsRef<std::path::Path>>(
    path_name: T,
) -> Result<Self, FileReaderError> {
    Ok(Self {
        format: FileFormat::parse(path_name)?,
    })
}

pub fn read_all_frames(&self) -> Vec<Frame> {
Expand All @@ -29,3 +31,9 @@ impl FileReader {
self.format.read_all_spectra()
}
}

/// Errors that can occur while constructing a `FileReader`.
#[derive(thiserror::Error, Debug)]
pub enum FileReaderError {
/// The file format of the given path could not be determined or validated;
/// the wrapped error is included in the display message. `#[from]` lets
/// `?` convert a `file_formats::FileFormatError` into this variant.
#[error("FileFormatError: {0}")]
FileFormatError(#[from] file_formats::FileFormatError),
}
65 changes: 47 additions & 18 deletions src/file_readers/file_formats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@ use std::{fs, path::PathBuf};
pub enum FileFormat {
DFolder(PathBuf),
MS2Folder(PathBuf),
Unknown(PathBuf),
}

impl FileFormat {
pub fn parse(input: impl AsRef<std::path::Path>) -> Self {
pub fn parse(
input: impl AsRef<std::path::Path>,
) -> Result<Self, FileFormatError> {
let path: PathBuf = input.as_ref().to_path_buf();
if !path.exists() {
return Err(FileFormatError::DirectoryDoesNotExist);
}
let extension: &str = path
.extension()
.unwrap_or_default()
Expand All @@ -18,26 +22,39 @@ impl FileFormat {
"d" => Self::DFolder(path),
"ms2" => Self::MS2Folder(path),
_ => {
let parent_path: &std::path::Path =
path.parent().unwrap_or("".as_ref());
Self::parse(parent_path)
if let Some(path) = path.parent() {
// Only recurse if there is a valid parent section,
// otherwise we'll get a stack overflow
return Self::parse(path);
}
return Err(FileFormatError::NoParentWithBrukerExtension);
},
};
if !format.is_valid() {
let path: PathBuf = input.as_ref().to_path_buf();
Self::Unknown(path)
} else {
format
}
format.is_valid()?;
Ok(format)
}

pub fn is_valid(&self) -> bool {
let result: bool = match &self {
Self::DFolder(path) => folder_contains_extension(path, "tdf"),
Self::MS2Folder(path) => folder_contains_extension(path, "parquet"),
Self::Unknown(_) => false,
};
result
/// Checks that the folder holds the files expected for this format.
///
/// `parse` runs this check before handing out a `FileFormat`, so every
/// `FileFormat` obtained through `parse` has passed it.
///
/// # Errors
///
/// * `FileFormatError::BinaryFilesAreMissing` when no `tdf_bin` (for a
///   `DFolder`) or `bin` (for an `MS2Folder`) extension is found.
/// * `FileFormatError::MetadataFilesAreMissing` when no `tdf` (for a
///   `DFolder`) or `parquet` (for an `MS2Folder`) extension is found.
///
/// The binary check is performed first.
fn is_valid(&self) -> Result<(), FileFormatError> {
    // Select the folder and the pair of extensions this variant requires.
    let (folder, binary_extension, metadata_extension) = match self {
        Self::DFolder(path) => (path, "tdf_bin", "tdf"),
        Self::MS2Folder(path) => (path, "bin", "parquet"),
    };
    if !folder_contains_extension(folder, binary_extension) {
        return Err(FileFormatError::BinaryFilesAreMissing);
    }
    if !folder_contains_extension(folder, metadata_extension) {
        return Err(FileFormatError::MetadataFilesAreMissing);
    }
    Ok(())
}
}

Expand All @@ -62,3 +79,15 @@ fn folder_contains_extension(
}
false
}

/// Reasons why a path cannot be turned into a `FileFormat`.
#[derive(thiserror::Error, Debug)]
pub enum FileFormatError {
/// The path handed to `FileFormat::parse` does not exist.
#[error("DirectoryDoesNotExist")]
DirectoryDoesNotExist,
/// The path has neither a `d` nor an `ms2` extension and there is no
/// parent path left to inspect.
#[error("NoParentWithBrukerExtension")]
NoParentWithBrukerExtension,
/// Validation found no `tdf_bin` extension in a `.d` folder, or no `bin`
/// extension in a `.ms2` folder.
#[error("BinaryFilesAreMissing")]
BinaryFilesAreMissing,
/// Validation found no `tdf` extension in a `.d` folder, or no `parquet`
/// extension in a `.ms2` folder.
#[error("MetadataFilesAreMissing")]
MetadataFilesAreMissing,
}
16 changes: 12 additions & 4 deletions src/file_readers/frame_readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ pub trait ReadableFrames {
fn read_single_frame(&self, index: usize) -> Frame;

fn read_all_frames(&self) -> Vec<Frame>;

/// Reads the MS1 frames of the data set.
/// NOTE(review): how non-MS1 frames are represented in the result is left
/// to the implementer — check the concrete reader.
fn read_ms1_frames(&self) -> Vec<Frame>;

/// Reads the MS2 frames of the data set.
/// NOTE(review): how non-MS2 frames are represented in the result is left
/// to the implementer — check the concrete reader.
fn read_ms2_frames(&self) -> Vec<Frame>;
}

impl FileFormat {
Expand All @@ -22,10 +26,6 @@ impl FileFormat {
"Folder {:} is not frame readable",
path.to_str().unwrap_or_default().to_string()
),
Self::Unknown(path) => panic!(
"Folder {:} is not frame readable",
path.to_str().unwrap_or_default().to_string()
),
};
result
}
Expand All @@ -39,4 +39,12 @@ impl ReadableFrames for FileFormat {
fn read_all_frames(&self) -> Vec<Frame> {
self.unwrap_frame_reader().read_all_frames()
}

/// Delegates to the `read_ms1_frames` of the frame reader for this format.
///
/// NOTE(review): `unwrap_frame_reader` appears to panic for formats that
/// are not frame readable — confirm before calling on such a folder.
fn read_ms1_frames(&self) -> Vec<Frame> {
    let frame_reader = self.unwrap_frame_reader();
    frame_reader.read_ms1_frames()
}

/// Delegates to the `read_ms2_frames` of the frame reader for this format.
///
/// NOTE(review): `unwrap_frame_reader` appears to panic for formats that
/// are not frame readable — confirm before calling on such a folder.
fn read_ms2_frames(&self) -> Vec<Frame> {
    let frame_reader = self.unwrap_frame_reader();
    frame_reader.read_ms2_frames()
}
}
41 changes: 34 additions & 7 deletions src/file_readers/frame_readers/tdf_reader.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use {
crate::{
acquisition::AcquisitionType,
converters::{
ConvertableIndex, Frame2RtConverter, Scan2ImConverter,
Tof2MzConverter,
Expand All @@ -26,6 +27,7 @@ pub struct TDFReader {
pub im_converter: Scan2ImConverter,
pub mz_converter: Tof2MzConverter,
pub frame_table: FrameTable,
frame_types: Vec<FrameType>,
}

impl TDFReader {
Expand All @@ -42,6 +44,16 @@ impl TDFReader {
String::from(&file_name),
frame_table.offsets.clone(),
);
let frame_types: Vec<FrameType> = frame_table
.msms_type
.iter()
.map(|msms_type| match msms_type {
0 => FrameType::MS1,
8 => FrameType::MS2(AcquisitionType::DDAPASEF),
9 => FrameType::MS2(AcquisitionType::DIAPASEF),
_ => FrameType::Unknown,
})
.collect();
Self {
path: path.to_string(),
tdf_bin_reader: tdf_bin_reader,
Expand All @@ -50,6 +62,7 @@ impl TDFReader {
mz_converter: Tof2MzConverter::from_sql(&tdf_sql_reader),
frame_table: frame_table,
tdf_sql_reader: tdf_sql_reader,
frame_types: frame_types,
}
}

Expand All @@ -65,13 +78,7 @@ impl ReadableFrames for TDFReader {
Frame::read_from_file(&self.tdf_bin_reader, index);
frame.rt = self.rt_converter.convert(index as u32);
frame.index = self.frame_table.id[index];
let msms_type = self.frame_table.msms_type[index];
frame.frame_type = match msms_type {
0 => FrameType::MS1,
8 => FrameType::MS2DDA,
9 => FrameType::MS2DIA,
_ => FrameType::Unknown,
};
frame.frame_type = self.frame_types[index];
frame
}

Expand All @@ -81,4 +88,24 @@ impl ReadableFrames for TDFReader {
.map(|index| self.read_single_frame(index))
.collect()
}

/// Reads every MS1 frame in parallel.
///
/// The result has one entry per index in `0..self.tdf_bin_reader.size()`.
/// Frames whose type is not `FrameType::MS1` are not skipped: a
/// `Frame::default()` placeholder is stored at their position instead of
/// reading them.
fn read_ms1_frames(&self) -> Vec<Frame> {
    let frame_count = self.tdf_bin_reader.size();
    (0..frame_count)
        .into_par_iter()
        .map(|index| {
            if matches!(self.frame_types[index], FrameType::MS1) {
                self.read_single_frame(index)
            } else {
                Frame::default()
            }
        })
        .collect()
}

/// Reads every MS2 frame in parallel, whatever its acquisition type.
///
/// The result has one entry per index in `0..self.tdf_bin_reader.size()`.
/// Frames whose type is not `FrameType::MS2(_)` are not skipped: a
/// `Frame::default()` placeholder is stored at their position instead of
/// reading them.
fn read_ms2_frames(&self) -> Vec<Frame> {
    let frame_count = self.tdf_bin_reader.size();
    (0..frame_count)
        .into_par_iter()
        .map(|index| {
            if matches!(self.frame_types[index], FrameType::MS2(_)) {
                self.read_single_frame(index)
            } else {
                Frame::default()
            }
        })
        .collect()
}
}
4 changes: 0 additions & 4 deletions src/file_readers/spectrum_readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ impl FileFormat {
Self::MS2Folder(path) => Box::new(MiniTDFReader::new(
path.to_str().unwrap_or_default().to_string(),
)) as Box<dyn ReadableSpectra>,
Self::Unknown(path) => panic!(
"Folder {:} is not spectrum readable",
path.to_str().unwrap_or_default().to_string()
),
};
result
}
Expand Down
Loading

0 comments on commit 2986c44

Please sign in to comment.