Skip to content

Commit

Permalink
Merge pull request #11 from MannLabs/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
sander-willems-bruker authored Nov 6, 2023
2 parents 6e21810 + 90e6093 commit 1e734b8
Show file tree
Hide file tree
Showing 18 changed files with 1,090 additions and 39 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "timsrust"
version = "0.2.1"
version = "0.2.2"
edition = "2021"
description = "A crate to read Bruker timsTOF data"
license = "Apache-2.0"
Expand Down
17 changes: 13 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@

![Crates.io](https://img.shields.io/crates/v/timsrust?link=https%3A%2F%2Fcrates.io%2Fcrates%2Ftimsrust)
![docs.rs](https://img.shields.io/docsrs/timsrust?link=https%3A%2F%2Fdocs.rs%2Ftimsrust%2F0.2.1%2Ftimsrust%2F)

# TimsRust

A crate to read Bruker TimsTof data.
Expand All @@ -24,12 +28,17 @@ Two primary data types are exposed through TimsRust:
### File formats

Two file formats are supported:
* Bruker .d folder containing:
* TDF - Bruker .d folder containing:
* analysis.tdf
* analysis.tdf_bin
* Bruker .ms2 folder containing:
* converter.ms2.bin
* converter.MS2Spectra.ms2.parquet

* miniTDF - ProteoScape-optimized Bruker file format. Similar to TDF, miniTDF consists of multiple files: a binary '.bin'
file and an index '.parquet' file. File names follow the convention `<producing-engine-name>.<domain-name>.<extension>`,
e.g. for MS2 spectrum information: `<producing-engine-name>.ms2spectrum.<extension>`. Therefore the following files are expected
in the provided ms2 folder:
* *.ms2spectrum.bin
* *.ms2spectrum.parquet


## Python bindings

Expand Down
5 changes: 5 additions & 0 deletions src/converters.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use linreg::linear_regression;

/// Maps positions in a discrete index domain (e.g. Time of Flight) onto a
/// continuous domain (e.g. m/z).
pub trait ConvertableIndex {
    /// Returns the continuous value that corresponds to `index`.
    ///
    /// Any type that converts into `f64` is accepted, so fractional
    /// indices are valid input as well.
    fn convert<T>(&self, index: T) -> f64
    where
        T: Into<f64> + Copy;
}

/// A converter from TOF -> m/z.
#[derive(Debug, Copy, Clone)]
pub struct Tof2MzConverter {
tof_intercept: f64,
Expand Down Expand Up @@ -40,6 +43,7 @@ impl ConvertableIndex for Tof2MzConverter {
}
}

/// A converter from Scan -> ion mobility.
#[derive(Debug, Copy, Clone)]
pub struct Scan2ImConverter {
scan_intercept: f64,
Expand All @@ -64,6 +68,7 @@ impl ConvertableIndex for Scan2ImConverter {
}
}

/// A converter from Frame -> retention time.
#[derive(Debug, Clone)]
pub struct Frame2RtConverter {
rt_values: Vec<f64>,
Expand Down
47 changes: 46 additions & 1 deletion src/file_readers.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use crate::Error;
use crate::{
converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter},
Error,
};

mod common;
mod file_formats;
Expand All @@ -15,6 +18,8 @@ use {

pub use file_formats::FileFormatError;

use self::frame_readers::tdf_reader::TDFReader;

/// A reader to read [frames](crate::Frame) and [spectra](crate::Spectrum).
pub struct FileReader {
format: FileFormat,
Expand All @@ -37,10 +42,14 @@ impl FileReader {
self.format.read_all_frames()
}

/// Reads all frames, loading data only for the MS1 frames.
///
/// NOTE: The returned vec contains all frames to not disrupt indexing.
/// MS2 frames are set to unknown and not read.
pub fn read_all_ms1_frames(&self) -> Vec<Frame> {
// Delegates to the underlying `FileFormat`.
self.format.read_all_ms1_frames()
}

/// Reads all frames, loading data only for the MS2 frames.
///
/// NOTE: The returned vec contains all frames to not disrupt indexing.
/// MS1 frames are set to unknown and not read.
pub fn read_all_ms2_frames(&self) -> Vec<Frame> {
// Delegates to the underlying `FileFormat`.
self.format.read_all_ms2_frames()
}
Expand All @@ -56,4 +65,40 @@ impl FileReader {
pub fn read_all_spectra(&self) -> Vec<Spectrum> {
// Delegates to the underlying `FileFormat`, which returns every spectrum it can read.
self.format.read_all_spectra()
}

pub fn get_frame_converter(&self) -> Result<Frame2RtConverter, Error> {
match &self.format {
FileFormat::DFolder(path) => Ok(TDFReader::new(
&path.to_str().unwrap_or_default().to_string(),
)
.rt_converter),
_ => Err(Error::FileFormatError(
FileFormatError::MetadataFilesAreMissing,
)),
}
}

pub fn get_scan_converter(&self) -> Result<Scan2ImConverter, Error> {
match &self.format {
FileFormat::DFolder(path) => Ok(TDFReader::new(
&path.to_str().unwrap_or_default().to_string(),
)
.im_converter),
_ => Err(Error::FileFormatError(
FileFormatError::MetadataFilesAreMissing,
)),
}
}

pub fn get_tof_converter(&self) -> Result<Tof2MzConverter, Error> {
match &self.format {
FileFormat::DFolder(path) => Ok(TDFReader::new(
&path.to_str().unwrap_or_default().to_string(),
)
.mz_converter),
_ => Err(Error::FileFormatError(
FileFormatError::MetadataFilesAreMissing,
)),
}
}
}
10 changes: 1 addition & 9 deletions src/file_readers/file_formats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,7 @@ impl FileFormat {
.unwrap_or_default();
let format = match extension {
"d" => Self::DFolder(path),
"ms2" => Self::MS2Folder(path),
_ => {
if let Some(path) = path.parent() {
// Only recurse if there is a valid parent section,
// otherwise we'll get a stack overflow
return Self::parse(path);
}
return Err(FileFormatError::NoParentWithBrukerExtension);
},
_ => Self::MS2Folder(path),
};
format.is_valid()?;
Ok(format)
Expand Down
50 changes: 46 additions & 4 deletions src/file_readers/spectrum_readers/mini_tdf_reader.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use crate::file_readers::FileFormatError;
use std::fs;
use {
crate::{
file_readers::{
Expand All @@ -7,7 +9,7 @@ use {
},
ReadableSpectra,
},
precursors::PrecursorType,
precursors::QuadrupoleEvent,
Precursor, Spectrum,
},
rayon::prelude::*,
Expand All @@ -23,6 +25,41 @@ pub struct MiniTDFReader {
frame_reader: BinFileReader,
}

/// Finds the miniTDF MS2 spectrum file with the given extension.
///
/// Scans the entries directly inside `ms2_dir_path` and returns the file
/// name (not the full path) of the first one whose name ends with
/// `ms2spectrum.<extension>`.
///
/// # Errors
///
/// Returns [`FileFormatError::MetadataFilesAreMissing`] when `extension` is
/// `"parquet"` and [`FileFormatError::BinaryFilesAreMissing`] otherwise, if
/// no matching file exists. A directory that cannot be read is reported the
/// same way instead of panicking; unreadable entries and file names that
/// are not valid UTF-8 are skipped.
fn find_ms2spectrum_file(
    ms2_dir_path: &str,
    extension: String,
) -> Result<String, FileFormatError> {
    // Built once; the suffix does not change while scanning.
    let suffix = format!("ms2spectrum.{}", extension);
    let found = fs::read_dir(ms2_dir_path).ok().and_then(|entries| {
        entries
            .filter_map(Result::ok)
            .filter_map(|entry| entry.file_name().into_string().ok())
            .find(|name| name.ends_with(suffix.as_str()))
    });
    if let Some(filename) = found {
        return Ok(filename);
    }
    println!(
        "No '*.ms2spectrum.{}' file found in '{}'",
        extension, ms2_dir_path
    );
    Err(match extension.as_str() {
        "parquet" => FileFormatError::MetadataFilesAreMissing,
        _ => FileFormatError::BinaryFilesAreMissing,
    })
}

impl MiniTDFReader {
pub fn new(path_name: String) -> Self {
let mut reader: MiniTDFReader = Self::default();
Expand All @@ -35,7 +72,10 @@ impl MiniTDFReader {

/// Locates the `*.ms2spectrum.parquet` index file inside `self.path_name`
/// and stores its full path in `self.parquet_file_name`.
///
/// # Panics
///
/// Panics if `find_ms2spectrum_file` reports that no such file exists.
fn read_parquet_file_name(&mut self) {
    let ms2_parquet_file =
        find_ms2spectrum_file(&self.path_name, "parquet".to_owned())
            .expect("no '*.ms2spectrum.parquet' file in the ms2 folder");
    // `find_ms2spectrum_file` yields a bare file name, so it is appended
    // directly to the folder; the former hard-coded
    // "converter.MS2Spectra.ms2.parquet" component must not be pushed first.
    let mut path: PathBuf = PathBuf::from(&self.path_name);
    path.push(ms2_parquet_file);
    self.parquet_file_name = path.to_string_lossy().into_owned();
}

Expand All @@ -45,7 +85,9 @@ impl MiniTDFReader {
}
fn set_spectrum_reader(&mut self) {
let mut path: PathBuf = PathBuf::from(&self.path_name);
path.push("converter.ms2.bin");
let ms2_bin_file =
find_ms2spectrum_file(&self.path_name, "bin".to_owned()).unwrap();
path.push(ms2_bin_file);
let file_name: String = path.to_string_lossy().into_owned();
self.frame_reader =
BinFileReader::new(String::from(&file_name), self.offsets.clone());
Expand All @@ -56,7 +98,7 @@ impl ReadableSpectra for MiniTDFReader {
/// Reads the spectrum at `index` from the binary file and attaches the
/// precursor stored at the same index.
///
/// # Panics
///
/// Panics if `index` is out of bounds for `self.precursors`.
fn read_single_spectrum(&self, index: usize) -> Spectrum {
    let mut spectrum: Spectrum =
        Spectrum::read_from_file(&self.frame_reader, index);
    // Assigned exactly once, through the renamed `QuadrupoleEvent` enum.
    spectrum.precursor = QuadrupoleEvent::Precursor(self.precursors[index]);
    spectrum
}

Expand Down
14 changes: 10 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@
//!
//! Two file formats are supported:
//!
//! * Bruker .ms2 folder containing:
//! * converter.ms2.bin
//! * converter.MS2Spectra.ms2.parquet
//! * Bruker .d folder containing:
//! * analysis.tdf
//! * analysis.tdf_bin
//! * miniTDF - ProteoScape-optimized Bruker file format. Similar to TDF, miniTDF consists of multiple files: a binary '.bin'
//! file and an index '.parquet' file. File names follow the convention `<producing-engine-name>.<domain-name>.<extension>`,
//! e.g. for MS2 spectrum information: `<producing-engine-name>.ms2spectrum.<extension>`. Therefore the following files are expected
//! in the provided ms2 folder:
//! * *.ms2spectrum.bin
//! * *.ms2spectrum.parquet
mod acquisition;
mod calibration;
Expand All @@ -30,9 +33,12 @@ mod vec_utils;

pub use crate::{
acquisition::AcquisitionType,
converters::{
ConvertableIndex, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter,
},
errors::*,
file_readers::FileReader,
frames::{Frame, FrameType},
precursors::{Precursor, PrecursorType},
precursors::{Precursor, QuadrupoleEvent},
spectra::Spectrum,
};
8 changes: 4 additions & 4 deletions src/precursors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,23 @@ pub struct Precursor {

/// A type of quadrupole selection.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PrecursorType {
pub enum QuadrupoleEvent {
Precursor(Precursor),
// Window(Window),
// PrecursorList(Vec<Precursor>),
None,
}

impl Default for PrecursorType {
impl Default for QuadrupoleEvent {
fn default() -> Self {
Self::None
}
}

impl PrecursorType {
impl QuadrupoleEvent {
pub fn unwrap_as_precursor(&self) -> Precursor {
match self {
PrecursorType::Precursor(precursor) => *precursor,
QuadrupoleEvent::Precursor(precursor) => *precursor,
_ => {
panic!("Not a precursor");
},
Expand Down
6 changes: 3 additions & 3 deletions src/spectra.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
converters::{ConvertableIndex, Tof2MzConverter},
precursors::PrecursorType,
precursors::QuadrupoleEvent,
vec_utils::{filter_with_mask, find_sparse_local_maxima_mask},
Precursor,
};
Expand Down Expand Up @@ -78,7 +78,7 @@ impl RawSpectrumProcessor {
.iter()
.map(|x| *x as f64)
.collect(),
precursor: PrecursorType::Precursor(precursor),
precursor: QuadrupoleEvent::Precursor(precursor),
index: index,
};
spectrum
Expand Down Expand Up @@ -112,6 +112,6 @@ pub struct RawSpectrum {
pub struct Spectrum {
pub mz_values: Vec<f64>,
pub intensities: Vec<f64>,
pub precursor: PrecursorType,
pub precursor: QuadrupoleEvent,
pub index: usize,
}
Binary file added tests/dia_test.d/analysis.tdf
Binary file not shown.
Binary file added tests/dia_test.d/analysis.tdf_bin
Binary file not shown.
4 changes: 4 additions & 0 deletions tests/simulation_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pandas
numpy
pyzstd
ipykernel
Loading

0 comments on commit 1e734b8

Please sign in to comment.