Skip to content

Commit

Permalink
Merge pull request #10 from sebastian-wehner-bruker/feature/updated_m…
Browse files Browse the repository at this point in the history
…iniTDF_conventions

feat: updated to miniTDF conventions. Adapted test names accordingly
  • Loading branch information
sander-willems-bruker authored Nov 6, 2023
2 parents 234ba0c + 58589bd commit b4c021e
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 15 deletions.
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,18 @@ Two primary data types are exposed through TimsRust:
### File formats

Two file formats are supported:
* Bruker .d folder containing:
* TDF - Bruker .d folder containing:
* analysis.tdf
* analysis.tdf_bin
* Bruker .ms2 folder containing:
* converter.ms2.bin
* converter.MS2Spectra.ms2.parquet


* miniTDF - ProteoScape-optimized Bruker file format. Similar to TDF, miniTDF consists of multiple files: a binary '.bin'
and an index '.parquet' file. File names follow the convention `<producing-engine-name>.<domain-name>.<extension>`,
e.g. for MS2 spectrum information: `<producing-engine-name>.ms2spectrum.<extension>`. Therefore, the following files are expected
in the provided ms2 folder:
* *.ms2spectrum.bin
* *.ms2spectrum.parquet


## Python bindings

Expand Down
10 changes: 1 addition & 9 deletions src/file_readers/file_formats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,7 @@ impl FileFormat {
.unwrap_or_default();
let format = match extension {
"d" => Self::DFolder(path),
"ms2" => Self::MS2Folder(path),
_ => {
if let Some(path) = path.parent() {
// Only recurse if there is a valid parent section,
// otherwise we'll get a stack overflow
return Self::parse(path);
}
return Err(FileFormatError::NoParentWithBrukerExtension);
},
_ => Self::MS2Folder(path)
};
format.is_valid()?;
Ok(format)
Expand Down
26 changes: 24 additions & 2 deletions src/file_readers/spectrum_readers/mini_tdf_reader.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::fs;
use {
crate::{
file_readers::{
Expand All @@ -13,6 +14,7 @@ use {
rayon::prelude::*,
std::path::PathBuf,
};
use crate::file_readers::FileFormatError;

#[derive(Debug, Default, Clone)]
pub struct MiniTDFReader {
Expand All @@ -23,6 +25,24 @@ pub struct MiniTDFReader {
frame_reader: BinFileReader,
}

/// Finds the miniTDF MS2 spectrum file with the given extension in a folder.
///
/// Scans the entries of `ms2_dir_path` and returns the file name (not the
/// full path) of the first entry whose name ends with
/// `ms2spectrum.<extension>`.
///
/// # Errors
///
/// If no matching file is found, returns
/// `FileFormatError::MetadataFilesAreMissing` when `extension` is `"parquet"`
/// and `FileFormatError::BinaryFilesAreMissing` for any other extension.
/// An unreadable directory is reported the same way instead of panicking.
fn find_ms2spectrum_file(ms2_dir_path: &str, extension: String) -> Result<String, FileFormatError> {
    let suffix = format!("ms2spectrum.{}", extension);

    // Unreadable directories, failed entries and non-UTF-8 file names are
    // treated as "no match": this function reports problems through its
    // `Result`, so it must not panic on filesystem conditions.
    let found = fs::read_dir(ms2_dir_path).ok().and_then(|entries| {
        entries
            .filter_map(Result::ok)
            .filter_map(|entry| entry.file_name().into_string().ok())
            .find(|name| name.ends_with(&suffix))
    });
    if let Some(filename) = found {
        return Ok(filename);
    }

    // The parquet file carries the index/metadata; every other extension is
    // reported as a missing binary file.
    let err = match extension.as_str() {
        "parquet" => FileFormatError::MetadataFilesAreMissing,
        _ => FileFormatError::BinaryFilesAreMissing,
    };
    println!("No '*.ms2spectrum.{}' file found in '{}'", extension, ms2_dir_path);
    Err(err)
}


impl MiniTDFReader {
pub fn new(path_name: String) -> Self {
let mut reader: MiniTDFReader = Self::default();
Expand All @@ -35,7 +55,8 @@ impl MiniTDFReader {

fn read_parquet_file_name(&mut self) {
let mut path: PathBuf = PathBuf::from(&self.path_name);
path.push("converter.MS2Spectra.ms2.parquet");
let ms2_parquet_file = find_ms2spectrum_file(&self.path_name, "parquet".to_owned()).unwrap();
path.push(ms2_parquet_file);
self.parquet_file_name = path.to_string_lossy().into_owned();
}

Expand All @@ -45,7 +66,8 @@ impl MiniTDFReader {
}
fn set_spectrum_reader(&mut self) {
let mut path: PathBuf = PathBuf::from(&self.path_name);
path.push("converter.ms2.bin");
let ms2_bin_file = find_ms2spectrum_file(&self.path_name, "bin".to_owned()).unwrap();
path.push(ms2_bin_file);
let file_name: String = path.to_string_lossy().into_owned();
self.frame_reader =
BinFileReader::new(String::from(&file_name), self.offsets.clone());
Expand Down
Binary file added tests/test.ms2/converter.ms2spectrum.parquet
Binary file not shown.
File renamed without changes.

0 comments on commit b4c021e

Please sign in to comment.