From 05a39d2948446069c3aee52867804c5f993a227a Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 24 Jan 2024 10:12:57 +0100 Subject: [PATCH 01/11] FEAT: updated to direct zstd decompression rather than using a decompressor object --- src/file_readers/common/ms_data_blobs/processors.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/file_readers/common/ms_data_blobs/processors.rs b/src/file_readers/common/ms_data_blobs/processors.rs index ded07d2..57db893 100644 --- a/src/file_readers/common/ms_data_blobs/processors.rs +++ b/src/file_readers/common/ms_data_blobs/processors.rs @@ -1,5 +1,4 @@ -use std::io::{prelude::*, BufReader}; -use zstd::stream::read::Decoder; +use zstd::decode_all; use super::readers::{MSDataBlob, MSDataBlobReader, MSDataBlobState}; @@ -17,11 +16,7 @@ impl MSDataBlobProcessor { fn decompress(mut self) -> Self { if self.ms_data_blob.data.len() != 0 { let reader: &[u8] = &self.ms_data_blob.data; - let mut decoder: Decoder> = Decoder::new(reader) - .expect("Cannot set decoder. Are the bytes correct?"); - let mut buf: Vec = Vec::new(); - decoder - .read_to_end(&mut buf) + let buf = decode_all(reader) .expect("Cannot decompress bytes. Are they zstd compressed?"); self.ms_data_blob.data = buf; } From 8192890523942cb595df22dfc51be2f5abcbb557 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 24 Jan 2024 15:51:55 +0100 Subject: [PATCH 02/11] FEAT: updated cargo to include criterion and memmapping --- Cargo.lock | 379 ++++++++++++++++++++++++++++++++++++++++++++++++++--- Cargo.toml | 10 +- 2 files changed, 368 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2e4d89a..c6b75c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -33,6 +33,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + [[package]] name = "alloc-no-stdlib" version = "2.0.4" @@ -63,6 +72,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "arrow-array" version = "42.0.0" @@ -74,7 +89,7 @@ dependencies = [ "arrow-data", "arrow-schema", "chrono", - "half", + "half 2.2.1", "hashbrown 0.14.0", "num", ] @@ -85,7 +100,7 @@ version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30ce342ecf5971004e23cef8b5fb3bacd2bbc48a381464144925074e1472e9eb" dependencies = [ - "half", + "half 2.2.1", "num", ] @@ -101,7 +116,7 @@ dependencies = [ "arrow-schema", "arrow-select", "chrono", - "half", + "half 2.2.1", "lexical-core", "num", ] @@ -114,7 +129,7 @@ checksum = "1d9a83dad6a53d6907765106d3bc61d6d9d313cfe1751701b3ef0948e7283dc2" dependencies = [ "arrow-buffer", "arrow-schema", - "half", + "half 2.2.1", "num", ] @@ -151,6 +166,17 @@ dependencies = [ "num", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -220,6 +246,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.0.79" @@ -247,6 +279,54 @@ dependencies = [ "winapi", ] +[[package]] +name = "ciborium" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" + +[[package]] +name = "ciborium-ll" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +dependencies = [ + "ciborium-io", + "half 1.8.2", +] + +[[package]] +name = "clap" +version = "3.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" +dependencies = [ + "bitflags 1.3.2", + "clap_lex", + "indexmap", + "textwrap", +] + +[[package]] +name = "clap_lex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +dependencies = [ + "os_str_bytes", +] + [[package]] name = "const-random" version = "0.1.15" @@ -284,6 +364,42 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" +dependencies = [ + "anes", + "atty", + "cast", + "ciborium", + "clap", + "criterion-plot", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + [[package]] name = "crossbeam-channel" version = "0.5.8" @@ -393,6 +509,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + [[package]] name = "half" version = "2.2.1" @@ -427,6 +549,15 @@ dependencies = [ "hashbrown 0.12.3", ] +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.2.6" @@ -459,12 +590,37 @@ dependencies = [ "cc", ] +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + [[package]] name = "integer-encoding" version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" + [[package]] name = "jobserver" version = "0.1.26" @@ -476,13 +632,19 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "lexical-core" version = "0.8.5" @@ -549,9 +711,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.142" +version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" [[package]] name = "libm" @@ -606,6 +768,21 @@ dependencies = [ "libc", ] +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "memmap2" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.8.0" @@ -707,7 +884,7 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" dependencies = [ - "hermit-abi", + "hermit-abi 0.2.6", "libc", ] @@ -717,6 +894,12 @@ version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "ordered-float" version = "2.10.0" @@ -726,6 +909,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" + [[package]] name = "parquet" version = "42.0.0" @@ -769,6 +958,34 @@ version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +[[package]] +name = "plotters" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" + +[[package]] +name = "plotters-svg" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +dependencies = [ + "plotters-backend", +] + [[package]] name = "proc-macro-hack" version = "0.5.20+deprecated" @@ -815,6 +1032,35 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "regex" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + [[package]] name = "rusqlite" version = "0.29.0" @@ -838,6 +1084,21 @@ dependencies = [ "semver", ] +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.1.0" @@ -856,6 +1117,37 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" +[[package]] +name = "serde" +version = "1.0.164" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.164" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.18", +] + +[[package]] +name = "serde_json" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + [[package]] name = "smallvec" version = "1.10.0" @@ -896,6 +1188,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "textwrap" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" + [[package]] name = "thiserror" version = "1.0.40" @@ -929,11 +1227,13 @@ dependencies = [ [[package]] name = "timsrust" -version = "0.2.3" +version = "0.2.4" dependencies = [ "bytemuck", "byteorder", + "criterion", "linreg", + "memmap2", "parquet", "rayon", "rusqlite", @@ -950,6 +1250,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "twox-hash" version = "1.6.3" @@ -978,6 +1288,16 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -986,9 +1306,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -996,9 +1316,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" dependencies = [ "bumpalo", "log", @@ -1011,9 +1331,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1021,9 +1341,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" dependencies = [ "proc-macro2", "quote", @@ -1034,9 +1354,19 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" + +[[package]] +name = "web-sys" +version = "0.3.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed" +dependencies = [ + "js-sys", + "wasm-bindgen", +] [[package]] name = "winapi" @@ -1054,6 +1384,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index f41ffe7..8b7377a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "timsrust" -version = "0.2.3" +version = "0.2.4" edition = "2021" description = "A crate to read Bruker timsTOF data" license = "Apache-2.0" @@ -22,3 +22,11 @@ linreg = "0.2.0" bytemuck = "1.13.1" parquet = "42.0.0" thiserror = "1.0.0" +memmap2 = "0.9.3" + +[dev-dependencies] +criterion = { version = "0.4", features = ["html_reports"] } + +[[bench]] +name = "frame_reader" +harness = false From d841219b73ab822e52975c8de24070fb5617c2d4 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 24 Jan 2024 15:52:33 +0100 Subject: [PATCH 03/11] CHORE: included file size reporting on github workflow --- .github/workflows/rust.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 42dcff8..297672f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -19,5 +19,7 @@ jobs: - uses: actions/checkout@v3 - name: Build run: cargo build --verbose + - name: File sizes + run: find src/ -name '*.rs' | xargs wc -l | sort -nr - name: Run tests run: cargo test --verbose From b5e892d8547c41140a25c4dc0436a1e54b7b4d7e Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 24 Jan 2024 15:54:22 +0100 Subject: [PATCH 04/11] BENCH: included basic benchmarking option --- Cargo.toml | 2 +- benches/speed_performance.rs | 43 ++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 benches/speed_performance.rs diff --git a/Cargo.toml b/Cargo.toml index 8b7377a..dc11952 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,5 +28,5 @@ memmap2 = "0.9.3" criterion = { version = "0.4", features = ["html_reports"] } [[bench]] -name = "frame_reader" +name = "speed_performance" harness = false diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs new file mode 100644 index 0000000..40ef114 --- /dev/null +++ b/benches/speed_performance.rs @@ -0,0 +1,43 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use timsrust::FileReader; + +fn read_all_frames(file_reader: &FileReader) { + file_reader.read_all_frames(); +} + +fn read_all_ms1_frames(file_reader: &FileReader) { + file_reader.read_all_ms1_frames(); +} + +fn read_all_ms2_frames(file_reader: &FileReader) { + file_reader.read_all_ms2_frames(); +} + +fn read_all_spectra(file_reader: &FileReader) { + file_reader.read_all_spectra(); +} + +fn criterion_benchmark(c: &mut Criterion) { + // c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20)))); + let mut group = c.benchmark_group("sample-size-example"); + group.significance_level(0.001).sample_size(10); + let d_folder_name: &str = "/home/sander/data/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/"; + let file_reader: FileReader = + FileReader::new(d_folder_name.to_string()).unwrap(); + group.bench_function("read_all_frames 6m dda", |b| { + b.iter(|| read_all_frames(black_box(&file_reader))) + }); + group.bench_function("read_all_ms1_frames 6m dda", |b| { + b.iter(|| read_all_ms1_frames(black_box(&file_reader))) + }); + group.bench_function("read_all_ms2_frames 6m dda", |b| { + b.iter(|| read_all_ms2_frames(black_box(&file_reader))) + }); + group.bench_function("read_all_spectra 6m dda", |b| { + b.iter(|| read_all_spectra(black_box(&file_reader))) + }); + group.finish(); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); From c276d1be60c19bca1f54ba0e3fb8c8e84a911a90 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 24 Jan 2024 16:19:39 +0100 Subject: [PATCH 05/11] FEAT: sped up raw reading by using mmapping and avoiding intermediate vectors --- src/file_readers/common/ms_data_blobs.rs | 70 ++++++++---- .../common/ms_data_blobs/parsers.rs | 105 ++++++++++-------- .../spectrum_readers/mini_tdf_reader.rs | 15 ++- 3 files changed, 118 insertions(+), 72 deletions(-) diff --git a/src/file_readers/common/ms_data_blobs.rs b/src/file_readers/common/ms_data_blobs.rs index 36071e6..3f4b802 100644 --- a/src/file_readers/common/ms_data_blobs.rs +++ b/src/file_readers/common/ms_data_blobs.rs @@ -1,28 +1,42 @@ mod parsers; -mod processors; -mod readers; + +use std::fs::File; + +use memmap2::Mmap; +use zstd::decode_all; use crate::{Frame, Spectrum}; -use self::{parsers::parse_frame, processors::MSDataBlobProcessor}; +use self::parsers::parse_frame; -#[derive(Debug, Default, Clone)] +#[derive(Debug, Default)] pub struct BinFileReader { - file_name: String, file_offsets: Vec, + mmap: Option, } impl BinFileReader { pub fn new(file_name: String, file_offsets: Vec) -> Self { - Self { - file_name, - file_offsets, - } + let tdf_bin_file: File = File::open(&file_name) + .expect("File cannot be opened. Is the path correct?"); + let mmap: Option = + Some(unsafe { Mmap::map(&tdf_bin_file).unwrap() }); + Self { file_offsets, mmap } } - fn read_blob(&self, index: usize) -> Vec { + fn read_blob(&self, index: usize) -> Vec { let offset: u64 = self.file_offsets[index as usize]; - MSDataBlobProcessor::from_file(&self.file_name, offset) + if let Some(mmap) = self.mmap.as_ref() { + let raw_byte_count: &[u8] = + &mmap[offset as usize..(offset + 4) as usize]; + let byte_count: u32 = + u32::from_le_bytes(raw_byte_count.try_into().unwrap()); + let compressed_blob: &[u8] = &mmap + [(offset + 8) as usize..offset as usize + byte_count as usize]; + let blob: Vec = decode_all(compressed_blob).unwrap(); + return blob; + }; + return vec![0]; } pub fn size(&self) -> usize { @@ -31,27 +45,41 @@ impl BinFileReader { } pub trait ReadableFromBinFile { - fn parse_from_ms_data_blob(buffer: Vec, index: usize) -> Self; + fn parse_from_ms_data_blob(buffer: Vec, index: usize) -> Self; fn read_from_file(bin_file: &BinFileReader, index: usize) -> Self where Self: Sized, { - let buffer: Vec = bin_file.read_blob(index); - Self::parse_from_ms_data_blob(buffer, index) + let blob: Vec = bin_file.read_blob(index); + Self::parse_from_ms_data_blob(blob, index) } } impl ReadableFromBinFile for Spectrum { - fn parse_from_ms_data_blob(buffer: Vec, index: usize) -> Self { + fn parse_from_ms_data_blob(blob: Vec, index: usize) -> Self { let mut spectrum: Spectrum = Spectrum::default(); spectrum.index = index; - if buffer.len() == 0 { + if blob.len() == 0 { return spectrum; }; - let scan_count: usize = buffer.len() / 3; - let tof_indices_bytes: &[u32] = &buffer[..scan_count as usize * 2]; - let intensities_bytes: &[u32] = &buffer[scan_count as usize * 2..]; + let size: usize = blob.len() / std::mem::size_of::(); + let first: &[u8] = &blob[0 * size..1 * size]; + let second: &[u8] = &blob[1 * size..2 * size]; + let third: &[u8] = &blob[2 * size..3 * size]; + let fourth: &[u8] = &blob[3 * size..4 * size]; + let mut spectrum_data: Vec = vec![0; size]; + for i in 0..size { + spectrum_data[i] = first[i] as u32; + spectrum_data[i] |= (second[i] as u32) << 8; + spectrum_data[i] |= (third[i] as u32) << 16; + spectrum_data[i] |= (fourth[i] as u32) << 24; + } + let scan_count: usize = blob.len() / 3 / std::mem::size_of::(); + let tof_indices_bytes: &[u32] = + &spectrum_data[..scan_count as usize * 2]; + let intensities_bytes: &[u32] = + &spectrum_data[scan_count as usize * 2..]; let mz_values: &[f64] = bytemuck::cast_slice::(tof_indices_bytes); let intensity_values: &[f32] = @@ -64,10 +92,10 @@ impl ReadableFromBinFile for Spectrum { } impl ReadableFromBinFile for Frame { - fn parse_from_ms_data_blob(buffer: Vec, index: usize) -> Self { + fn parse_from_ms_data_blob(blob: Vec, index: usize) -> Self { let mut frame = Frame::default(); (frame.scan_offsets, frame.tof_indices, frame.intensities) = - parse_frame(buffer); + parse_frame(blob); frame.index = index; frame } diff --git a/src/file_readers/common/ms_data_blobs/parsers.rs b/src/file_readers/common/ms_data_blobs/parsers.rs index 3576af5..a40120a 100644 --- a/src/file_readers/common/ms_data_blobs/parsers.rs +++ b/src/file_readers/common/ms_data_blobs/parsers.rs @@ -1,69 +1,78 @@ -use crate::vec_utils::counts_to_indptr; +fn get_u32_from_blob(blob: &Vec, index: usize) -> u32 { + let size: usize = blob.len() / std::mem::size_of::(); + return concatenate_four_bytes_into_u32( + blob[index], + blob[size + index], + blob[2 * size + index], + blob[3 * size + index], + ); +} + +fn concatenate_four_bytes_into_u32(b1: u8, b2: u8, b3: u8, b4: u8) -> u32 { + (b1 as u32) | ((b2 as u32) << 8) | ((b3 as u32) << 16) | ((b4 as u32) << 24) +} -pub fn parse_frame(data: Vec) -> (Vec, Vec, Vec) { +pub fn parse_frame(blob: Vec) -> (Vec, Vec, Vec) { let mut tof_indices: Vec = vec![]; let mut intensities: Vec = vec![]; let mut scan_offsets: Vec = vec![]; - if data.len() != 0 { - let scan_count: usize = read_scan_count(&data); - let scan_counts: Vec = read_scan_counts(scan_count, &data); - tof_indices = read_tof_indices(scan_count, &data, &scan_counts); - intensities = read_intensities(scan_count, &data); - scan_offsets = counts_to_indptr(scan_counts); + if blob.len() != 0 { + let scan_count: usize = get_u32_from_blob(&blob, 0) as usize; + let peak_count: u32 = + ((blob.len() / std::mem::size_of::() - scan_count) / 2) as u32; + scan_offsets = read_scan_offsets(scan_count, peak_count, &blob); + intensities = read_intensities(scan_count, peak_count, &blob); + tof_indices = + read_tof_indices(scan_count, peak_count, &blob, &scan_offsets); } (scan_offsets, tof_indices, intensities) } -fn read_scan_count(data: &Vec) -> usize { - let scan_count = data[0] as usize; - scan_count +fn read_scan_offsets( + scan_count: usize, + peak_count: u32, + blob: &Vec, +) -> Vec { + // let mut scan_offsets: Vec = vec![0; scan_count + 1]; + let mut scan_offsets: Vec = Vec::with_capacity(scan_count + 1); + scan_offsets.push(0); + for scan_index in 0..scan_count - 1 { + let scan_size = (get_u32_from_blob(blob, scan_index + 1) / 2) as u64; + scan_offsets.push(scan_offsets[scan_index] + scan_size); + } + scan_offsets.push(peak_count as u64); + scan_offsets } -fn read_scan_counts(scan_count: usize, data: &Vec) -> Vec { - let mut scan_counts: Vec = data[..scan_count].to_vec(); - let ion_count: u32 = ((data.len() - scan_count) / 2) as u32; - let mut defined_scan_counts: u32 = 0; - for i in &scan_counts[1..] { - defined_scan_counts += i / 2 - } - let last_scan: u32 = ion_count - defined_scan_counts; - // println!("{:} {:}, {:}", last_scan, ion_count, defined_scan_counts); - scan_counts.rotate_left(1); - scan_counts[scan_count - 1] = last_scan; - for i in 0..scan_counts.len() - 1 { - scan_counts[i] /= 2; +fn read_intensities( + scan_count: usize, + peak_count: u32, + blob: &Vec, +) -> Vec { + let mut intensities: Vec = Vec::with_capacity(peak_count as usize); + for i in 0..peak_count { + intensities + .push(get_u32_from_blob(blob, scan_count + 1 + 2 * i as usize)); } - scan_counts + intensities } fn read_tof_indices( scan_count: usize, - data: &Vec, - scan_counts: &Vec, + peak_count: u32, + blob: &Vec, + scan_offsets: &Vec, ) -> Vec { - let mut tof_indices: Vec = - data.iter().skip(scan_count).step_by(2).cloned().collect(); - let mut index: usize = 0; - for size in scan_counts { + let mut tof_indices: Vec = Vec::with_capacity(peak_count as usize); + for scan_index in 0..scan_count { + let start_offset = scan_offsets[scan_index] as usize; + let end_offset = scan_offsets[scan_index + 1] as usize; let mut current_sum: u32 = 0; - for _i in 0..*size { - current_sum += tof_indices[index]; - tof_indices[index] = current_sum; - index += 1; + for i in start_offset..end_offset { + let tof_index = get_u32_from_blob(blob, scan_count + 2 * i); + current_sum += tof_index; + tof_indices.push(current_sum - 1); } } - for i in 0..tof_indices.len() { - tof_indices[i] -= 1; - } tof_indices } - -fn read_intensities(scan_count: usize, data: &Vec) -> Vec { - let intensities: Vec = data - .iter() - .skip(scan_count + 1) - .step_by(2) - .cloned() - .collect(); - intensities -} diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index 8e4c4ac..f23f301 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -16,7 +16,7 @@ use { std::path::PathBuf, }; -#[derive(Debug, Default, Clone)] +#[derive(Debug)] pub struct MiniTDFReader { pub path_name: String, parquet_file_name: String, @@ -62,8 +62,17 @@ fn find_ms2spectrum_file( impl MiniTDFReader { pub fn new(path_name: String) -> Self { - let mut reader: MiniTDFReader = Self::default(); - reader.path_name = path_name; + let parquet_file_name: String = String::default(); + let precursors: Vec = Vec::default(); + let offsets: Vec = Vec::default(); + let frame_reader: BinFileReader = BinFileReader::default(); + let mut reader: MiniTDFReader = MiniTDFReader { + path_name, + parquet_file_name, + precursors, + offsets, + frame_reader, + }; reader.read_parquet_file_name(); reader.read_precursors(); reader.set_spectrum_reader(); From fdbfaae43c35008e0829a858ef87a970cd4e410a Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 24 Jan 2024 16:19:54 +0100 Subject: [PATCH 06/11] CHORE: cleanup of unused files --- .../common/ms_data_blobs/processors.rs | 42 ---------- .../common/ms_data_blobs/readers.rs | 83 ------------------- src/vec_utils.rs | 11 --- 3 files changed, 136 deletions(-) delete mode 100644 src/file_readers/common/ms_data_blobs/processors.rs delete mode 100644 src/file_readers/common/ms_data_blobs/readers.rs diff --git a/src/file_readers/common/ms_data_blobs/processors.rs b/src/file_readers/common/ms_data_blobs/processors.rs deleted file mode 100644 index 57db893..0000000 --- a/src/file_readers/common/ms_data_blobs/processors.rs +++ /dev/null @@ -1,42 +0,0 @@ -use zstd::decode_all; - -use super::readers::{MSDataBlob, MSDataBlobReader, MSDataBlobState}; - -pub struct MSDataBlobProcessor { - ms_data_blob: MSDataBlob, -} - -impl MSDataBlobProcessor { - pub fn from_file(path: &String, offset: u64) -> Vec { - let ms_data_blob: MSDataBlob = - MSDataBlobReader::new(path, offset).read(); - Self { ms_data_blob }.decompress().byte_shuffle_and_unpack() - } - - fn decompress(mut self) -> Self { - if self.ms_data_blob.data.len() != 0 { - let reader: &[u8] = &self.ms_data_blob.data; - let buf = decode_all(reader) - .expect("Cannot decompress bytes. Are they zstd compressed?"); - self.ms_data_blob.data = buf; - } - self.ms_data_blob.state = MSDataBlobState::Decompressed; - self - } - - fn byte_shuffle_and_unpack(&self) -> Vec { - let size: usize = self.ms_data_blob.data.len() / 4; - let first: &[u8] = &self.ms_data_blob.data[0 * size..1 * size]; - let second: &[u8] = &self.ms_data_blob.data[1 * size..2 * size]; - let third: &[u8] = &self.ms_data_blob.data[2 * size..3 * size]; - let fourth: &[u8] = &self.ms_data_blob.data[3 * size..4 * size]; - let mut frame_data: Vec = vec![0; size]; - for i in 0..size { - frame_data[i as usize] = first[i as usize] as u32; - frame_data[i as usize] += (second[i as usize] as u32) << 8; - frame_data[i as usize] += (third[i as usize] as u32) << 16; - frame_data[i as usize] += (fourth[i as usize] as u32) << 24; - } - frame_data - } -} diff --git a/src/file_readers/common/ms_data_blobs/readers.rs b/src/file_readers/common/ms_data_blobs/readers.rs deleted file mode 100644 index 68a7d9b..0000000 --- a/src/file_readers/common/ms_data_blobs/readers.rs +++ /dev/null @@ -1,83 +0,0 @@ -use byteorder::{LittleEndian, ReadBytesExt}; -use std::{ - fs::File, - io::{prelude::*, SeekFrom}, -}; - -#[derive(Debug)] -pub struct MSDataBlob { - pub data: Vec, - pub state: MSDataBlobState, -} - -#[derive(Debug)] -pub enum MSDataBlobState { - Unprocessed, - Decompressed, -} - -impl Default for MSDataBlobState { - fn default() -> Self { - Self::Unprocessed - } -} - -#[derive(Debug)] -pub struct MSDataBlobReader { - tdf_bin_file: File, - offset: u64, -} - -impl MSDataBlobReader { - pub fn new(path: &String, offset: u64) -> Self { - let tdf_bin_file: File = Self::open_file(path); - let reader: MSDataBlobReader = Self { - tdf_bin_file, - offset, - }; - reader - } - - fn open_file(path: &String) -> File { - File::open(path).expect("File cannot be opened. Is the path correct?") - } - - pub fn read(&mut self) -> MSDataBlob { - self.reset_binary_offset(); - let mut byte_count: u32 = self.read_byte_count(); - let _scan_count: u32 = self.read_scan_count(); - byte_count -= 8; - let ms_data: Vec = self.read_compressed_bytes(byte_count); - MSDataBlob { - data: ms_data, - state: MSDataBlobState::default(), - } - } - - fn reset_binary_offset(&mut self) { - let pos: SeekFrom = SeekFrom::Start(self.offset); - self.tdf_bin_file - .seek(pos) - .expect("Offset cannot be seeked. Is it in range?"); - } - - fn read_byte_count(&mut self) -> u32 { - self.tdf_bin_file - .read_u32::() - .expect("Cannot read byte count, is it little endian?") - } - - fn read_scan_count(&mut self) -> u32 { - self.tdf_bin_file - .read_u32::() - .expect("Cannot read scan count, is it little endian?") - } - - fn read_compressed_bytes(&mut self, byte_count: u32) -> Vec { - let mut buf: Vec = vec![0; byte_count as usize]; - self.tdf_bin_file - .read_exact(&mut buf) - .expect("Cannot read compressed bytes. Are the offset and byte count correct?"); - buf - } -} diff --git a/src/vec_utils.rs b/src/vec_utils.rs index 66dcce6..e6e894e 100644 --- a/src/vec_utils.rs +++ b/src/vec_utils.rs @@ -67,14 +67,3 @@ pub fn filter_with_mask(vec: &Vec, mask: &Vec) -> Vec { .map(|(&x_elem, _)| x_elem) .collect() } - -pub fn counts_to_indptr + Copy>(vec: Vec) -> Vec { - let mut indptr: Vec = Vec::with_capacity(vec.len() + 1); - let mut offset: u64 = 0; - indptr.push(offset); - for i in 0..vec.len() { - offset += vec[i].into(); - indptr.push(offset); - } - indptr.iter().map(|&x| x as u64).collect() -} From 26fe67024711e11dc31501cb8adf0261bf372899 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Wed, 24 Jan 2024 16:54:48 +0100 Subject: [PATCH 07/11] FIX: Empty frames --- src/file_readers/common/ms_data_blobs.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/file_readers/common/ms_data_blobs.rs b/src/file_readers/common/ms_data_blobs.rs index 3f4b802..c261b9b 100644 --- a/src/file_readers/common/ms_data_blobs.rs +++ b/src/file_readers/common/ms_data_blobs.rs @@ -31,12 +31,14 @@ impl BinFileReader { &mmap[offset as usize..(offset + 4) as usize]; let byte_count: u32 = u32::from_le_bytes(raw_byte_count.try_into().unwrap()); - let compressed_blob: &[u8] = &mmap - [(offset + 8) as usize..offset as usize + byte_count as usize]; - let blob: Vec = decode_all(compressed_blob).unwrap(); - return blob; + if byte_count > 8 { + let compressed_blob: &[u8] = &mmap[(offset + 8) as usize + ..offset as usize + byte_count as usize]; + let blob: Vec = decode_all(compressed_blob).unwrap(); + return blob; + } }; - return vec![0]; + return vec![]; } pub fn size(&self) -> usize { From e78ec97b7d3acbafdf6d653cc24c96342ddcaa6f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 5 Apr 2024 14:34:23 +0200 Subject: [PATCH 08/11] CHORE: adding types --- src/file_readers/common/sql_reader/metadata.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/file_readers/common/sql_reader/metadata.rs b/src/file_readers/common/sql_reader/metadata.rs index 301e784..6e1e29b 100644 --- a/src/file_readers/common/sql_reader/metadata.rs +++ b/src/file_readers/common/sql_reader/metadata.rs @@ -66,7 +66,7 @@ impl SqlReader { } pub fn read_mz_information(&self) -> (u32, f64, f64) { - let connection = get_sql_connection(&self.path); + let connection: Connection = get_sql_connection(&self.path); let tof_max_index: u32 = read_tof_max_index(&connection); let lower_mz_value: f64 = read_mz_min_value(&connection); let upper_mz_value: f64 = read_mz_max_value(&connection); From 4625976af58d1abbc35b07daafb9dccf8087418f Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 5 Apr 2024 14:35:00 +0200 Subject: [PATCH 09/11] FIX: correcting spectrum ids --- src/file_readers/spectrum_readers/mini_tdf_reader.rs | 1 + tests/spectrum_readers.rs | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/file_readers/spectrum_readers/mini_tdf_reader.rs b/src/file_readers/spectrum_readers/mini_tdf_reader.rs index f23f301..8ba343d 100644 --- a/src/file_readers/spectrum_readers/mini_tdf_reader.rs +++ b/src/file_readers/spectrum_readers/mini_tdf_reader.rs @@ -108,6 +108,7 @@ impl ReadableSpectra for MiniTDFReader { let mut spectrum: Spectrum = Spectrum::read_from_file(&self.frame_reader, index); spectrum.precursor = QuadrupoleEvent::Precursor(self.precursors[index]); + spectrum.index = self.precursors[index].index; spectrum } diff --git a/tests/spectrum_readers.rs b/tests/spectrum_readers.rs index 6da7dec..a19f228 100644 --- a/tests/spectrum_readers.rs +++ b/tests/spectrum_readers.rs @@ -31,7 +31,7 @@ fn minitdf_reader() { frame_index: 1, collision_energy: 0.0, }), - index: 0, + index: 1, }, Spectrum { mz_values: vec![1100.0, 1200.002, 1300.03, 1400.4], @@ -46,7 +46,7 @@ fn minitdf_reader() { frame_index: 2, collision_energy: 0.0, }), - index: 1, + index: 2, }, ]; for i in 0..spectra.len() { From 3be5a9844a2de9c6aa045dc28e53900331b25009 Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 5 Apr 2024 14:36:12 +0200 Subject: [PATCH 10/11] CHORE: type updates --- .../common/ms_data_blobs/parsers.rs | 50 ++++++++++--------- src/frames.rs | 2 +- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/src/file_readers/common/ms_data_blobs/parsers.rs b/src/file_readers/common/ms_data_blobs/parsers.rs index a40120a..bdb2918 100644 --- a/src/file_readers/common/ms_data_blobs/parsers.rs +++ b/src/file_readers/common/ms_data_blobs/parsers.rs @@ -1,5 +1,8 @@ +const U32_SIZE: usize = std::mem::size_of::(); + +#[inline(always)] fn get_u32_from_blob(blob: &Vec, index: usize) -> u32 { - let size: usize = blob.len() / std::mem::size_of::(); + let size: usize = blob.len() / U32_SIZE; return concatenate_four_bytes_into_u32( blob[index], blob[size + index], @@ -8,18 +11,18 @@ fn get_u32_from_blob(blob: &Vec, index: usize) -> u32 { ); } +#[inline(always)] fn concatenate_four_bytes_into_u32(b1: u8, b2: u8, b3: u8, b4: u8) -> u32 { (b1 as u32) | ((b2 as u32) << 8) | ((b3 as u32) << 16) | ((b4 as u32) << 24) } -pub fn parse_frame(blob: Vec) -> (Vec, Vec, Vec) { +pub fn parse_frame(blob: Vec) -> (Vec, Vec, Vec) { let mut tof_indices: Vec = vec![]; let mut intensities: Vec = vec![]; - let mut scan_offsets: Vec = vec![]; + let mut scan_offsets: Vec = vec![]; if blob.len() != 0 { let scan_count: usize = get_u32_from_blob(&blob, 0) as usize; - let peak_count: u32 = - ((blob.len() / std::mem::size_of::() - scan_count) / 2) as u32; + let peak_count: usize = (blob.len() / U32_SIZE - scan_count) / 2; scan_offsets = read_scan_offsets(scan_count, peak_count, &blob); intensities = read_intensities(scan_count, peak_count, &blob); tof_indices = @@ -30,46 +33,47 @@ pub fn parse_frame(blob: Vec) -> (Vec, Vec, Vec) { fn read_scan_offsets( scan_count: usize, - peak_count: u32, + peak_count: usize, blob: &Vec, -) -> Vec { - // let mut scan_offsets: Vec = vec![0; scan_count + 1]; - let mut scan_offsets: Vec = Vec::with_capacity(scan_count + 1); +) -> Vec { + let mut scan_offsets: Vec = Vec::with_capacity(scan_count + 1); scan_offsets.push(0); for scan_index in 0..scan_count - 1 { - let scan_size = (get_u32_from_blob(blob, scan_index + 1) / 2) as u64; + let index = scan_index + 1; + let scan_size: usize = (get_u32_from_blob(blob, index) / 2) as usize; scan_offsets.push(scan_offsets[scan_index] + scan_size); } - scan_offsets.push(peak_count as u64); + scan_offsets.push(peak_count); scan_offsets } fn read_intensities( scan_count: usize, - peak_count: u32, + peak_count: usize, blob: &Vec, ) -> Vec { - let mut intensities: Vec = Vec::with_capacity(peak_count as usize); - for i in 0..peak_count { - intensities - .push(get_u32_from_blob(blob, scan_count + 1 + 2 * i as usize)); + let mut intensities: Vec = Vec::with_capacity(peak_count); + for peak_index in 0..peak_count { + let index: usize = scan_count + 1 + 2 * peak_index; + intensities.push(get_u32_from_blob(blob, index)); } intensities } fn read_tof_indices( scan_count: usize, - peak_count: u32, + peak_count: usize, blob: &Vec, - scan_offsets: &Vec, + scan_offsets: &Vec, ) -> Vec { - let mut tof_indices: Vec = Vec::with_capacity(peak_count as usize); + let mut tof_indices: Vec = Vec::with_capacity(peak_count); for scan_index in 0..scan_count { - let start_offset = scan_offsets[scan_index] as usize; - let end_offset = scan_offsets[scan_index + 1] as usize; + let start_offset: usize = scan_offsets[scan_index]; + let end_offset: usize = scan_offsets[scan_index + 1]; let mut current_sum: u32 = 0; - for i in start_offset..end_offset { - let tof_index = get_u32_from_blob(blob, scan_count + 2 * i); + for peak_index in start_offset..end_offset { + let index = scan_count + 2 * peak_index; + let tof_index: u32 = get_u32_from_blob(blob, index); current_sum += tof_index; tof_indices.push(current_sum - 1); } diff --git a/src/frames.rs b/src/frames.rs index e8c44b0..ad7df8d 100644 --- a/src/frames.rs +++ b/src/frames.rs @@ -3,7 +3,7 @@ use crate::acquisition::AcquisitionType; /// A frame with all unprocessed data as it was acquired. #[derive(Debug, PartialEq, Default)] pub struct Frame { - pub scan_offsets: Vec, + pub scan_offsets: Vec, pub tof_indices: Vec, pub intensities: Vec, pub index: usize, From 60d155f379bcf625b1784280c1d3f58f9780ab2a Mon Sep 17 00:00:00 2001 From: Sander Willems Date: Fri, 5 Apr 2024 14:36:40 +0200 Subject: [PATCH 11/11] CHORE: adding debug to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8130c3a..f5531e7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ debug/ target/ +.vscode/