From 45cbf306d0d5f93858955a6efc5e333cd93c074c Mon Sep 17 00:00:00 2001 From: Moritz Borcherding Date: Sat, 21 Dec 2024 20:33:51 +0100 Subject: [PATCH] allow creating a FrameCompressor without initially providing a source and drain --- fuzz/fuzz_targets/encode.rs | 10 +-- fuzz/fuzz_targets/interop.rs | 19 +--- src/bin/zstd.rs | 10 ++- src/encoding/frame_compressor.rs | 149 ++++++++++++++++--------------- src/encoding/match_generator.rs | 2 +- src/encoding/mod.rs | 4 +- src/tests/encode_corpus.rs | 35 +++----- 7 files changed, 110 insertions(+), 119 deletions(-) diff --git a/fuzz/fuzz_targets/encode.rs b/fuzz/fuzz_targets/encode.rs index 7048bef..8f28746 100644 --- a/fuzz/fuzz_targets/encode.rs +++ b/fuzz/fuzz_targets/encode.rs @@ -2,21 +2,17 @@ #[macro_use] extern crate libfuzzer_sys; extern crate ruzstd; -use ruzstd::encoding::{CompressionLevel, FrameCompressor}; +use ruzstd::encoding::{CompressionLevel, compress_to_vec}; fuzz_target!(|data: &[u8]| { - let mut output = Vec::new(); - let mut compressor = FrameCompressor::new(data, &mut output, CompressionLevel::Uncompressed); - compressor.compress(); + let output = compress_to_vec(data, CompressionLevel::Uncompressed); let mut decoded = Vec::with_capacity(data.len()); let mut decoder = ruzstd::decoding::FrameDecoder::new(); decoder.decode_all_to_vec(&output, &mut decoded).unwrap(); assert_eq!(data, &decoded); - let mut output = Vec::new(); - let mut compressor = FrameCompressor::new(data, &mut output, CompressionLevel::Fastest); - compressor.compress(); + let output = compress_to_vec(data, CompressionLevel::Fastest); let mut decoded = Vec::with_capacity(data.len()); let mut decoder = ruzstd::decoding::FrameDecoder::new(); diff --git a/fuzz/fuzz_targets/interop.rs b/fuzz/fuzz_targets/interop.rs index fb22f42..d7b0f7a 100644 --- a/fuzz/fuzz_targets/interop.rs +++ b/fuzz/fuzz_targets/interop.rs @@ -3,6 +3,7 @@ extern crate libfuzzer_sys; extern crate ruzstd; use std::io::Read; +use ruzstd::encoding::{CompressionLevel, 
compress_to_vec}; fn decode_ruzstd(data: &mut dyn std::io::Read) -> Vec { let mut decoder = ruzstd::decoding::StreamingDecoder::new(data).unwrap(); @@ -33,28 +34,14 @@ fn encode_zstd(data: &[u8]) -> Result, std::io::Error> { fn encode_ruzstd_uncompressed(data: &mut dyn std::io::Read) -> Vec { let mut input = Vec::new(); - let mut output = Vec::new(); data.read_to_end(&mut input).unwrap(); - let mut compressor = ruzstd::encoding::FrameCompressor::new( - input.as_slice(), - &mut output, - ruzstd::encoding::CompressionLevel::Uncompressed, - ); - compressor.compress(); - output + compress_to_vec(input.as_slice(), CompressionLevel::Uncompressed) } fn encode_ruzstd_compressed(data: &mut dyn std::io::Read) -> Vec { let mut input = Vec::new(); - let mut output = Vec::new(); data.read_to_end(&mut input).unwrap(); - let mut compressor = ruzstd::encoding::FrameCompressor::new( - input.as_slice(), - &mut output, - ruzstd::encoding::CompressionLevel::Fastest, - ); - compressor.compress(); - output + compress_to_vec(input.as_slice(), CompressionLevel::Fastest) } fn decode_zstd(data: &[u8]) -> Result, std::io::Error> { diff --git a/src/bin/zstd.rs b/src/bin/zstd.rs index 3a5321a..471389d 100644 --- a/src/bin/zstd.rs +++ b/src/bin/zstd.rs @@ -158,6 +158,10 @@ fn main() { file_paths.remove(0); if flags.is_empty() { + let mut encoder = FrameCompressor::new(CompressionLevel::Fastest); + let mut output = Vec::new(); + encoder.set_drain(&mut output); + for path in file_paths { let start_instant = Instant::now(); let file = std::fs::File::open(&path).unwrap(); @@ -168,9 +172,11 @@ fn main() { counter: 0, last_percent: 0, }; - let mut output = Vec::new(); - let mut encoder = FrameCompressor::new(file, &mut output, CompressionLevel::Fastest); + encoder.set_source(file); + + encoder.drain_mut().unwrap().clear(); encoder.compress(); + let output = encoder.drain_mut().unwrap(); println!( "Compressed {path:} from {} to {} ({}%) took {}ms", input_len, diff --git a/src/encoding/frame_compressor.rs 
b/src/encoding/frame_compressor.rs index e4bd05f..db8bd9c 100644 --- a/src/encoding/frame_compressor.rs +++ b/src/encoding/frame_compressor.rs @@ -5,7 +5,7 @@ use core::convert::TryInto; use super::{ block_header::BlockHeader, blocks::compress_block, frame_header::FrameHeader, - match_generator::MatchGeneratorDriver, Matcher, CompressionLevel + match_generator::MatchGeneratorDriver, CompressionLevel, Matcher, }; use crate::io::{Read, Write}; @@ -25,28 +25,26 @@ const MAX_BLOCK_SIZE: usize = 128 * 1024 - 20; /// let mock_data: &[_] = &[0x1, 0x2, 0x3, 0x4]; /// let mut output = std::vec::Vec::new(); /// // Initialize a compressor. -/// let mut compressor = FrameCompressor::new(mock_data, &mut output, CompressionLevel::Uncompressed); +/// let mut compressor = FrameCompressor::new(CompressionLevel::Uncompressed); +/// compressor.set_source(mock_data); +/// compressor.set_drain(&mut output); /// /// // `compress` writes the compressed output into the provided buffer. /// compressor.compress(); /// ``` pub struct FrameCompressor { - uncompressed_data: R, - compressed_data: W, + uncompressed_data: Option, + compressed_data: Option, compression_level: CompressionLevel, match_generator: M, } impl FrameCompressor { /// Create a new `FrameCompressor` - pub fn new( - uncompressed_data: R, - compressed_data: W, - compression_level: CompressionLevel, - ) -> Self { + pub fn new(compression_level: CompressionLevel) -> Self { Self { - uncompressed_data, - compressed_data, + uncompressed_data: None, + compressed_data: None, compression_level, match_generator: MatchGeneratorDriver::new(1024 * 128, 1), } @@ -55,30 +53,53 @@ impl FrameCompressor { impl FrameCompressor { /// Create a new `FrameCompressor` with a custom matching algorithm implementation - pub fn new_with_matcher( - source: R, - drain: W, - matcher: M, - compression_level: CompressionLevel, - ) -> Self { + pub fn new_with_matcher(matcher: M, compression_level: CompressionLevel) -> Self { Self { - uncompressed_data: 
source, - compressed_data: drain, + uncompressed_data: None, + compressed_data: None, match_generator: matcher, compression_level, } } - /// After calling [FrameCompressor::compress] you can replace the source and call [FrameCompressor::compress] again - pub fn replace_source(&mut self, mut uncompressed_data: R) -> R { - std::mem::swap(&mut uncompressed_data, &mut self.uncompressed_data); - uncompressed_data + /// Before calling [FrameCompressor::compress] you need to set the source + pub fn set_source(&mut self, uncompressed_data: R) -> Option { + self.uncompressed_data.replace(uncompressed_data) } - /// Before calling [FrameCompressor::compress] you can replace the drain - pub fn replace_drain(&mut self, mut compressed_data: W) -> W { - std::mem::swap(&mut compressed_data, &mut self.compressed_data); - compressed_data + /// Before calling [FrameCompressor::compress] you need to set the drain + pub fn set_drain(&mut self, compressed_data: W) -> Option { + self.compressed_data.replace(compressed_data) + } + + /// Retrieve a mutable reference to the source + pub fn source_mut(&mut self) -> Option<&mut R> { + self.uncompressed_data.as_mut() + } + + /// Retrieve a mutable reference to the drain + pub fn drain_mut(&mut self) -> Option<&mut W> { + self.compressed_data.as_mut() + } + + /// Retrieve a reference to the source + pub fn source(&self) -> Option<&R> { + self.uncompressed_data.as_ref() + } + + /// Retrieve a reference to the drain + pub fn drain(&self) -> Option<&W> { + self.compressed_data.as_ref() + } + + /// Retrieve the source + pub fn take_source(&mut self) -> Option { + self.uncompressed_data.take() + } + + /// Take the drain out of the compressor, leaving `None` in its place + pub fn take_drain(&mut self) -> Option<W> { + self.compressed_data.take() } /// Before calling [FrameCompressor::compress] you can replace the matcher @@ -99,6 +120,8 @@ impl FrameCompressor { /// Compress the uncompressed data into a valid Zstd frame and write it into the provided buffer pub fn compress(&mut self) { 
self.match_generator.reset(self.compression_level); + let source = self.uncompressed_data.as_mut().unwrap(); + let drain = self.compressed_data.as_mut().unwrap(); let mut output = Vec::with_capacity(1024 * 130); let output = &mut output; @@ -116,10 +139,7 @@ impl FrameCompressor { let mut read_bytes = 0; let last_block; 'read_loop: loop { - let new_bytes = self - .uncompressed_data - .read(&mut uncompressed_data[read_bytes..]) - .unwrap(); + let new_bytes = source.read(&mut uncompressed_data[read_bytes..]).unwrap(); if new_bytes == 0 { last_block = true; break 'read_loop; @@ -141,7 +161,7 @@ impl FrameCompressor { }; // Write the header, then the block header.serialize(output); - self.compressed_data.write_all(output).unwrap(); + drain.write_all(output).unwrap(); output.clear(); break; } @@ -199,7 +219,7 @@ impl FrameCompressor { unimplemented!(); } } - self.compressed_data.write_all(output).unwrap(); + drain.write_all(output).unwrap(); output.clear(); if last_block { break; @@ -220,11 +240,10 @@ mod tests { fn frame_starts_with_magic_num() { let mock_data = [1_u8, 2, 3].as_slice(); let mut output: Vec = Vec::new(); - let mut compressor = FrameCompressor::new( - mock_data, - &mut output, - super::CompressionLevel::Uncompressed, - ); + let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed); + compressor.set_source(mock_data); + compressor.set_drain(&mut output); + compressor.compress(); assert!(output.starts_with(&MAGIC_NUM.to_le_bytes())); } @@ -233,11 +252,10 @@ mod tests { fn very_simple_raw_compress() { let mock_data = [1_u8, 2, 3].as_slice(); let mut output: Vec = Vec::new(); - let mut compressor = FrameCompressor::new( - mock_data, - &mut output, - super::CompressionLevel::Uncompressed, - ); + let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed); + compressor.set_source(mock_data); + compressor.set_drain(&mut output); + compressor.compress(); } @@ -249,11 +267,10 @@ mod tests { mock_data.extend(vec![2; 1 << 
17]); mock_data.extend(vec![3; (1 << 17) - 1]); let mut output: Vec = Vec::new(); - let mut compressor = FrameCompressor::new( - mock_data.as_slice(), - &mut output, - super::CompressionLevel::Uncompressed, - ); + let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed); + compressor.set_source(mock_data.as_slice()); + compressor.set_drain(&mut output); + compressor.compress(); let mut decoder = FrameDecoder::new(); @@ -270,11 +287,10 @@ mod tests { fn rle_compress() { let mock_data = vec![0; 1 << 19]; let mut output: Vec = Vec::new(); - let mut compressor = FrameCompressor::new( - mock_data.as_slice(), - &mut output, - super::CompressionLevel::Uncompressed, - ); + let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed); + compressor.set_source(mock_data.as_slice()); + compressor.set_drain(&mut output); + compressor.compress(); let mut decoder = FrameDecoder::new(); @@ -287,11 +303,10 @@ mod tests { fn aaa_compress() { let mock_data = vec![0, 1, 3, 4, 5]; let mut output: Vec = Vec::new(); - let mut compressor = FrameCompressor::new( - mock_data.as_slice(), - &mut output, - super::CompressionLevel::Uncompressed, - ); + let mut compressor = FrameCompressor::new(super::CompressionLevel::Uncompressed); + compressor.set_source(mock_data.as_slice()); + compressor.set_drain(&mut output); + compressor.compress(); let mut decoder = FrameDecoder::new(); @@ -338,29 +353,21 @@ mod tests { fn encode_ruzstd_uncompressed(data: &mut dyn std::io::Read) -> Vec { let mut input = Vec::new(); data.read_to_end(&mut input).unwrap(); - let mut output = Vec::new(); - let mut compressor = crate::encoding::FrameCompressor::new( + crate::encoding::compress_to_vec( input.as_slice(), - &mut output, crate::encoding::CompressionLevel::Uncompressed, - ); - compressor.compress(); - output + ) } fn encode_ruzstd_compressed(data: &mut dyn std::io::Read) -> Vec { let mut input = Vec::new(); data.read_to_end(&mut input).unwrap(); - let mut output = 
Vec::new(); - let mut compressor = crate::encoding::FrameCompressor::new( + crate::encoding::compress_to_vec( input.as_slice(), - &mut output, - crate::encoding::CompressionLevel::Uncompressed, - ); - compressor.compress(); - output + crate::encoding::CompressionLevel::Fastest, + ) } fn decode_zstd(data: &[u8]) -> Result, std::io::Error> { diff --git a/src/encoding/match_generator.rs b/src/encoding/match_generator.rs index 8530e77..54a5949 100644 --- a/src/encoding/match_generator.rs +++ b/src/encoding/match_generator.rs @@ -8,9 +8,9 @@ use alloc::vec::Vec; use core::num::NonZeroUsize; +use super::CompressionLevel; use super::Matcher; use super::Sequence; -use super::CompressionLevel; const MIN_MATCH_LEN: usize = 5; diff --git a/src/encoding/mod.rs b/src/encoding/mod.rs index d04ad53..f512720 100644 --- a/src/encoding/mod.rs +++ b/src/encoding/mod.rs @@ -21,7 +21,9 @@ use alloc::vec::Vec; /// compress(data, &mut target, CompressionLevel::Fastest); /// ``` pub fn compress(source: R, target: W, level: CompressionLevel) { - let mut frame_enc = FrameCompressor::new(source, target, level); + let mut frame_enc = FrameCompressor::new(level); + frame_enc.set_source(source); + frame_enc.set_drain(target); frame_enc.compress(); } diff --git a/src/tests/encode_corpus.rs b/src/tests/encode_corpus.rs index 86baf40..43a7f07 100644 --- a/src/tests/encode_corpus.rs +++ b/src/tests/encode_corpus.rs @@ -29,13 +29,11 @@ fn test_encode_corpus_files_uncompressed_our_decompressor() { println!("Trying file: {:?}", path); let input = fs::read(entry.path()).unwrap(); - let mut compressed_file: Vec = Vec::new(); - let mut compressor = FrameCompressor::new( - input.as_slice(), - &mut compressed_file, - crate::encoding::CompressionLevel::Fastest, - ); + let mut compressor = FrameCompressor::new(crate::encoding::CompressionLevel::Fastest); + compressor.set_source(input.as_slice()); + compressor.set_drain(&mut compressed_file); + compressor.compress(); let mut decompressed_output = Vec::new(); 
let mut decoder = @@ -88,11 +86,9 @@ fn test_encode_corpus_files_uncompressed_original_decompressor() { let input = fs::read(entry.path()).unwrap(); let mut compressed_file: Vec = Vec::new(); - let mut compressor = FrameCompressor::new( - input.as_slice(), - &mut compressed_file, - crate::encoding::CompressionLevel::Fastest, - ); + let mut compressor = FrameCompressor::new(crate::encoding::CompressionLevel::Fastest); + compressor.set_source(input.as_slice()); + compressor.set_drain(&mut compressed_file); compressor.compress(); let mut decompressed_output = Vec::new(); // zstd::stream::copy_decode(compressed_file.as_slice(), &mut decompressed_output).unwrap(); @@ -151,11 +147,10 @@ fn test_encode_corpus_files_compressed_our_decompressor() { let input = fs::read(entry.path()).unwrap(); let mut compressed_file: Vec = Vec::new(); - let mut compressor = FrameCompressor::new( - input.as_slice(), - &mut compressed_file, - crate::encoding::CompressionLevel::Fastest, - ); + let mut compressor = FrameCompressor::new(crate::encoding::CompressionLevel::Fastest); + compressor.set_source(input.as_slice()); + compressor.set_drain(&mut compressed_file); + compressor.compress(); let mut decompressed_output = Vec::new(); let mut decoder = @@ -208,11 +203,9 @@ fn test_encode_corpus_files_compressed_original_decompressor() { let input = fs::read(entry.path()).unwrap(); let mut compressed_file: Vec = Vec::new(); - let mut compressor = FrameCompressor::new( - input.as_slice(), - &mut compressed_file, - crate::encoding::CompressionLevel::Fastest, - ); + let mut compressor = FrameCompressor::new(crate::encoding::CompressionLevel::Fastest); + compressor.set_source(input.as_slice()); + compressor.set_drain(&mut compressed_file); compressor.compress(); let mut decompressed_output = Vec::new(); // zstd::stream::copy_decode(compressed_file.as_slice(), &mut decompressed_output).unwrap();