
Compression support #79

Merged 136 commits on Dec 13, 2024

Commits
42c046e
start a huffman encder
KillingSpark Oct 7, 2024
a0d2031
build huffman table according to spec
KillingSpark Oct 8, 2024
246e609
calc max num bits from weights
KillingSpark Oct 8, 2024
35833ba
create valid weight distributions
KillingSpark Oct 10, 2024
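The weight rules these commits implement come from the zstd spec's huff0 description: a used symbol with weight w contributes 2^(w-1), a valid distribution sums to a power of two, and a symbol's code length is max_bits + 1 - w. A minimal sketch of deriving max_bits from a weight list (based on the spec, not this PR's actual code):

```rust
/// Returns the longest code length implied by a huff0 weight distribution,
/// or None if the distribution is invalid. Symbols with weight 0 are unused.
fn max_num_bits(weights: &[u8]) -> Option<u32> {
    let sum: u64 = weights
        .iter()
        .filter(|&&w| w > 0)
        .map(|&w| 1u64 << (w - 1))
        .sum();
    // A valid weight distribution sums to a power of two;
    // its log2 is the maximum number of bits any code uses.
    if sum.is_power_of_two() {
        Some(sum.trailing_zeros())
    } else {
        None
    }
}
```

For example, weights [2, 1, 1] sum to 2 + 1 + 1 = 4 = 2^2, so the longest code is 2 bits (the weight-2 symbol gets a 1-bit code, the others 2-bit codes), which satisfies Kraft's inequality exactly.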
7f5ac83
implement redistributing weights to fit a given maximum weight
KillingSpark Oct 10, 2024
05788ac
fix chosen weight to redistribute
KillingSpark Oct 10, 2024
48efed9
fix huffman code distribution
KillingSpark Oct 11, 2024
bad020d
build huffman table for counts
KillingSpark Oct 11, 2024
0e29503
allow building from a slice of data
KillingSpark Oct 11, 2024
39e612d
start actual huffman encoder
KillingSpark Oct 11, 2024
190f08c
test for prefix free codes, shows it's still broken
KillingSpark Oct 11, 2024
a7cd816
fix broken prefix allocation
KillingSpark Oct 11, 2024
e799da7
test more weight distributions
KillingSpark Oct 11, 2024
f7d32c7
add roundtrip test for huffman coding that's fit to also be fuzzed
KillingSpark Oct 11, 2024
cd794a8
fix roundtrip test
KillingSpark Oct 11, 2024
1980dbf
huffman streams are encoded backwards
KillingSpark Oct 11, 2024
90ede9e
rewrite bit writer to fill bytes starting at the lower bits
KillingSpark Oct 12, 2024
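The convention behind this rewrite is that zstd bitstreams fill each byte starting at the lowest bit. A minimal sketch of such a writer (hypothetical names, not the PR's implementation; a later commit additionally batches bits in a u64 before flushing):

```rust
/// Packs bit groups LSB-first: the first bits written land in the
/// low bits of the first output byte.
struct BitWriter {
    acc: u64,   // pending bits; the lowest bits are the oldest
    count: u32, // number of valid bits in `acc`
    out: Vec<u8>,
}

impl BitWriter {
    fn new() -> Self {
        BitWriter { acc: 0, count: 0, out: Vec::new() }
    }

    fn write_bits(&mut self, bits: u64, n: u32) {
        debug_assert!(n <= 56, "flush keeps count < 8, so n must leave headroom");
        self.acc |= (bits & ((1u64 << n) - 1)) << self.count;
        self.count += n;
        while self.count >= 8 {
            self.out.push((self.acc & 0xFF) as u8);
            self.acc >>= 8;
            self.count -= 8;
        }
    }

    /// Zero-pads the final partial byte and returns the stream.
    fn finish(mut self) -> Vec<u8> {
        if self.count > 0 {
            self.out.push((self.acc & 0xFF) as u8);
        }
        self.out
    }
}
```

Writing 0b101 (3 bits) then 0b11 (2 bits) yields the single byte 0b000_11_101: the first group occupies the lowest bits, as the format requires.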
bfa1c9e
start fuzzing huffman encoder
KillingSpark Oct 12, 2024
289e664
test that max num bits doesnt get too big
KillingSpark Oct 12, 2024
df60a7a
only execute fuzz targets when present
KillingSpark Oct 12, 2024
4c05304
Fix weight redistribution to limit to a specific number of bits
KillingSpark Oct 12, 2024
6d4920d
add two crashes that are already fixed
KillingSpark Oct 12, 2024
2726eaa
expand test to cover a weight distribution of all sizes
KillingSpark Oct 12, 2024
9d07056
cargo fmt
KillingSpark Oct 12, 2024
10ee6e1
no need to gate encoding behind std
KillingSpark Oct 12, 2024
f7a5ad2
mak clippy happy
KillingSpark Oct 12, 2024
ed219cc
more std import shenanigans
KillingSpark Oct 12, 2024
0e2435b
cargo fmt
KillingSpark Oct 12, 2024
02c4789
start fse encoder
KillingSpark Oct 13, 2024
d5e2543
need different table representation for encoding
KillingSpark Oct 13, 2024
96925de
build fse table for encoding
KillingSpark Oct 13, 2024
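The core of FSE table construction is the symbol-spreading walk from the zstd spec: slots are visited with step (size>>1) + (size>>3) + 3, which is odd for table sizes ≥ 16 and therefore hits every slot exactly once. A simplified sketch (it ignores the spec's special end-of-table placement for "less than 1" probabilities, and is not this PR's code):

```rust
/// Spreads each symbol over `probability` slots of a table of size
/// 1 << acc_log. The probabilities must sum to the table size.
fn spread_symbols(probs: &[(u8, usize)], acc_log: u32) -> Vec<u8> {
    assert!(acc_log >= 5, "zstd's minimum accuracy log is 5");
    let size = 1usize << acc_log;
    assert_eq!(probs.iter().map(|&(_, p)| p).sum::<usize>(), size);
    let step = (size >> 1) + (size >> 3) + 3; // odd, hence coprime with size
    let mask = size - 1;
    let mut table = vec![0u8; size];
    let mut pos = 0usize;
    for &(sym, p) in probs {
        for _ in 0..p {
            table[pos] = sym;
            pos = (pos + step) & mask;
        }
    }
    // After `size` steps with an odd step we are back at slot 0,
    // having written every slot exactly once.
    assert_eq!(pos, 0);
    table
}
```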
22bfdb3
start test for fse table creation, fix edgecase for 0 probability sym…
KillingSpark Oct 13, 2024
9039fd9
table generation now matches
KillingSpark Oct 13, 2024
4ca250d
first happy fse roundtrip
KillingSpark Oct 13, 2024
b97f962
cargo fmt
KillingSpark Oct 13, 2024
55b2918
unnecessary import
KillingSpark Oct 13, 2024
fd9c968
more roundtrips
KillingSpark Oct 13, 2024
3221f28
fuzz fse encoder
KillingSpark Oct 13, 2024
2f4e5d0
minimum acc_log is 5
KillingSpark Oct 13, 2024
78ce10a
implement fse table encoding
KillingSpark Oct 13, 2024
8ebd0db
misaligned should never report 8
KillingSpark Oct 13, 2024
b7b7d0b
encode huffman tables according to spec
KillingSpark Oct 15, 2024
561d7d5
start compression mode based solely on compressing literals
KillingSpark Oct 15, 2024
91d46cc
add test for a simple frame encode/decode cycle
KillingSpark Oct 15, 2024
9c3e610
fix blocksize for compressed blocks, a block may only contain 1<<18 -…
KillingSpark Oct 16, 2024
fc48aa5
add support for RLE blocks
KillingSpark Oct 16, 2024
e13886f
start fuzzing compression interop
KillingSpark Oct 16, 2024
cb8d4b7
added support for literals less than 5
KillingSpark Oct 16, 2024
402d180
fix single stream encoding
KillingSpark Oct 16, 2024
454ad7b
add compression and decoding to encoding fuzzer
KillingSpark Oct 16, 2024
fc91d6e
add compression and decoding to encoding fuzzer
KillingSpark Oct 16, 2024
1106501
huffman weights may only use a fse compression with 6 bits
KillingSpark Oct 17, 2024
eeb940f
implement decreasing the fse probabilities to fit max_log
KillingSpark Oct 17, 2024
61d5fe3
remove table modes from empty sequence sections
KillingSpark Oct 17, 2024
0176e0f
check in decoder if a sequence section with 0 sequences contains noth…
KillingSpark Oct 17, 2024
3d259ea
not using single segment frames and a window size allows inefficient …
KillingSpark Oct 17, 2024
45d5fba
switch back to huffman encoding literals
KillingSpark Oct 17, 2024
1a19804
improve interop fuzz
KillingSpark Oct 17, 2024
facb5d5
make window size big enough to fit whole 128kb block into it
KillingSpark Oct 18, 2024
932a12a
extend encodecorpus to test compression
KillingSpark Oct 18, 2024
3df96dd
if compressing a block makes it larger just encode it as a raw block …
KillingSpark Oct 18, 2024
79e1c99
raise the first fse probability to avoid 0 num bit states not the last
KillingSpark Oct 18, 2024
5ca4eaa
cargo fmt
KillingSpark Oct 18, 2024
443cac8
make clippy happy
KillingSpark Oct 18, 2024
aaad836
bitwriter can now work on a &mut Vec as output
KillingSpark Oct 20, 2024
e95fd06
4x huffman encoding doesnt use 4 vecs anymore
KillingSpark Oct 20, 2024
a8ab25e
add compression support to zstd binary
KillingSpark Oct 20, 2024
79c2491
optimize bitwriter to collect bits in a u64 before flushing to the ou…
KillingSpark Oct 20, 2024
0f96d67
make source a generic read impl
KillingSpark Oct 20, 2024
39aac8e
make the target a generic write impl
KillingSpark Oct 20, 2024
c83918b
todo
KillingSpark Oct 20, 2024
eeb4e08
remove unnecessary code from hot function
KillingSpark Oct 21, 2024
aea6a90
remove one += from hot function
KillingSpark Oct 21, 2024
a355e30
more efficient check for hot path
KillingSpark Oct 21, 2024
00e61d6
dry common code
KillingSpark Oct 21, 2024
6b26b2d
employ bufreader in zstd binary
KillingSpark Oct 23, 2024
8493d58
start match finding algorithm
KillingSpark Oct 28, 2024
262c034
start implementing a skip list for use in the match generator
KillingSpark Oct 29, 2024
e175cd9
use hashmap to do find indexes of matches of a minimum length
KillingSpark Nov 17, 2024
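A toy sketch of this indexing idea (a hypothetical helper, not the PR's matcher, and the PR's actual minimum match length isn't stated here): record where every `min_len`-byte window occurs so earlier occurrences of the same bytes can be looked up directly.

```rust
use std::collections::HashMap;

/// Maps each `min_len`-byte window of `data` to the positions where it starts.
fn build_index(data: &[u8], min_len: usize) -> HashMap<&[u8], Vec<usize>> {
    let mut index: HashMap<&[u8], Vec<usize>> = HashMap::new();
    if min_len > 0 && data.len() >= min_len {
        for start in 0..=data.len() - min_len {
            index
                .entry(&data[start..start + min_len])
                .or_default()
                .push(start);
        }
    }
    index
}
```

A match finder can then take the earlier positions for the current window as candidates and extend each one to find the longest match.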
a6024dc
only return None if the whole last data slice has been processed
KillingSpark Nov 17, 2024
219dbe2
matcher can now also accept data that doesnt need matches
KillingSpark Nov 17, 2024
dc8068a
start working on encoding the sequences
KillingSpark Nov 17, 2024
14bc05d
implement translation tables for ll/ml/of tables to codes and additio…
KillingSpark Nov 17, 2024
c9f4f03
implement FSE encoding sequence codes
KillingSpark Nov 17, 2024
8457c25
default tables
KillingSpark Nov 18, 2024
44fcb62
offset in last window slice is calculated differently
KillingSpark Nov 18, 2024
845008d
properly encode number of sequences
KillingSpark Nov 18, 2024
3f34638
cargo fmt
KillingSpark Nov 18, 2024
5b0a142
matcher now correctly returns a last literals sequence for the last u…
Nov 21, 2024
e4fcb81
add debug assertions that check that matches are correctly generated
Nov 21, 2024
37ec1e8
simplify offset calc, doesn't need a special case for the last slice
Nov 21, 2024
fc2a5cb
offsets are encoded as +3 to allow 1,2,3 to represent the last 3 offsets
Nov 21, 2024
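This offset convention comes from the zstd format: a real match distance is stored as distance + 3, leaving the values 1, 2 and 3 free to refer to the three most recently used offsets. A sketch:

```rust
/// What a sequence's offset field can refer to.
enum Offset {
    /// Index 0..=2 into the history of the three most recent offsets.
    Repeat(usize),
    /// An actual match distance.
    New(usize),
}

/// Maps an offset to the value encoded in the sequence section.
fn offset_value(o: Offset) -> usize {
    match o {
        Offset::Repeat(i) => {
            assert!(i < 3);
            i + 1 // 1, 2, 3 select a repeated offset
        }
        Offset::New(distance) => distance + 3,
    }
}
```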
90e0e62
fix error in default table impl
Nov 21, 2024
a5774de
fix edgecases around small data slices or few sequences
Nov 21, 2024
25315c9
remove warning
Nov 21, 2024
7d32f52
fix doc test
Nov 22, 2024
70e1501
fix fuzzing
Nov 22, 2024
677ddf4
make clippy happy
Nov 22, 2024
741a3f9
if literals get bigger by encoding them with a huffman tree, just wri…
KillingSpark Nov 22, 2024
fd0ae1f
expand nostd_io module to include write_all, read_to_end and impl Wri…
KillingSpark Nov 22, 2024
b3f20f6
use slices instead of fixed size arrays as key for match generator
Nov 25, 2024
247d870
use more efficient method of windowing slices
Nov 25, 2024
0530eb6
cargo fmt
Nov 25, 2024
54939d4
disable clippy lint for collapsible if. I think it's clearer this way…
Nov 28, 2024
f9c2484
encoding now actually streams the data instead of reading the source …
KillingSpark Nov 28, 2024
1690bd9
delete residual eprintln
KillingSpark Nov 28, 2024
48fac5a
make clippy happy
KillingSpark Nov 28, 2024
afc8cc7
remove decoding check from zstd binary
KillingSpark Nov 28, 2024
cb07e1c
unused import
KillingSpark Nov 28, 2024
d81c76b
zstd binary reports compression progress now
KillingSpark Nov 28, 2024
7cbeeac
this definitely still needs work...
KillingSpark Nov 28, 2024
421d1c1
No need for a hashtable actually
KillingSpark Nov 29, 2024
8b42cb6
make clippy happy
KillingSpark Nov 29, 2024
699c075
better key function
Nov 29, 2024
899410d
cargo fmt
Nov 29, 2024
79e45c4
even better key function
Nov 29, 2024
f98a837
fix multiply overflow
KillingSpark Nov 29, 2024
7d23961
reenable test
KillingSpark Nov 29, 2024
869a7bf
better poly in key function
KillingSpark Nov 29, 2024
d17af02
improve match length finding
Dec 2, 2024
feb6f5f
inline makes a significant difference
Dec 2, 2024
228353b
document the matcher trait and make it slightly easier to use
Dec 2, 2024
9fbe7dd
cargo fmt
Dec 2, 2024
8988bc3
fix hash collision in test
Dec 2, 2024
b0c65fa
also reuse suffix store allocations
Dec 2, 2024
e99c64f
fix test and enable niche optimization for Option<NonZeroUsize>
Dec 2, 2024
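The niche optimization referenced here can be checked directly: because NonZeroUsize can never hold zero, the compiler uses the all-zero bit pattern to represent None, so the Option costs no space over a plain usize.

```rust
use core::mem::size_of;
use core::num::NonZeroUsize;

/// True when Option<NonZeroUsize> occupies no more space than usize.
fn option_is_free() -> bool {
    size_of::<Option<NonZeroUsize>>() == size_of::<usize>()
}
```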
124793c
std -> core import
Dec 2, 2024
7819c14
small optimization around fse encoding
Dec 4, 2024
28c3084
doc and comments in fse encoder
Dec 12, 2024
61829d7
doc and comments in huff0 encoder
Dec 13, 2024
b9029f0
more doc comments
Dec 13, 2024
a56a40b
doc and comments in match generator
Dec 13, 2024
4499aac
more doc and slightly better Matcher api
Dec 13, 2024
1237041
add convenience functions for compressing data
Dec 13, 2024
99dee17
update readme and changelog
Dec 13, 2024
6c2bcd7
cargo fmt
Dec 13, 2024
Files changed
2 changes: 1 addition & 1 deletion Changelog.md
@@ -33,4 +33,4 @@ This document records the changes made between versions, starting with version 0
* Added convenience functions to FrameDecoder to decode multiple frames from a buffer (https://github.com/philipc)

# After 0.7.3

* Add initial compression support
14 changes: 9 additions & 5 deletions Readme.md
@@ -9,8 +9,7 @@
A pure Rust implementation of the Zstandard compression algorithm, as defined in [this document](https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md).

This crate contains a fully operational implementation of the decompression portion of the standard.

-*Work has started on a compressor, but it has not reached a point where the compressor provides any real function.* (CONTRIBUTORS WELCOME)
+It also provides a compressor which is usable, but it does not yet reach the speed, ratio or configurability of the original zstd library.

This crate is currently actively maintained.

@@ -19,9 +18,14 @@ This crate is currently actively maintained.
Feature complete on the decoder side. In terms of speed it is still behind the original C implementation which has a rust binding located [here](https://github.com/gyscos/zstd-rs).

On the compression side:
-- [x] Support for generating raw, uncompressed frames
-- [ ] Support for generating RLE compressed blocks
-- [ ] Support for generating compressed blocks at any compression level
+- Support for generating compressed blocks at any compression level
+  - [x] Uncompressed
+  - [x] Fastest (roughly level 1)
+  - [ ] Default (roughly level 3)
+  - [ ] Better (roughly level 7)
+  - [ ] Best (roughly level 11)
+- [ ] Checksums
+- [ ] Dictionaries

## Speed

8 changes: 8 additions & 0 deletions fuzz/Cargo.toml
@@ -31,3 +31,11 @@ path = "fuzz_targets/encode.rs"
[[bin]]
name = "interop"
path = "fuzz_targets/interop.rs"

[[bin]]
name = "huff0"
path = "fuzz_targets/huff0.rs"

[[bin]]
name = "fse"
path = "fuzz_targets/fse.rs"
19 changes: 16 additions & 3 deletions fuzz/fuzz_targets/encode.rs
@@ -4,8 +4,21 @@ extern crate ruzstd;
use ruzstd::encoding::{FrameCompressor, CompressionLevel};

fuzz_target!(|data: &[u8]| {
-let mut content = data;
-let mut compressor = FrameCompressor::new(data, CompressionLevel::Uncompressed);
 let mut output = Vec::new();
-compressor.compress(&mut output);
+let mut compressor = FrameCompressor::new(data, &mut output, CompressionLevel::Uncompressed);
+compressor.compress();
+
+let mut decoded = Vec::with_capacity(data.len());
+let mut decoder = ruzstd::FrameDecoder::new();
+decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
+assert_eq!(data, &decoded);
+
+let mut output = Vec::new();
+let mut compressor = FrameCompressor::new(data, &mut output, CompressionLevel::Fastest);
+compressor.compress();
+
+let mut decoded = Vec::with_capacity(data.len());
+let mut decoder = ruzstd::FrameDecoder::new();
+decoder.decode_all_to_vec(&output, &mut decoded).unwrap();
+assert_eq!(data, &decoded);
});
8 changes: 8 additions & 0 deletions fuzz/fuzz_targets/fse.rs
@@ -0,0 +1,8 @@
#![no_main]
#[macro_use] extern crate libfuzzer_sys;
extern crate ruzstd;
use ruzstd::fse::round_trip;

fuzz_target!(|data: &[u8]| {
round_trip(data);
});
8 changes: 8 additions & 0 deletions fuzz/fuzz_targets/huff0.rs
@@ -0,0 +1,8 @@
#![no_main]
#[macro_use] extern crate libfuzzer_sys;
extern crate ruzstd;
use ruzstd::huff0::round_trip;

fuzz_target!(|data: &[u8]| {
round_trip(data);
});
21 changes: 19 additions & 2 deletions fuzz/fuzz_targets/interop.rs
@@ -33,10 +33,19 @@ fn encode_zstd(data: &[u8]) -> Result<Vec<u8>, std::io::Error> {

 fn encode_ruzstd_uncompressed(data: &mut dyn std::io::Read) -> Vec<u8> {
 let mut input = Vec::new();
 let mut output = Vec::new();
 data.read_to_end(&mut input).unwrap();
-let mut compressor = ruzstd::encoding::FrameCompressor::new(&input, ruzstd::encoding::CompressionLevel::Uncompressed);
-compressor.compress(&mut output);
+let mut compressor = ruzstd::encoding::FrameCompressor::new(input.as_slice(), &mut output, ruzstd::encoding::CompressionLevel::Uncompressed);
+compressor.compress();
 output
 }
+
+fn encode_ruzstd_compressed(data: &mut dyn std::io::Read) -> Vec<u8> {
+let mut input = Vec::new();
+let mut output = Vec::new();
+data.read_to_end(&mut input).unwrap();
+let mut compressor = ruzstd::encoding::FrameCompressor::new(input.as_slice(), &mut output, ruzstd::encoding::CompressionLevel::Fastest);
+compressor.compress();
+output
+}

@@ -69,4 +78,12 @@ fuzz_target!(|data: &[u8]| {
decoded, data,
"Decoded data did not match the original input during compression"
);
// Compressed encoding
let mut input = data;
let compressed = encode_ruzstd_compressed(&mut input);
let decoded = decode_zstd(&compressed).unwrap();
assert_eq!(
decoded, data,
"Decoded data did not match the original input during compression"
);
});
67 changes: 62 additions & 5 deletions src/bin/zstd.rs
@@ -1,10 +1,14 @@
extern crate ruzstd;
use std::fs::File;
use std::io::BufReader;
use std::io::Read;
use std::io::Seek;
use std::io::SeekFrom;
use std::io::Write;
use std::time::Instant;

use ruzstd::encoding::CompressionLevel;
use ruzstd::encoding::FrameCompressor;
use ruzstd::frame::ReadFrameHeaderError;
use ruzstd::frame_decoder::FrameDecoderError;

@@ -18,11 +22,7 @@ struct StateTracker {
old_percentage: i8,
}

-fn main() {
-    let mut file_paths: Vec<_> = std::env::args().filter(|f| !f.starts_with('-')).collect();
-    let flags: Vec<_> = std::env::args().filter(|f| f.starts_with('-')).collect();
-    file_paths.remove(0);
-
+fn decompress(flags: &[String], file_paths: &[String]) {
if !flags.contains(&"-d".to_owned()) {
eprintln!("This zstd implementation only supports decompression. Please add a \"-d\" flag");
return;
@@ -128,6 +128,63 @@
}
}

struct PercentPrintReader<R: Read> {
total: usize,
counter: usize,
last_percent: usize,
reader: R,
}

impl<R: Read> Read for PercentPrintReader<R> {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
let new_bytes = self.reader.read(buf)?;
self.counter += new_bytes;
let progress = self.counter * 100 / self.total;
if progress > self.last_percent {
self.last_percent = progress;
eprint!("\r");
eprint!("{} % done", progress);
}
Ok(new_bytes)
}
}

fn main() {
let mut file_paths: Vec<_> = std::env::args().filter(|f| !f.starts_with('-')).collect();
let flags: Vec<_> = std::env::args().filter(|f| f.starts_with('-')).collect();
file_paths.remove(0);

if flags.is_empty() {
for path in file_paths {
let start_instant = Instant::now();
let file = std::fs::File::open(&path).unwrap();
let input_len = file.metadata().unwrap().len() as usize;
let file = PercentPrintReader {
reader: BufReader::new(file),
total: input_len,
counter: 0,
last_percent: 0,
};
let mut output = Vec::new();
let mut encoder = FrameCompressor::new(file, &mut output, CompressionLevel::Fastest);
encoder.compress();
println!(
"Compressed {path:} from {} to {} ({}%) took {}ms",
input_len,
output.len(),
if input_len == 0 {
0
} else {
output.len() * 100 / input_len
},
start_instant.elapsed().as_millis()
);
}
} else {
decompress(&flags, &file_paths);
}
}

fn do_something(data: &[u8], s: &mut StateTracker) {
//Do something. Like writing it to a file or to stdout...
std::io::stdout().write_all(data).unwrap();
7 changes: 7 additions & 0 deletions src/decoding/block_decoder.rs
@@ -447,6 +447,13 @@ impl BlockDecoder {
vprintln!("Executing sequences");
execute_sequences(workspace)?;
} else {
if !raw.is_empty() {
return Err(DecompressBlockError::DecodeSequenceError(
DecodeSequenceError::ExtraBits {
bits_remaining: raw.len() as isize * 8,
},
));
}
workspace.buffer.push(&workspace.literals_buffer);
workspace.sequences.clear();
}
2 changes: 1 addition & 1 deletion src/decoding/decodebuffer.rs
@@ -285,7 +285,7 @@
amount: usize,
}

-impl<'a> Drop for DrainGuard<'a> {
+impl Drop for DrainGuard<'_> {
fn drop(&mut self) {
if self.amount != 0 {
self.buffer.drop_first_n(self.amount);
1 change: 1 addition & 0 deletions src/decoding/ringbuffer.rs
@@ -362,6 +362,7 @@ impl RingBuffer {
unsafe { copy_bytes_overshooting(src, dst, len - after_tail) }
}
} else {
#[allow(clippy::collapsible_else_if)]
if self.head + start > self.cap {
// Continuous read section and destination section:
//