From 6729134d5bd2714c5a79727fb03412e8cb9d26f0 Mon Sep 17 00:00:00 2001 From: Chris Hennick Date: Sat, 24 Jun 2023 11:18:52 -0700 Subject: [PATCH] Let user specify iterations without improvement rather than an absolute maximum --- src/deflate.rs | 20 +++++++++++++++++--- src/lib.rs | 10 +++++++--- src/squeeze.rs | 32 ++++++++++++++++++++++++-------- 3 files changed, 48 insertions(+), 14 deletions(-) diff --git a/src/deflate.rs b/src/deflate.rs index 3b5b735..4cd0a3e 100644 --- a/src/deflate.rs +++ b/src/deflate.rs @@ -1,5 +1,5 @@ use alloc::vec::Vec; -use core::{cmp, iter}; +use core::{cmp, iter, num::NonZeroU64}; use log::{debug, log_enabled}; @@ -1186,7 +1186,14 @@ fn blocksplit_attempt( for &item in &splitpoints_uncompressed { let mut s = ZopfliBlockState::new(options, last, item); - let store = lz77_optimal(&mut s, in_data, last, item, options.iteration_count.get()); + let store = lz77_optimal( + &mut s, + in_data, + last, + item, + options.iteration_count.map(NonZeroU64::get), + options.iterations_without_improvement.map(NonZeroU64::get), + ); totalcost += calculate_block_size_auto_type(&store, 0, store.size()); // ZopfliAppendLZ77Store(&store, &lz77); @@ -1202,7 +1209,14 @@ fn blocksplit_attempt( let mut s = ZopfliBlockState::new(options, last, inend); - let store = lz77_optimal(&mut s, in_data, last, inend, options.iteration_count.get()); + let store = lz77_optimal( + &mut s, + in_data, + last, + inend, + options.iteration_count.map(NonZeroU64::get), + options.iterations_without_improvement.map(NonZeroU64::get), + ); totalcost += calculate_block_size_auto_type(&store, 0, store.size()); // ZopfliAppendLZ77Store(&store, &lz77); diff --git a/src/lib.rs b/src/lib.rs index ddac158..bafcbe1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,7 +60,7 @@ mod util; #[cfg(feature = "zlib")] mod zlib; -use core::num::NonZeroU8; +use core::num::NonZeroU64; #[cfg(all(not(doc), feature = "std"))] use std::io::{Error, Write}; @@ -83,7 +83,10 @@ pub struct Options { strategy = 
"(1..=10u8).prop_map(|iteration_count| NonZeroU8::new(iteration_count).unwrap())" ) )] - pub iteration_count: NonZeroU8, + pub iteration_count: Option<NonZeroU64>, + /// Stop after rerunning forward and backward pass this many times without finding + /// a smaller representation of the block. + pub iterations_without_improvement: Option<NonZeroU64>, /// Maximum amount of blocks to split into (0 for unlimited, but this can give /// extreme results that hurt compression on some files). /// @@ -94,7 +97,8 @@ pub struct Options { impl Default for Options { fn default() -> Options { Options { - iteration_count: NonZeroU8::new(15).unwrap(), + iteration_count: Some(NonZeroU64::new(15).unwrap()), + iterations_without_improvement: None, maximum_block_splits: 15, } } diff --git a/src/squeeze.rs b/src/squeeze.rs index 04b90f7..f04de97 100644 --- a/src/squeeze.rs +++ b/src/squeeze.rs @@ -450,7 +450,8 @@ pub fn lz77_optimal( in_data: &[u8], instart: usize, inend: usize, - numiterations: u8, + max_iterations: Option<u64>, + max_iterations_without_improvement: Option<u64>, ) -> Lz77Store { /* Dist to get to here with smallest cost. */ let mut currentstore = Lz77Store::new(); @@ -470,13 +471,15 @@ pub fn lz77_optimal( let mut lastcost = 0.0; /* Try randomizing the costs a bit once the size stabilizes. */ let mut ran_state = RanState::new(); - let mut lastrandomstep = -1; + let mut lastrandomstep = u64::MAX; /* Do regular deflate, then loop multiple shortest path runs, each time using the statistics of the previous run. */ /* Repeat statistics with each time the cost model from the previous stat run. */ - for i in 0..numiterations as i32 { + let mut current_iteration: u64 = 0; + let mut iterations_without_improvement: u64 = 0; + loop { currentstore.reset(); lz77_optimal_run( s, @@ -491,30 +494,43 @@ pub fn lz77_optimal( let cost = calculate_block_size(&currentstore, 0, currentstore.size(), BlockType::Dynamic); if cost < bestcost { + iterations_without_improvement = 0; /* Copy to the output store.
*/ outputstore = currentstore.clone(); beststats = stats; bestcost = cost; - debug!("Iteration {}: {} bit", i, cost); + debug!("Iteration {}: {} bit", current_iteration, cost); } else { - trace!("Iteration {}: {} bit", i, cost); + iterations_without_improvement += 1; + trace!("Iteration {}: {} bit", current_iteration, cost); + if let Some(max_iterations_without_improvement) = max_iterations_without_improvement { + if iterations_without_improvement >= max_iterations_without_improvement { + break; + } + } + } + current_iteration += 1; + if let Some(max_iterations) = max_iterations { + if current_iteration >= max_iterations { + break; + } } let laststats = stats; stats.clear_freqs(); stats.get_statistics(&currentstore); - if lastrandomstep != -1 { + if lastrandomstep != u64::MAX { /* This makes it converge slower but better. Do it only once the randomness kicks in so that if the user does few iterations, it gives a better result sooner. */ stats = add_weighed_stat_freqs(&stats, 1.0, &laststats, 0.5); stats.calculate_entropy(); } - if i > 5 && (cost - lastcost).abs() < f64::EPSILON { + if current_iteration > 5 && (cost - lastcost).abs() < f64::EPSILON { stats = beststats; stats.randomize_stat_freqs(&mut ran_state); stats.calculate_entropy(); - lastrandomstep = i; + lastrandomstep = current_iteration; } lastcost = cost; }