Skip to content

Commit

Permalink
if literals get bigger by encoding them with a huffman tree, just wri…
Browse files Browse the repository at this point in the history
…te the literals raw
  • Loading branch information
KillingSpark committed Nov 22, 2024
1 parent 9e665fe commit f8a755b
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
8 changes: 8 additions & 0 deletions src/encoding/bit_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ impl<V: AsMut<Vec<u8>>> BitWriter<V> {
self.bit_idx + self.bits_in_partial
}

/// Rewinds (or repositions) the writer to the byte-aligned bit position `index`.
///
/// The output buffer is resized to exactly `index / 8` bytes: anything written
/// past that point is dropped, and if the buffer was shorter it is padded with
/// zero bytes. Any bits still buffered in the partial word are discarded.
///
/// # Panics
///
/// Panics if `index` is not a multiple of 8; only whole-byte positions are supported.
pub fn reset_to(&mut self, index: usize) {
    assert!(index % 8 == 0);

    // Bring the backing buffer to the requested length first ...
    self.output.as_mut().resize(index / 8, 0);

    // ... then make the bookkeeping agree with it: the write position is
    // `index` and no bits are pending in the partial accumulator.
    self.bit_idx = index;
    self.bits_in_partial = 0;
    self.partial = 0;
}

/// Convenience wrapper around `change_bits_64` that accepts any value convertible
/// into a `u64`.
///
/// `idx`, the widened `bits` value and `num_bits` are forwarded unchanged; see
/// `change_bits_64` for the exact semantics of the overwrite.
pub fn change_bits(&mut self, idx: usize, bits: impl Into<u64>, num_bits: usize) {
    // Widen once up front so the 64-bit implementation does all the work.
    let value: u64 = bits.into();
    self.change_bits_64(idx, value, num_bits);
}
Expand Down
10 changes: 8 additions & 2 deletions src/encoding/blocks/compressed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,6 @@ fn encode_offset(len: u32) -> (u8, u32, usize) {
(log as u8, lower, log as usize)
}

// TODO find use case for this
#[allow(dead_code)]
fn raw_literals(literals: &[u8], writer: &mut BitWriter<&mut Vec<u8>>) {
writer.write_bits(0u8, 2);
writer.write_bits(0b11u8, 2);
Expand All @@ -210,6 +208,7 @@ fn raw_literals(literals: &[u8], writer: &mut BitWriter<&mut Vec<u8>>) {
}

fn compress_literals(literals: &[u8], writer: &mut BitWriter<&mut Vec<u8>>) {
let reset_idx = writer.index();
writer.write_bits(2u8, 2); // compressed literals type

let encoder_table = huff0_encoder::HuffmanTable::build_from_data(literals);
Expand All @@ -235,4 +234,11 @@ fn compress_literals(literals: &[u8], writer: &mut BitWriter<&mut Vec<u8>>) {
};
let encoded_len = (writer.index() - index_before) / 8;
writer.change_bits(size_index, encoded_len as u64, size_bits);
let total_len = (writer.index() - reset_idx) / 8;

// If encoded len is bigger than the raw literals we are better off just writing the raw literals here
if total_len >= literals.len() {
writer.reset_to(reset_idx);
raw_literals(literals, writer);
}
}

0 comments on commit f8a755b

Please sign in to comment.