diff --git a/Cargo.toml b/Cargo.toml
index bc4fb16..4343332 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "rustc-hash"
-version = "1.1.0"
+version = "2.0.0"
 authors = ["The Rust Project Developers"]
 description = "speedy, non-cryptographic hash used in rustc"
 license = "Apache-2.0/MIT"
@@ -15,3 +15,7 @@ rand = ["dep:rand", "std"]
 
 [dependencies]
 rand = { version = "0.8", optional = true }
+
+[dev-dependencies]
+fnv = "1.0"
+rand = "0.8"
diff --git a/benches/strings.rs b/benches/strings.rs
new file mode 100644
index 0000000..d07c0af
--- /dev/null
+++ b/benches/strings.rs
@@ -0,0 +1,94 @@
+#![feature(test)]
+
+extern crate fnv;
+extern crate rand;
+extern crate rustc_hash;
+
+extern crate test;
+use test::{black_box, Bencher};
+
+use std::collections::HashSet;
+use std::hash::BuildHasher;
+
+use fnv::FnvHashSet;
+use rand::{distributions::Alphanumeric, rngs::StdRng, Rng, SeedableRng};
+use rustc_hash::FxHashSet;
+
+/// Builds a set of `M` distinct random alphanumeric strings, each with a
+/// length drawn from `0..N`.
+///
+/// The RNG is seeded with a fixed value, so every hasher under comparison is
+/// given the same sequence of candidate strings.
+fn strings<const N: usize, const M: usize, H>() -> HashSet<String, H>
+where
+    H: BuildHasher + Default,
+{
+    let mut strings = HashSet::default();
+
+    let rng = &mut StdRng::seed_from_u64(42);
+
+    while strings.len() < M {
+        let length = rng.gen_range(0..N);
+
+        let string: String = rng
+            .sample_iter(&Alphanumeric)
+            .take(length)
+            .map(char::from)
+            .collect();
+
+        strings.insert(string);
+    }
+
+    strings
+}
+
+/// Looks up every string of the set in that same set and reports whether all
+/// lookups succeeded.
+///
+/// Both views of the set are passed through `black_box`, presumably so the
+/// optimizer cannot elide the lookups.
+fn find_strings<H>(strings: &HashSet<String, H>) -> bool
+where
+    H: BuildHasher,
+{
+    let haystack = black_box(strings);
+    let needles = black_box(strings);
+
+    needles.iter().all(|needle| haystack.contains(needle))
+}
+
+/// Generates a module `$name` holding a pair of benchmarks, `fx` and `fnv`,
+/// that run `find_strings` on a set built by
+/// `strings::<$string_length, $table_size, _>()`.
+macro_rules! compare_fx_fnv {
+    ($name:ident, $string_length:expr, $table_size:expr) => {
+        mod $name {
+            use super::*;
+
+            #[bench]
+            fn fx(bencher: &mut Bencher) {
+                let strings: FxHashSet<String> = strings::<$string_length, $table_size, _>();
+                bencher.iter(|| find_strings(&strings));
+            }
+
+            #[bench]
+            fn fnv(bencher: &mut Bencher) {
+                let strings: FnvHashSet<String> = strings::<$string_length, $table_size, _>();
+                bencher.iter(|| find_strings(&strings));
+            }
+        }
+    };
+}
+
+compare_fx_fnv!(few_tiny, 3, 1_000);
+compare_fx_fnv!(few_small, 7, 1_000);
+compare_fx_fnv!(few_medium, 15, 1_000);
+compare_fx_fnv!(few_large, 47, 1_000);
+
+compare_fx_fnv!(some_small, 7, 10_000);
+compare_fx_fnv!(some_medium, 15, 10_000);
+compare_fx_fnv!(some_large, 47, 10_000);
+
+compare_fx_fnv!(many_small, 7, 100_000);
+compare_fx_fnv!(many_medium, 15, 100_000);
+compare_fx_fnv!(many_large, 47, 100_000);
diff --git a/src/lib.rs b/src/lib.rs
index 58652dd..aaffa09 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -104,29 +104,41 @@ impl FxHasher {
 impl Hasher for FxHasher {
+    /// Feeds `bytes` into the hash: whole `usize` words first, then the
+    /// shorter tail.
     #[inline]
     fn write(&mut self, mut bytes: &[u8]) {
-        #[cfg(target_pointer_width = "32")]
-        let read_usize = |bytes: &[u8]| u32::from_ne_bytes(bytes[..4].try_into().unwrap());
-        #[cfg(target_pointer_width = "64")]
-        let read_usize = |bytes: &[u8]| u64::from_ne_bytes(bytes[..8].try_into().unwrap());
-
-        let mut hash = FxHasher { hash: self.hash };
-        assert!(size_of::<usize>() <= 8);
-        while bytes.len() >= size_of::<usize>() {
-            hash.add_to_hash(read_usize(bytes) as usize);
-            bytes = &bytes[size_of::<usize>()..];
-        }
-        if (size_of::<usize>() > 4) && (bytes.len() >= 4) {
-            hash.add_to_hash(u32::from_ne_bytes(bytes[..4].try_into().unwrap()) as usize);
-            bytes = &bytes[4..];
+        // Width of `usize` in bytes; the range check runs at compile time.
+        const SIZE: usize = {
+            let size = size_of::<usize>();
+            assert!(1 <= size && size <= 8);
+            size
+        };
+
+        // Work on a local copy and write it back once at the end.
+        let mut hash = self.clone();
+
+        // Consume the input one native-endian `usize` word at a time.
+        while bytes.len() >= SIZE {
+            let (chunk, rest) = bytes.split_at(SIZE);
+            hash.add_to_hash(usize::from_ne_bytes(chunk.try_into().unwrap()));
+            bytes = rest;
         }
-        if (size_of::<usize>() > 2) && bytes.len() >= 2 {
-            hash.add_to_hash(u16::from_ne_bytes(bytes[..2].try_into().unwrap()) as usize);
-            bytes = &bytes[2..];
+
+        // Where `usize` is wider than 4 bytes, fold in one remaining 4-byte group.
+        if SIZE > 4 && bytes.len() >= 4 {
+            let (chunk, rest) = bytes.split_at(4);
+            hash.add_to_hash(u32::from_ne_bytes(chunk.try_into().unwrap()) as usize);
+            bytes = rest;
         }
-        if (size_of::<usize>() > 1) && bytes.len() >= 1 {
-            hash.add_to_hash(bytes[0] as usize);
+        // Pack whatever bytes are left, first byte most significant, into one word.
+        if !bytes.is_empty() {
+            let mut chunk = 0;
+            for &byte in bytes {
+                chunk = chunk << 8 | byte as usize;
+            }
+            hash.add_to_hash(chunk);
         }
-        self.hash = hash.hash;
+
+        *self = hash;
     }
 
     #[inline]
@@ -282,7 +294,7 @@ mod tests {
             hash(HashBytes(&[0, 0, 0, 0, 0, 0])) == 0,
             hash(HashBytes(&[1])) == if B32 { 2654435769 } else { 5871781006564002453 },
             hash(HashBytes(&[2])) == if B32 { 1013904242 } else { 11743562013128004906 },
-            hash(HashBytes(b"uwu")) == if B32 { 3939043750 } else { 16622306935539548858 },
+            hash(HashBytes(b"uwu")) == if B32 { 3128729741 } else { 14178895633054898457 },
             hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2345708736 } else { 12390864548135261390 },
         }
     }