Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add (random) seed api #22

Merged
merged 4 commits into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,9 @@ keywords = ["hash", "hasher", "fxhash", "rustc"]
repository = "https://github.com/rust-lang-nursery/rustc-hash"

[features]
std = []
default = ["std"]
std = []
rand = ["dep:rand", "std"]

[dependencies]
rand = { version = "0.8", optional = true }
47 changes: 46 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@
#[cfg(feature = "std")]
extern crate std;

#[cfg(feature = "rand")]
extern crate rand;

#[cfg(feature = "rand")]
mod random_state;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a blank line between the two module declarations to make it more obvious that only the former is `cfg`-gated?


mod seeded_state;

use core::convert::TryInto;
use core::default::Default;
#[cfg(feature = "std")]
Expand All @@ -46,6 +54,11 @@ pub type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
#[cfg(feature = "std")]
pub type FxHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>;

#[cfg(feature = "rand")]
pub use random_state::{FxHashMapRand, FxHashSetRand, FxRandomState};

pub use seeded_state::{FxHashMapSeed, FxHashSetSeed, FxSeededState};

/// A speedy hash algorithm for use within rustc. The hashmap in liballoc
/// by default uses SipHash which isn't quite as speedy as we want. In the
/// compiler we're not really worried about DOS attempts, so we use a fast
Expand All @@ -67,6 +80,13 @@ const K: usize = 0x9e3779b9;
#[cfg(target_pointer_width = "64")]
const K: usize = 0x517cc1b727220a95;

impl FxHasher {
    /// Creates an `fx` hasher with a given seed.
    ///
    /// The seed becomes the initial value of the hasher's internal `hash`
    /// state.
    // `#[inline]` keeps this trivial constructor consistent with
    // `Default::default`, which is marked the same way.
    #[inline]
    pub fn with_seed(seed: usize) -> FxHasher {
        FxHasher { hash: seed }
    }
}

impl Default for FxHasher {
#[inline]
fn default() -> FxHasher {
Expand Down Expand Up @@ -154,7 +174,7 @@ mod tests {
compile_error!("The test suite only supports 64 bit and 32 bit usize");

use crate::FxHasher;
use core::hash::{BuildHasher, BuildHasherDefault, Hash};
use core::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};

macro_rules! test_hash {
(
Expand Down Expand Up @@ -266,4 +286,29 @@ mod tests {
hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2345708736 } else { 12390864548135261390 },
}
}

/// Checks that, for each seed pair below, hashing any single byte with the
/// two seeds gives two different results.
#[test]
fn with_seed_actually_different() {
    let seed_pairs = [
        [1, 2],
        [42, 17],
        [124436707, 99237],
        [usize::MIN, usize::MAX],
    ];

    // Hash `byte` with a brand-new hasher that starts from `seed`.
    let hash_with = |seed: usize, byte: u8| {
        let mut hasher = FxHasher::with_seed(seed);
        byte.hash(&mut hasher);
        hasher.finish()
    };

    for [first, second] in seed_pairs {
        for byte in u8::MIN..=u8::MAX {
            assert_ne!(hash_with(first, byte), hash_with(second, byte));
        }
    }
}
}
92 changes: 92 additions & 0 deletions src/random_state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
use std::collections::{HashMap, HashSet};

use crate::FxHasher;

/// Type alias for a hashmap using the `fx` hash algorithm with [`FxRandomState`].
pub type FxHashMapRand<K, V> = HashMap<K, V, FxRandomState>;

/// Type alias for a hashset using the `fx` hash algorithm with [`FxRandomState`].
pub type FxHashSetRand<V> = HashSet<V, FxRandomState>;

/// `FxRandomState` is an alternative state for `HashMap` types.
///
/// A particular instance of `FxRandomState` will create the same instances of
/// [`Hasher`](core::hash::Hasher), but the hashers created by two different
/// `FxRandomState` instances are unlikely to produce the same result for the
/// same values.
///
/// The state is [`Clone`] because `HashMap` and `HashSet` can only be cloned
/// when their hasher state can; a clone carries the same seed as the original.
#[derive(Clone)]
pub struct FxRandomState {
    /// Seed passed to every hasher built from this state.
    seed: usize,
}

impl FxRandomState {
    /// Builds a fresh `FxRandomState` carrying a randomly chosen seed.
    pub fn new() -> FxRandomState {
        use rand::Rng;
        use std::{cell::Cell, thread_local};

        // Same scheme as `std::collections::hash_map::RandomState` (as of 2024-01-14):
        //
        // * the rng is consulted once per thread and its output is kept in a
        //   thread local, which makes creating many maps cheap;
        // * the stored value is bumped on every call, so two maps created on
        //   the same thread do not end up with the same iteration order.
        thread_local! {
            static SEED: Cell<usize> = Cell::new(rand::thread_rng().gen());
        }

        // `replace` stores the incremented value and hands back the previous
        // one, which becomes the seed of this instance.
        let seed = SEED.with(|next| next.replace(next.get().wrapping_add(1)));
        FxRandomState { seed }
    }
}

impl core::hash::BuildHasher for FxRandomState {
type Hasher = FxHasher;

fn build_hasher(&self) -> Self::Hasher {
FxHasher::with_seed(self.seed)
}
}

impl Default for FxRandomState {
fn default() -> Self {
Self::new()
}
}

#[cfg(test)]
mod tests {
    use std::thread;

    use crate::FxHashMapRand;

    /// Creates an empty default map and returns the seed held by its hasher state.
    fn fresh_seed() -> usize {
        FxHashMapRand::<&str, u32>::default().hasher().seed
    }

    #[test]
    fn random_states_are_different() {
        let first = fresh_seed();
        let second = fresh_seed();

        // Differing seeds are the whole point of a random state.
        //
        // N.B.: `FxRandomState` takes its seed from a thread-local that starts at a random
        // value and is incremented on each use, so on a single thread this check is
        // *guaranteed* to pass.
        assert_ne!(first, second);
    }

    #[test]
    fn random_states_are_different_cross_thread() {
        // Like the test above, except the two states come from different threads and thus
        // from independently drawn random values.
        //
        // Strictly speaking this makes the test flaky, but it only fails when both threads
        // draw the same seed, i.e. with probability `1 / 2.pow(bit_size_of::<usize>())`:
        // about 1 in 1.8e19 on 64 bit platforms, or 1 in 4294967296 on 32 bit platforms.
        // That is acceptable.
        let local = fresh_seed();
        let remote = thread::spawn(fresh_seed).join().unwrap();

        assert_ne!(local, remote);
    }
}
56 changes: 56 additions & 0 deletions src/seeded_state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
use std::collections::{HashMap, HashSet};

use crate::FxHasher;

/// Type alias for a hashmap using the `fx` hash algorithm with [`FxSeededState`].
pub type FxHashMapSeed<K, V> = HashMap<K, V, FxSeededState>;

/// Type alias for a hashset using the `fx` hash algorithm with [`FxSeededState`].
pub type FxHashSetSeed<V> = HashSet<V, FxSeededState>;

/// [`FxSeededState`] is an alternative state for `HashMap` types, allowing to use [`FxHasher`] with a set seed.
///
/// The state is [`Clone`] because `HashMap` and `HashSet` can only be cloned
/// when their hasher state can; a clone carries the same seed as the original.
///
/// ```
/// # use std::collections::HashMap;
/// use rustc_hash::FxSeededState;
///
/// let mut map = HashMap::with_hasher(FxSeededState::with_seed(12));
/// map.insert(15, 610);
/// assert_eq!(map[&15], 610);
/// ```
#[derive(Clone)]
pub struct FxSeededState {
    /// Seed passed to every hasher built from this state.
    seed: usize,
}

impl FxSeededState {
/// Constructs a new `FxSeededState` that is initialized with a `seed`.
pub fn with_seed(seed: usize) -> FxSeededState {
Self { seed }
}
}

impl core::hash::BuildHasher for FxSeededState {
type Hasher = FxHasher;

fn build_hasher(&self) -> Self::Hasher {
FxHasher::with_seed(self.seed)
}
}

#[cfg(test)]
mod tests {
    use core::hash::BuildHasher;

    use crate::{FxHashMapSeed, FxSeededState};

    #[test]
    fn different_states_are_different() {
        // Initial `hash` state of a hasher built by a map that was created with `seed`.
        let initial_hash = |seed: usize| {
            FxHashMapSeed::<&str, u32>::with_hasher(FxSeededState::with_seed(seed))
                .hasher()
                .build_hasher()
                .hash
        };

        assert_ne!(initial_hash(1), initial_hash(2));
    }
}