From d9ef59e2e28cf8fb6e31ed98b8614e76f60dd384 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 13 Dec 2023 10:57:12 +0100 Subject: [PATCH] Standardize on a single fast hasher, i.e. rustc-hash We currently use both fnv and rustc-hash which basically fulfill the same role of very fast but low quality hashes. Since rustc-hash was originally created as a more modern replacement for fnv, let's standardize on that one. --- Cargo.toml | 1 - src/core/inverted_index_reader.rs | 4 ++-- src/core/segment_reader.rs | 4 ++-- src/indexer/path_to_unordered_id.rs | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 90aeb97f41..d80598b787 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,7 +61,6 @@ common = { version= "0.6", path = "./common/", package = "tantivy-common" } tokenizer-api = { version= "0.2", path="./tokenizer-api", package="tantivy-tokenizer-api" } sketches-ddsketch = { version = "0.2.1", features = ["use_serde"] } futures-util = { version = "0.3.28", optional = true } -fnv = "1.0.7" [target.'cfg(windows)'.dependencies] winapi = "0.3.9" diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index 059ec988c1..23d22b0d33 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -1,7 +1,7 @@ use std::io; use common::BinarySerializable; -use fnv::FnvHashSet; +use rustc_hash::FxHashSet; use crate::directory::FileSlice; use crate::positions::PositionReader; @@ -78,7 +78,7 @@ impl InvertedIndexReader { pub fn list_encoded_fields(&self) -> io::Result> { let mut stream = self.termdict.stream()?; let mut fields = Vec::new(); - let mut fields_set = FnvHashSet::default(); + let mut fields_set = FxHashSet::default(); while let Some((term, _term_info)) = stream.next() { if let Some(index) = term.iter().position(|&byte| byte == JSON_END_OF_PATH) { if !fields_set.contains(&term[..index + 2]) { diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index cae1b537d8..f5bbeab0d9 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -3,8 +3,8 @@ use std::ops::BitOrAssign; use std::sync::{Arc, RwLock}; use std::{fmt, io}; -use fnv::FnvHashMap; use itertools::Itertools; +use rustc_hash::FxHashMap; use crate::core::{InvertedIndexReader, Segment, SegmentComponent, SegmentId}; use crate::directory::{CompositeFile, FileSlice}; @@ -300,7 +300,7 @@ impl SegmentReader { /// to not be listed. pub fn fields_metadata(&self) -> crate::Result> { let mut indexed_fields: Vec = Vec::new(); - let mut map_to_canonical = FnvHashMap::default(); + let mut map_to_canonical = FxHashMap::default(); for (field, field_entry) in self.schema().fields() { let field_name = field_entry.name().to_string(); let is_indexed = field_entry.is_indexed(); diff --git a/src/indexer/path_to_unordered_id.rs b/src/indexer/path_to_unordered_id.rs index 054654f948..2172624e04 100644 --- a/src/indexer/path_to_unordered_id.rs +++ b/src/indexer/path_to_unordered_id.rs @@ -1,4 +1,4 @@ -use fnv::FnvHashMap; +use rustc_hash::FxHashMap; /// `Field` is represented by an unsigned 32-bit integer type. /// The schema holds the mapping between field names and `Field` objects. @@ -24,7 +24,7 @@ impl From for OrderedPathId { #[derive(Default)] pub(crate) struct PathToUnorderedId { - map: FnvHashMap, + map: FxHashMap, } impl PathToUnorderedId {