From fd2af265b3726175b1b96c33908924b57da345ec Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 13 Oct 2023 14:56:01 +0800 Subject: [PATCH] split into ReferenceValueLeaf --- src/core/json_utils.rs | 100 +++++----- src/core/searcher.rs | 2 +- src/fastfield/writer.rs | 222 +++++++++++---------- src/schema/document/de.rs | 30 +-- src/schema/document/existing_type_impls.rs | 15 +- src/schema/document/mod.rs | 9 +- src/schema/document/owned_value.rs | 47 +++-- src/schema/document/se.rs | 119 +++++------ src/schema/document/value.rs | 204 +++++++++++++------ 9 files changed, 426 insertions(+), 322 deletions(-) diff --git a/src/core/json_utils.rs b/src/core/json_utils.rs index 79dfd25700..86fe08fd5f 100644 --- a/src/core/json_utils.rs +++ b/src/core/json_utils.rs @@ -5,7 +5,7 @@ use rustc_hash::FxHashMap; use crate::fastfield::FastValue; use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter}; -use crate::schema::document::{ReferenceValue, Value}; +use crate::schema::document::{ReferenceValue, ReferenceValueLeaf, Value}; use crate::schema::term::{JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR}; use crate::schema::{Field, Type, DATE_TIME_PRECISION_INDEXED}; use crate::time::format_description::well_known::Rfc3339; @@ -125,53 +125,57 @@ fn index_json_value<'a, V: Value<'a>>( positions_per_path: &mut IndexingPositionsPerPath, ) { match json_value.as_value() { - ReferenceValue::Null => {} - ReferenceValue::Str(val) => { - let mut token_stream = text_analyzer.token_stream(val); - - // TODO: make sure the chain position works out. - json_term_writer.close_path_and_set_type(Type::Str); - let indexing_position = positions_per_path.get_position(json_term_writer.term()); - postings_writer.index_text( - doc, - &mut *token_stream, - json_term_writer.term_buffer, - ctx, - indexing_position, - ); - } - ReferenceValue::U64(val) => { - json_term_writer.set_fast_value(val); - postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx); - } - ReferenceValue::I64(val) => { - json_term_writer.set_fast_value(val); - postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx); - } - ReferenceValue::F64(val) => { - json_term_writer.set_fast_value(val); - postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx); - } - ReferenceValue::Bool(val) => { - json_term_writer.set_fast_value(val); - postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx); - } - ReferenceValue::Facet(_) => { - unimplemented!("Facet support in dynamic fields is not yet implemented") - } - ReferenceValue::IpAddr(_) => { - unimplemented!("IP address support in dynamic fields is not yet implemented") - } - ReferenceValue::Date(val) => { - json_term_writer.set_fast_value(val); - postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx); - } - ReferenceValue::PreTokStr(_) => { - unimplemented!("Pre-tokenized string support in dynamic fields is not yet implemented") - } - ReferenceValue::Bytes(_) => { - unimplemented!("Bytes support in dynamic fields is not yet implemented") - } + ReferenceValue::Leaf(leaf) => match leaf { + ReferenceValueLeaf::Null => {} + ReferenceValueLeaf::Str(val) => { + let mut token_stream = text_analyzer.token_stream(val); + + // TODO: make sure the chain position works out. + json_term_writer.close_path_and_set_type(Type::Str); + let indexing_position = positions_per_path.get_position(json_term_writer.term()); + postings_writer.index_text( + doc, + &mut *token_stream, + json_term_writer.term_buffer, + ctx, + indexing_position, + ); + } + ReferenceValueLeaf::U64(val) => { + json_term_writer.set_fast_value(val); + postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx); + } + ReferenceValueLeaf::I64(val) => { + json_term_writer.set_fast_value(val); + postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx); + } + ReferenceValueLeaf::F64(val) => { + json_term_writer.set_fast_value(val); + postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx); + } + ReferenceValueLeaf::Bool(val) => { + json_term_writer.set_fast_value(val); + postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx); + } + ReferenceValueLeaf::Date(val) => { + json_term_writer.set_fast_value(val); + postings_writer.subscribe(doc, 0u32, json_term_writer.term(), ctx); + } + ReferenceValueLeaf::PreTokStr(_) => { + unimplemented!( + "Pre-tokenized string support in dynamic fields is not yet implemented" + ) + } + ReferenceValueLeaf::Bytes(_) => { + unimplemented!("Bytes support in dynamic fields is not yet implemented") + } + ReferenceValueLeaf::Facet(_) => { + unimplemented!("Facet support in dynamic fields is not yet implemented") + } + ReferenceValueLeaf::IpAddr(_) => { + unimplemented!("IP address support in dynamic fields is not yet implemented") + } + }, ReferenceValue::Array(elements) => { for val in elements { index_json_value( diff --git a/src/core/searcher.rs b/src/core/searcher.rs index c3b6d47aaf..3f989696ce 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -5,7 +5,7 @@ use std::{fmt, io}; use crate::collector::Collector; use crate::core::{Executor, SegmentReader}; use crate::query::{Bm25StatisticsProvider, EnableScoring, Query}; -use crate::schema::document::{Document, DocumentDeserialize}; +use crate::schema::document::DocumentDeserialize; use crate::schema::{Schema, Term}; use crate::space_usage::SearcherSpaceUsage; use crate::store::{CacheStats, StoreReader}; diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index ad61cdf4b6..4ea6ea2916 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -5,7 +5,7 @@ use common::replace_in_place; use tokenizer_api::Token; use crate::indexer::doc_id_mapping::DocIdMapping; -use crate::schema::document::{Document, ReferenceValue, Value}; +use crate::schema::document::{Document, ReferenceValue, ReferenceValueLeaf, Value}; use crate::schema::term::{JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR}; use crate::schema::{value_type_to_column_type, Field, FieldType, Schema, Type}; use crate::tokenizer::{TextAnalyzer, TokenizerManager}; @@ -141,64 +141,68 @@ impl FastFieldsWriter { }; match value.as_value() { - ReferenceValue::Null => {} - ReferenceValue::Str(val) => { - if let Some(tokenizer) = &mut self.per_field_tokenizer[field.field_id() as usize] { - let mut token_stream = tokenizer.token_stream(val); - token_stream.process(&mut |token: &Token| { - self.columnar_writer - .record_str(doc_id, field_name, &token.text); - }) - } else { - self.columnar_writer.record_str(doc_id, field_name, val); + ReferenceValue::Leaf(leaf) => match leaf { + ReferenceValueLeaf::Null => {} + ReferenceValueLeaf::Str(val) => { + if let Some(tokenizer) = + &mut self.per_field_tokenizer[field.field_id() as usize] + { + let mut token_stream = tokenizer.token_stream(val); + token_stream.process(&mut |token: &Token| { + self.columnar_writer + .record_str(doc_id, field_name, &token.text); + }) + } else { + self.columnar_writer.record_str(doc_id, field_name, val); + } } - } - ReferenceValue::U64(val) => { - self.columnar_writer.record_numerical( - doc_id, - field_name, - NumericalValue::from(val), - ); - } - ReferenceValue::I64(val) => { - self.columnar_writer.record_numerical( - doc_id, - field_name, - NumericalValue::from(val), - ); - } - ReferenceValue::F64(val) => { - self.columnar_writer.record_numerical( - doc_id, - field_name, - NumericalValue::from(val), - ); - } - ReferenceValue::Date(val) => { - let date_precision = self.date_precisions[field.field_id() as usize]; - let truncated_datetime = val.truncate(date_precision); - self.columnar_writer - .record_datetime(doc_id, field_name, truncated_datetime); - } - ReferenceValue::Facet(val) => { - self.columnar_writer - .record_str(doc_id, field_name, val.encoded_str()); - } - ReferenceValue::Bytes(val) => { - self.columnar_writer.record_bytes(doc_id, field_name, val); - } - ReferenceValue::IpAddr(val) => { - self.columnar_writer.record_ip_addr(doc_id, field_name, val); - } - ReferenceValue::Bool(val) => { - self.columnar_writer.record_bool(doc_id, field_name, val); - } - ReferenceValue::PreTokStr(val) => { - for token in &val.tokens { + ReferenceValueLeaf::U64(val) => { + self.columnar_writer.record_numerical( + doc_id, + field_name, + NumericalValue::from(val), + ); + } + ReferenceValueLeaf::I64(val) => { + self.columnar_writer.record_numerical( + doc_id, + field_name, + NumericalValue::from(val), + ); + } + ReferenceValueLeaf::F64(val) => { + self.columnar_writer.record_numerical( + doc_id, + field_name, + NumericalValue::from(val), + ); + } + ReferenceValueLeaf::Date(val) => { + let date_precision = self.date_precisions[field.field_id() as usize]; + let truncated_datetime = val.truncate(date_precision); self.columnar_writer - .record_str(doc_id, field_name, &token.text); + .record_datetime(doc_id, field_name, truncated_datetime); } - } + ReferenceValueLeaf::Facet(val) => { + self.columnar_writer + .record_str(doc_id, field_name, val.encoded_str()); + } + ReferenceValueLeaf::Bytes(val) => { + self.columnar_writer.record_bytes(doc_id, field_name, val); + } + ReferenceValueLeaf::IpAddr(val) => { + self.columnar_writer.record_ip_addr(doc_id, field_name, val); + } + ReferenceValueLeaf::Bool(val) => { + self.columnar_writer.record_bool(doc_id, field_name, val); + } + ReferenceValueLeaf::PreTokStr(val) => { + for token in &val.tokens { + self.columnar_writer + .record_str(doc_id, field_name, &token.text); + } + } + }, ReferenceValue::Array(val) => { // TODO: Check this is the correct behaviour we want. for value in val { @@ -297,58 +301,62 @@ fn record_json_value_to_columnar_writer<'a, V: Value<'a>>( remaining_depth_limit -= 1; match json_val.as_value() { - ReferenceValue::Null => {} // TODO: Handle null - ReferenceValue::Str(val) => { - if let Some(text_analyzer) = tokenizer.as_mut() { - let mut token_stream = text_analyzer.token_stream(val); - token_stream.process(&mut |token| { - columnar_writer.record_str(doc, json_path_writer.as_str(), &token.text); - }) - } else { - columnar_writer.record_str(doc, json_path_writer.as_str(), val); + ReferenceValue::Leaf(leaf) => match leaf { + ReferenceValueLeaf::Null => {} // TODO: Handle null + ReferenceValueLeaf::Str(val) => { + if let Some(text_analyzer) = tokenizer.as_mut() { + let mut token_stream = text_analyzer.token_stream(val); + token_stream.process(&mut |token| { + columnar_writer.record_str(doc, json_path_writer.as_str(), &token.text); + }) + } else { + columnar_writer.record_str(doc, json_path_writer.as_str(), val); + } } - } - ReferenceValue::U64(val) => { - columnar_writer.record_numerical( - doc, - json_path_writer.as_str(), - NumericalValue::from(val), - ); - } - ReferenceValue::I64(val) => { - columnar_writer.record_numerical( - doc, - json_path_writer.as_str(), - NumericalValue::from(val), - ); - } - ReferenceValue::F64(val) => { - columnar_writer.record_numerical( - doc, - json_path_writer.as_str(), - NumericalValue::from(val), - ); - } - ReferenceValue::Bool(val) => { - columnar_writer.record_bool(doc, json_path_writer, val); - } - ReferenceValue::Date(val) => { - columnar_writer.record_datetime(doc, json_path_writer.as_str(), val); - } - ReferenceValue::Facet(_) => { - unimplemented!("Facet support in dynamic fields is not yet implemented") - } - ReferenceValue::Bytes(_) => { - // TODO: This can be re added once it is added to the JSON Utils section as well. - // columnar_writer.record_bytes(doc, json_path_writer.as_str(), val); - unimplemented!("Bytes support in dynamic fields is not yet implemented") - } - ReferenceValue::IpAddr(_) => { - unimplemented!("IP address support in dynamic fields is not yet implemented") - } - ReferenceValue::PreTokStr(_) => { - unimplemented!("Pre-tokenized string support in dynamic fields is not yet implemented") - } + ReferenceValueLeaf::U64(val) => { + columnar_writer.record_numerical( + doc, + json_path_writer.as_str(), + NumericalValue::from(val), + ); + } + ReferenceValueLeaf::I64(val) => { + columnar_writer.record_numerical( + doc, + json_path_writer.as_str(), + NumericalValue::from(val), + ); + } + ReferenceValueLeaf::F64(val) => { + columnar_writer.record_numerical( + doc, + json_path_writer.as_str(), + NumericalValue::from(val), + ); + } + ReferenceValueLeaf::Bool(val) => { + columnar_writer.record_bool(doc, json_path_writer, val); + } + ReferenceValueLeaf::Date(val) => { + columnar_writer.record_datetime(doc, json_path_writer.as_str(), val); + } + ReferenceValueLeaf::Facet(_) => { + unimplemented!("Facet support in dynamic fields is not yet implemented") + } + ReferenceValueLeaf::Bytes(_) => { + // TODO: This can be re added once it is added to the JSON Utils section as well. + // columnar_writer.record_bytes(doc, json_path_writer.as_str(), val); + unimplemented!("Bytes support in dynamic fields is not yet implemented") + } + ReferenceValueLeaf::IpAddr(_) => { + unimplemented!("IP address support in dynamic fields is not yet implemented") + } + ReferenceValueLeaf::PreTokStr(_) => { + unimplemented!( + "Pre-tokenized string support in dynamic fields is not yet implemented" + ) + } + }, ReferenceValue::Array(elements) => { for el in elements { record_json_value_to_columnar_writer( diff --git a/src/schema/document/de.rs b/src/schema/document/de.rs index a1e5972df6..1fb192d139 100644 --- a/src/schema/document/de.rs +++ b/src/schema/document/de.rs @@ -791,7 +791,8 @@ mod tests { use super::*; use crate::schema::document::existing_type_impls::JsonObjectIter; use crate::schema::document::se::BinaryValueSerializer; - use crate::schema::document::ReferenceValue; + use crate::schema::document::{ReferenceValue, ReferenceValueLeaf}; + use crate::schema::OwnedValue; fn serialize_value<'a>(value: ReferenceValue<'a, &'a serde_json::Value>) -> Vec { let mut writer = Vec::new(); @@ -810,34 +811,35 @@ mod tests { #[test] fn test_simple_value_serialize() { - let result = serialize_value(ReferenceValue::Null); + let result = serialize_value(ReferenceValueLeaf::Null.into()); let value = deserialize_value(result); assert_eq!(value, crate::schema::OwnedValue::Null); - let result = serialize_value(ReferenceValue::Str("hello, world")); + let result = serialize_value(ReferenceValueLeaf::Str("hello, world").into()); let value = deserialize_value(result); assert_eq!( value, crate::schema::OwnedValue::Str(String::from("hello, world")) ); - let result = serialize_value(ReferenceValue::U64(123)); + let result = serialize_value(ReferenceValueLeaf::U64(123).into()); let value = deserialize_value(result); assert_eq!(value, crate::schema::OwnedValue::U64(123)); - let result = serialize_value(ReferenceValue::I64(-123)); + let result = serialize_value(ReferenceValueLeaf::I64(-123).into()); let value = deserialize_value(result); assert_eq!(value, crate::schema::OwnedValue::I64(-123)); - let result = serialize_value(ReferenceValue::F64(123.3845)); + let result = serialize_value(ReferenceValueLeaf::F64(123.3845).into()); let value = deserialize_value(result); assert_eq!(value, crate::schema::OwnedValue::F64(123.3845)); - let result = serialize_value(ReferenceValue::Bool(false)); + let result = serialize_value(ReferenceValueLeaf::Bool(false).into()); let value = deserialize_value(result); assert_eq!(value, crate::schema::OwnedValue::Bool(false)); - let result = serialize_value(ReferenceValue::Date(DateTime::from_timestamp_micros(100))); + let result = + serialize_value(ReferenceValueLeaf::Date(DateTime::from_timestamp_micros(100)).into()); let value = deserialize_value(result); assert_eq!( value, @@ -845,7 +847,7 @@ mod tests { ); let facet = Facet::from_text("/hello/world").unwrap(); - let result = serialize_value(ReferenceValue::Facet(&facet)); + let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into()); let value = deserialize_value(result); assert_eq!(value, crate::schema::OwnedValue::Facet(facet)); @@ -853,7 +855,7 @@ mod tests { text: "hello, world".to_string(), tokens: vec![Token::default(), Token::default()], }; - let result = serialize_value(ReferenceValue::PreTokStr(&pre_tok_str)); + let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into()); let value = deserialize_value(result); assert_eq!(value, crate::schema::OwnedValue::PreTokStr(pre_tok_str)); } @@ -1025,11 +1027,11 @@ mod tests { let mut expected_object = BTreeMap::new(); expected_object.insert( "my-array".to_string(), - crate::schema::OwnedValue::Array(vec![crate::schema::OwnedValue::Array(vec![ - crate::schema::OwnedValue::Array(vec![]), - crate::schema::OwnedValue::Array(vec![crate::schema::OwnedValue::Null]), + OwnedValue::Array(vec![OwnedValue::Array(vec![ + OwnedValue::Array(vec![]), + OwnedValue::Array(vec![OwnedValue::Null]), ])]), ); - assert_eq!(value, crate::schema::OwnedValue::Object(expected_object)); + assert_eq!(value, OwnedValue::Object(expected_object)); } } diff --git a/src/schema/document/existing_type_impls.rs b/src/schema/document/existing_type_impls.rs index 9fb2743e02..eaeaed2cd9 100644 --- a/src/schema/document/existing_type_impls.rs +++ b/src/schema/document/existing_type_impls.rs @@ -8,6 +8,7 @@ use std::collections::{btree_map, hash_map, BTreeMap, HashMap}; use serde_json::Number; +use super::ReferenceValueLeaf; use crate::schema::document::{ ArrayAccess, DeserializeError, Document, DocumentDeserialize, DocumentDeserializer, ObjectAccess, ReferenceValue, Value, ValueDeserialize, ValueDeserializer, ValueVisitor, @@ -21,20 +22,20 @@ impl<'a> Value<'a> for &'a serde_json::Value { fn as_value(&self) -> ReferenceValue<'a, Self> { match self { - serde_json::Value::Null => ReferenceValue::Null, - serde_json::Value::Bool(value) => ReferenceValue::Bool(*value), + serde_json::Value::Null => ReferenceValueLeaf::Null.into(), + serde_json::Value::Bool(value) => ReferenceValueLeaf::Bool(*value).into(), serde_json::Value::Number(number) => { if let Some(val) = number.as_i64() { - ReferenceValue::I64(val) + ReferenceValueLeaf::I64(val).into() } else if let Some(val) = number.as_u64() { - ReferenceValue::U64(val) + ReferenceValueLeaf::U64(val).into() } else if let Some(val) = number.as_f64() { - ReferenceValue::F64(val) + ReferenceValueLeaf::F64(val).into() } else { panic!("Unsupported serde_json number {number}"); } } - serde_json::Value::String(val) => ReferenceValue::Str(val), + serde_json::Value::String(val) => ReferenceValueLeaf::Str(val).into(), serde_json::Value::Array(elements) => ReferenceValue::Array(elements.iter()), serde_json::Value::Object(object) => { ReferenceValue::Object(JsonObjectIter(object.iter())) @@ -77,7 +78,7 @@ impl ValueDeserialize for serde_json::Value { } fn visit_bool(&self, val: bool) -> Result { - Ok(serde_json::Value::Bool(val.into())) + Ok(serde_json::Value::Bool(val)) } fn visit_array<'de, A>(&self, mut access: A) -> Result diff --git a/src/schema/document/mod.rs b/src/schema/document/mod.rs index ba48409153..0e09242f7e 100644 --- a/src/schema/document/mod.rs +++ b/src/schema/document/mod.rs @@ -116,6 +116,7 @@ //! //! ``` //! use tantivy::schema::document::ReferenceValue; +//! use tantivy::schema::document::ReferenceValueLeaf; //! use tantivy::schema::{Value}; //! //! #[derive(Debug)] @@ -141,9 +142,9 @@ //! fn as_value(&self) -> ReferenceValue<'a, Self> { //! // We can support any type that Tantivy itself supports. //! match self { -//! MyCustomValue::String(val) => ReferenceValue::Str(val), -//! MyCustomValue::Float(val) => ReferenceValue::F64(*val), -//! MyCustomValue::Bool(val) => ReferenceValue::Bool(*val), +//! MyCustomValue::String(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Str(*val)), +//! MyCustomValue::Float(val) => ReferenceValue::Leaf(ReferenceValueLeaf::F64(*val)), +//! MyCustomValue::Bool(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Bool(*val)), //! } //! } //! @@ -170,7 +171,7 @@ pub use self::de::{ pub use self::default_document::{DocParsingError, TantivyDocument}; pub use self::owned_value::OwnedValue; pub(crate) use self::se::BinaryDocumentSerializer; -pub use self::value::{ReferenceValue, Value}; +pub use self::value::{ReferenceValue, ReferenceValueLeaf, Value}; use super::*; /// The core trait representing a document within the index. diff --git a/src/schema/document/owned_value.rs b/src/schema/document/owned_value.rs index 1fb89b3680..457bdeb7ab 100644 --- a/src/schema/document/owned_value.rs +++ b/src/schema/document/owned_value.rs @@ -8,6 +8,7 @@ use serde::de::{MapAccess, SeqAccess}; use time::format_description::well_known::Rfc3339; use time::OffsetDateTime; +use super::ReferenceValueLeaf; use crate::schema::document::{ ArrayAccess, DeserializeError, ObjectAccess, ReferenceValue, Value, ValueDeserialize, ValueDeserializer, ValueVisitor, @@ -62,17 +63,17 @@ impl<'a> Value<'a> for &'a OwnedValue { fn as_value(&self) -> ReferenceValue<'a, Self> { match self { - OwnedValue::Null => ReferenceValue::Null, - OwnedValue::Str(val) => ReferenceValue::Str(val), - OwnedValue::PreTokStr(val) => ReferenceValue::PreTokStr(val), - OwnedValue::U64(val) => ReferenceValue::U64(*val), - OwnedValue::I64(val) => ReferenceValue::I64(*val), - OwnedValue::F64(val) => ReferenceValue::F64(*val), - OwnedValue::Bool(val) => ReferenceValue::Bool(*val), - OwnedValue::Date(val) => ReferenceValue::Date(*val), - OwnedValue::Facet(val) => ReferenceValue::Facet(val), - OwnedValue::Bytes(val) => ReferenceValue::Bytes(val), - OwnedValue::IpAddr(val) => ReferenceValue::IpAddr(*val), + OwnedValue::Null => ReferenceValueLeaf::Null.into(), + OwnedValue::Str(val) => ReferenceValueLeaf::Str(val).into(), + OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val).into(), + OwnedValue::U64(val) => ReferenceValueLeaf::U64(*val).into(), + OwnedValue::I64(val) => ReferenceValueLeaf::I64(*val).into(), + OwnedValue::F64(val) => ReferenceValueLeaf::F64(*val).into(), + OwnedValue::Bool(val) => ReferenceValueLeaf::Bool(*val).into(), + OwnedValue::Date(val) => ReferenceValueLeaf::Date(*val).into(), + OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val).into(), + OwnedValue::Bytes(val) => ReferenceValueLeaf::Bytes(val).into(), + OwnedValue::IpAddr(val) => ReferenceValueLeaf::IpAddr(*val).into(), OwnedValue::Array(array) => ReferenceValue::Array(array.iter()), OwnedValue::Object(object) => ReferenceValue::Object(ObjectMapIter(object.iter())), } @@ -264,17 +265,19 @@ impl<'de> serde::Deserialize<'de> for OwnedValue { impl<'a, V: Value<'a>> From> for OwnedValue { fn from(val: ReferenceValue<'a, V>) -> OwnedValue { match val { - ReferenceValue::Null => OwnedValue::Null, - ReferenceValue::Str(val) => OwnedValue::Str(val.to_string()), - ReferenceValue::U64(val) => OwnedValue::U64(val), - ReferenceValue::I64(val) => OwnedValue::I64(val), - ReferenceValue::F64(val) => OwnedValue::F64(val), - ReferenceValue::Date(val) => OwnedValue::Date(val), - ReferenceValue::Facet(val) => OwnedValue::Facet(val.clone()), - ReferenceValue::Bytes(val) => OwnedValue::Bytes(val.to_vec()), - ReferenceValue::IpAddr(val) => OwnedValue::IpAddr(val), - ReferenceValue::Bool(val) => OwnedValue::Bool(val), - ReferenceValue::PreTokStr(val) => OwnedValue::PreTokStr(val.clone()), + ReferenceValue::Leaf(leaf) => match leaf { + ReferenceValueLeaf::Null => OwnedValue::Null, + ReferenceValueLeaf::Str(val) => OwnedValue::Str(val.to_string()), + ReferenceValueLeaf::U64(val) => OwnedValue::U64(val), + ReferenceValueLeaf::I64(val) => OwnedValue::I64(val), + ReferenceValueLeaf::F64(val) => OwnedValue::F64(val), + ReferenceValueLeaf::Date(val) => OwnedValue::Date(val), + ReferenceValueLeaf::Facet(val) => OwnedValue::Facet(val.clone()), + ReferenceValueLeaf::Bytes(val) => OwnedValue::Bytes(val.to_vec()), + ReferenceValueLeaf::IpAddr(val) => OwnedValue::IpAddr(val), + ReferenceValueLeaf::Bool(val) => OwnedValue::Bool(val), + ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(val.clone()), + }, ReferenceValue::Array(val) => { OwnedValue::Array(val.map(|v| v.as_value().into()).collect()) } diff --git a/src/schema/document/se.rs b/src/schema/document/se.rs index 5c0e3b85ba..10e0657e0c 100644 --- a/src/schema/document/se.rs +++ b/src/schema/document/se.rs @@ -5,6 +5,7 @@ use std::io::Write; use columnar::MonotonicallyMappableToU128; use common::{f64_to_u64, BinarySerializable, VInt}; +use super::{OwnedValue, ReferenceValueLeaf}; use crate::schema::document::{type_codes, Document, ReferenceValue, Value}; use crate::schema::Schema; @@ -39,10 +40,10 @@ where W: Write let mut serializer = BinaryValueSerializer::new(self.writer); match value_access.as_value() { - ReferenceValue::PreTokStr(pre_tokenized_text) => { - serializer.serialize_value(ReferenceValue::Str::< - &'_ crate::schema::OwnedValue, - >(&pre_tokenized_text.text))?; + ReferenceValue::Leaf(ReferenceValueLeaf::PreTokStr(pre_tokenized_text)) => { + serializer.serialize_value(ReferenceValue::Leaf::<&'_ OwnedValue>( + ReferenceValueLeaf::Str(&pre_tokenized_text.text), + ))?; } _ => { serializer.serialize_value(value_access.as_value())?; @@ -90,59 +91,61 @@ where W: Write V: Value<'a>, { match value { - ReferenceValue::Null => self.write_type_code(type_codes::NULL_CODE), - ReferenceValue::Str(val) => { - self.write_type_code(type_codes::TEXT_CODE)?; + ReferenceValue::Leaf(leaf) => match leaf { + ReferenceValueLeaf::Null => self.write_type_code(type_codes::NULL_CODE), + ReferenceValueLeaf::Str(val) => { + self.write_type_code(type_codes::TEXT_CODE)?; - let temp_val = Cow::Borrowed(val); - temp_val.serialize(self.writer) - } - ReferenceValue::U64(val) => { - self.write_type_code(type_codes::U64_CODE)?; + let temp_val = Cow::Borrowed(val); + temp_val.serialize(self.writer) + } + ReferenceValueLeaf::U64(val) => { + self.write_type_code(type_codes::U64_CODE)?; - val.serialize(self.writer) - } - ReferenceValue::I64(val) => { - self.write_type_code(type_codes::I64_CODE)?; + val.serialize(self.writer) + } + ReferenceValueLeaf::I64(val) => { + self.write_type_code(type_codes::I64_CODE)?; - val.serialize(self.writer) - } - ReferenceValue::F64(val) => { - self.write_type_code(type_codes::F64_CODE)?; + val.serialize(self.writer) + } + ReferenceValueLeaf::F64(val) => { + self.write_type_code(type_codes::F64_CODE)?; - f64_to_u64(val).serialize(self.writer) - } - ReferenceValue::Date(val) => { - self.write_type_code(type_codes::DATE_CODE)?; - val.serialize(self.writer) - } - ReferenceValue::Facet(val) => { - self.write_type_code(type_codes::HIERARCHICAL_FACET_CODE)?; + f64_to_u64(val).serialize(self.writer) + } + ReferenceValueLeaf::Date(val) => { + self.write_type_code(type_codes::DATE_CODE)?; + val.serialize(self.writer) + } + ReferenceValueLeaf::Facet(val) => { + self.write_type_code(type_codes::HIERARCHICAL_FACET_CODE)?; - val.serialize(self.writer) - } - ReferenceValue::Bytes(val) => { - self.write_type_code(type_codes::BYTES_CODE)?; + val.serialize(self.writer) + } + ReferenceValueLeaf::Bytes(val) => { + self.write_type_code(type_codes::BYTES_CODE)?; - let temp_val = Cow::Borrowed(val); - temp_val.serialize(self.writer) - } - ReferenceValue::IpAddr(val) => { - self.write_type_code(type_codes::IP_CODE)?; + let temp_val = Cow::Borrowed(val); + temp_val.serialize(self.writer) + } + ReferenceValueLeaf::IpAddr(val) => { + self.write_type_code(type_codes::IP_CODE)?; - val.to_u128().serialize(self.writer) - } - ReferenceValue::Bool(val) => { - self.write_type_code(type_codes::BOOL_CODE)?; + val.to_u128().serialize(self.writer) + } + ReferenceValueLeaf::Bool(val) => { + self.write_type_code(type_codes::BOOL_CODE)?; - val.serialize(self.writer) - } - ReferenceValue::PreTokStr(val) => { - self.write_type_code(type_codes::EXT_CODE)?; - self.write_type_code(type_codes::TOK_STR_EXT_CODE)?; + val.serialize(self.writer) + } + ReferenceValueLeaf::PreTokStr(val) => { + self.write_type_code(type_codes::EXT_CODE)?; + self.write_type_code(type_codes::TOK_STR_EXT_CODE)?; - val.serialize(self.writer) - } + val.serialize(self.writer) + } + }, ReferenceValue::Array(elements) => { self.write_type_code(type_codes::ARRAY_CODE)?; @@ -272,7 +275,7 @@ where W: Write // as we could avoid writing the extra byte per key. But the gain is // largely not worth it for the extra complexity it brings. self.inner - .serialize_value(ReferenceValue::<'a, V>::Str(key))?; + .serialize_value(ReferenceValue::<'a, V>::Leaf(ReferenceValueLeaf::Str(key)))?; self.inner.serialize_value(value)?; self.actual_length += 1; @@ -361,7 +364,7 @@ mod tests { #[test] fn test_simple_value_serialize() { - let result = serialize_value(ReferenceValue::Null); + let result = serialize_value(ReferenceValueLeaf::Null.into()); let expected = binary_repr!( type_codes::NULL_CODE => (), ); @@ -370,7 +373,7 @@ mod tests { "Expected serialized value to match the binary representation" ); - let result = serialize_value(ReferenceValue::Str("hello, world")); + let result = serialize_value(ReferenceValueLeaf::Str("hello, world").into()); let expected = binary_repr!( type_codes::TEXT_CODE => String::from("hello, world"), ); @@ -379,7 +382,7 @@ mod tests { "Expected serialized value to match the binary representation" ); - let result = serialize_value(ReferenceValue::U64(123)); + let result = serialize_value(ReferenceValueLeaf::U64(123).into()); let expected = binary_repr!( type_codes::U64_CODE => 123u64, ); @@ -388,7 +391,7 @@ mod tests { "Expected serialized value to match the binary representation" ); - let result = serialize_value(ReferenceValue::I64(-123)); + let result = serialize_value(ReferenceValueLeaf::I64(-123).into()); let expected = binary_repr!( type_codes::I64_CODE => -123i64, ); @@ -397,7 +400,7 @@ mod tests { "Expected serialized value to match the binary representation" ); - let result = serialize_value(ReferenceValue::F64(123.3845)); + let result = serialize_value(ReferenceValueLeaf::F64(123.3845f64).into()); let expected = binary_repr!( type_codes::F64_CODE => f64_to_u64(123.3845f64), ); @@ -406,7 +409,7 @@ mod tests { "Expected serialized value to match the binary representation" ); - let result = serialize_value(ReferenceValue::Bool(false)); + let result = serialize_value(ReferenceValueLeaf::Bool(false).into()); let expected = binary_repr!( type_codes::BOOL_CODE => false, ); @@ -415,7 +418,7 @@ mod tests { "Expected serialized value to match the binary representation" ); - let result = serialize_value(ReferenceValue::Date(DateTime::MAX)); + let result = serialize_value(ReferenceValueLeaf::Date(DateTime::MAX).into()); let expected = binary_repr!( type_codes::DATE_CODE => DateTime::MAX, ); @@ -425,7 +428,7 @@ mod tests { ); let facet = Facet::from_text("/hello/world").unwrap(); - let result = serialize_value(ReferenceValue::Facet(&facet)); + let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into()); let expected = binary_repr!( type_codes::HIERARCHICAL_FACET_CODE => Facet::from_text("/hello/world").unwrap(), ); @@ -438,7 +441,7 @@ mod tests { text: "hello, world".to_string(), tokens: vec![Token::default(), Token::default()], }; - let result = serialize_value(ReferenceValue::PreTokStr(&pre_tok_str)); + let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into()); let expected = binary_repr!( type_codes::EXT_CODE, type_codes::TOK_STR_EXT_CODE => pre_tok_str, ); diff --git a/src/schema/document/value.rs b/src/schema/document/value.rs index 45d085f58b..ca3d3bf1b3 100644 --- a/src/schema/document/value.rs +++ b/src/schema/document/value.rs @@ -21,108 +21,81 @@ pub trait Value<'a>: Send + Sync + Debug { #[inline] /// Returns if the value is `null` or not. fn is_null(&self) -> bool { - matches!(self.as_value(), ReferenceValue::Null) + matches!( + self.as_value(), + ReferenceValue::Leaf(ReferenceValueLeaf::Null) + ) } #[inline] /// If the Value is a String, returns the associated str. Returns None otherwise. - fn as_str(&self) -> Option<&'a str> { - if let ReferenceValue::Str(val) = self.as_value() { + fn as_leaf(&self) -> Option> { + if let ReferenceValue::Leaf(val) = self.as_value() { Some(val) } else { None } } + #[inline] + /// If the Value is a String, returns the associated str. Returns None otherwise. + fn as_str(&self) -> Option<&'a str> { + self.as_leaf().and_then(|leaf| leaf.as_str()) + } + #[inline] /// If the Value is a u64, returns the associated u64. Returns None otherwise. fn as_u64(&self) -> Option { - if let ReferenceValue::U64(val) = self.as_value() { - Some(val) - } else { - None - } + self.as_leaf().and_then(|leaf| leaf.as_u64()) } #[inline] /// If the Value is a i64, returns the associated i64. Returns None otherwise. fn as_i64(&self) -> Option { - if let ReferenceValue::I64(val) = self.as_value() { - Some(val) - } else { - None - } + self.as_leaf().and_then(|leaf| leaf.as_i64()) } #[inline] /// If the Value is a f64, returns the associated f64. Returns None otherwise. fn as_f64(&self) -> Option { - if let ReferenceValue::F64(val) = self.as_value() { - Some(val) - } else { - None - } + self.as_leaf().and_then(|leaf| leaf.as_f64()) } #[inline] /// If the Value is a datetime, returns the associated datetime. Returns None otherwise. fn as_datetime(&self) -> Option { - if let ReferenceValue::Date(val) = self.as_value() { - Some(val) - } else { - None - } + self.as_leaf().and_then(|leaf| leaf.as_datetime()) } #[inline] /// If the Value is a IP address, returns the associated IP. Returns None otherwise. fn as_ip_addr(&self) -> Option { - if let ReferenceValue::IpAddr(val) = self.as_value() { - Some(val) - } else { - None - } + self.as_leaf().and_then(|leaf| leaf.as_ip_addr()) } #[inline] /// If the Value is a bool, returns the associated bool. Returns None otherwise. fn as_bool(&self) -> Option { - if let ReferenceValue::Bool(val) = self.as_value() { - Some(val) - } else { - None - } + self.as_leaf().and_then(|leaf| leaf.as_bool()) } #[inline] /// If the Value is a pre-tokenized string, returns the associated string. Returns None /// otherwise. fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> { - if let ReferenceValue::PreTokStr(val) = self.as_value() { - Some(val) - } else { - None - } + self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text()) } #[inline] /// If the Value is a bytes value, returns the associated set of bytes. Returns None otherwise. fn as_bytes(&self) -> Option<&'a [u8]> { - if let ReferenceValue::Bytes(val) = self.as_value() { - Some(val) - } else { - None - } + self.as_leaf().and_then(|leaf| leaf.as_bytes()) } #[inline] /// If the Value is a facet, returns the associated facet. Returns None otherwise. fn as_facet(&self) -> Option<&'a Facet> { - if let ReferenceValue::Facet(val) = self.as_value() { - Some(val) - } else { - None - } + self.as_leaf().and_then(|leaf| leaf.as_facet()) } #[inline] @@ -158,11 +131,9 @@ pub trait Value<'a>: Send + Sync + Debug { } } -/// A enum representing a value for tantivy to index. -#[derive(Clone, Debug, PartialEq)] -pub enum ReferenceValue<'a, V> -where V: Value<'a> + ?Sized -{ +/// A enum representing a leaf value for tantivy to index. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum ReferenceValueLeaf<'a> { /// A null value. Null, /// The str type is used for any text information. @@ -185,15 +156,32 @@ where V: Value<'a> + ?Sized Bool(bool), /// Pre-tokenized str type, PreTokStr(&'a PreTokenizedString), - /// A an array containing multiple values. - Array(V::ArrayIter), - /// A nested / dynamic object. - Object(V::ObjectIter), } -impl<'a, V> ReferenceValue<'a, V> -where V: Value<'a> -{ +impl<'a, T: Value<'a> + ?Sized> From> for ReferenceValue<'a, T> { + #[inline] + fn from(value: ReferenceValueLeaf<'a>) -> Self { + match value { + ReferenceValueLeaf::Null => ReferenceValue::Leaf(ReferenceValueLeaf::Null), + ReferenceValueLeaf::Str(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Str(val)), + ReferenceValueLeaf::U64(val) => ReferenceValue::Leaf(ReferenceValueLeaf::U64(val)), + ReferenceValueLeaf::I64(val) => ReferenceValue::Leaf(ReferenceValueLeaf::I64(val)), + ReferenceValueLeaf::F64(val) => ReferenceValue::Leaf(ReferenceValueLeaf::F64(val)), + ReferenceValueLeaf::Date(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Date(val)), + ReferenceValueLeaf::Facet(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Facet(val)), + ReferenceValueLeaf::Bytes(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Bytes(val)), + ReferenceValueLeaf::IpAddr(val) => { + ReferenceValue::Leaf(ReferenceValueLeaf::IpAddr(val)) + } + ReferenceValueLeaf::Bool(val) => ReferenceValue::Leaf(ReferenceValueLeaf::Bool(val)), + ReferenceValueLeaf::PreTokStr(val) => { + ReferenceValue::Leaf(ReferenceValueLeaf::PreTokStr(val)) + } + } + } +} + +impl<'a> ReferenceValueLeaf<'a> { #[inline] /// Returns if the value is `null` or not. pub fn is_null(&self) -> bool { @@ -300,11 +288,105 @@ where V: Value<'a> None } } +} + +/// A enum representing a value for tantivy to index. +#[derive(Clone, Debug, PartialEq)] +pub enum ReferenceValue<'a, V> +where V: Value<'a> + ?Sized +{ + /// A null value. + Leaf(ReferenceValueLeaf<'a>), + /// A an array containing multiple values. + Array(V::ArrayIter), + /// A nested / dynamic object. + Object(V::ObjectIter), +} + +impl<'a, V> ReferenceValue<'a, V> +where V: Value<'a> +{ + #[inline] + /// Returns if the value is `null` or not. + pub fn is_null(&self) -> bool { + matches!(self, Self::Leaf(ReferenceValueLeaf::Null)) + } + + #[inline] + /// If the Value is a leaf, returns the associated leaf. Returns None otherwise. + pub fn as_leaf(&self) -> Option<&ReferenceValueLeaf<'a>> { + if let Self::Leaf(val) = self { + Some(val) + } else { + None + } + } + + #[inline] + /// If the Value is a String, returns the associated str. Returns None otherwise. + pub fn as_str(&self) -> Option<&'a str> { + self.as_leaf().and_then(|leaf| leaf.as_str()) + } + + #[inline] + /// If the Value is a u64, returns the associated u64. Returns None otherwise. + pub fn as_u64(&self) -> Option { + self.as_leaf().and_then(|leaf| leaf.as_u64()) + } + + #[inline] + /// If the Value is a i64, returns the associated i64. Returns None otherwise. + pub fn as_i64(&self) -> Option { + self.as_leaf().and_then(|leaf| leaf.as_i64()) + } + + #[inline] + /// If the Value is a f64, returns the associated f64. Returns None otherwise. + pub fn as_f64(&self) -> Option { + self.as_leaf().and_then(|leaf| leaf.as_f64()) + } + + #[inline] + /// If the Value is a datetime, returns the associated datetime. Returns None otherwise. + pub fn as_datetime(&self) -> Option { + self.as_leaf().and_then(|leaf| leaf.as_datetime()) + } + + #[inline] + /// If the Value is a IP address, returns the associated IP. Returns None otherwise. + pub fn as_ip_addr(&self) -> Option { + self.as_leaf().and_then(|leaf| leaf.as_ip_addr()) + } + + #[inline] + /// If the Value is a bool, returns the associated bool. Returns None otherwise. + pub fn as_bool(&self) -> Option { + self.as_leaf().and_then(|leaf| leaf.as_bool()) + } + + #[inline] + /// If the Value is a pre-tokenized string, returns the associated string. Returns None + /// otherwise. + pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> { + self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text()) + } + + #[inline] + /// If the Value is a bytes value, returns the associated set of bytes. Returns None otherwise. + pub fn as_bytes(&self) -> Option<&'a [u8]> { + self.as_leaf().and_then(|leaf| leaf.as_bytes()) + } + + #[inline] + /// If the Value is a facet, returns the associated facet. Returns None otherwise. + pub fn as_facet(&self) -> Option<&'a Facet> { + self.as_leaf().and_then(|leaf| leaf.as_facet()) + } #[inline] /// Returns true if the Value is an array. pub fn is_array(&self) -> bool { - matches!(self, Self::Object(_)) + matches!(self, Self::Array(_)) } #[inline]