Skip to content

Commit

Permalink
rename, add comment
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Nov 8, 2023
1 parent 7b34988 commit bbfa822
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 36 deletions.
27 changes: 6 additions & 21 deletions src/core/json_utils.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use columnar::MonotonicallyMappableToU64;
use common::JsonPathWriter;
use rustc_hash::FxHashMap;

use crate::fastfield::FastValue;
use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
use crate::schema::document::{ReferenceValue, ReferenceValueLeaf, Value};
use crate::schema::{Field, Type, DATE_TIME_PRECISION_INDEXED};
use crate::schema::{Field, Type};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{OffsetDateTime, UtcOffset};
use crate::tokenizer::TextAnalyzer;
Expand Down Expand Up @@ -248,7 +246,7 @@ fn index_json_value<'a, V: Value<'a>>(
}

/// Tries to infer a JSON type from a string.
pub fn convert_to_fast_value_and_set(term: &Term, phrase: &str) -> Option<Term> {
pub fn convert_to_fast_value_and_append(term: &Term, phrase: &str) -> Option<Term> {
let mut term = term.clone();
if let Ok(dt) = OffsetDateTime::parse(phrase, &Rfc3339) {
let dt_utc = dt.to_offset(UtcOffset::UTC);
Expand All @@ -274,31 +272,18 @@ pub fn convert_to_fast_value_and_set(term: &Term, phrase: &str) -> Option<Term>
None
}

/// Appends a typed fast value to a JSON term.
///
/// The type code of `T` is written first as a single byte, followed by the
/// value's `u64` representation in big-endian byte order. Values of type
/// `Type::Date` are truncated to `DATE_TIME_PRECISION_INDEXED` before being
/// written; every other type is written as-is.
pub fn set_json_fastvalue<T: FastValue>(term: &mut Term, val: T) {
    let value_type = T::to_type();
    term.append_bytes(&[value_type.to_code()]);

    let raw = val.to_u64();
    let encoded = match value_type {
        // Dates are stored with the indexed precision only.
        Type::Date => DateTime::from_u64(raw)
            .truncate(DATE_TIME_PRECISION_INDEXED)
            .to_u64(),
        _ => raw,
    };
    term.append_bytes(encoded.to_be_bytes().as_slice());
}

/// Helper function that generates a list of terms with their positions from a textual JSON value.
pub(crate) fn set_string_and_get_terms(
pub(crate) fn append_string_and_get_terms(
term: &mut Term,
value: &str,
text_analyzer: &mut TextAnalyzer,
) -> Vec<(usize, Term)> {
let mut positions_and_terms = Vec::<(usize, Term)>::new();
term.append_bytes(&[Type::Str.to_code()]);
let term_num_bytes = term.len_bytes();
let mut token_stream = text_analyzer.token_stream(value);
token_stream.process(&mut |token| {
term.truncate_value_bytes(term_num_bytes);
term.append_bytes(token.text.as_bytes());
term.append_str(&token.text);
positions_and_terms.push((token.position, term.clone()));
});
positions_and_terms
Expand Down Expand Up @@ -365,8 +350,8 @@ pub fn term_from_json_paths<'a>(
expand_dots_enabled: bool,
) -> Term {
let mut json_path = JsonPathWriter::with_expand_dots(expand_dots_enabled);
for patho in paths {
json_path.push(patho);
for path in paths {
json_path.push(path);
}
json_path.set_end();
let mut term = Term::with_type_and_field(Type::Json, json_field);
Expand Down
7 changes: 4 additions & 3 deletions src/query/query_parser/query_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ use rustc_hash::FxHashMap;
use super::logical_ast::*;
use crate::core::Index;
use crate::json_utils::{
convert_to_fast_value_and_set, set_string_and_get_terms, split_json_path, term_from_json_paths,
append_string_and_get_terms, convert_to_fast_value_and_append, split_json_path,
term_from_json_paths,
};
use crate::query::range_query::{is_type_valid_for_fastfield_range_query, RangeQuery};
use crate::query::{
Expand Down Expand Up @@ -973,10 +974,10 @@ fn generate_literals_for_json_object(
json_options.is_expand_dots_enabled(),
);

if let Some(term) = convert_to_fast_value_and_set(&term, phrase) {
if let Some(term) = convert_to_fast_value_and_append(&term, phrase) {
logical_literals.push(LogicalLiteral::Term(term));
}
let terms = set_string_and_get_terms(&mut term, phrase, &mut text_analyzer);
let terms = append_string_and_get_terms(&mut term, phrase, &mut text_analyzer);

if terms.len() <= 1 {
for (_, term) in terms {
Expand Down
33 changes: 21 additions & 12 deletions src/schema/term.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ use crate::DateTime;
/// The serialized value `ValueBytes` is considered everything after the first 4 bytes (term id).
#[derive(Clone)]
pub struct Term<B = Vec<u8>>(B)
where B: AsRef<[u8]>;
where
B: AsRef<[u8]>;

/// The number of bytes used as metadata by `Term`.
const TERM_METADATA_LENGTH: usize = 5;
Expand Down Expand Up @@ -162,6 +163,7 @@ impl Term {
self.set_bytes(val.to_u64().to_be_bytes().as_ref());
}

/// Used to append a fast value to a JSON term.
pub(crate) fn append_type_and_fast_value<T: FastValue>(&mut self, val: T) {
self.0.push(T::to_type().to_code());
let value = if T::to_type() == Type::Date {
Expand All @@ -174,9 +176,8 @@ impl Term {
self.0.extend(value.to_be_bytes().as_ref());
}

/// Appends a string type marker + string to the term.
#[cfg(test)]
pub fn append_str(&mut self, val: &str) {
/// Used to append a string type marker + string to a JSON term.
pub(crate) fn append_str(&mut self, val: &str) {
self.0.push(Type::Str.to_code());
self.0.extend(val.as_bytes().as_ref());
}
Expand Down Expand Up @@ -214,7 +215,8 @@ impl Term {
}

impl<B> Term<B>
where B: AsRef<[u8]>
where
B: AsRef<[u8]>,
{
/// Wraps an object holding bytes
pub fn wrap(data: B) -> Term<B> {
Expand Down Expand Up @@ -272,10 +274,12 @@ where B: AsRef<[u8]>
/// The nested ValueBytes in JSON is never of type JSON. (there's no recursion)
#[derive(Clone)]
pub struct ValueBytes<B>(B)
where B: AsRef<[u8]>;
where
B: AsRef<[u8]>;

impl<B> ValueBytes<B>
where B: AsRef<[u8]>
where
B: AsRef<[u8]>,
{
/// Wraps an object holding bytes
pub fn wrap(data: B) -> ValueBytes<B> {
Expand Down Expand Up @@ -481,23 +485,26 @@ where B: AsRef<[u8]>
}

impl<B> Ord for Term<B>
where B: AsRef<[u8]>
where
B: AsRef<[u8]>,
{
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.serialized_term().cmp(other.serialized_term())
}
}

impl<B> PartialOrd for Term<B>
where B: AsRef<[u8]>
where
B: AsRef<[u8]>,
{
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}

impl<B> PartialEq for Term<B>
where B: AsRef<[u8]>
where
B: AsRef<[u8]>,
{
fn eq(&self, other: &Self) -> bool {
self.serialized_term() == other.serialized_term()
Expand All @@ -507,7 +514,8 @@ where B: AsRef<[u8]>
impl<B> Eq for Term<B> where B: AsRef<[u8]> {}

impl<B> Hash for Term<B>
where B: AsRef<[u8]>
where
B: AsRef<[u8]>,
{
fn hash<H: Hasher>(&self, state: &mut H) {
self.0.as_ref().hash(state)
Expand All @@ -522,7 +530,8 @@ fn write_opt<T: std::fmt::Debug>(f: &mut fmt::Formatter, val_opt: Option<T>) ->
}

impl<B> fmt::Debug for Term<B>
where B: AsRef<[u8]>
where
B: AsRef<[u8]>,
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let field_id = self.field().field_id();
Expand Down

0 comments on commit bbfa822

Please sign in to comment.