Skip to content

Commit

Permalink
Fixing NormalizedString append when normalized is empty. (#1717)
Browse files (browse the repository at this point in the history)
Co-authored-by: Anantha Kandrapu <[email protected]>
  • Loading branch information
Narsil and Anantha Kandrapu authored Jan 9, 2025
1 parent 0ff2ab0 commit e7ed39d
Showing 1 changed file with 23 additions and 0 deletions.
23 changes: 23 additions & 0 deletions tokenizers/src/tokenizer/normalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,9 @@ impl NormalizedString {
if let Some((b, prev)) = self.normalized.char_indices().last() {
let transformations = std::iter::once((prev, 0)).chain(s.chars().map(|c| (c, 1)));
self.transform_range(Range::Normalized(b..), transformations, 0);
} else {
let transformations = s.chars().map(|c| (c, 1));
self.transform_range(Range::Normalized(..), transformations, 0);
}
self
}
Expand Down Expand Up @@ -2284,4 +2287,24 @@ mod tests {
s.lowercase();
assert_eq!(s.get(), "a...");
}

// Regression test: `append` must still work once the normalized side of a
// `NormalizedString` has been emptied with `clear`.
#[test]
fn test_append_after_clear() {
    let original = "Hello";
    let appended = " World";

    let mut normalized = NormalizedString::from(original);
    assert_eq!(normalized.get(), original);

    // Clearing leaves an empty normalized string behind.
    normalized.clear();
    assert_eq!(normalized.get(), "");

    // Appending onto the now-empty normalized string yields exactly the appended text.
    normalized.append(appended);
    assert_eq!(normalized.get(), appended);

    // The original keeps its length; the normalized length is that of the appended text.
    assert_eq!(normalized.len_original(), original.len());
    assert_eq!(normalized.len(), appended.len());

    // The original text is still retrievable, while the appended characters
    // map back to an empty slice of the original.
    let original_span = Range::Original(0..original.len());
    assert_eq!(normalized.get_range_original(original_span), Some(original));
    let appended_span = Range::Normalized(0..appended.len());
    assert_eq!(normalized.get_range_original(appended_span), Some(""));

    // On the normalized side, that same span is the appended text.
    let normalized_span = Range::Normalized(0..appended.len());
    assert_eq!(normalized.get_range(normalized_span), Some(appended));
}
}

0 comments on commit e7ed39d

Please sign in to comment.