From 2c28a26adcc381037f24a936b34e0e6b1f2109ac Mon Sep 17 00:00:00 2001 From: David Raznick Date: Thu, 25 Jan 2024 21:48:11 +0000 Subject: [PATCH] Fix truncation in the middle of a Unicode character --- src/lib.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4f90e30..1c6d33f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2587,7 +2587,11 @@ impl FlatFiles { if cell.len() > 32767 { log::warn!("WARNING: Cell larger than 32767 chararcters which is too large for XLSX format. The cell will be truncated, so some data will be missing."); - cell.truncate(32767) + let mut index: usize = 32767; + while !cell.is_char_boundary(index) { + index -= 1; + } + cell.truncate(index) } if metadata.describers[order].guess_type().0 == "number" { @@ -4376,7 +4380,7 @@ mod tests { json!({}), ) } - + #[test] fn test_s3_input() { if std::env::var("AWS_DEFAULT_REGION").is_ok() {