From c418b14816a2ca5d2b49f2290659be9f3819030b Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Fri, 10 Sep 2021 15:01:32 -0500 Subject: [PATCH] Fixes #1384, by ensuring specials are properly parsed. --- src/number/complete.rs | 204 +++++++++++++++++++++++++++++++++++++--- src/number/streaming.rs | 200 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 382 insertions(+), 22 deletions(-) diff --git a/src/number/complete.rs b/src/number/complete.rs index c5553a40c..8f01c5752 100644 --- a/src/number/complete.rs +++ b/src/number/complete.rs @@ -1,5 +1,6 @@ //! Parsers recognizing numbers, complete input version +use core::{f32, f64}; use crate::branch::alt; use crate::bytes::complete::tag; use crate::character::complete::{char, digit1, sign}; @@ -1426,6 +1427,8 @@ where )(input) } +/// + /// Recognizes a floating point number in text format and returns the integer, fraction and exponent parts of the input data /// /// *Complete version*: Can parse until the end of input. @@ -1517,16 +1520,27 @@ where Ok((i, (sign, integer, fraction, exp))) } +/// Case-insensitive comparison of digits. Only works if `y` is only ASCII letters. +#[inline] +fn case_insensitive_cmp(x: &[u8], y: &[u8]) -> bool { + let d = (x.iter().zip(y.iter())).fold(0, |d, (xi, yi)| d | xi ^ yi); + // This uses the trick that 'a' - 'A' == 0x20, and this is true + // for all characters, so as long as `yi` is a valid ASCII letter, + // `xi ^ yi` can only be 0 or 0x20. + d == 0 || d == 32 +} + /// Recognizes floating point number in text format and returns a f32. /// -/// *Complete version*: Can parse until the end of input. +/// *Complete version*: Can parse until the end of input. This only handles +/// finite (non-special floats). /// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; -/// use nom::number::complete::float; +/// use nom::number::complete::float_finite; /// /// let parser = |s| { -/// float(s) +/// float_finite(s) /// }; /// /// assert_eq!(parser("11e-1"), Ok(("", 1.1))); @@ -1534,7 +1548,7 @@ where /// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); /// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); /// ``` -pub fn float>(input: T) -> IResult +pub fn float_finite>(input: T) -> IResult where T: Slice> + Slice> + Slice>, T: Clone + Offset, @@ -1560,6 +1574,42 @@ where Ok((i, float)) } +/// Recognizes floating point number in text format and returns a f32. +/// This only handles non-finite (special) values. +pub fn float_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + let (i, sign) = sign(input.clone())?; + let (mut float, count) = if i.input_len() >= 3 { + if case_insensitive_cmp(i.as_bytes(), b"nan") { + (f32::NAN, 3) + } else if i.input_len() >= 8 && case_insensitive_cmp(i.as_bytes(), b"infinity") { + (f32::INFINITY, 8) + } else if case_insensitive_cmp(i.as_bytes(), b"inf") { + (f32::INFINITY, 3) + } else { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Float))); + } + } else { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Float))); + }; + + if !sign { + float = -float; + } + + Ok((i.slice(count..), float)) +} + /// Recognizes floating point number in text format and returns a f32. /// /// *Complete version*: Can parse until the end of input. @@ -1577,7 +1627,44 @@ where /// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); /// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); /// ``` -pub fn double>(input: T) -> IResult +pub fn float>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + if let Ok((i, float)) = float_finite::(input.clone()) { + Ok((i, float)) + } else { + float_nonfinite::(input) + } +} + +/// Recognizes floating point number in text format and returns a f64. +/// +/// *Complete version*: Can parse until the end of input. This only handles +/// finite (non-special floats). +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::double_finite; +/// +/// let parser = |s| { +/// double_finite(s) +/// }; +/// +/// assert_eq!(parser("11e-1"), Ok(("", 1.1))); +/// assert_eq!(parser("123E-02"), Ok(("", 1.23))); +/// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); +/// ``` +pub fn double_finite>(input: T) -> IResult where T: Slice> + Slice> + Slice>, T: Clone + Offset, @@ -1603,6 +1690,78 @@ where Ok((i, float)) } +/// Recognizes floating point number in text format and returns a f64. +/// This only handles non-finite (special) values. +pub fn double_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + let (i, sign) = sign(input.clone())?; + let (mut double, count) = if i.input_len() >= 3 { + if case_insensitive_cmp(i.as_bytes(), b"nan") { + (f64::NAN, 3) + } else if i.input_len() >= 8 && case_insensitive_cmp(i.as_bytes(), b"infinity") { + (f64::INFINITY, 8) + } else if case_insensitive_cmp(i.as_bytes(), b"inf") { + (f64::INFINITY, 3) + } else { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Float))); + } + } else { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Float))); + }; + + if !sign { + double = -double; + } + + Ok((i.slice(count..), double)) +} + +/// Recognizes floating point number in text format and returns a f64. +/// +/// *Complete version*: Can parse until the end of input. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::double; +/// +/// let parser = |s| { +/// double(s) +/// }; +/// +/// assert_eq!(parser("11e-1"), Ok(("", 1.1))); +/// assert_eq!(parser("123E-02"), Ok(("", 1.23))); +/// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); +/// ``` +pub fn double>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + match double_finite::(input.clone()) { + Ok((i, double)) => Ok((i, double)), + Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)), + _ => double_nonfinite::(input), + } +} + #[cfg(test)] mod tests { use super::*; @@ -1618,6 +1777,23 @@ mod tests { }; ); + // Need more complex logic, since NaN != NaN. + macro_rules! assert_float_eq { + ($left: expr, $right: expr) => { + let left: $crate::IResult<_, _, (_, ErrorKind)> = $left; + let right: $crate::IResult<_, _, (_, ErrorKind)> = $right; + if let Ok((_, float)) = right { + if float.is_nan() { + assert!(left.unwrap().1.is_nan()); + } else { + assert_eq!(left, right); + } + }else { + assert_eq!(left, right); + } + }; + } + #[test] fn i8_tests() { assert_parse!(i8(&[0x00][..]), Ok((&b""[..], 0))); @@ -1942,6 +2118,8 @@ mod tests { "12.34", "-1.234E-12", "-1.234e-12", + "NaN", + "inf", ]; for test in test_cases.drain(..) { @@ -1951,13 +2129,15 @@ mod tests { println!("now parsing: {} -> {}", test, expected32); let larger = format!("{}", test); - assert_parse!(recognize_float(&larger[..]), Ok(("", test))); + if expected32.is_finite() { + assert_parse!(recognize_float(&larger[..]), Ok(("", test))); + } - assert_parse!(float(larger.as_bytes()), Ok((&b""[..], expected32))); - assert_parse!(float(&larger[..]), Ok(("", expected32))); + assert_float_eq!(float(larger.as_bytes()), Ok((&b""[..], expected32))); + assert_float_eq!(float(&larger[..]), Ok(("", expected32))); - assert_parse!(double(larger.as_bytes()), Ok((&b""[..], expected64))); - assert_parse!(double(&larger[..]), Ok(("", expected64))); + assert_float_eq!(double(larger.as_bytes()), Ok((&b""[..], expected64))); + assert_float_eq!(double(&larger[..]), Ok(("", expected64))); } let remaining_exponent = "-1.234E-"; @@ -2051,8 +2231,8 @@ mod tests { } fn parse_f64(i: &str) -> IResult<&str, f64, ()> { - match recognize_float(i) { - Err(e) => Err(e), + match recognize_float::<_, ()>(i) { + Err(_) => Err(Err::Error(())), Ok((i, s)) => { if s.is_empty() { return Err(Err::Error(())); diff --git a/src/number/streaming.rs b/src/number/streaming.rs index 3ca445fa8..8aa8fa609 100644 --- a/src/number/streaming.rs +++ b/src/number/streaming.rs @@ -1,5 +1,6 @@ //! Parsers recognizing numbers, streaming version +use core::{f32, f64}; use crate::branch::alt; use crate::bytes::streaming::tag; use crate::character::streaming::{char, digit1, sign}; @@ -1490,17 +1491,28 @@ where Ok((i, (sign, integer, fraction, exp))) } +/// Case-insensitive comparison of digits. Only works if `y` is only ASCII letters. +#[inline] +fn case_insensitive_cmp(x: &[u8], y: &[u8]) -> bool { + let d = (x.iter().zip(y.iter())).fold(0, |d, (xi, yi)| d | xi ^ yi); + // This uses the trick that 'a' - 'A' == 0x20, and this is true + // for all characters, so as long as `yi` is a valid ASCII letter, + // `xi ^ yi` can only be 0 or 0x20. + d == 0 || d == 32 +} + /// Recognizes floating point number in text format and returns a f32. /// /// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// This only handles finite (non-special floats). /// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; -/// use nom::number::complete::float; +/// use nom::number::complete::float_finite; /// /// let parser = |s| { -/// float(s) +/// float_finite(s) /// }; /// /// assert_eq!(parser("11e-1"), Ok(("", 1.1))); @@ -1508,7 +1520,7 @@ where /// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); /// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); /// ``` -pub fn float>(input: T) -> IResult +pub fn float_finite>(input: T) -> IResult where T: Slice> + Slice>, T: Clone + Offset, @@ -1534,6 +1546,42 @@ where Ok((i, float)) } +/// Recognizes floating point number in text format and returns a f32. +/// This only handles non-finite (special) values. +pub fn float_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + let (i, sign) = sign(input.clone())?; + let (mut float, count) = if case_insensitive_cmp(i.as_bytes(), b"nan") { + (f32::NAN, 3) + } else if i.input_len() >= 8 && case_insensitive_cmp(i.as_bytes(), b"infinity") { + (f32::INFINITY, 8) + } else if case_insensitive_cmp(i.as_bytes(), b"inf") { + (f32::INFINITY, 3) + } else { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Float))); + }; + + if !sign { + float = -float; + } + + if i.input_len() <= count { + Err(Err::Incomplete(Needed::new(count))) + } else { + Ok((i.slice(count..), float)) + } +} + /// Recognizes floating point number in text format and returns a f32. /// /// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. @@ -1541,6 +1589,43 @@ where /// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; +/// use nom::number::complete::float_finite; +/// +/// let parser = |s| { +/// float_finite(s) +/// }; +/// +/// assert_eq!(parser("11e-1"), Ok(("", 1.1))); +/// assert_eq!(parser("123E-02"), Ok(("", 1.23))); +/// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); +/// ``` +pub fn float>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + match float_finite::(input.clone()) { + Ok((i, double)) => Ok((i, double)), + Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)), + _ => float_nonfinite::(input), + } +} + +/// Recognizes floating point number in text format and returns a f64. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; /// use nom::number::complete::float; /// /// let parser = |s| { @@ -1552,7 +1637,7 @@ where /// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); /// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); /// ``` -pub fn double>(input: T) -> IResult +pub fn double_finite>(input: T) -> IResult where T: Slice> + Slice>, T: Clone + Offset, @@ -1578,6 +1663,79 @@ where Ok((i, float)) } +/// Recognizes floating point number in text format and returns a f64. +/// This only handles non-finite (special) values. +pub fn double_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + let (i, sign) = sign(input.clone())?; + let (mut double, count) = if case_insensitive_cmp(i.as_bytes(), b"nan") { + (f64::NAN, 3) + } else if i.input_len() >= 8 && case_insensitive_cmp(i.as_bytes(), b"infinity") { + (f64::INFINITY, 8) + } else if case_insensitive_cmp(i.as_bytes(), b"inf") { + (f64::INFINITY, 3) + } else { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Float))); + }; + + if !sign { + double = -double; + } + + if i.input_len() <= count { + Err(Err::Incomplete(Needed::new(count))) + } else { + Ok((i.slice(count..), double)) + } +} + +/// Recognizes floating point number in text format and returns a f64. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::float; +/// +/// let parser = |s| { +/// float(s) +/// }; +/// +/// assert_eq!(parser("11e-1"), Ok(("", 1.1))); +/// assert_eq!(parser("123E-02"), Ok(("", 1.23))); +/// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); +/// ``` +pub fn double>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + match double_finite::(input.clone()) { + Ok((i, double)) => Ok((i, double)), + Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)), + _ => double_nonfinite::(input), + } +} + #[cfg(test)] mod tests { use super::*; @@ -1593,6 +1751,23 @@ mod tests { }; ); + // Need more complex logic, since NaN != NaN. + macro_rules! assert_float_eq { + ($left: expr, $right: expr) => { + let left: $crate::IResult<_, _, (_, ErrorKind)> = $left; + let right: $crate::IResult<_, _, (_, ErrorKind)> = $right; + if let Ok((_, float)) = right { + if float.is_nan() { + assert!(left.unwrap().1.is_nan()); + } else { + assert_eq!(left, right); + } + }else { + assert_eq!(left, right); + } + }; + } + #[test] fn i8_tests() { assert_parse!(be_i8(&[0x00][..]), Ok((&b""[..], 0))); @@ -2023,6 +2198,8 @@ mod tests { "12.34", "-1.234E-12", "-1.234e-12", + "NaN", + "inf", ]; for test in test_cases.drain(..) { @@ -2032,13 +2209,15 @@ mod tests { println!("now parsing: {} -> {}", test, expected32); let larger = format!("{};", test); - assert_parse!(recognize_float(&larger[..]), Ok((";", test))); + if expected32.is_finite() { + assert_parse!(recognize_float(&larger[..]), Ok((";", test))); + } - assert_parse!(float(larger.as_bytes()), Ok((&b";"[..], expected32))); - assert_parse!(float(&larger[..]), Ok((";", expected32))); + assert_float_eq!(float(larger.as_bytes()), Ok((&b";"[..], expected32))); + assert_float_eq!(float(&larger[..]), Ok((";", expected32))); - assert_parse!(double(larger.as_bytes()), Ok((&b";"[..], expected64))); - assert_parse!(double(&larger[..]), Ok((";", expected64))); + assert_float_eq!(double(larger.as_bytes()), Ok((&b";"[..], expected64))); + assert_float_eq!(double(&larger[..]), Ok((";", expected64))); } let remaining_exponent = "-1.234E-"; @@ -2132,7 +2311,8 @@ mod tests { } fn parse_f64(i: &str) -> IResult<&str, f64, ()> { - match recognize_float(i) { + match recognize_float::<_, ()>(i) { + Err(Err::Failure(_)) => Err(Err::Error(())), Err(e) => Err(e), Ok((i, s)) => { if s.is_empty() {