From 31951b01ab0fa6f7f647c9d1c97213f35de37d73 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sun, 12 Jan 2025 09:40:38 -0600 Subject: [PATCH 01/18] Add support for requiring integer or fraction digits with exponents. This patches support for Rust literals as well as other formats. --- lexical-parse-float/src/parse.rs | 15 +++ lexical-parse-float/tests/api_tests.rs | 40 +++++++ lexical-util/src/error.rs | 14 +++ lexical-util/src/feature_format.rs | 56 ++++++++++ lexical-util/src/format_builder.rs | 144 +++++++++++++++++++++++++ lexical-util/src/format_flags.rs | 19 +++- lexical-util/src/not_feature_format.rs | 103 ++++++++++++++---- lexical-util/src/prebuilt_formats.rs | 43 +++++++- 8 files changed, 410 insertions(+), 24 deletions(-) diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs index a49dfcec..239d5eac 100644 --- a/lexical-parse-float/src/parse.rs +++ b/lexical-parse-float/src/parse.rs @@ -567,6 +567,8 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( }); let mut n_digits = byte.current_count() - start.current_count(); #[cfg(feature = "format")] + let n_before_dot = n_digits; + #[cfg(feature = "format")] if format.required_integer_digits() && n_digits == 0 { return Err(Error::EmptyInteger(byte.cursor())); } @@ -697,6 +699,19 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( if format.no_exponent_without_fraction() && fraction_digits.is_none() { return Err(Error::ExponentWithoutFraction(byte.cursor() - 1)); } + + // We require digits before the dot, but we have none. + if format.required_integer_digits_with_exponent() && n_before_dot == 0 { + return Err(Error::ExponentWithoutIntegerDigits(byte.cursor() - 1)); + } + + // We require digits after the dot, but we have none. 
+ if format.required_fraction_digits_with_exponent() + && fraction_digits.is_some() + && n_after_dot == 0 + { + return Err(Error::ExponentWithoutFractionDigits(byte.cursor() - 1)); + } } let is_negative_exponent = parse_exponent_sign(&mut byte)?; diff --git a/lexical-parse-float/tests/api_tests.rs b/lexical-parse-float/tests/api_tests.rs index 3a015d85..e7302499 100644 --- a/lexical-parse-float/tests/api_tests.rs +++ b/lexical-parse-float/tests/api_tests.rs @@ -912,6 +912,46 @@ fn f64_no_exponent_without_fraction_test() { assert!(f64::from_lexical_with_options::(b"3e7", &OPTIONS).is_err()); } +#[test] +#[cfg(feature = "format")] +fn f64_required_integer_digits_with_exponent() { + const F1: u128 = rebuild(format::STANDARD) + .required_mantissa_digits(false) + .required_integer_digits_with_exponent(true) + .build_strict(); + const OPTIONS: Options = Options::new(); + assert!(f64::from_lexical_with_options::(b"1e5", &OPTIONS).is_ok()); + assert!(f64::from_lexical_with_options::(b".1e5", &OPTIONS).is_err()); + assert!(f64::from_lexical_with_options::(b".e5", &OPTIONS).is_err()); + assert!(f64::from_lexical_with_options::(b"1.e5", &OPTIONS).is_ok()); + assert!(f64::from_lexical_with_options::(b"1.0e5", &OPTIONS).is_ok()); + + const F2: u128 = rebuild(F1).required_fraction_digits(true).build_strict(); + assert!(f64::from_lexical_with_options::(b"1e5", &OPTIONS).is_ok()); + assert!(f64::from_lexical_with_options::(b"1.e5", &OPTIONS).is_err()); + assert!(f64::from_lexical_with_options::(b"1.0e5", &OPTIONS).is_ok()); +} + +#[test] +#[cfg(feature = "format")] +fn f64_required_fraction_digits_with_exponent() { + const F1: u128 = rebuild(format::STANDARD) + .required_mantissa_digits(false) + .required_fraction_digits_with_exponent(true) + .build_strict(); + const OPTIONS: Options = Options::new(); + assert!(f64::from_lexical_with_options::(b"1e5", &OPTIONS).is_ok()); + assert!(f64::from_lexical_with_options::(b".1e5", &OPTIONS).is_ok()); + 
assert!(f64::from_lexical_with_options::(b".e5", &OPTIONS).is_err()); + assert!(f64::from_lexical_with_options::(b"1.e5", &OPTIONS).is_err()); + assert!(f64::from_lexical_with_options::(b"1.0e5", &OPTIONS).is_ok()); + + const F2: u128 = rebuild(F1).required_fraction_digits(true).build_strict(); + assert!(f64::from_lexical_with_options::(b"1e5", &OPTIONS).is_ok()); + assert!(f64::from_lexical_with_options::(b"1.e5", &OPTIONS).is_err()); + assert!(f64::from_lexical_with_options::(b"1.0e5", &OPTIONS).is_ok()); +} + #[test] #[cfg(feature = "format")] fn f64_no_leading_zeros_test() { diff --git a/lexical-util/src/error.rs b/lexical-util/src/error.rs index 53c2cad2..e871c70c 100644 --- a/lexical-util/src/error.rs +++ b/lexical-util/src/error.rs @@ -42,6 +42,10 @@ pub enum Error { MissingExponentSign(usize), /// Exponent was present without fraction component. ExponentWithoutFraction(usize), + /// Exponent was present without any digits in the integer component. + ExponentWithoutIntegerDigits(usize), + /// Exponent was present without any digits in the fraction component. + ExponentWithoutFractionDigits(usize), /// Integer or integer component of float had invalid leading zeros. InvalidLeadingZeros(usize), /// No exponent with required exponent notation. 
@@ -154,6 +158,8 @@ impl Error { Self::InvalidPositiveExponentSign(_) => "'invalid `+` sign in exponent'", Self::MissingExponentSign(_) => "'missing required `+/-` sign for exponent'", Self::ExponentWithoutFraction(_) => "'invalid float containing exponent without fraction'", + Self::ExponentWithoutIntegerDigits(_) => "'invalid float containing exponent without integer digits'", + Self::ExponentWithoutFractionDigits(_) => "'invalid float containing exponent without fraction digits'", Self::InvalidLeadingZeros(_) => "'invalid number with leading zeros before digits'", Self::MissingExponent(_) => "'missing required exponent'", Self::MissingSign(_) => "'missing required `+/-` sign for integer'", @@ -217,6 +223,8 @@ impl Error { Self::InvalidPositiveExponentSign(index) => Some(index), Self::MissingExponentSign(index) => Some(index), Self::ExponentWithoutFraction(index) => Some(index), + Self::ExponentWithoutIntegerDigits(index) => Some(index), + Self::ExponentWithoutFractionDigits(index) => Some(index), Self::InvalidLeadingZeros(index) => Some(index), Self::MissingExponent(index) => Some(index), Self::MissingSign(index) => Some(index), @@ -367,6 +375,12 @@ impl fmt::Display for Error { Self::ExponentWithoutFraction(index) => { write_parse_error!(formatter, description, index) }, + Self::ExponentWithoutIntegerDigits(index) => { + write_parse_error!(formatter, description, index) + }, + Self::ExponentWithoutFractionDigits(index) => { + write_parse_error!(formatter, description, index) + }, Self::InvalidLeadingZeros(index) => write_parse_error!(formatter, description, index), Self::MissingExponent(index) => write_parse_error!(formatter, description, index), Self::MissingSign(index) => write_parse_error!(formatter, description, index), diff --git a/lexical-util/src/feature_format.rs b/lexical-util/src/feature_format.rs index 79d978b0..eeb7f7bc 100644 --- a/lexical-util/src/feature_format.rs +++ b/lexical-util/src/feature_format.rs @@ -586,6 +586,62 @@ impl NumberFormat { 
Self::CASE_SENSITIVE_BASE_SUFFIX } + /// If digits are required before the decimal point with exponent notation. + /// + /// See [`required_integer_digits_with_exponent`][Self::required_integer_digits_with_exponent]. + pub const REQUIRED_INTEGER_DIGITS_WITH_EXPONENT: bool = from_flag!(FORMAT, REQUIRED_INTEGER_DIGITS_WITH_EXPONENT); + + /// Get if digits are required before the decimal point with exponent notation. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`false`]. + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `.1e5` | ❌ | + /// | `.e5` | ❌ | + /// | `1.e5` | ✔️ | + /// | `1.0e5` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn required_integer_digits_with_exponent(&self) -> bool { + Self::REQUIRED_INTEGER_DIGITS_WITH_EXPONENT + } + + /// If digits are required after the decimal point with exponent notation, + /// if the decimal point is present. + /// + /// See [`required_fraction_digits_with_exponent`][Self::required_fraction_digits_with_exponent]. + pub const REQUIRED_FRACTION_DIGITS_WITH_EXPONENT: bool = from_flag!(FORMAT, REQUIRED_FRACTION_DIGITS_WITH_EXPONENT); + + /// Get if digits are required after the decimal point with exponent + /// notation, if the decimal point is present. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`false`] + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `.1e5` | ✔️ | + /// | `.e5` | ❌ | + /// | `1.e5` | ❌ | + /// | `1.0e5` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn required_fraction_digits_with_exponent(&self) -> bool { + Self::REQUIRED_FRACTION_DIGITS_WITH_EXPONENT + } + // DIGIT SEPARATOR FLAGS & MASKS /// If digit separators are allowed between integer digits. 
diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index b4577e1f..dc85a238 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -126,6 +126,10 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { /// - [`no_float_leading_zeros`]: If leading zeros before a float are not /// allowed. /// - [`required_exponent_notation`]: If exponent notation is required. +/// - [`required_integer_digits_with_exponent`]: If digits are required before +/// the decimal point with exponent notation. +/// - [`required_fraction_digits_with_exponent`]: If digits are required after +/// the decimal point with exponent notation, if the decimal point is present. /// - [`case_sensitive_exponent`]: If exponent characters are case-sensitive. /// - [`case_sensitive_base_prefix`]: If base prefixes are case-sensitive. /// - [`case_sensitive_base_suffix`]: If base suffixes are case-sensitive. @@ -233,6 +237,10 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { /// - [`no_float_leading_zeros`]: If leading zeros before a float are not /// allowed. /// - [`required_exponent_notation`]: If exponent notation is required. +/// - [`required_integer_digits_with_exponent`]: If digits are required before +/// the decimal point with exponent notation. +/// - [`required_fraction_digits_with_exponent`]: If digits are required after +/// the decimal point with exponent notation, if the decimal point is present. /// - [`case_sensitive_exponent`]: If exponent characters are case-sensitive. /// - [`case_sensitive_base_prefix`]: If base prefixes are case-sensitive. /// - [`case_sensitive_base_suffix`]: If base suffixes are case-sensitive. 
@@ -295,6 +303,8 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`no_integer_leading_zeros`]: Self::no_integer_leading_zeros\n [`no_float_leading_zeros`]: Self::no_float_leading_zeros\n [`required_exponent_notation`]: Self::required_exponent_notation\n +[`required_integer_digits_with_exponent`]: Self::required_integer_digits_with_exponent\n +[`required_fraction_digits_with_exponent`]: Self::required_fraction_digits_with_exponent\n [`case_sensitive_exponent`]: Self::case_sensitive_exponent\n [`integer_internal_digit_separator`]: Self::integer_internal_digit_separator\n [`fraction_internal_digit_separator`]: Self::fraction_internal_digit_separator\n @@ -329,6 +339,8 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`no_integer_leading_zeros`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L741\n [`no_float_leading_zeros`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L749\n [`required_exponent_notation`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L757\n +[`required_integer_digits_with_exponent`]: TODO\n +[`required_fraction_digits_with_exponent`]: TODO\n [`case_sensitive_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L765\n [`integer_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L793\n [`fraction_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L805\n @@ -387,6 +399,8 @@ pub struct NumberFormatBuilder { case_sensitive_exponent: bool, case_sensitive_base_prefix: bool, case_sensitive_base_suffix: bool, + required_integer_digits_with_exponent: bool, + required_fraction_digits_with_exponent: bool, integer_internal_digit_separator: bool, fraction_internal_digit_separator: bool, exponent_internal_digit_separator: bool, @@ 
-438,6 +452,8 @@ impl NumberFormatBuilder { /// - [`no_float_leading_zeros`][Self::get_no_float_leading_zeros] - `false` /// - [`required_exponent_notation`][Self::get_required_exponent_notation] - /// `false` + /// - [`required_integer_digits_with_exponent`][Self::get_required_integer_digits_with_exponent] - `false` + /// - [`required_fraction_digits_with_exponent`][Self::get_required_fraction_digits_with_exponent] - `false` /// - [`case_sensitive_exponent`][Self::get_case_sensitive_exponent] - /// `false` /// - [`case_sensitive_base_prefix`][Self::get_case_sensitive_base_prefix] - @@ -485,6 +501,8 @@ impl NumberFormatBuilder { case_sensitive_exponent: false, case_sensitive_base_prefix: false, case_sensitive_base_suffix: false, + required_integer_digits_with_exponent: false, + required_fraction_digits_with_exponent: false, integer_internal_digit_separator: false, fraction_internal_digit_separator: false, exponent_internal_digit_separator: false, @@ -1092,6 +1110,46 @@ impl NumberFormatBuilder { self.case_sensitive_base_suffix } + /// Get if digits are required before the decimal point with exponent + /// notation. + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `.1e5` | ❌ | + /// | `.e5` | ❌ | + /// | `1.e5` | ✔️ | + /// | `1.0e5` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn get_required_integer_digits_with_exponent(&self) -> bool { + self.required_integer_digits_with_exponent + } + + /// Get if digits are required after the decimal point with exponent + /// notation, if the decimal point is present. + /// + /// # Examples + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `.1e5` | ✔️ | + /// | `.e5` | ❌ | + /// | `1.e5` | ❌ | + /// | `1.0e5` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn get_required_fraction_digits_with_exponent(&self) -> bool { + self.required_fraction_digits_with_exponent + } + /// Get if digit separators are allowed between integer digits. /// /// This will not consider an input of only the digit separator @@ -2482,6 +2540,82 @@ impl NumberFormatBuilder { self } + /// Set if digits are required before the decimal point with exponent + /// notation. + /// + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `.1e5` | ❌ | + /// | `.e5` | ❌ | + /// | `1.e5` | ✔️ | + /// | `1.0e5` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// + /// + #[inline(always)] + #[cfg(feature = "format")] + pub const fn required_integer_digits_with_exponent(mut self, flag: bool) -> Self { + self.required_integer_digits_with_exponent = flag; + self + } + + /// Set if digits are required after the decimal point with exponent + /// notation, if the decimal point is present. + /// + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `.1e5` | ✔️ | + /// | `.e5` | ❌ | + /// | `1.e5` | ❌ | + /// | `1.0e5` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// + /// + #[inline(always)] + #[cfg(feature = "format")] + pub const fn required_fraction_digits_with_exponent(mut self, flag: bool) -> Self { + self.required_fraction_digits_with_exponent = flag; + self + } + /// Set if digit separators are allowed between integer digits. 
/// /// This will not consider an input of only the digit separator @@ -3246,6 +3380,8 @@ impl NumberFormatBuilder { self.case_sensitive_exponent, CASE_SENSITIVE_EXPONENT ; self.case_sensitive_base_prefix, CASE_SENSITIVE_BASE_PREFIX ; self.case_sensitive_base_suffix, CASE_SENSITIVE_BASE_SUFFIX ; + self.required_integer_digits_with_exponent, REQUIRED_INTEGER_DIGITS_WITH_EXPONENT ; + self.required_fraction_digits_with_exponent, REQUIRED_FRACTION_DIGITS_WITH_EXPONENT ; self.integer_internal_digit_separator, INTEGER_INTERNAL_DIGIT_SEPARATOR ; self.fraction_internal_digit_separator, FRACTION_INTERNAL_DIGIT_SEPARATOR ; self.exponent_internal_digit_separator, EXPONENT_INTERNAL_DIGIT_SEPARATOR ; @@ -3339,6 +3475,14 @@ impl NumberFormatBuilder { case_sensitive_exponent: has_flag!(format, CASE_SENSITIVE_EXPONENT), case_sensitive_base_prefix: has_flag!(format, CASE_SENSITIVE_BASE_PREFIX), case_sensitive_base_suffix: has_flag!(format, CASE_SENSITIVE_BASE_SUFFIX), + required_integer_digits_with_exponent: has_flag!( + format, + REQUIRED_INTEGER_DIGITS_WITH_EXPONENT + ), + required_fraction_digits_with_exponent: has_flag!( + format, + REQUIRED_FRACTION_DIGITS_WITH_EXPONENT + ), integer_internal_digit_separator: has_flag!(format, INTEGER_INTERNAL_DIGIT_SEPARATOR), fraction_internal_digit_separator: has_flag!(format, FRACTION_INTERNAL_DIGIT_SEPARATOR), exponent_internal_digit_separator: has_flag!(format, EXPONENT_INTERNAL_DIGIT_SEPARATOR), diff --git a/lexical-util/src/format_flags.rs b/lexical-util/src/format_flags.rs index 9c6ba9c3..4cf51f22 100644 --- a/lexical-util/src/format_flags.rs +++ b/lexical-util/src/format_flags.rs @@ -25,7 +25,7 @@ //! //! 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ -//! |e/P|e/S| | +//! |e/P|e/S|I/E|F/E| | //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ //! //! 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 @@ -58,6 +58,8 @@ //! 
e/C = Case-sensitive exponent character. //! e/P = Case-sensitive base prefix. //! e/S = Case-sensitive base suffix. +//! I/E = Require integer digits with exponent. +//! F/E = Require fraction digits with exponent. //! //! Digit Separator Flags: //! I/I = Integer internal digit separator. @@ -336,6 +338,13 @@ pub const CASE_SENSITIVE_BASE_PREFIX: u128 = 1 << 16; /// Base suffixes are case-sensitive. pub const CASE_SENSITIVE_BASE_SUFFIX: u128 = 1 << 17; +/// Digits are required before the decimal point with exponent notation. +pub const REQUIRED_INTEGER_DIGITS_WITH_EXPONENT: u128 = 1 << 18; + +/// Digits are required after the decimal point with exponent +/// notation, if the decimal point is present. +pub const REQUIRED_FRACTION_DIGITS_WITH_EXPONENT: u128 = 1 << 19; + // Non-digit separator flags. const _: () = assert!(REQUIRED_INTEGER_DIGITS == 1); check_subsequent_flags!(REQUIRED_INTEGER_DIGITS, REQUIRED_FRACTION_DIGITS); @@ -356,6 +365,8 @@ check_subsequent_flags!(NO_FLOAT_LEADING_ZEROS, REQUIRED_EXPONENT_NOTATION); check_subsequent_flags!(REQUIRED_EXPONENT_NOTATION, CASE_SENSITIVE_EXPONENT); check_subsequent_flags!(CASE_SENSITIVE_EXPONENT, CASE_SENSITIVE_BASE_PREFIX); check_subsequent_flags!(CASE_SENSITIVE_BASE_PREFIX, CASE_SENSITIVE_BASE_SUFFIX); +check_subsequent_flags!(CASE_SENSITIVE_BASE_SUFFIX, REQUIRED_INTEGER_DIGITS_WITH_EXPONENT); +check_subsequent_flags!(REQUIRED_INTEGER_DIGITS_WITH_EXPONENT, REQUIRED_FRACTION_DIGITS_WITH_EXPONENT); // DIGIT SEPARATOR FLAGS & MASKS // ----------------------------- @@ -528,6 +539,8 @@ pub const FLAG_MASK: u128 = CASE_SENSITIVE_EXPONENT | CASE_SENSITIVE_BASE_PREFIX | CASE_SENSITIVE_BASE_SUFFIX | + REQUIRED_INTEGER_DIGITS_WITH_EXPONENT | + REQUIRED_FRACTION_DIGITS_WITH_EXPONENT | INTERNAL_DIGIT_SEPARATOR | LEADING_DIGIT_SEPARATOR | TRAILING_DIGIT_SEPARATOR | @@ -549,6 +562,8 @@ pub const INTERFACE_FLAG_MASK: u128 = NO_EXPONENT_WITHOUT_FRACTION | NO_FLOAT_LEADING_ZEROS | REQUIRED_EXPONENT_NOTATION | + 
REQUIRED_INTEGER_DIGITS_WITH_EXPONENT | + REQUIRED_FRACTION_DIGITS_WITH_EXPONENT | INTERNAL_DIGIT_SEPARATOR | LEADING_DIGIT_SEPARATOR | TRAILING_DIGIT_SEPARATOR | @@ -572,6 +587,8 @@ pub const EXPONENT_FLAG_MASK: u128 = REQUIRED_EXPONENT_SIGN | NO_EXPONENT_WITHOUT_FRACTION | REQUIRED_EXPONENT_NOTATION | + REQUIRED_INTEGER_DIGITS_WITH_EXPONENT | + REQUIRED_FRACTION_DIGITS_WITH_EXPONENT | EXPONENT_INTERNAL_DIGIT_SEPARATOR | EXPONENT_LEADING_DIGIT_SEPARATOR | EXPONENT_TRAILING_DIGIT_SEPARATOR | diff --git a/lexical-util/src/not_feature_format.rs b/lexical-util/src/not_feature_format.rs index f7441bb5..406d1ee7 100644 --- a/lexical-util/src/not_feature_format.rs +++ b/lexical-util/src/not_feature_format.rs @@ -41,28 +41,30 @@ use crate::format_flags as flags; /// 17. [`case_sensitive_exponent`][NumberFormat::case_sensitive_exponent] /// 18. [`case_sensitive_base_prefix`][NumberFormat::case_sensitive_base_prefix] /// 19. [`case_sensitive_base_suffix`][NumberFormat::case_sensitive_base_suffix] -/// 20. [`integer_internal_digit_separator`][NumberFormat::integer_internal_digit_separator] -/// 21. [`fraction_internal_digit_separator`][NumberFormat::fraction_internal_digit_separator] -/// 22. [`exponent_internal_digit_separator`][NumberFormat::exponent_internal_digit_separator] -/// 23. [`internal_digit_separator`][NumberFormat::internal_digit_separator] -/// 24. [`integer_leading_digit_separator`][NumberFormat::integer_leading_digit_separator] -/// 25. [`fraction_leading_digit_separator`][NumberFormat::fraction_leading_digit_separator] -/// 26. [`exponent_leading_digit_separator`][NumberFormat::exponent_leading_digit_separator] -/// 27. [`leading_digit_separator`][NumberFormat::leading_digit_separator] -/// 28. [`integer_trailing_digit_separator`][NumberFormat::integer_trailing_digit_separator] -/// 29. [`fraction_trailing_digit_separator`][NumberFormat::fraction_trailing_digit_separator] -/// 30. 
[`exponent_trailing_digit_separator`][NumberFormat::exponent_trailing_digit_separator] -/// 31. [`trailing_digit_separator`][NumberFormat::trailing_digit_separator] -/// 32. [`integer_consecutive_digit_separator`][NumberFormat::integer_consecutive_digit_separator] -/// 33. [`fraction_consecutive_digit_separator`][NumberFormat::fraction_consecutive_digit_separator] -/// 34. [`exponent_consecutive_digit_separator`][NumberFormat::exponent_consecutive_digit_separator] -/// 35. [`consecutive_digit_separator`][NumberFormat::consecutive_digit_separator] -/// 36. [`special_digit_separator`][NumberFormat::special_digit_separator] -/// 37. [`digit_separator`][NumberFormat::digit_separator] -/// 38. [`base_prefix`][NumberFormat::base_prefix] -/// 39. [`base_suffix`][NumberFormat::base_suffix] -/// 40. [`exponent_base`][NumberFormat::exponent_base] -/// 41. [`exponent_radix`][NumberFormat::exponent_radix] +/// 20. [`required_integer_digits_with_exponent`][NumberFormat::required_integer_digits_with_exponent] +/// 21. [`required_fraction_digits_with_exponent`][NumberFormat::required_fraction_digits_with_exponent] +/// 22. [`integer_internal_digit_separator`][NumberFormat::integer_internal_digit_separator] +/// 23. [`fraction_internal_digit_separator`][NumberFormat::fraction_internal_digit_separator] +/// 24. [`exponent_internal_digit_separator`][NumberFormat::exponent_internal_digit_separator] +/// 25. [`internal_digit_separator`][NumberFormat::internal_digit_separator] +/// 26. [`integer_leading_digit_separator`][NumberFormat::integer_leading_digit_separator] +/// 27. [`fraction_leading_digit_separator`][NumberFormat::fraction_leading_digit_separator] +/// 28. [`exponent_leading_digit_separator`][NumberFormat::exponent_leading_digit_separator] +/// 29. [`leading_digit_separator`][NumberFormat::leading_digit_separator] +/// 30. [`integer_trailing_digit_separator`][NumberFormat::integer_trailing_digit_separator] +/// 31. 
[`fraction_trailing_digit_separator`][NumberFormat::fraction_trailing_digit_separator] +/// 32. [`exponent_trailing_digit_separator`][NumberFormat::exponent_trailing_digit_separator] +/// 33. [`trailing_digit_separator`][NumberFormat::trailing_digit_separator] +/// 34. [`integer_consecutive_digit_separator`][NumberFormat::integer_consecutive_digit_separator] +/// 35. [`fraction_consecutive_digit_separator`][NumberFormat::fraction_consecutive_digit_separator] +/// 36. [`exponent_consecutive_digit_separator`][NumberFormat::exponent_consecutive_digit_separator] +/// 37. [`consecutive_digit_separator`][NumberFormat::consecutive_digit_separator] +/// 38. [`special_digit_separator`][NumberFormat::special_digit_separator] +/// 39. [`digit_separator`][NumberFormat::digit_separator] +/// 40. [`base_prefix`][NumberFormat::base_prefix] +/// 41. [`base_suffix`][NumberFormat::base_suffix] +/// 42. [`exponent_base`][NumberFormat::exponent_base] +/// 43. [`exponent_radix`][NumberFormat::exponent_radix] /// /// This should always be constructed via [`NumberFormatBuilder`]. /// See [`NumberFormatBuilder`] for the fields for the packed struct. @@ -590,6 +592,63 @@ impl NumberFormat { Self::CASE_SENSITIVE_BASE_SUFFIX } + /// If digits are required before the decimal point with exponent notation. + /// + /// See [`required_integer_digits_with_exponent`][Self::required_integer_digits_with_exponent]. + pub const REQUIRED_INTEGER_DIGITS_WITH_EXPONENT: bool = false; + + /// Get if digits are required before the decimal point with exponent + /// notation. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`false`]. + /// + /// # Examples + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `.1e5` | ❌ | + /// | `.e5` | ❌ | + /// | `1.e5` | ✔️ | + /// | `1.0e5` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn required_integer_digits_with_exponent(&self) -> bool { + Self::REQUIRED_INTEGER_DIGITS_WITH_EXPONENT + } + + /// If digits are required after the decimal point with exponent + /// notation, if the decimal point is present. + /// + /// See [`required_fraction_digits_with_exponent`][Self::required_fraction_digits_with_exponent]. + pub const REQUIRED_FRACTION_DIGITS_WITH_EXPONENT: bool = false; + + /// Get if digits are required after the decimal point with exponent + /// notation, if the decimal point is present. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`false`]. + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `.1e5` | ✔️ | + /// | `.e5` | ❌ | + /// | `1.e5` | ❌ | + /// | `1.0e5` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn required_fraction_digits_with_exponent(&self) -> bool { + Self::REQUIRED_FRACTION_DIGITS_WITH_EXPONENT + } + // DIGIT SEPARATOR FLAGS & MASKS // If digit separators are allowed between integer digits. diff --git a/lexical-util/src/prebuilt_formats.rs b/lexical-util/src/prebuilt_formats.rs index 287248de..16d0fc25 100644 --- a/lexical-util/src/prebuilt_formats.rs +++ b/lexical-util/src/prebuilt_formats.rs @@ -6,7 +6,8 @@ use core::num; use crate::format::NumberFormatBuilder; -// FIXME +// TEST CODE +// --------- // Sample test code for each language used: // @@ -687,6 +688,46 @@ use crate::format::NumberFormatBuilder; // db.movie.find() // ``` +// TEST CASES +// ---------- + +// NOTE: The exact value of some of these, like specials, +// will differ based on the programming languages used. 
+ +// `.1` - required_integer_digits +// `1.` - required_fraction_digits +// `1.0e` - required_exponent_digits +// `.` - required_mantissa_digits +// `+1` - no_positive_mantissa_sign +// `1` - required_mantissa_sign +// `1.0e3` - no_exponent_notation +// `1.0e+3` - no_positive_exponent_sign +// `1.0e3` - required_exponent_sign +// `1e3` - no_exponent_without_fraction +// `NaN` - no_special +// `nan` - case_sensitive_special +// `01` - no_integer_leading_zeros +// `01.0` - no_float_leading_zeros +// `1.0` - required_exponent_notation +// `1.0E3` - case_sensitive_exponent +// `N/A` - case_sensitive_base_prefix +// `N/A` - case_sensitive_base_suffix +// `.1e5` - required_integer_digits_with_exponent +// `1.e5` - required_fraction_digits_with_exponent +// `1_1.11e11` - integer_internal_digit_separator +// `11.1_1e11` - fraction_internal_digit_separator +// `11.11e1_1` - exponent_internal_digit_separator +// `_11.11e11` - integer_leading_digit_separator +// `11._11e11` - fraction_leading_digit_separator +// `11.11e_11` - exponent_leading_digit_separator +// `11_.11e11` - integer_trailing_digit_separator +// `11.11_e11` - fraction_trailing_digit_separator +// `11.11e11_` - exponent_trailing_digit_separator +// `1__1.11e11` - integer_consecutive_digit_separator +// `11.1__1e11` - fraction_consecutive_digit_separator +// `11.11e1__1` - exponent_consecutive_digit_separator +// `na_n` - special_digit_separator + // PRE-DEFINED CONSTANTS // --------------------- // From 0489452f27c5e307a99ae462dd0a8c3e2d2a70f0 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sun, 12 Jan 2025 11:30:23 -0600 Subject: [PATCH 02/18] Add logic for formats that may support only a few conversions. This adds the following number format flags: - `supports_parsing_integers` - `supports_parsing_floats` - `supports_writing_integers` - `supports_writing_floats` If an operation is not supported with the format feature, then the code panics or returns an error immediately, which will always be resolved at compile time. 
--- lexical-parse-float/src/api.rs | 12 ++- lexical-parse-float/tests/api_tests.rs | 32 ++++++ lexical-parse-integer/src/api.rs | 9 +- lexical-parse-integer/tests/api_tests.rs | 44 ++++++++ lexical-util/src/error.rs | 6 ++ lexical-util/src/feature_format.rs | 72 +++++++++++++ lexical-util/src/format_builder.rs | 122 ++++++++++++++++++++++- lexical-util/src/format_flags.rs | 25 ++++- lexical-util/src/not_feature_format.rs | 120 ++++++++++++++++++---- lexical-write-float/src/api.rs | 7 +- lexical-write-float/tests/api_tests.rs | 31 +++++- lexical-write-integer/src/api.rs | 15 ++- lexical-write-integer/tests/api_tests.rs | 30 ++++++ 13 files changed, 492 insertions(+), 33 deletions(-) diff --git a/lexical-parse-float/src/api.rs b/lexical-parse-float/src/api.rs index 330cf811..e82a635a 100644 --- a/lexical-parse-float/src/api.rs +++ b/lexical-parse-float/src/api.rs @@ -51,7 +51,9 @@ macro_rules! float_from_lexical { ) -> lexical_util::result::Result { let format = NumberFormat::<{ FORMAT }> {}; - if !format.is_valid() { + if !format.supports_parsing_floats() { + return Err(Error::Unsupported); + } else if !format.is_valid() { return Err(format.error()); } else if !is_valid_options_punctuation(FORMAT, options.exponent(), options.decimal_point()) { return Err(Error::InvalidPunctuation); @@ -65,6 +67,14 @@ macro_rules! 
float_from_lexical { options: &Self::Options, ) -> lexical_util::result::Result<(Self, usize)> { + let format = NumberFormat::<{ FORMAT }> {}; + if !format.supports_parsing_floats() { + return Err(Error::Unsupported); + } else if !format.is_valid() { + return Err(format.error()); + } else if !is_valid_options_punctuation(FORMAT, options.exponent(), options.decimal_point()) { + return Err(Error::InvalidPunctuation); + } Self::parse_partial::(bytes, options) } } diff --git a/lexical-parse-float/tests/api_tests.rs b/lexical-parse-float/tests/api_tests.rs index e7302499..eabf3f56 100644 --- a/lexical-parse-float/tests/api_tests.rs +++ b/lexical-parse-float/tests/api_tests.rs @@ -1275,3 +1275,35 @@ fn issue68_test() { assert_eq!(f32::INFINITY, f32::from_lexical_with_options::(hex, &OPTIONS).unwrap()); assert_eq!(f64::INFINITY, f64::from_lexical_with_options::(hex, &OPTIONS).unwrap()); } + +#[test] +#[cfg(feature = "format")] +fn unsupported_test() { + const FORMAT: u128 = NumberFormatBuilder::new().supports_parsing_floats(false).build_strict(); + const OPTIONS: Options = Options::new(); + + let float = "12345.0"; + let value = f64::from_lexical_with_options::(float.as_bytes(), &OPTIONS); + assert_eq!(value, Err(Error::Unsupported)); + + let value = f64::from_lexical_partial_with_options::(float.as_bytes(), &OPTIONS); + assert_eq!(value, Err(Error::Unsupported)); +} + +#[test] +#[cfg(feature = "format")] +fn supported_test() { + const FORMAT: u128 = NumberFormatBuilder::new() + .supports_parsing_integers(false) + .supports_writing_integers(false) + .supports_writing_floats(false) + .build_strict(); + const OPTIONS: Options = Options::new(); + + let float = "12345.0"; + let value = f64::from_lexical_with_options::(float.as_bytes(), &OPTIONS); + assert_eq!(value, Ok(12345.0)); + + let value = f64::from_lexical_partial_with_options::(float.as_bytes(), &OPTIONS); + assert_eq!(value, Ok((12345.0, 7))); +} diff --git a/lexical-parse-integer/src/api.rs 
b/lexical-parse-integer/src/api.rs index 44e0c335..0d8efd29 100644 --- a/lexical-parse-integer/src/api.rs +++ b/lexical-parse-integer/src/api.rs @@ -2,6 +2,7 @@ #![doc(hidden)] +use lexical_util::error::Error; use lexical_util::format::{NumberFormat, STANDARD}; use lexical_util::{from_lexical, from_lexical_with_options}; @@ -42,7 +43,9 @@ macro_rules! integer_from_lexical { ) -> lexical_util::result::Result { let format = NumberFormat::<{ FORMAT }> {}; - if !format.is_valid() { + if !format.supports_parsing_integers() { + return Err(Error::Unsupported); + } else if !format.is_valid() { return Err(format.error()); } Self::parse_complete::(bytes, options) @@ -55,7 +58,9 @@ macro_rules! integer_from_lexical { ) -> lexical_util::result::Result<(Self, usize)> { let format = NumberFormat::<{ FORMAT }> {}; - if !format.is_valid() { + if !format.supports_parsing_integers() { + return Err(Error::Unsupported); + } else if !format.is_valid() { return Err(format.error()); } Self::parse_partial::(bytes, options) diff --git a/lexical-parse-integer/tests/api_tests.rs b/lexical-parse-integer/tests/api_tests.rs index 4c25c8b5..1ca88aef 100644 --- a/lexical-parse-integer/tests/api_tests.rs +++ b/lexical-parse-integer/tests/api_tests.rs @@ -357,3 +357,47 @@ fn base_prefix_and_suffix_test() { assert!(i32::from_lexical_with_options::(b"+h", &OPTIONS).is_err()); assert!(i32::from_lexical_with_options::(b"+0x", &OPTIONS).is_err()); } + +#[test] +#[cfg(feature = "format")] +fn unsupported_test() { + const FORMAT: u128 = NumberFormatBuilder::new().supports_parsing_integers(false).build_strict(); + const OPTIONS: Options = Options::new(); + + let integer = "12345"; + let value = i64::from_lexical_with_options::(integer.as_bytes(), &OPTIONS); + assert_eq!(value, Err(Error::Unsupported)); + + let value = i64::from_lexical_partial_with_options::(integer.as_bytes(), &OPTIONS); + assert_eq!(value, Err(Error::Unsupported)); + + let value = u64::from_lexical_with_options::(integer.as_bytes(), 
&OPTIONS); + assert_eq!(value, Err(Error::Unsupported)); + + let value = u64::from_lexical_partial_with_options::(integer.as_bytes(), &OPTIONS); + assert_eq!(value, Err(Error::Unsupported)); +} + +#[test] +#[cfg(feature = "format")] +fn supported_test() { + const FORMAT: u128 = NumberFormatBuilder::new() + .supports_parsing_floats(false) + .supports_writing_integers(false) + .supports_writing_floats(false) + .build_strict(); + const OPTIONS: Options = Options::new(); + + let integer = "12345"; + let value = i64::from_lexical_with_options::(integer.as_bytes(), &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_partial_with_options::(integer.as_bytes(), &OPTIONS); + assert_eq!(value, Ok((12345, 5))); + + let value = u64::from_lexical_with_options::(integer.as_bytes(), &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = u64::from_lexical_partial_with_options::(integer.as_bytes(), &OPTIONS); + assert_eq!(value, Ok((12345, 5))); +} diff --git a/lexical-util/src/error.rs b/lexical-util/src/error.rs index e871c70c..a0ac0192 100644 --- a/lexical-util/src/error.rs +++ b/lexical-util/src/error.rs @@ -92,6 +92,8 @@ pub enum Error { InvalidConsecutiveExponentDigitSeparator, /// Invalid flags were set without the format feature. InvalidFlags, + /// If the operation is unsupported. + Unsupported, // OPTION ERRORS /// Invalid NaN string: must start with an `n` character. 
@@ -184,6 +186,7 @@ impl Error { Self::InvalidConsecutiveFractionDigitSeparator => "'enabled consecutive digit separators in the fraction without setting a valid location'", Self::InvalidConsecutiveExponentDigitSeparator => "'enabled consecutive digit separators in the exponent without setting a valid location'", Self::InvalidFlags => "'invalid flags enabled without the format feature'", + Self::Unsupported => "the desired operation is unsupported for this format", // OPTION ERRORS Self::InvalidNanString => "'NaN string must started with `n`'", @@ -249,6 +252,7 @@ impl Error { Self::InvalidConsecutiveFractionDigitSeparator => None, Self::InvalidConsecutiveExponentDigitSeparator => None, Self::InvalidFlags => None, + Self::Unsupported => None, // OPTION ERRORS Self::InvalidNanString => None, @@ -314,6 +318,7 @@ impl Error { InvalidConsecutiveExponentDigitSeparator ); is_error_type!(is_invalid_flags, InvalidFlags); + is_error_type!(is_unsupported, Unsupported); is_error_type!(is_invalid_nan_string, InvalidNanString); is_error_type!(is_nan_string_too_long, NanStringTooLong); is_error_type!(is_invalid_inf_string, InvalidInfString); @@ -411,6 +416,7 @@ impl fmt::Display for Error { format_message!(formatter, description) }, Self::InvalidFlags => format_message!(formatter, description), + Self::Unsupported => format_message!(formatter, description), // OPTION ERRORS Self::InvalidNanString => options_message!(formatter, description), diff --git a/lexical-util/src/feature_format.rs b/lexical-util/src/feature_format.rs index eeb7f7bc..133e4a64 100644 --- a/lexical-util/src/feature_format.rs +++ b/lexical-util/src/feature_format.rs @@ -642,6 +642,78 @@ impl NumberFormat { Self::REQUIRED_FRACTION_DIGITS_WITH_EXPONENT } + /// If the format supports parsing integers. + /// + /// See [`supports_parsing_integers`][Self::supports_parsing_integers]. 
+ pub const SUPPORTS_PARSING_INTEGERS: bool = from_flag!(FORMAT, SUPPORTS_PARSING_INTEGERS); + + /// Get if the format supports parsing integers. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`true`]. + /// + /// # Used For + /// + /// - Parse Integer + #[inline(always)] + pub const fn supports_parsing_integers(&self) -> bool { + Self::SUPPORTS_PARSING_INTEGERS + } + + /// If the format supports parsing floats. + /// + /// See [`supports_parsing_floats`][Self::supports_parsing_floats]. + pub const SUPPORTS_PARSING_FLOATS: bool = from_flag!(FORMAT, SUPPORTS_PARSING_FLOATS); + + /// Get if the format supports parsing floats. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`true`]. + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn supports_parsing_floats(&self) -> bool { + Self::SUPPORTS_PARSING_FLOATS + } + + /// If the format supports writing integers. + /// + /// See [`supports_writing_integers`][Self::supports_writing_integers]. + pub const SUPPORTS_WRITING_INTEGERS: bool = from_flag!(FORMAT, SUPPORTS_WRITING_INTEGERS); + + /// Get if the format supports writing integers. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`true`]. + /// + /// # Used For + /// + /// - Write Integer + #[inline(always)] + pub const fn supports_writing_integers(&self) -> bool { + Self::SUPPORTS_WRITING_INTEGERS + } + + /// If the format supports writing floats. + /// + /// See [`supports_writing_floats`][Self::supports_writing_floats]. + pub const SUPPORTS_WRITING_FLOATS: bool = from_flag!(FORMAT, SUPPORTS_WRITING_FLOATS); + + /// Get if the format supports writing floats. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`true`]. 
+ /// + /// # Used For + /// + /// - Write Float + #[inline(always)] + pub const fn supports_writing_floats(&self) -> bool { + Self::SUPPORTS_WRITING_FLOATS + } + // DIGIT SEPARATOR FLAGS & MASKS /// If digit separators are allowed between integer digits. diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index dc85a238..55799dc1 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -306,6 +306,10 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`required_integer_digits_with_exponent`]: Self::required_integer_digits_with_exponent\n [`required_fraction_digits_with_exponent`]: Self::required_fraction_digits_with_exponent\n [`case_sensitive_exponent`]: Self::case_sensitive_exponent\n +[`supports_parsing_integers`]: Self::supports_parsing_integers\n +[`supports_parsing_floats`]: Self::supports_parsing_floats\n +[`supports_writing_integers`]: Self::supports_writing_integers\n +[`supports_writing_floats`]: Self::supports_writing_floats\n [`integer_internal_digit_separator`]: Self::integer_internal_digit_separator\n [`fraction_internal_digit_separator`]: Self::fraction_internal_digit_separator\n [`exponent_internal_digit_separator`]: Self::exponent_internal_digit_separator\n @@ -339,8 +343,12 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`no_integer_leading_zeros`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L741\n [`no_float_leading_zeros`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L749\n [`required_exponent_notation`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L757\n -[`required_integer_digits_with_exponent`]: TODO\n -[`required_fraction_digits_with_exponent`]: TODO\n +[`required_integer_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/0cad692/lexical-util/src/format_builder.rs#L1129\n 
+[`required_fraction_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/0cad692/lexical-util/src/format_builder.rs#L1149\n +[`supports_parsing_integers`]: TODO\n +[`supports_parsing_floats`]: TODO\n +[`supports_writing_integers`]: TODO\n +[`supports_writing_floats`]: TODO\n [`case_sensitive_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L765\n [`integer_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L793\n [`fraction_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L805\n @@ -396,6 +404,10 @@ pub struct NumberFormatBuilder { no_integer_leading_zeros: bool, no_float_leading_zeros: bool, required_exponent_notation: bool, + supports_parsing_integers: bool, + supports_parsing_floats: bool, + supports_writing_integers: bool, + supports_writing_floats: bool, case_sensitive_exponent: bool, case_sensitive_base_prefix: bool, case_sensitive_base_suffix: bool, @@ -454,6 +466,12 @@ impl NumberFormatBuilder { /// `false` /// - [`required_integer_digits_with_exponent`][Self::required_integer_digits_with_exponent] -`false` /// - [`required_fraction_digits_with_exponent`][Self::required_fraction_digits_with_exponent] -`false` + /// - [`supports_parsing_integers`][Self::supports_parsing_integers] - + /// `true` + /// - [`supports_parsing_floats`][Self::supports_parsing_floats] - `true` + /// - [`supports_writing_integers`][Self::supports_writing_integers] - + /// `true` + /// - [`supports_writing_floats`][Self::supports_writing_floats] - `true` /// - [`case_sensitive_exponent`][Self::get_case_sensitive_exponent] - /// `false` /// - [`case_sensitive_base_prefix`][Self::get_case_sensitive_base_prefix] - @@ -503,6 +521,10 @@ impl NumberFormatBuilder { case_sensitive_base_suffix: false, required_integer_digits_with_exponent: false, 
required_fraction_digits_with_exponent: false, + supports_parsing_integers: true, + supports_parsing_floats: true, + supports_writing_integers: true, + supports_writing_floats: true, integer_internal_digit_separator: false, fraction_internal_digit_separator: false, exponent_internal_digit_separator: false, @@ -1150,6 +1172,46 @@ impl NumberFormatBuilder { self.required_fraction_digits_with_exponent } + /// Get if the format supports parsing integers. + /// + /// # Used For + /// + /// - Parse Integer + #[inline(always)] + pub fn get_supports_parsing_integers(&self) -> bool { + self.supports_parsing_integers + } + + /// Get if the format supports parsing floats. + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub fn get_supports_parsing_floats(&self) -> bool { + self.supports_parsing_floats + } + + /// Get if the format supports writing integers. + /// + /// # Used For + /// + /// - Write Integer + #[inline(always)] + pub fn get_supports_writing_integers(&self) -> bool { + self.supports_writing_integers + } + + /// Get if the format supports writing floats. + /// + /// # Used For + /// + /// - Write Float + #[inline(always)] + pub fn get_supports_writing_floats(&self) -> bool { + self.supports_writing_floats + } + /// Get if digit separators are allowed between integer digits. /// /// This will not consider an input of only the digit separator @@ -2616,6 +2678,54 @@ impl NumberFormatBuilder { self } + /// Set if the format supports parsing integers. + /// + /// # Used For + /// + /// - Parse Integer + #[inline(always)] + #[cfg(feature = "format")] + pub const fn supports_parsing_integers(mut self, flag: bool) -> Self { + self.supports_parsing_integers = flag; + self + } + + /// Set if the format supports parsing floats. 
+ /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + #[cfg(feature = "format")] + pub const fn supports_parsing_floats(mut self, flag: bool) -> Self { + self.supports_parsing_floats = flag; + self + } + + /// Set if the format supports writing integers. + /// + /// # Used For + /// + /// - Write Integer + #[inline(always)] + #[cfg(feature = "format")] + pub const fn supports_writing_integers(mut self, flag: bool) -> Self { + self.supports_writing_integers = flag; + self + } + + /// Set if the format supports writing floats. + /// + /// # Used For + /// + /// - Write Float + #[inline(always)] + #[cfg(feature = "format")] + pub const fn supports_writing_floats(mut self, flag: bool) -> Self { + self.supports_writing_floats = flag; + self + } + /// Set if digit separators are allowed between integer digits. /// /// This will not consider an input of only the digit separator @@ -3382,6 +3492,10 @@ impl NumberFormatBuilder { self.case_sensitive_base_suffix, CASE_SENSITIVE_BASE_SUFFIX ; self.required_integer_digits_with_exponent, REQUIRED_INTEGER_DIGITS_WITH_EXPONENT ; self.required_fraction_digits_with_exponent, REQUIRED_FRACTION_DIGITS_WITH_EXPONENT ; + self.supports_parsing_integers, SUPPORTS_PARSING_INTEGERS ; + self.supports_parsing_floats, SUPPORTS_PARSING_FLOATS ; + self.supports_writing_integers, SUPPORTS_WRITING_INTEGERS ; + self.supports_writing_floats, SUPPORTS_WRITING_FLOATS ; self.integer_internal_digit_separator, INTEGER_INTERNAL_DIGIT_SEPARATOR ; self.fraction_internal_digit_separator, FRACTION_INTERNAL_DIGIT_SEPARATOR ; self.exponent_internal_digit_separator, EXPONENT_INTERNAL_DIGIT_SEPARATOR ; @@ -3483,6 +3597,10 @@ impl NumberFormatBuilder { format, REQUIRED_FRACTION_DIGITS_WITH_EXPONENT ), + supports_parsing_integers: has_flag!(format, SUPPORTS_PARSING_INTEGERS), + supports_parsing_floats: has_flag!(format, SUPPORTS_PARSING_FLOATS), + supports_writing_integers: has_flag!(format, SUPPORTS_WRITING_INTEGERS), + supports_writing_floats: 
has_flag!(format, SUPPORTS_WRITING_FLOATS), integer_internal_digit_separator: has_flag!(format, INTEGER_INTERNAL_DIGIT_SEPARATOR), fraction_internal_digit_separator: has_flag!(format, FRACTION_INTERNAL_DIGIT_SEPARATOR), exponent_internal_digit_separator: has_flag!(format, EXPONENT_INTERNAL_DIGIT_SEPARATOR), diff --git a/lexical-util/src/format_flags.rs b/lexical-util/src/format_flags.rs index 4cf51f22..2b93a75e 100644 --- a/lexical-util/src/format_flags.rs +++ b/lexical-util/src/format_flags.rs @@ -25,7 +25,7 @@ //! //! 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ -//! |e/P|e/S|I/E|F/E| | +//! |e/P|e/S|I/E|F/E|p/I|p/F|w/I|w/F| | //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ //! //! 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 @@ -60,6 +60,10 @@ //! e/S = Case-sensitive base suffix. //! I/E = Require integer digits with exponent. //! F/E = Require fraction digits with exponent. +//! p/I = The format supports parsing integers. +//! p/F = The format supports parsing floats. +//! w/I = The format supports writing integers. +//! w/F = The format supports writing floats. //! //! Digit Separator Flags: //! I/I = Integer internal digit separator. @@ -345,6 +349,18 @@ pub const REQUIRED_INTEGER_DIGITS_WITH_EXPONENT: u128 = 1 << 18; /// notation, if the decimal point is present. pub const REQUIRED_FRACTION_DIGITS_WITH_EXPONENT: u128 = 1 << 19; +/// If the format supports parsing integers. +pub const SUPPORTS_PARSING_INTEGERS: u128 = 1 << 20; + +/// If the format supports parsing floats. +pub const SUPPORTS_PARSING_FLOATS: u128 = 1 << 21; + +/// If the format supports writing integers. +pub const SUPPORTS_WRITING_INTEGERS: u128 = 1 << 22; + +/// If the format supports writing floats. +pub const SUPPORTS_WRITING_FLOATS: u128 = 1 << 23; + // Non-digit separator flags.
const _: () = assert!(REQUIRED_INTEGER_DIGITS == 1); check_subsequent_flags!(REQUIRED_INTEGER_DIGITS, REQUIRED_FRACTION_DIGITS); @@ -367,6 +383,10 @@ check_subsequent_flags!(CASE_SENSITIVE_EXPONENT, CASE_SENSITIVE_BASE_PREFIX); check_subsequent_flags!(CASE_SENSITIVE_BASE_PREFIX, CASE_SENSITIVE_BASE_SUFFIX); check_subsequent_flags!(CASE_SENSITIVE_BASE_SUFFIX, REQUIRED_INTEGER_DIGITS_WITH_EXPONENT); check_subsequent_flags!(REQUIRED_INTEGER_DIGITS_WITH_EXPONENT, REQUIRED_FRACTION_DIGITS_WITH_EXPONENT); +check_subsequent_flags!(REQUIRED_FRACTION_DIGITS_WITH_EXPONENT, SUPPORTS_PARSING_INTEGERS); +check_subsequent_flags!(SUPPORTS_PARSING_INTEGERS, SUPPORTS_PARSING_FLOATS); +check_subsequent_flags!(SUPPORTS_PARSING_FLOATS, SUPPORTS_WRITING_INTEGERS); +check_subsequent_flags!(SUPPORTS_WRITING_INTEGERS, SUPPORTS_WRITING_FLOATS); // DIGIT SEPARATOR FLAGS & MASKS // ----------------------------- @@ -553,6 +573,9 @@ pub const FLAG_MASK: u128 = /// omitting those that are handled prior. This limits the /// number of match paths required to determine the correct /// interface. +/// +/// Note that this is mostly a legacy constant, since we do +/// constant evaluation which is always at compile time. #[doc(hidden)] pub const INTERFACE_FLAG_MASK: u128 = REQUIRED_DIGITS | diff --git a/lexical-util/src/not_feature_format.rs b/lexical-util/src/not_feature_format.rs index 406d1ee7..ab83bf23 100644 --- a/lexical-util/src/not_feature_format.rs +++ b/lexical-util/src/not_feature_format.rs @@ -43,28 +43,32 @@ use crate::format_flags as flags; /// 19. [`case_sensitive_base_suffix`][NumberFormat::case_sensitive_base_suffix] /// 20. [`required_integer_digits_with_exponent`][NumberFormat::required_integer_digits_with_exponent] /// 21. [`required_fraction_digits_with_exponent`][NumberFormat::required_fraction_digits_with_exponent] -/// 22. [`integer_internal_digit_separator`][NumberFormat::integer_internal_digit_separator] -/// 23. 
[`fraction_internal_digit_separator`][NumberFormat::fraction_internal_digit_separator] -/// 24. [`exponent_internal_digit_separator`][NumberFormat::exponent_internal_digit_separator] -/// 25. [`internal_digit_separator`][NumberFormat::internal_digit_separator] -/// 26. [`integer_leading_digit_separator`][NumberFormat::integer_leading_digit_separator] -/// 27. [`fraction_leading_digit_separator`][NumberFormat::fraction_leading_digit_separator] -/// 28. [`exponent_leading_digit_separator`][NumberFormat::exponent_leading_digit_separator] -/// 29. [`leading_digit_separator`][NumberFormat::leading_digit_separator] -/// 30. [`integer_trailing_digit_separator`][NumberFormat::integer_trailing_digit_separator] -/// 31. [`fraction_trailing_digit_separator`][NumberFormat::fraction_trailing_digit_separator] -/// 32. [`exponent_trailing_digit_separator`][NumberFormat::exponent_trailing_digit_separator] -/// 33. [`trailing_digit_separator`][NumberFormat::trailing_digit_separator] -/// 34. [`integer_consecutive_digit_separator`][NumberFormat::integer_consecutive_digit_separator] -/// 35. [`fraction_consecutive_digit_separator`][NumberFormat::fraction_consecutive_digit_separator] -/// 36. [`exponent_consecutive_digit_separator`][NumberFormat::exponent_consecutive_digit_separator] -/// 37. [`consecutive_digit_separator`][NumberFormat::consecutive_digit_separator] -/// 38. [`special_digit_separator`][NumberFormat::special_digit_separator] -/// 39. [`digit_separator`][NumberFormat::digit_separator] -/// 40. [`base_prefix`][NumberFormat::base_prefix] -/// 41. [`base_suffix`][NumberFormat::base_suffix] -/// 42. [`exponent_base`][NumberFormat::exponent_base] -/// 43. [`exponent_radix`][NumberFormat::exponent_radix] +/// 22. [`supports_parsing_integers`][NumberFormat::supports_parsing_integers] +/// 23. [`supports_parsing_floats`][NumberFormat::supports_parsing_floats] +/// 24. [`supports_writing_integers`][NumberFormat::supports_writing_integers] +/// 25.
[`supports_writing_floats`][NumberFormat::supports_writing_floats] +/// 26. [`integer_internal_digit_separator`][NumberFormat::integer_internal_digit_separator] +/// 27. [`fraction_internal_digit_separator`][NumberFormat::fraction_internal_digit_separator] +/// 28. [`exponent_internal_digit_separator`][NumberFormat::exponent_internal_digit_separator] +/// 29. [`internal_digit_separator`][NumberFormat::internal_digit_separator] +/// 30. [`integer_leading_digit_separator`][NumberFormat::integer_leading_digit_separator] +/// 31. [`fraction_leading_digit_separator`][NumberFormat::fraction_leading_digit_separator] +/// 32. [`exponent_leading_digit_separator`][NumberFormat::exponent_leading_digit_separator] +/// 33. [`leading_digit_separator`][NumberFormat::leading_digit_separator] +/// 34. [`integer_trailing_digit_separator`][NumberFormat::integer_trailing_digit_separator] +/// 35. [`fraction_trailing_digit_separator`][NumberFormat::fraction_trailing_digit_separator] +/// 36. [`exponent_trailing_digit_separator`][NumberFormat::exponent_trailing_digit_separator] +/// 37. [`trailing_digit_separator`][NumberFormat::trailing_digit_separator] +/// 38. [`integer_consecutive_digit_separator`][NumberFormat::integer_consecutive_digit_separator] +/// 39. [`fraction_consecutive_digit_separator`][NumberFormat::fraction_consecutive_digit_separator] +/// 40. [`exponent_consecutive_digit_separator`][NumberFormat::exponent_consecutive_digit_separator] +/// 41. [`consecutive_digit_separator`][NumberFormat::consecutive_digit_separator] +/// 42. [`special_digit_separator`][NumberFormat::special_digit_separator] +/// 43. [`digit_separator`][NumberFormat::digit_separator] +/// 44. [`base_prefix`][NumberFormat::base_prefix] +/// 45. [`base_suffix`][NumberFormat::base_suffix] +/// 46. [`exponent_base`][NumberFormat::exponent_base] +/// 47. [`exponent_radix`][NumberFormat::exponent_radix] /// /// This should always be constructed via [`NumberFormatBuilder`]. 
/// See [`NumberFormatBuilder`] for the fields for the packed struct. @@ -649,6 +653,78 @@ impl NumberFormat { Self::REQUIRED_FRACTION_DIGITS_WITH_EXPONENT } + /// If the format supports parsing integers. + /// + /// See [`supports_parsing_integers`][Self::supports_parsing_integers]. + pub const SUPPORTS_PARSING_INTEGERS: bool = true; + + /// Get if the format supports parsing integers. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`true`]. + /// + /// # Used For + /// + /// - Parse Integer + #[inline(always)] + pub const fn supports_parsing_integers(&self) -> bool { + Self::SUPPORTS_PARSING_INTEGERS + } + + /// If the format supports parsing floats. + /// + /// See [`supports_parsing_floats`][Self::supports_parsing_floats]. + pub const SUPPORTS_PARSING_FLOATS: bool = true; + + /// Get if the format supports parsing floats. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`true`]. + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn supports_parsing_floats(&self) -> bool { + Self::SUPPORTS_PARSING_FLOATS + } + + /// If the format supports writing integers. + /// + /// See [`supports_writing_integers`][Self::supports_writing_integers]. + pub const SUPPORTS_WRITING_INTEGERS: bool = true; + + /// Get if the format supports writing integers. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`true`]. + /// + /// # Used For + /// + /// - Write Integer + #[inline(always)] + pub const fn supports_writing_integers(&self) -> bool { + Self::SUPPORTS_WRITING_INTEGERS + } + + /// If the format supports writing floats. + /// + /// See [`supports_writing_floats`][Self::supports_writing_floats]. + pub const SUPPORTS_WRITING_FLOATS: bool = true; + + /// Get if the format supports writing floats. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`true`]. 
+ /// + /// # Used For + /// + /// - Write Float + #[inline(always)] + pub const fn supports_writing_floats(&self) -> bool { + Self::SUPPORTS_WRITING_FLOATS + } + // DIGIT SEPARATOR FLAGS & MASKS // If digit separators are allowed between integer digits. diff --git a/lexical-write-float/src/api.rs b/lexical-write-float/src/api.rs index 56b03d49..0718ec7f 100644 --- a/lexical-write-float/src/api.rs +++ b/lexical-write-float/src/api.rs @@ -4,9 +4,10 @@ #[cfg(feature = "f16")] use lexical_util::bf16::bf16; +use lexical_util::error::Error; #[cfg(feature = "f16")] use lexical_util::f16::f16; -use lexical_util::format::STANDARD; +use lexical_util::format::{NumberFormat, STANDARD}; use lexical_util::{to_lexical, to_lexical_with_options}; use crate::options::Options; @@ -38,6 +39,10 @@ macro_rules! float_to_lexical { options: &Self::Options, ) -> &'a mut [u8] { + let format = NumberFormat::<{ FORMAT }> {}; + if !format.supports_writing_floats() { + core::panic!("{}", Error::Unsupported.description()); + } let count = self.write_float::<{ FORMAT }>(bytes, &options); &mut bytes[..count] } diff --git a/lexical-write-float/tests/api_tests.rs b/lexical-write-float/tests/api_tests.rs index 45ed9fd2..982a31ef 100644 --- a/lexical-write-float/tests/api_tests.rs +++ b/lexical-write-float/tests/api_tests.rs @@ -1,4 +1,6 @@ use lexical_util::constants::BUFFER_SIZE; +#[cfg(any(feature = "format", feature = "power-of-two"))] +use lexical_util::format::NumberFormatBuilder; use lexical_util::format::STANDARD; use lexical_write_float::{Options, ToLexical, ToLexicalWithOptions}; @@ -64,8 +66,6 @@ fn invalid_inf_test() { fn hex_test() { use core::num; - use lexical_util::format::NumberFormatBuilder; - const BASE16_2_10: u128 = NumberFormatBuilder::new() .mantissa_radix(16) .exponent_base(num::NonZeroU8::new(2)) @@ -78,3 +78,30 @@ fn hex_test() { let result = float.to_lexical_with_options::(&mut buffer, &HEX_OPTIONS); assert_eq!(result, b"3.039^12"); } + +#[test] +#[should_panic] 
+#[cfg(feature = "format")] +fn unsupported_test() { + const FORMAT: u128 = NumberFormatBuilder::new().supports_writing_floats(false).build_strict(); + const OPTIONS: Options = Options::new(); + + let mut buffer = [b'\x00'; BUFFER_SIZE]; + let float = 12345.0f64; + _ = float.to_lexical_with_options::(&mut buffer, &OPTIONS); +} + +#[test] +#[cfg(feature = "format")] +fn supported_test() { + const FORMAT: u128 = NumberFormatBuilder::new() + .supports_parsing_integers(false) + .supports_parsing_floats(false) + .supports_writing_integers(false) + .build_strict(); + const OPTIONS: Options = Options::new(); + + let mut buffer = [b'\x00'; BUFFER_SIZE]; + let float = 12345.0f64; + assert_eq!(b"12345.0", float.to_lexical_with_options::(&mut buffer, &OPTIONS)); +} diff --git a/lexical-write-integer/src/api.rs b/lexical-write-integer/src/api.rs index 08f58534..351bb15e 100644 --- a/lexical-write-integer/src/api.rs +++ b/lexical-write-integer/src/api.rs @@ -2,6 +2,7 @@ #![doc(hidden)] +use lexical_util::error::Error; use lexical_util::format::{NumberFormat, STANDARD}; use lexical_util::num::SignedInteger; use lexical_util::{to_lexical, to_lexical_with_options}; @@ -93,7 +94,12 @@ macro_rules! unsigned_to_lexical { ) -> &'a mut [u8] { _ = options; - assert!(NumberFormat::<{ FORMAT }> {}.is_valid()); + let format = NumberFormat::<{ FORMAT }> {}; + if !format.supports_writing_integers() { + core::panic!("{}", Error::Unsupported.description()); + } else if !format.is_valid() { + core::panic!("{}", format.error().description()); + } let len = unsigned::<$t, FORMAT>(self, bytes); &mut bytes[..len] } @@ -128,7 +134,12 @@ macro_rules! 
signed_to_lexical { ) -> &'a mut [u8] { _ = options; - assert!(NumberFormat::<{ FORMAT }> {}.is_valid()); + let format = NumberFormat::<{ FORMAT }> {}; + if !format.supports_writing_integers() { + core::panic!("{}", Error::Unsupported.description()); + } else if !format.is_valid() { + core::panic!("{}", format.error().description()); + } let len = signed::<$signed, $unsigned, FORMAT>(self, bytes); &mut bytes[..len] } diff --git a/lexical-write-integer/tests/api_tests.rs b/lexical-write-integer/tests/api_tests.rs index c2239be0..45179702 100644 --- a/lexical-write-integer/tests/api_tests.rs +++ b/lexical-write-integer/tests/api_tests.rs @@ -215,6 +215,36 @@ fn options_radix_test() { assert_eq!(b"A8", 128u8.to_lexical_with_options::<{ FORMAT }>(&mut buffer, &OPTIONS)); } +#[test] +#[should_panic] +#[cfg(feature = "format")] +fn unsupported_test() { + const FORMAT: u128 = NumberFormatBuilder::new().supports_writing_integers(false).build_strict(); + const OPTIONS: Options = Options::new(); + + let mut buffer = [b'\x00'; BUFFER_SIZE]; + let integer = 12345i64; + _ = integer.to_lexical_with_options::(&mut buffer, &OPTIONS); +} + +#[test] +#[cfg(feature = "format")] +fn supported_test() { + const FORMAT: u128 = NumberFormatBuilder::new() + .supports_parsing_integers(false) + .supports_parsing_floats(false) + .supports_writing_floats(false) + .build_strict(); + const OPTIONS: Options = Options::new(); + + let mut buffer = [b'\x00'; BUFFER_SIZE]; + let integer = 12345i64; + assert_eq!(b"12345", integer.to_lexical_with_options::(&mut buffer, &OPTIONS)); + + let integer = 12345u64; + assert_eq!(b"12345", integer.to_lexical_with_options::(&mut buffer, &OPTIONS)); +} + fn roundtrip(x: T) -> T where T: Roundtrip, From b7cafa212970fbcf7264f6a2de096d72cce9e91f Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sun, 12 Jan 2025 16:21:45 -0600 Subject: [PATCH 03/18] Add support for requiring base prefixes and suffixes. 
This requires them when parsing but also adds them to our float and integer writers when writing formats. This is useful for cases like hex floats where the floats only make sense when they have a literal `0x` prefixing them. --- CHANGELOG | 5 +- lexical-parse-float/src/parse.rs | 29 +-- lexical-parse-float/tests/api_tests.rs | 36 ++++ lexical-parse-integer/src/algorithm.rs | 138 +++++++++++++-- lexical-parse-integer/tests/api_tests.rs | 46 +++++ lexical-util/src/error.rs | 14 +- lexical-util/src/feature_format.rs | 94 +++++++++- lexical-util/src/format_builder.rs | 194 +++++++++++++++++++-- lexical-util/src/format_flags.rs | 40 ++++- lexical-util/src/not_feature_format.rs | 149 +++++++++++++--- lexical-util/tests/feature_format_tests.rs | 7 +- lexical-util/tests/format_flags_tests.rs | 26 ++- lexical-write-float/src/options.rs | 10 ++ lexical-write-float/src/write.rs | 56 +++--- lexical-write-float/tests/api_tests.rs | 35 ++++ lexical-write-integer/src/api.rs | 80 +++++++-- lexical-write-integer/src/options.rs | 15 +- lexical-write-integer/tests/api_tests.rs | 30 +++- 18 files changed, 882 insertions(+), 122 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index f6d6e0c0..0bb454dc 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,7 +15,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `build_checked` to our `Options` API (#204). - Added `has_digit_separator` to `NumberFormat` (#204). - Re-export `NumberFormat` to our other crates (#204). -- Add `Options::from_radix` for all options for similar APIs for each (#208). +- Added `Options::from_radix` for all options for similar APIs for each (#208). +- Support for requiring both integer and fraction digits with exponents, that is, `1.e5` and `.1e5`, as opposed to just requiring `1e5` (#215). +- Added `supports_parsing_integers`, `supports_parsing_floats`, `supports_writing_integers`, and `supports_writing_floats` for our number formats (#215). 
+- Added `required_base_prefix` and `required_base_suffix` for our number formats, requiring base prefixes and/or suffixes when parsing, and allowing writing base prefixes and/or suffixes (#215). ### Changed diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs index 239d5eac..8e242cb7 100644 --- a/lexical-parse-float/src/parse.rs +++ b/lexical-parse-float/src/parse.rs @@ -536,25 +536,31 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // INTEGER // Check to see if we have a valid base prefix. + // NOTE: `lz_prefix` is if we had a leading zero when + // checking for a base prefix: it is not if the prefix + // exists or not. #[allow(unused_variables)] - let mut is_prefix = false; - #[cfg(feature = "format")] + let mut lz_prefix = false; + #[cfg(all(feature = "format", feature = "power-of-two"))] { let base_prefix = format.base_prefix(); + let mut has_prefix = false; let mut iter = byte.integer_iter(); if base_prefix != 0 && iter.read_if_value_cased(b'0').is_some() { // Check to see if the next character is the base prefix. // We must have a format like `0x`, `0d`, `0o`. // NOTE: The check for empty integer digits happens below so // we don't need a redundant check here. - is_prefix = true; - if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some() - && iter.is_buffer_empty() - && format.required_integer_digits() - { + lz_prefix = true; + let prefix = iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()); + has_prefix = prefix.is_some(); + if has_prefix && iter.is_buffer_empty() && format.required_integer_digits() { return Err(Error::EmptyInteger(iter.cursor())); } } + if format.required_base_prefix() && !has_prefix { + return Err(Error::MissingBasePrefix(iter.cursor())); + } } // Parse our integral digits. @@ -600,7 +606,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // Check if integer leading zeros are disabled. 
#[cfg(feature = "format")] - if !is_prefix && format.no_float_leading_zeros() { + if !lz_prefix && format.no_float_leading_zeros() { if integer_digits.len() > 1 && integer_digits.first() == Some(&b'0') { return Err(Error::InvalidLeadingZeros(start.cursor())); } @@ -741,11 +747,14 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // that the first character **is not** a digit separator. #[allow(unused_variables)] let base_suffix = format.base_suffix(); - #[cfg(feature = "format")] + #[cfg(all(feature = "format", feature = "power-of-two"))] if base_suffix != 0 { - if byte.first_is(base_suffix, format.case_sensitive_base_suffix()) { + let is_suffix = byte.first_is(base_suffix, format.case_sensitive_base_suffix()); + if is_suffix { // SAFETY: safe since `byte.len() >= 1`. unsafe { byte.step_unchecked() }; + } else if format.required_base_suffix() { + return Err(Error::MissingBaseSuffix(byte.cursor())); } } diff --git a/lexical-parse-float/tests/api_tests.rs b/lexical-parse-float/tests/api_tests.rs index eabf3f56..002e6980 100644 --- a/lexical-parse-float/tests/api_tests.rs +++ b/lexical-parse-float/tests/api_tests.rs @@ -1307,3 +1307,39 @@ fn supported_test() { let value = f64::from_lexical_partial_with_options::(float.as_bytes(), &OPTIONS); assert_eq!(value, Ok((12345.0, 7))); } + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn require_base_prefix_test() { + use core::num; + + const PREFIX: u128 = NumberFormatBuilder::new() + .base_prefix(num::NonZeroU8::new(b'd')) + .required_base_prefix(true) + .build_strict(); + const OPTIONS: Options = Options::new(); + + let value = f64::from_lexical_with_options::(b"0d12345", &OPTIONS); + assert_eq!(value, Ok(12345.0)); + let value = f64::from_lexical_with_options::(b"12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = f64::from_lexical_with_options::(b"-0d12345", &OPTIONS); + assert_eq!(value, Ok(-12345.0)); + let value = 
f64::from_lexical_with_options::(b"-12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + const SUFFIX: u128 = NumberFormatBuilder::rebuild(PREFIX) + .base_suffix(num::NonZeroU8::new(b'z')) + .required_base_suffix(true) + .build_strict(); + let value = f64::from_lexical_with_options::(b"0d12345z", &OPTIONS); + assert_eq!(value, Ok(12345.0)); + let value = f64::from_lexical_with_options::(b"0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(7))); + + let value = f64::from_lexical_with_options::(b"-0d12345z", &OPTIONS); + assert_eq!(value, Ok(-12345.0)); + let value = f64::from_lexical_with_options::(b"-0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(8))); +} diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index dcb8bdd8..a5ba16fe 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -120,19 +120,26 @@ macro_rules! into_error { #[cfg(feature = "format")] macro_rules! fmt_invalid_digit { ( - $value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr + $value:ident, + $iter:ident, + $c:expr, + $start_index:ident, + $invalid_digit:ident, + $has_suffix:ident, + $is_end:expr $(,)? ) => {{ // NOTE: If we have non-contiguous iterators, we could have a skip character // here at the boundary. This does not affect safety but it does affect // correctness. debug_assert!($iter.is_contiguous() || $is_end); - let base_suffix = NumberFormat::::BASE_SUFFIX; - let uncased_base_suffix = NumberFormat::::CASE_SENSITIVE_BASE_SUFFIX; + let format = NumberFormat:: {}; + let base_suffix = format.base_suffix(); + let uncased_base_suffix = format.case_sensitive_base_suffix(); // Need to check for a base suffix, if so, return a valid value. // We can't have a base suffix at the first value (need at least // 1 digit). 
- if base_suffix != 0 && $iter.cursor() - $start_index > 1 { + if cfg!(feature = "power-of-two") && base_suffix != 0 && $iter.cursor() - $start_index > 1 { let is_suffix = if uncased_base_suffix { $c == base_suffix } else { @@ -144,6 +151,7 @@ macro_rules! fmt_invalid_digit { // contiguous iterators. if is_suffix && $is_end && $iter.is_buffer_empty() { // Break out of the loop, we've finished parsing. + $has_suffix = true; break; } else if !$iter.is_buffer_empty() { // Haven't finished parsing, so we're going to call @@ -165,7 +173,13 @@ macro_rules! fmt_invalid_digit { #[cfg(not(feature = "format"))] macro_rules! fmt_invalid_digit { ( - $value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr + $value:ident, + $iter:ident, + $c:expr, + $start_index:ident, + $invalid_digit:ident, + $has_suffix:ident, + $is_end:expr $(,)? ) => {{ $invalid_digit!($value, $iter.cursor(), $iter.current_count()); }}; @@ -393,6 +407,7 @@ where /// * `add_op` - The unchecked add/sub op. /// * `start_index` - The offset where parsing started. /// * `invalid_digit` - Behavior when an invalid digit is found. +/// * `has_suffix` - If a base suffix was found at the end of the buffer. /// * `is_end` - If iter corresponds to the full input. /// /// core: @@ -403,7 +418,8 @@ macro_rules! parse_1digit_unchecked { $add_op:ident, $start_index:ident, $invalid_digit:ident, - $is_end:expr + $has_suffix:ident, + $is_end:expr $(,)? ) => {{ // This is a slower parsing algorithm, going 1 digit at a time, but doing it in // an unchecked loop. @@ -411,7 +427,15 @@ macro_rules! 
parse_1digit_unchecked { while let Some(&c) = $iter.next() { let digit = match char_to_digit_const(c, radix) { Some(v) => v, - None => fmt_invalid_digit!($value, $iter, c, $start_index, $invalid_digit, $is_end), + None => fmt_invalid_digit!( + $value, + $iter, + c, + $start_index, + $invalid_digit, + $has_suffix, + $is_end, + ), }; // multiply first since compilers are good at optimizing things out and will do // a fused mul/add We must do this after getting the digit for @@ -431,6 +455,7 @@ macro_rules! parse_1digit_unchecked { /// * `add_op` - The checked add/sub op. /// * `start_index` - The offset where parsing started. /// * `invalid_digit` - Behavior when an invalid digit is found. +/// * `has_suffix` - If a base suffix was found at the end of the buffer. /// * `overflow` - If the error is overflow or underflow. /// /// core: @@ -441,7 +466,8 @@ macro_rules! parse_1digit_checked { $add_op:ident, $start_index:ident, $invalid_digit:ident, - $overflow:ident + $has_suffix:ident, + $overflow:ident $(,)? ) => {{ // This is a slower parsing algorithm, going 1 digit at a time, but doing it in // an unchecked loop. @@ -449,7 +475,15 @@ macro_rules! parse_1digit_checked { while let Some(&c) = $iter.next() { let digit = match char_to_digit_const(c, radix) { Some(v) => v, - None => fmt_invalid_digit!($value, $iter, c, $start_index, $invalid_digit, true), + None => fmt_invalid_digit!( + $value, + $iter, + c, + $start_index, + $invalid_digit, + $has_suffix, + true, + ), }; // multiply first since compilers are good at optimizing things out and will do // a fused mul/add @@ -477,6 +511,7 @@ macro_rules! parse_1digit_checked { /// * `start_index` - The offset where parsing started. /// * `invalid_digit` - Behavior when an invalid digit is found. /// * `no_multi_digit` - If to disable multi-digit optimizations. +/// * `has_suffix` - If a base suffix was found at the end of the buffer. /// * `is_end` - If iter corresponds to the full input. macro_rules! 
parse_digits_unchecked { ( @@ -486,7 +521,8 @@ macro_rules! parse_digits_unchecked { $start_index:ident, $invalid_digit:ident, $no_multi_digit:expr, - $is_end:expr + $has_suffix:ident, + $is_end:expr $(,)? ) => {{ let can_multi = can_try_parse_multidigits::<_, FORMAT>(&$iter); let use_multi = can_multi && !$no_multi_digit; @@ -510,7 +546,15 @@ macro_rules! parse_digits_unchecked { $value = $value.wrapping_mul(radix4).$add_op(value); } } - parse_1digit_unchecked!($value, $iter, $add_op, $start_index, $invalid_digit, $is_end) + parse_1digit_unchecked!( + $value, + $iter, + $add_op, + $start_index, + $invalid_digit, + $has_suffix, + $is_end + ) }}; } @@ -528,6 +572,7 @@ macro_rules! parse_digits_unchecked { /// * `invalid_digit` - Behavior when an invalid digit is found. /// * `overflow` - If the error is overflow or underflow. /// * `no_multi_digit` - If to disable multi-digit optimizations. +/// * `has_suffix` - If a base suffix was found at the end of the buffer. /// * `overflow_digits` - The number of digits before we need to consider /// checked ops. macro_rules! parse_digits_checked { @@ -540,7 +585,8 @@ macro_rules! parse_digits_checked { $invalid_digit:ident, $overflow:ident, $no_multi_digit:expr, - $overflow_digits:expr + $has_suffix:ident, + $overflow_digits:expr $(,)? ) => {{ // Can use the unchecked for the `max_digits` here. If we // have a non-contiguous iterator, we could have a case like @@ -557,13 +603,22 @@ macro_rules! parse_digits_checked { $start_index, $invalid_digit, $no_multi_digit, + $has_suffix, false ); } } // NOTE: all our multi-digit optimizations have been done here: skip this - parse_1digit_checked!($value, $iter, $add_op, $start_index, $invalid_digit, $overflow) + parse_1digit_checked!( + $value, + $iter, + $add_op, + $start_index, + $invalid_digit, + $has_suffix, + $overflow + ) }}; } @@ -650,6 +705,9 @@ macro_rules! 
algorithm { } } } + if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_prefix() && !is_prefix { + return Err(Error::MissingBasePrefix(iter.cursor())); + } // If we have a format that doesn't accept leading zeros, // check if the next value is invalid. It's invalid if the @@ -684,14 +742,60 @@ macro_rules! algorithm { // culminates in **way** slower performance overall for simple // integers, and no improvement for large integers. let mut value = T::ZERO; + #[allow(unused_mut)] + let mut has_suffix = false; if cannot_overflow && is_negative { - parse_digits_unchecked!(value, iter, wrapping_sub, start_index, $invalid_digit, $no_multi_digit, true); + parse_digits_unchecked!( + value, + iter, + wrapping_sub, + start_index, + $invalid_digit, + $no_multi_digit, + has_suffix, + true, + ); } if cannot_overflow { - parse_digits_unchecked!(value, iter, wrapping_add, start_index, $invalid_digit, $no_multi_digit, true); + parse_digits_unchecked!( + value, + iter, + wrapping_add, + start_index, + $invalid_digit, + $no_multi_digit, + has_suffix, + true, + ); } else if is_negative { - parse_digits_checked!(value, iter, checked_sub, wrapping_sub, start_index, $invalid_digit, Underflow, $no_multi_digit, overflow_digits); + parse_digits_checked!( + value, + iter, + checked_sub, + wrapping_sub, + start_index, + $invalid_digit, + Underflow, + $no_multi_digit, + has_suffix, + overflow_digits, + ); } else { - parse_digits_checked!(value, iter, checked_add, wrapping_add, start_index, $invalid_digit, Overflow, $no_multi_digit, overflow_digits); + parse_digits_checked!( + value, + iter, + checked_add, + wrapping_add, + start_index, + $invalid_digit, + Overflow, + $no_multi_digit, + has_suffix, + overflow_digits, + ); + } + + if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_suffix() && !has_suffix { + return Err(Error::MissingBaseSuffix(iter.cursor())); } $into_ok!(value, iter.buffer_length(), iter.current_count()) diff --git 
a/lexical-parse-integer/tests/api_tests.rs b/lexical-parse-integer/tests/api_tests.rs index 1ca88aef..d2baabdd 100644 --- a/lexical-parse-integer/tests/api_tests.rs +++ b/lexical-parse-integer/tests/api_tests.rs @@ -401,3 +401,49 @@ fn supported_test() { let value = u64::from_lexical_partial_with_options::(integer.as_bytes(), &OPTIONS); assert_eq!(value, Ok((12345, 5))); } + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn require_base_prefix_test() { + use core::num; + + const PREFIX: u128 = NumberFormatBuilder::new() + .base_prefix(num::NonZeroU8::new(b'd')) + .required_base_prefix(true) + .build_strict(); + const OPTIONS: Options = Options::new(); + + let value = i64::from_lexical_with_options::(b"0d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + let value = i64::from_lexical_with_options::(b"12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = i64::from_lexical_with_options::(b"-0d12345", &OPTIONS); + assert_eq!(value, Ok(-12345)); + let value = i64::from_lexical_with_options::(b"-12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = u64::from_lexical_with_options::(b"0d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + let value = u64::from_lexical_with_options::(b"12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + const SUFFIX: u128 = NumberFormatBuilder::rebuild(PREFIX) + .base_suffix(num::NonZeroU8::new(b'z')) + .required_base_suffix(true) + .build_strict(); + let value = i64::from_lexical_with_options::(b"0d12345z", &OPTIONS); + assert_eq!(value, Ok(12345)); + let value = i64::from_lexical_with_options::(b"0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(7))); + + let value = i64::from_lexical_with_options::(b"-0d12345z", &OPTIONS); + assert_eq!(value, Ok(-12345)); + let value = i64::from_lexical_with_options::(b"-0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(8))); + + let value = 
u64::from_lexical_with_options::(b"0d12345z", &OPTIONS); + assert_eq!(value, Ok(12345)); + let value = u64::from_lexical_with_options::(b"0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(7))); +} diff --git a/lexical-util/src/error.rs b/lexical-util/src/error.rs index a0ac0192..0b9a91f9 100644 --- a/lexical-util/src/error.rs +++ b/lexical-util/src/error.rs @@ -56,6 +56,10 @@ pub enum Error { InvalidPositiveSign(usize), /// Invalid negative sign for an unsigned type was found. InvalidNegativeSign(usize), + /// Missing a required base prefix when parsing the number. + MissingBasePrefix(usize), + /// Missing a required base suffix when parsing the number. + MissingBaseSuffix(usize), // NUMBER FORMAT ERRORS /// Invalid radix for the mantissa (significant) digits. @@ -167,6 +171,8 @@ impl Error { Self::MissingSign(_) => "'missing required `+/-` sign for integer'", Self::InvalidPositiveSign(_) => "'invalid `+` sign for an integer was found'", Self::InvalidNegativeSign(_) => "'invalid `-` sign for an unsigned type was found'", + Self::MissingBasePrefix(_) => "'missing a required base prefix when parsing the number'", + Self::MissingBaseSuffix(_) => "'missing a required base suffix when parsing the number'", // NUMBER FORMAT ERRORS Self::InvalidMantissaRadix => "'invalid radix for mantissa digits'", @@ -186,7 +192,7 @@ impl Error { Self::InvalidConsecutiveFractionDigitSeparator => "'enabled consecutive digit separators in the fraction without setting a valid location'", Self::InvalidConsecutiveExponentDigitSeparator => "'enabled consecutive digit separators in the exponent without setting a valid location'", Self::InvalidFlags => "'invalid flags enabled without the format feature'", - Self::Unsupported => "the desired operation is unsupported for this format", + Self::Unsupported => "'the desired operation is unsupported for this format'", // OPTION ERRORS Self::InvalidNanString => "'NaN string must started with `n`'", @@ -233,6 +239,8 @@ impl Error { 
Self::MissingSign(index) => Some(index), Self::InvalidPositiveSign(index) => Some(index), Self::InvalidNegativeSign(index) => Some(index), + Self::MissingBasePrefix(index) => Some(index), + Self::MissingBaseSuffix(index) => Some(index), // NUMBER FORMAT ERRORS Self::InvalidMantissaRadix => None, @@ -292,6 +300,8 @@ impl Error { is_error_type!(is_missing_sign, MissingSign(_)); is_error_type!(is_invalid_positive_sign, InvalidPositiveSign(_)); is_error_type!(is_invalid_negative_sign, InvalidNegativeSign(_)); + is_error_type!(is_missing_base_prefix, MissingBasePrefix(_)); + is_error_type!(is_missing_base_suffix, MissingBaseSuffix(_)); is_error_type!(is_invalid_mantissa_radix, InvalidMantissaRadix); is_error_type!(is_invalid_exponent_base, InvalidExponentBase); is_error_type!(is_invalid_exponent_radix, InvalidExponentRadix); @@ -391,6 +401,8 @@ impl fmt::Display for Error { Self::MissingSign(index) => write_parse_error!(formatter, description, index), Self::InvalidPositiveSign(index) => write_parse_error!(formatter, description, index), Self::InvalidNegativeSign(index) => write_parse_error!(formatter, description, index), + Self::MissingBasePrefix(index) => write_parse_error!(formatter, description, index), + Self::MissingBaseSuffix(index) => write_parse_error!(formatter, description, index), // NUMBER FORMAT ERRORS Self::InvalidMantissaRadix => format_message!(formatter, description), diff --git a/lexical-util/src/feature_format.rs b/lexical-util/src/feature_format.rs index 133e4a64..0ff98bee 100644 --- a/lexical-util/src/feature_format.rs +++ b/lexical-util/src/feature_format.rs @@ -554,12 +554,17 @@ impl NumberFormat { /// If set to [`true`], then the base prefix `x` would be considered the /// different from `X`. Can only be modified with /// [`feature`][crate#features] `power-of-two` or `radix` along with - /// `format`. Defaults to [`false`]. + /// `format`. Defaults to [`false`]. This is only used for writing numbers + /// if [`required_base_prefix`] is [`true`]. 
/// /// # Used For /// /// - Parse Float /// - Parse Integer + /// - Write Float + /// - Write Integer + /// + /// [`required_base_prefix`]: Self::required_base_prefix #[inline(always)] pub const fn case_sensitive_base_prefix(&self) -> bool { Self::CASE_SENSITIVE_BASE_PREFIX @@ -575,12 +580,17 @@ impl NumberFormat { /// If set to [`true`], then the base suffix `x` would be considered the /// different from `X`. Can only be modified with /// [`feature`][crate#features] `power-of-two` or `radix` along with - /// `format`. Defaults to [`false`]. + /// `format`. Defaults to [`false`]. This is only used for writing numbers + /// if [`required_base_suffix`] is [`true`]. /// /// # Used For /// /// - Parse Float /// - Parse Integer + /// - Write Float + /// - Write Integer + /// + /// [`required_base_suffix`]: Self::required_base_suffix #[inline(always)] pub const fn case_sensitive_base_suffix(&self) -> bool { Self::CASE_SENSITIVE_BASE_SUFFIX @@ -714,6 +724,70 @@ impl NumberFormat { Self::SUPPORTS_WRITING_FLOATS } + /// If the format requires base prefixes. + /// + /// See [`required_base_prefix`][Self::required_base_prefix]. + pub const REQUIRED_BASE_PREFIX: bool = from_flag!(FORMAT, REQUIRED_BASE_PREFIX); + + /// Get if the format requires base prefixes. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`false`]. + /// + /// # Examples + /// + /// Using a base prefix of `x`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `4d2` | ❌ | + /// | `x4d2` | ❌ | + /// | `4d2x` | ❌ | + /// | `0x4d2` | ✔️ | + /// + /// # Used For + /// + /// - Write Float + /// - Write Integer + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn required_base_prefix(&self) -> bool { + Self::REQUIRED_BASE_PREFIX + } + + /// If the format requires base suffixes. + /// + /// See [`required_base_suffix`][Self::required_base_suffix]. 
+ pub const REQUIRED_BASE_SUFFIX: bool = from_flag!(FORMAT, REQUIRED_BASE_SUFFIX); + + /// Get if the format requires base suffixes. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`false`]. + /// + /// # Examples + /// + /// Using a base suffix of `x`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `4d2` | ❌ | + /// | `x4d2` | ❌ | + /// | `4d2x` | ✔️ | + /// | `0x4d2` | ❌ | + /// + /// # Used For + /// + /// - Write Float + /// - Write Integer + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn required_base_suffix(&self) -> bool { + Self::REQUIRED_BASE_SUFFIX + } + // DIGIT SEPARATOR FLAGS & MASKS /// If digit separators are allowed between integer digits. @@ -1258,7 +1332,9 @@ impl NumberFormat { /// setting the base prefix to `x` means that a leading `0x` will /// be ignore, if present. Can only be modified with /// [`feature`][crate#features] `power-of-two` or `radix` along with - /// `format`. Defaults to `0`, or no base prefix allowed. + /// `format`. Defaults to `0`, or no base prefix allowed. This is + /// only used for writing numbers if [`required_base_prefix`] + /// is [`true`]. This is ignored for special floating-point numbers. /// /// # Examples /// @@ -1276,6 +1352,10 @@ impl NumberFormat { /// /// - Parse Float /// - Parse Integer + /// - Write Float + /// - Write Integer + /// + /// [`required_base_prefix`]: Self::required_base_prefix #[inline(always)] pub const fn base_prefix(&self) -> u8 { Self::BASE_PREFIX @@ -1298,7 +1378,9 @@ impl NumberFormat { /// setting the base prefix to `x` means that a trailing `x` will /// be ignored, if present. Can only be modified with /// [`feature`][crate#features] `power-of-two` or `radix` along with - /// `format`. Defaults to `0`, or no base suffix allowed. + /// `format`. Defaults to `0`, or no base suffix allowed. This is + /// only used for writing numbers if [`required_base_suffix`] + /// is [`true`]. 
This is ignored for special floating-point numbers. /// /// # Examples /// @@ -1314,6 +1396,10 @@ impl NumberFormat { /// /// - Parse Float /// - Parse Integer + /// - Write Float + /// - Write Integer + /// + /// [`required_base_suffix`]: Self::required_base_suffix #[inline(always)] pub const fn base_suffix(&self) -> u8 { Self::BASE_SUFFIX diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index 55799dc1..658a1c80 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -310,6 +310,8 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`supports_parsing_floats`]: Self::supports_parsing_floats\n [`supports_writing_integers`]: Self::supports_writing_integers\n [`supports_writing_floats`]: Self::supports_writing_floats\n +[`required_base_prefix`]: Self::required_base_prefix\n +[`required_base_suffix`]: Self::required_base_suffix\n [`integer_internal_digit_separator`]: Self::integer_internal_digit_separator\n [`fraction_internal_digit_separator`]: Self::fraction_internal_digit_separator\n [`exponent_internal_digit_separator`]: Self::exponent_internal_digit_separator\n @@ -345,10 +347,12 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`required_exponent_notation`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L757\n [`required_integer_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/0cad692/lexical-util/src/format_builder.rs#L1129\n [`required_fraction_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/0cad692/lexical-util/src/format_builder.rs#L1149\n -[`supports_parsing_integers`]: TODO\n -[`supports_parsing_floats`]: TODO\n -[`supports_writing_integers`]: TODO\n -[`supports_writing_floats`]: TODO\n +[`supports_parsing_integers`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1181\n +[`supports_parsing_floats`]: 
https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1191\n +[`supports_writing_integers`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1201\n +[`supports_writing_floats`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1211\n +[`required_base_prefix`]: TODO\n +[`required_base_suffix`]: TODO\n [`case_sensitive_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L765\n [`integer_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L793\n [`fraction_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L805\n @@ -413,6 +417,8 @@ pub struct NumberFormatBuilder { case_sensitive_base_suffix: bool, required_integer_digits_with_exponent: bool, required_fraction_digits_with_exponent: bool, + required_base_prefix: bool, + required_base_suffix: bool, integer_internal_digit_separator: bool, fraction_internal_digit_separator: bool, exponent_internal_digit_separator: bool, @@ -478,6 +484,8 @@ impl NumberFormatBuilder { /// `false` /// - [`case_sensitive_base_suffix`][Self::get_case_sensitive_base_suffix] - /// `false` + /// - [`required_base_prefix`][Self::get_required_base_prefix] - `false` + /// - [`required_base_suffix`][Self::get_required_base_suffix] - `false` /// - [`integer_internal_digit_separator`][Self::get_integer_internal_digit_separator] - `false` /// - [`fraction_internal_digit_separator`][Self::get_fraction_internal_digit_separator] - `false` /// - [`exponent_internal_digit_separator`][Self::get_exponent_internal_digit_separator] - `false` @@ -525,6 +533,8 @@ impl NumberFormatBuilder { supports_parsing_floats: true, supports_writing_integers: true, supports_writing_floats: true, + required_base_prefix: false, + required_base_suffix: false, 
integer_internal_digit_separator: false, fraction_internal_digit_separator: false, exponent_internal_digit_separator: false, @@ -705,7 +715,9 @@ impl NumberFormatBuilder { /// setting the base prefix to `x` means that a leading `0x` will /// be ignore, if present. Can only be modified with /// [`feature`][crate#features] `power-of-two` or `radix` along with - /// `format`. Defaults to [`None`], or no base prefix allowed. + /// `format`. Defaults to [`None`], or no base prefix allowed. This is only + /// used for writing numbers if [`required_base_prefix`] is [`true`]. + /// This is ignored for special floating-point numbers. /// /// # Examples /// @@ -723,6 +735,10 @@ impl NumberFormatBuilder { /// /// - Parse Float /// - Parse Integer + /// - Write Float + /// - Write Integer + /// + /// [`required_base_prefix`]: Self::required_base_prefix #[inline(always)] pub const fn get_base_prefix(&self) -> OptionU8 { self.base_prefix @@ -734,7 +750,9 @@ impl NumberFormatBuilder { /// setting the base prefix to `x` means that a trailing `x` will /// be ignored, if present. Can only be modified with /// [`feature`][crate#features] `power-of-two` or `radix` along with - /// `format`. Defaults to [`None`], or no base suffix allowed. + /// `format`. Defaults to [`None`], or no base suffix allowed. This is only + /// used for writing numbers if [`required_base_suffix`] is [`true`]. + /// This is ignored for special floating-point numbers. /// /// # Examples /// @@ -750,6 +768,10 @@ impl NumberFormatBuilder { /// /// - Parse Float /// - Parse Integer + /// - Write Float + /// - Write Integer + /// + /// [`required_base_suffix`]: Self::required_base_suffix #[inline(always)] pub const fn get_base_suffix(&self) -> OptionU8 { self.base_suffix @@ -1105,12 +1127,17 @@ impl NumberFormatBuilder { /// If set to [`true`], then the base prefix `x` would be considered the /// different from `X`. 
Can only be modified with /// [`feature`][crate#features] `power-of-two` or `radix` along with - /// `format`. Defaults to [`false`]. + /// `format`. Defaults to [`false`]. This is only used for writing numbers + /// if [`required_base_prefix`] is [`true`]. /// /// # Used For /// /// - Parse Float /// - Parse Integer + /// - Write Float + /// - Write Integer + /// + /// [`required_base_prefix`]: Self::required_base_prefix #[inline(always)] pub const fn get_case_sensitive_base_prefix(&self) -> bool { self.case_sensitive_base_prefix @@ -1121,12 +1148,17 @@ impl NumberFormatBuilder { /// If set to [`true`], then the base suffix `x` would be considered the /// different from `X`. Can only be modified with /// [`feature`][crate#features] `power-of-two` or `radix` along with - /// `format`. Defaults to [`false`]. + /// `format`. Defaults to [`false`]. This is only used for writing numbers + /// if [`required_base_suffix`] is [`true`]. /// /// # Used For /// /// - Parse Float /// - Parse Integer + /// - Write Float + /// - Write Integer + /// + /// [`required_base_suffix`]: Self::required_base_suffix #[inline(always)] pub const fn get_case_sensitive_base_suffix(&self) -> bool { self.case_sensitive_base_suffix @@ -1212,6 +1244,54 @@ impl NumberFormatBuilder { self.supports_writing_floats } + /// Get if the format requires base prefixes. + /// + /// # Examples + /// + /// Using a base prefix of `x`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `4d2` | ❌ | + /// | `x4d2` | ❌ | + /// | `4d2x` | ❌ | + /// | `0x4d2` | ✔️ | + /// + /// # Used For + /// + /// - Write Float + /// - Write Integer + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_required_base_prefix(&self) -> bool { + self.required_base_prefix + } + + /// Get if the format requires base suffixes. + /// + /// # Examples + /// + /// Using a base suffix of `x`. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `4d2` | ❌ | + /// | `x4d2` | ❌ | + /// | `4d2x` | ✔️ | + /// | `0x4d2` | ❌ | + /// + /// # Used For + /// + /// - Write Float + /// - Write Integer + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_required_base_suffix(&self) -> bool { + self.required_base_suffix + } + /// Get if digit separators are allowed between integer digits. /// /// This will not consider an input of only the digit separator @@ -1737,7 +1817,9 @@ impl NumberFormatBuilder { /// This character will come after a leading zero, so for example /// setting the base prefix to `x` means that a leading `0x` will /// be ignore, if present. Defaults to [`None`], or no base prefix - /// allowed. + /// allowed. This is only used for writing numbers if + /// [`required_base_prefix`] is [`true`]. This is ignored for special + /// floating-point numbers. /// /// # Examples /// @@ -1755,6 +1837,10 @@ impl NumberFormatBuilder { /// /// - Parse Float /// - Parse Integer + /// - Write Float + /// - Write Integer + /// + /// [`required_base_prefix`]: Self::required_base_prefix /// /// + #[inline(always)] + #[cfg(feature = "format")] + pub const fn required_mantissa_digits_with_exponent(mut self, flag: bool) -> Self { + self.required_mantissa_digits_with_exponent = flag; + self + } + /// Set if the format supports parsing integers. /// /// Defaults to [`true`]. 
@@ -3658,6 +3725,7 @@ impl NumberFormatBuilder { self.case_sensitive_base_suffix, CASE_SENSITIVE_BASE_SUFFIX ; self.required_integer_digits_with_exponent, REQUIRED_INTEGER_DIGITS_WITH_EXPONENT ; self.required_fraction_digits_with_exponent, REQUIRED_FRACTION_DIGITS_WITH_EXPONENT ; + self.required_mantissa_digits_with_exponent, REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT ; self.supports_parsing_integers, SUPPORTS_PARSING_INTEGERS ; self.supports_parsing_floats, SUPPORTS_PARSING_FLOATS ; self.supports_writing_integers, SUPPORTS_WRITING_INTEGERS ; @@ -3765,6 +3833,10 @@ impl NumberFormatBuilder { format, REQUIRED_FRACTION_DIGITS_WITH_EXPONENT ), + required_mantissa_digits_with_exponent: has_flag!( + format, + REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT + ), supports_parsing_integers: has_flag!(format, SUPPORTS_PARSING_INTEGERS), supports_parsing_floats: has_flag!(format, SUPPORTS_PARSING_FLOATS), supports_writing_integers: has_flag!(format, SUPPORTS_WRITING_INTEGERS), diff --git a/lexical-util/src/format_flags.rs b/lexical-util/src/format_flags.rs index 0b6c5f4b..115bc581 100644 --- a/lexical-util/src/format_flags.rs +++ b/lexical-util/src/format_flags.rs @@ -25,7 +25,7 @@ //! //! 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ -//! |e/P|e/S|I/E|F/E|p/I|p/F|w/I|w/F|R/P|r/S| | +//! |e/P|e/S|I/E|F/E|p/I|p/F|w/I|w/F|R/P|r/S|M/E| | //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ //! //! 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 @@ -66,6 +66,7 @@ //! w/F = The format supports writing floats. //! r/P = Require base prefixes. //! r/S = Require base suffixes. +//! M/E = Require mantissa digits with exponent. //! //! Digit Separator Flags: //! I/I = Integer internal digit separator. @@ -369,6 +370,9 @@ pub const REQUIRED_BASE_PREFIX: u128 = 1 << 24; /// If the format requires base suffixes. 
pub const REQUIRED_BASE_SUFFIX: u128 = 1 << 25; +/// If any significant digits are required with exponent notation. +pub const REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT: u128 = 1 << 26; + // Non-digit separator flags. const _: () = assert!(REQUIRED_INTEGER_DIGITS == 1); check_subsequent_flags!(REQUIRED_INTEGER_DIGITS, REQUIRED_FRACTION_DIGITS); @@ -397,6 +401,7 @@ check_subsequent_flags!(SUPPORTS_PARSING_FLOATS, SUPPORTS_WRITING_INTEGERS); check_subsequent_flags!(SUPPORTS_WRITING_INTEGERS, SUPPORTS_WRITING_FLOATS); check_subsequent_flags!(SUPPORTS_WRITING_FLOATS, REQUIRED_BASE_PREFIX); check_subsequent_flags!(REQUIRED_BASE_PREFIX, REQUIRED_BASE_SUFFIX); +check_subsequent_flags!(REQUIRED_BASE_SUFFIX, REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT); // DIGIT SEPARATOR FLAGS & MASKS // ----------------------------- @@ -571,6 +576,7 @@ pub const FLAG_MASK: u128 = CASE_SENSITIVE_BASE_SUFFIX | REQUIRED_INTEGER_DIGITS_WITH_EXPONENT | REQUIRED_FRACTION_DIGITS_WITH_EXPONENT | + REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT | SUPPORTS_PARSING_FLOATS | SUPPORTS_PARSING_INTEGERS | SUPPORTS_WRITING_FLOATS | @@ -603,6 +609,7 @@ pub const INTERFACE_FLAG_MASK: u128 = REQUIRED_EXPONENT_NOTATION | REQUIRED_INTEGER_DIGITS_WITH_EXPONENT | REQUIRED_FRACTION_DIGITS_WITH_EXPONENT | + REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT | INTERNAL_DIGIT_SEPARATOR | LEADING_DIGIT_SEPARATOR | TRAILING_DIGIT_SEPARATOR | @@ -628,6 +635,7 @@ pub const EXPONENT_FLAG_MASK: u128 = REQUIRED_EXPONENT_NOTATION | REQUIRED_INTEGER_DIGITS_WITH_EXPONENT | REQUIRED_FRACTION_DIGITS_WITH_EXPONENT | + REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT | EXPONENT_INTERNAL_DIGIT_SEPARATOR | EXPONENT_LEADING_DIGIT_SEPARATOR | EXPONENT_TRAILING_DIGIT_SEPARATOR | diff --git a/lexical-util/src/not_feature_format.rs b/lexical-util/src/not_feature_format.rs index afa8bbe1..51498aea 100644 --- a/lexical-util/src/not_feature_format.rs +++ b/lexical-util/src/not_feature_format.rs @@ -43,34 +43,35 @@ use crate::format_flags as flags; /// 19. 
[`case_sensitive_base_suffix`][NumberFormat::case_sensitive_base_suffix] /// 20. [`required_integer_digits_with_exponent`][NumberFormat::required_integer_digits_with_exponent] /// 21. [`required_fraction_digits_with_exponent`][NumberFormat::required_fraction_digits_with_exponent] -/// 22. [`supports_parsing_integers`][NumberFormat::supports_parsing_integers] -/// 23. [`supports_parsing_floats`][NumberFormat::supports_parsing_floats] -/// 24. [`supports_writing_integers`][NumberFormat::supports_writing_integers] -/// 25. [`supports_writing_floats`][NumberFormat::supports_writing_floats] -/// 26. [`required_base_prefix`][NumberFormat::required_base_prefix] -/// 27. [`required_base_suffix`][NumberFormat::required_base_suffix] -/// 28. [`integer_internal_digit_separator`][NumberFormat::integer_internal_digit_separator] -/// 29. [`fraction_internal_digit_separator`][NumberFormat::fraction_internal_digit_separator] -/// 30. [`exponent_internal_digit_separator`][NumberFormat::exponent_internal_digit_separator] -/// 31. [`internal_digit_separator`][NumberFormat::internal_digit_separator] -/// 32. [`integer_leading_digit_separator`][NumberFormat::integer_leading_digit_separator] -/// 33. [`fraction_leading_digit_separator`][NumberFormat::fraction_leading_digit_separator] -/// 34. [`exponent_leading_digit_separator`][NumberFormat::exponent_leading_digit_separator] -/// 35. [`leading_digit_separator`][NumberFormat::leading_digit_separator] -/// 36. [`integer_trailing_digit_separator`][NumberFormat::integer_trailing_digit_separator] -/// 37. [`fraction_trailing_digit_separator`][NumberFormat::fraction_trailing_digit_separator] -/// 38. [`exponent_trailing_digit_separator`][NumberFormat::exponent_trailing_digit_separator] -/// 39. [`trailing_digit_separator`][NumberFormat::trailing_digit_separator] -/// 40. [`integer_consecutive_digit_separator`][NumberFormat::integer_consecutive_digit_separator] -/// 41. 
[`fraction_consecutive_digit_separator`][NumberFormat::fraction_consecutive_digit_separator] -/// 42. [`exponent_consecutive_digit_separator`][NumberFormat::exponent_consecutive_digit_separator] -/// 43. [`consecutive_digit_separator`][NumberFormat::consecutive_digit_separator] -/// 44. [`special_digit_separator`][NumberFormat::special_digit_separator] -/// 45. [`digit_separator`][NumberFormat::digit_separator] -/// 46. [`base_prefix`][NumberFormat::base_prefix] -/// 47. [`base_suffix`][NumberFormat::base_suffix] -/// 48. [`exponent_base`][NumberFormat::exponent_base] -/// 49. [`exponent_radix`][NumberFormat::exponent_radix] +/// 22. [`required_mantissa_digits_with_exponent`][NumberFormat::required_mantissa_digits_with_exponent] +/// 23. [`supports_parsing_integers`][NumberFormat::supports_parsing_integers] +/// 24. [`supports_parsing_floats`][NumberFormat::supports_parsing_floats] +/// 25. [`supports_writing_integers`][NumberFormat::supports_writing_integers] +/// 26. [`supports_writing_floats`][NumberFormat::supports_writing_floats] +/// 27. [`required_base_prefix`][NumberFormat::required_base_prefix] +/// 28. [`required_base_suffix`][NumberFormat::required_base_suffix] +/// 29. [`integer_internal_digit_separator`][NumberFormat::integer_internal_digit_separator] +/// 30. [`fraction_internal_digit_separator`][NumberFormat::fraction_internal_digit_separator] +/// 31. [`exponent_internal_digit_separator`][NumberFormat::exponent_internal_digit_separator] +/// 32. [`internal_digit_separator`][NumberFormat::internal_digit_separator] +/// 33. [`integer_leading_digit_separator`][NumberFormat::integer_leading_digit_separator] +/// 34. [`fraction_leading_digit_separator`][NumberFormat::fraction_leading_digit_separator] +/// 35. [`exponent_leading_digit_separator`][NumberFormat::exponent_leading_digit_separator] +/// 36. [`leading_digit_separator`][NumberFormat::leading_digit_separator] +/// 37. 
[`integer_trailing_digit_separator`][NumberFormat::integer_trailing_digit_separator] +/// 38. [`fraction_trailing_digit_separator`][NumberFormat::fraction_trailing_digit_separator] +/// 39. [`exponent_trailing_digit_separator`][NumberFormat::exponent_trailing_digit_separator] +/// 40. [`trailing_digit_separator`][NumberFormat::trailing_digit_separator] +/// 41. [`integer_consecutive_digit_separator`][NumberFormat::integer_consecutive_digit_separator] +/// 42. [`fraction_consecutive_digit_separator`][NumberFormat::fraction_consecutive_digit_separator] +/// 43. [`exponent_consecutive_digit_separator`][NumberFormat::exponent_consecutive_digit_separator] +/// 44. [`consecutive_digit_separator`][NumberFormat::consecutive_digit_separator] +/// 45. [`special_digit_separator`][NumberFormat::special_digit_separator] +/// 46. [`digit_separator`][NumberFormat::digit_separator] +/// 47. [`base_prefix`][NumberFormat::base_prefix] +/// 48. [`base_suffix`][NumberFormat::base_suffix] +/// 49. [`exponent_base`][NumberFormat::exponent_base] +/// 50. [`exponent_radix`][NumberFormat::exponent_radix] /// /// This should always be constructed via [`NumberFormatBuilder`]. /// See [`NumberFormatBuilder`] for the fields for the packed struct. @@ -665,6 +666,33 @@ impl NumberFormat { Self::REQUIRED_FRACTION_DIGITS_WITH_EXPONENT } + /// If any significant digits are required with exponent notation. + /// + /// See [`required_mantissa_digits_with_exponent`][Self::required_mantissa_digits_with_exponent]. + pub const REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT: bool = true; + + /// Get if any significant digits are required with exponent notation. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`true`]. + /// + /// # Examples + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `.1e5` | ✔️ | + /// | `.e5` | ❌ | + /// | `1.e5` | ✔️ | + /// | `1.0e5` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn required_mantissa_digits_with_exponent(&self) -> bool { + Self::REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT + } + /// If the format supports parsing integers. /// /// See [`supports_parsing_integers`][Self::supports_parsing_integers]. diff --git a/lexical-util/src/prebuilt_formats.rs b/lexical-util/src/prebuilt_formats.rs index 16d0fc25..e67adba2 100644 --- a/lexical-util/src/prebuilt_formats.rs +++ b/lexical-util/src/prebuilt_formats.rs @@ -1,4 +1,10 @@ //! Pre-built formats for each programming language, +//! +//! The specifications for all of this code can be found +//! in [`lexical-float-format`], with sample code and +//! the logic to parse and validate the numbers. +//! +//! [`lexical-float-format`]: https://github.com/Alexhuszagh/lexical-float-format #![cfg(feature = "format")] @@ -688,46 +694,6 @@ use crate::format::NumberFormatBuilder; // db.movie.find() // ``` -// TEST CASES -// ---------- - -// NOTE: The exact value of some of these, like specials, -// will differ based on the programming languages used. 
- -// `N/A` - case_sensitive_base_prefix -// `N/A` - case_sensitive_base_suffix -// `.1` - required_integer_digits -// `1.` - required_fraction_digits -// `1.0e` - required_exponent_digits -// `.` - required_mantissa_digits -// `+1` - no_positive_mantissa_sign -// `1` - required_mantissa_sign -// `1.0e3` - no_exponent_notation -// `1.0e+3` - no_positive_exponent_sign -// `1.0e3` - required_exponent_sign -// `1e3` - no_exponent_without_fraction -// `NaN` - no_special -// `nan` - case_sensitive_special -// `01` - no_integer_leading_zeros -// `01.0` - no_float_leading_zeros -// `1.0` - required_exponent_notation -// `1.0E3` - case_sensitive_exponent -// `N/A` - case_sensitive_base_prefix -// `N/A` - case_sensitive_base_suffix -// `1_1.11e11` - integer_internal_digit_separator -// `11.1_1e11` - fraction_internal_digit_separator -// `11.11e1_1` - exponent_internal_digit_separator -// `_11.11e11` - integer_leading_digit_separator -// `11._11e11` - fraction_leading_digit_separator -// `11.11e_11` - exponent_leading_digit_separator -// `11_.11e11` - integer_trailing_digit_separator -// `11.11_e11` - fraction_trailing_digit_separator -// `11.11e11_` - exponent_trailing_digit_separator -// `1__1.11e11` - integer_consecutive_digit_separator -// `11.1__1e11` - fraction_consecutive_digit_separator -// `11.11e1__1` - exponent_consecutive_digit_separator -// `na_n` - special_digit_separator - // PRE-DEFINED CONSTANTS // --------------------- // @@ -747,9 +713,17 @@ use crate::format::NumberFormatBuilder; #[rustfmt::skip] pub const RUST_LITERAL: u128 = NumberFormatBuilder::new() .digit_separator(num::NonZeroU8::new(b'_')) - .required_digits(true) + .required_integer_digits(true) + .required_mantissa_digits(true) .no_positive_mantissa_sign(true) .no_special(true) + .required_exponent_digits(true) + .required_integer_digits_with_exponent(true) + .required_fraction_digits_with_exponent(true) + .supports_parsing_floats(true) + .supports_parsing_integers(true) + 
.supports_writing_floats(true) + .supports_writing_integers(true) .internal_digit_separator(true) .trailing_digit_separator(true) .consecutive_digit_separator(true) From e5a4f09bb560748956527c5f86af4075fe8c3166 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sun, 12 Jan 2025 20:34:00 -0600 Subject: [PATCH 05/18] Add `NumberFormatBuilder::none()` method. --- lexical-util/src/format_builder.rs | 57 +++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index 7003d7e2..be63a092 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -350,7 +350,7 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`required_exponent_notation`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L757\n [`required_integer_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/0cad692/lexical-util/src/format_builder.rs#L1129\n [`required_fraction_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/0cad692/lexical-util/src/format_builder.rs#L1149\n -[`required_mantissa_digits_with_exponent`]: TODO\n +[`required_mantissa_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/47a090d/lexical-util/src/format_builder.rs#L1233\n [`supports_parsing_integers`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1181\n [`supports_parsing_floats`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1191\n [`supports_writing_integers`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1201\n @@ -562,6 +562,61 @@ impl NumberFormatBuilder { } } + /// Create new [`NumberFormatBuilder`] without any flags set. + /// + /// This only sets the default radix to 10. 
+ #[inline(always)] + pub const fn none() -> Self { + Self { + digit_separator: None, + base_prefix: None, + base_suffix: None, + mantissa_radix: 10, + exponent_base: None, + exponent_radix: None, + required_integer_digits: false, + required_fraction_digits: false, + required_exponent_digits: false, + required_mantissa_digits: false, + no_positive_mantissa_sign: false, + required_mantissa_sign: false, + no_exponent_notation: false, + no_positive_exponent_sign: false, + required_exponent_sign: false, + no_exponent_without_fraction: false, + no_special: false, + case_sensitive_special: false, + no_integer_leading_zeros: false, + no_float_leading_zeros: false, + required_exponent_notation: false, + case_sensitive_exponent: false, + case_sensitive_base_prefix: false, + case_sensitive_base_suffix: false, + required_integer_digits_with_exponent: false, + required_fraction_digits_with_exponent: false, + required_mantissa_digits_with_exponent: false, + supports_parsing_integers: false, + supports_parsing_floats: false, + supports_writing_integers: false, + supports_writing_floats: false, + required_base_prefix: false, + required_base_suffix: false, + integer_internal_digit_separator: false, + fraction_internal_digit_separator: false, + exponent_internal_digit_separator: false, + integer_leading_digit_separator: false, + fraction_leading_digit_separator: false, + exponent_leading_digit_separator: false, + integer_trailing_digit_separator: false, + fraction_trailing_digit_separator: false, + exponent_trailing_digit_separator: false, + integer_consecutive_digit_separator: false, + fraction_consecutive_digit_separator: false, + exponent_consecutive_digit_separator: false, + special_digit_separator: false, + } + } + /// Create number format for standard, binary number. 
#[cfg(feature = "power-of-two")] pub const fn binary() -> u128 { From 0bc4df48c9cd17213a68d6f5a0c17213fa801d1c Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Mon, 13 Jan 2025 17:14:10 -0600 Subject: [PATCH 06/18] Improve parsing of sign characters. --- lexical-parse-integer/src/algorithm.rs | 15 +++--- lexical-util/src/iterator.rs | 71 +++++++++++++++++++++++--- 2 files changed, 70 insertions(+), 16 deletions(-) diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index a5ba16fe..7b2857fd 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -209,20 +209,18 @@ macro_rules! parse_sign { $invalid_positive:ident, $missing:ident ) => { - // NOTE: `read_if` optimizes poorly since we then match after - match $byte.integer_iter().first() { - Some(&b'+') if !$no_positive => { + match $byte.integer_iter().parse_sign() { + (false, true) if !$no_positive => { // SAFETY: We have at least 1 item left since we peaked a value unsafe { $byte.step_unchecked() }; Ok(false) }, - Some(&b'+') if $no_positive => Err(Error::$invalid_positive($byte.cursor())), - Some(&b'-') if $is_signed => { + (false, true) if $no_positive => Err(Error::$invalid_positive($byte.cursor())), + (true, true) if $is_signed => { // SAFETY: We have at least 1 item left since we peaked a value unsafe { $byte.step_unchecked() }; Ok(true) }, - Some(_) if $required => Err(Error::$missing($byte.cursor())), _ if $required => Err(Error::$missing($byte.cursor())), _ => Ok(false), } @@ -742,7 +740,7 @@ macro_rules! algorithm { // culminates in **way** slower performance overall for simple // integers, and no improvement for large integers. let mut value = T::ZERO; - #[allow(unused_mut)] + #[allow(unused_variables, unused_mut)] let mut has_suffix = false; if cannot_overflow && is_negative { parse_digits_unchecked!( @@ -794,7 +792,8 @@ macro_rules! 
algorithm { ); } - if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_suffix() && !has_suffix { + #[cfg(all(feature = "format", feature = "power-of-two"))] + if format.required_base_suffix() && !has_suffix { return Err(Error::MissingBaseSuffix(iter.cursor())); } diff --git a/lexical-util/src/iterator.rs b/lexical-util/src/iterator.rs index 342f56f0..b85edb5e 100644 --- a/lexical-util/src/iterator.rs +++ b/lexical-util/src/iterator.rs @@ -124,7 +124,8 @@ pub unsafe trait Iter<'a> { self.get_buffer().get(self.cursor()) } - /// Check if the next element is a given value. + /// Check if the next element is a given value, in a case- + /// sensitive manner. #[inline(always)] fn first_is_cased(&self, value: u8) -> bool { Some(&value) == self.first() @@ -167,6 +168,10 @@ pub unsafe trait Iter<'a> { /// [`increment_count`] afterwards or else the internal count will /// be incorrect. /// + /// This does not skip digit separators and so if used incorrectly, + /// the buffer may be in an invalid state, such as setting the next + /// return value to a digit separator it should have skipped. + /// /// [`increment_count`]: DigitsIter::increment_count /// /// # Panics @@ -183,13 +188,16 @@ pub unsafe trait Iter<'a> { /// Advance the internal slice by 1 element. /// - /// /// This does not increment the count of items: returns: this only /// increments the index, not the total digits returned. You must /// use this carefully: if stepping over a digit, you must then call /// [`increment_count`] afterwards or else the internal count will /// be incorrect. /// + /// This does not skip digit separators and so if used incorrectly, + /// the buffer may be in an invalid state, such as setting the next + /// return value to a digit separator it should have skipped. + /// /// [`increment_count`]: DigitsIter::increment_count /// /// # Panics @@ -233,6 +241,7 @@ pub unsafe trait Iter<'a> { /// Try to read a the next four bytes as a u32. 
/// /// This does not advance the internal state of the iterator. + /// This will only return a value for contiguous iterators. #[inline(always)] fn peek_u32(&self) -> Option { if Self::IS_CONTIGUOUS && self.as_slice().len() >= mem::size_of::() { @@ -309,9 +318,32 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { /// for iterators that find the first non-zero value, etc. We optimize /// this for the case where we have contiguous iterators, since /// non-contiguous iterators already have a major performance penalty. + /// + /// That is, say we have the following buffer and are skipping `_` + /// characters, peek will advance the internal index to `2` if it + /// can skip characters there. + /// + /// +---+---+---+---+---+ +---+---+---+---+ + /// | _ | 2 | _ | _ | 3 | -> | 2 | _ | _ | 3 | + /// +---+---+---+---+---+ +---+---+---+---+ + /// + /// For implementation reasons, where digit separators may not be + /// allowed after that character, it must stop right there. fn peek(&mut self) -> Option; /// Peek the next value of the iterator, and step only if it exists. + /// + /// This will always advance to one byte past the peek value, since + /// we may need to know internally if the next character is a digit + /// separator. + /// + /// That is, say we have the following buffer and are skipping `_` + /// characters, this will advance the internal index to `_` if it + /// can skip characters there. + /// + /// +---+---+---+---+---+ +---+---+---+ + /// | _ | 2 | _ | _ | 3 | -> | _ | _ | 3 | + /// +---+---+---+---+---+ +---+---+---+ #[inline(always)] fn try_read(&mut self) -> Option { if let Some(value) = self.peek() { @@ -323,13 +355,15 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { } } - /// Check if the next element is a given value. + /// Check if the next element is a given value, in a case- + /// sensitive manner.
#[inline(always)] fn peek_is_cased(&mut self, value: u8) -> bool { Some(&value) == self.peek() } - /// Check if the next element is a given value without case sensitivity. + /// Check if the next element is a given value without case + /// sensitivity. #[inline(always)] fn peek_is_uncased(&mut self, value: u8) -> bool { if let Some(&c) = self.peek() { @@ -351,7 +385,7 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { } /// Peek the next value and consume it if the read value matches the - /// expected one. + /// expected one using a custom predicate. #[inline(always)] fn read_if bool>(&mut self, pred: Pred) -> Option { // NOTE: This was implemented to remove usage of unsafe throughout to code @@ -370,7 +404,8 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { } } - /// Read a value if the value matches the provided one. + /// Read a value if the value matches the provided one, in a case- + /// sensitive manner. #[inline(always)] fn read_if_value_cased(&mut self, value: u8) -> Option { if self.peek() == Some(&value) { @@ -389,7 +424,8 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { self.read_if(|x| x.eq_ignore_ascii_case(&value)) } - /// Read a value if the value matches the provided one. + /// Read a value if the value matches the provided one, with optional + /// case sensitivity. #[inline(always)] fn read_if_value(&mut self, value: u8, is_cased: bool) -> Option { if is_cased { @@ -399,7 +435,7 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { } } - /// Skip zeros from the start of the iterator + /// Skip zeros from the start of the iterator. #[inline(always)] fn skip_zeros(&mut self) -> usize { let start = self.current_count(); @@ -411,4 +447,23 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { /// Determine if the character is a digit. fn is_digit(&self, value: u8) -> bool; + + // ------- + + /// Parse the sign from the iterator. + /// + /// If this allows leading digit separators, it will handle + /// those internally and advance the state as needed. 
This + /// returns if the value is negative and if a sign was found. + /// + /// The default implementation does not support digit separators. + #[inline(always)] + fn parse_sign(&mut self) -> (bool, bool) { + // NOTE: `read_if` optimizes poorly since we then match after + match self.first() { + Some(&b'+') => (false, true), + Some(&b'-') => (true, true), + _ => (false, false) + } + } } From df3d60aed89225e03dbe41177f385c6772c98894 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Mon, 13 Jan 2025 17:50:58 -0600 Subject: [PATCH 07/18] Add in minor optimizations for parsing integers. --- lexical-parse-integer/src/algorithm.rs | 6 +++--- lexical-util/src/num.rs | 12 +++++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index 7b2857fd..5f524101 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -742,7 +742,7 @@ macro_rules! algorithm { let mut value = T::ZERO; #[allow(unused_variables, unused_mut)] let mut has_suffix = false; - if cannot_overflow && is_negative { + if T::IS_SIGNED && cannot_overflow && is_negative { parse_digits_unchecked!( value, iter, @@ -753,7 +753,7 @@ macro_rules! algorithm { has_suffix, true, ); - } if cannot_overflow { + } else if cannot_overflow { parse_digits_unchecked!( value, iter, @@ -764,7 +764,7 @@ macro_rules! algorithm { has_suffix, true, ); - } else if is_negative { + } else if T::IS_SIGNED && is_negative { parse_digits_checked!( value, iter, diff --git a/lexical-util/src/num.rs b/lexical-util/src/num.rs index cd32dc62..5494dd94 100644 --- a/lexical-util/src/num.rs +++ b/lexical-util/src/num.rs @@ -686,15 +686,21 @@ pub trait Integer: !self.is_odd() } - /// Get the maximum number of digits before the slice will overflow. + /// Get the maximum number of digits before the slice could overflow.
/// /// This is effectively the `floor(log(2^BITS-1, radix))`, but we can /// try to go a bit lower without worrying too much. #[inline(always)] fn overflow_digits(radix: u32) -> usize { // this is heavily optimized for base10 and it's a way under estimate - // that said, it's fast and works. - if radix <= 16 { + // that said, it's fast and works. the radix is **known** at compile + // time so we can optimize this further. + if cfg!(not(feature = "power-of-two")) || radix == 10 { + // NOTE: We generally want powers-of-two since it makes the comparison + // faster (it can just look for the upper bits being set), and luckily + // for radices of 10 we can always use `2 * bytes`. + mem::size_of::() * 2 + } else if radix <= 16 { mem::size_of::() * 2 - Self::IS_SIGNED as usize } else { // way under approximation but always works and is fast From ff1c8325452147cd4e886db990b721bb3dcb24d8 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Tue, 14 Jan 2025 07:30:23 -0600 Subject: [PATCH 08/18] Add in many more digit separator flags. This adds in the logic for new digit separator flags defining a number format, as well as enhances our documentation on the specification, however, it does not implement the parsing logic yet. Therefore, the unittests are expected to be broken now. 
[skip ci] --- CHANGELOG | 3 +- lexical-parse-integer/src/algorithm.rs | 3 +- lexical-util/src/feature_format.rs | 552 +++++++++- lexical-util/src/format_builder.rs | 1384 +++++++++++++++++++++++- lexical-util/src/format_flags.rs | 113 +- lexical-util/src/not_feature_format.rs | 504 ++++++++- 6 files changed, 2532 insertions(+), 27 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index bd9f3f41..42760a75 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -19,7 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for `required_integer_digits_with_exponent`, `required_fraction_digits_with_exponent`, and `required_mantissa_digits_with_exponent`, that is,`1.e5` and `.1e5`, as opposed to just requiring`1e5` (#215). - Added `supports_parsing_integers`, `supports_parsing_floats`, `supports_writing_integers`, and `supports_writing_floats` for our number formats (#215). - Added `required_base_prefix` and `required_base_suffix` for our number formats, requiring base prefixes and/or suffixes when parsing, and allowing writing base prefixes and/or suffixes (#215). -- Added `NumberFormatBuilder::none()` for create a format with no flags set. +- Added `NumberFormatBuilder::none()` for creating a format with no flags set (#215). +- Added in many more digit separator flags for the `NumberFormat`, including for signs, base prefixes, base suffixes, and restricting digit separators at the start of the number (#215). ### Changed diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index 5f524101..845c06fa 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -792,8 +792,7 @@ macro_rules!
algorithm { ); } - #[cfg(all(feature = "format", feature = "power-of-two"))] - if format.required_base_suffix() && !has_suffix { + if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_suffix() && !has_suffix { return Err(Error::MissingBaseSuffix(iter.cursor())); } diff --git a/lexical-util/src/feature_format.rs b/lexical-util/src/feature_format.rs index c64adc79..ff276b1b 100644 --- a/lexical-util/src/feature_format.rs +++ b/lexical-util/src/feature_format.rs @@ -59,6 +59,74 @@ macro_rules! from_flag { /// assert!(!format.no_exponent_notation()); /// # } /// ``` +/// +/// # Number Details +/// +/// This assumes a number that follows the following conventions. You should +/// design custom number formats using these assumptions. +/// +/// #### Integers +/// +/// ```text +/// +--1--+--2--+--3--+--4--+--5--+--6--+--7--+--8--+--9--+--10-+--11-+ +/// | __ | +/- | __ | 0x | __ | 12 | __ | 34 | __ | h | __ | +/// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ +/// ``` +/// +/// Where the components are: +/// 1. Sign digit separators (rarely, if ever, used) +/// 2. Sign +/// 3. Leading base prefix digit separators +/// 4. Base prefix (always `0` + a character) +/// 5. Leading integer digit separators +/// 6. Integer digits +/// 7. Internal integer digit separators +/// 8. Integer digits +/// 9. Trailing integer digit separators +/// 10. Base suffix (such as `h` for X86 assembly) +/// 11. Trailing base suffix digit separators +/// +/// #### Floats +/// +/// This has all the same components as integers, with many additional +/// ones. +/// +/// ```text +/// +--1--+--2--+--3--+--4--+--5--+--6--+--7--+--8--+--9--+-10--+-11--+-12--+-13--+-14--+-15--+ +/// | __ | +/- | __ | 0x | __ | 12 | __ | 34 | __ | . 
| __ | 56 | __ | 78 | __ | +/// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ +/// +/// +-16--+-17--+-18--+-19--+-20--+-21--+-22--+-23--+-24--+-25--+-----+-----+-----+-----+-----+ +/// | e | __ | +/- | __ | 90 | __ | 12 | __ | h | __ | | | | | | +/// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ +/// ``` +/// +/// Where the components are: +/// 1. Mantissa sign digit separators (rarely, if ever, used) +/// 2. Mantissa sign +/// 3. Leading base prefix digit separators +/// 4. Base prefix (always `0` + a character) +/// 5. Leading integer digit separators +/// 6. Integer digits +/// 7. Internal integer digit separators +/// 8. Integer digits +/// 9. Trailing integer digit separators +/// 10. Decimal point +/// 11. Leading fraction digit separators +/// 12. Fraction digits +/// 13. Internal fraction digit separators +/// 14. Fraction digits +/// 15. Trailing fraction digit separators +/// 16. Exponent symbol +/// 17. Exponent sign digit separators (rarely, if ever, used) +/// 18. Exponent sign +/// 19. Leading exponent digit separators +/// 20. Exponent digits +/// 21. Internal exponent digit separators +/// 22. Exponent digits +/// 23. Trailing exponent digit separators +/// 24. Base suffix (such as `h` for X86 assembly) +/// 25. Trailing base suffix digit separators pub struct NumberFormat; #[rustfmt::skip] @@ -817,6 +885,460 @@ impl NumberFormat { // DIGIT SEPARATOR FLAGS & MASKS + /// If digit separators are allowed at the absolute start of the number. + /// + /// See [`start_digit_separator`][Self::start_digit_separator]. + pub const START_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, START_DIGIT_SEPARATOR); + + /// Get if digit separators are allowed at the absolute start of the number. 
+ /// + /// This modifies the behavior of [`integer_sign_digit_separator`] and + /// [`integer_leading_digit_separator`] so that the digit separators can + /// occur at the absolute start and not just internal to the number. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`true`]. + /// + /// [`integer_sign_digit_separator`]: Self::integer_sign_digit_separator + /// [`integer_leading_digit_separator`]: Self::integer_leading_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_`, integer leading digit separators enabled, + /// and starting digit separators enabled. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `_1` | ✔️ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | + /// + /// Using a digit separator of `_`, integer leading digit separators enabled, + /// and starting digit separators disabled. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `_1` | ❌ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn start_digit_separator(&self) -> bool { + Self::START_DIGIT_SEPARATOR + } + + /// If digit separators are allowed before the sign of the integer. + /// + /// See [`integer_sign_digit_separator`][Self::integer_sign_digit_separator]. + pub const INTEGER_SIGN_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, INTEGER_SIGN_DIGIT_SEPARATOR); + + /// Get if digit separators are allowed before the sign of the integer. + /// + /// You can disable support for digit separators at the absolute start + /// using the [`start_digit_separator`] flag. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// [`start_digit_separator`]: Self::start_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid?
| + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `_` | ❌ | + /// | `_1` | ❌ | + /// | `_+1` | ✔️ | + /// | `+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn integer_sign_digit_separator(&self) -> bool { + Self::INTEGER_SIGN_DIGIT_SEPARATOR + } + + /// If consecutive digit separators are allowed before the sign of the integer. + /// + /// See [`integer_consecutive_sign_digit_separator`][Self::integer_consecutive_sign_digit_separator]. + pub const INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR); + + /// Get if consecutive digit separators are allowed before the sign of the + /// integer. + /// + /// That is, using `_` as a digit separator `__` would be allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `_` | ❌ | + /// | `_1` | ❌ | + /// | `_+1` | ✔️ | + /// | `__+1` | ✔️ | + /// | `+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn integer_consecutive_sign_digit_separator(&self) -> bool { + Self::INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR + } + + /// If digit separators are allowed before the sign of the exponent. + /// + /// See [`exponent_sign_digit_separator`][Self::exponent_sign_digit_separator]. + pub const EXPONENT_SIGN_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, EXPONENT_SIGN_DIGIT_SEPARATOR); + + /// Get if digit separators are allowed before the sign of the exponent. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `1.0e1` | ✔️ | + /// | `1.0e_` | ❌ | + /// | `1.0e_1` | ❌ | + /// | `1.0e_+1` | ✔️ | + /// | `1.0e+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn exponent_sign_digit_separator(&self) -> bool { + Self::EXPONENT_SIGN_DIGIT_SEPARATOR + } + + /// If consecutive digit separators are allowed before the sign of the exponent. + /// + /// See [`exponent_consecutive_sign_digit_separator`][Self::exponent_consecutive_sign_digit_separator]. + pub const EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR); + + /// Get if consecutive digit separators are allowed before the sign of the exponent. + /// + /// That is, using `_` as a digit separator `__` would be allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1.0e1` | ✔️ | + /// | `1.0e_` | ❌ | + /// | `1.0e_1` | ❌ | + /// | `1.0e_+1` | ✔️ | + /// | `1.0e__+1` | ✔️ | + /// | `1.0e+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn exponent_consecutive_sign_digit_separator(&self) -> bool { + Self::EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed between the `0` and the base prefix. + /// + /// See [`base_prefix_internal_digit_separator`][Self::base_prefix_internal_digit_separator]. + pub const BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR); + + /// Get if a digit separator is allowed between the `0` and the base prefix. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`, and a base prefix of `x`. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0x_1` | ❌ | + /// | `0_x1` | ✔️ | + /// | `+0_x1` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_prefix_internal_digit_separator(&self) -> bool { + Self::BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed before the base prefix. + /// + /// See [`base_prefix_leading_digit_separator`][Self::base_prefix_leading_digit_separator]. + pub const BASE_PREFIX_LEADING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, BASE_PREFIX_LEADING_DIGIT_SEPARATOR); + + /// Get if a digit separator is allowed before the base prefix. + /// + /// This is taken **after** the sign and therefore may be at the absolute + /// start of the number or internal to it. You can disable support for + /// digit separators at the absolute start using the + /// [`start_digit_separator`] flag. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// [`start_digit_separator`]: Self::start_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base prefix of `x`, and starting digit + /// separators enabled. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `0x1` | ✔️ | + /// | `_0x1` | ✔️ | + /// | `0x_1` | ❌ | + /// | `0_x1` | ❌ | + /// | `+_0x1` | ✔️ | + /// | `_+0x1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_prefix_leading_digit_separator(&self) -> bool { + Self::BASE_PREFIX_LEADING_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed after the base prefix. + /// + /// See [`base_prefix_trailing_digit_separator`][Self::base_prefix_trailing_digit_separator]. + pub const BASE_PREFIX_TRAILING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, BASE_PREFIX_TRAILING_DIGIT_SEPARATOR); + + /// Get if a digit separator is allowed after the base prefix. 
+ /// + /// If a base prefix is present, then [`integer_leading_digit_separator`] overlaps + /// with this functionality, and either being enabled, as well as consecutive + /// digit separators, will consider it to be valid. If both are set and neither + /// enables consecutive digit separators, then `0x__1` is not valid. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// [`integer_leading_digit_separator`]: Self::integer_leading_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base prefix of `x`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0_x1` | ❌ | + /// | `0x_1` | ✔️ | + /// | `0x__1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_prefix_trailing_digit_separator(&self) -> bool { + Self::BASE_PREFIX_TRAILING_DIGIT_SEPARATOR + } + + /// If multiple consecutive base prefix digit separators are allowed. + /// + /// See [`base_prefix_consecutive_digit_separator`][Self::base_prefix_consecutive_digit_separator]. + pub const BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR); + + /// Get if multiple consecutive base prefix digit separators are allowed. + /// + /// That is, using `_` as a digit separator `__` would be allowed where any + /// digit separators (leading, trailing, internal) are allowed in the + /// base prefix. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base prefix of `x`, and trailing base + /// prefix digit separators. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0_x1` | ❌ | + /// | `0x_1` | ✔️ | + /// | `0x__1` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_prefix_consecutive_digit_separator(&self) -> bool { + Self::BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed between the base suffix. + /// + /// See [`base_suffix_internal_digit_separator`][Self::base_suffix_internal_digit_separator]. + pub const BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR); + + /// Get if a digit separator is allowed between the base suffix. + /// + /// This is currently unused, since base suffixes are a single character. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_suffix_internal_digit_separator(&self) -> bool { + Self::BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed before the base suffix. + /// + /// See [`base_suffix_leading_digit_separator`][Self::base_suffix_leading_digit_separator]. + pub const BASE_SUFFIX_LEADING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, BASE_SUFFIX_LEADING_DIGIT_SEPARATOR); + + /// Get if a digit separator is allowed before the base suffix. + /// + /// If a base suffix is present, then [`integer_trailing_digit_separator`] + /// or [`exponent_trailing_digit_separator`] may overlap + /// with this functionality (depending on the number components), and either + /// being enabled, as well as consecutive digit separators, will + /// consider it to be valid. If both are set and neither + /// enables consecutive digit separators, then `1__h` is not valid. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. 
+ /// + /// [`integer_trailing_digit_separator`]: Self::integer_trailing_digit_separator + /// [`exponent_trailing_digit_separator`]: Self::exponent_trailing_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base suffix of `h`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ✔️ | + /// | `12__h` | ❌ | + /// | `12h_` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_suffix_leading_digit_separator(&self) -> bool { + Self::BASE_SUFFIX_LEADING_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed after the base suffix. + /// + /// See [`base_suffix_trailing_digit_separator`][Self::base_suffix_trailing_digit_separator]. + pub const BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR); + + /// Get if a digit separator is allowed after the base suffix. + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base suffix of `h`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ❌ | + /// | `12h_` | ✔️ | + /// | `12h__` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_suffix_trailing_digit_separator(&self) -> bool { + Self::BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR + } + + /// If multiple consecutive base suffix digit separators are allowed. + /// + /// See [`base_suffix_consecutive_digit_separator`][Self::base_suffix_consecutive_digit_separator]. + pub const BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR: bool = from_flag!(FORMAT, BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR); + + /// Get if multiple consecutive base suffix digit separators are allowed. + /// + /// That is, using `_` as a digit separator `__` would be allowed where any + /// digit separators (leading, trailing, internal) are allowed in the + /// base suffix.
Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base suffix of `h`, and trailing base + /// suffix digit separators. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ❌ | + /// | `12h_` | ✔️ | + /// | `12h__` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_suffix_consecutive_digit_separator(&self) -> bool { + Self::BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR + } + /// If digit separators are allowed between integer digits. /// /// This will not consider an input of only the digit separator @@ -961,13 +1483,27 @@ impl NumberFormat { /// Get if a digit separator is allowed before any integer digits. /// + /// This is taken **after** any signs and base prefixes, and therefore + /// may be at the absolute start of the number or internal to it. You + /// can disable support for digit separators at the absolute start + /// using the [`start_digit_separator`] flag. For only those before or after + /// a base prefix, see [`base_prefix_leading_digit_separator`] and + /// [`base_prefix_trailing_digit_separator`], respectively. For only those + /// prior to a sign, see [`integer_sign_digit_separator`]. + /// /// This will consider an input of only the digit separator /// to be a identical to empty input. Can only be modified with /// [`feature`][crate#features] `format`. Defaults to [`false`]. /// + /// [`start_digit_separator`]: Self::start_digit_separator + /// [`base_prefix_leading_digit_separator`]: Self::base_prefix_leading_digit_separator + /// [`base_prefix_trailing_digit_separator`]: Self::base_prefix_trailing_digit_separator + /// [`integer_sign_digit_separator`]: Self::integer_sign_digit_separator + /// /// # Examples /// - /// Using a digit separator of `_`. + /// Using a digit separator of `_`, and an optional base prefix of + /// `x` and starting digit separators enabled. /// /// | Input | Valid? 
| /// |:-:|:-:| @@ -976,6 +1512,10 @@ impl NumberFormat { /// | `1_1` | ❌ | /// | `1_` | ❌ | /// | `_1` | ✔️ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | /// /// # Used For /// @@ -1363,6 +1903,10 @@ impl NumberFormat { /// only used for writing numbers if [`required_base_prefix`] /// is [`true`]. This is ignored for special floating-point numbers. /// + /// Note that base prefixes are not allowed for the exponent digits + /// of a float: the prefix (such as `0x`) must come before any other + /// significant digits, optionally with digit separators present. + /// /// # Examples /// /// Using a base prefix of `x`. @@ -1409,6 +1953,12 @@ impl NumberFormat { /// only used for writing numbers if [`required_base_suffix`] /// is [`true`]. This is ignored for special floating-point numbers. /// + /// Note that base suffixes are not allowed for the exponent digits + /// of a float: the suffix, if it's present, will always refer to the + /// float as a whole, optionally with digit separators present. For + /// example, `4d2p0101x` would have the `x` refer to the full float, + /// not the exponent of `0101`. + /// /// # Examples /// /// Using a base suffix of `x`. diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index be63a092..4072e08e 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -135,6 +135,32 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { /// - [`case_sensitive_exponent`]: If exponent characters are case-sensitive. /// - [`case_sensitive_base_prefix`]: If base prefixes are case-sensitive. /// - [`case_sensitive_base_suffix`]: If base suffixes are case-sensitive. +/// - [`start_digit_separator`]: If digit separators are allowed at the absolute +/// start of the number. +/// - [`integer_sign_digit_separator`]: If digit separators are allowed before +/// the sign of the integer. 
+/// - [`integer_consecutive_sign_digit_separator`]: If consecutive digit +/// separators are allowed before the sign of the integer. +/// - [`exponent_sign_digit_separator`]: If digit separators are allowed before +/// the sign of the exponent. +/// - [`exponent_consecutive_sign_digit_separator`]: If consecutive digit +/// separators are allowed before the sign of the exponent. +/// - [`base_prefix_internal_digit_separator`]: If a digit separator is allowed +/// between the base prefix. +/// - [`base_prefix_leading_digit_separator`]: If a digit separator is allowed +/// before the base prefix. +/// - [`base_prefix_trailing_digit_separator`]: If a digit separator is allowed +/// after the base prefix. +/// - [`base_prefix_consecutive_digit_separator`]: If multiple consecutive base +/// prefix digit separators are allowed. +/// - [`base_suffix_internal_digit_separator`]: If a digit separator is allowed +/// between the base suffix. +/// - [`base_suffix_leading_digit_separator`]: If a digit separator is allowed +/// before the base suffix. +/// - [`base_suffix_trailing_digit_separator`]: If a digit separator is allowed +/// after the base suffix. +/// - [`base_suffix_consecutive_digit_separator`]: If multiple consecutive base +/// suffix digit separators are allowed. /// - [`integer_internal_digit_separator`]: If digit separators are allowed /// between integer digits. /// - [`fraction_internal_digit_separator`]: If digit separators are allowed @@ -178,6 +204,28 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { /// required. /// - [`no_integer_leading_zeros`]: If leading zeros before an integer are not /// allowed. +/// - [`start_digit_separator`]: If digit separators are allowed at the absolute +/// start of the number. +/// - [`integer_sign_digit_separator`]: If digit separators are allowed before +/// the sign of the integer. +/// - [`integer_consecutive_sign_digit_separator`]: If consecutive digit +/// separators are allowed before the sign of the integer. 
+/// - [`base_prefix_internal_digit_separator`]: If a digit separator is allowed +/// between the base prefix. +/// - [`base_prefix_leading_digit_separator`]: If a digit separator is allowed +/// before the base prefix. +/// - [`base_prefix_trailing_digit_separator`]: If a digit separator is allowed +/// after the base prefix. +/// - [`base_prefix_consecutive_digit_separator`]: If multiple consecutive base +/// prefix digit separators are allowed. +/// - [`base_suffix_internal_digit_separator`]: If a digit separator is allowed +/// between the base suffix. +/// - [`base_suffix_leading_digit_separator`]: If a digit separator is allowed +/// before the base suffix. +/// - [`base_suffix_trailing_digit_separator`]: If a digit separator is allowed +/// after the base suffix. +/// - [`base_suffix_consecutive_digit_separator`]: If multiple consecutive base +/// suffix digit separators are allowed. /// - [`integer_internal_digit_separator`]: If digit separators are allowed /// between integer digits. /// - [`integer_leading_digit_separator`]: If a digit separator is allowed @@ -248,6 +296,32 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { /// - [`case_sensitive_exponent`]: If exponent characters are case-sensitive. /// - [`case_sensitive_base_prefix`]: If base prefixes are case-sensitive. /// - [`case_sensitive_base_suffix`]: If base suffixes are case-sensitive. +/// - [`start_digit_separator`]: If digit separators are allowed at the absolute +/// start of the number. +/// - [`integer_sign_digit_separator`]: If digit separators are allowed before +/// the sign of the integer. +/// - [`integer_consecutive_sign_digit_separator`]: If consecutive digit +/// separators are allowed before the sign of the integer. +/// - [`exponent_sign_digit_separator`]: If digit separators are allowed before +/// the sign of the exponent. +/// - [`exponent_consecutive_sign_digit_separator`]: If consecutive digit +/// separators are allowed before the sign of the exponent. 
+/// - [`base_prefix_internal_digit_separator`]: If a digit separator is allowed +/// between the base prefix. +/// - [`base_prefix_leading_digit_separator`]: If a digit separator is allowed +/// before the base prefix. +/// - [`base_prefix_trailing_digit_separator`]: If a digit separator is allowed +/// after the base prefix. +/// - [`base_prefix_consecutive_digit_separator`]: If multiple consecutive base +/// prefix digit separators are allowed. +/// - [`base_suffix_internal_digit_separator`]: If a digit separator is allowed +/// between the base suffix. +/// - [`base_suffix_leading_digit_separator`]: If a digit separator is allowed +/// before the base suffix. +/// - [`base_suffix_trailing_digit_separator`]: If a digit separator is allowed +/// after the base suffix. +/// - [`base_suffix_consecutive_digit_separator`]: If multiple consecutive base +/// suffix digit separators are allowed. /// - [`integer_internal_digit_separator`]: If digit separators are allowed /// between integer digits. 
/// - [`fraction_internal_digit_separator`]: If digit separators are allowed @@ -315,6 +389,19 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`supports_parsing_floats`]: Self::supports_parsing_floats\n [`supports_writing_integers`]: Self::supports_writing_integers\n [`supports_writing_floats`]: Self::supports_writing_floats\n +[`start_digit_separator`]: Self::start_digit_separator\n +[`integer_sign_digit_separator`]: Self::integer_sign_digit_separator\n +[`integer_consecutive_sign_digit_separator`]: Self::integer_consecutive_sign_digit_separator\n +[`exponent_sign_digit_separator`]: Self::exponent_sign_digit_separator\n +[`exponent_consecutive_sign_digit_separator`]: Self::exponent_consecutive_sign_digit_separator\n +[`base_prefix_internal_digit_separator`]: Self::base_prefix_internal_digit_separator\n +[`base_prefix_leading_digit_separator`]: Self::base_prefix_leading_digit_separator\n +[`base_prefix_trailing_digit_separator`]: Self::base_prefix_trailing_digit_separator\n +[`base_prefix_consecutive_digit_separator`]: Self::base_prefix_consecutive_digit_separator\n +[`base_suffix_internal_digit_separator`]: Self::base_suffix_internal_digit_separator\n +[`base_suffix_leading_digit_separator`]: Self::base_suffix_leading_digit_separator\n +[`base_suffix_trailing_digit_separator`]: Self::base_suffix_trailing_digit_separator\n +[`base_suffix_consecutive_digit_separator`]: Self::base_suffix_consecutive_digit_separator\n [`integer_internal_digit_separator`]: Self::integer_internal_digit_separator\n [`fraction_internal_digit_separator`]: Self::fraction_internal_digit_separator\n [`exponent_internal_digit_separator`]: Self::exponent_internal_digit_separator\n @@ -356,6 +443,19 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`supports_writing_integers`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1201\n [`supports_writing_floats`]: 
https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1211\n [`case_sensitive_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L765\n +[`start_digit_separator`]: https://TODO.com\n +[`integer_sign_digit_separator`]: https://TODO.com\n +[`integer_consecutive_sign_digit_separator`]: https://TODO.com\n +[`exponent_sign_digit_separator`]: https://TODO.com\n +[`exponent_consecutive_sign_digit_separator`]: https://TODO.com\n +[`base_prefix_internal_digit_separator`]: https://TODO.com\n +[`base_prefix_leading_digit_separator`]: https://TODO.com\n +[`base_prefix_trailing_digit_separator`]: https://TODO.com\n +[`base_prefix_consecutive_digit_separator`]: https://TODO.com\n +[`base_suffix_internal_digit_separator`]: https://TODO.com\n +[`base_suffix_leading_digit_separator`]: https://TODO.com\n +[`base_suffix_trailing_digit_separator`]: https://TODO.com\n +[`base_suffix_consecutive_digit_separator`]: https://TODO.com\n [`integer_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L793\n [`fraction_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L805\n [`exponent_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L817\n @@ -392,6 +492,74 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`required_base_suffix`]: https://github.com/Alexhuszagh/rust-lexical/blob/63f9adf/lexical-util/src/format_builder.rs#L1291\n " )] +/// +/// # Number Details +/// +/// This assumes a number that follows the following conventions. You should +/// design custom number formats using these assumptions. 
+/// +/// #### Integers +/// +/// ```text +/// +--1--+--2--+--3--+--4--+--5--+--6--+--7--+--8--+--9--+--10-+--11-+ +/// | __ | +/- | __ | 0x | __ | 12 | __ | 34 | __ | h | __ | +/// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ +/// ``` +/// +/// Where the components are: +/// 1. Sign digit separators (rarely, if ever, used) +/// 2. Sign +/// 3. Leading base prefix digit separators +/// 4. Base prefix (always `0` + a character) +/// 5. Leading integer digit separators +/// 6. Integer digits +/// 7. Internal integer digit separators +/// 8. Integer digits +/// 9. Trailing integer digit separators +/// 10. Base suffix (such as `h` for X86 assembly) +/// 11. Trailing base suffix digit separators +/// +/// #### Floats +/// +/// This has all the same components as integers, with many additional +/// ones. +/// +/// ```text +/// +--1--+--2--+--3--+--4--+--5--+--6--+--7--+--8--+--9--+-10--+-11--+-12--+-13--+-14--+-15--+ +/// | __ | +/- | __ | 0x | __ | 12 | __ | 34 | __ | . | __ | 56 | __ | 78 | __ | +/// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ +/// +/// +-16--+-17--+-18--+-19--+-20--+-21--+-22--+-23--+-24--+-25--+-----+-----+-----+-----+-----+ +/// | e | __ | +/- | __ | 90 | __ | 12 | __ | h | __ | | | | | | +/// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ +/// ``` +/// +/// Where the components are: +/// 1. Mantissa sign digit separators (rarely, if ever, used) +/// 2. Mantissa sign +/// 3. Leading base prefix digit separators +/// 4. Base prefix (always `0` + a character) +/// 5. Leading integer digit separators +/// 6. Integer digits +/// 7. Internal integer digit separators +/// 8. Integer digits +/// 9. Trailing integer digit separators +/// 10. Decimal point +/// 11. Leading fraction digit separators +/// 12. Fraction digits +/// 13. Internal fraction digit separators +/// 14. Fraction digits +/// 15. Trailing fraction digit separators +/// 16. 
Exponent symbol +/// 17. Exponent sign digit separators (rarely, if ever, used) +/// 18. Exponent sign +/// 19. Leading exponent digit separators +/// 20. Exponent digits +/// 21. Internal exponent digit separators +/// 22. Exponent digits +/// 23. Trailing exponent digit separators +/// 24. Base suffix (such as `h` for X86 assembly) +/// 25. Trailing base suffix digit separators pub struct NumberFormatBuilder { digit_separator: OptionU8, base_prefix: OptionU8, @@ -426,6 +594,19 @@ pub struct NumberFormatBuilder { required_mantissa_digits_with_exponent: bool, required_base_prefix: bool, required_base_suffix: bool, + start_digit_separator: bool, + integer_sign_digit_separator: bool, + integer_consecutive_sign_digit_separator: bool, + exponent_sign_digit_separator: bool, + exponent_consecutive_sign_digit_separator: bool, + base_prefix_internal_digit_separator: bool, + base_prefix_leading_digit_separator: bool, + base_prefix_trailing_digit_separator: bool, + base_prefix_consecutive_digit_separator: bool, + base_suffix_internal_digit_separator: bool, + base_suffix_leading_digit_separator: bool, + base_suffix_trailing_digit_separator: bool, + base_suffix_consecutive_digit_separator: bool, integer_internal_digit_separator: bool, fraction_internal_digit_separator: bool, exponent_internal_digit_separator: bool, @@ -496,6 +677,21 @@ impl NumberFormatBuilder { /// `false` /// - [`required_base_prefix`][Self::get_required_base_prefix] - `false` /// - [`required_base_suffix`][Self::get_required_base_suffix] - `false` + /// - [`start_digit_separator`][Self::start_digit_separator] - `true` + /// - [`integer_sign_digit_separator`][Self::integer_sign_digit_separator] - + /// `false` + /// - [`integer_consecutive_sign_digit_separator`][Self::integer_consecutive_sign_digit_separator] - `false` + /// - [`exponent_sign_digit_separator`][Self::exponent_sign_digit_separator] + /// - `false` + /// - 
[`exponent_consecutive_sign_digit_separator`][Self::exponent_consecutive_sign_digit_separator] - `false` + /// - [`base_prefix_internal_digit_separator`][Self::base_prefix_internal_digit_separator] - `false` + /// - [`base_prefix_leading_digit_separator`][Self::base_prefix_leading_digit_separator] - `false` + /// - [`base_prefix_trailing_digit_separator`][Self::base_prefix_trailing_digit_separator] - `false` + /// - [`base_prefix_consecutive_digit_separator`][Self::base_prefix_consecutive_digit_separator] - `false` + /// - [`base_suffix_internal_digit_separator`][Self::base_suffix_internal_digit_separator] - `false` + /// - [`base_suffix_leading_digit_separator`][Self::base_suffix_leading_digit_separator] - `false` + /// - [`base_suffix_trailing_digit_separator`][Self::base_suffix_trailing_digit_separator] - `false` + /// - [`base_suffix_consecutive_digit_separator`][Self::base_suffix_consecutive_digit_separator] - `false` /// - [`integer_internal_digit_separator`][Self::get_integer_internal_digit_separator] - `false` /// - [`fraction_internal_digit_separator`][Self::get_fraction_internal_digit_separator] - `false` /// - [`exponent_internal_digit_separator`][Self::get_exponent_internal_digit_separator] - `false` @@ -546,6 +742,19 @@ impl NumberFormatBuilder { supports_writing_floats: true, required_base_prefix: false, required_base_suffix: false, + start_digit_separator: true, + integer_sign_digit_separator: false, + integer_consecutive_sign_digit_separator: false, + exponent_sign_digit_separator: false, + exponent_consecutive_sign_digit_separator: false, + base_prefix_internal_digit_separator: false, + base_prefix_leading_digit_separator: false, + base_prefix_trailing_digit_separator: false, + base_prefix_consecutive_digit_separator: false, + base_suffix_internal_digit_separator: false, + base_suffix_leading_digit_separator: false, + base_suffix_trailing_digit_separator: false, + base_suffix_consecutive_digit_separator: false, integer_internal_digit_separator: 
false, fraction_internal_digit_separator: false, exponent_internal_digit_separator: false, @@ -601,6 +810,19 @@ impl NumberFormatBuilder { supports_writing_floats: false, required_base_prefix: false, required_base_suffix: false, + start_digit_separator: true, + integer_sign_digit_separator: false, + integer_consecutive_sign_digit_separator: false, + exponent_sign_digit_separator: false, + exponent_consecutive_sign_digit_separator: false, + base_prefix_internal_digit_separator: false, + base_prefix_leading_digit_separator: false, + base_prefix_trailing_digit_separator: false, + base_prefix_consecutive_digit_separator: false, + base_suffix_internal_digit_separator: false, + base_suffix_leading_digit_separator: false, + base_suffix_trailing_digit_separator: false, + base_suffix_consecutive_digit_separator: false, integer_internal_digit_separator: false, fraction_internal_digit_separator: false, exponent_internal_digit_separator: false, @@ -785,6 +1007,10 @@ impl NumberFormatBuilder { /// used for writing numbers if [`required_base_prefix`] is [`true`]. /// This is ignored for special floating-point numbers. /// + /// Note that base prefixes are not allowed for the exponent digits + /// of a float: the prefix (such as `0x`) must come before any other + /// significant digits, optionally with digit separators present. + /// /// # Examples /// /// Using a base prefix of `x`. @@ -820,6 +1046,12 @@ impl NumberFormatBuilder { /// used for writing numbers if [`required_base_suffix`] is [`true`]. /// This is ignored for special floating-point numbers. /// + /// Note that base suffixes are not allowed for the exponent digits + /// of a float: the suffix, if it's present, will always refer to the + /// float as a whole, optionally with digit separators present. For + /// example, `4d2p0101x` would have the `x` refer to the full float, + /// not the exponent of `0101`. + /// /// # Examples /// /// Using a base suffix of `x`. 
@@ -1377,6 +1609,365 @@ impl NumberFormatBuilder { self.required_base_suffix } + /// Get if digit separators are allowed at the absolute start of the number. + /// + /// This modifies the behavior of [`integer_sign_digit_separator`] and + /// [`integer_leading_digit_separator`] so that the digit separators can + /// occur at the absolute start and not just internal to the number. + /// + /// [`integer_sign_digit_separator`]: Self::get_integer_sign_digit_separator + /// [`integer_leading_digit_separator`]: Self::get_integer_leading_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_`, integer leading digit separators + /// enabled, and starting digit separators enabled. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `_1` | ✔️ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | + /// + /// Using a digit separator of `_`, integer leading digit separators + /// enabled, and starting digit separators disabled. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `_1` | ❌ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_start_digit_separator(&self) -> bool { + self.start_digit_separator + } + + /// Get if digit separators are allowed before the sign of the integer. + /// + /// You can disable support for digit separators at the absolute start + /// using the [`start_digit_separator`] flag. + /// + /// [`start_digit_separator`]: Self::get_start_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `_` | ❌ | + /// | `_1` | ❌ | + /// | `_+1` | ✔️ | + /// | `+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_integer_sign_digit_separator(&self) -> bool { + self.integer_sign_digit_separator + } + + /// Get if consecutive digit separators are allowed before the sign of the + /// integer. + /// + /// That is, using `_` as a digit separator `__` would be allowed. Defaults + /// to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `_` | ❌ | + /// | `_1` | ❌ | + /// | `_+1` | ✔️ | + /// | `__+1` | ✔️ | + /// | `+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_integer_consecutive_sign_digit_separator(&self) -> bool { + self.integer_consecutive_sign_digit_separator + } + + /// Get if digit separators are allowed before the sign of the exponent. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1.0e1` | ✔️ | + /// | `1.0e_` | ❌ | + /// | `1.0e_1` | ❌ | + /// | `1.0e_+1` | ✔️ | + /// | `1.0e+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn get_exponent_sign_digit_separator(&self) -> bool { + self.exponent_sign_digit_separator + } + + /// Get if consecutive digit separators are allowed before the sign of the + /// exponent. + /// + /// That is, using `_` as a digit separator `__` would be allowed. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `1.0e1` | ✔️ | + /// | `1.0e_` | ❌ | + /// | `1.0e_1` | ❌ | + /// | `1.0e_+1` | ✔️ | + /// | `1.0e__+1` | ✔️ | + /// | `1.0e+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn get_exponent_consecutive_sign_digit_separator(&self) -> bool { + self.exponent_consecutive_sign_digit_separator + } + + /// Get if a digit separator is allowed between the `0` and the base prefix. + /// + /// # Examples + /// + /// Using a digit separator of `_`, and a base prefix of `x`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0x_1` | ❌ | + /// | `0_x1` | ✔️ | + /// | `+0_x1` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_base_prefix_internal_digit_separator(&self) -> bool { + self.base_prefix_internal_digit_separator + } + + /// Get if a digit separator is allowed before the base prefix. + /// + /// This is taken **after** the sign and therefore may be at the absolute + /// start of the number or internal to it. You can disable support for + /// digit separators at the absolute start using the + /// [`start_digit_separator`] flag. + /// + /// [`start_digit_separator`]: Self::get_start_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base prefix of `x`, and starting digit + /// separators enabled. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `0x1` | ✔️ | + /// | `_0x1` | ✔️ | + /// | `0x_1` | ❌ | + /// | `0_x1` | ❌ | + /// | `+_0x1` | ✔️ | + /// | `_+0x1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_base_prefix_leading_digit_separator(&self) -> bool { + self.base_prefix_leading_digit_separator + } + + /// Get if a digit separator is allowed after the base prefix. 
+ /// + /// If a base prefix is present, then [`integer_leading_digit_separator`] + /// overlaps with this functionality, and either being enabled, as well + /// as consecutive digit separators, will consider it to be valid. If + /// both are set and neither enables consecutive digit separators, then + /// `0x__1` is not valid. + /// + /// [`integer_leading_digit_separator`]: Self::get_integer_leading_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base prefix of `x`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0_x1` | ❌ | + /// | `0x_1` | ✔️ | + /// | `0x__1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_base_prefix_trailing_digit_separator(&self) -> bool { + self.base_prefix_trailing_digit_separator + } + + /// Get if multiple consecutive base prefix digit separators are allowed. + /// + /// That is, using `_` as a digit separator `__` would be allowed where any + /// digit separators (leading, trailing, internal) are allowed in the + /// base prefix. Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base prefix of `x`, and trailing base + /// prefix digit separators. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0_x1` | ❌ | + /// | `0x_1` | ✔️ | + /// | `0x__1` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_base_prefix_consecutive_digit_separator(&self) -> bool { + self.base_prefix_consecutive_digit_separator + } + + /// Get if a digit separator is allowed between the base suffix. + /// + /// This is currently unused, since base suffixes are a single character. 
+ /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_base_suffix_internal_digit_separator(&self) -> bool { + self.base_suffix_internal_digit_separator + } + + /// Get if a digit separator is allowed before the base suffix. + /// + /// If a base suffix is present, then [`integer_trailing_digit_separator`] + /// or [`exponent_trailing_digit_separator`] may overlap + /// with this functionality (depending on the number components), and either + /// being enabled, as well as consecutive digit separators, will + /// consider it to be valid. If both are set and neither + /// enables consecutive digit separators, then `1__h` is not valid. + /// + /// [`integer_trailing_digit_separator`]: Self::get_integer_trailing_digit_separator + /// [`exponent_trailing_digit_separator`]: Self::get_exponent_trailing_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base suffix of `h`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ✔️ | + /// | `12__h` | ✔️ | + /// | `12h_` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_base_suffix_leading_digit_separator(&self) -> bool { + self.base_suffix_leading_digit_separator + } + + /// Get if a digit separator is allowed after the base suffix. + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base suffix of `h`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ❌ | + /// | `12h_` | ✔️ | + /// | `12h__` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_base_suffix_trailing_digit_separator(&self) -> bool { + self.base_suffix_trailing_digit_separator + } + + /// Get if multiple consecutive base suffix digit separators are allowed. 
+ /// + /// That is, using `_` as a digit separator `__` would be allowed where any + /// digit separators (leading, trailing, internal) are allowed in the + /// base suffix. Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base suffix of `h`, and trailing base + /// suffix digit separators. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ❌ | + /// | `12h_` | ✔️ | + /// | `12h__` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn get_base_suffix_consecutive_digit_separator(&self) -> bool { + self.base_suffix_consecutive_digit_separator + } + /// Get if digit separators are allowed between integer digits. /// /// This will not consider an input of only the digit separator @@ -1461,13 +2052,26 @@ impl NumberFormatBuilder { /// Get if a digit separator is allowed before any integer digits. /// + /// This is taken **after** any signs and base prefixes, and therefore + /// may be at the absolute start of the number or internal to it. You + /// can disable support for digit separators at the absolute start + /// using the [`start_digit_separator`] flag. For only those before or after + /// a base prefix, see [`base_prefix_leading_digit_separator`] and + /// [`base_prefix_trailing_digit_separator`], respectively. For only those + /// prior to a sign, see [`integer_sign_digit_separator`]. + /// /// This will consider an input of only the digit separator - /// to be a identical to empty input. Can only be modified with - /// [`feature`][crate#features] `format`. Defaults to [`false`]. + /// to be a identical to empty input. Defaults to [`false`]. 
+ /// + /// [`start_digit_separator`]: Self::get_start_digit_separator + /// [`base_prefix_leading_digit_separator`]: Self::get_base_prefix_leading_digit_separator + /// [`base_prefix_trailing_digit_separator`]: Self::get_base_prefix_trailing_digit_separator + /// [`integer_sign_digit_separator`]: Self::get_integer_sign_digit_separator /// /// # Examples /// - /// Using a digit separator of `_`. + /// Using a digit separator of `_`, and an optional base prefix of + /// `x` and starting digit separators enabled. /// /// | Input | Valid? | /// |:-:|:-:| @@ -1476,6 +2080,10 @@ impl NumberFormatBuilder { /// | `1_1` | ❌ | /// | `1_` | ❌ | /// | `_1` | ✔️ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | /// /// # Used For /// @@ -1906,6 +2514,10 @@ impl NumberFormatBuilder { /// [`required_base_prefix`] is [`true`]. This is ignored for special /// floating-point numbers. /// + /// Note that base prefixes are not allowed for the exponent digits + /// of a float: the prefix (such as `0x`) must come before any other + /// significant digits, optionally with digit separators present. + /// /// # Examples /// /// Using a base prefix of `x`. @@ -1961,6 +2573,12 @@ impl NumberFormatBuilder { /// [`required_base_suffix`] is [`true`]. This is ignored for special /// floating-point numbers. /// + /// Note that base suffixes are not allowed for the exponent digits + /// of a float: the suffix, if it's present, will always refer to the + /// float as a whole, optionally with digit separators present. For + /// example, `4d2p0101x` would have the `x` refer to the full float, + /// not the exponent of `0101`. + /// /// # Examples /// /// Using a base suffix of `x`. @@ -3014,22 +3632,668 @@ impl NumberFormatBuilder { self } - /// Set if digit separators are allowed between integer digits. + /// Set if digit separators are allowed at the absolute start of the number. 
/// - /// This will not consider an input of only the digit separator - /// to be a valid separator: the digit separator must be surrounded by - /// digits. Defaults to [`false`]. + /// This modifies the behavior of [`integer_sign_digit_separator`] and + /// [`integer_leading_digit_separator`] so that the digit separators can + /// occur at the absolute start and not just internal to the number. + /// + /// Defaults to [`true`]. + /// + /// [`integer_sign_digit_separator`]: Self::get_integer_sign_digit_separator + /// [`integer_leading_digit_separator`]: Self::get_integer_leading_digit_separator /// /// # Examples /// - /// Using a digit separator of `_`. + /// Using a digit separator of `_`, integer leading digit separators + /// enabled, and starting digit separators enabled. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `_1` | ✔️ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | + /// + /// Using a digit separator of `_`, integer leading digit separators + /// enabled, and starting digit separators disabled. /// /// | Input | Valid? | /// |:-:|:-:| - /// | `1` | ✔️ | - /// | `_` | ❌ | - /// | `1_1` | ✔️ | - /// | `1_` | ❌ | + /// | `_1` | ❌ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + /// + /// + #[inline(always)] + #[cfg(feature = "format")] + pub const fn start_digit_separator(mut self, flag: bool) -> Self { + self.start_digit_separator = flag; + self + } + + /// Set if digit separators are allowed before the sign of the integer. + /// + /// You can disable support for digit separators at the absolute start + /// using the [`start_digit_separator`] flag. + /// + /// [`start_digit_separator`]: Self::get_start_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `_` | ❌ | + /// | `_1` | ❌ | + /// | `_+1` | ✔️ | + /// | `+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + /// + /// + /// + #[inline(always)] + #[cfg(feature = "format")] + pub const fn integer_sign_digit_separator(mut self, flag: bool) -> Self { + self.integer_sign_digit_separator = flag; + self + } + + /// Set if consecutive digit separators are allowed before the sign of the + /// integer. + /// + /// That is, using `_` as a digit separator `__` would be allowed. Defaults + /// to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `_` | ❌ | + /// | `_1` | ❌ | + /// | `_+1` | ✔️ | + /// | `__+1` | ✔️ | + /// | `+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + /// + /// + #[inline(always)] + #[cfg(feature = "format")] + pub const fn integer_consecutive_sign_digit_separator(mut self, flag: bool) -> Self { + self.integer_consecutive_sign_digit_separator = flag; + self + } + + /// Set if digit separators are allowed before the sign of the exponent. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1.0e1` | ✔️ | + /// | `1.0e_` | ❌ | + /// | `1.0e_1` | ❌ | + /// | `1.0e_+1` | ✔️ | + /// | `1.0e+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// + /// + #[inline(always)] + #[cfg(feature = "format")] + pub const fn exponent_sign_digit_separator(mut self, flag: bool) -> Self { + self.exponent_sign_digit_separator = flag; + self + } + + /// Set if consecutive digit separators are allowed before the sign of the + /// exponent. + /// + /// That is, using `_` as a digit separator `__` would be allowed. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `1.0e1` | ✔️ | + /// | `1.0e_` | ❌ | + /// | `1.0e_1` | ❌ | + /// | `1.0e_+1` | ✔️ | + /// | `1.0e__+1` | ✔️ | + /// | `1.0e+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// + /// + #[inline(always)] + #[cfg(feature = "format")] + pub const fn exponent_consecutive_sign_digit_separator(mut self, flag: bool) -> Self { + self.exponent_consecutive_sign_digit_separator = flag; + self + } + + /// Set if a digit separator is allowed between the `0` and the base prefix. + /// + /// # Examples + /// + /// Using a digit separator of `_`, and a base prefix of `x`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0x_1` | ❌ | + /// | `0_x1` | ✔️ | + /// | `+0_x1` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + /// + /// + #[inline(always)] + #[cfg(all(feature = "format", feature = "power-of-two"))] + pub const fn base_prefix_internal_digit_separator(mut self, flag: bool) -> Self { + self.base_prefix_internal_digit_separator = flag; + self + } + + /// Set if a digit separator is allowed before the base prefix. + /// + /// This is taken **after** the sign and therefore may be at the absolute + /// start of the number or internal to it. You can disable support for + /// digit separators at the absolute start using the + /// [`start_digit_separator`] flag. + /// + /// [`start_digit_separator`]: Self::get_start_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base prefix of `x`, and starting digit + /// separators enabled. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `0x1` | ✔️ | + /// | `_0x1` | ✔️ | + /// | `0x_1` | ❌ | + /// | `0_x1` | ❌ | + /// | `+_0x1` | ✔️ | + /// | `_+0x1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + /// + /// + #[inline(always)] + #[cfg(all(feature = "format", feature = "power-of-two"))] + pub const fn base_prefix_leading_digit_separator(mut self, flag: bool) -> Self { + self.base_prefix_leading_digit_separator = flag; + self + } + + /// Set if a digit separator is allowed after the base prefix. + /// + /// If a base prefix is present, then [`integer_leading_digit_separator`] + /// overlaps with this functionality, and either being enabled, as well + /// as consecutive digit separators, will consider it to be valid. If + /// both are set and neither enables consecutive digit separators, then + /// `0x__1` is not valid. + /// + /// [`integer_leading_digit_separator`]: Self::get_integer_leading_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base prefix of `x`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0_x1` | ❌ | + /// | `0x_1` | ✔️ | + /// | `0x__1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + /// + /// + #[inline(always)] + #[cfg(all(feature = "format", feature = "power-of-two"))] + pub const fn base_prefix_trailing_digit_separator(mut self, flag: bool) -> Self { + self.base_prefix_trailing_digit_separator = flag; + self + } + + /// Set if multiple consecutive base prefix digit separators are allowed. + /// + /// That is, using `_` as a digit separator `__` would be allowed where any + /// digit separators (leading, trailing, internal) are allowed in the + /// base prefix. Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base prefix of `x`, and trailing base + /// prefix digit separators. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0_x1` | ❌ | + /// | `0x_1` | ✔️ | + /// | `0x__1` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + /// + /// + #[inline(always)] + #[cfg(all(feature = "format", feature = "power-of-two"))] + pub const fn base_prefix_consecutive_digit_separator(mut self, flag: bool) -> Self { + self.base_prefix_consecutive_digit_separator = flag; + self + } + + /// Set if a digit separator is allowed between the base suffix. + /// + /// This is currently unused, since base suffixes are a single character. + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + #[cfg(all(feature = "format", feature = "power-of-two"))] + pub const fn base_suffix_internal_digit_separator(mut self, flag: bool) -> Self { + self.base_suffix_internal_digit_separator = flag; + self + } + + /// Set if a digit separator is allowed before the base suffix. + /// + /// If a base suffix is present, then [`integer_trailing_digit_separator`] + /// or [`exponent_trailing_digit_separator`] may overlap + /// with this functionality (depending on the number components), and either + /// being enabled, as well as consecutive digit separators, will + /// consider it to be valid. If both are set and neither + /// enables consecutive digit separators, then `1__h` is not valid. + /// + /// [`integer_trailing_digit_separator`]: Self::get_integer_trailing_digit_separator + /// [`exponent_trailing_digit_separator`]: Self::get_exponent_trailing_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base suffix of `h`. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ✔️ | + /// | `12__h` | ✔️ | + /// | `12h_` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + /// + /// + #[inline(always)] + #[cfg(all(feature = "format", feature = "power-of-two"))] + pub const fn base_suffix_leading_digit_separator(mut self, flag: bool) -> Self { + self.base_suffix_leading_digit_separator = flag; + self + } + + /// Set if a digit separator is allowed after the base suffix. + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base suffix of `h`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ❌ | + /// | `12h_` | ✔️ | + /// | `12h__` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + /// + /// + #[inline(always)] + #[cfg(all(feature = "format", feature = "power-of-two"))] + pub const fn base_suffix_trailing_digit_separator(mut self, flag: bool) -> Self { + self.base_suffix_trailing_digit_separator = flag; + self + } + + /// Set if multiple consecutive base suffix digit separators are allowed. + /// + /// That is, using `_` as a digit separator `__` would be allowed where any + /// digit separators (leading, trailing, internal) are allowed in the + /// base suffix. Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base suffix of `h`, and trailing base + /// suffix digit separators. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ❌ | + /// | `12h_` | ✔️ | + /// | `12h__` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + /// + /// + #[inline(always)] + #[cfg(all(feature = "format", feature = "power-of-two"))] + pub const fn base_suffix_consecutive_digit_separator(mut self, flag: bool) -> Self { + self.base_suffix_consecutive_digit_separator = flag; + self + } + + /// Set if digit separators are allowed between integer digits. + /// + /// This will not consider an input of only the digit separator + /// to be a valid separator: the digit separator must be surrounded by + /// digits. Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `_` | ❌ | + /// | `1_1` | ✔️ | + /// | `1_` | ❌ | /// | `_1` | ❌ | /// /// # Used For @@ -3169,12 +4433,26 @@ impl NumberFormatBuilder { /// Set if a digit separator is allowed before any integer digits. /// + /// This is taken **after** any signs and base prefixes, and therefore + /// may be at the absolute start of the number or internal to it. You + /// can disable support for digit separators at the absolute start + /// using the [`start_digit_separator`] flag. For only those before or after + /// a base prefix, see [`base_prefix_leading_digit_separator`] and + /// [`base_prefix_trailing_digit_separator`], respectively. For only those + /// prior to a sign, see [`integer_sign_digit_separator`]. + /// /// This will consider an input of only the digit separator /// to be a identical to empty input. Defaults to [`false`]. 
/// + /// [`start_digit_separator`]: Self::get_start_digit_separator + /// [`base_prefix_leading_digit_separator`]: Self::get_base_prefix_leading_digit_separator + /// [`base_prefix_trailing_digit_separator`]: Self::get_base_prefix_trailing_digit_separator + /// [`integer_sign_digit_separator`]: Self::get_integer_sign_digit_separator + /// /// # Examples /// - /// Using a digit separator of `_`. + /// Using a digit separator of `_`, an optional base prefix of `x` and + /// starting digit separators enabled. /// /// | Input | Valid? | /// |:-:|:-:| @@ -3183,6 +4461,10 @@ impl NumberFormatBuilder { /// | `1_1` | ❌ | /// | `1_` | ❌ | /// | `_1` | ✔️ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | /// /// # Used For /// @@ -3192,20 +4474,36 @@ impl NumberFormatBuilder { /// #[inline(always)] @@ -3272,6 +4570,8 @@ impl NumberFormatBuilder { /// | `1.1e1_1` | ❌ | /// | `1.1e1_` | ❌ | /// | `1.1e_1` | ✔️ | + /// | `1.1e_+1` | ❌ | + /// | `1.1e+_1` | ✔️ | /// /// # Used For /// @@ -3288,6 +4588,8 @@ impl NumberFormatBuilder { /// assert_eq!(parse_with_options::(b"1.1e1_1", &PF_OPTS), Err(Error::InvalidDigit(5))); /// assert_eq!(parse_with_options::(b"1.1e1_", &PF_OPTS), Err(Error::InvalidDigit(5))); /// assert_eq!(parse_with_options::(b"1.1e_1", &PF_OPTS), Ok(11.0)); + /// assert_eq!(parse_with_options::(b"1.1e_+1", &PF_OPTS), Err(Error::EmptyExponent(5))); + /// assert_eq!(parse_with_options::(b"1.1e+_1", &PF_OPTS), Ok(11.0)); /// ``` /// --> #[inline(always)] @@ -3787,6 +5089,19 @@ impl NumberFormatBuilder { self.supports_writing_floats, SUPPORTS_WRITING_FLOATS ; self.required_base_prefix, REQUIRED_BASE_PREFIX ; self.required_base_suffix, REQUIRED_BASE_SUFFIX ; + self.start_digit_separator, START_DIGIT_SEPARATOR ; + self.integer_sign_digit_separator, INTEGER_SIGN_DIGIT_SEPARATOR ; + self.integer_consecutive_sign_digit_separator, INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR ; + self.exponent_sign_digit_separator, 
EXPONENT_SIGN_DIGIT_SEPARATOR ; + self.exponent_consecutive_sign_digit_separator, EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR ; + self.base_prefix_internal_digit_separator, BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR ; + self.base_prefix_leading_digit_separator, BASE_PREFIX_LEADING_DIGIT_SEPARATOR ; + self.base_prefix_trailing_digit_separator, BASE_PREFIX_TRAILING_DIGIT_SEPARATOR ; + self.base_prefix_consecutive_digit_separator, BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR ; + self.base_suffix_internal_digit_separator, BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR ; + self.base_suffix_leading_digit_separator, BASE_SUFFIX_LEADING_DIGIT_SEPARATOR ; + self.base_suffix_trailing_digit_separator, BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR ; + self.base_suffix_consecutive_digit_separator, BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR ; self.integer_internal_digit_separator, INTEGER_INTERNAL_DIGIT_SEPARATOR ; self.fraction_internal_digit_separator, FRACTION_INTERNAL_DIGIT_SEPARATOR ; self.exponent_internal_digit_separator, EXPONENT_INTERNAL_DIGIT_SEPARATOR ; @@ -3898,6 +5213,49 @@ impl NumberFormatBuilder { supports_writing_floats: has_flag!(format, SUPPORTS_WRITING_FLOATS), required_base_prefix: has_flag!(format, REQUIRED_BASE_PREFIX), required_base_suffix: has_flag!(format, REQUIRED_BASE_SUFFIX), + start_digit_separator: has_flag!(format, START_DIGIT_SEPARATOR), + integer_sign_digit_separator: has_flag!(format, INTEGER_SIGN_DIGIT_SEPARATOR), + integer_consecutive_sign_digit_separator: has_flag!( + format, + INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR + ), + exponent_sign_digit_separator: has_flag!(format, EXPONENT_SIGN_DIGIT_SEPARATOR), + exponent_consecutive_sign_digit_separator: has_flag!( + format, + EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR + ), + base_prefix_internal_digit_separator: has_flag!( + format, + BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR + ), + base_prefix_leading_digit_separator: has_flag!( + format, + BASE_PREFIX_LEADING_DIGIT_SEPARATOR + ), + base_prefix_trailing_digit_separator: has_flag!( + 
format, + BASE_PREFIX_TRAILING_DIGIT_SEPARATOR + ), + base_prefix_consecutive_digit_separator: has_flag!( + format, + BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR + ), + base_suffix_internal_digit_separator: has_flag!( + format, + BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR + ), + base_suffix_leading_digit_separator: has_flag!( + format, + BASE_SUFFIX_LEADING_DIGIT_SEPARATOR + ), + base_suffix_trailing_digit_separator: has_flag!( + format, + BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR + ), + base_suffix_consecutive_digit_separator: has_flag!( + format, + BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR + ), integer_internal_digit_separator: has_flag!(format, INTEGER_INTERNAL_DIGIT_SEPARATOR), fraction_internal_digit_separator: has_flag!(format, FRACTION_INTERNAL_DIGIT_SEPARATOR), exponent_internal_digit_separator: has_flag!(format, EXPONENT_INTERNAL_DIGIT_SEPARATOR), diff --git a/lexical-util/src/format_flags.rs b/lexical-util/src/format_flags.rs index 115bc581..6fef1f71 100644 --- a/lexical-util/src/format_flags.rs +++ b/lexical-util/src/format_flags.rs @@ -30,12 +30,12 @@ //! //! 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ -//! |I/I|F/I|E/I|I/L|F/L|E/L|I/T|F/T|E/T|I/C|F/C|E/C|S/D| | +//! |I/I|F/I|E/I|I/L|F/L|E/L|I/T|F/T|E/T|I/C|F/C|E/C|S/D|s/D|I/s|I/c| //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ //! //! 48 49 50 51 52 53 54 55 56 57 58 59 60 62 62 63 64 //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ -//! | | +//! |E/s|E/c|P/I|P/L|P/T|P/C|S/I|S/L|S/T|S/C| | //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ //! //! Where: @@ -54,7 +54,7 @@ //! S/C = Case-sensitive special (non-finite) values. //! N/I = No integer leading zeros. //! N/F = No float leading zeros. -//! R/e = Required exponent characters. +//! R/e = Required exponent notation. //! e/C = Case-sensitive exponent character. //! e/P = Case-sensitive base prefix. //! 
e/S = Case-sensitive base suffix. @@ -82,6 +82,19 @@ //! F/C = Fraction consecutive digit separator. //! E/C = Exponent consecutive digit separator. //! S/D = Special (non-finite) digit separator. +//! s/D = Absolute start digit separator. +//! I/s = Integer sign digit separator. +//! I/c = Integer sign consecutive digit separator. +//! E/s = Exponent sign digit separator. +//! E/c = Exponent sign consecutive digit separator. +//! P/I = Base prefix internal digit separator. +//! P/L = Base prefix leading digit separator. +//! P/T = Base prefix trailing digit separator. +//! P/C = Base prefix consecutive digit separator. +//! S/I = Base suffix internal digit separator. +//! S/L = Base suffix leading digit separator. +//! S/T = Base suffix trailing digit separator. +//! S/C = Base suffix consecutive digit separator. //! ``` //! //! The upper 64-bits are designated for control characters and radixes, @@ -268,6 +281,9 @@ pub const REQUIRED_DIGITS: u128 = REQUIRED_INTEGER_DIGITS | REQUIRED_FRACTION_DIGITS | REQUIRED_EXPONENT_DIGITS | + REQUIRED_INTEGER_DIGITS_WITH_EXPONENT | + REQUIRED_FRACTION_DIGITS_WITH_EXPONENT | + REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT | REQUIRED_MANTISSA_DIGITS; /// Positive sign before the mantissa is not allowed. @@ -466,9 +482,48 @@ pub const CONSECUTIVE_DIGIT_SEPARATOR: u128 = FRACTION_CONSECUTIVE_DIGIT_SEPARATOR | EXPONENT_CONSECUTIVE_DIGIT_SEPARATOR; -/// Any digit separators are allowed in special (non-finite) values. +/// If any digit separators are allowed in special (non-finite) values. pub const SPECIAL_DIGIT_SEPARATOR: u128 = 1 << 44; +/// If digit separators are allowed at the absolute start of the number. +pub const START_DIGIT_SEPARATOR: u128 = 1 << 45; + +/// If digit separators are allowed before the sign of the integer. +pub const INTEGER_SIGN_DIGIT_SEPARATOR: u128 = 1 << 46; + +/// If consecutive digit separators are allowed before the sign of the integer. 
+pub const INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR: u128 = 1 << 47; + +/// If digit separators are allowed before the sign of the exponent. +pub const EXPONENT_SIGN_DIGIT_SEPARATOR: u128 = 1 << 48; + +/// If consecutive digit separators are allowed before the sign of the exponent. +pub const EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR: u128 = 1 << 49; + +/// If a digit separator is allowed between the `0` and the base prefix. +pub const BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR: u128 = 1 << 50; + +/// If a digit separator is allowed before the base prefix. +pub const BASE_PREFIX_LEADING_DIGIT_SEPARATOR: u128 = 1 << 51; + +/// If a digit separator is allowed after the base prefix. +pub const BASE_PREFIX_TRAILING_DIGIT_SEPARATOR: u128 = 1 << 52; + +/// Multiple consecutive base prefix digit separators are allowed. +pub const BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR: u128 = 1 << 53; + +/// If a digit separator is allowed between the base suffix. +pub const BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR: u128 = 1 << 54; + +/// If a digit separator is allowed before the base suffix. +pub const BASE_SUFFIX_LEADING_DIGIT_SEPARATOR: u128 = 1 << 55; + +/// If a digit separator is allowed after the base suffix. +pub const BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR: u128 = 1 << 56; + +/// Multiple consecutive base suffix digit separators are allowed. +pub const BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR: u128 = 1 << 57; + // Digit separator flags. 
const _: () = assert!(INTEGER_INTERNAL_DIGIT_SEPARATOR == 1 << 32); check_subsequent_flags!(INTEGER_INTERNAL_DIGIT_SEPARATOR, FRACTION_INTERNAL_DIGIT_SEPARATOR); @@ -483,6 +538,19 @@ check_subsequent_flags!(EXPONENT_TRAILING_DIGIT_SEPARATOR, INTEGER_CONSECUTIVE_D check_subsequent_flags!(INTEGER_CONSECUTIVE_DIGIT_SEPARATOR, FRACTION_CONSECUTIVE_DIGIT_SEPARATOR); check_subsequent_flags!(FRACTION_CONSECUTIVE_DIGIT_SEPARATOR, EXPONENT_CONSECUTIVE_DIGIT_SEPARATOR); check_subsequent_flags!(EXPONENT_CONSECUTIVE_DIGIT_SEPARATOR, SPECIAL_DIGIT_SEPARATOR); +check_subsequent_flags!(SPECIAL_DIGIT_SEPARATOR, START_DIGIT_SEPARATOR); +check_subsequent_flags!(START_DIGIT_SEPARATOR, INTEGER_SIGN_DIGIT_SEPARATOR); +check_subsequent_flags!(INTEGER_SIGN_DIGIT_SEPARATOR, INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR); +check_subsequent_flags!(INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR, EXPONENT_SIGN_DIGIT_SEPARATOR); +check_subsequent_flags!(EXPONENT_SIGN_DIGIT_SEPARATOR, EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR); +check_subsequent_flags!(EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR, BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR); +check_subsequent_flags!(BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR, BASE_PREFIX_LEADING_DIGIT_SEPARATOR); +check_subsequent_flags!(BASE_PREFIX_LEADING_DIGIT_SEPARATOR, BASE_PREFIX_TRAILING_DIGIT_SEPARATOR); +check_subsequent_flags!(BASE_PREFIX_TRAILING_DIGIT_SEPARATOR, BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR); +check_subsequent_flags!(BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR, BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR); +check_subsequent_flags!(BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR, BASE_SUFFIX_LEADING_DIGIT_SEPARATOR); +check_subsequent_flags!(BASE_SUFFIX_LEADING_DIGIT_SEPARATOR, BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR); +check_subsequent_flags!(BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR, BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR); // CONTROL CHARACTER & RADIX MASKS // ------------------------------- @@ -574,9 +642,6 @@ pub const FLAG_MASK: u128 = CASE_SENSITIVE_EXPONENT | CASE_SENSITIVE_BASE_PREFIX 
| CASE_SENSITIVE_BASE_SUFFIX | - REQUIRED_INTEGER_DIGITS_WITH_EXPONENT | - REQUIRED_FRACTION_DIGITS_WITH_EXPONENT | - REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT | SUPPORTS_PARSING_FLOATS | SUPPORTS_PARSING_INTEGERS | SUPPORTS_WRITING_FLOATS | @@ -622,7 +687,11 @@ pub const DIGIT_SEPARATOR_FLAG_MASK: u128 = LEADING_DIGIT_SEPARATOR | TRAILING_DIGIT_SEPARATOR | CONSECUTIVE_DIGIT_SEPARATOR | - SPECIAL_DIGIT_SEPARATOR; + SPECIAL_DIGIT_SEPARATOR | + START_DIGIT_SEPARATOR_FLAG_MASK | + SIGN_DIGIT_SEPARATOR_FLAG_MASK | + BASE_PREFIX_DIGIT_SEPARATOR_FLAG_MASK | + BASE_SUFFIX_DIGIT_SEPARATOR_FLAG_MASK; /// Mask to extract exponent flags. #[doc(hidden)] @@ -665,6 +734,34 @@ pub const EXPONENT_DIGIT_SEPARATOR_FLAG_MASK: u128 = EXPONENT_TRAILING_DIGIT_SEPARATOR | EXPONENT_CONSECUTIVE_DIGIT_SEPARATOR; +/// Mask to extract start digit separator flags. +#[doc(hidden)] +pub const START_DIGIT_SEPARATOR_FLAG_MASK: u128 = START_DIGIT_SEPARATOR; + +/// Mask to extract sign digit separator flags. +#[doc(hidden)] +pub const SIGN_DIGIT_SEPARATOR_FLAG_MASK: u128 = + INTEGER_SIGN_DIGIT_SEPARATOR | + INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR | + EXPONENT_SIGN_DIGIT_SEPARATOR | + EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR; + +/// Mask to extract base prefix digit separator flags. +#[doc(hidden)] +pub const BASE_PREFIX_DIGIT_SEPARATOR_FLAG_MASK: u128 = + BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR | + BASE_PREFIX_LEADING_DIGIT_SEPARATOR | + BASE_PREFIX_TRAILING_DIGIT_SEPARATOR | + BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR; + +/// Mask to extract base suffix digit separator flags. 
+#[doc(hidden)] +pub const BASE_SUFFIX_DIGIT_SEPARATOR_FLAG_MASK: u128 = + BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR | + BASE_SUFFIX_LEADING_DIGIT_SEPARATOR | + BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR | + BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR; + // EXTRACTORS // ---------- diff --git a/lexical-util/src/not_feature_format.rs b/lexical-util/src/not_feature_format.rs index 51498aea..41adddca 100644 --- a/lexical-util/src/not_feature_format.rs +++ b/lexical-util/src/not_feature_format.rs @@ -72,6 +72,19 @@ use crate::format_flags as flags; /// 48. [`base_suffix`][NumberFormat::base_suffix] /// 49. [`exponent_base`][NumberFormat::exponent_base] /// 50. [`exponent_radix`][NumberFormat::exponent_radix] +/// 51. [`start_digit_separator`][NumberFormat::start_digit_separator] +/// 52. [`integer_sign_digit_separator`][NumberFormat::integer_sign_digit_separator] +/// 53. [`integer_consecutive_sign_digit_separator`][NumberFormat::integer_consecutive_sign_digit_separator] +/// 54. [`exponent_sign_digit_separator`][NumberFormat::exponent_sign_digit_separator] +/// 55. [`exponent_consecutive_sign_digit_separator`][NumberFormat::exponent_consecutive_sign_digit_separator] +/// 56. [`base_prefix_internal_digit_separator`][NumberFormat::base_prefix_internal_digit_separator] +/// 57. [`base_prefix_leading_digit_separator`][NumberFormat::base_prefix_leading_digit_separator] +/// 58. [`base_prefix_trailing_digit_separator`][NumberFormat::base_prefix_trailing_digit_separator] +/// 59. [`base_prefix_consecutive_digit_separator`][NumberFormat::base_prefix_consecutive_digit_separator] +/// 60. [`base_suffix_internal_digit_separator`][NumberFormat::base_suffix_internal_digit_separator] +/// 61. [`base_suffix_leading_digit_separator`][NumberFormat::base_suffix_leading_digit_separator] +/// 62. [`base_suffix_trailing_digit_separator`][NumberFormat::base_suffix_trailing_digit_separator] +/// 63. 
[`base_suffix_consecutive_digit_separator`][NumberFormat::base_suffix_consecutive_digit_separator] /// /// This should always be constructed via [`NumberFormatBuilder`]. /// See [`NumberFormatBuilder`] for the fields for the packed struct. @@ -831,7 +844,465 @@ impl NumberFormat { // DIGIT SEPARATOR FLAGS & MASKS - // If digit separators are allowed between integer digits. + /// If digit separators are allowed at the absolute start of the number. + /// + /// See [`start_digit_separator`][Self::start_digit_separator]. + pub const START_DIGIT_SEPARATOR: bool = false; + + /// Get if digit separators are allowed at the absolute start of the number. + /// + /// This modifies the behavior of [`integer_sign_digit_separator`] and + /// [`integer_leading_digit_separator`] so that the digit separators can + /// occur at the absolute start and not just internal to the number. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// [`integer_sign_digit_separator`]: Self::integer_sign_digit_separator + /// [`integer_leading_digit_separator`]: Self::integer_leading_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_`, integer leading digit separators + /// enabled, and starting digit separators enabled. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `_1` | ✔️ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | + /// + /// Using a digit separator of `_`, integer leading digit separators + /// enabled, and starting digit separators disabled. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `_1` | ❌ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn start_digit_separator(&self) -> bool { + Self::START_DIGIT_SEPARATOR + } + + /// If digit separators are allowed before the sign of the integer. 
+ /// + /// See [`integer_sign_digit_separator`][Self::integer_sign_digit_separator]. + pub const INTEGER_SIGN_DIGIT_SEPARATOR: bool = false; + + /// Get if digit separators are allowed before the sign of the integer. + /// + /// You can disable support for digit separators at the absolute start + /// using the [`start_digit_separator`] flag. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// [`start_digit_separator`]: Self::start_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `_` | ❌ | + /// | `_1` | ❌ | + /// | `_+1` | ✔️ | + /// | `+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn integer_sign_digit_separator(&self) -> bool { + Self::INTEGER_SIGN_DIGIT_SEPARATOR + } + + /// If consecutive digit separators are allowed before the sign of the + /// integer. + /// + /// See [`integer_consecutive_sign_digit_separator`][Self::integer_consecutive_sign_digit_separator]. + pub const INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR: bool = false; + + /// Get if consecutive digit separators are allowed before the sign of the + /// integer. + /// + /// That is, using `_` as a digit separator `__` would be allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `_` | ❌ | + /// | `_1` | ❌ | + /// | `_+1` | ✔️ | + /// | `__+1` | ✔️ | + /// | `+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn integer_consecutive_sign_digit_separator(&self) -> bool { + Self::INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR + } + + /// If digit separators are allowed before the sign of the exponent. 
+ /// + /// See [`exponent_sign_digit_separator`][Self::exponent_sign_digit_separator]. + pub const EXPONENT_SIGN_DIGIT_SEPARATOR: bool = false; + + /// Get if digit separators are allowed before the sign of the exponent. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1.0e1` | ✔️ | + /// | `1.0e_` | ❌ | + /// | `1.0e_1` | ❌ | + /// | `1.0e_+1` | ✔️ | + /// | `1.0e+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn exponent_sign_digit_separator(&self) -> bool { + Self::EXPONENT_SIGN_DIGIT_SEPARATOR + } + + /// If consecutive digit separators are allowed before the sign of the + /// exponent. + /// + /// See [`exponent_consecutive_sign_digit_separator`][Self::exponent_consecutive_sign_digit_separator]. + pub const EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR: bool = false; + + /// Get if consecutive digit separators are allowed before the sign of the + /// exponent. + /// + /// That is, using `_` as a digit separator `__` would be allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1.0e1` | ✔️ | + /// | `1.0e_` | ❌ | + /// | `1.0e_1` | ❌ | + /// | `1.0e_+1` | ✔️ | + /// | `1.0e__+1` | ✔️ | + /// | `1.0e+_1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn exponent_consecutive_sign_digit_separator(&self) -> bool { + Self::EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed between the `0` and the base prefix. + /// + /// See [`base_prefix_internal_digit_separator`][Self::base_prefix_internal_digit_separator]. 
+ pub const BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR: bool = false; + + /// Get if a digit separator is allowed between the `0` and the base prefix. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`, and a base prefix of `x`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0x_1` | ❌ | + /// | `0_x1` | ✔️ | + /// | `+0_x1` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_prefix_internal_digit_separator(&self) -> bool { + Self::BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed before the base prefix. + /// + /// See [`base_prefix_leading_digit_separator`][Self::base_prefix_leading_digit_separator]. + pub const BASE_PREFIX_LEADING_DIGIT_SEPARATOR: bool = false; + + /// Get if a digit separator is allowed before the base prefix. + /// + /// This is taken **after** the sign and therefore may be at the absolute + /// start of the number or internal to it. You can disable support for + /// digit separators at the absolute start using the + /// [`start_digit_separator`] flag. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// [`start_digit_separator`]: Self::start_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base prefix of `x`, and starting digit + /// separators enabled. + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `0x1` | ✔️ | + /// | `_0x1` | ✔️ | + /// | `0x_1` | ❌ | + /// | `0_x1` | ❌ | + /// | `+_0x1` | ✔️ | + /// | `_+0x1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_prefix_leading_digit_separator(&self) -> bool { + Self::BASE_PREFIX_LEADING_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed after the base prefix. + /// + /// See [`base_prefix_trailing_digit_separator`][Self::base_prefix_trailing_digit_separator]. + pub const BASE_PREFIX_TRAILING_DIGIT_SEPARATOR: bool = false; + + /// Get if a digit separator is allowed after the base prefix. + /// + /// If a base prefix is present, then [`integer_leading_digit_separator`] + /// overlaps with this functionality, and either being enabled, as well + /// as consecutive digit separators, will consider it to be valid. If + /// both are set and neither enables consecutive digit separators, then + /// `0x__1` is not valid. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// [`integer_leading_digit_separator`]: Self::integer_leading_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base prefix of `x`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0_x1` | ❌ | + /// | `0x_1` | ✔️ | + /// | `0x__1` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_prefix_trailing_digit_separator(&self) -> bool { + Self::BASE_PREFIX_TRAILING_DIGIT_SEPARATOR + } + + /// If multiple consecutive base prefix digit separators are allowed. + /// + /// See [`base_prefix_consecutive_digit_separator`][Self::base_prefix_consecutive_digit_separator]. + pub const BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR: bool = false; + + /// Get if multiple consecutive base prefix digit separators are allowed. 
+ /// + /// That is, using `_` as a digit separator `__` would be allowed where any + /// digit separators (leading, trailing, internal) are allowed in the + /// base prefix. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base prefix of `x`, and trailing base + /// prefix digit separators. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1` | ✔️ | + /// | `0x1` | ✔️ | + /// | `_0x1` | ❌ | + /// | `0_x1` | ❌ | + /// | `0x_1` | ✔️ | + /// | `0x__1` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_prefix_consecutive_digit_separator(&self) -> bool { + Self::BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed between the base suffix. + /// + /// See [`base_suffix_internal_digit_separator`][Self::base_suffix_internal_digit_separator]. + pub const BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR: bool = false; + + /// Get if a digit separator is allowed between the base suffix. + /// + /// This is currently unused, since base suffixes are a single character. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_suffix_internal_digit_separator(&self) -> bool { + Self::BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed before the base suffix. + /// + /// See [`base_suffix_leading_digit_separator`][Self::base_suffix_leading_digit_separator]. + pub const BASE_SUFFIX_LEADING_DIGIT_SEPARATOR: bool = false; + + /// Get if a digit separator is allowed before the base suffix. 
+ /// + /// If a base suffix is present, then [`integer_trailing_digit_separator`] + /// or [`exponent_trailing_digit_separator`] may overlap + /// with this functionality (depending on the number components), and either + /// being enabled, as well as consecutive digit separators, will + /// consider it to be valid. If both are set and neither + /// enables consecutive digit separators, then `1__h` is not valid. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// Defaults to [`false`]. + /// + /// [`integer_trailing_digit_separator`]: Self::integer_trailing_digit_separator + /// [`exponent_trailing_digit_separator`]: Self::exponent_trailing_digit_separator + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base suffix of `h`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ✔️ | + /// | `12__h` | ❌ | + /// | `12h_` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_suffix_leading_digit_separator(&self) -> bool { + Self::BASE_SUFFIX_LEADING_DIGIT_SEPARATOR + } + + /// If a digit separator is allowed after the base suffix. + /// + /// See [`base_suffix_trailing_digit_separator`][Self::base_suffix_trailing_digit_separator]. + pub const BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR: bool = false; + + /// Get if a digit separator is allowed after the base suffix. + /// + /// # Examples + /// + /// Using a digit separator of `_` and a base suffix of `h`. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ❌ | + /// | `12h_` | ✔️ | + /// | `12h__` | ❌ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_suffix_trailing_digit_separator(&self) -> bool { + Self::BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR + } + + /// If multiple consecutive base suffix digit separators are allowed.
+ /// + /// See [`base_suffix_consecutive_digit_separator`][Self::base_suffix_consecutive_digit_separator]. + pub const BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR: bool = false; + + /// Get if multiple consecutive base suffix digit separators are allowed. + /// + /// That is, using `_` as a digit separator `__` would be allowed where any + /// digit separators (leading, trailing, internal) are allowed in the + /// base suffix. Defaults to [`false`]. + /// + /// # Examples + /// + /// Using a digit separator of `_`, a base suffix of `h`, and trailing base + /// suffix digit separators. + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `12h` | ✔️ | + /// | `1_2h` | ❌ | + /// | `12_h` | ❌ | + /// | `12h_` | ✔️ | + /// | `12h__` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + pub const fn base_suffix_consecutive_digit_separator(&self) -> bool { + Self::BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR + } + + /// If digit separators are allowed between integer digits. /// /// This will not consider an input of only the digit separator /// to be a valid separator: the digit separator must be surrounded by @@ -976,13 +1447,27 @@ impl NumberFormat { /// Get if a digit separator is allowed before any integer digits. /// + /// This is taken **after** any signs and base prefixes, and therefore + /// may be at the absolute start of the number or internal to it. You + /// can disable support for digit separators at the absolute start + /// using the [`start_digit_separator`] flag. For only those before or after + /// a base prefix, see [`base_prefix_leading_digit_separator`] and + /// [`base_prefix_trailing_digit_separator`], respectively. For only those + /// prior to a sign, see [`integer_sign_digit_separator`]. + /// /// This will consider an input of only the digit separator /// to be a identical to empty input. Can only be modified with /// [`feature`][crate#features] `format`. Defaults to [`false`]. 
/// + /// [`start_digit_separator`]: Self::start_digit_separator + /// [`base_prefix_leading_digit_separator`]: Self::base_prefix_leading_digit_separator + /// [`base_prefix_trailing_digit_separator`]: Self::base_prefix_trailing_digit_separator + /// [`integer_sign_digit_separator`]: Self::integer_sign_digit_separator + /// /// # Examples /// - /// Using a digit separator of `_`. + /// Using a digit separator of `_`, and an optional base prefix of + /// `x` and starting digit separators enabled. /// /// | Input | Valid? | /// |:-:|:-:| @@ -991,6 +1476,10 @@ impl NumberFormat { /// | `1_1` | ❌ | /// | `1_` | ❌ | /// | `_1` | ✔️ | + /// | `_+1` | ❌ | + /// | `+_1` | ✔️ | + /// | `+0x_1` | ✔️ | + /// | `+_0x1` | ❌ | /// /// # Used For /// @@ -1378,6 +1867,10 @@ impl NumberFormat { /// used for writing numbers if [`required_base_prefix`] is [`true`]. /// This is ignored for special floating-point numbers. /// + /// Note that base prefixes are not allowed for the exponent digits + /// of a float: the prefix (such as `0x`) must come before any other + /// significant digits, optionally with digit separators present. + /// /// # Examples /// /// Using a base prefix of `x`. @@ -1424,6 +1917,12 @@ impl NumberFormat { /// used for writing numbers if [`required_base_suffix`] is [`true`]. /// This is ignored for special floating-point numbers. /// + /// Note that base suffixes are not allowed for the exponent digits + /// of a float: the suffix, if it's present, will always refer to the + /// float as a whole, optionally with digit separators present. For + /// example, `4d2p0101x` would have the `x` refer to the full float, + /// not the exponent of `0101`. + /// /// # Examples /// /// Using a base suffix of `x`. 
@@ -1663,6 +2162,7 @@ pub(crate) const fn radix_error_impl(format: u128) -> Error { pub(crate) const fn format_error_impl(format: u128) -> Error { let valid_flags = flags::REQUIRED_EXPONENT_DIGITS | flags::REQUIRED_MANTISSA_DIGITS + | flags::REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT | flags::SUPPORTS_PARSING_FLOATS | flags::SUPPORTS_PARSING_INTEGERS | flags::SUPPORTS_WRITING_FLOATS From 236a00ae5ebaec0e8e24718857eb85cf883de930 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Tue, 14 Jan 2025 18:40:50 -0600 Subject: [PATCH 09/18] Add in our implementation for parsing signs with digit separators. This adds support for the integer and exponent digit separator flag parsing. --- lexical-parse-float/src/parse.rs | 2 + lexical-parse-integer/src/algorithm.rs | 6 +- lexical-util/src/format_builder.rs | 170 ++++++++++++++++--------- lexical-util/src/iterator.rs | 14 +- lexical-util/src/skip.rs | 115 ++++++++++++++++- 5 files changed, 239 insertions(+), 68 deletions(-) diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs index f0fcb37e..7f95e440 100644 --- a/lexical-parse-float/src/parse.rs +++ b/lexical-parse-float/src/parse.rs @@ -175,6 +175,7 @@ pub fn parse_mantissa_sign(byte: &mut Bytes<'_, FORMAT>) -> parse_sign!( byte, true, + integer_iter, format.no_positive_mantissa_sign(), format.required_mantissa_sign(), InvalidPositiveSign, @@ -189,6 +190,7 @@ pub fn parse_exponent_sign(byte: &mut Bytes<'_, FORMAT>) -> parse_sign!( byte, true, + exponent_iter, format.no_positive_exponent_sign(), format.required_exponent_sign(), InvalidPositiveExponentSign, diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index 845c06fa..591376a3 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -204,12 +204,13 @@ macro_rules! 
parse_sign { ( $byte:ident, $is_signed:expr, + $as_iter:ident, $no_positive:expr, $required:expr, $invalid_positive:ident, - $missing:ident + $missing:ident $(,)? ) => { - match $byte.integer_iter().parse_sign() { + match $byte.$as_iter().parse_sign() { (false, true) if !$no_positive => { // SAFETY: We have at least 1 item left since we peaked a value unsafe { $byte.step_unchecked() }; @@ -234,6 +235,7 @@ pub fn parse_sign(byte: &mut Bytes<'_, FORMAT>) parse_sign!( byte, T::IS_SIGNED, + integer_iter, format.no_positive_mantissa_sign(), format.required_mantissa_sign(), InvalidPositiveSign, diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index 4072e08e..3ffaa12e 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -443,19 +443,19 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`supports_writing_integers`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1201\n [`supports_writing_floats`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1211\n [`case_sensitive_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L765\n -[`start_digit_separator`]: https://TODO.com\n -[`integer_sign_digit_separator`]: https://TODO.com\n -[`integer_consecutive_sign_digit_separator`]: https://TODO.com\n -[`exponent_sign_digit_separator`]: https://TODO.com\n -[`exponent_consecutive_sign_digit_separator`]: https://TODO.com\n -[`base_prefix_internal_digit_separator`]: https://TODO.com\n -[`base_prefix_leading_digit_separator`]: https://TODO.com\n -[`base_prefix_trailing_digit_separator`]: https://TODO.com\n -[`base_prefix_consecutive_digit_separator`]: https://TODO.com\n -[`base_suffix_internal_digit_separator`]: https://TODO.com\n -[`base_suffix_leading_digit_separator`]: https://TODO.com\n -[`base_suffix_trailing_digit_separator`]: https://TODO.com\n 
-[`base_suffix_consecutive_digit_separator`]: https://TODO.com\n +[`start_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1650\n +[`integer_sign_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1678\n +[`integer_consecutive_sign_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1706\n +[`exponent_sign_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1728\n +[`exponent_consecutive_sign_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1754\n +[`base_prefix_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1777\n +[`base_prefix_leading_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1809\n +[`base_prefix_trailing_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1841\n +[`base_prefix_consecutive_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1870\n +[`base_suffix_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1883\n +[`base_suffix_leading_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1916\n +[`base_suffix_trailing_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1939\n +[`base_suffix_consecutive_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1967\n [`integer_internal_digit_separator`]: 
https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L793\n [`fraction_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L805\n [`exponent_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L817\n @@ -742,7 +742,7 @@ impl NumberFormatBuilder { supports_writing_floats: true, required_base_prefix: false, required_base_suffix: false, - start_digit_separator: true, + start_digit_separator: cfg!(feature = "format"), integer_sign_digit_separator: false, integer_consecutive_sign_digit_separator: false, exponent_sign_digit_separator: false, @@ -3729,26 +3729,15 @@ impl NumberFormatBuilder { /// /// #[inline(always)] @@ -3784,17 +3773,16 @@ impl NumberFormatBuilder { /// /// #[inline(always)] @@ -3829,11 +3817,11 @@ impl NumberFormatBuilder { /// .exponent_sign_digit_separator(true) /// .build_strict(); /// assert_eq!(parse_with_options::(b"1.0e1", &PF_OPTS), Ok(10.0)); - /// assert_eq!(parse_with_options::(b"1.0e_", &PF_OPTS), Err(Error::InvalidDigit(4))); - /// assert_eq!(parse_with_options::(b"1.0e_1", &PF_OPTS), Err(Error::InvalidDigit(4))); + /// assert_eq!(parse_with_options::(b"1.0e_", &PF_OPTS), Err(Error::EmptyExponent(4))); + /// assert_eq!(parse_with_options::(b"1.0e_1", &PF_OPTS), Err(Error::EmptyExponent(4))); /// assert_eq!(parse_with_options::(b"1.0e_+1", &PF_OPTS), Ok(10.0)); - /// assert_eq!(parse_with_options::(b"1.0e__+1", &PF_OPTS), Err(Error::InvalidDigit(4))); - /// assert_eq!(parse_with_options::(b"1.0e+_1", &PF_OPTS), Err(Error::InvalidDigit(4))); + /// assert_eq!(parse_with_options::(b"1.0e__+1", &PF_OPTS), Err(Error::EmptyExponent(4))); + /// assert_eq!(parse_with_options::(b"1.0e+_1", &PF_OPTS), Err(Error::EmptyExponent(5))); /// ``` /// --> #[inline(always)] @@ -3873,11 +3861,11 @@ impl NumberFormatBuilder { /// .exponent_consecutive_sign_digit_separator(true) /// 
.build_strict(); /// assert_eq!(parse_with_options::(b"1.0e1", &PF_OPTS), Ok(10.0)); - /// assert_eq!(parse_with_options::(b"1.0e_", &PF_OPTS), Err(Error::InvalidDigit(4))); - /// assert_eq!(parse_with_options::(b"1.0e_1", &PF_OPTS), Err(Error::InvalidDigit(4))); + /// assert_eq!(parse_with_options::(b"1.0e_", &PF_OPTS), Err(Error::EmptyExponent(4))); + /// assert_eq!(parse_with_options::(b"1.0e_1", &PF_OPTS), Err(Error::EmptyExponent(4))); /// assert_eq!(parse_with_options::(b"1.0e_+1", &PF_OPTS), Ok(10.0)); /// assert_eq!(parse_with_options::(b"1.0e__+1", &PF_OPTS), Ok(10.0)); - /// assert_eq!(parse_with_options::(b"1.0e+_1", &PF_OPTS), Err(Error::InvalidDigit(4))); + /// assert_eq!(parse_with_options::(b"1.0e+_1", &PF_OPTS), Err(Error::EmptyExponent(5))); /// ``` /// --> #[inline(always)] @@ -4973,8 +4961,8 @@ impl NumberFormatBuilder { /// Allow digit separators in all locations for all components. /// /// This enables leading, trailing, internal, and consecutive digit - /// separators for the integer, fraction, and exponent components. Defaults - /// to [`false`]. + /// separators for the integer, fraction, exponent, special, sign, base + /// prefix, and base suffix components. Defaults to [`false`]. /// /// # Used For /// @@ -4987,6 +4975,13 @@ impl NumberFormatBuilder { self = self.fraction_digit_separator_flags(flag); self = self.exponent_digit_separator_flags(flag); self = self.special_digit_separator(flag); + self = self.start_digit_separator(flag); + self = self.sign_digit_separator_flags(flag); + #[cfg(feature = "power-of-two")] + { + self = self.base_prefix_digit_separator_flags(flag); + self = self.base_suffix_digit_separator_flags(flag); + } self } @@ -5045,6 +5040,61 @@ impl NumberFormatBuilder { self } + /// Set all sign digit separator flag masks. + /// + /// This enables digit separators, including consecutive ones, + /// for the integer and exponent formats. Defaults to [`false`]. 
+ /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[inline(always)] + #[cfg(feature = "format")] + pub const fn sign_digit_separator_flags(mut self, flag: bool) -> Self { + self = self.integer_sign_digit_separator(flag); + self = self.integer_consecutive_sign_digit_separator(flag); + self = self.exponent_sign_digit_separator(flag); + self = self.exponent_consecutive_sign_digit_separator(flag); + self + } + + /// Set all base prefix digit separator flag masks. + /// + /// This enables leading, trailing, internal, and consecutive digit + /// separators for the base prefix component. Defaults to [`false`]. + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[cfg(all(feature = "format", feature = "power-of-two"))] + pub const fn base_prefix_digit_separator_flags(mut self, flag: bool) -> Self { + self = self.base_prefix_internal_digit_separator(flag); + self = self.base_prefix_leading_digit_separator(flag); + self = self.base_prefix_trailing_digit_separator(flag); + self = self.base_prefix_consecutive_digit_separator(flag); + self + } + + /// Set all base suffix digit separator flag masks. + /// + /// This enables leading, trailing, internal, and consecutive digit + /// separators for the base suffix component. Defaults to [`false`]. + /// + /// # Used For + /// + /// - Parse Float + /// - Parse Integer + #[cfg(all(feature = "format", feature = "power-of-two"))] + pub const fn base_suffix_digit_separator_flags(mut self, flag: bool) -> Self { + self = self.base_suffix_internal_digit_separator(flag); + self = self.base_suffix_leading_digit_separator(flag); + self = self.base_suffix_trailing_digit_separator(flag); + self = self.base_suffix_consecutive_digit_separator(flag); + self + } + // BUILDER /// Create 128-bit, packed number format struct from builder options. 
@@ -5089,19 +5139,19 @@ impl NumberFormatBuilder { self.supports_writing_floats, SUPPORTS_WRITING_FLOATS ; self.required_base_prefix, REQUIRED_BASE_PREFIX ; self.required_base_suffix, REQUIRED_BASE_SUFFIX ; - self.start_digit_separator, START_DIGIT_SEPARATOR ; - self.integer_sign_digit_separator, INTEGER_SIGN_DIGIT_SEPARATOR ; - self.integer_consecutive_sign_digit_separator, INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR ; - self.exponent_sign_digit_separator, EXPONENT_SIGN_DIGIT_SEPARATOR ; - self.exponent_consecutive_sign_digit_separator, EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR ; - self.base_prefix_internal_digit_separator, BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR ; - self.base_prefix_leading_digit_separator, BASE_PREFIX_LEADING_DIGIT_SEPARATOR ; - self.base_prefix_trailing_digit_separator, BASE_PREFIX_TRAILING_DIGIT_SEPARATOR ; - self.base_prefix_consecutive_digit_separator, BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR ; - self.base_suffix_internal_digit_separator, BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR ; - self.base_suffix_leading_digit_separator, BASE_SUFFIX_LEADING_DIGIT_SEPARATOR ; - self.base_suffix_trailing_digit_separator, BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR ; - self.base_suffix_consecutive_digit_separator, BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR ; + self.start_digit_separator, START_DIGIT_SEPARATOR ; + self.integer_sign_digit_separator, INTEGER_SIGN_DIGIT_SEPARATOR ; + self.integer_consecutive_sign_digit_separator, INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR ; + self.exponent_sign_digit_separator, EXPONENT_SIGN_DIGIT_SEPARATOR ; + self.exponent_consecutive_sign_digit_separator, EXPONENT_CONSECUTIVE_SIGN_DIGIT_SEPARATOR ; + self.base_prefix_internal_digit_separator, BASE_PREFIX_INTERNAL_DIGIT_SEPARATOR ; + self.base_prefix_leading_digit_separator, BASE_PREFIX_LEADING_DIGIT_SEPARATOR ; + self.base_prefix_trailing_digit_separator, BASE_PREFIX_TRAILING_DIGIT_SEPARATOR ; + self.base_prefix_consecutive_digit_separator, BASE_PREFIX_CONSECUTIVE_DIGIT_SEPARATOR ; + 
self.base_suffix_internal_digit_separator, BASE_SUFFIX_INTERNAL_DIGIT_SEPARATOR ; + self.base_suffix_leading_digit_separator, BASE_SUFFIX_LEADING_DIGIT_SEPARATOR ; + self.base_suffix_trailing_digit_separator, BASE_SUFFIX_TRAILING_DIGIT_SEPARATOR ; + self.base_suffix_consecutive_digit_separator, BASE_SUFFIX_CONSECUTIVE_DIGIT_SEPARATOR ; self.integer_internal_digit_separator, INTEGER_INTERNAL_DIGIT_SEPARATOR ; self.fraction_internal_digit_separator, FRACTION_INTERNAL_DIGIT_SEPARATOR ; self.exponent_internal_digit_separator, EXPONENT_INTERNAL_DIGIT_SEPARATOR ; diff --git a/lexical-util/src/iterator.rs b/lexical-util/src/iterator.rs index b85edb5e..ca6ea945 100644 --- a/lexical-util/src/iterator.rs +++ b/lexical-util/src/iterator.rs @@ -115,13 +115,23 @@ pub unsafe trait Iter<'a> { Self::IS_CONTIGUOUS } + /// Get a value at an index without stepping to it from the underlying + /// buffer. + /// + /// This does **NOT** skip digits, and directly fetches the item + /// from the underlying buffer, relative to the current cursor. + #[inline(always)] + fn get(&self, index: usize) -> Option<&'a u8> { + self.get_buffer().get(self.cursor() + index) + } + /// Get the next value available without consuming it. /// /// This does **NOT** skip digits, and directly fetches the item /// from the underlying buffer. #[inline(always)] fn first(&self) -> Option<&'a u8> { - self.get_buffer().get(self.cursor()) + self.get(0) } /// Check if the next element is a given value, in a case- @@ -463,7 +473,7 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { match self.first() { Some(&b'+') => (false, true), Some(&b'-') => (true, true), - _ => (false, false) + _ => (false, false), } } } diff --git a/lexical-util/src/skip.rs b/lexical-util/src/skip.rs index 66a3ff23..af28c365 100644 --- a/lexical-util/src/skip.rs +++ b/lexical-util/src/skip.rs @@ -1404,9 +1404,108 @@ macro_rules! skip_iterator_digits_iter_base { }; } +/// Internal helper for parsing the sign once a given index is known. 
+#[inline(always)] +fn parse_sign_impl(bytes: &[u8], index: usize) -> (bool, bool) { + match bytes.get(index) { + Some(&b'+') => (false, true), + Some(&b'-') => (true, true), + _ => (false, false), + } +} + +/// Uses the format flags to parse an optional sign, skipping leading digit separators if allowed. +#[inline(always)] +fn parse_sign<'a, T>( + iter: &mut T, + digit_separator: u8, + can_skip: bool, + consecutive: bool, +) -> (bool, bool) +where + T: DigitsIter<'a>, +{ + let bytes = iter.get_buffer(); + let mut index = iter.cursor(); + if digit_separator != 0 && can_skip { + if consecutive { + while bytes.get(index) == Some(&digit_separator) { + index += 1; + } + } else if bytes.get(index) == Some(&digit_separator) { + index += 1; + } + // only advance the internal state if we have a sign + // otherwise, we need to keep the start exactly where + // the buffer started + match parse_sign_impl(bytes, index) { + (is_negative, true) => { + // SAFETY: safe, since the index was derived from skipping + // digit separators above, that is, it must be < the buffer length. + // This was validated by `parse_sign_impl`, which only returns + // `(_, true)` when `bytes.get(index)` is `Some(...)`. + unsafe { iter.set_cursor(index) }; + (is_negative, true) + }, + (_, false) => (false, false), + } + } else { + parse_sign_impl(bytes, index) + } +} + +/// Parse specifically the integer sign component. +#[inline(always)] +fn integer_parse_sign<'a, T, const FORMAT: u128>(iter: &mut T) -> (bool, bool) +where + T: DigitsIter<'a>, +{ + let format = NumberFormat::<{ FORMAT }> {}; + parse_sign( + iter, + format.digit_separator(), + format.integer_sign_digit_separator(), + format.integer_consecutive_sign_digit_separator(), + ) +} + +/// Parse specifically the fraction sign component (unsupported: always panics via `unimplemented!()`). +#[inline(always)] +fn fraction_parse_sign<'a, T, const FORMAT: u128>(iter: &mut T) -> (bool, bool) +where + T: DigitsIter<'a>, +{ + _ = iter; + unimplemented!() +} + +/// Parse specifically the exponent sign component.
+#[inline(always)] +fn exponent_parse_sign<'a, T, const FORMAT: u128>(iter: &mut T) -> (bool, bool) +where + T: DigitsIter<'a>, +{ + let format = NumberFormat::<{ FORMAT }> {}; + parse_sign( + iter, + format.digit_separator(), + format.exponent_sign_digit_separator(), + format.exponent_consecutive_sign_digit_separator(), + ) +} + /// Create impl `ByteIter` block for skip iterator. macro_rules! skip_iterator_bytesiter_impl { - ($iterator:ident, $mask:ident, $count:ident, $i:ident, $l:ident, $t:ident, $c:ident) => { + ( + $iterator:ident, + $mask:ident, + $count:ident, + $i:ident, + $l:ident, + $t:ident, + $c:ident, + $sign_parser:ident $(,)? + ) => { unsafe impl<'a: 'b, 'b, const FORMAT: u128> Iter<'a> for $iterator<'a, 'b, FORMAT> { skip_iterator_iter_base!(FORMAT, $mask, $count); } @@ -1480,6 +1579,11 @@ macro_rules! skip_iterator_bytesiter_impl { let format = NumberFormat::<{ FORMAT }> {}; char_is_digit_const(value, format.mantissa_radix()) } + + #[inline(always)] + fn parse_sign(&mut self) -> (bool, bool) { + $sign_parser::<_, FORMAT>(self) + } } }; } @@ -1497,7 +1601,8 @@ skip_iterator_bytesiter_impl!( INTEGER_INTERNAL_DIGIT_SEPARATOR, INTEGER_LEADING_DIGIT_SEPARATOR, INTEGER_TRAILING_DIGIT_SEPARATOR, - INTEGER_CONSECUTIVE_DIGIT_SEPARATOR + INTEGER_CONSECUTIVE_DIGIT_SEPARATOR, + integer_parse_sign, ); // FRACTION DIGITS ITERATOR @@ -1516,7 +1621,8 @@ skip_iterator_bytesiter_impl!( FRACTION_INTERNAL_DIGIT_SEPARATOR, FRACTION_LEADING_DIGIT_SEPARATOR, FRACTION_TRAILING_DIGIT_SEPARATOR, - FRACTION_CONSECUTIVE_DIGIT_SEPARATOR + FRACTION_CONSECUTIVE_DIGIT_SEPARATOR, + fraction_parse_sign, ); // EXPONENT DIGITS ITERATOR @@ -1535,7 +1641,8 @@ skip_iterator_bytesiter_impl!( EXPONENT_INTERNAL_DIGIT_SEPARATOR, EXPONENT_LEADING_DIGIT_SEPARATOR, EXPONENT_TRAILING_DIGIT_SEPARATOR, - EXPONENT_CONSECUTIVE_DIGIT_SEPARATOR + EXPONENT_CONSECUTIVE_DIGIT_SEPARATOR, + exponent_parse_sign, ); // SPECIAL DIGITS ITERATOR From 82ac816fe0bd24f835bac97ac7d9c37d60052076 Mon Sep 17 00:00:00 
2001 From: Alex Huszagh Date: Thu, 16 Jan 2025 18:13:52 -0600 Subject: [PATCH 10/18] Migrate to using raw digits to simplify our APIs. This keeps our digit counts separate for each component rather than keep them as one of a larger memo, since we can handle each individual digit separator component correctly like this, and it simplifies and improves performance when one component doesn't allow digit separators. --- lexical-parse-float/src/binary.rs | 14 +- lexical-parse-float/src/lib.rs | 2 +- lexical-parse-float/src/parse.rs | 61 +++--- lexical-parse-float/tests/api_tests.rs | 45 +++- lexical-parse-float/tests/parse_tests.rs | 8 +- lexical-parse-integer/src/algorithm.rs | 133 +++++------- lexical-util/src/format_builder.rs | 94 ++++---- lexical-util/src/format_flags.rs | 26 ++- lexical-util/src/iterator.rs | 81 +++++-- lexical-util/src/noskip.rs | 36 +-- lexical-util/src/skip.rs | 241 ++++++++++++++------- lexical-util/tests/feature_format_tests.rs | 3 +- lexical-util/tests/iterator_tests.rs | 14 +- lexical-write-float/src/lib.rs | 2 +- 14 files changed, 464 insertions(+), 296 deletions(-) diff --git a/lexical-parse-float/src/binary.rs b/lexical-parse-float/src/binary.rs index 1e787deb..a7b49283 100644 --- a/lexical-parse-float/src/binary.rs +++ b/lexical-parse-float/src/binary.rs @@ -101,7 +101,7 @@ pub fn binary(num: &Number, lossy: bool) -> Ext #[cfg_attr(not(feature = "compact"), inline(always))] #[allow(unused_mut)] pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>( - mut iter: Iter, + iter: &mut Iter, mantissa: &mut u64, step: &mut usize, overflowed: &mut bool, @@ -118,7 +118,7 @@ pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>( debug_assert!(radix < 16, "larger radices will wrap on radix^8"); let radix8 = format.radix8() as u64; while *step > 8 { - if let Some(v) = algorithm::try_parse_8digits::(&mut iter) { + if let Some(v) = algorithm::try_parse_8digits::(iter) { *mantissa = mantissa.wrapping_mul(radix8).wrapping_add(v); *step -= 8; } else { @@ 
-169,9 +169,10 @@ pub fn slow_binary(num: Number) -> ExtendedFloa // Parse the integer digits. let mut step = u64_step(radix); let mut integer = num.integer.bytes::(); - integer.integer_iter().skip_zeros(); + let mut integer_iter = integer.integer_iter(); + integer_iter.skip_zeros(); parse_u64_digits::<_, FORMAT>( - integer.integer_iter(), + &mut integer_iter, &mut mantissa, &mut step, &mut overflow, @@ -181,11 +182,12 @@ pub fn slow_binary(num: Number) -> ExtendedFloa // Parse the fraction digits. if let Some(fraction) = num.fraction { let mut fraction = fraction.bytes::(); + let mut fraction_iter = fraction.fraction_iter(); if mantissa == 0 { - fraction.fraction_iter().skip_zeros(); + fraction_iter.skip_zeros(); } parse_u64_digits::<_, FORMAT>( - fraction.fraction_iter(), + &mut fraction_iter, &mut mantissa, &mut step, &mut overflow, diff --git a/lexical-parse-float/src/lib.rs b/lexical-parse-float/src/lib.rs index ad36d3f4..f3307300 100644 --- a/lexical-parse-float/src/lib.rs +++ b/lexical-parse-float/src/lib.rs @@ -59,7 +59,7 @@ //! [Comprehensive Configuration]: #comprehensive-configuration //! //! ```rust -//! # #[cfg(feature = "radix")] { +//! # #[cfg(feature = "format")] { //! # use core::str; //! use lexical_parse_float::{Error, FromLexicalWithOptions, NumberFormatBuilder, Options}; //! diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs index 7f95e440..2dc85f00 100644 --- a/lexical-parse-float/src/parse.rs +++ b/lexical-parse-float/src/parse.rs @@ -541,6 +541,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // NOTE: `lz_prefix` is if we had a leading zero when // checking for a base prefix: it is not if the prefix // exists or not. + // TODO: MIGRATE TO BASE PREFIX LOGIC #[allow(unused_variables)] let mut lz_prefix = false; #[cfg(all(feature = "format", feature = "power-of-two"))] @@ -568,12 +569,14 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // Parse our integral digits. 
let mut mantissa = 0_u64; let start = byte.clone(); + let mut integer_iter = byte.integer_iter(); + let start_count = integer_iter.digits(); #[cfg(not(feature = "compact"))] - parse_8digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa); - parse_digits(byte.integer_iter(), format.mantissa_radix(), |digit| { + parse_8digits::<_, FORMAT>(&mut integer_iter, &mut mantissa); + parse_digits(&mut integer_iter, format.mantissa_radix(), |digit| { mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64); }); - let mut n_digits = byte.current_count() - start.current_count(); + let mut n_digits = integer_iter.digits_since(start_count); #[cfg(feature = "format")] let n_before_dot = n_digits; #[cfg(feature = "format")] @@ -598,7 +601,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // SAFETY: safe, since `n_digits <= start.as_slice().len()`. // This is since `byte.len() >= start.len()` but has to have // the same end bounds (that is, `start = byte.clone()`), so - // `0 <= byte.current_count() <= start.current_count() <= start.lent()` + // `0 <= byte.digits() <= start.digits() <= start.len()` // so, this will always return only the integer digits. // // NOTE: Removing this code leads to ~10% reduction in parsing @@ -609,7 +612,8 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // Check if integer leading zeros are disabled. 
#[cfg(feature = "format")] if !lz_prefix && format.no_float_leading_zeros() { - if integer_digits.len() > 1 && integer_digits.first() == Some(&b'0') { + let mut integer = integer_digits.bytes::(); + if integer_digits.len() > 1 && integer.integer_iter().peek() == Some(&b'0') { return Err(Error::InvalidLeadingZeros(start.cursor())); } } @@ -627,17 +631,19 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // SAFETY: byte cannot be empty due to `first_is` unsafe { byte.step_unchecked() }; let before = byte.clone(); + let mut fraction_iter = byte.fraction_iter(); + let start_count = fraction_iter.digits(); #[cfg(not(feature = "compact"))] - parse_8digits::<_, FORMAT>(byte.fraction_iter(), &mut mantissa); - parse_digits(byte.fraction_iter(), format.mantissa_radix(), |digit| { + parse_8digits::<_, FORMAT>(&mut fraction_iter, &mut mantissa); + parse_digits(&mut fraction_iter, format.mantissa_radix(), |digit| { mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64); }); - n_after_dot = byte.current_count() - before.current_count(); + n_after_dot = fraction_iter.digits_since(start_count); // NOTE: We can't use the number of digits to extract the slice for // non-contiguous iterators, but we also need to the number of digits // for our value calculation. We store both, and let the compiler know // to optimize it out when not needed. 
- let b_after_dot = if cfg!(feature = "format") && !byte.fraction_iter().is_contiguous() { + let b_after_dot = if cfg!(feature = "format") && !fraction_iter.is_contiguous() { byte.cursor() - before.cursor() } else { n_after_dot @@ -672,9 +678,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // check to see if we have any invalid leading zeros n_digits += n_after_dot; - if format.required_mantissa_digits() - && (n_digits == 0 || (cfg!(feature = "format") && byte.current_count() == 0)) - { + if format.required_mantissa_digits() && n_digits == 0 { let any_digits = start.clone().integer_iter().peek().is_some(); // NOTE: This is because numbers like `_12.34` have significant digits, // they just don't have a valid digit (#97). @@ -731,14 +735,15 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( } let is_negative_exponent = parse_exponent_sign(&mut byte)?; - let before = byte.current_count(); - parse_digits(byte.exponent_iter(), format.exponent_radix(), |digit| { + let mut exponent_iter = byte.exponent_iter(); + let start_count = exponent_iter.digits(); + parse_digits(&mut exponent_iter, format.exponent_radix(), |digit| { if explicit_exponent < 0x10000000 { explicit_exponent *= format.exponent_radix() as i64; explicit_exponent += digit as i64; } }); - if format.required_exponent_digits() && byte.current_count() - before == 0 { + if format.required_exponent_digits() && exponent_iter.digits_since(start_count) == 0 { return Err(Error::EmptyExponent(byte.cursor())); } // Handle our sign, and get the explicit part of the exponent. @@ -755,10 +760,10 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // Check to see if we have a valid base suffix. // We've already trimmed any leading digit separators here, so we can be safe // that the first character **is not** a digit separator. 
- #[allow(unused_variables)] - let base_suffix = format.base_suffix(); + // FIXME: Improve parsing of this #[cfg(all(feature = "format", feature = "power-of-two"))] - if base_suffix != 0 { + if format.has_base_suffix() { + let base_suffix = format.base_suffix(); let is_suffix = byte.first_is(base_suffix, format.case_sensitive_base_suffix()); if is_suffix { // SAFETY: safe since `byte.len() >= 1`. @@ -814,8 +819,9 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( let mut integer = integer_digits.bytes::<{ FORMAT }>(); // Skip leading zeros, so we can use the step properly. let mut integer_iter = integer.integer_iter(); + let integer_start = integer_iter.digits(); integer_iter.skip_zeros(); - parse_u64_digits::<_, FORMAT>(integer_iter, &mut mantissa, &mut step); + parse_u64_digits::<_, FORMAT>(&mut integer_iter, &mut mantissa, &mut step); // NOTE: With the format feature enabled and non-contiguous iterators, we can // have null fraction digits even if step was not 0. We want to make the // none check as late in there as possible: any of them should @@ -828,7 +834,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( || (cfg!(feature = "format") && !byte.is_contiguous() && fraction_digits.is_none()) { // Filled our mantissa with just the integer. - int_end - integer.current_count() as i64 + int_end - integer_iter.digits_since(integer_start) as i64 } else { // We know this can't be a None since we had more than 19 // digits previously, so we overflowed a 64-bit integer, @@ -837,12 +843,13 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // point, and at least 1 fractional digit. let mut fraction = fraction_digits.unwrap().bytes::<{ FORMAT }>(); let mut fraction_iter = fraction.fraction_iter(); + let fraction_start = fraction_iter.digits(); // Skip leading zeros, so we can use the step properly. 
if mantissa == 0 { fraction_iter.skip_zeros(); } - parse_u64_digits::<_, FORMAT>(fraction_iter, &mut mantissa, &mut step); - -(fraction.current_count() as i64) + parse_u64_digits::<_, FORMAT>(&mut fraction_iter, &mut mantissa, &mut step); + -(fraction_iter.digits_since(fraction_start) as i64) }; if format.mantissa_radix() == format.exponent_base() { exponent = implicit_exponent; @@ -898,7 +905,7 @@ pub fn parse_complete_number<'a, const FORMAT: u128>( /// Iteratively parse and consume digits from bytes. #[inline(always)] -pub fn parse_digits<'a, Iter, Cb>(mut iter: Iter, radix: u32, mut cb: Cb) +pub fn parse_digits<'a, Iter, Cb>(iter: &mut Iter, radix: u32, mut cb: Cb) where Iter: DigitsIter<'a>, Cb: FnMut(u32), @@ -923,7 +930,7 @@ where /// The iterator must be of the significant digits, not the exponent. #[inline(always)] #[cfg(not(feature = "compact"))] -pub fn parse_8digits<'a, Iter, const FORMAT: u128>(mut iter: Iter, mantissa: &mut u64) +pub fn parse_8digits<'a, Iter, const FORMAT: u128>(iter: &mut Iter, mantissa: &mut u64) where Iter: DigitsIter<'a>, { @@ -934,7 +941,7 @@ where let radix8 = format.radix8() as u64; // Can do up to 2 iterations without overflowing, however, for large // inputs, this is much faster than any other alternative. - while let Some(v) = algorithm::try_parse_8digits::(&mut iter) { + while let Some(v) = algorithm::try_parse_8digits::(iter) { *mantissa = mantissa.wrapping_mul(radix8).wrapping_add(v); } } @@ -948,7 +955,7 @@ where /// must be of the significant digits, not the exponent. 
#[cfg_attr(not(feature = "compact"), inline(always))] pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>( - mut iter: Iter, + iter: &mut Iter, mantissa: &mut u64, step: &mut usize, ) where @@ -963,7 +970,7 @@ pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>( debug_assert!(radix < 16, "radices over 16 will overflow with radix^8"); let radix8 = format.radix8() as u64; while *step > 8 { - if let Some(v) = algorithm::try_parse_8digits::(&mut iter) { + if let Some(v) = algorithm::try_parse_8digits::(iter) { *mantissa = mantissa.wrapping_mul(radix8).wrapping_add(v); *step -= 8; } else { diff --git a/lexical-parse-float/tests/api_tests.rs b/lexical-parse-float/tests/api_tests.rs index 002e6980..04d69e57 100644 --- a/lexical-parse-float/tests/api_tests.rs +++ b/lexical-parse-float/tests/api_tests.rs @@ -1051,7 +1051,7 @@ fn f64_exponent_leading_digit_separator_test() { .build_strict(); const OPTIONS: Options = Options::new(); assert!(f64::from_lexical_with_options::(b"31.01e7_1", &OPTIONS).is_err()); - assert!(f64::from_lexical_with_options::(b"31.01e_71", &OPTIONS).is_ok()); + assert_eq!(f64::from_lexical_with_options::(b"31.01e_71", &OPTIONS), Ok(31.01e71)); assert!(f64::from_lexical_with_options::(b"31.01e71_", &OPTIONS).is_err()); } @@ -1343,3 +1343,46 @@ fn require_base_prefix_test() { let value = f64::from_lexical_with_options::(b"-0d12345", &OPTIONS); assert_eq!(value, Err(Error::MissingBaseSuffix(8))); } + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_prefix_digit_separator_edge_cases_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const NO_PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"_+12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(1))); + + let value = f64::from_lexical_with_options::(b"+_12345", &OPTIONS); + assert_eq!(value, Ok(12345.0)); 
+ + let value = f64::from_lexical_with_options::(b"+12345e_+23", &OPTIONS); + assert_eq!(value, Err(Error::EmptyExponent(8))); + + let value = f64::from_lexical_with_options::(b"+12345e+_23", &OPTIONS); + assert_eq!(value, Ok(1.2345e27)); + + const PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .required_base_prefix(true) + .leading_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"_+0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = f64::from_lexical_with_options::(b"+_0d12345", &OPTIONS); + assert_eq!(value, Ok(12345.0)); + + // TODO: This fails + let value = f64::from_lexical_with_options::(b"+0d_12345", &OPTIONS); + assert_eq!(value, Ok(12345.0)); + + // TODO:> Add suffix +} diff --git a/lexical-parse-float/tests/parse_tests.rs b/lexical-parse-float/tests/parse_tests.rs index 3f6f2634..a8b33830 100644 --- a/lexical-parse-float/tests/parse_tests.rs +++ b/lexical-parse-float/tests/parse_tests.rs @@ -154,7 +154,7 @@ fn parse_digits_test() { let mut mantissa: u64 = 0; let digits = b"1234567890123456789012345"; let mut byte = digits.bytes::<{ FORMAT }>(); - parse::parse_digits(byte.integer_iter(), 10, |digit| { + parse::parse_digits(&mut byte.integer_iter(), 10, |digit| { mantissa = mantissa.wrapping_mul(10).wrapping_add(digit as _); }); assert_eq!(mantissa, 1096246371337559929); @@ -167,7 +167,7 @@ fn parse_8digits_test() { let mut mantissa: u64 = 0; let digits = b"1234567890123456789012345"; let mut byte = digits.bytes::<{ FORMAT }>(); - parse::parse_8digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa); + parse::parse_8digits::<_, FORMAT>(&mut byte.integer_iter(), &mut mantissa); // We don't check for overflow. 
assert_eq!(mantissa, 11177671081359486962); } @@ -179,7 +179,7 @@ fn parse_u64_digits_test() { let mut step = u64_step(10); let digits = b"1234567890123456789012345"; let mut byte = digits.bytes::<{ FORMAT }>(); - parse::parse_u64_digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa, &mut step); + parse::parse_u64_digits::<_, FORMAT>(&mut byte.integer_iter(), &mut mantissa, &mut step); assert_eq!(mantissa, 1234567890123456789); assert_eq!(step, 0); @@ -187,7 +187,7 @@ fn parse_u64_digits_test() { let mut step = u64_step(10); let digits = b"1234567890123456789"; let mut byte = digits.bytes::<{ FORMAT }>(); - parse::parse_u64_digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa, &mut step); + parse::parse_u64_digits::<_, FORMAT>(&mut byte.integer_iter(), &mut mantissa, &mut step); assert_eq!(mantissa, 1234567890123456789); assert_eq!(step, 0); } diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index 591376a3..e982003e 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -58,14 +58,29 @@ macro_rules! required_digits { }; } +/// If a buffer is empty, return the value or an error. +macro_rules! maybe_into_empty { + ($iter:ident, $into_ok:ident) => { + if $iter.is_buffer_empty() { + // Our default format **ALWAYS** requires significant digits, however, + // we can have cases where they are optional with the `format` feature. + #[cfg(not(feature = "format"))] + into_error!(Empty, $iter.cursor()); + + #[cfg(feature = "format")] + $into_ok!(T::ZERO, $iter.cursor(), false) + } + }; +} + /// Return an value for a complete parser. macro_rules! into_ok_complete { - ($value:expr, $index:expr, $count:expr) => {{ + ($value:expr, $index:expr, $any:expr) => {{ #[cfg(not(feature = "format"))] return Ok(as_cast($value)); #[cfg(feature = "format")] - if required_digits!() && $count == 0 { + if required_digits!() && !$any { into_error!(Empty, $index); } else { return Ok(as_cast($value)); @@ -75,12 +90,12 @@ macro_rules! 
into_ok_complete { /// Return an value and index for a partial parser. macro_rules! into_ok_partial { - ($value:expr, $index:expr, $count:expr) => {{ + ($value:expr, $index:expr, $any:expr) => {{ #[cfg(not(feature = "format"))] return Ok((as_cast($value), $index)); #[cfg(feature = "format")] - if required_digits!() && $count == 0 { + if required_digits!() && !$any { into_error!(Empty, $index); } else { return Ok((as_cast($value), $index)); @@ -90,7 +105,7 @@ macro_rules! into_ok_partial { /// Return an error for a complete parser upon an invalid digit. macro_rules! invalid_digit_complete { - ($value:expr, $index:expr, $count:expr) => { + ($value:expr, $index:expr, $any:expr) => { // Don't do any overflow checking here: we don't need it. into_error!(InvalidDigit, $index - 1) }; @@ -99,9 +114,9 @@ macro_rules! invalid_digit_complete { /// Return a value for a partial parser upon an invalid digit. /// This checks for numeric overflow, and returns the appropriate error. macro_rules! invalid_digit_partial { - ($value:expr, $index:expr, $count:expr) => { + ($value:expr, $index:expr, $any:expr) => { // NOTE: The value is already positive/negative - into_ok_partial!($value, $index - 1, $count) + into_ok_partial!($value, $index - 1, $any) }; } @@ -165,7 +180,7 @@ macro_rules! fmt_invalid_digit { } } // Might have handled our base-prefix here. - $invalid_digit!($value, $iter.cursor(), $iter.current_count()) + $invalid_digit!($value, $iter.cursor(), $iter.digits() != 0) }}; } @@ -181,7 +196,7 @@ macro_rules! fmt_invalid_digit { $has_suffix:ident, $is_end:expr $(,)? ) => {{ - $invalid_digit!($value, $iter.cursor(), $iter.current_count()); + $invalid_digit!($value, $iter.cursor(), $iter.digits() != 0); }}; } @@ -657,77 +672,40 @@ macro_rules! algorithm { // The skip version of the iterator automatically coalesces to // the no-skip iterator. 
let mut byte = $bytes.bytes::(); - let radix = NumberFormat::::MANTISSA_RADIX; + let format = NumberFormat:: {}; + let radix = format.mantissa_radix(); let is_negative = parse_sign::(&mut byte)?; let mut iter = byte.integer_iter(); - if iter.is_buffer_empty() { - // Our default format **ALWAYS** requires significant digits, however, - // we can have cases where we don - #[cfg(not(feature = "format"))] - into_error!(Empty, iter.cursor()); - - #[cfg(feature = "format")] - if required_digits!() { - into_error!(Empty, iter.cursor()); - } else { - $into_ok!(T::ZERO, iter.cursor(), 0) - } + maybe_into_empty!(iter, $into_ok); + + // skip and validate an optional base prefix + #[cfg(all(feature = "format", feature = "power-of-two"))] + if iter.read_base_prefix() { + maybe_into_empty!(iter, $into_ok); + } else if format.required_base_prefix() { + return Err(Error::MissingBasePrefix(iter.cursor())); } - // Feature-gate a lot of format-only code here to simplify analysis with our branching - // We only want to skip the zeros if have either require a base prefix or we don't - // allow integer leading zeros, since the skip is expensive - #[allow(unused_variables, unused_mut)] - let mut start_index = iter.cursor(); - #[cfg_attr(not(feature = "format"), allow(unused_variables))] - let format = NumberFormat:: {}; + // NOTE: always do a peek so any leading digit separators + // are skipped, and we can get the correct index #[cfg(feature = "format")] - if format.has_base_prefix() || format.no_integer_leading_zeros() { - // Skip any leading zeros. We want to do our check if it can't possibly overflow after. - // For skipping digit-based formats, this approximation is a way over estimate. + if format.no_integer_leading_zeros() && iter.peek() == Some(&b'0') { // NOTE: Skipping zeros is **EXPENSIVE* so we skip that without our format feature + let index = iter.cursor(); let zeros = iter.skip_zeros(); - start_index += zeros; - - // Now, check to see if we have a valid base prefix. 
- let mut is_prefix = false; - let base_prefix = format.base_prefix(); - if base_prefix != 0 && zeros == 1 { - // Check to see if the next character is the base prefix. - // We must have a format like `0x`, `0d`, `0o`. Note: - if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some() { - is_prefix = true; - if iter.is_buffer_empty() { - into_error!(Empty, iter.cursor()); - } else { - start_index += 1; - } - } - } - if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_prefix() && !is_prefix { - return Err(Error::MissingBasePrefix(iter.cursor())); - } - - // If we have a format that doesn't accept leading zeros, - // check if the next value is invalid. It's invalid if the - // first is 0, and the next is not a valid digit. - if !is_prefix && format.no_integer_leading_zeros() && zeros != 0 { - // Cannot have a base prefix and no leading zeros. - let index = iter.cursor() - zeros; - if zeros > 1 { - into_error!(InvalidLeadingZeros, index); - } - // NOTE: Zeros has to be 0 here, so our index == 1 or 2 (depending on sign) - match iter.peek().map(|&c| char_to_digit_const(c, format.radix())) { - // Valid digit, we have an invalid value. - Some(Some(_)) => into_error!(InvalidLeadingZeros, index), - // Have a non-digit character that follows. - Some(None) => $invalid_digit!(::ZERO, iter.cursor() + 1, iter.current_count()), - // No digits following, has to be ok - None => $into_ok!(::ZERO, index, iter.current_count()), - }; + if zeros > 1 { + into_error!(InvalidLeadingZeros, index); } + // NOTE: Zeros has to be 1 here, so our index == 1 or 2 (depending on sign) + match iter.peek().map(|&c| char_to_digit_const(c, format.radix())) { + // Valid digit, we have an invalid value. + Some(Some(_)) => into_error!(InvalidLeadingZeros, index), + // Have a non-digit character that follows. 
+ Some(None) => $invalid_digit!(::ZERO, iter.cursor() + 1, iter.digits() != 0), + // No digits following, has to be ok + None => $into_ok!(::ZERO, index, iter.digits() != 0), + }; } // shorter strings cannot possibly overflow so a great optimization @@ -741,9 +719,13 @@ macro_rules! algorithm { // and even if parsing a 64-bit integer is marginally faster, it // culminates in **way** slower performance overall for simple // integers, and no improvement for large integers. - let mut value = T::ZERO; - #[allow(unused_variables, unused_mut)] + #[allow(unused)] let mut has_suffix = false; + // FIXME: This is only used for the parsing of the base suffix. + #[allow(unused)] + let start_index = iter.cursor(); + + let mut value = T::ZERO; if T::IS_SIGNED && cannot_overflow && is_negative { parse_digits_unchecked!( value, @@ -794,11 +776,12 @@ macro_rules! algorithm { ); } - if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_suffix() && !has_suffix { + #[cfg(all(feature = "format", feature = "power-of-two"))] + if format.required_base_suffix() && !has_suffix { return Err(Error::MissingBaseSuffix(iter.cursor())); } - $into_ok!(value, iter.buffer_length(), iter.current_count()) + $into_ok!(value, iter.buffer_length(), iter.digits() != 0) }}; } diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index 3ffaa12e..14cfa72b 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -3901,17 +3901,17 @@ impl NumberFormatBuilder { /// .digit_separator(num::NonZeroU8::new(b'_')) /// .base_prefix_internal_digit_separator(true) /// .build_strict(); - /// assert_eq!(parse_with_options::(b"0x1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"_0x1", &PF_OPTS), Err(Error::InvalidDigit(0))); - /// assert_eq!(parse_with_options::(b"0x_1", &PF_OPTS), Err(Error::InvalidDigit(2))); - /// assert_eq!(parse_with_options::(b"0_x1", &PF_OPTS), Ok(1.0)); - /// 
assert_eq!(parse_with_options::(b"+0_x1", &PF_OPTS), Ok(1.0)); - /// - /// assert_eq!(parse_with_options::(b"0x1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"_0x1", &PI_OPTS), Err(Error::InvalidDigit(0))); - /// assert_eq!(parse_with_options::(b"0x_1", &PI_OPTS), Err(Error::InvalidDigit(2))); - /// assert_eq!(parse_with_options::(b"0_x1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"+0_x1", &PI_OPTS), Ok(1)); + /// assert_eq!(parse_with_options::(b"0d1", &PF_OPTS), Ok(1.0)); + /// assert_eq!(parse_with_options::(b"_0d1", &PF_OPTS), Err(Error::InvalidDigit(0))); + /// assert_eq!(parse_with_options::(b"0d_1", &PF_OPTS), Err(Error::InvalidDigit(2))); + /// assert_eq!(parse_with_options::(b"0_d1", &PF_OPTS), Ok(1.0)); + /// assert_eq!(parse_with_options::(b"+0_d1", &PF_OPTS), Ok(1.0)); + /// + /// assert_eq!(parse_with_options::(b"0d1", &PI_OPTS), Ok(1)); + /// assert_eq!(parse_with_options::(b"_0d1", &PI_OPTS), Err(Error::InvalidDigit(0))); + /// assert_eq!(parse_with_options::(b"0d_1", &PI_OPTS), Err(Error::InvalidDigit(2))); + /// assert_eq!(parse_with_options::(b"0_d1", &PI_OPTS), Ok(1)); + /// assert_eq!(parse_with_options::(b"+0_d1", &PI_OPTS), Ok(1)); /// ``` /// --> #[inline(always)] @@ -3958,26 +3958,26 @@ impl NumberFormatBuilder { /// .start_digit_separator(true) /// .build_strict(); /// assert_eq!(parse_with_options::(b"1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"_1", &PF_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"_+1", &PF_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"_0x1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"0x_1", &PF_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"+_0x1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"_+0x1", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"_1", &PF_OPTS), Err(Error::InvalidDigit(0))); + /// 
assert_eq!(parse_with_options::(b"_+1", &PF_OPTS), Err(Error::InvalidDigit(0))); + /// assert_eq!(parse_with_options::(b"_0d1", &PF_OPTS), Ok(1.0)); + /// assert_eq!(parse_with_options::(b"0d_1", &PF_OPTS), Err(Error::InvalidDigit(2))); + /// assert_eq!(parse_with_options::(b"+_0d1", &PF_OPTS), Ok(1.0)); + /// assert_eq!(parse_with_options::(b"_+0d1", &PF_OPTS), Err(Error::InvalidDigit(0))); /// /// assert_eq!(parse_with_options::(b"1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"_1", &PI_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"_+1", &PI_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"_0x1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"0x_1", &PI_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"+_0x1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"_+0x1", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"_1", &PI_OPTS), Err(Error::InvalidDigit(0))); + /// assert_eq!(parse_with_options::(b"_+1", &PI_OPTS), Err(Error::InvalidDigit(0))); + /// assert_eq!(parse_with_options::(b"_0d1", &PI_OPTS), Ok(1)); + /// assert_eq!(parse_with_options::(b"0d_1", &PI_OPTS), Err(Error::InvalidDigit(2))); + /// assert_eq!(parse_with_options::(b"+_0d1", &PI_OPTS), Ok(1)); + /// assert_eq!(parse_with_options::(b"_+0d1", &PI_OPTS), Err(Error::InvalidDigit(0))); /// /// const NO_START: u128 = NumberFormatBuilder::rebuild(FORMAT) /// .start_digit_separator(false) /// .build_strict(); - /// assert_eq!(parse_with_options::(b"_0x1", &PF_OPTS), Err(Error::Empty(1))); - /// assert_eq!(parse_with_options::(b"_0x1", &PI_OPTS), Err(Error::Empty(1))); + /// assert_eq!(parse_with_options::(b"_0d1", &PF_OPTS), Err(Error::InvalidDigit(0))); + /// assert_eq!(parse_with_options::(b"_0d1", &PI_OPTS), Err(Error::InvalidDigit(0))); /// ``` /// --> #[inline(always)] @@ -4023,18 +4023,18 @@ impl NumberFormatBuilder { /// 
.base_prefix_trailing_digit_separator(true) /// .build_strict(); /// assert_eq!(parse_with_options::(b"1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"0x1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"_0x1", &PF_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"0_x1", &PF_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"0x_1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"0x__1", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0d1", &PF_OPTS), Ok(1.0)); + /// assert_eq!(parse_with_options::(b"_0d1", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0_d1", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0d_1", &PF_OPTS), Ok(1.0)); + /// assert_eq!(parse_with_options::(b"0d__1", &PF_OPTS), Err(Error::InvalidDigit(1))); /// /// assert_eq!(parse_with_options::(b"1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"0x1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"_0x1", &PI_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"0_x1", &PI_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"0x_1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"0x__1", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0d1", &PI_OPTS), Ok(1)); + /// assert_eq!(parse_with_options::(b"_0d1", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0_d1", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0d_1", &PI_OPTS), Ok(1)); + /// assert_eq!(parse_with_options::(b"0d__1", &PI_OPTS), Err(Error::InvalidDigit(1))); /// ``` /// --> #[inline(always)] @@ -4077,17 +4077,17 @@ impl NumberFormatBuilder { /// .base_prefix_trailing_digit_separator(true) /// .base_prefix_consecutive_digit_separator(true) /// .build_strict(); - /// 
assert_eq!(parse_with_options::(b"0x1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"_0x1", &PF_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"0_x1", &PF_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"0x_1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"0x__1", &PF_OPTS), Ok(1.0)); - /// - /// assert_eq!(parse_with_options::(b"0x1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"_0x1", &PI_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"0_x1", &PI_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"0x_1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"0x__1", &PI_OPTS), Ok(1)); + /// assert_eq!(parse_with_options::(b"0d1", &PF_OPTS), Ok(1.0)); + /// assert_eq!(parse_with_options::(b"_0d1", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0_d1", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0d_1", &PF_OPTS), Ok(1.0)); + /// assert_eq!(parse_with_options::(b"0d__1", &PF_OPTS), Ok(1.0)); + /// + /// assert_eq!(parse_with_options::(b"0d1", &PI_OPTS), Ok(1)); + /// assert_eq!(parse_with_options::(b"_0d1", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0_d1", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0d_1", &PI_OPTS), Ok(1)); + /// assert_eq!(parse_with_options::(b"0d__1", &PI_OPTS), Ok(1)); /// ``` /// --> #[inline(always)] @@ -5166,7 +5166,7 @@ impl NumberFormatBuilder { self.exponent_consecutive_digit_separator, EXPONENT_CONSECUTIVE_DIGIT_SEPARATOR ; self.special_digit_separator, SPECIAL_DIGIT_SEPARATOR ; ); - if format & flags::DIGIT_SEPARATOR_FLAG_MASK != 0 { + if format & flags::ALL_DIGIT_SEPARATOR_FLAG_MASK != 0 { format |= (unwrap_or_zero(self.digit_separator) as u128) << flags::DIGIT_SEPARATOR_SHIFT; } diff --git a/lexical-util/src/format_flags.rs 
b/lexical-util/src/format_flags.rs index 6fef1f71..505cc44e 100644 --- a/lexical-util/src/format_flags.rs +++ b/lexical-util/src/format_flags.rs @@ -648,11 +648,8 @@ pub const FLAG_MASK: u128 = SUPPORTS_WRITING_INTEGERS | REQUIRED_BASE_PREFIX | REQUIRED_BASE_SUFFIX | - INTERNAL_DIGIT_SEPARATOR | - LEADING_DIGIT_SEPARATOR | - TRAILING_DIGIT_SEPARATOR | - CONSECUTIVE_DIGIT_SEPARATOR | - SPECIAL_DIGIT_SEPARATOR; + START_DIGIT_SEPARATOR_FLAG_MASK | + ALL_DIGIT_SEPARATOR_FLAG_MASK; /// Mask to extract the flag bits controlling interface parsing. /// @@ -681,18 +678,33 @@ pub const INTERFACE_FLAG_MASK: u128 = CONSECUTIVE_DIGIT_SEPARATOR; /// Mask to extract digit separator flags. +/// NOTE: This **CANNOT** include the special or other digit +/// separators because of how our skip iterators are optimized. +/// We have a 2nd one, `ALL_DIGIT_SEPARATOR_FLAG_MASK`, +/// just in case this is required. #[doc(hidden)] pub const DIGIT_SEPARATOR_FLAG_MASK: u128 = INTERNAL_DIGIT_SEPARATOR | LEADING_DIGIT_SEPARATOR | TRAILING_DIGIT_SEPARATOR | - CONSECUTIVE_DIGIT_SEPARATOR | + CONSECUTIVE_DIGIT_SEPARATOR; + +/// Mask to extract all digit separator flags. +/// NOTE: `START_DIGIT_SEPARATOR` is a modifier and +/// therefore not a digit separator flag. +#[cfg(feature = "power-of-two")] +pub const ALL_DIGIT_SEPARATOR_FLAG_MASK: u128 = + DIGIT_SEPARATOR_FLAG_MASK | SPECIAL_DIGIT_SEPARATOR | - START_DIGIT_SEPARATOR_FLAG_MASK | SIGN_DIGIT_SEPARATOR_FLAG_MASK | BASE_PREFIX_DIGIT_SEPARATOR_FLAG_MASK | BASE_SUFFIX_DIGIT_SEPARATOR_FLAG_MASK; +#[cfg(not(feature = "power-of-two"))] +pub const ALL_DIGIT_SEPARATOR_FLAG_MASK: u128 = + DIGIT_SEPARATOR_FLAG_MASK | + SPECIAL_DIGIT_SEPARATOR; + /// Mask to extract exponent flags.
#[doc(hidden)] pub const EXPONENT_FLAG_MASK: u128 = diff --git a/lexical-util/src/iterator.rs b/lexical-util/src/iterator.rs index ca6ea945..1fcda1f8 100644 --- a/lexical-util/src/iterator.rs +++ b/lexical-util/src/iterator.rs @@ -97,19 +97,15 @@ pub unsafe trait Iter<'a> { /// pass if the cursor was set between the two. unsafe fn set_cursor(&mut self, index: usize); - /// Get the current number of digits returned by the iterator. - /// - /// For contiguous iterators, this can include the sign character, decimal - /// point, and the exponent sign (that is, it is always the cursor). For - /// non-contiguous iterators, this must always be the only the number of - /// digits returned. - /// - /// This is never used for indexing but will be used for API detection. - fn current_count(&self) -> usize; - // PROPERTIES - /// Determine if the buffer is contiguous. + /// Determine if the iterator is contiguous. + /// + /// For digits iterators, this may mean that only that component + /// of the number is contiguous, but the rest is not: that is, + /// digit separators may be allowed in the fraction but not the + /// integer, and the integer iterator would be contiguous but + /// the fraction would not. #[inline(always)] fn is_contiguous(&self) -> bool { Self::IS_CONTIGUOUS @@ -125,6 +121,16 @@ pub unsafe trait Iter<'a> { self.get_buffer().get(self.cursor() + index) } + /// Check if two values are equal, with optional case sensitivity. + #[inline(always)] + fn is_value_equal(lhs: u8, rhs: u8, is_cased: bool) -> bool { + if is_cased { + lhs == rhs + } else { + lhs.eq_ignore_ascii_case(&rhs) + } + } + /// Get the next value available without consuming it. /// /// This does **NOT** skip digits, and directly fetches the item @@ -322,6 +328,28 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { /// this increments the count by 1. fn increment_count(&mut self); + /// Get the number of digits the iterator has encountered.
+ /// + /// This is always relative to the start of the iterator: recreating + /// the iterator will reset this count. The absolute value of this + /// is not defined: only the relative value between 2 calls. For + /// contiguous iterators, this is based on the index in the + /// buffer. For non-contiguous iterators, the count is internally + /// incremented. + fn digits(&self) -> usize; + + /// Get number of digits returned relative to a previous state. + /// + /// This allows you to determine how many digits were returned + /// since a previous state, but is meant to be strongish-ly + /// typed so the caller knows it only works within a single + /// iterator. Calling this on an iterator other than the one + /// used at the start may lead to unpredictable results. + #[inline(always)] + fn digits_since(&self, start: usize) -> usize { + self.digits() - start + } + /// Peek the next value of the iterator, without consuming it. /// /// Note that this can modify the internal state, by skipping digits @@ -448,11 +476,11 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { /// Skip zeros from the start of the iterator. #[inline(always)] fn skip_zeros(&mut self) -> usize { - let start = self.current_count(); + let start = self.digits(); while self.read_if_value_cased(b'0').is_some() { self.increment_count(); } - self.current_count() - start + self.digits_since(start) } /// Determine if the character is a digit. @@ -467,13 +495,22 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { /// returned if the value is negative and if a sign was found. /// /// The default implementation does not support digit separators.
- #[inline(always)] - fn parse_sign(&mut self) -> (bool, bool) { - // NOTE: `read_if` optimizes poorly since we then match after - match self.first() { - Some(&b'+') => (false, true), - Some(&b'-') => (true, true), - _ => (false, false), - } - } + fn parse_sign(&mut self) -> (bool, bool); + + /// Read the base prefix, if present, returning if the base prefix + /// was present. + /// + /// If the base prefix was not present, it does not consume any + /// leading zeroes, so they can be processed afterwards. Otherwise, + /// it advances the iterator state to the end of the base prefix, + /// including consuming any trailing digit separators. + /// + /// Any caller that consumes leading digit separators will need + /// to ignore it if base prefix trailing digit separators are enabled. + fn read_base_prefix(&mut self) -> bool; + + // TODO: Should implement the `is_base_prefix` internally here at + // the start and should only be used if it has one. Since the format + // will be known in the skip iterator, this is doable. + // TODO: Should implement `is_base_suffix` here } diff --git a/lexical-util/src/noskip.rs b/lexical-util/src/noskip.rs index 5852ba73..a99e851a 100644 --- a/lexical-util/src/noskip.rs +++ b/lexical-util/src/noskip.rs @@ -125,17 +125,6 @@ unsafe impl<'a, const __: u128> Iter<'a> for Bytes<'a, __> { self.index = index; } - /// Get the current number of digits returned by the iterator. - /// - /// For contiguous iterators, this can include the sign character, decimal - /// point, and the exponent sign (that is, it is always the cursor). For - /// non-contiguous iterators, this must always be the only the number of - /// digits returned. 
- #[inline(always)] - fn current_count(&self) -> usize { - self.index - } - #[inline(always)] #[allow(clippy::assertions_on_constants)] // reason="ensuring safety invariants are valid" unsafe fn step_by_unchecked(&mut self, count: usize) { @@ -217,11 +206,6 @@ unsafe impl<'a: 'b, 'b, const __: u128> Iter<'a> for DigitsIterator<'a, 'b, __> unsafe { self.byte.set_cursor(index) }; } - #[inline(always)] - fn current_count(&self) -> usize { - self.byte.current_count() - } - #[inline(always)] unsafe fn step_by_unchecked(&mut self, count: usize) { debug_assert!(self.as_slice().len() >= count); @@ -248,6 +232,11 @@ impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for DigitsIterator<'a, 'b, F fn increment_count(&mut self) { } + #[inline(always)] + fn digits(&self) -> usize { + self.cursor() + } + #[inline(always)] fn peek(&mut self) -> Option<::Item> { self.byte.slc.get(self.byte.index) @@ -259,6 +248,21 @@ impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for DigitsIterator<'a, 'b, F let format = NumberFormat::<{ FORMAT }> {}; char_is_digit_const(value, format.mantissa_radix()) } + + #[inline(always)] + fn parse_sign(&mut self) -> (bool, bool) { + // NOTE: `read_if` optimizes poorly since we then match after + match self.first() { + Some(&b'+') => (false, true), + Some(&b'-') => (true, true), + _ => (false, false), + } + } + + #[inline(always)] + fn read_base_prefix(&mut self) -> bool { + false + } } impl<'a: 'b, 'b, const __: u128> Iterator for DigitsIterator<'a, 'b, __> { diff --git a/lexical-util/src/skip.rs b/lexical-util/src/skip.rs index af28c365..884d1af7 100644 --- a/lexical-util/src/skip.rs +++ b/lexical-util/src/skip.rs @@ -836,13 +836,13 @@ macro_rules! peek_1 { if is_digit_separator { // NOTE: This cannot iteratively search for the next value, // or else the consecutive digit separator has no effect (#96). 
- let is_skip = if $self.current_count() == 0 { + let is_skip = if $self.digits() == 0 { $is_skip!(@first $self) } else { $is_skip!(@internal $self) }; if is_skip { - // SAFETY: Safe since `index < buffer.len()`, so `index + 1 <= buffer.len()`` + // SAFETY: Safe since `index < buffer.len()`, so `index + 1 <= buffer.len()` unsafe { $self.set_cursor(index + 1) }; buffer.get(index + 1) } else { @@ -869,7 +869,7 @@ macro_rules! peek_n { // NOTE: We can do some pretty major optimizations for internal values, // since we can check the location and don't need to check previous values. if is_digit_separator { - let is_skip = if $self.current_count() == 0 { + let is_skip = if $self.digits() == 0 { $is_skip!(@first $self) } else { $is_skip!(@internal $self) @@ -1048,15 +1048,6 @@ pub struct Bytes<'a, const FORMAT: u128> { slc: &'a [u8], /// Current index of the iterator in the slice. index: usize, - /// The current count of integer digits returned by the iterator. - /// This is only used if the iterator is not contiguous. - integer_count: usize, - /// The current count of fraction digits returned by the iterator. - /// This is only used if the iterator is not contiguous. - fraction_count: usize, - /// The current count of exponent digits returned by the iterator. - /// This is only used if the iterator is not contiguous. 
- exponent_count: usize, } impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { @@ -1066,9 +1057,6 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { Self { slc, index: 0, - integer_count: 0, - fraction_count: 0, - exponent_count: 0, } } @@ -1088,9 +1076,6 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { Self { slc, index, - integer_count: 0, - fraction_count: 0, - exponent_count: 0, } } @@ -1099,6 +1084,7 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { pub fn integer_iter<'b>(&'b mut self) -> IntegerDigitsIterator<'a, 'b, FORMAT> { IntegerDigitsIterator { byte: self, + digits: 0, } } @@ -1107,6 +1093,7 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { pub fn fraction_iter<'b>(&'b mut self) -> FractionDigitsIterator<'a, 'b, FORMAT> { FractionDigitsIterator { byte: self, + digits: 0, } } @@ -1115,6 +1102,7 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { pub fn exponent_iter<'b>(&'b mut self) -> ExponentDigitsIterator<'a, 'b, FORMAT> { ExponentDigitsIterator { byte: self, + digits: 0, } } @@ -1123,6 +1111,7 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { pub fn special_iter<'b>(&'b mut self) -> SpecialDigitsIterator<'a, 'b, FORMAT> { SpecialDigitsIterator { byte: self, + digits: 0, } } @@ -1176,7 +1165,11 @@ impl<'a, const FORMAT: u128> Bytes<'a, FORMAT> { unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { /// If each yielded value is adjacent in memory. - const IS_CONTIGUOUS: bool = NumberFormat::<{ FORMAT }>::DIGIT_SEPARATOR == 0; + /// We can have leading and trailing digit separators + /// prior to our significant digits which will still use + /// consecutive digit separators, so we ignore those for + /// our checks. + const IS_CONTIGUOUS: bool = FORMAT & flags::DIGIT_SEPARATOR_FLAG_MASK == 0; #[inline(always)] fn get_buffer(&self) -> &'a [u8] { @@ -1200,23 +1193,6 @@ unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { self.index = index; } - /// Get the current number of digits returned by the iterator. 
- /// - /// For contiguous iterators, this can include the sign character, decimal - /// point, and the exponent sign (that is, it is always the cursor). For - /// non-contiguous iterators, this must always be the only the number of - /// digits returned. - #[inline(always)] - fn current_count(&self) -> usize { - // If the buffer is contiguous, then we don't need to track the - // number of values: the current index is enough. - if Self::IS_CONTIGUOUS { - self.index - } else { - self.integer_count + self.fraction_count + self.exponent_count - } - } - #[inline(always)] unsafe fn step_by_unchecked(&mut self, count: usize) { // SAFETY: Safe if the buffer has at least `N` elements. @@ -1240,6 +1216,8 @@ macro_rules! skip_iterator { pub struct $iterator<'a: 'b, 'b, const FORMAT: u128> { /// The internal byte object for the skip iterator. byte: &'b mut Bytes<'a, FORMAT>, + /// The number of digits found. + digits: usize, } }; } @@ -1280,7 +1258,8 @@ macro_rules! skip_iterator_impl { #[inline(always)] pub fn new(byte: &'b mut Bytes<'a, FORMAT>) -> Self { Self { - byte, + byte: byte, + digits: 0, } } @@ -1342,7 +1321,7 @@ macro_rules! skip_iterator_iterator_impl { /// Create base methods for the Iter block of a skip iterator. macro_rules! skip_iterator_iter_base { - ($format:ident, $mask:ident, $count:ident) => { + ($format:ident, $mask:ident) => { // It's contiguous if we don't skip over any values. // IE, the digit separator flags for the iterator over // the digits doesn't skip any values. @@ -1365,21 +1344,6 @@ macro_rules! skip_iterator_iter_base { unsafe { self.byte.set_cursor(index) }; } - /// Get the current number of digits returned by the iterator. - /// - /// For contiguous iterators, this can include the sign character, decimal - /// point, and the exponent sign (that is, it is always the cursor). For - /// non-contiguous iterators, this must always be the only the number of - /// digits returned. 
- #[inline(always)] - fn current_count(&self) -> usize { - if Self::IS_CONTIGUOUS { - self.byte.current_count() - } else { - self.byte.$count - } - } - #[inline(always)] unsafe fn step_by_unchecked(&mut self, count: usize) { // SAFETY: Safe if the buffer has at least `N` elements. @@ -1404,6 +1368,22 @@ macro_rules! skip_iterator_digits_iter_base { }; } +/// Iteratively consume digits matching the given value. +/// +/// This simplifies consuming digit separators for our internal, specialized +/// use, since for signs and base prefixes/suffixes they're 1-off uses. +#[inline(always)] +fn consume(bytes: &[u8], value: u8, mut index: usize, consecutive: bool) -> usize { + if consecutive { + while bytes.get(index) == Some(&value) { + index += 1; + } + } else if bytes.get(index) == Some(&value) { + index += 1; + } + index +} + /// Internal helper for parsing the sign once a given index is known. #[inline(always)] fn parse_sign_impl(bytes: &[u8], index: usize) -> (bool, bool) { @@ -1426,31 +1406,24 @@ where T: DigitsIter<'a>, { let bytes = iter.get_buffer(); - let mut index = iter.cursor(); if digit_separator != 0 && can_skip { - if consecutive { - while bytes.get(index) == Some(&digit_separator) { - index += 1; - } - } else if bytes.get(index) == Some(&digit_separator) { - index += 1; - } + let index = consume(bytes, digit_separator, iter.cursor(), consecutive); // only advance the internal state if we have a sign // otherwise, we need to keep the start exactly where // the buffer started match parse_sign_impl(bytes, index) { (is_negative, true) => { - // SAFETY: safe, since the was fetched from processing - // digits above, that is, it must be <= iterator size. - // This was validated by `parse_sign_impl`, which returned - // `Some(...)`. + // SAFETY: safe, since consume will never go `> bytes.len()`, and + // `iter.cursor() <= bytes.len()`, that is, we must be within + // the valid bounds, and `bytes.get(index)` in `parse_sign_impl` + // just returned `Some(...)`. 
unsafe { iter.set_cursor(index) }; (is_negative, true) }, (_, false) => (false, false), } } else { - parse_sign_impl(bytes, index) + parse_sign_impl(bytes, iter.cursor()) } } @@ -1499,7 +1472,6 @@ macro_rules! skip_iterator_bytesiter_impl { ( $iterator:ident, $mask:ident, - $count:ident, $i:ident, $l:ident, $t:ident, @@ -1507,7 +1479,7 @@ macro_rules! skip_iterator_bytesiter_impl { $sign_parser:ident $(,)? ) => { unsafe impl<'a: 'b, 'b, const FORMAT: u128> Iter<'a> for $iterator<'a, 'b, FORMAT> { - skip_iterator_iter_base!(FORMAT, $mask, $count); + skip_iterator_iter_base!(FORMAT, $mask); } impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for $iterator<'a, 'b, FORMAT> { @@ -1519,7 +1491,18 @@ macro_rules! skip_iterator_bytesiter_impl { /// this increments the count by 1. #[inline(always)] fn increment_count(&mut self) { - self.byte.$count += 1; + if !self.is_contiguous() { + self.digits += 1; + } + } + + #[inline(always)] + fn digits(&self) -> usize { + if self.is_contiguous() { + self.cursor() + } else { + self.digits + } } /// Peek the next value of the iterator, without consuming it. @@ -1553,22 +1536,37 @@ macro_rules! skip_iterator_bytesiter_impl { const LTC: u128 = LT | C; const ILTC: u128 = ILT | C; + // NOTE: This is a special case where we would normally use a leading + // digit separator, however, we're not at the start of our buffer. + // We're doing as many compile-time conditions as possible. 
+ let is_integer = flags::$i == flags::INTEGER_INTERNAL_DIGIT_SEPARATOR; + let can_skip = !is_integer || format.start_digit_separator() || self.cursor() != 0; match format.digit_separator_flags() & flags::$mask { 0 => peek_noskip!(self), I => peek_i!(self), - L => peek_l!(self), + L if can_skip => peek_l!(self), T => peek_t!(self), - IL => peek_il!(self), + IL if can_skip => peek_il!(self), IT => peek_it!(self), - LT => peek_lt!(self), - ILT => peek_ilt!(self), + LT if can_skip => peek_lt!(self), + ILT if can_skip => peek_ilt!(self), IC => peek_ic!(self), - LC => peek_lc!(self), + LC if can_skip => peek_lc!(self), TC => peek_tc!(self), - ILC => peek_ilc!(self), + ILC if can_skip => peek_ilc!(self), ITC => peek_itc!(self), - LTC => peek_ltc!(self), - ILTC => peek_iltc!(self), + LTC if can_skip => peek_ltc!(self), + ILTC if can_skip => peek_iltc!(self), + + L if !can_skip => peek_noskip!(self), + IL if !can_skip => peek_i!(self), + LT if !can_skip => peek_t!(self), + ILT if !can_skip => peek_it!(self), + LC if !can_skip => peek_noskip!(self), + ILC if !can_skip => peek_ic!(self), + LTC if !can_skip => peek_tc!(self), + ILTC if !can_skip => peek_itc!(self), + _ => unreachable!(), } } @@ -1584,6 +1582,71 @@ macro_rules! skip_iterator_bytesiter_impl { fn parse_sign(&mut self) -> (bool, bool) { $sign_parser::<_, FORMAT>(self) } + + #[inline(always)] + fn read_base_prefix(&mut self) -> bool { + let format = NumberFormat::<{ FORMAT }> {}; + let digit_separator = format.digit_separator(); + let base_prefix = format.base_prefix(); + let is_cased = format.case_sensitive_base_prefix(); + + let is_integer = flags::$i == flags::INTEGER_INTERNAL_DIGIT_SEPARATOR; + if !is_integer || !cfg!(feature = "power-of-two") || base_prefix == 0 { + return false; + } + + // grab our cursor information + let mut index = self.cursor(); + let bytes = self.get_buffer(); + + // we can skip if we're not at the absolute start (a preceeding sign) + // or we've enabled start digit separators. 
+ let consecutive = format.base_prefix_consecutive_digit_separator(); + let skip_start = format.start_digit_separator() || index > 0; + if skip_start && format.base_prefix_leading_digit_separator() { + index = consume(bytes, digit_separator, index, consecutive); + } + + if bytes.get(index) != Some(&b'0') { + return false; + } + index += 1; + debug_assert!(index <= bytes.len()); + + if format.base_prefix_internal_digit_separator() { + index = consume(bytes, digit_separator, index, consecutive); + } + if bytes + .get(index) + .map(|&x| !Self::is_value_equal(x, base_prefix, is_cased)) + .unwrap_or(false) + { + return false; + } + index += 1; + debug_assert!(index <= bytes.len()); + + // NOTE: We want to simplify our implementation, so leave this in a + // simple state for our integer parser. We shouldn't skip digits + // if the integer can skip leading digit separators and we can skip + // trailing, but they can consume consecutive separators, since that + // would just be re-processing data. + let prefix_trailing = format.base_prefix_trailing_digit_separator(); + let mut should_skip = prefix_trailing; + if format.integer_leading_digit_separator() { + should_skip &= consecutive && !format.integer_consecutive_digit_separator(); + } + if should_skip { + index = consume(bytes, digit_separator, index, consecutive); + } + + // SAFETY: safe, since we've consumed at most 1 digit prior to + // consume, we will never go `> bytes.len()`, so this is safe. 
+ debug_assert!(index <= bytes.len()); + unsafe { self.set_cursor(index) }; + + true + } } }; } @@ -1597,7 +1660,6 @@ skip_iterator_iterator_impl!(IntegerDigitsIterator); skip_iterator_bytesiter_impl!( IntegerDigitsIterator, INTEGER_DIGIT_SEPARATOR_FLAG_MASK, - integer_count, INTEGER_INTERNAL_DIGIT_SEPARATOR, INTEGER_LEADING_DIGIT_SEPARATOR, INTEGER_TRAILING_DIGIT_SEPARATOR, @@ -1617,7 +1679,6 @@ skip_iterator_iterator_impl!(FractionDigitsIterator); skip_iterator_bytesiter_impl!( FractionDigitsIterator, FRACTION_DIGIT_SEPARATOR_FLAG_MASK, - fraction_count, FRACTION_INTERNAL_DIGIT_SEPARATOR, FRACTION_LEADING_DIGIT_SEPARATOR, FRACTION_TRAILING_DIGIT_SEPARATOR, @@ -1637,7 +1698,6 @@ skip_iterator_iterator_impl!(ExponentDigitsIterator); skip_iterator_bytesiter_impl!( ExponentDigitsIterator, EXPONENT_DIGIT_SEPARATOR_FLAG_MASK, - exponent_count, EXPONENT_INTERNAL_DIGIT_SEPARATOR, EXPONENT_LEADING_DIGIT_SEPARATOR, EXPONENT_TRAILING_DIGIT_SEPARATOR, @@ -1660,7 +1720,7 @@ impl<'a: 'b, 'b, const FORMAT: u128> SpecialDigitsIterator<'a, 'b, FORMAT> { } unsafe impl<'a: 'b, 'b, const FORMAT: u128> Iter<'a> for SpecialDigitsIterator<'a, 'b, FORMAT> { - skip_iterator_iter_base!(FORMAT, SPECIAL_DIGIT_SEPARATOR, integer_count); + skip_iterator_iter_base!(FORMAT, SPECIAL_DIGIT_SEPARATOR); } impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for SpecialDigitsIterator<'a, 'b, FORMAT> { @@ -1671,6 +1731,15 @@ impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for SpecialDigitsIterator<'a fn increment_count(&mut self) { } + #[inline(always)] + fn digits(&self) -> usize { + if self.is_contiguous() { + self.cursor() + } else { + self.digits + } + } + /// Peek the next value of the iterator, without consuming it. 
#[inline(always)] fn peek(&mut self) -> Option<::Item> { @@ -1688,4 +1757,14 @@ impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for SpecialDigitsIterator<'a let format = NumberFormat::<{ FORMAT }> {}; char_is_digit_const(value, format.mantissa_radix()) } + + #[inline(always)] + fn parse_sign(&mut self) -> (bool, bool) { + unimplemented!(); + } + + #[inline(always)] + fn read_base_prefix(&mut self) -> bool { + unimplemented!(); + } } diff --git a/lexical-util/tests/feature_format_tests.rs b/lexical-util/tests/feature_format_tests.rs index 97f7480e..d5e52b8e 100644 --- a/lexical-util/tests/feature_format_tests.rs +++ b/lexical-util/tests/feature_format_tests.rs @@ -7,7 +7,8 @@ use lexical_util::format; #[test] fn ignore_test() { let fmt = format::NumberFormat::<{ format::IGNORE }> {}; - let expected = format::DIGIT_SEPARATOR_FLAG_MASK + let expected = format::ALL_DIGIT_SEPARATOR_FLAG_MASK + | format::START_DIGIT_SEPARATOR | format::SUPPORTS_PARSING_FLOATS | format::SUPPORTS_PARSING_INTEGERS | format::SUPPORTS_WRITING_FLOATS diff --git a/lexical-util/tests/iterator_tests.rs b/lexical-util/tests/iterator_tests.rs index 9550a78d..212b667d 100644 --- a/lexical-util/tests/iterator_tests.rs +++ b/lexical-util/tests/iterator_tests.rs @@ -24,13 +24,13 @@ fn digits_iterator_test() { assert_eq!(u32::from_le(iter.peek_u32().unwrap()), 0x34333231); assert_eq!(iter.buffer_length(), 5); assert_eq!(iter.cursor(), 0); - assert_eq!(iter.current_count(), 0); + assert_eq!(iter.digits(), 0); unsafe { iter.step_by_unchecked(4); } assert_eq!(iter.buffer_length(), 5); assert_eq!(iter.cursor(), 4); - assert_eq!(iter.current_count(), 4); + assert_eq!(iter.digits(), 4); assert_eq!(iter.peek(), Some(&b'5')); assert_eq!(iter.peek(), Some(&b'5')); assert_eq!(iter.next(), Some(&b'5')); @@ -84,13 +84,13 @@ fn skip_iterator_test() { assert_eq!(iter.is_buffer_empty(), false); assert_eq!(iter.buffer_length(), 6); assert_eq!(iter.cursor(), 0); - assert_eq!(iter.current_count(), 0); + 
assert_eq!(iter.digits(), 0); unsafe { iter.step_unchecked() }; assert_eq!(iter.cursor(), 1); - assert_eq!(iter.current_count(), 0); + assert_eq!(iter.digits(), 0); iter.next(); assert_eq!(iter.cursor(), 2); - assert_eq!(iter.current_count(), 1); + assert_eq!(iter.digits(), 1); let mut byte = digits.bytes::<{ FORMAT }>(); let mut iter = byte.integer_iter(); @@ -100,10 +100,10 @@ fn skip_iterator_test() { assert_eq!(iter.next(), Some(&b'2')); assert_eq!(iter.next(), Some(&b'3')); assert_eq!(iter.cursor(), 3); - assert_eq!(iter.current_count(), 3); + assert_eq!(iter.digits(), 3); assert_eq!(iter.next(), Some(&b'4')); assert_eq!(iter.cursor(), 5); - assert_eq!(iter.current_count(), 4); + assert_eq!(iter.digits(), 4); assert_eq!(iter.next(), Some(&b'5')); assert_eq!(iter.next(), None); diff --git a/lexical-write-float/src/lib.rs b/lexical-write-float/src/lib.rs index 99a3fbbf..7c0746d0 100644 --- a/lexical-write-float/src/lib.rs +++ b/lexical-write-float/src/lib.rs @@ -150,7 +150,7 @@ //! written is as follows: //! //! ```rust -//! # #[cfg(feature = "radix")] { +//! # #[cfg(feature = "format")] { //! # use core::str; //! use lexical_write_float::{FormattedSize, NumberFormatBuilder, Options, ToLexicalWithOptions}; //! From dab5e53eb024e5e157fb0ce878bad7d1b7263194 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Fri, 17 Jan 2025 17:09:09 -0600 Subject: [PATCH 11/18] Improve parsing floats to use our better base prefix logic. 
--- lexical-parse-float/src/parse.rs | 98 ++++++++++---------------- lexical-parse-integer/src/algorithm.rs | 12 ++-- lexical-util/src/feature_format.rs | 16 +++-- lexical-util/src/format_builder.rs | 35 +++++++-- lexical-util/src/not_feature_format.rs | 16 +++-- lexical-util/src/skip.rs | 2 +- 6 files changed, 95 insertions(+), 84 deletions(-) diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs index 2dc85f00..683620bf 100644 --- a/lexical-parse-float/src/parse.rs +++ b/lexical-parse-float/src/parse.rs @@ -251,11 +251,10 @@ pub fn parse_complete( options: &Options, ) -> Result { let mut byte = bytes.bytes::<{ FORMAT }>(); + let format = NumberFormat:: {}; let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - if NumberFormat::::REQUIRED_INTEGER_DIGITS - || NumberFormat::::REQUIRED_MANTISSA_DIGITS - { + if format.required_integer_digits() || format.required_mantissa_digits() { return Err(Error::Empty(byte.cursor())); } else { return Ok(F::ZERO); @@ -294,11 +293,10 @@ pub fn fast_path_complete( options: &Options, ) -> Result { let mut byte = bytes.bytes::<{ FORMAT }>(); + let format = NumberFormat:: {}; let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - if NumberFormat::::REQUIRED_INTEGER_DIGITS - || NumberFormat::::REQUIRED_MANTISSA_DIGITS - { + if format.required_integer_digits() || format.required_mantissa_digits() { return Err(Error::Empty(byte.cursor())); } else { return Ok(F::ZERO); @@ -319,11 +317,10 @@ pub fn parse_partial( options: &Options, ) -> Result<(F, usize)> { let mut byte = bytes.bytes::<{ FORMAT }>(); + let format = NumberFormat:: {}; let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - if NumberFormat::::REQUIRED_INTEGER_DIGITS - || NumberFormat::::REQUIRED_MANTISSA_DIGITS - { + if format.required_integer_digits() || format.required_mantissa_digits() { return Err(Error::Empty(byte.cursor())); } else { 
return Ok((F::ZERO, byte.cursor())); @@ -368,11 +365,10 @@ pub fn fast_path_partial( options: &Options, ) -> Result<(F, usize)> { let mut byte = bytes.bytes::<{ FORMAT }>(); + let format = NumberFormat:: {}; let is_negative = parse_mantissa_sign(&mut byte)?; if byte.integer_iter().is_consumed() { - if NumberFormat::::REQUIRED_INTEGER_DIGITS - || NumberFormat::::REQUIRED_MANTISSA_DIGITS - { + if format.required_integer_digits() || format.required_mantissa_digits() { return Err(Error::Empty(byte.cursor())); } else { return Ok((F::ZERO, byte.cursor())); @@ -535,51 +531,31 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( let bits_per_digit = shared::log2(format.mantissa_radix()) as i64; let bits_per_base = shared::log2(format.exponent_base()) as i64; - // INTEGER - - // Check to see if we have a valid base prefix. - // NOTE: `lz_prefix` is if we had a leading zero when - // checking for a base prefix: it is not if the prefix - // exists or not. - // TODO: MIGRATE TO BASE PREFIX LOGIC - #[allow(unused_variables)] - let mut lz_prefix = false; - #[cfg(all(feature = "format", feature = "power-of-two"))] - { - let base_prefix = format.base_prefix(); - let mut has_prefix = false; - let mut iter = byte.integer_iter(); - if base_prefix != 0 && iter.read_if_value_cased(b'0').is_some() { - // Check to see if the next character is the base prefix. - // We must have a format like `0x`, `0d`, `0o`. - // NOTE: The check for empty integer digits happens below so - // we don't need a redundant check here. 
- lz_prefix = true; - let prefix = iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()); - has_prefix = prefix.is_some(); - if has_prefix && iter.is_buffer_empty() && format.required_integer_digits() { - return Err(Error::EmptyInteger(iter.cursor())); - } - } - if format.required_base_prefix() && !has_prefix { - return Err(Error::MissingBasePrefix(iter.cursor())); + // skip and validate an optional base prefix + let has_base_prefix = cfg!(feature = "format") && byte.integer_iter().read_base_prefix(); + if cfg!(feature = "format") && has_base_prefix { + if byte.is_buffer_empty() && format.required_integer_digits() { + return Err(Error::EmptyInteger(byte.cursor())); } + } else if format.required_base_prefix() { + return Err(Error::MissingBasePrefix(byte.cursor())); } - // Parse our integral digits. - let mut mantissa = 0_u64; + // INTEGER + let start = byte.clone(); + let mut mantissa = 0_u64; let mut integer_iter = byte.integer_iter(); - let start_count = integer_iter.digits(); + let integer_start = integer_iter.digits(); + + // Parse our integral digits. #[cfg(not(feature = "compact"))] parse_8digits::<_, FORMAT>(&mut integer_iter, &mut mantissa); parse_digits(&mut integer_iter, format.mantissa_radix(), |digit| { mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64); }); - let mut n_digits = integer_iter.digits_since(start_count); - #[cfg(feature = "format")] + let mut n_digits = integer_iter.digits_since(integer_start); let n_before_dot = n_digits; - #[cfg(feature = "format")] if format.required_integer_digits() && n_digits == 0 { return Err(Error::EmptyInteger(byte.cursor())); } @@ -610,10 +586,13 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( let integer_digits = unsafe { start.as_slice().get_unchecked(..b_digits) }; // Check if integer leading zeros are disabled. 
- #[cfg(feature = "format")] - if !lz_prefix && format.no_float_leading_zeros() { + if cfg!(feature = "format") + && format.no_float_leading_zeros() + && !has_base_prefix + && n_before_dot > 1 + { let mut integer = integer_digits.bytes::(); - if integer_digits.len() > 1 && integer.integer_iter().peek() == Some(&b'0') { + if integer.integer_iter().peek() == Some(&b'0') { return Err(Error::InvalidLeadingZeros(start.cursor())); } } @@ -632,13 +611,13 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( unsafe { byte.step_unchecked() }; let before = byte.clone(); let mut fraction_iter = byte.fraction_iter(); - let start_count = fraction_iter.digits(); + let fraction_count = fraction_iter.digits(); #[cfg(not(feature = "compact"))] parse_8digits::<_, FORMAT>(&mut fraction_iter, &mut mantissa); parse_digits(&mut fraction_iter, format.mantissa_radix(), |digit| { mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64); }); - n_after_dot = fraction_iter.digits_since(start_count); + n_after_dot = fraction_iter.digits_since(fraction_count); // NOTE: We can't use the number of digits to extract the slice for // non-contiguous iterators, but we also need to the number of digits // for our value calculation. We store both, and let the compiler know @@ -674,7 +653,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // NOTE: Check if we have our exponent **BEFORE** checking if the // mantissa is empty, so we can ensure let has_exponent = byte - .first_is(exponent_character, format.case_sensitive_exponent() && cfg!(feature = "format")); + .first_is(exponent_character, cfg!(feature = "format") && format.case_sensitive_exponent()); // check to see if we have any invalid leading zeros n_digits += n_after_dot; @@ -701,8 +680,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( unsafe { byte.step_unchecked() }; // Check float format syntax checks. 
- #[cfg(feature = "format")] - { + if cfg!(feature = "format") { // NOTE: We've overstepped for the safety invariant before. if format.no_exponent_notation() { return Err(Error::InvalidExponent(byte.cursor() - 1)); @@ -736,14 +714,14 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( let is_negative_exponent = parse_exponent_sign(&mut byte)?; let mut exponent_iter = byte.exponent_iter(); - let start_count = exponent_iter.digits(); + let exponent_start = exponent_iter.digits(); parse_digits(&mut exponent_iter, format.exponent_radix(), |digit| { if explicit_exponent < 0x10000000 { explicit_exponent *= format.exponent_radix() as i64; explicit_exponent += digit as i64; } }); - if format.required_exponent_digits() && exponent_iter.digits_since(start_count) == 0 { + if format.required_exponent_digits() && exponent_iter.digits_since(exponent_start) == 0 { return Err(Error::EmptyExponent(byte.cursor())); } // Handle our sign, and get the explicit part of the exponent. @@ -760,9 +738,8 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // Check to see if we have a valid base suffix. // We've already trimmed any leading digit separators here, so we can be safe // that the first character **is not** a digit separator. 
- // FIXME: Improve parsing of this - #[cfg(all(feature = "format", feature = "power-of-two"))] - if format.has_base_suffix() { + // TODO: Improve parsing of this using a base suffix method + if cfg!(all(feature = "format", feature = "power-of-two")) && format.has_base_suffix() { let base_suffix = format.base_suffix(); let is_suffix = byte.first_is(base_suffix, format.case_sensitive_base_suffix()); if is_suffix { @@ -779,8 +756,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( let end = byte.cursor(); let mut step = u64_step(format.mantissa_radix()); let mut many_digits = false; - #[cfg(feature = "format")] - if !format.required_mantissa_digits() && n_digits == 0 { + if cfg!(feature = "format") && !format.required_mantissa_digits() && n_digits == 0 { exponent = 0; } if n_digits <= step { diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index e982003e..cfbf808e 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -674,14 +674,15 @@ macro_rules! algorithm { let mut byte = $bytes.bytes::(); let format = NumberFormat:: {}; let radix = format.mantissa_radix(); + debug_assert!(format.is_valid(), "should have already checked for an invalid number format"); let is_negative = parse_sign::(&mut byte)?; let mut iter = byte.integer_iter(); maybe_into_empty!(iter, $into_ok); // skip and validate an optional base prefix - #[cfg(all(feature = "format", feature = "power-of-two"))] - if iter.read_base_prefix() { + let has_base_prefix = cfg!(feature = "format") && iter.read_base_prefix(); + if cfg!(feature = "format") && has_base_prefix { maybe_into_empty!(iter, $into_ok); } else if format.required_base_prefix() { return Err(Error::MissingBasePrefix(iter.cursor())); @@ -689,8 +690,7 @@ macro_rules! 
algorithm { // NOTE: always do a peek so any leading digit separators // are skipped, and we can get the correct index - #[cfg(feature = "format")] - if format.no_integer_leading_zeros() && iter.peek() == Some(&b'0') { + if cfg!(feature = "format") && format.no_integer_leading_zeros() && !has_base_prefix && iter.peek() == Some(&b'0') { // NOTE: Skipping zeros is **EXPENSIVE* so we skip that without our format feature let index = iter.cursor(); let zeros = iter.skip_zeros(); @@ -719,7 +719,6 @@ macro_rules! algorithm { // and even if parsing a 64-bit integer is marginally faster, it // culminates in **way** slower performance overall for simple // integers, and no improvement for large integers. - #[allow(unused)] let mut has_suffix = false; // FIXME: This is only used for the parsing of the base suffix. #[allow(unused)] @@ -776,8 +775,7 @@ macro_rules! algorithm { ); } - #[cfg(all(feature = "format", feature = "power-of-two"))] - if format.required_base_suffix() && !has_suffix { + if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_suffix() && !has_suffix { return Err(Error::MissingBaseSuffix(iter.cursor())); } diff --git a/lexical-util/src/feature_format.rs b/lexical-util/src/feature_format.rs index ff276b1b..b58d4d66 100644 --- a/lexical-util/src/feature_format.rs +++ b/lexical-util/src/feature_format.rs @@ -516,16 +516,20 @@ impl NumberFormat { /// Get if leading zeros before an integer are not allowed. /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`false`]. + /// Can only be modified with [`feature`][crate#features] `format`. This + /// only applies if there is no base prefix: that is, the zeros are + /// at the absolute start of the number. Defaults to [`false`]. /// /// # Examples /// + /// With a base prefix of `x`. + /// /// | Input | Valid? 
| /// |:-:|:-:| /// | `01` | ❌ | /// | `0` | ✔️ | /// | `10` | ✔️ | + /// | `0x01` | ✔️ | /// /// # Used For /// @@ -544,17 +548,21 @@ impl NumberFormat { /// /// This is before the significant digits of the float, that is, if there is /// 1 or more digits in the integral component and the leading digit is 0, - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`false`]. + /// Can only be modified with [`feature`][crate#features] `format`. This + /// only applies if there is no base prefix: that is, the zeros are + /// at the absolute start of the number. Defaults to [`false`]. /// /// # Examples /// + /// With a base prefix of `x`. + /// /// | Input | Valid? | /// |:-:|:-:| /// | `01` | ❌ | /// | `01.0` | ❌ | /// | `0` | ✔️ | /// | `10` | ✔️ | + /// | `0x01.0` | ✔️ | /// | `0.1` | ✔️ | /// /// # Used For diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index 14cfa72b..8c24ae88 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -1339,16 +1339,20 @@ impl NumberFormatBuilder { /// Get if leading zeros before an integer are not allowed. /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`false`]. + /// Can only be modified with [`feature`][crate#features] `format`. This + /// only applies if there is no base prefix: that is, the zeros are + /// at the absolute start of the number. Defaults to [`false`]. /// /// # Examples /// + /// With a base prefix of `x`. + /// /// | Input | Valid? | /// |:-:|:-:| /// | `01` | ❌ | /// | `0` | ✔️ | /// | `10` | ✔️ | + /// | `0x01` | ✔️ | /// /// # Used For /// @@ -1362,17 +1366,21 @@ impl NumberFormatBuilder { /// /// This is before the significant digits of the float, that is, if there is /// 1 or more digits in the integral component and the leading digit is 0, - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`false`]. 
+ /// Can only be modified with [`feature`][crate#features] `format`. This + /// only applies if there is no base prefix: that is, the zeros are + /// at the absolute start of the number. Defaults to [`false`]. /// /// # Examples /// + /// With a base prefix of `x`. + /// /// | Input | Valid? | /// |:-:|:-:| /// | `01` | ❌ | /// | `01.0` | ❌ | /// | `0` | ✔️ | /// | `10` | ✔️ | + /// | `0x01.0` | ✔️ | /// | `0.1` | ✔️ | /// /// # Used For @@ -3148,15 +3156,19 @@ impl NumberFormatBuilder { /// Set if leading zeros before an integer are not allowed. /// - /// Defaults to [`false`]. + /// This only applies if there is no base prefix: that is, the zeros are + /// at the absolute start of the number. Defaults to [`false`]. /// /// # Examples /// + /// With a base prefix of `x`. + /// /// | Input | Valid? | /// |:-:|:-:| /// | `01` | ❌ | /// | `0` | ✔️ | /// | `10` | ✔️ | + /// | `0x01` | ✔️ | /// /// # Used For /// @@ -3165,6 +3177,7 @@ impl NumberFormatBuilder { /// #[inline(always)] @@ -3189,16 +3203,20 @@ impl NumberFormatBuilder { /// /// This is before the significant digits of the float, that is, if there is /// 1 or more digits in the integral component and the leading digit is 0, - /// Defaults to [`false`]. + /// This only applies if there is no base prefix: that is, the zeros are + /// at the absolute start of the number. Defaults to [`false`]. /// /// # Examples /// + /// With a base prefix of `x`. + /// /// | Input | Valid? | /// |:-:|:-:| /// | `01` | ❌ | /// | `01.0` | ❌ | /// | `0` | ✔️ | /// | `10` | ✔️ | + /// | `0x01.0` | ✔️ | /// | `0.1` | ✔️ | /// /// # Used For @@ -3208,6 +3226,7 @@ impl NumberFormatBuilder { /// #[inline(always)] diff --git a/lexical-util/src/not_feature_format.rs b/lexical-util/src/not_feature_format.rs index 41adddca..b87677d1 100644 --- a/lexical-util/src/not_feature_format.rs +++ b/lexical-util/src/not_feature_format.rs @@ -474,16 +474,20 @@ impl NumberFormat { /// Get if leading zeros before an integer are not allowed. 
/// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`false`]. + /// Can only be modified with [`feature`][crate#features] `format`. This + /// only applies if there is no base prefix: that is, the zeros are + /// at the absolute start of the number. Defaults to [`false`]. /// /// # Examples /// + /// With a base prefix of `x`. + /// /// | Input | Valid? | /// |:-:|:-:| /// | `01` | ❌ | /// | `0` | ✔️ | /// | `10` | ✔️ | + /// | `0x01` | ✔️ | /// /// # Used For /// @@ -502,17 +506,21 @@ impl NumberFormat { /// /// This is before the significant digits of the float, that is, if there is /// 1 or more digits in the integral component and the leading digit is 0, - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`false`]. + /// Can only be modified with [`feature`][crate#features] `format`. This + /// only applies if there is no base prefix: that is, the zeros are + /// at the absolute start of the number. Defaults to [`false`]. /// /// # Examples /// + /// With a base prefix of `x`. + /// /// | Input | Valid? | /// |:-:|:-:| /// | `01` | ❌ | /// | `01.0` | ❌ | /// | `0` | ✔️ | /// | `10` | ✔️ | + /// | `0x01.0` | ✔️ | /// | `0.1` | ✔️ | /// /// # Used For diff --git a/lexical-util/src/skip.rs b/lexical-util/src/skip.rs index 884d1af7..0811a9f1 100644 --- a/lexical-util/src/skip.rs +++ b/lexical-util/src/skip.rs @@ -1258,7 +1258,7 @@ macro_rules! skip_iterator_impl { #[inline(always)] pub fn new(byte: &'b mut Bytes<'a, FORMAT>) -> Self { Self { - byte: byte, + byte, digits: 0, } } From 5b3c7209f6db868134b4ad9a0e1a75a215b79ad2 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sat, 18 Jan 2025 21:12:43 -0600 Subject: [PATCH 12/18] Turn off LTO for benches. It's causing massive variability with code changes, without any changes for benches when run independently. 
--- extras/benchmark/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/extras/benchmark/Cargo.toml b/extras/benchmark/Cargo.toml index d331679d..7b508572 100644 --- a/extras/benchmark/Cargo.toml +++ b/extras/benchmark/Cargo.toml @@ -12,4 +12,3 @@ resolver = "2" opt-level = 3 debug = false debug-assertions = false -lto = true From 79287ac53359febf1baa1653e1324687b82d3ebe Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Sun, 19 Jan 2025 09:48:20 -0600 Subject: [PATCH 13/18] Add in heavy optimizations for parsing integers. This improves both the parsing of sign and digits for better performance, especially with non-decimal radices and larger digit counts. --- lexical-parse-float/src/lib.rs | 9 +- lexical-parse-float/src/parse.rs | 114 ++++------ lexical-parse-float/tests/api_tests.rs | 16 +- lexical-parse-integer/src/algorithm.rs | 182 ++++++--------- lexical-util/src/digit.rs | 14 +- lexical-util/src/iterator.rs | 221 +++++++++++++++--- lexical-util/src/noskip.rs | 69 ++++-- lexical-util/src/num.rs | 43 ++-- lexical-util/src/skip.rs | 302 ++++++++++++------------- 9 files changed, 539 insertions(+), 431 deletions(-) diff --git a/lexical-parse-float/src/lib.rs b/lexical-parse-float/src/lib.rs index f3307300..317c0eff 100644 --- a/lexical-parse-float/src/lib.rs +++ b/lexical-parse-float/src/lib.rs @@ -78,7 +78,7 @@ //! //! let value = "1.234e+300"; //! let result = f64::from_lexical_with_options::(value.as_bytes(), &OPTIONS); -//! assert_eq!(result, Err(Error::MissingSign(0))); +//! assert_eq!(result, Err(Error::MissingMantissaSign(0))); //! # } //! ``` //! @@ -132,7 +132,7 @@ //! } //! //! assert_eq!(parse_json_float(b"-1"), Ok(-1.0)); -//! assert_eq!(parse_json_float(b"+1"), Err(Error::InvalidPositiveSign(0))); +//! assert_eq!(parse_json_float(b"+1"), Err(Error::InvalidPositiveMantissaSign(0))); //! assert_eq!(parse_json_float(b"1"), Ok(1.0)); //! assert_eq!(parse_json_float(b"1."), Err(Error::EmptyFraction(2))); //! 
assert_eq!(parse_json_float(b"0.1"), Ok(0.1)); @@ -193,7 +193,7 @@ //! assert_eq!(value.map(|x| x.is_nan()), Ok(true)); //! //! let value = f64::from_lexical_with_options::(b"+1_2.3_4", &OPTIONS); -//! assert_eq!(value, Err(Error::InvalidPositiveSign(0))); +//! assert_eq!(value, Err(Error::InvalidPositiveMantissaSign(0))); //! //! let value = f64::from_lexical_with_options::(b"0.3_4", &OPTIONS); //! assert_eq!(value, Ok(0.34)); @@ -566,9 +566,6 @@ mod table_lemire; mod table_radix; mod table_small; -#[macro_use(parse_sign)] -extern crate lexical_parse_integer; - // Re-exports #[cfg(feature = "f16")] pub use lexical_util::bf16::bf16; diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs index 683620bf..0e3d85f9 100644 --- a/lexical-parse-float/src/parse.rs +++ b/lexical-parse-float/src/parse.rs @@ -75,6 +75,35 @@ macro_rules! check_radix { }}; } +/// If a buffer is empty, return the value or an error. +macro_rules! maybe_into_empty { + ($iter:expr, $into_ok:ident) => {{ + let mut iter = $iter; + let format = NumberFormat:: {}; + if iter.is_consumed() { + if format.required_integer_digits() || format.required_mantissa_digits() { + return Err(Error::Empty(iter.cursor())); + } else { + return $into_ok!(F::ZERO, iter.cursor()); + } + } + }}; +} + +/// Return an value for a complete parser. +macro_rules! into_ok_complete { + ($value:expr, $index:expr) => {{ + Ok($value) + }}; +} + +/// Return an value and index for a partial parser. +macro_rules! into_ok_partial { + ($value:expr, $index:expr) => {{ + Ok(($value, $index)) + }}; +} + /// Parse integer trait, implemented in terms of the optimized back-end. pub trait ParseFloat: LemireFloat { /// Forward complete parser parameters to the backend. @@ -168,36 +197,6 @@ parse_float_as_f32! { bf16 f16 } // different internally. Most of the code is shared, so the duplicated // code is only like 30 lines. -/// Parse the sign from the leading digits. 
-#[cfg_attr(not(feature = "compact"), inline(always))] -pub fn parse_mantissa_sign(byte: &mut Bytes<'_, FORMAT>) -> Result { - let format = NumberFormat::<{ FORMAT }> {}; - parse_sign!( - byte, - true, - integer_iter, - format.no_positive_mantissa_sign(), - format.required_mantissa_sign(), - InvalidPositiveSign, - MissingSign - ) -} - -/// Parse the sign from the leading digits. -#[cfg_attr(not(feature = "compact"), inline(always))] -pub fn parse_exponent_sign(byte: &mut Bytes<'_, FORMAT>) -> Result { - let format = NumberFormat::<{ FORMAT }> {}; - parse_sign!( - byte, - true, - exponent_iter, - format.no_positive_exponent_sign(), - format.required_exponent_sign(), - InvalidPositiveExponentSign, - MissingExponentSign - ) -} - /// Utility to extract the result and handle any errors from parsing a `Number`. /// /// - `format` - The numerical format as a packed integer @@ -251,15 +250,8 @@ pub fn parse_complete( options: &Options, ) -> Result { let mut byte = bytes.bytes::<{ FORMAT }>(); - let format = NumberFormat:: {}; - let is_negative = parse_mantissa_sign(&mut byte)?; - if byte.integer_iter().is_consumed() { - if format.required_integer_digits() || format.required_mantissa_digits() { - return Err(Error::Empty(byte.cursor())); - } else { - return Ok(F::ZERO); - } - } + let is_negative = byte.read_mantissa_sign()?; + maybe_into_empty!(byte.integer_iter(), into_ok_complete); // Parse our a small representation of our number. 
let num: Number<'_> = @@ -293,15 +285,8 @@ pub fn fast_path_complete( options: &Options, ) -> Result { let mut byte = bytes.bytes::<{ FORMAT }>(); - let format = NumberFormat:: {}; - let is_negative = parse_mantissa_sign(&mut byte)?; - if byte.integer_iter().is_consumed() { - if format.required_integer_digits() || format.required_mantissa_digits() { - return Err(Error::Empty(byte.cursor())); - } else { - return Ok(F::ZERO); - } - } + let is_negative = byte.read_mantissa_sign()?; + maybe_into_empty!(byte.integer_iter(), into_ok_complete); // Parse our a small representation of our number. let num = @@ -317,15 +302,8 @@ pub fn parse_partial( options: &Options, ) -> Result<(F, usize)> { let mut byte = bytes.bytes::<{ FORMAT }>(); - let format = NumberFormat:: {}; - let is_negative = parse_mantissa_sign(&mut byte)?; - if byte.integer_iter().is_consumed() { - if format.required_integer_digits() || format.required_mantissa_digits() { - return Err(Error::Empty(byte.cursor())); - } else { - return Ok((F::ZERO, byte.cursor())); - } - } + let is_negative = byte.read_mantissa_sign()?; + maybe_into_empty!(byte.integer_iter(), into_ok_partial); // Parse our a small representation of our number. let (num, count) = parse_number!( @@ -365,15 +343,8 @@ pub fn fast_path_partial( options: &Options, ) -> Result<(F, usize)> { let mut byte = bytes.bytes::<{ FORMAT }>(); - let format = NumberFormat:: {}; - let is_negative = parse_mantissa_sign(&mut byte)?; - if byte.integer_iter().is_consumed() { - if format.required_integer_digits() || format.required_mantissa_digits() { - return Err(Error::Empty(byte.cursor())); - } else { - return Ok((F::ZERO, byte.cursor())); - } - } + let is_negative = byte.read_mantissa_sign()?; + maybe_into_empty!(byte.integer_iter(), into_ok_partial); // Parse our a small representation of our number. 
let (num, count) = parse_number!( @@ -532,12 +503,8 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( let bits_per_base = shared::log2(format.exponent_base()) as i64; // skip and validate an optional base prefix - let has_base_prefix = cfg!(feature = "format") && byte.integer_iter().read_base_prefix(); - if cfg!(feature = "format") && has_base_prefix { - if byte.is_buffer_empty() && format.required_integer_digits() { - return Err(Error::EmptyInteger(byte.cursor())); - } - } else if format.required_base_prefix() { + let has_base_prefix = cfg!(feature = "format") && byte.read_base_prefix(); + if cfg!(feature = "format") && !has_base_prefix && format.required_base_prefix() { return Err(Error::MissingBasePrefix(byte.cursor())); } @@ -712,7 +679,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( } } - let is_negative_exponent = parse_exponent_sign(&mut byte)?; + let is_negative_exponent = byte.read_exponent_sign()?; let mut exponent_iter = byte.exponent_iter(); let exponent_start = exponent_iter.digits(); parse_digits(&mut exponent_iter, format.exponent_radix(), |digit| { @@ -738,7 +705,6 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( // Check to see if we have a valid base suffix. // We've already trimmed any leading digit separators here, so we can be safe // that the first character **is not** a digit separator. 
- // TODO: Improve parsing of this using a base suffix method if cfg!(all(feature = "format", feature = "power-of-two")) && format.has_base_suffix() { let base_suffix = format.base_suffix(); let is_suffix = byte.first_is(base_suffix, format.case_sensitive_base_suffix()); diff --git a/lexical-parse-float/tests/api_tests.rs b/lexical-parse-float/tests/api_tests.rs index 04d69e57..bb40dde8 100644 --- a/lexical-parse-float/tests/api_tests.rs +++ b/lexical-parse-float/tests/api_tests.rs @@ -858,11 +858,11 @@ fn f64_no_exponent_notation_test() { fn f64_optional_exponent_test() { const FORMAT: u128 = format::PERMISSIVE; const OPTIONS: Options = Options::new(); - assert!(f64::from_lexical_with_options::(b"+3.0e7", &OPTIONS).is_ok()); - assert!(f64::from_lexical_with_options::(b"+3.0e-7", &OPTIONS).is_ok()); - assert!(f64::from_lexical_with_options::(b"+3.0e", &OPTIONS).is_ok()); - assert!(f64::from_lexical_with_options::(b"+3.0e-", &OPTIONS).is_ok()); - assert!(f64::from_lexical_with_options::(b"+3.0", &OPTIONS).is_ok()); + assert_eq!(f64::from_lexical_with_options::(b"+3.0e7", &OPTIONS), Ok(3.0e7)); + assert_eq!(f64::from_lexical_with_options::(b"+3.0e-7", &OPTIONS), Ok(3.0e-7)); + assert_eq!(f64::from_lexical_with_options::(b"+3.0e", &OPTIONS), Ok(3.0)); + assert_eq!(f64::from_lexical_with_options::(b"+3.0e-", &OPTIONS), Ok(3.0)); + assert_eq!(f64::from_lexical_with_options::(b"+3.0", &OPTIONS), Ok(3.0)); } #[test] @@ -870,11 +870,11 @@ fn f64_optional_exponent_test() { fn f64_required_exponent_test() { const FORMAT: u128 = rebuild(format::PERMISSIVE).required_exponent_digits(true).build_strict(); const OPTIONS: Options = Options::new(); - assert!(f64::from_lexical_with_options::(b"+3.0e7", &OPTIONS).is_ok()); - assert!(f64::from_lexical_with_options::(b"+3.0e-7", &OPTIONS).is_ok()); + assert_eq!(f64::from_lexical_with_options::(b"+3.0e7", &OPTIONS), Ok(3.0e7)); + assert_eq!(f64::from_lexical_with_options::(b"+3.0e-7", &OPTIONS), Ok(3.0e-7)); 
assert!(f64::from_lexical_with_options::(b"+3.0e", &OPTIONS).is_err()); assert!(f64::from_lexical_with_options::(b"+3.0e-", &OPTIONS).is_err()); - assert!(f64::from_lexical_with_options::(b"+3.0", &OPTIONS).is_ok()); + assert_eq!(f64::from_lexical_with_options::(b"+3.0", &OPTIONS), Ok(3.0)); } #[test] diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index cfbf808e..6ef56141 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -35,7 +35,7 @@ use lexical_util::digit::char_to_digit_const; use lexical_util::error::Error; use lexical_util::format::NumberFormat; -use lexical_util::iterator::{AsBytes, Bytes, DigitsIter, Iter}; +use lexical_util::iterator::{AsBytes, DigitsIter, Iter}; use lexical_util::num::{as_cast, Integer}; use lexical_util::result::Result; @@ -200,64 +200,6 @@ macro_rules! fmt_invalid_digit { }}; } -/// Parse the sign from the leading digits. -/// -/// This routine does the following: -/// -/// 1. Parses the sign digit. -/// 2. Handles if positive signs before integers are not allowed. -/// 3. Handles negative signs if the type is unsigned. -/// 4. Handles if the sign is required, but missing. -/// 5. Handles if the iterator is empty, before or after parsing the sign. -/// 6. Handles if the iterator has invalid, leading zeros. -/// -/// Returns if the value is negative, or any values detected when -/// validating the input. -#[doc(hidden)] -#[macro_export] -macro_rules! parse_sign { - ( - $byte:ident, - $is_signed:expr, - $as_iter:ident, - $no_positive:expr, - $required:expr, - $invalid_positive:ident, - $missing:ident $(,)? 
- ) => { - match $byte.$as_iter().parse_sign() { - (false, true) if !$no_positive => { - // SAFETY: We have at least 1 item left since we peaked a value - unsafe { $byte.step_unchecked() }; - Ok(false) - }, - (false, true) if $no_positive => Err(Error::$invalid_positive($byte.cursor())), - (true, true) if $is_signed => { - // SAFETY: We have at least 1 item left since we peaked a value - unsafe { $byte.step_unchecked() }; - Ok(true) - }, - _ if $required => Err(Error::$missing($byte.cursor())), - _ => Ok(false), - } - }; -} - -/// Parse the sign from the leading digits. -#[cfg_attr(not(feature = "compact"), inline(always))] -pub fn parse_sign(byte: &mut Bytes<'_, FORMAT>) -> Result { - let format = NumberFormat:: {}; - parse_sign!( - byte, - T::IS_SIGNED, - integer_iter, - format.no_positive_mantissa_sign(), - format.required_mantissa_sign(), - InvalidPositiveSign, - MissingSign - ) -} - // FOUR DIGITS /// Determine if 4 bytes, read raw from bytes, are 4 digits for the radix. @@ -540,7 +482,7 @@ macro_rules! parse_digits_unchecked { $is_end:expr $(,)? ) => {{ let can_multi = can_try_parse_multidigits::<_, FORMAT>(&$iter); - let use_multi = can_multi && !$no_multi_digit; + let use_multi = !$no_multi_digit && can_multi; // these cannot overflow. also, we use at most 3 for a 128-bit float and 1 for a // 64-bit float NOTE: Miri will complain about this if we use radices >= @@ -671,26 +613,34 @@ macro_rules! algorithm { // Removes conditional paths, to, which simplifies maintenance. // The skip version of the iterator automatically coalesces to // the no-skip iterator. - let mut byte = $bytes.bytes::(); let format = NumberFormat:: {}; let radix = format.mantissa_radix(); debug_assert!(format.is_valid(), "should have already checked for an invalid number format"); - let is_negative = parse_sign::(&mut byte)?; + // this is optimized for no-skip iterators, but works well + // for skip iterators as well. 
+ let mut byte = $bytes.bytes::(); + let is_negative = byte.read_integer_sign(T::IS_SIGNED)?; + maybe_into_empty!(byte, $into_ok); let mut iter = byte.integer_iter(); - maybe_into_empty!(iter, $into_ok); // skip and validate an optional base prefix + // FIXME: Optimize this, but this then would require tracking digits after + // here for the base prefix cases. let has_base_prefix = cfg!(feature = "format") && iter.read_base_prefix(); if cfg!(feature = "format") && has_base_prefix { maybe_into_empty!(iter, $into_ok); - } else if format.required_base_prefix() { + } else if cfg!(feature = "format") && format.required_base_prefix() { return Err(Error::MissingBasePrefix(iter.cursor())); } // NOTE: always do a peek so any leading digit separators // are skipped, and we can get the correct index - if cfg!(feature = "format") && format.no_integer_leading_zeros() && !has_base_prefix && iter.peek() == Some(&b'0') { + if cfg!(feature = "format") + && format.no_integer_leading_zeros() + && !has_base_prefix + && iter.peek() == Some(&b'0') + { // NOTE: Skipping zeros is **EXPENSIVE* so we skip that without our format feature let index = iter.cursor(); let zeros = iter.skip_zeros(); @@ -725,61 +675,67 @@ macro_rules! 
algorithm { let start_index = iter.cursor(); let mut value = T::ZERO; - if T::IS_SIGNED && cannot_overflow && is_negative { - parse_digits_unchecked!( - value, - iter, - wrapping_sub, - start_index, - $invalid_digit, - $no_multi_digit, - has_suffix, - true, - ); - } else if cannot_overflow { - parse_digits_unchecked!( - value, - iter, - wrapping_add, - start_index, - $invalid_digit, - $no_multi_digit, - has_suffix, - true, - ); - } else if T::IS_SIGNED && is_negative { - parse_digits_checked!( - value, - iter, - checked_sub, - wrapping_sub, - start_index, - $invalid_digit, - Underflow, - $no_multi_digit, - has_suffix, - overflow_digits, - ); + if cannot_overflow { + if !T::IS_SIGNED || !is_negative { + parse_digits_unchecked!( + value, + iter, + wrapping_add, + start_index, + $invalid_digit, + $no_multi_digit, + has_suffix, + true, + ); + } else { + parse_digits_unchecked!( + value, + iter, + wrapping_sub, + start_index, + $invalid_digit, + $no_multi_digit, + has_suffix, + true, + ); + } } else { - parse_digits_checked!( - value, - iter, - checked_add, - wrapping_add, - start_index, - $invalid_digit, - Overflow, - $no_multi_digit, - has_suffix, - overflow_digits, - ); + if !T::IS_SIGNED || !is_negative { + parse_digits_checked!( + value, + iter, + checked_add, + wrapping_add, + start_index, + $invalid_digit, + Overflow, + $no_multi_digit, + has_suffix, + overflow_digits, + ); + } else { + parse_digits_checked!( + value, + iter, + checked_sub, + wrapping_sub, + start_index, + $invalid_digit, + Underflow, + $no_multi_digit, + has_suffix, + overflow_digits, + ); + } } if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_suffix() && !has_suffix { return Err(Error::MissingBaseSuffix(iter.cursor())); } - $into_ok!(value, iter.buffer_length(), iter.digits() != 0) + // NOTE: This is always true if we don't have digit separators, since checking + // for no skip always checks if there's a next digit, which simplifies it. 
+ $into_ok!(value, iter.buffer_length(), iter.is_contiguous() || iter.digits() != 0) }}; } diff --git a/lexical-util/src/digit.rs b/lexical-util/src/digit.rs index 6bb08fe4..25567ed4 100644 --- a/lexical-util/src/digit.rs +++ b/lexical-util/src/digit.rs @@ -11,21 +11,13 @@ // using a runtime algorithm is preferable. /// Unchecked, highly optimized algorithm to convert a char to a digit. -/// This only works if the input character is known to be a valid digit. +/// This does no input validation: you must check the value is <= radix. #[inline(always)] pub const fn char_to_valid_digit_const(c: u8, radix: u32) -> u32 { - if radix <= 10 { - // Optimize for small radixes. + if radix <= 10 || c <= b'9' { (c.wrapping_sub(b'0')) as u32 } else { - // Fallback, still decently fast. - let digit = match c { - b'0'..=b'9' => c - b'0', - b'A'..=b'Z' => c - b'A' + 10, - b'a'..=b'z' => c - b'a' + 10, - _ => 0xFF, - }; - digit as u32 + ((c | 0x20).wrapping_sub(b'a')) as u32 + 10 } } diff --git a/lexical-util/src/iterator.rs b/lexical-util/src/iterator.rs index 1fcda1f8..7682f334 100644 --- a/lexical-util/src/iterator.rs +++ b/lexical-util/src/iterator.rs @@ -9,11 +9,129 @@ use core::mem; // Re-export our digit iterators. +use crate::error::Error; +use crate::format::NumberFormat; #[cfg(not(feature = "format"))] pub use crate::noskip::{AsBytes, Bytes}; +use crate::result::Result; #[cfg(feature = "format")] pub use crate::skip::{AsBytes, Bytes}; +/// Converts the sign parser to return a result and handles increment the +/// internal state. +/// +/// 1. Parses the sign digit. +/// 2. Handles if positive signs before integers are not allowed. +/// 3. Handles negative signs if the type is unsigned. +/// 4. Handles if the sign is required, but missing. +/// 5. Handles if the iterator is empty, before or after parsing the sign. +/// 6. Handles if the iterator has invalid, leading zeros. +/// +/// This does not handle missing digits: it is assumed the caller will. 
+/// +/// It assumes the next digit is the sign character, that is, +/// leading and trailing digits **HAVE** been handled for non- +/// contiguous iterators. +macro_rules! read_sign { + ( + $byte:ident, + $index:ident, + $is_signed:expr, + $no_positive:expr, + $required:expr, + $invalid_positive:ident, + $missing:ident $(,)? + ) => {{ + let (is_negative, have_sign) = match $byte.get_buffer().get($index) { + Some(&b'+') => (false, true), + Some(&b'-') => (true, true), + _ => (false, false), + }; + match (is_negative, have_sign) { + (false, true) if !$no_positive => { + // SAFETY: We have at least 1 item left since we peaked a value + unsafe { $byte.set_cursor($index + 1) }; + Ok(false) + }, + (false, true) if $no_positive => Err(Error::$invalid_positive($byte.cursor())), + (true, true) if $is_signed => { + // SAFETY: We have at least 1 item left since we peaked a value + unsafe { $byte.set_cursor($index + 1) }; + Ok(true) + }, + _ if $required => Err(Error::$missing($byte.cursor())), + _ => Ok(false), + } + }}; +} + +/// Parse the sign from the leading integer digits. +/// +/// It assumes the next digit is the sign character, that is, +/// leading and trailing digits **HAVE** been handled for non- +/// contiguous iterators. +#[cfg_attr(not(feature = "compact"), inline(always))] +pub(crate) fn read_integer_sign( + byte: &mut Bytes<'_, FORMAT>, + index: usize, + is_signed: bool, +) -> Result { + let format = NumberFormat:: {}; + read_sign!( + byte, + index, + is_signed, + format.no_positive_mantissa_sign(), + format.required_mantissa_sign(), + InvalidPositiveSign, + MissingSign, + ) +} + +/// Parse the sign from the leading mantissa digits. +/// +/// It assumes the next digit is the sign character, that is, +/// leading and trailing digits **HAVE** been handled for non- +/// contiguous iterators. 
+#[cfg_attr(not(feature = "compact"), inline(always))] +pub(crate) fn read_mantissa_sign( + byte: &mut Bytes<'_, FORMAT>, + index: usize, +) -> Result { + let format = NumberFormat:: {}; + read_sign!( + byte, + index, + true, + format.no_positive_mantissa_sign(), + format.required_mantissa_sign(), + InvalidPositiveMantissaSign, + MissingMantissaSign, + ) +} + +/// Parse the sign from the leading exponent digits. +/// +/// It assumes the next digit is the sign character, that is, +/// leading and trailing digits **HAVE** been handled for non- +/// contiguous iterators. +#[cfg_attr(not(feature = "compact"), inline(always))] +pub(crate) fn read_exponent_sign( + byte: &mut Bytes<'_, FORMAT>, + index: usize, +) -> Result { + let format = NumberFormat:: {}; + read_sign!( + byte, + index, + true, + format.no_positive_exponent_sign(), + format.required_exponent_sign(), + InvalidPositiveExponentSign, + MissingExponentSign, + ) +} + /// A trait for working with iterables of bytes. /// /// These iterators can either be contiguous or not contiguous and provide @@ -282,6 +400,81 @@ pub unsafe trait Iter<'a> { None } } + + /// Parse the sign from an integer (not for floats). + /// + /// If this allows leading digit separators, it will handle + /// those internally and advance the state as needed. This + /// returned if the value is negative, or any error found when parsing the + /// sign. This does not handle missing digits: it is assumed the caller + /// will. This internally increments the count to right after the sign. + /// + /// The default implementation does not support digit separators. + /// + /// 1. Parses the sign digit. + /// 2. Handles if positive signs are not allowed. + /// 3. Handles negative signs if the type is unsigned. + /// 4. Handles if the sign is required, but missing. + /// 5. Handles if the iterator is empty, before or after parsing the sign. + /// 6. Handles if the iterator has invalid, leading zeros. 
+ fn read_integer_sign(&mut self, is_signed: bool) -> Result; + + /// Parse the sign from a mantissa (only for floats). + /// + /// If this allows leading digit separators, it will handle + /// those internally and advance the state as needed. This + /// returned if the value is negative, or any error found when parsing the + /// sign. This does not handle missing digits: it is assumed the caller + /// will. This internally increments the count to right after the sign. + /// + /// The default implementation does not support digit separators. + /// + /// 1. Parses the sign digit. + /// 2. Handles if positive signs are not allowed. + /// 3. Handles negative signs if the type is unsigned. + /// 4. Handles if the sign is required, but missing. + /// 5. Handles if the iterator is empty, before or after parsing the sign. + /// 6. Handles if the iterator has invalid, leading zeros. + fn read_mantissa_sign(&mut self) -> Result; + + /// Parse the sign from an exponent. + /// + /// If this allows leading digit separators, it will handle + /// those internally and advance the state as needed. This + /// returned if the value is negative, or any error found when parsing the + /// sign. This does not handle missing digits: it is assumed the caller + /// will. This internally increments the count to right after the sign. + /// + /// The default implementation does not support digit separators. + /// + /// 1. Parses the sign digit. + /// 2. Handles if positive signs are not allowed. + /// 3. Handles negative signs if the type is unsigned. + /// 4. Handles if the sign is required, but missing. + /// 5. Handles if the iterator is empty, before or after parsing the sign. + /// 6. Handles if the iterator has invalid, leading zeros. + fn read_exponent_sign(&mut self) -> Result; + + /// Read the base prefix, if present, returning if the base prefix + /// was present. 
+ /// + /// If the base prefix was not present, it does not consume any + /// leading zeroes or digit separators, so they can be processed afterwards. + /// Otherwise, it advances the iterator state to the end of the base + /// prefix, including consuming any trailing digit separators. + /// + /// Any caller that consumes leading digit separators will need + /// to ignore it if base prefix trailing digit separators are enabled. + fn read_base_prefix(&mut self) -> bool; + + /// Read the base suffix, if present, returning if the base suffix + /// was present. + /// + /// If the base suffix was not present, it does not consume any + /// digits or digit separators, so the total digit count is valid. + /// Otherwise, it advances the iterator state to the end of the base + /// suffix, including consuming any trailing digit separators. + fn read_base_suffix(&mut self, has_exponent: bool) -> bool; } /// Iterator over a contiguous block of bytes. @@ -485,32 +678,4 @@ pub trait DigitsIter<'a>: Iterator + Iter<'a> { /// Determine if the character is a digit. fn is_digit(&self, value: u8) -> bool; - - // ------- - - /// Parse the sign from the iterator. - /// - /// If this allows leading digit separators, it will handle - /// those internally and advance the state as needed. This - /// returned if the value is negative and if a sign was found. - /// - /// The default implementation does not support digit separators. - fn parse_sign(&mut self) -> (bool, bool); - - /// Read the base prefix, if present, returning if the base prefix - /// was present. - /// - /// If the base prefix was not present, it does not consume any - /// leading zeroes, so they can be processed afterwards. Otherwise, - /// it advances the iterator state to the end of the base prefix, - /// including consuming any trailing digit separators. - /// - /// Any caller that consumes leading digit separators will need - /// to ignore it if base prefix trailing digit separators are enabled. 
- fn read_base_prefix(&mut self) -> bool; - - // TODO: Should implement the `is_base_prefix` internally here at - // the start and should only be used if it has one. Since the format - // will be known in the skip iterator, this is doable. - // TODO: Should implement `is_base_suffix` here } diff --git a/lexical-util/src/noskip.rs b/lexical-util/src/noskip.rs index a99e851a..b3eeb0da 100644 --- a/lexical-util/src/noskip.rs +++ b/lexical-util/src/noskip.rs @@ -9,7 +9,8 @@ use core::{mem, ptr}; use crate::digit::char_is_digit_const; use crate::format::NumberFormat; -use crate::iterator::{DigitsIter, Iter}; +use crate::iterator::{self, DigitsIter, Iter}; +use crate::result::Result; // AS DIGITS // --------- @@ -142,6 +143,32 @@ unsafe impl<'a, const __: u128> Iter<'a> for Bytes<'a, __> { // SAFETY: safe as long as the slice has at least count elements. unsafe { ptr::read_unaligned::(self.as_ptr() as *const _) } } + + #[inline(always)] + fn read_integer_sign(&mut self, is_signed: bool) -> Result { + iterator::read_integer_sign(self, self.cursor(), is_signed) + } + + #[inline(always)] + fn read_mantissa_sign(&mut self) -> Result { + iterator::read_mantissa_sign(self, self.cursor()) + } + + #[inline(always)] + fn read_exponent_sign(&mut self) -> Result { + iterator::read_exponent_sign(self, self.cursor()) + } + + #[inline(always)] + fn read_base_prefix(&mut self) -> bool { + false + } + + #[inline(always)] + fn read_base_suffix(&mut self, has_exponent: bool) -> bool { + _ = has_exponent; + false + } } // DIGITS ITERATOR @@ -219,6 +246,31 @@ unsafe impl<'a: 'b, 'b, const __: u128> Iter<'a> for DigitsIterator<'a, 'b, __> // SAFETY: safe as long as the slice has at least count elements. 
unsafe { self.byte.peek_many_unchecked() } } + + #[inline(always)] + fn read_integer_sign(&mut self, is_signed: bool) -> Result { + self.byte.read_integer_sign(is_signed) + } + + #[inline(always)] + fn read_mantissa_sign(&mut self) -> Result { + self.byte.read_mantissa_sign() + } + + #[inline(always)] + fn read_exponent_sign(&mut self) -> Result { + self.byte.read_exponent_sign() + } + + #[inline(always)] + fn read_base_prefix(&mut self) -> bool { + self.byte.read_base_prefix() + } + + #[inline(always)] + fn read_base_suffix(&mut self, has_exponent: bool) -> bool { + self.byte.read_base_suffix(has_exponent) + } } impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for DigitsIterator<'a, 'b, FORMAT> { @@ -248,21 +300,6 @@ impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for DigitsIterator<'a, 'b, F let format = NumberFormat::<{ FORMAT }> {}; char_is_digit_const(value, format.mantissa_radix()) } - - #[inline(always)] - fn parse_sign(&mut self) -> (bool, bool) { - // NOTE: `read_if` optimizes poorly since we then match after - match self.first() { - Some(&b'+') => (false, true), - Some(&b'-') => (true, true), - _ => (false, false), - } - } - - #[inline(always)] - fn read_base_prefix(&mut self) -> bool { - false - } } impl<'a: 'b, 'b, const __: u128> Iterator for DigitsIterator<'a, 'b, __> { diff --git a/lexical-util/src/num.rs b/lexical-util/src/num.rs index 5494dd94..6bbdc977 100644 --- a/lexical-util/src/num.rs +++ b/lexical-util/src/num.rs @@ -392,38 +392,45 @@ pub trait Number: { /// If the number can hold negative values. const IS_SIGNED: bool; + + /// If the number is a floating-point number. + const IS_FLOAT: bool; + + /// If the number is an integer. + const IS_INTEGER: bool = !Self::IS_FLOAT; } macro_rules! number_impl { - ($($t:tt $is_signed:literal ; )*) => ($( + ($($t:tt $is_signed:literal $is_float:literal ; )*) => ($( impl Number for $t { const IS_SIGNED: bool = $is_signed; + const IS_FLOAT: bool = $is_float; } )*) } number_impl! 
{ - u8 false ; - u16 false ; - u32 false ; - u64 false ; - u128 false ; - usize false ; - i8 true ; - i16 true ; - i32 true ; - i64 true ; - i128 true ; - isize true ; - f32 true ; - f64 true ; - // f128 true + u8 false false ; + u16 false false ; + u32 false false ; + u64 false false ; + u128 false false ; + usize false false ; + i8 true false ; + i16 true false ; + i32 true false ; + i64 true false ; + i128 true false ; + isize true false ; + f32 true true ; + f64 true true ; + // f128 true true ; } #[cfg(feature = "f16")] number_impl! { - f16 true ; - bf16 true ; + f16 true true ; + bf16 true true ; } // INTEGER diff --git a/lexical-util/src/skip.rs b/lexical-util/src/skip.rs index 0811a9f1..a04b1252 100644 --- a/lexical-util/src/skip.rs +++ b/lexical-util/src/skip.rs @@ -46,7 +46,8 @@ use core::{mem, ptr}; use crate::digit::char_is_digit_const; use crate::format::NumberFormat; use crate::format_flags as flags; -use crate::iterator::{DigitsIter, Iter}; +use crate::iterator::{self, DigitsIter, Iter}; +use crate::result::Result; // IS_ILTC // ------- @@ -1204,6 +1205,115 @@ unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { // SAFETY: Safe if the buffer has at least `size_of::` elements. 
unsafe { self.peek_many_unchecked_impl(Self::IS_CONTIGUOUS) } } + + #[inline(always)] + fn read_integer_sign(&mut self, is_signed: bool) -> Result { + let format = NumberFormat::<{ FORMAT }> {}; + let index = maybe_consume( + self.get_buffer(), + format.digit_separator(), + self.cursor(), + format.integer_sign_digit_separator(), + format.integer_consecutive_sign_digit_separator(), + ); + iterator::read_integer_sign(self, index, is_signed) + } + + #[inline(always)] + fn read_mantissa_sign(&mut self) -> Result { + let format = NumberFormat::<{ FORMAT }> {}; + let index = maybe_consume( + self.get_buffer(), + format.digit_separator(), + self.cursor(), + format.integer_sign_digit_separator(), + format.integer_consecutive_sign_digit_separator(), + ); + iterator::read_mantissa_sign(self, index) + } + + #[inline(always)] + fn read_exponent_sign(&mut self) -> Result { + let format = NumberFormat::<{ FORMAT }> {}; + let index = maybe_consume( + self.get_buffer(), + format.digit_separator(), + self.cursor(), + format.exponent_sign_digit_separator(), + format.exponent_consecutive_sign_digit_separator(), + ); + iterator::read_exponent_sign(self, index) + } + + #[inline(always)] + fn read_base_prefix(&mut self) -> bool { + let format = NumberFormat::<{ FORMAT }> {}; + let digit_separator = format.digit_separator(); + let base_prefix = format.base_prefix(); + let is_cased = format.case_sensitive_base_prefix(); + + if !cfg!(feature = "power-of-two") || base_prefix == 0 { + return false; + } + + // grab our cursor information + let mut index = self.cursor(); + let bytes = self.get_buffer(); + + // we can skip if we're not at the absolute start (a preceeding sign) + // or we've enabled start digit separators. 
+ let consecutive = format.base_prefix_consecutive_digit_separator(); + let skip_start = format.start_digit_separator() || index > 0; + if skip_start && format.base_prefix_leading_digit_separator() { + index = consume(bytes, digit_separator, index, consecutive); + } + + if bytes.get(index) != Some(&b'0') { + return false; + } + index += 1; + debug_assert!(index <= bytes.len()); + + if format.base_prefix_internal_digit_separator() { + index = consume(bytes, digit_separator, index, consecutive); + } + if bytes + .get(index) + .map(|&x| !Self::is_value_equal(x, base_prefix, is_cased)) + .unwrap_or(false) + { + return false; + } + index += 1; + debug_assert!(index <= bytes.len()); + + // NOTE: We want to simplify our implementation, so leave this in a + // simple state for our integer parser. We shouldn't skip digits + // if the integer can skip leading digit separators and we can skip + // trailing, but they can consume consecutive separators, since that + // would just be re-processing data. + let prefix_trailing = format.base_prefix_trailing_digit_separator(); + let mut should_skip = prefix_trailing; + if format.integer_leading_digit_separator() { + should_skip &= consecutive && !format.integer_consecutive_digit_separator(); + } + if should_skip { + index = consume(bytes, digit_separator, index, consecutive); + } + + // SAFETY: safe, since we've consumed at most 1 digit prior to + // consume, we will never go `> bytes.len()`, so this is safe. + debug_assert!(index <= bytes.len()); + unsafe { self.set_cursor(index) }; + + true + } + + #[inline(always)] + fn read_base_suffix(&mut self, has_exponent: bool) -> bool { + _ = has_exponent; + todo!(); // TODO: Implement and test + } } // ITERATOR HELPERS @@ -1355,6 +1465,31 @@ macro_rules! skip_iterator_iter_base { // SAFETY: Safe if the buffer has at least `size_of::` elements. 
unsafe { self.byte.peek_many_unchecked_impl(Self::IS_CONTIGUOUS) } } + + #[inline(always)] + fn read_integer_sign(&mut self, is_signed: bool) -> Result { + self.byte.read_integer_sign(is_signed) + } + + #[inline(always)] + fn read_mantissa_sign(&mut self) -> Result { + self.byte.read_mantissa_sign() + } + + #[inline(always)] + fn read_exponent_sign(&mut self) -> Result { + self.byte.read_exponent_sign() + } + + #[inline(always)] + fn read_base_prefix(&mut self) -> bool { + self.byte.read_base_prefix() + } + + #[inline(always)] + fn read_base_suffix(&mut self, has_exponent: bool) -> bool { + self.byte.read_base_suffix(has_exponent) + } }; } @@ -1384,89 +1519,22 @@ fn consume(bytes: &[u8], value: u8, mut index: usize, consecutive: bool) -> usiz index } -/// Internal helper for parsing the sign once a given index is known. +/// Maybe iteratively consume digits matching the value, if it's not 0. #[inline(always)] -fn parse_sign_impl(bytes: &[u8], index: usize) -> (bool, bool) { - match bytes.get(index) { - Some(&b'+') => (false, true), - Some(&b'-') => (true, true), - _ => (false, false), - } -} - -/// Uses the internal flags to parse out flags. -#[inline(always)] -fn parse_sign<'a, T>( - iter: &mut T, - digit_separator: u8, +fn maybe_consume( + bytes: &[u8], + value: u8, + index: usize, can_skip: bool, consecutive: bool, -) -> (bool, bool) -where - T: DigitsIter<'a>, -{ - let bytes = iter.get_buffer(); - if digit_separator != 0 && can_skip { - let index = consume(bytes, digit_separator, iter.cursor(), consecutive); - // only advance the internal state if we have a sign - // otherwise, we need to keep the start exactly where - // the buffer started - match parse_sign_impl(bytes, index) { - (is_negative, true) => { - // SAFETY: safe, since consume will never go `> bytes.len()`, and - // `iter.cursor() <= bytes.len()`, that is, we must be within - // the valid bounds, and `bytes.get(index)` in `parse_sign_impl` - // just returned `Some(...)`. 
- unsafe { iter.set_cursor(index) }; - (is_negative, true) - }, - (_, false) => (false, false), - } +) -> usize { + if value != 0 && can_skip { + consume(bytes, value, index, consecutive) } else { - parse_sign_impl(bytes, iter.cursor()) + index } } -/// Parse specifically the integer sign component. -#[inline(always)] -fn integer_parse_sign<'a, T, const FORMAT: u128>(iter: &mut T) -> (bool, bool) -where - T: DigitsIter<'a>, -{ - let format = NumberFormat::<{ FORMAT }> {}; - parse_sign( - iter, - format.digit_separator(), - format.integer_sign_digit_separator(), - format.integer_consecutive_sign_digit_separator(), - ) -} - -/// Parse specifically the fraction sign component. -#[inline(always)] -fn fraction_parse_sign<'a, T, const FORMAT: u128>(iter: &mut T) -> (bool, bool) -where - T: DigitsIter<'a>, -{ - _ = iter; - unimplemented!() -} - -/// Parse specifically the exponent sign component. -#[inline(always)] -fn exponent_parse_sign<'a, T, const FORMAT: u128>(iter: &mut T) -> (bool, bool) -where - T: DigitsIter<'a>, -{ - let format = NumberFormat::<{ FORMAT }> {}; - parse_sign( - iter, - format.digit_separator(), - format.exponent_sign_digit_separator(), - format.exponent_consecutive_sign_digit_separator(), - ) -} - /// Create impl `ByteIter` block for skip iterator. macro_rules! skip_iterator_bytesiter_impl { ( @@ -1577,76 +1645,6 @@ macro_rules! 
skip_iterator_bytesiter_impl { let format = NumberFormat::<{ FORMAT }> {}; char_is_digit_const(value, format.mantissa_radix()) } - - #[inline(always)] - fn parse_sign(&mut self) -> (bool, bool) { - $sign_parser::<_, FORMAT>(self) - } - - #[inline(always)] - fn read_base_prefix(&mut self) -> bool { - let format = NumberFormat::<{ FORMAT }> {}; - let digit_separator = format.digit_separator(); - let base_prefix = format.base_prefix(); - let is_cased = format.case_sensitive_base_prefix(); - - let is_integer = flags::$i == flags::INTEGER_INTERNAL_DIGIT_SEPARATOR; - if !is_integer || !cfg!(feature = "power-of-two") || base_prefix == 0 { - return false; - } - - // grab our cursor information - let mut index = self.cursor(); - let bytes = self.get_buffer(); - - // we can skip if we're not at the absolute start (a preceeding sign) - // or we've enabled start digit separators. - let consecutive = format.base_prefix_consecutive_digit_separator(); - let skip_start = format.start_digit_separator() || index > 0; - if skip_start && format.base_prefix_leading_digit_separator() { - index = consume(bytes, digit_separator, index, consecutive); - } - - if bytes.get(index) != Some(&b'0') { - return false; - } - index += 1; - debug_assert!(index <= bytes.len()); - - if format.base_prefix_internal_digit_separator() { - index = consume(bytes, digit_separator, index, consecutive); - } - if bytes - .get(index) - .map(|&x| !Self::is_value_equal(x, base_prefix, is_cased)) - .unwrap_or(false) - { - return false; - } - index += 1; - debug_assert!(index <= bytes.len()); - - // NOTE: We want to simplify our implementation, so leave this in a - // simple state for our integer parser. We shouldn't skip digits - // if the integer can skip leading digit separators and we can skip - // trailing, but they can consume consecutive separators, since that - // would just be re-processing data. 
- let prefix_trailing = format.base_prefix_trailing_digit_separator(); - let mut should_skip = prefix_trailing; - if format.integer_leading_digit_separator() { - should_skip &= consecutive && !format.integer_consecutive_digit_separator(); - } - if should_skip { - index = consume(bytes, digit_separator, index, consecutive); - } - - // SAFETY: safe, since we've consumed at most 1 digit prior to - // consume, we will never go `> bytes.len()`, so this is safe. - debug_assert!(index <= bytes.len()); - unsafe { self.set_cursor(index) }; - - true - } } }; } @@ -1757,14 +1755,4 @@ impl<'a: 'b, 'b, const FORMAT: u128> DigitsIter<'a> for SpecialDigitsIterator<'a let format = NumberFormat::<{ FORMAT }> {}; char_is_digit_const(value, format.mantissa_radix()) } - - #[inline(always)] - fn parse_sign(&mut self) -> (bool, bool) { - unimplemented!(); - } - - #[inline(always)] - fn read_base_prefix(&mut self) -> bool { - unimplemented!(); - } } From b8d2f482b682b2ea0a32737a2b2ead6ad6435702 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Tue, 21 Jan 2025 20:27:02 -0600 Subject: [PATCH 14/18] Improve parsing of base prefixes and suffixes. This adds comprehensive tests, including correctness ones, as well as performance benchmarks to ensure optimal ASM is generated on x86_64, including re-arranging some code to optimize jumps. 
--- lexical-parse-float/src/parse.rs | 19 +- lexical-parse-float/tests/api_tests.rs | 329 ++++++++++++++- lexical-parse-integer/src/algorithm.rs | 263 +++++------- .../tests/algorithm_tests.rs | 5 +- lexical-parse-integer/tests/api_tests.rs | 384 +++++++++++++++++- lexical-util/src/feature_format.rs | 5 +- lexical-util/src/format_builder.rs | 3 + lexical-util/src/iterator.rs | 4 +- lexical-util/src/noskip.rs | 12 +- lexical-util/src/not_feature_format.rs | 5 +- lexical-util/src/skip.rs | 91 ++++- scripts/asm.sh | 2 +- 12 files changed, 894 insertions(+), 228 deletions(-) diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs index 0e3d85f9..5f991ac6 100644 --- a/lexical-parse-float/src/parse.rs +++ b/lexical-parse-float/src/parse.rs @@ -503,10 +503,8 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( let bits_per_base = shared::log2(format.exponent_base()) as i64; // skip and validate an optional base prefix - let has_base_prefix = cfg!(feature = "format") && byte.read_base_prefix(); - if cfg!(feature = "format") && !has_base_prefix && format.required_base_prefix() { - return Err(Error::MissingBasePrefix(byte.cursor())); - } + let has_base_prefix = + cfg!(all(feature = "format", feature = "power-of-two")) && byte.read_base_prefix()?; // INTEGER @@ -703,18 +701,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>( } // Check to see if we have a valid base suffix. - // We've already trimmed any leading digit separators here, so we can be safe - // that the first character **is not** a digit separator. - if cfg!(all(feature = "format", feature = "power-of-two")) && format.has_base_suffix() { - let base_suffix = format.base_suffix(); - let is_suffix = byte.first_is(base_suffix, format.case_sensitive_base_suffix()); - if is_suffix { - // SAFETY: safe since `byte.len() >= 1`. 
- unsafe { byte.step_unchecked() }; - } else if format.required_base_suffix() { - return Err(Error::MissingBaseSuffix(byte.cursor())); - } - } + _ = byte.read_base_suffix(has_exponent)?; // CHECK OVERFLOW diff --git a/lexical-parse-float/tests/api_tests.rs b/lexical-parse-float/tests/api_tests.rs index bb40dde8..46e2a3c1 100644 --- a/lexical-parse-float/tests/api_tests.rs +++ b/lexical-parse-float/tests/api_tests.rs @@ -1346,7 +1346,7 @@ fn require_base_prefix_test() { #[test] #[cfg(all(feature = "format", feature = "power-of-two"))] -fn base_prefix_digit_separator_edge_cases_test() { +fn base_prefix_no_digit_separator_test() { use core::num; const OPTIONS: Options = Options::new(); @@ -1355,34 +1355,331 @@ fn base_prefix_digit_separator_edge_cases_test() { .leading_digit_separator(true) .build_strict(); - let value = f64::from_lexical_with_options::(b"_+12345", &OPTIONS); + let value = f64::from_lexical_with_options::(b"_+12345.6", &OPTIONS); assert_eq!(value, Err(Error::InvalidDigit(1))); - let value = f64::from_lexical_with_options::(b"+_12345", &OPTIONS); - assert_eq!(value, Ok(12345.0)); + let value = f64::from_lexical_with_options::(b"+_12345.6", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"1.2", &OPTIONS); + assert_eq!(value, Ok(1.2)); + + const OPT_PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .build_strict(); - let value = f64::from_lexical_with_options::(b"+12345e_+23", &OPTIONS); - assert_eq!(value, Err(Error::EmptyExponent(8))); + let value = f64::from_lexical_with_options::(b"1.2", &OPTIONS); + assert_eq!(value, Ok(1.2)); - let value = f64::from_lexical_with_options::(b"+12345e+_23", &OPTIONS); - assert_eq!(value, Ok(1.2345e27)); + let value = f64::from_lexical_with_options::(b"0d1.2", &OPTIONS); + assert_eq!(value, Ok(1.2)); const PREFIX: u128 = NumberFormatBuilder::new() 
.digit_separator(num::NonZeroU8::new(b'_')) .base_prefix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) .required_base_prefix(true) - .leading_digit_separator(true) .build_strict(); - let value = f64::from_lexical_with_options::(b"_+0d12345", &OPTIONS); + let value = f64::from_lexical_with_options::(b"_+0d12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = f64::from_lexical_with_options::(b"+_0d12345.6", &OPTIONS); assert_eq!(value, Err(Error::MissingBasePrefix(1))); - let value = f64::from_lexical_with_options::(b"+_0d12345", &OPTIONS); - assert_eq!(value, Ok(12345.0)); + let value = f64::from_lexical_with_options::(b"+0_d12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); - // TODO: This fails - let value = f64::from_lexical_with_options::(b"+0d_12345", &OPTIONS); - assert_eq!(value, Ok(12345.0)); + let value = f64::from_lexical_with_options::(b"+0d_12345.6", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(3))); + + let value = f64::from_lexical_with_options::(b"+0d12345.6", &OPTIONS); + assert_eq!(value, Ok(12345.6)); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_prefix_l_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_prefix(true) + .base_prefix_leading_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"_+0d12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = f64::from_lexical_with_options::(b"+_0d12345.6", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"+__0d12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = 
f64::from_lexical_with_options::(b"+0_d12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = f64::from_lexical_with_options::(b"+0d_12345.6", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(3))); + + let value = f64::from_lexical_with_options::(b"+0d12345.6", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(PREFIX) + .base_prefix_consecutive_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"+__0d12345.6", &OPTIONS); + assert_eq!(value, Ok(12345.6)); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_prefix_i_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_prefix(true) + .base_prefix_internal_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"_+0d12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = f64::from_lexical_with_options::(b"+_0d12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = f64::from_lexical_with_options::(b"+0_d12345.6", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"+0__d12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = f64::from_lexical_with_options::(b"+0d_12345.6", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(3))); + + let value = f64::from_lexical_with_options::(b"+0d12345.6", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(PREFIX) + .base_prefix_consecutive_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"+0__d12345.6", &OPTIONS); + 
assert_eq!(value, Ok(12345.6)); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_prefix_t_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_prefix(true) + .base_prefix_trailing_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"_+0d12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = f64::from_lexical_with_options::(b"+_0d12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = f64::from_lexical_with_options::(b"+0_d12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = f64::from_lexical_with_options::(b"+0d_12345.6", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"+0d__12345.6", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(4))); + + let value = f64::from_lexical_with_options::(b"+0d12345.6", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + // special case: overlap with a leading digit separator + const LEADING: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .leading_digit_separator(true) + .required_base_prefix(true) + .base_prefix_trailing_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"+0d_12345.6", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"+0d__12345.6", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(3))); + + const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(PREFIX) + .base_prefix_consecutive_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"+0d__12345.6", &OPTIONS); + 
assert_eq!(value, Ok(12345.6)); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_suffix_no_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const NO_SUFFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"_+12345.6", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(1))); + + let value = f64::from_lexical_with_options::(b"+_12345.6", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"1.2", &OPTIONS); + assert_eq!(value, Ok(1.2)); + + const OPT_SUFFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_suffix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"1.2", &OPTIONS); + assert_eq!(value, Ok(1.2)); + + let value = f64::from_lexical_with_options::(b"1.2d", &OPTIONS); + assert_eq!(value, Ok(1.2)); + + const SUFFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_suffix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_suffix(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"_+12345.6d", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(0))); + + let value = f64::from_lexical_with_options::(b"+12345.6_d", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(8))); + + let value = f64::from_lexical_with_options::(b"+12345.6d", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"+12345.6d_", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(9))); + + let value = f64::from_lexical_with_options::(b"+12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(8))); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] 
+fn base_suffix_l_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const SUFFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_suffix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_suffix(true) + .base_suffix_leading_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"_+12345.6d", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(0))); + + let value = f64::from_lexical_with_options::(b"+12345.6_d", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"+12345.6__d", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(8))); + + let value = f64::from_lexical_with_options::(b"+12345.6d", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"+12345.6d_", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(9))); + + let value = f64::from_lexical_with_options::(b"+12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(8))); + + // special case: overlap with a trailing digit separator + const TRAILING: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_suffix(num::NonZeroU8::new(b'd')) + .trailing_digit_separator(true) + .required_base_suffix(true) + .base_suffix_leading_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"+12345.6_d", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"+12345.6__d", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(8))); + + const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(SUFFIX) + .base_suffix_consecutive_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"+12345.6__d", &OPTIONS); + assert_eq!(value, Ok(12345.6)); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn 
base_suffix_t_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const SUFFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_suffix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_suffix(true) + .base_suffix_trailing_digit_separator(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"_+12345.6d", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(0))); + + let value = f64::from_lexical_with_options::(b"+12345.6_d", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(8))); + + let value = f64::from_lexical_with_options::(b"+12345.6d", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"+12345.6d_", &OPTIONS); + assert_eq!(value, Ok(12345.6)); + + let value = f64::from_lexical_with_options::(b"+12345.6", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(8))); + + let value = f64::from_lexical_with_options::(b"+12345.6d__", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(10))); + + const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(SUFFIX) + .base_suffix_consecutive_digit_separator(true) + .build_strict(); - // TODO:> Add suffix + let value = f64::from_lexical_with_options::(b"+12345.6d__", &OPTIONS); + assert_eq!(value, Ok(12345.6)); } diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index 6ef56141..119d5743 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -50,37 +50,30 @@ const fn can_try_parse_multidigits<'a, Iter: DigitsIter<'a>, const FORMAT: u128> } // Get if digits are required for the format. -#[cfg_attr(not(feature = "format"), allow(unused_macros))] macro_rules! 
required_digits { - () => { - NumberFormat::::REQUIRED_INTEGER_DIGITS - || NumberFormat::::REQUIRED_MANTISSA_DIGITS - }; + () => {{ + let format = NumberFormat:: {}; + format.required_integer_digits() || format.required_mantissa_digits() + }}; } /// If a buffer is empty, return the value or an error. macro_rules! maybe_into_empty { ($iter:ident, $into_ok:ident) => { if $iter.is_buffer_empty() { - // Our default format **ALWAYS** requires significant digits, however, - // we can have cases where we don - #[cfg(not(feature = "format"))] - into_error!(Empty, $iter.cursor()); - - #[cfg(feature = "format")] - $into_ok!(T::ZERO, $iter.cursor(), false) + if required_digits!() { + into_error!(Empty, $iter.cursor()); + } else { + $into_ok!(T::ZERO, $iter.cursor(), false) + } } }; } /// Return an value for a complete parser. macro_rules! into_ok_complete { - ($value:expr, $index:expr, $any:expr) => {{ - #[cfg(not(feature = "format"))] - return Ok(as_cast($value)); - - #[cfg(feature = "format")] - if required_digits!() && !$any { + ($value:expr, $index:expr, $any_digits:expr) => {{ + if required_digits!() && !$any_digits { into_error!(Empty, $index); } else { return Ok(as_cast($value)); @@ -90,12 +83,8 @@ macro_rules! into_ok_complete { /// Return an value and index for a partial parser. macro_rules! into_ok_partial { - ($value:expr, $index:expr, $any:expr) => {{ - #[cfg(not(feature = "format"))] - return Ok((as_cast($value), $index)); - - #[cfg(feature = "format")] - if required_digits!() && !$any { + ($value:expr, $index:expr, $any_digits:expr) => {{ + if required_digits!() && !$any_digits { into_error!(Empty, $index); } else { return Ok((as_cast($value), $index)); @@ -105,7 +94,7 @@ macro_rules! into_ok_partial { /// Return an error for a complete parser upon an invalid digit. macro_rules! invalid_digit_complete { - ($value:expr, $index:expr, $any:expr) => { + ($value:expr, $index:expr, $any_digits:expr) => { // Don't do any overflow checking here: we don't need it. 
into_error!(InvalidDigit, $index - 1) }; @@ -114,9 +103,9 @@ macro_rules! invalid_digit_complete { /// Return a value for a partial parser upon an invalid digit. /// This checks for numeric overflow, and returns the appropriate error. macro_rules! invalid_digit_partial { - ($value:expr, $index:expr, $any:expr) => { + ($value:expr, $index:expr, $any_digits:expr) => { // NOTE: The value is already positive/negative - into_ok_partial!($value, $index - 1, $any) + into_ok_partial!($value, $index - 1, $any_digits) }; } @@ -127,76 +116,33 @@ macro_rules! into_error { }}; } -/// Handle an invalid digit if the format feature is enabled. -/// -/// This is because we can have special, non-digit characters near -/// the start or internally. If `$is_end` is set to false, there **MUST** -/// be elements in the underlying slice after the current iterator. -#[cfg(feature = "format")] -macro_rules! fmt_invalid_digit { +// This has a pessimization for the codegen when we can have +// base prefixes, which causes an inefficient jump to return. +// So, we separate our suffix and no suffix variants. +// This is only used **IF** we have a base prefix + +/// Handle an invalid digit, with a base suffix. +macro_rules! on_invalid_digit { ( $value:ident, $iter:ident, - $c:expr, - $start_index:ident, + $start_digits:ident, + $into_ok:ident, $invalid_digit:ident, - $has_suffix:ident, $is_end:expr $(,)? ) => {{ - // NOTE: If we have non-contiguous iterators, we could have a skip character - // here at the boundary. This does not affect safety but it does affect - // correctness. - debug_assert!($iter.is_contiguous() || $is_end); - - let format = NumberFormat:: {}; - let base_suffix = format.base_suffix(); - let uncased_base_suffix = format.case_sensitive_base_suffix(); - // Need to check for a base suffix, if so, return a valid value. - // We can't have a base suffix at the first value (need at least - // 1 digit). 
- if cfg!(feature = "power-of-two") && base_suffix != 0 && $iter.cursor() - $start_index > 1 { - let is_suffix = if uncased_base_suffix { - $c == base_suffix + let have_any = $iter.digits_since($start_digits) != 0; + if cfg!(all(feature = "format", feature = "power-of-two")) + && $iter.read_base_suffix(false)? + { + if $iter.is_buffer_empty() && $is_end { + $into_ok!($value, $iter.buffer_length(), have_any) } else { - $c.eq_ignore_ascii_case(&base_suffix) - }; - // NOTE: If we're using the `take_n` optimization where it can't - // be the end, then the iterator cannot be done. So, in that case, - // we need to end. `take_n` also can never be used for non- - // contiguous iterators. - if is_suffix && $is_end && $iter.is_buffer_empty() { - // Break out of the loop, we've finished parsing. - $has_suffix = true; - break; - } else if !$iter.is_buffer_empty() { - // Haven't finished parsing, so we're going to call - // `invalid_digit!`. Need to ensure we include the - // base suffix in that. - - // SAFETY: safe since the iterator is not empty, as checked - // in `$iter.is_buffer_empty()`. Adding in the check hopefully - // will be elided since it's a known constant. - unsafe { $iter.step_unchecked() }; + $invalid_digit!($value, $iter.cursor(), have_any) } + } else { + $invalid_digit!($value, $iter.cursor() + 1, have_any) } - // Might have handled our base-prefix here. - $invalid_digit!($value, $iter.cursor(), $iter.digits() != 0) - }}; -} - -/// Just return an invalid digit -#[cfg(not(feature = "format"))] -macro_rules! fmt_invalid_digit { - ( - $value:ident, - $iter:ident, - $c:expr, - $start_index:ident, - $invalid_digit:ident, - $has_suffix:ident, - $is_end:expr $(,)? - ) => {{ - $invalid_digit!($value, $iter.cursor(), $iter.digits() != 0); }}; } @@ -362,9 +308,8 @@ where /// * `value` - The current parsed value. /// * `iter` - An iterator over all bytes in the input. /// * `add_op` - The unchecked add/sub op. -/// * `start_index` - The offset where parsing started. 
+/// * `start_digits` - The number of digits where parsing started. /// * `invalid_digit` - Behavior when an invalid digit is found. -/// * `has_suffix` - If a base suffix was found at the end of the buffer. /// * `is_end` - If iter corresponds to the full input. /// /// core: @@ -373,26 +318,32 @@ macro_rules! parse_1digit_unchecked { $value:ident, $iter:ident, $add_op:ident, - $start_index:ident, + $start_digits:ident, + $into_ok:ident, $invalid_digit:ident, - $has_suffix:ident, $is_end:expr $(,)? ) => {{ // This is a slower parsing algorithm, going 1 digit at a time, but doing it in // an unchecked loop. - let radix = NumberFormat::::MANTISSA_RADIX; + let format = NumberFormat:: {}; + let radix = format.mantissa_radix(); while let Some(&c) = $iter.next() { let digit = match char_to_digit_const(c, radix) { Some(v) => v, - None => fmt_invalid_digit!( - $value, - $iter, - c, - $start_index, - $invalid_digit, - $has_suffix, - $is_end, - ), + None => { + // This optimizes better for success cases, which is what we want. + // It's an odd hack, but it's tested to work. + // SAFETY: Safe since we must have gotten one digit from next. + unsafe { $iter.set_cursor($iter.cursor() - 1) }; + on_invalid_digit!( + $value, + $iter, + $start_digits, + $into_ok, + $invalid_digit, + $is_end + ) + }, }; // multiply first since compilers are good at optimizing things out and will do // a fused mul/add We must do this after getting the digit for @@ -410,9 +361,8 @@ macro_rules! parse_1digit_unchecked { /// * `value` - The current parsed value. /// * `iter` - An iterator over all bytes in the input. /// * `add_op` - The checked add/sub op. -/// * `start_index` - The offset where parsing started. +/// * `start_digits` - The number of digits where parsing started. /// * `invalid_digit` - Behavior when an invalid digit is found. -/// * `has_suffix` - If a base suffix was found at the end of the buffer. /// * `overflow` - If the error is overflow or underflow. 
/// /// core: @@ -421,34 +371,41 @@ macro_rules! parse_1digit_checked { $value:ident, $iter:ident, $add_op:ident, - $start_index:ident, + $start_digits:ident, + $into_ok:ident, $invalid_digit:ident, - $has_suffix:ident, $overflow:ident $(,)? ) => {{ // This is a slower parsing algorithm, going 1 digit at a time, but doing it in // an unchecked loop. - let radix = NumberFormat::::MANTISSA_RADIX; + let format = NumberFormat:: {}; + let radix = format.mantissa_radix(); + let is_end = true; while let Some(&c) = $iter.next() { let digit = match char_to_digit_const(c, radix) { Some(v) => v, - None => fmt_invalid_digit!( - $value, - $iter, - c, - $start_index, - $invalid_digit, - $has_suffix, - true, - ), + None => { + // This optimizes better for success cases, which is what we want. + // It's an odd hack, but it's tested to work. + // SAFETY: Safe since we must have gotten one digit from next. + unsafe { $iter.set_cursor($iter.cursor() - 1) }; + on_invalid_digit!( + $value, + $iter, + $start_digits, + $into_ok, + $invalid_digit, + is_end + ) + }, }; // multiply first since compilers are good at optimizing things out and will do // a fused mul/add - $value = - match $value.checked_mul(as_cast(radix)).and_then(|x| x.$add_op(as_cast(digit))) { - Some(value) => value, - None => into_error!($overflow, $iter.cursor() - 1), - } + let mul = $value.checked_mul(as_cast(radix)); + $value = match mul.and_then(|x| x.$add_op(as_cast(digit))) { + Some(value) => value, + None => into_error!($overflow, $iter.cursor() - 1), + } } }}; } @@ -465,20 +422,19 @@ macro_rules! parse_1digit_checked { /// * `value` - The current parsed value. /// * `iter` - An iterator over all bytes in the input. /// * `add_op` - The unchecked add/sub op. -/// * `start_index` - The offset where parsing started. +/// * `start_digits` - The number of digits where parsing started. /// * `invalid_digit` - Behavior when an invalid digit is found. /// * `no_multi_digit` - If to disable multi-digit optimizations. 
-/// * `has_suffix` - If a base suffix was found at the end of the buffer. /// * `is_end` - If iter corresponds to the full input. macro_rules! parse_digits_unchecked { ( $value:ident, $iter:ident, $add_op:ident, - $start_index:ident, + $start_digits:ident, + $into_ok:ident, $invalid_digit:ident, $no_multi_digit:expr, - $has_suffix:ident, $is_end:expr $(,)? ) => {{ let can_multi = can_try_parse_multidigits::<_, FORMAT>(&$iter); @@ -507,9 +463,9 @@ macro_rules! parse_digits_unchecked { $value, $iter, $add_op, - $start_index, + $start_digits, + $into_ok, $invalid_digit, - $has_suffix, $is_end ) }}; @@ -525,11 +481,10 @@ macro_rules! parse_digits_unchecked { /// * `iter` - An iterator over all bytes in the input. /// * `add_op` - The checked add/sub op. /// * `add_op_uc` - The unchecked add/sub op for small digit optimizations. -/// * `start_index` - The offset where parsing started. +/// * `start_digits` - The number of digits where parsing started. /// * `invalid_digit` - Behavior when an invalid digit is found. /// * `overflow` - If the error is overflow or underflow. /// * `no_multi_digit` - If to disable multi-digit optimizations. -/// * `has_suffix` - If a base suffix was found at the end of the buffer. /// * `overflow_digits` - The number of digits before we need to consider /// checked ops. macro_rules! parse_digits_checked { @@ -538,11 +493,11 @@ macro_rules! parse_digits_checked { $iter:ident, $add_op:ident, $add_op_uc:ident, - $start_index:ident, + $start_digits:ident, + $into_ok:ident, $invalid_digit:ident, $overflow:ident, $no_multi_digit:expr, - $has_suffix:ident, $overflow_digits:expr $(,)? ) => {{ // Can use the unchecked for the `max_digits` here. If we @@ -557,10 +512,10 @@ macro_rules! parse_digits_checked { $value, small_iter, $add_op_uc, - $start_index, + $start_digits, + $into_ok, $invalid_digit, $no_multi_digit, - $has_suffix, false ); } @@ -571,9 +526,9 @@ macro_rules! 
parse_digits_checked { $value, $iter, $add_op, - $start_index, + $start_digits, + $into_ok, $invalid_digit, - $has_suffix, $overflow ) }}; @@ -625,13 +580,9 @@ macro_rules! algorithm { let mut iter = byte.integer_iter(); // skip and validate an optional base prefix - // FIXME: Optimize this, but this then would require tracking digits after - // here for the base prefix cases. - let has_base_prefix = cfg!(feature = "format") && iter.read_base_prefix(); - if cfg!(feature = "format") && has_base_prefix { + let has_base_prefix = cfg!(all(feature = "format", feature = "power-of-two")) && iter.read_base_prefix()?; + if cfg!(all(feature = "format", feature = "power-of-two")) && has_base_prefix { maybe_into_empty!(iter, $into_ok); - } else if cfg!(feature = "format") && format.required_base_prefix() { - return Err(Error::MissingBasePrefix(iter.cursor())); } // NOTE: always do a peek so any leading digit separators @@ -652,9 +603,9 @@ macro_rules! algorithm { // Valid digit, we have an invalid value. Some(Some(_)) => into_error!(InvalidLeadingZeros, index), // Have a non-digit character that follows. - Some(None) => $invalid_digit!(::ZERO, iter.cursor() + 1, iter.digits() != 0), + Some(None) => $invalid_digit!(::ZERO, iter.cursor() + 1, true), // No digits following, has to be ok - None => $into_ok!(::ZERO, index, iter.digits() != 0), + None => $into_ok!(::ZERO, index, true), }; } @@ -669,10 +620,7 @@ macro_rules! algorithm { // and even if parsing a 64-bit integer is marginally faster, it // culminates in **way** slower performance overall for simple // integers, and no improvement for large integers. - let mut has_suffix = false; - // FIXME: This is only used for the parsing of the base suffix. - #[allow(unused)] - let start_index = iter.cursor(); + let start_digits = iter.digits(); let mut value = T::ZERO; if cannot_overflow { @@ -681,10 +629,10 @@ macro_rules! 
algorithm { value, iter, wrapping_add, - start_index, + start_digits, + $into_ok, $invalid_digit, $no_multi_digit, - has_suffix, true, ); } else { @@ -692,10 +640,10 @@ macro_rules! algorithm { value, iter, wrapping_sub, - start_index, + start_digits, + $into_ok, $invalid_digit, $no_multi_digit, - has_suffix, true, ); } @@ -706,11 +654,11 @@ macro_rules! algorithm { iter, checked_add, wrapping_add, - start_index, + start_digits, + $into_ok, $invalid_digit, Overflow, $no_multi_digit, - has_suffix, overflow_digits, ); } else { @@ -719,23 +667,24 @@ macro_rules! algorithm { iter, checked_sub, wrapping_sub, - start_index, + start_digits, + $into_ok, $invalid_digit, Underflow, $no_multi_digit, - has_suffix, overflow_digits, ); } } - if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_suffix() && !has_suffix { + if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_suffix() { return Err(Error::MissingBaseSuffix(iter.cursor())); } // NOTE: This is always true if we don't have digit separators, since checking // for no skip always checks if there's a next digit, which simplifies it. 
- $into_ok!(value, iter.buffer_length(), iter.is_contiguous() || iter.digits() != 0) + let any_digits = cfg!(not(feature = "format")) || iter.is_contiguous() || iter.digits_since(start_digits) != 0; + $into_ok!(value, iter.buffer_length(), any_digits) }}; } diff --git a/lexical-parse-integer/tests/algorithm_tests.rs b/lexical-parse-integer/tests/algorithm_tests.rs index 6ce0d1ba..ab3c47c4 100644 --- a/lexical-parse-integer/tests/algorithm_tests.rs +++ b/lexical-parse-integer/tests/algorithm_tests.rs @@ -4,6 +4,7 @@ mod util; use lexical_parse_integer::algorithm; use lexical_parse_integer::options::SMALL_NUMBERS; +use lexical_util::error::Error; use lexical_util::format::STANDARD; use lexical_util::iterator::AsBytes; #[cfg(feature = "power-of-two")] @@ -135,7 +136,7 @@ fn algorithm_test() { assert_eq!(parse_u32(b"12345"), Ok((12345, 5))); assert_eq!(parse_u32(b"+12345"), Ok((12345, 6))); - assert_eq!(parse_u32(b"-12345"), Ok((0, 0))); + assert_eq!(parse_u32(b"-12345"), Err(Error::Empty(0))); assert_eq!(parse_i32(b"12345"), Ok((12345, 5))); assert_eq!(parse_i32(b"-12345"), Ok((-12345, 6))); assert_eq!(parse_i32(b"+12345"), Ok((12345, 6))); @@ -170,7 +171,7 @@ fn algorithm_128_test() { assert_eq!(parse_u128(b"12345"), Ok((12345, 5))); assert_eq!(parse_u128(b"+12345"), Ok((12345, 6))); - assert_eq!(parse_u128(b"-12345"), Ok((0, 0))); + assert_eq!(parse_u128(b"-12345"), Err(Error::Empty(0))); assert_eq!(parse_i128(b"12345"), Ok((12345, 5))); assert_eq!(parse_i128(b"-12345"), Ok((-12345, 6))); assert_eq!(parse_i128(b"+12345"), Ok((12345, 6))); diff --git a/lexical-parse-integer/tests/api_tests.rs b/lexical-parse-integer/tests/api_tests.rs index d2baabdd..108a0882 100644 --- a/lexical-parse-integer/tests/api_tests.rs +++ b/lexical-parse-integer/tests/api_tests.rs @@ -270,10 +270,16 @@ fn i32_integer_consecutive_digit_separator_test() { .integer_consecutive_digit_separator(true) .build_strict(); - assert!(i32::from_lexical_with_options::(b"3_1", &OPTIONS).is_ok()); - 
assert!(i32::from_lexical_with_options::(b"3__1", &OPTIONS).is_ok()); - assert!(i32::from_lexical_with_options::(b"_31", &OPTIONS).is_err()); - assert!(i32::from_lexical_with_options::(b"31_", &OPTIONS).is_err()); + assert_eq!(i32::from_lexical_with_options::(b"3_1", &OPTIONS), Ok(31)); + assert_eq!(i32::from_lexical_with_options::(b"3__1", &OPTIONS), Ok(31)); + assert_eq!( + i32::from_lexical_with_options::(b"_31", &OPTIONS), + Err(Error::InvalidDigit(0)) + ); + assert_eq!( + i32::from_lexical_with_options::(b"31_", &OPTIONS), + Err(Error::InvalidDigit(2)) + ); } #[test] @@ -349,13 +355,16 @@ fn base_prefix_and_suffix_test() { .base_suffix(num::NonZeroU8::new(b'h')) .build_strict(); const OPTIONS: Options = Options::new(); - assert!(i32::from_lexical_with_options::(b"+3h", &OPTIONS).is_ok()); - assert!(i32::from_lexical_with_options::(b"+0x3", &OPTIONS).is_ok()); - assert!(i32::from_lexical_with_options::(b"+0x3h", &OPTIONS).is_ok()); - assert!(i32::from_lexical_with_options::(b"+0x3h ", &OPTIONS).is_err()); - assert!(i32::from_lexical_with_options::(b"+0xh", &OPTIONS).is_err()); - assert!(i32::from_lexical_with_options::(b"+h", &OPTIONS).is_err()); - assert!(i32::from_lexical_with_options::(b"+0x", &OPTIONS).is_err()); + assert_eq!(i32::from_lexical_with_options::(b"+3h", &OPTIONS), Ok(3)); + assert_eq!(i32::from_lexical_with_options::(b"+0x3", &OPTIONS), Ok(3)); + assert_eq!(i32::from_lexical_with_options::(b"+0x3h", &OPTIONS), Ok(3)); + assert_eq!( + i32::from_lexical_with_options::(b"+0x3h ", &OPTIONS), + Err(Error::InvalidDigit(4)) + ); + assert_eq!(i32::from_lexical_with_options::(b"+0xh", &OPTIONS), Err(Error::Empty(4))); + assert_eq!(i32::from_lexical_with_options::(b"+h", &OPTIONS), Err(Error::Empty(2))); + assert_eq!(i32::from_lexical_with_options::(b"+0x", &OPTIONS), Err(Error::Empty(3))); } #[test] @@ -415,6 +424,8 @@ fn require_base_prefix_test() { let value = i64::from_lexical_with_options::(b"0d12345", &OPTIONS); assert_eq!(value, Ok(12345)); + let 
value = i64::from_lexical_with_options::(b"0D12345", &OPTIONS); + assert_eq!(value, Ok(12345)); let value = i64::from_lexical_with_options::(b"12345", &OPTIONS); assert_eq!(value, Err(Error::MissingBasePrefix(0))); @@ -447,3 +458,354 @@ fn require_base_prefix_test() { let value = u64::from_lexical_with_options::(b"0d12345", &OPTIONS); assert_eq!(value, Err(Error::MissingBaseSuffix(7))); } + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_prefix_no_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const NO_PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(1))); + + let value = i64::from_lexical_with_options::(b"+_12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"1", &OPTIONS); + assert_eq!(value, Ok(1)); + + const OPT_PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"1", &OPTIONS); + assert_eq!(value, Ok(1)); + + let value = i64::from_lexical_with_options::(b"0d1", &OPTIONS); + assert_eq!(value, Ok(1)); + + const PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_prefix(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = i64::from_lexical_with_options::(b"+_0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+0_d12345", &OPTIONS); + 
assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+0d_12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(3))); + + let value = i64::from_lexical_with_options::(b"+0d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_prefix_l_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_prefix(true) + .base_prefix_leading_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = i64::from_lexical_with_options::(b"+_0d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+__0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+0_d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+0d_12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(3))); + + let value = i64::from_lexical_with_options::(b"+0d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(PREFIX) + .base_prefix_consecutive_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"+__0d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_prefix_i_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + 
.base_prefix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_prefix(true) + .base_prefix_internal_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = i64::from_lexical_with_options::(b"+_0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+0_d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+0__d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+0d_12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(3))); + + let value = i64::from_lexical_with_options::(b"+0d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(PREFIX) + .base_prefix_consecutive_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"+0__d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_prefix_t_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_prefix(true) + .base_prefix_trailing_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = i64::from_lexical_with_options::(b"+_0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+0_d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = 
i64::from_lexical_with_options::(b"+0d_12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+0d__12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(4))); + + let value = i64::from_lexical_with_options::(b"+0d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + // special case: overlap with a leading digit separator + const LEADING: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .leading_digit_separator(true) + .required_base_prefix(true) + .base_prefix_trailing_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"+0d_12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+0d__12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(3))); + + const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(PREFIX) + .base_prefix_consecutive_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"+0d__12345", &OPTIONS); + assert_eq!(value, Ok(12345)); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_suffix_no_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const NO_SUFFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(1))); + + let value = i64::from_lexical_with_options::(b"+_12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"1", &OPTIONS); + assert_eq!(value, Ok(1)); + + const OPT_SUFFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_suffix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .build_strict(); + + let value = 
i64::from_lexical_with_options::(b"1", &OPTIONS); + assert_eq!(value, Ok(1)); + + let value = i64::from_lexical_with_options::(b"1d", &OPTIONS); + assert_eq!(value, Ok(1)); + + const SUFFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_suffix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_suffix(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+12345d", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(0))); + + let value = i64::from_lexical_with_options::(b"+12345_d", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(6))); + + let value = i64::from_lexical_with_options::(b"+12345d", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+12345d_", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(6))); + + let value = i64::from_lexical_with_options::(b"+12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(6))); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_suffix_l_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const SUFFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_suffix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_suffix(true) + .base_suffix_leading_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+12345d", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(0))); + + let value = i64::from_lexical_with_options::(b"+12345_d", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+12345__d", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(6))); + + let value = i64::from_lexical_with_options::(b"+12345d", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+12345d_", &OPTIONS); + 
assert_eq!(value, Err(Error::InvalidDigit(6))); + + let value = i64::from_lexical_with_options::(b"+12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(6))); + + // special case: overlap with a trailing digit separator + const TRAILING: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_suffix(num::NonZeroU8::new(b'd')) + .trailing_digit_separator(true) + .required_base_suffix(true) + .base_suffix_leading_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"+12345_d", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+12345__d", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(6))); + + const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(SUFFIX) + .base_suffix_consecutive_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"+12345__d", &OPTIONS); + assert_eq!(value, Ok(12345)); +} + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_suffix_t_digit_separator_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const SUFFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_suffix(num::NonZeroU8::new(b'd')) + .internal_digit_separator(true) + .required_base_suffix(true) + .base_suffix_trailing_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+12345d", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(0))); + + let value = i64::from_lexical_with_options::(b"+12345_d", &OPTIONS); + assert_eq!(value, Err(Error::MissingBaseSuffix(6))); + + let value = i64::from_lexical_with_options::(b"+12345d", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+12345d_", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+12345", &OPTIONS); + assert_eq!(value, 
Err(Error::MissingBaseSuffix(6))); + + let value = i64::from_lexical_with_options::(b"+12345d__", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(7))); + + const CONSECUTIVE: u128 = NumberFormatBuilder::rebuild(SUFFIX) + .base_suffix_consecutive_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"+12345d__", &OPTIONS); + assert_eq!(value, Ok(12345)); +} + +#[test] +fn empty_partial_test() { + const OPTIONS: Options = Options::new(); + + let value = u64::from_lexical_with_options::(b"-12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(0))); + + let value = u64::from_lexical_partial_with_options::(b"-12345", &OPTIONS); + assert_eq!(value, Err(Error::Empty(0))); +} diff --git a/lexical-util/src/feature_format.rs b/lexical-util/src/feature_format.rs index b58d4d66..17dd5359 100644 --- a/lexical-util/src/feature_format.rs +++ b/lexical-util/src/feature_format.rs @@ -866,8 +866,9 @@ impl NumberFormat { /// Get if the format requires base suffixes. /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`false`]. + /// Can only be modified with [`feature`][crate#features] `format`. This will + /// override most errors, including all invalid digit errors. Defaults to + /// [`false`]. /// /// # Examples /// diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index 8c24ae88..9390bb94 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -1595,6 +1595,8 @@ impl NumberFormatBuilder { /// Get if the format requires base suffixes. /// + /// This will override most errors, including all invalid digit errors. + /// /// # Examples /// /// Using a base suffix of `x`. @@ -3627,6 +3629,7 @@ impl NumberFormatBuilder { /// Set if the format requires base suffixes. /// + /// This will override most errors, including all invalid digit errors. /// Defaults to [`false`]. 
/// /// # Examples diff --git a/lexical-util/src/iterator.rs b/lexical-util/src/iterator.rs index 7682f334..1fcc3edc 100644 --- a/lexical-util/src/iterator.rs +++ b/lexical-util/src/iterator.rs @@ -465,7 +465,7 @@ pub unsafe trait Iter<'a> { /// /// Any caller that consumes leading digit separators will need /// to ignore it if base prefix trailing digit separators are enabled. - fn read_base_prefix(&mut self) -> bool; + fn read_base_prefix(&mut self) -> Result; /// Read the base suffix, if present, returning if the base suffix /// was present. @@ -474,7 +474,7 @@ pub unsafe trait Iter<'a> { /// digits or digit separators, so the total digit count is valid. /// Otherwise, it advances the iterator state to the end of the base /// suffix, including consuming any trailing digit separators. - fn read_base_suffix(&mut self, has_exponent: bool) -> bool; + fn read_base_suffix(&mut self, has_exponent: bool) -> Result; } /// Iterator over a contiguous block of bytes. diff --git a/lexical-util/src/noskip.rs b/lexical-util/src/noskip.rs index b3eeb0da..0a1a7af1 100644 --- a/lexical-util/src/noskip.rs +++ b/lexical-util/src/noskip.rs @@ -160,14 +160,14 @@ unsafe impl<'a, const __: u128> Iter<'a> for Bytes<'a, __> { } #[inline(always)] - fn read_base_prefix(&mut self) -> bool { - false + fn read_base_prefix(&mut self) -> Result { + Ok(false) } #[inline(always)] - fn read_base_suffix(&mut self, has_exponent: bool) -> bool { + fn read_base_suffix(&mut self, has_exponent: bool) -> Result { _ = has_exponent; - false + Ok(false) } } @@ -263,12 +263,12 @@ unsafe impl<'a: 'b, 'b, const __: u128> Iter<'a> for DigitsIterator<'a, 'b, __> } #[inline(always)] - fn read_base_prefix(&mut self) -> bool { + fn read_base_prefix(&mut self) -> Result { self.byte.read_base_prefix() } #[inline(always)] - fn read_base_suffix(&mut self, has_exponent: bool) -> bool { + fn read_base_suffix(&mut self, has_exponent: bool) -> Result { self.byte.read_base_suffix(has_exponent) } } diff --git 
a/lexical-util/src/not_feature_format.rs b/lexical-util/src/not_feature_format.rs index b87677d1..1b322773 100644 --- a/lexical-util/src/not_feature_format.rs +++ b/lexical-util/src/not_feature_format.rs @@ -825,8 +825,9 @@ impl NumberFormat { /// Get if the format requires base suffixes. /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`false`]. + /// Can only be modified with [`feature`][crate#features] `format`. This + /// will override most errors, including all invalid digit errors. + /// Defaults to [`false`]. /// /// # Examples /// diff --git a/lexical-util/src/skip.rs b/lexical-util/src/skip.rs index a04b1252..2ae52d7e 100644 --- a/lexical-util/src/skip.rs +++ b/lexical-util/src/skip.rs @@ -44,6 +44,7 @@ use core::{mem, ptr}; use crate::digit::char_is_digit_const; +use crate::error::Error; use crate::format::NumberFormat; use crate::format_flags as flags; use crate::iterator::{self, DigitsIter, Iter}; @@ -1246,14 +1247,22 @@ unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { } #[inline(always)] - fn read_base_prefix(&mut self) -> bool { + fn read_base_prefix(&mut self) -> Result { let format = NumberFormat::<{ FORMAT }> {}; let digit_separator = format.digit_separator(); let base_prefix = format.base_prefix(); let is_cased = format.case_sensitive_base_prefix(); + let into_false = || { + if format.required_base_prefix() { + Err(Error::MissingBasePrefix(self.cursor())) + } else { + Ok(false) + } + }; + if !cfg!(feature = "power-of-two") || base_prefix == 0 { - return false; + return into_false(); } // grab our cursor information @@ -1269,7 +1278,7 @@ unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { } if bytes.get(index) != Some(&b'0') { - return false; + return into_false(); } index += 1; debug_assert!(index <= bytes.len()); @@ -1277,12 +1286,14 @@ unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { if format.base_prefix_internal_digit_separator() { index = 
consume(bytes, digit_separator, index, consecutive); } + debug_assert!(index <= bytes.len()); + if bytes .get(index) .map(|&x| !Self::is_value_equal(x, base_prefix, is_cased)) - .unwrap_or(false) + .unwrap_or(true) { - return false; + return into_false(); } index += 1; debug_assert!(index <= bytes.len()); @@ -1292,8 +1303,7 @@ unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { // if the integer can skip leading digit separators and we can skip // trailing, but they can consume consecutive separators, since that // would just be re-processing data. - let prefix_trailing = format.base_prefix_trailing_digit_separator(); - let mut should_skip = prefix_trailing; + let mut should_skip = format.base_prefix_trailing_digit_separator(); if format.integer_leading_digit_separator() { should_skip &= consecutive && !format.integer_consecutive_digit_separator(); } @@ -1306,13 +1316,68 @@ unsafe impl<'a, const FORMAT: u128> Iter<'a> for Bytes<'a, FORMAT> { debug_assert!(index <= bytes.len()); unsafe { self.set_cursor(index) }; - true + Ok(true) } #[inline(always)] - fn read_base_suffix(&mut self, has_exponent: bool) -> bool { - _ = has_exponent; - todo!(); // TODO: Implement and test + fn read_base_suffix(&mut self, has_exponent: bool) -> Result { + let format = NumberFormat::<{ FORMAT }> {}; + let digit_separator = format.digit_separator(); + let base_suffix = format.base_suffix(); + let is_cased = format.case_sensitive_base_suffix(); + + let into_false = || { + if format.required_base_suffix() { + Err(Error::MissingBaseSuffix(self.cursor())) + } else { + Ok(false) + } + }; + + if !cfg!(feature = "power-of-two") || base_suffix == 0 { + return into_false(); + } + + // grab our cursor information + let mut index = self.cursor(); + let bytes = self.get_buffer(); + let consecutive = format.base_suffix_consecutive_digit_separator(); + + // we cannot skip leading digit separators if we do not have consecutive + // digit separators and accepted trailing digit 
separators before. + let mut should_skip = format.base_suffix_leading_digit_separator(); + if has_exponent && format.exponent_trailing_digit_separator() { + should_skip &= !format.exponent_consecutive_digit_separator(); + } else if !has_exponent && format.integer_trailing_digit_separator() { + should_skip &= !format.integer_consecutive_digit_separator(); + } + if should_skip { + index = consume(bytes, digit_separator, index, consecutive); + } + debug_assert!(index <= bytes.len()); + + if bytes + .get(index) + .map(|&x| !Self::is_value_equal(x, base_suffix, is_cased)) + .unwrap_or(true) + { + return into_false(); + } + index += 1; + debug_assert!(index <= bytes.len()); + + // we had a base suffix: consume our trailing digits + // internal digit separators are a no-op. + if format.base_suffix_trailing_digit_separator() { + index = consume(bytes, digit_separator, index, consecutive); + } + + // SAFETY: safe, since we've consumed at most 1 digit prior to + // consume, we will never go `> bytes.len()`, so this is safe. + debug_assert!(index <= bytes.len()); + unsafe { self.set_cursor(index) }; + + Ok(true) } } @@ -1482,12 +1547,12 @@ macro_rules! 
skip_iterator_iter_base { } #[inline(always)] - fn read_base_prefix(&mut self) -> bool { + fn read_base_prefix(&mut self) -> Result { self.byte.read_base_prefix() } #[inline(always)] - fn read_base_suffix(&mut self, has_exponent: bool) -> bool { + fn read_base_suffix(&mut self, has_exponent: bool) -> Result { self.byte.read_base_suffix(has_exponent) } }; diff --git a/scripts/asm.sh b/scripts/asm.sh index 79bbee82..4cc63851 100755 --- a/scripts/asm.sh +++ b/scripts/asm.sh @@ -10,4 +10,4 @@ home=$(dirname "${script_home}") cd "${home}"/extras/asm export RUSTFLAGS="--emit asm -C llvm-args=-x86-asm-syntax=intel" -cargo +nightly build --release "$@" +cargo +nightly build --release "$@" --features="${FEATURES}" From b655c040fa47673f6b0696d8679d280285d04ed9 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 22 Jan 2025 00:49:15 -0600 Subject: [PATCH 15/18] Add explicit tests that using leading zeros works with digit separators but not without if disabled. --- lexical-parse-float/tests/api_tests.rs | 24 ++++++++++++++++++++++++ lexical-parse-integer/tests/api_tests.rs | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/lexical-parse-float/tests/api_tests.rs b/lexical-parse-float/tests/api_tests.rs index 46e2a3c1..25ad5d1b 100644 --- a/lexical-parse-float/tests/api_tests.rs +++ b/lexical-parse-float/tests/api_tests.rs @@ -1399,6 +1399,30 @@ fn base_prefix_no_digit_separator_test() { assert_eq!(value, Ok(12345.6)); } +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_prefix_leading_zeros_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const OPT_PREFIX: u128 = NumberFormatBuilder::new() + .base_prefix(num::NonZeroU8::new(b'd')) + .no_float_leading_zeros(true) + .build_strict(); + + let value = f64::from_lexical_with_options::(b"1.2", &OPTIONS); + assert_eq!(value, Ok(1.2)); + + let value = f64::from_lexical_with_options::(b"01.2", &OPTIONS); + assert_eq!(value, Err(Error::InvalidLeadingZeros(0))); 
+ + let value = f64::from_lexical_with_options::(b"0d1.2", &OPTIONS); + assert_eq!(value, Ok(1.2)); + + let value = f64::from_lexical_with_options::(b"0d01.2", &OPTIONS); + assert_eq!(value, Ok(1.2)); +} + #[test] #[cfg(all(feature = "format", feature = "power-of-two"))] fn base_prefix_l_digit_separator_test() { diff --git a/lexical-parse-integer/tests/api_tests.rs b/lexical-parse-integer/tests/api_tests.rs index 108a0882..4940bab2 100644 --- a/lexical-parse-integer/tests/api_tests.rs +++ b/lexical-parse-integer/tests/api_tests.rs @@ -514,6 +514,30 @@ fn base_prefix_no_digit_separator_test() { assert_eq!(value, Ok(12345)); } +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_prefix_leading_zeros_test() { + use core::num; + + const OPTIONS: Options = Options::new(); + const OPT_PREFIX: u128 = NumberFormatBuilder::new() + .base_prefix(num::NonZeroU8::new(b'd')) + .no_integer_leading_zeros(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"1", &OPTIONS); + assert_eq!(value, Ok(1)); + + let value = i64::from_lexical_with_options::(b"01", &OPTIONS); + assert_eq!(value, Err(Error::InvalidLeadingZeros(0))); + + let value = i64::from_lexical_with_options::(b"0d1", &OPTIONS); + assert_eq!(value, Ok(1)); + + let value = i64::from_lexical_with_options::(b"0d01", &OPTIONS); + assert_eq!(value, Ok(1)); +} + #[test] #[cfg(all(feature = "format", feature = "power-of-two"))] fn base_prefix_l_digit_separator_test() { From 55f53bffd767fd2ce4a5d1a6fb11157c0e33be8c Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Wed, 22 Jan 2025 18:05:29 -0600 Subject: [PATCH 16/18] Remove `supports_parsing_*` and `supports_writing_*`. This is a waste of flag space and should be removed prior to any releases. 
--- CHANGELOG | 2 +- lexical-parse-float/src/api.rs | 8 +- lexical-parse-float/tests/api_tests.rs | 32 ----- lexical-parse-integer/src/api.rs | 9 +- lexical-parse-integer/tests/api_tests.rs | 44 ------ lexical-util/src/error.rs | 6 - lexical-util/src/feature_format.rs | 72 ---------- lexical-util/src/format_builder.rs | 132 ----------------- lexical-util/src/format_flags.rs | 34 +---- lexical-util/src/not_feature_format.rs | 156 +++++---------------- lexical-util/tests/feature_format_tests.rs | 7 +- lexical-write-float/src/api.rs | 7 +- lexical-write-float/tests/api_tests.rs | 29 +--- lexical-write-integer/src/api.rs | 9 +- lexical-write-integer/tests/api_tests.rs | 32 +---- 15 files changed, 54 insertions(+), 525 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 42760a75..21b0682b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -17,10 +17,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Re-export `NumberFormat` to our other crates (#204). - Added `Options::from_radix` for all options for similar APIs for each (#208). - Support for `required_integer_digits_with_exponent`, `required_fraction_digits_with_exponent`, and `required_mantissa_digits_with_exponent`, that is,`1.e5` and `.1e5`, as opposed to just requiring`1e5` (#215). -- Added `supports_parsing_integers`, `supports_parsing_floats`, `supports_writing_integers`, and `supports_writing_floats` for our number formats (#215). - Added `required_base_prefix` and `required_base_suffix` for our number formats, requiring base prefixes and/or suffixes when parsing, and allowing writing base prefixes and/or suffixes (#215). - Added `NumberFormatBuilder::none()` for create a format with no flags set (#215). - Added in many more digit separator flags for the `NumberFormat`, including for signs, base prefixes, base suffixes, and restricting digit separators at the start of the number (#215). +- Added many more pre-defined formatting constants (#215). 
### Changed diff --git a/lexical-parse-float/src/api.rs b/lexical-parse-float/src/api.rs index e82a635a..5975a330 100644 --- a/lexical-parse-float/src/api.rs +++ b/lexical-parse-float/src/api.rs @@ -51,9 +51,7 @@ macro_rules! float_from_lexical { ) -> lexical_util::result::Result { let format = NumberFormat::<{ FORMAT }> {}; - if !format.supports_parsing_floats() { - return Err(Error::Unsupported); - } else if !format.is_valid() { + if !format.is_valid() { return Err(format.error()); } else if !is_valid_options_punctuation(FORMAT, options.exponent(), options.decimal_point()) { return Err(Error::InvalidPunctuation); @@ -68,9 +66,7 @@ macro_rules! float_from_lexical { ) -> lexical_util::result::Result<(Self, usize)> { let format = NumberFormat::<{ FORMAT }> {}; - if !format.supports_parsing_floats() { - return Err(Error::Unsupported); - } else if !format.is_valid() { + if !format.is_valid() { return Err(format.error()); } else if !is_valid_options_punctuation(FORMAT, options.exponent(), options.decimal_point()) { return Err(Error::InvalidPunctuation); diff --git a/lexical-parse-float/tests/api_tests.rs b/lexical-parse-float/tests/api_tests.rs index 25ad5d1b..a34ff0b5 100644 --- a/lexical-parse-float/tests/api_tests.rs +++ b/lexical-parse-float/tests/api_tests.rs @@ -1276,38 +1276,6 @@ fn issue68_test() { assert_eq!(f64::INFINITY, f64::from_lexical_with_options::(hex, &OPTIONS).unwrap()); } -#[test] -#[cfg(feature = "format")] -fn unsupported_test() { - const FORMAT: u128 = NumberFormatBuilder::new().supports_parsing_floats(false).build_strict(); - const OPTIONS: Options = Options::new(); - - let float = "12345.0"; - let value = f64::from_lexical_with_options::(float.as_bytes(), &OPTIONS); - assert_eq!(value, Err(Error::Unsupported)); - - let value = f64::from_lexical_partial_with_options::(float.as_bytes(), &OPTIONS); - assert_eq!(value, Err(Error::Unsupported)); -} - -#[test] -#[cfg(feature = "format")] -fn supported_test() { - const FORMAT: u128 = 
NumberFormatBuilder::new() - .supports_parsing_integers(false) - .supports_writing_integers(false) - .supports_writing_floats(false) - .build_strict(); - const OPTIONS: Options = Options::new(); - - let float = "12345.0"; - let value = f64::from_lexical_with_options::(float.as_bytes(), &OPTIONS); - assert_eq!(value, Ok(12345.0)); - - let value = f64::from_lexical_partial_with_options::(float.as_bytes(), &OPTIONS); - assert_eq!(value, Ok((12345.0, 7))); -} - #[test] #[cfg(all(feature = "format", feature = "power-of-two"))] fn require_base_prefix_test() { diff --git a/lexical-parse-integer/src/api.rs b/lexical-parse-integer/src/api.rs index 0d8efd29..44e0c335 100644 --- a/lexical-parse-integer/src/api.rs +++ b/lexical-parse-integer/src/api.rs @@ -2,7 +2,6 @@ #![doc(hidden)] -use lexical_util::error::Error; use lexical_util::format::{NumberFormat, STANDARD}; use lexical_util::{from_lexical, from_lexical_with_options}; @@ -43,9 +42,7 @@ macro_rules! integer_from_lexical { ) -> lexical_util::result::Result { let format = NumberFormat::<{ FORMAT }> {}; - if !format.supports_parsing_integers() { - return Err(Error::Unsupported); - } else if !format.is_valid() { + if !format.is_valid() { return Err(format.error()); } Self::parse_complete::(bytes, options) @@ -58,9 +55,7 @@ macro_rules! 
integer_from_lexical { ) -> lexical_util::result::Result<(Self, usize)> { let format = NumberFormat::<{ FORMAT }> {}; - if !format.supports_parsing_integers() { - return Err(Error::Unsupported); - } else if !format.is_valid() { + if !format.is_valid() { return Err(format.error()); } Self::parse_partial::(bytes, options) diff --git a/lexical-parse-integer/tests/api_tests.rs b/lexical-parse-integer/tests/api_tests.rs index 4940bab2..ee10ce39 100644 --- a/lexical-parse-integer/tests/api_tests.rs +++ b/lexical-parse-integer/tests/api_tests.rs @@ -367,50 +367,6 @@ fn base_prefix_and_suffix_test() { assert_eq!(i32::from_lexical_with_options::(b"+0x", &OPTIONS), Err(Error::Empty(3))); } -#[test] -#[cfg(feature = "format")] -fn unsupported_test() { - const FORMAT: u128 = NumberFormatBuilder::new().supports_parsing_integers(false).build_strict(); - const OPTIONS: Options = Options::new(); - - let integer = "12345"; - let value = i64::from_lexical_with_options::(integer.as_bytes(), &OPTIONS); - assert_eq!(value, Err(Error::Unsupported)); - - let value = i64::from_lexical_partial_with_options::(integer.as_bytes(), &OPTIONS); - assert_eq!(value, Err(Error::Unsupported)); - - let value = u64::from_lexical_with_options::(integer.as_bytes(), &OPTIONS); - assert_eq!(value, Err(Error::Unsupported)); - - let value = u64::from_lexical_partial_with_options::(integer.as_bytes(), &OPTIONS); - assert_eq!(value, Err(Error::Unsupported)); -} - -#[test] -#[cfg(feature = "format")] -fn supported_test() { - const FORMAT: u128 = NumberFormatBuilder::new() - .supports_parsing_floats(false) - .supports_writing_integers(false) - .supports_writing_floats(false) - .build_strict(); - const OPTIONS: Options = Options::new(); - - let integer = "12345"; - let value = i64::from_lexical_with_options::(integer.as_bytes(), &OPTIONS); - assert_eq!(value, Ok(12345)); - - let value = i64::from_lexical_partial_with_options::(integer.as_bytes(), &OPTIONS); - assert_eq!(value, Ok((12345, 5))); - - let value = 
u64::from_lexical_with_options::(integer.as_bytes(), &OPTIONS); - assert_eq!(value, Ok(12345)); - - let value = u64::from_lexical_partial_with_options::(integer.as_bytes(), &OPTIONS); - assert_eq!(value, Ok((12345, 5))); -} - #[test] #[cfg(all(feature = "format", feature = "power-of-two"))] fn require_base_prefix_test() { diff --git a/lexical-util/src/error.rs b/lexical-util/src/error.rs index 153c6909..8b55f2d4 100644 --- a/lexical-util/src/error.rs +++ b/lexical-util/src/error.rs @@ -98,8 +98,6 @@ pub enum Error { InvalidConsecutiveExponentDigitSeparator, /// Invalid flags were set without the format feature. InvalidFlags, - /// If the operation is unsupported. - Unsupported, // OPTION ERRORS /// Invalid NaN string: must start with an `n` character. @@ -195,7 +193,6 @@ impl Error { Self::InvalidConsecutiveFractionDigitSeparator => "'enabled consecutive digit separators in the fraction without setting a valid location'", Self::InvalidConsecutiveExponentDigitSeparator => "'enabled consecutive digit separators in the exponent without setting a valid location'", Self::InvalidFlags => "'invalid flags enabled without the format feature'", - Self::Unsupported => "'the desired operation is unsupported for this format'", // OPTION ERRORS Self::InvalidNanString => "'NaN string must started with `n`'", @@ -264,7 +261,6 @@ impl Error { Self::InvalidConsecutiveFractionDigitSeparator => None, Self::InvalidConsecutiveExponentDigitSeparator => None, Self::InvalidFlags => None, - Self::Unsupported => None, // OPTION ERRORS Self::InvalidNanString => None, @@ -332,7 +328,6 @@ impl Error { InvalidConsecutiveExponentDigitSeparator ); is_error_type!(is_invalid_flags, InvalidFlags); - is_error_type!(is_unsupported, Unsupported); is_error_type!(is_invalid_nan_string, InvalidNanString); is_error_type!(is_nan_string_too_long, NanStringTooLong); is_error_type!(is_invalid_inf_string, InvalidInfString); @@ -435,7 +430,6 @@ impl fmt::Display for Error { format_message!(formatter, description) 
}, Self::InvalidFlags => format_message!(formatter, description), - Self::Unsupported => format_message!(formatter, description), // OPTION ERRORS Self::InvalidNanString => options_message!(formatter, description), diff --git a/lexical-util/src/feature_format.rs b/lexical-util/src/feature_format.rs index 17dd5359..7e8fd005 100644 --- a/lexical-util/src/feature_format.rs +++ b/lexical-util/src/feature_format.rs @@ -755,78 +755,6 @@ impl NumberFormat { Self::REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT } - /// If the format supports parsing integers. - /// - /// See [`supports_parsing_integers`][Self::supports_parsing_integers]. - pub const SUPPORTS_PARSING_INTEGERS: bool = from_flag!(FORMAT, SUPPORTS_PARSING_INTEGERS); - - /// Get if the format supports parsing integers. - /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`true`]. - /// - /// # Used For - /// - /// - Parse Integer - #[inline(always)] - pub const fn supports_parsing_integers(&self) -> bool { - Self::SUPPORTS_PARSING_INTEGERS - } - - /// If the format supports parsing floats. - /// - /// See [`supports_parsing_floats`][Self::supports_parsing_floats]. - pub const SUPPORTS_PARSING_FLOATS: bool = from_flag!(FORMAT, SUPPORTS_PARSING_FLOATS); - - /// Get if the format supports parsing floats. - /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`true`]. - /// - /// # Used For - /// - /// - Parse Float - #[inline(always)] - pub const fn supports_parsing_floats(&self) -> bool { - Self::SUPPORTS_PARSING_FLOATS - } - - /// If the format supports writing integers. - /// - /// See [`supports_writing_integers`][Self::supports_writing_integers]. - pub const SUPPORTS_WRITING_INTEGERS: bool = from_flag!(FORMAT, SUPPORTS_WRITING_INTEGERS); - - /// Get if the format supports writing integers. - /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`true`]. 
- /// - /// # Used For - /// - /// - Write Integer - #[inline(always)] - pub const fn supports_writing_integers(&self) -> bool { - Self::SUPPORTS_WRITING_INTEGERS - } - - /// If the format supports writing floats. - /// - /// See [`supports_writing_floats`][Self::supports_writing_floats]. - pub const SUPPORTS_WRITING_FLOATS: bool = from_flag!(FORMAT, SUPPORTS_WRITING_FLOATS); - - /// Get if the format supports writing floats. - /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`true`]. - /// - /// # Used For - /// - /// - Write Float - #[inline(always)] - pub const fn supports_writing_floats(&self) -> bool { - Self::SUPPORTS_WRITING_FLOATS - } - /// If the format requires base prefixes. /// /// See [`required_base_prefix`][Self::required_base_prefix]. diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index 9390bb94..d59a9526 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -385,10 +385,6 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`required_fraction_digits_with_exponent`]: Self::required_fraction_digits_with_exponent\n [`required_mantissa_digits_with_exponent`]: Self::required_mantissa_digits_with_exponent\n [`case_sensitive_exponent`]: Self::case_sensitive_exponent\n -[`supports_parsing_integers`]: Self::supports_parsing_integers\n -[`supports_parsing_floats`]: Self::supports_parsing_floats\n -[`supports_writing_integers`]: Self::supports_writing_integers\n -[`supports_writing_floats`]: Self::supports_writing_floats\n [`start_digit_separator`]: Self::start_digit_separator\n [`integer_sign_digit_separator`]: Self::integer_sign_digit_separator\n [`integer_consecutive_sign_digit_separator`]: Self::integer_consecutive_sign_digit_separator\n @@ -438,10 +434,6 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`required_integer_digits_with_exponent`]: 
https://github.com/Alexhuszagh/rust-lexical/blob/0cad692/lexical-util/src/format_builder.rs#L1129\n [`required_fraction_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/0cad692/lexical-util/src/format_builder.rs#L1149\n [`required_mantissa_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/47a090d/lexical-util/src/format_builder.rs#L1233\n -[`supports_parsing_integers`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1181\n -[`supports_parsing_floats`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1191\n -[`supports_writing_integers`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1201\n -[`supports_writing_floats`]: https://github.com/Alexhuszagh/rust-lexical/blob/f53fae0/lexical-util/src/format_builder.rs#L1211\n [`case_sensitive_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L765\n [`start_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1650\n [`integer_sign_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1678\n @@ -582,10 +574,6 @@ pub struct NumberFormatBuilder { no_integer_leading_zeros: bool, no_float_leading_zeros: bool, required_exponent_notation: bool, - supports_parsing_integers: bool, - supports_parsing_floats: bool, - supports_writing_integers: bool, - supports_writing_floats: bool, case_sensitive_exponent: bool, case_sensitive_base_prefix: bool, case_sensitive_base_suffix: bool, @@ -661,14 +649,6 @@ impl NumberFormatBuilder { /// - [`required_integer_digits_with_exponent`][Self::get_required_integer_digits_with_exponent] -`false` /// - [`required_fraction_digits_with_exponent`][Self::get_required_fraction_digits_with_exponent] -`false` /// - 
[`required_mantissa_digits_with_exponent`][Self::get_required_mantissa_digits_with_exponent] -`true` - /// - [`supports_parsing_integers`][Self::get_supports_parsing_integers] - - /// `true` - /// - [`supports_parsing_floats`][Self::get_supports_parsing_floats] - - /// `true` - /// - [`supports_writing_integers`][Self::get_supports_writing_integers] - - /// `true` - /// - [`supports_writing_floats`][Self::get_supports_writing_floats] - - /// `true` /// - [`case_sensitive_exponent`][Self::get_case_sensitive_exponent] - /// `false` /// - [`case_sensitive_base_prefix`][Self::get_case_sensitive_base_prefix] - @@ -736,10 +716,6 @@ impl NumberFormatBuilder { required_integer_digits_with_exponent: false, required_fraction_digits_with_exponent: false, required_mantissa_digits_with_exponent: true, - supports_parsing_integers: true, - supports_parsing_floats: true, - supports_writing_integers: true, - supports_writing_floats: true, required_base_prefix: false, required_base_suffix: false, start_digit_separator: cfg!(feature = "format"), @@ -804,10 +780,6 @@ impl NumberFormatBuilder { required_integer_digits_with_exponent: false, required_fraction_digits_with_exponent: false, required_mantissa_digits_with_exponent: false, - supports_parsing_integers: false, - supports_parsing_floats: false, - supports_writing_integers: false, - supports_writing_floats: false, required_base_prefix: false, required_base_suffix: false, start_digit_separator: true, @@ -1529,46 +1501,6 @@ impl NumberFormatBuilder { self.required_mantissa_digits_with_exponent } - /// Get if the format supports parsing integers. - /// - /// # Used For - /// - /// - Parse Integer - #[inline(always)] - pub fn get_supports_parsing_integers(&self) -> bool { - self.supports_parsing_integers - } - - /// Get if the format supports parsing floats. 
- /// - /// # Used For - /// - /// - Parse Float - #[inline(always)] - pub fn get_supports_parsing_floats(&self) -> bool { - self.supports_parsing_floats - } - - /// Get if the format supports writing integers. - /// - /// # Used For - /// - /// - Write Integer - #[inline(always)] - pub fn get_supports_writing_integers(&self) -> bool { - self.supports_writing_integers - } - - /// Get if the format supports writing floats. - /// - /// # Used For - /// - /// - Write Float - #[inline(always)] - pub fn get_supports_writing_floats(&self) -> bool { - self.supports_writing_floats - } - /// Get if the format requires base prefixes. /// /// # Examples @@ -3543,62 +3475,6 @@ impl NumberFormatBuilder { self } - /// Set if the format supports parsing integers. - /// - /// Defaults to [`true`]. - /// - /// # Used For - /// - /// - Parse Integer - #[inline(always)] - #[cfg(feature = "format")] - pub const fn supports_parsing_integers(mut self, flag: bool) -> Self { - self.supports_parsing_integers = flag; - self - } - - /// Set if the format supports parsing floats. - /// - /// Defaults to [`true`]. - /// - /// # Used For - /// - /// - Parse Float - #[inline(always)] - #[cfg(feature = "format")] - pub const fn supports_parsing_floats(mut self, flag: bool) -> Self { - self.supports_parsing_floats = flag; - self - } - - /// Set if the format supports writing integers. - /// - /// Defaults to [`true`]. - /// - /// # Used For - /// - /// - Write Integer - #[inline(always)] - #[cfg(feature = "format")] - pub const fn supports_writing_integers(mut self, flag: bool) -> Self { - self.supports_writing_integers = flag; - self - } - - /// Set if the format supports writing floats. - /// - /// Defaults to [`true`]. - /// - /// # Used For - /// - /// - Write Float - #[inline(always)] - #[cfg(feature = "format")] - pub const fn supports_writing_floats(mut self, flag: bool) -> Self { - self.supports_writing_floats = flag; - self - } - /// Set if the format requires base prefixes. 
/// /// Defaults to [`false`]. @@ -5157,10 +5033,6 @@ impl NumberFormatBuilder { self.required_integer_digits_with_exponent, REQUIRED_INTEGER_DIGITS_WITH_EXPONENT ; self.required_fraction_digits_with_exponent, REQUIRED_FRACTION_DIGITS_WITH_EXPONENT ; self.required_mantissa_digits_with_exponent, REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT ; - self.supports_parsing_integers, SUPPORTS_PARSING_INTEGERS ; - self.supports_parsing_floats, SUPPORTS_PARSING_FLOATS ; - self.supports_writing_integers, SUPPORTS_WRITING_INTEGERS ; - self.supports_writing_floats, SUPPORTS_WRITING_FLOATS ; self.required_base_prefix, REQUIRED_BASE_PREFIX ; self.required_base_suffix, REQUIRED_BASE_SUFFIX ; self.start_digit_separator, START_DIGIT_SEPARATOR ; @@ -5281,10 +5153,6 @@ impl NumberFormatBuilder { format, REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT ), - supports_parsing_integers: has_flag!(format, SUPPORTS_PARSING_INTEGERS), - supports_parsing_floats: has_flag!(format, SUPPORTS_PARSING_FLOATS), - supports_writing_integers: has_flag!(format, SUPPORTS_WRITING_INTEGERS), - supports_writing_floats: has_flag!(format, SUPPORTS_WRITING_FLOATS), required_base_prefix: has_flag!(format, REQUIRED_BASE_PREFIX), required_base_suffix: has_flag!(format, REQUIRED_BASE_SUFFIX), start_digit_separator: has_flag!(format, START_DIGIT_SEPARATOR), diff --git a/lexical-util/src/format_flags.rs b/lexical-util/src/format_flags.rs index 505cc44e..27dfeb64 100644 --- a/lexical-util/src/format_flags.rs +++ b/lexical-util/src/format_flags.rs @@ -25,7 +25,7 @@ //! //! 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ -//! |e/P|e/S|I/E|F/E|p/I|p/F|w/I|w/F|R/P|r/S|M/E| | +//! |e/P|e/S|I/E|F/E|r/P|r/S|M/E| | //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ //! //! 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 @@ -60,10 +60,6 @@ //! e/S = Case-sensitive base suffix. //! I/E = Require integer digits with exponent. //! 
F/E = Require fraction digits with exponent. -//! p/I = The format supports parsing integers. -//! p/F = The format supports parsing floats. -//! w/I = The format supports writing integers. -//! w/F = The format supports writing floats. //! r/P = Require base prefixes. //! r/S = Require base suffixes. //! M/E = Require mantissa digits with exponent. @@ -368,26 +364,14 @@ pub const REQUIRED_INTEGER_DIGITS_WITH_EXPONENT: u128 = 1 << 18; /// notation, if the decimal point is present. pub const REQUIRED_FRACTION_DIGITS_WITH_EXPONENT: u128 = 1 << 19; -/// If the format supports parsing integers. -pub const SUPPORTS_PARSING_INTEGERS: u128 = 1 << 20; - -/// If the format supports parsing floats. -pub const SUPPORTS_PARSING_FLOATS: u128 = 1 << 21; - -/// If the format supports parsing integers. -pub const SUPPORTS_WRITING_INTEGERS: u128 = 1 << 22; - -/// If the format supports parsing floats. -pub const SUPPORTS_WRITING_FLOATS: u128 = 1 << 23; - /// If the format requires base prefixes. -pub const REQUIRED_BASE_PREFIX: u128 = 1 << 24; +pub const REQUIRED_BASE_PREFIX: u128 = 1 << 20; /// If the format requires base suffixes. -pub const REQUIRED_BASE_SUFFIX: u128 = 1 << 25; +pub const REQUIRED_BASE_SUFFIX: u128 = 1 << 21; /// If any significant digits are required with exponent notation. -pub const REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT: u128 = 1 << 26; +pub const REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT: u128 = 1 << 22; // Non-digit separator flags. 
const _: () = assert!(REQUIRED_INTEGER_DIGITS == 1); @@ -411,11 +395,7 @@ check_subsequent_flags!(CASE_SENSITIVE_EXPONENT, CASE_SENSITIVE_BASE_PREFIX); check_subsequent_flags!(CASE_SENSITIVE_BASE_PREFIX, CASE_SENSITIVE_BASE_SUFFIX); check_subsequent_flags!(CASE_SENSITIVE_BASE_SUFFIX, REQUIRED_INTEGER_DIGITS_WITH_EXPONENT); check_subsequent_flags!(REQUIRED_INTEGER_DIGITS_WITH_EXPONENT, REQUIRED_FRACTION_DIGITS_WITH_EXPONENT); -check_subsequent_flags!(REQUIRED_FRACTION_DIGITS_WITH_EXPONENT, SUPPORTS_PARSING_INTEGERS); -check_subsequent_flags!(SUPPORTS_PARSING_INTEGERS, SUPPORTS_PARSING_FLOATS); -check_subsequent_flags!(SUPPORTS_PARSING_FLOATS, SUPPORTS_WRITING_INTEGERS); -check_subsequent_flags!(SUPPORTS_WRITING_INTEGERS, SUPPORTS_WRITING_FLOATS); -check_subsequent_flags!(SUPPORTS_WRITING_FLOATS, REQUIRED_BASE_PREFIX); +check_subsequent_flags!(REQUIRED_FRACTION_DIGITS_WITH_EXPONENT, REQUIRED_BASE_PREFIX); check_subsequent_flags!(REQUIRED_BASE_PREFIX, REQUIRED_BASE_SUFFIX); check_subsequent_flags!(REQUIRED_BASE_SUFFIX, REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT); @@ -642,10 +622,6 @@ pub const FLAG_MASK: u128 = CASE_SENSITIVE_EXPONENT | CASE_SENSITIVE_BASE_PREFIX | CASE_SENSITIVE_BASE_SUFFIX | - SUPPORTS_PARSING_FLOATS | - SUPPORTS_PARSING_INTEGERS | - SUPPORTS_WRITING_FLOATS | - SUPPORTS_WRITING_INTEGERS | REQUIRED_BASE_PREFIX | REQUIRED_BASE_SUFFIX | START_DIGIT_SEPARATOR_FLAG_MASK | diff --git a/lexical-util/src/not_feature_format.rs b/lexical-util/src/not_feature_format.rs index 1b322773..3947cd50 100644 --- a/lexical-util/src/not_feature_format.rs +++ b/lexical-util/src/not_feature_format.rs @@ -44,47 +44,43 @@ use crate::format_flags as flags; /// 20. [`required_integer_digits_with_exponent`][NumberFormat::required_integer_digits_with_exponent] /// 21. [`required_fraction_digits_with_exponent`][NumberFormat::required_fraction_digits_with_exponent] /// 22. [`required_mantissa_digits_with_exponent`][NumberFormat::required_mantissa_digits_with_exponent] -/// 23. 
[`supports_parsing_integers`][NumberFormat::supports_parsing_integers] -/// 24. [`supports_parsing_floats`][NumberFormat::supports_parsing_floats] -/// 25. [`supports_writing_integers`][NumberFormat::supports_writing_integers] -/// 26. [`supports_writing_floats`][NumberFormat::supports_writing_floats] -/// 27. [`required_base_prefix`][NumberFormat::required_base_prefix] -/// 28. [`required_base_suffix`][NumberFormat::required_base_suffix] -/// 29. [`integer_internal_digit_separator`][NumberFormat::integer_internal_digit_separator] -/// 30. [`fraction_internal_digit_separator`][NumberFormat::fraction_internal_digit_separator] -/// 31. [`exponent_internal_digit_separator`][NumberFormat::exponent_internal_digit_separator] -/// 32. [`internal_digit_separator`][NumberFormat::internal_digit_separator] -/// 33. [`integer_leading_digit_separator`][NumberFormat::integer_leading_digit_separator] -/// 34. [`fraction_leading_digit_separator`][NumberFormat::fraction_leading_digit_separator] -/// 35. [`exponent_leading_digit_separator`][NumberFormat::exponent_leading_digit_separator] -/// 36. [`leading_digit_separator`][NumberFormat::leading_digit_separator] -/// 37. [`integer_trailing_digit_separator`][NumberFormat::integer_trailing_digit_separator] -/// 38. [`fraction_trailing_digit_separator`][NumberFormat::fraction_trailing_digit_separator] -/// 39. [`exponent_trailing_digit_separator`][NumberFormat::exponent_trailing_digit_separator] -/// 40. [`trailing_digit_separator`][NumberFormat::trailing_digit_separator] -/// 41. [`integer_consecutive_digit_separator`][NumberFormat::integer_consecutive_digit_separator] -/// 42. [`fraction_consecutive_digit_separator`][NumberFormat::fraction_consecutive_digit_separator] -/// 43. [`exponent_consecutive_digit_separator`][NumberFormat::exponent_consecutive_digit_separator] -/// 44. [`consecutive_digit_separator`][NumberFormat::consecutive_digit_separator] -/// 45. [`special_digit_separator`][NumberFormat::special_digit_separator] -/// 46. 
[`digit_separator`][NumberFormat::digit_separator] -/// 47. [`base_prefix`][NumberFormat::base_prefix] -/// 48. [`base_suffix`][NumberFormat::base_suffix] -/// 49. [`exponent_base`][NumberFormat::exponent_base] -/// 50. [`exponent_radix`][NumberFormat::exponent_radix] -/// 51. [`start_digit_separator`][NumberFormat::start_digit_separator] -/// 52. [`integer_sign_digit_separator`][NumberFormat::integer_sign_digit_separator] -/// 53. [`integer_consecutive_sign_digit_separator`][NumberFormat::integer_consecutive_sign_digit_separator] -/// 54. [`exponent_sign_digit_separator`][NumberFormat::exponent_sign_digit_separator] -/// 55. [`exponent_consecutive_sign_digit_separator`][NumberFormat::exponent_consecutive_sign_digit_separator] -/// 56. [`base_prefix_internal_digit_separator`][NumberFormat::base_prefix_internal_digit_separator] -/// 57. [`base_prefix_leading_digit_separator`][NumberFormat::base_prefix_leading_digit_separator] -/// 58. [`base_prefix_trailing_digit_separator`][NumberFormat::base_prefix_trailing_digit_separator] -/// 59. [`base_prefix_consecutive_digit_separator`][NumberFormat::base_prefix_consecutive_digit_separator] -/// 60. [`base_suffix_internal_digit_separator`][NumberFormat::base_suffix_internal_digit_separator] -/// 61. [`base_suffix_leading_digit_separator`][NumberFormat::base_suffix_leading_digit_separator] -/// 62. [`base_suffix_trailing_digit_separator`][NumberFormat::base_suffix_trailing_digit_separator] -/// 63. [`base_suffix_consecutive_digit_separator`][NumberFormat::base_suffix_consecutive_digit_separator] +/// 23. [`required_base_prefix`][NumberFormat::required_base_prefix] +/// 24. [`required_base_suffix`][NumberFormat::required_base_suffix] +/// 25. [`integer_internal_digit_separator`][NumberFormat::integer_internal_digit_separator] +/// 26. [`fraction_internal_digit_separator`][NumberFormat::fraction_internal_digit_separator] +/// 27. [`exponent_internal_digit_separator`][NumberFormat::exponent_internal_digit_separator] +/// 28. 
[`internal_digit_separator`][NumberFormat::internal_digit_separator] +/// 29. [`integer_leading_digit_separator`][NumberFormat::integer_leading_digit_separator] +/// 30. [`fraction_leading_digit_separator`][NumberFormat::fraction_leading_digit_separator] +/// 31. [`exponent_leading_digit_separator`][NumberFormat::exponent_leading_digit_separator] +/// 32. [`leading_digit_separator`][NumberFormat::leading_digit_separator] +/// 33. [`integer_trailing_digit_separator`][NumberFormat::integer_trailing_digit_separator] +/// 34. [`fraction_trailing_digit_separator`][NumberFormat::fraction_trailing_digit_separator] +/// 35. [`exponent_trailing_digit_separator`][NumberFormat::exponent_trailing_digit_separator] +/// 36. [`trailing_digit_separator`][NumberFormat::trailing_digit_separator] +/// 37. [`integer_consecutive_digit_separator`][NumberFormat::integer_consecutive_digit_separator] +/// 38. [`fraction_consecutive_digit_separator`][NumberFormat::fraction_consecutive_digit_separator] +/// 39. [`exponent_consecutive_digit_separator`][NumberFormat::exponent_consecutive_digit_separator] +/// 40. [`consecutive_digit_separator`][NumberFormat::consecutive_digit_separator] +/// 41. [`special_digit_separator`][NumberFormat::special_digit_separator] +/// 42. [`digit_separator`][NumberFormat::digit_separator] +/// 43. [`base_prefix`][NumberFormat::base_prefix] +/// 44. [`base_suffix`][NumberFormat::base_suffix] +/// 45. [`exponent_base`][NumberFormat::exponent_base] +/// 46. [`exponent_radix`][NumberFormat::exponent_radix] +/// 47. [`start_digit_separator`][NumberFormat::start_digit_separator] +/// 48. [`integer_sign_digit_separator`][NumberFormat::integer_sign_digit_separator] +/// 49. [`integer_consecutive_sign_digit_separator`][NumberFormat::integer_consecutive_sign_digit_separator] +/// 50. [`exponent_sign_digit_separator`][NumberFormat::exponent_sign_digit_separator] +/// 51. 
[`exponent_consecutive_sign_digit_separator`][NumberFormat::exponent_consecutive_sign_digit_separator] +/// 52. [`base_prefix_internal_digit_separator`][NumberFormat::base_prefix_internal_digit_separator] +/// 53. [`base_prefix_leading_digit_separator`][NumberFormat::base_prefix_leading_digit_separator] +/// 54. [`base_prefix_trailing_digit_separator`][NumberFormat::base_prefix_trailing_digit_separator] +/// 55. [`base_prefix_consecutive_digit_separator`][NumberFormat::base_prefix_consecutive_digit_separator] +/// 56. [`base_suffix_internal_digit_separator`][NumberFormat::base_suffix_internal_digit_separator] +/// 57. [`base_suffix_leading_digit_separator`][NumberFormat::base_suffix_leading_digit_separator] +/// 58. [`base_suffix_trailing_digit_separator`][NumberFormat::base_suffix_trailing_digit_separator] +/// 59. [`base_suffix_consecutive_digit_separator`][NumberFormat::base_suffix_consecutive_digit_separator] /// /// This should always be constructed via [`NumberFormatBuilder`]. /// See [`NumberFormatBuilder`] for the fields for the packed struct. @@ -714,78 +710,6 @@ impl NumberFormat { Self::REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT } - /// If the format supports parsing integers. - /// - /// See [`supports_parsing_integers`][Self::supports_parsing_integers]. - pub const SUPPORTS_PARSING_INTEGERS: bool = true; - - /// Get if the format supports parsing integers. - /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`true`]. - /// - /// # Used For - /// - /// - Parse Integer - #[inline(always)] - pub const fn supports_parsing_integers(&self) -> bool { - Self::SUPPORTS_PARSING_INTEGERS - } - - /// If the format supports parsing floats. - /// - /// See [`supports_parsing_floats`][Self::supports_parsing_floats]. - pub const SUPPORTS_PARSING_FLOATS: bool = true; - - /// Get if the format supports parsing floats. - /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`true`]. 
- /// - /// # Used For - /// - /// - Parse Float - #[inline(always)] - pub const fn supports_parsing_floats(&self) -> bool { - Self::SUPPORTS_PARSING_FLOATS - } - - /// If the format supports writing integers. - /// - /// See [`supports_writing_integers`][Self::supports_writing_integers]. - pub const SUPPORTS_WRITING_INTEGERS: bool = true; - - /// Get if the format supports writing integers. - /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`true`]. - /// - /// # Used For - /// - /// - Write Integer - #[inline(always)] - pub const fn supports_writing_integers(&self) -> bool { - Self::SUPPORTS_WRITING_INTEGERS - } - - /// If the format supports writing floats. - /// - /// See [`supports_writing_floats`][Self::supports_writing_floats]. - pub const SUPPORTS_WRITING_FLOATS: bool = true; - - /// Get if the format supports writing floats. - /// - /// Can only be modified with [`feature`][crate#features] `format`. Defaults - /// to [`true`]. - /// - /// # Used For - /// - /// - Write Float - #[inline(always)] - pub const fn supports_writing_floats(&self) -> bool { - Self::SUPPORTS_WRITING_FLOATS - } - /// If the format requires base prefixes. /// /// See [`required_base_prefix`][Self::required_base_prefix]. 
@@ -2171,11 +2095,7 @@ pub(crate) const fn radix_error_impl(format: u128) -> Error { pub(crate) const fn format_error_impl(format: u128) -> Error { let valid_flags = flags::REQUIRED_EXPONENT_DIGITS | flags::REQUIRED_MANTISSA_DIGITS - | flags::REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT - | flags::SUPPORTS_PARSING_FLOATS - | flags::SUPPORTS_PARSING_INTEGERS - | flags::SUPPORTS_WRITING_FLOATS - | flags::SUPPORTS_WRITING_INTEGERS; + | flags::REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT; if !flags::is_valid_radix(flags::mantissa_radix(format)) { Error::InvalidMantissaRadix } else if !flags::is_valid_radix(flags::exponent_base(format)) { diff --git a/lexical-util/tests/feature_format_tests.rs b/lexical-util/tests/feature_format_tests.rs index d5e52b8e..752e6b56 100644 --- a/lexical-util/tests/feature_format_tests.rs +++ b/lexical-util/tests/feature_format_tests.rs @@ -7,12 +7,7 @@ use lexical_util::format; #[test] fn ignore_test() { let fmt = format::NumberFormat::<{ format::IGNORE }> {}; - let expected = format::ALL_DIGIT_SEPARATOR_FLAG_MASK - | format::START_DIGIT_SEPARATOR - | format::SUPPORTS_PARSING_FLOATS - | format::SUPPORTS_PARSING_INTEGERS - | format::SUPPORTS_WRITING_FLOATS - | format::SUPPORTS_WRITING_INTEGERS; + let expected = format::ALL_DIGIT_SEPARATOR_FLAG_MASK | format::START_DIGIT_SEPARATOR; assert_eq!(fmt.flags(), expected); assert_eq!(fmt.digit_separator(), b'_'); assert_eq!(fmt.required_integer_digits(), false); diff --git a/lexical-write-float/src/api.rs b/lexical-write-float/src/api.rs index 0718ec7f..56b03d49 100644 --- a/lexical-write-float/src/api.rs +++ b/lexical-write-float/src/api.rs @@ -4,10 +4,9 @@ #[cfg(feature = "f16")] use lexical_util::bf16::bf16; -use lexical_util::error::Error; #[cfg(feature = "f16")] use lexical_util::f16::f16; -use lexical_util::format::{NumberFormat, STANDARD}; +use lexical_util::format::STANDARD; use lexical_util::{to_lexical, to_lexical_with_options}; use crate::options::Options; @@ -39,10 +38,6 @@ macro_rules! 
float_to_lexical { options: &Self::Options, ) -> &'a mut [u8] { - let format = NumberFormat::<{ FORMAT }> {}; - if !format.supports_writing_floats() { - core::panic!("{}", Error::Unsupported.description()); - } let count = self.write_float::<{ FORMAT }>(bytes, &options); &mut bytes[..count] } diff --git a/lexical-write-float/tests/api_tests.rs b/lexical-write-float/tests/api_tests.rs index e1ec1a07..7d219b02 100644 --- a/lexical-write-float/tests/api_tests.rs +++ b/lexical-write-float/tests/api_tests.rs @@ -1,5 +1,5 @@ use lexical_util::constants::BUFFER_SIZE; -#[cfg(any(feature = "format", feature = "power-of-two"))] +#[cfg(feature = "power-of-two")] use lexical_util::format::NumberFormatBuilder; use lexical_util::format::STANDARD; use lexical_write_float::{Options, ToLexical, ToLexicalWithOptions}; @@ -79,33 +79,6 @@ fn hex_test() { assert_eq!(result, b"3.039^12"); } -#[test] -#[should_panic] -#[cfg(feature = "format")] -fn unsupported_test() { - const FORMAT: u128 = NumberFormatBuilder::new().supports_writing_floats(false).build_strict(); - const OPTIONS: Options = Options::new(); - - let mut buffer = [b'\x00'; BUFFER_SIZE]; - let float = 12345.0f64; - _ = float.to_lexical_with_options::(&mut buffer, &OPTIONS); -} - -#[test] -#[cfg(feature = "format")] -fn supported_test() { - const FORMAT: u128 = NumberFormatBuilder::new() - .supports_parsing_integers(false) - .supports_parsing_floats(false) - .supports_writing_integers(false) - .build_strict(); - const OPTIONS: Options = Options::new(); - - let mut buffer = [b'\x00'; BUFFER_SIZE]; - let float = 12345.0f64; - assert_eq!(b"12345.0", float.to_lexical_with_options::(&mut buffer, &OPTIONS)); -} - #[test] #[cfg(all(feature = "format", feature = "power-of-two"))] fn require_base_prefix_test() { diff --git a/lexical-write-integer/src/api.rs b/lexical-write-integer/src/api.rs index 50dd1a1b..ee7387ef 100644 --- a/lexical-write-integer/src/api.rs +++ b/lexical-write-integer/src/api.rs @@ -2,7 +2,6 @@ #![doc(hidden)] 
-use lexical_util::error::Error; use lexical_util::format::{NumberFormat, STANDARD}; use lexical_util::num::SignedInteger; use lexical_util::{to_lexical, to_lexical_with_options}; @@ -153,9 +152,7 @@ macro_rules! unsigned_to_lexical { { _ = options; let format = NumberFormat::<{ FORMAT }> {}; - if !format.supports_writing_integers() { - core::panic!("{}", Error::Unsupported.description()); - } else if !format.is_valid() { + if !format.is_valid() { core::panic!("{}", format.error().description()); } let len = unsigned::<$t, FORMAT>(self, bytes); @@ -193,9 +190,7 @@ macro_rules! signed_to_lexical { { _ = options; let format = NumberFormat::<{ FORMAT }> {}; - if !format.supports_writing_integers() { - core::panic!("{}", Error::Unsupported.description()); - } else if !format.is_valid() { + if !format.is_valid() { core::panic!("{}", format.error().description()); } let len = signed::<$signed, $unsigned, FORMAT>(self, bytes); diff --git a/lexical-write-integer/tests/api_tests.rs b/lexical-write-integer/tests/api_tests.rs index f6946f23..d76959f9 100644 --- a/lexical-write-integer/tests/api_tests.rs +++ b/lexical-write-integer/tests/api_tests.rs @@ -3,7 +3,7 @@ mod util; use core::fmt::Debug; use core::str::{from_utf8_unchecked, FromStr}; -#[cfg(any(feature = "format", feature = "radix"))] +#[cfg(feature = "radix")] use lexical_util::constants::BUFFER_SIZE; #[cfg(feature = "format")] use lexical_util::format::NumberFormatBuilder; @@ -215,36 +215,6 @@ fn options_radix_test() { assert_eq!(b"A8", 128u8.to_lexical_with_options::<{ FORMAT }>(&mut buffer, &OPTIONS)); } -#[test] -#[should_panic] -#[cfg(feature = "format")] -fn unsupported_test() { - const FORMAT: u128 = NumberFormatBuilder::new().supports_writing_integers(false).build_strict(); - const OPTIONS: Options = Options::new(); - - let mut buffer = [b'\x00'; BUFFER_SIZE]; - let integer = 12345i64; - _ = integer.to_lexical_with_options::(&mut buffer, &OPTIONS); -} - -#[test] -#[cfg(feature = "format")] -fn 
supported_test() { - const FORMAT: u128 = NumberFormatBuilder::new() - .supports_parsing_integers(false) - .supports_parsing_floats(false) - .supports_writing_floats(false) - .build_strict(); - const OPTIONS: Options = Options::new(); - - let mut buffer = [b'\x00'; BUFFER_SIZE]; - let integer = 12345i64; - assert_eq!(b"12345", integer.to_lexical_with_options::(&mut buffer, &OPTIONS)); - - let integer = 12345u64; - assert_eq!(b"12345", integer.to_lexical_with_options::(&mut buffer, &OPTIONS)); -} - #[test] #[cfg(all(feature = "format", feature = "power-of-two"))] fn require_base_prefix_test() { From 1b955e24b958cf0c06f7535b04414838197c5dd1 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Thu, 23 Jan 2025 20:20:51 -0600 Subject: [PATCH 17/18] Adds shell of logic for toggling sign logic. This enables toggling no unsigned integer negative signs, and having no signs (even negative) for mantissas and exponents. This commit is broken and will not work for the new additions in parsing. --- CHANGELOG | 1 + lexical-util/src/error.rs | 16 ++ lexical-util/src/feature_format.rs | 91 +++++++ lexical-util/src/format_builder.rs | 355 ++++++++++++++++++++++--- lexical-util/src/format_flags.rs | 21 +- lexical-util/src/not_feature_format.rs | 99 ++++++- 6 files changed, 545 insertions(+), 38 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 21b0682b..74c6392d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `NumberFormatBuilder::none()` for create a format with no flags set (#215). - Added in many more digit separator flags for the `NumberFormat`, including for signs, base prefixes, base suffixes, and restricting digit separators at the start of the number (#215). - Added many more pre-defined formatting constants (#215). +- Added additional sign configurations, with `no_unsigned_negative_sign`, `no_mantissa_sign`, and `no_exponent_sign` (#215). 
### Changed diff --git a/lexical-util/src/error.rs b/lexical-util/src/error.rs index 8b55f2d4..99e68e65 100644 --- a/lexical-util/src/error.rs +++ b/lexical-util/src/error.rs @@ -32,12 +32,16 @@ pub enum Error { EmptyFraction(usize), /// Invalid positive mantissa sign was found. InvalidPositiveMantissaSign(usize), + /// Invalid negative mantissa sign was found. + InvalidNegativeMantissaSign(usize), /// Mantissa sign was required(usize), but not found. MissingMantissaSign(usize), /// Exponent was present but not allowed. InvalidExponent(usize), /// Invalid positive exponent sign was found. InvalidPositiveExponentSign(usize), + /// Invalid negative exponent sign was found. + InvalidNegativeExponentSign(usize), /// Exponent sign was required(usize), but not found. MissingExponentSign(usize), /// Exponent was present without fraction component. @@ -159,9 +163,11 @@ impl Error { Self::EmptyInteger(_) => "'invalid float with no integer digits'", Self::EmptyFraction(_) => "'invalid float with no fraction digits'", Self::InvalidPositiveMantissaSign(_) => "'invalid `+` sign before significant digits'", + Self::InvalidNegativeMantissaSign(_) => "'invalid `-` sign before significant digits'", Self::MissingMantissaSign(_) => "'missing required `+/-` sign for significant digits'", Self::InvalidExponent(_) => "'exponent found but not allowed'", Self::InvalidPositiveExponentSign(_) => "'invalid `+` sign in exponent'", + Self::InvalidNegativeExponentSign(_) => "'invalid `-` sign in exponent'", Self::MissingExponentSign(_) => "'missing required `+/-` sign for exponent'", Self::ExponentWithoutFraction(_) => "'invalid float containing exponent without fraction'", Self::ExponentWithoutIntegerDigits(_) => "'invalid float containing exponent without integer digits'", @@ -227,9 +233,11 @@ impl Error { Self::EmptyInteger(index) => Some(index), Self::EmptyFraction(index) => Some(index), Self::InvalidPositiveMantissaSign(index) => Some(index), + Self::InvalidNegativeMantissaSign(index) => 
Some(index), Self::MissingMantissaSign(index) => Some(index), Self::InvalidExponent(index) => Some(index), Self::InvalidPositiveExponentSign(index) => Some(index), + Self::InvalidNegativeExponentSign(index) => Some(index), Self::MissingExponentSign(index) => Some(index), Self::ExponentWithoutFraction(index) => Some(index), Self::ExponentWithoutIntegerDigits(index) => Some(index), @@ -290,9 +298,11 @@ impl Error { is_error_type!(is_empty_integer, EmptyInteger(_)); is_error_type!(is_empty_fraction, EmptyFraction(_)); is_error_type!(is_invalid_positive_mantissa_sign, InvalidPositiveMantissaSign(_)); + is_error_type!(is_invalid_negative_mantissa_sign, InvalidNegativeMantissaSign(_)); is_error_type!(is_missing_mantissa_sign, MissingMantissaSign(_)); is_error_type!(is_invalid_exponent, InvalidExponent(_)); is_error_type!(is_invalid_positive_exponent_sign, InvalidPositiveExponentSign(_)); + is_error_type!(is_invalid_negative_exponent_sign, InvalidNegativeExponentSign(_)); is_error_type!(is_missing_exponent_sign, MissingExponentSign(_)); is_error_type!(is_exponent_without_fraction, ExponentWithoutFraction(_)); is_error_type!(is_invalid_leading_zeros, InvalidLeadingZeros(_)); @@ -380,11 +390,17 @@ impl fmt::Display for Error { Self::InvalidPositiveMantissaSign(index) => { write_parse_error!(formatter, description, index) }, + Self::InvalidNegativeMantissaSign(index) => { + write_parse_error!(formatter, description, index) + }, Self::MissingMantissaSign(index) => write_parse_error!(formatter, description, index), Self::InvalidExponent(index) => write_parse_error!(formatter, description, index), Self::InvalidPositiveExponentSign(index) => { write_parse_error!(formatter, description, index) }, + Self::InvalidNegativeExponentSign(index) => { + write_parse_error!(formatter, description, index) + }, Self::MissingExponentSign(index) => write_parse_error!(formatter, description, index), Self::ExponentWithoutFraction(index) => { write_parse_error!(formatter, description, index) diff 
--git a/lexical-util/src/feature_format.rs b/lexical-util/src/feature_format.rs index 7e8fd005..762a354f 100644 --- a/lexical-util/src/feature_format.rs +++ b/lexical-util/src/feature_format.rs @@ -820,6 +820,97 @@ impl NumberFormat { Self::REQUIRED_BASE_SUFFIX } + /// If a negative sign before an unsigned integer is not allowed. + /// + /// See [`no_unsigned_negative_sign`][Self::no_unsigned_negative_sign]. + pub const NO_UNSIGNED_NEGATIVE_SIGN: bool = from_flag!(FORMAT, NO_UNSIGNED_NEGATIVE_SIGN); + + /// If a negative sign before an unsigned integer is not allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. This does + /// not apply to signed integers or floating point numbers. Defaults to [`true`]. + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `-12` | ❌ | + /// | `+12` | ✔️ | + /// | `12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Integer + #[inline(always)] + pub const fn no_unsigned_negative_sign(&self) -> bool { + Self::NO_UNSIGNED_NEGATIVE_SIGN + } + + /// If positive or negative signs before the significant digits are not allowed. + /// + /// See [`no_mantissa_sign`][Self::no_mantissa_sign]. + pub const NO_MANTISSA_SIGN: bool = from_flag!(FORMAT, NO_MANTISSA_SIGN); + + /// If positive or negative signs before the significant digits are not allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. if enabled, then + /// the type cannot represent negative literal or string values (although they may + /// be computed via mathematical operations). Defaults to [`false`]. + /// + /// If you only want to disable positive signs, see [`no_positive_mantissa_sign`]. + /// If you wish to disable negative signs on unsigned integers, see + /// [`no_unsigned_negative_sign`]. + /// + /// [`no_positive_mantissa_sign`]: Self::no_positive_mantissa_sign + /// [`no_unsigned_negative_sign`]: Self::no_unsigned_negative_sign + /// + /// # Examples + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `-12` | ❌ | + /// | `+12` | ❌ | + /// | `12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Integer + /// - Parse Float + #[inline(always)] + pub const fn no_mantissa_sign(&self) -> bool { + Self::NO_MANTISSA_SIGN + } + + /// If positive or negative signs before an exponent are not allowed. + /// + /// See [`no_exponent_sign`][Self::no_exponent_sign]. + pub const NO_EXPONENT_SIGN: bool = from_flag!(FORMAT, NO_EXPONENT_SIGN); + + /// If positive or negative signs before an exponent are not allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`false`]. + /// + /// If you only want to disable positive signs, see [`no_positive_exponent_sign`]. + /// + /// [`no_positive_exponent_sign`]: Self::no_positive_exponent_sign + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1.0e-12` | ❌ | + /// | `1.0e+12` | ❌ | + /// | `1.0e12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn no_exponent_sign(&self) -> bool { + Self::NO_EXPONENT_SIGN + } + // DIGIT SEPARATOR FLAGS & MASKS /// If digit separators are allowed at the absolute start of the number. diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index d59a9526..4c11b47e 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -135,6 +135,12 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { /// - [`case_sensitive_exponent`]: If exponent characters are case-sensitive. /// - [`case_sensitive_base_prefix`]: If base prefixes are case-sensitive. /// - [`case_sensitive_base_suffix`]: If base suffixes are case-sensitive. +/// - [`no_unsigned_negative_sign`]: If a negative sign before an unsigned +/// integer is not allowed. +/// - [`no_mantissa_sign`]: If positive or negative signs before the +/// significant digits are not allowed. 
+/// - [`no_exponent_sign`]: If positive or negative signs before an exponent are +/// not allowed. /// - [`start_digit_separator`]: If digit separators are allowed at the absolute /// start of the number. /// - [`integer_sign_digit_separator`]: If digit separators are allowed before @@ -251,6 +257,44 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { /// allowed. /// - [`required_exponent_sign`]: If sign before the exponent is required. /// - [`required_exponent_notation`]: If exponent notation is required. +/// - [`case_sensitive_base_prefix`]: If base prefixes are case-sensitive. +/// - [`case_sensitive_base_suffix`]: If base suffixes are case-sensitive. +/// - [`no_unsigned_negative_sign`]: If a negative sign before an unsigned +/// integer is not allowed. +/// - [`no_mantissa_sign`]: If positive or negative signs before the +/// significant digits are not allowed. +/// - [`no_exponent_sign`]: If positive or negative signs before an exponent are +/// not allowed. +/// - [`start_digit_separator`]: If digit separators are allowed at the absolute +/// start of the number. +/// - [`integer_sign_digit_separator`]: If digit separators are allowed before +/// the sign of the integer. +/// - [`integer_consecutive_sign_digit_separator`]: If consecutive digit +/// separators are allowed before the sign of the integer. +/// - [`base_prefix_internal_digit_separator`]: If a digit separator is allowed +/// between the base prefix. +/// - [`base_prefix_leading_digit_separator`]: If a digit separator is allowed +/// before the base prefix. +/// - [`base_prefix_trailing_digit_separator`]: If a digit separator is allowed +/// after the base prefix. +/// - [`base_prefix_consecutive_digit_separator`]: If multiple consecutive base +/// prefix digit separators are allowed. +/// - [`base_suffix_internal_digit_separator`]: If a digit separator is allowed +/// between the base suffix. +/// - [`base_suffix_leading_digit_separator`]: If a digit separator is allowed +/// before the base suffix. 
+/// - [`base_suffix_trailing_digit_separator`]: If a digit separator is allowed +/// after the base suffix. +/// - [`base_suffix_consecutive_digit_separator`]: If multiple consecutive base +/// suffix digit separators are allowed. +/// - [`integer_internal_digit_separator`]: If digit separators are allowed +/// between integer digits. +/// - [`integer_leading_digit_separator`]: If a digit separator is allowed +/// before any integer digits. +/// - [`integer_trailing_digit_separator`]: If a digit separator is allowed +/// after any integer digits. +/// - [`integer_consecutive_digit_separator`]: If multiple consecutive integer +/// digit separators are allowed. /// /// # Parse Float Fields /// @@ -296,6 +340,12 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { /// - [`case_sensitive_exponent`]: If exponent characters are case-sensitive. /// - [`case_sensitive_base_prefix`]: If base prefixes are case-sensitive. /// - [`case_sensitive_base_suffix`]: If base suffixes are case-sensitive. +/// - [`no_unsigned_negative_sign`]: If a negative sign before an unsigned +/// integer is not allowed. +/// - [`no_mantissa_sign`]: If positive or negative signs before the +/// significant digits are not allowed. +/// - [`no_exponent_sign`]: If positive or negative signs before an exponent are +/// not allowed. /// - [`start_digit_separator`]: If digit separators are allowed at the absolute /// start of the number. 
/// - [`integer_sign_digit_separator`]: If digit separators are allowed before @@ -385,19 +435,14 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`required_fraction_digits_with_exponent`]: Self::required_fraction_digits_with_exponent\n [`required_mantissa_digits_with_exponent`]: Self::required_mantissa_digits_with_exponent\n [`case_sensitive_exponent`]: Self::case_sensitive_exponent\n +[`no_unsigned_negative_sign`]: Self::no_unsigned_negative_sign\n +[`no_mantissa_sign`]: Self::no_mantissa_sign\n +[`no_exponent_sign`]: Self::no_exponent_sign\n [`start_digit_separator`]: Self::start_digit_separator\n [`integer_sign_digit_separator`]: Self::integer_sign_digit_separator\n [`integer_consecutive_sign_digit_separator`]: Self::integer_consecutive_sign_digit_separator\n [`exponent_sign_digit_separator`]: Self::exponent_sign_digit_separator\n [`exponent_consecutive_sign_digit_separator`]: Self::exponent_consecutive_sign_digit_separator\n -[`base_prefix_internal_digit_separator`]: Self::base_prefix_internal_digit_separator\n -[`base_prefix_leading_digit_separator`]: Self::base_prefix_leading_digit_separator\n -[`base_prefix_trailing_digit_separator`]: Self::base_prefix_trailing_digit_separator\n -[`base_prefix_consecutive_digit_separator`]: Self::base_prefix_consecutive_digit_separator\n -[`base_suffix_internal_digit_separator`]: Self::base_suffix_internal_digit_separator\n -[`base_suffix_leading_digit_separator`]: Self::base_suffix_leading_digit_separator\n -[`base_suffix_trailing_digit_separator`]: Self::base_suffix_trailing_digit_separator\n -[`base_suffix_consecutive_digit_separator`]: Self::base_suffix_consecutive_digit_separator\n [`integer_internal_digit_separator`]: Self::integer_internal_digit_separator\n [`fraction_internal_digit_separator`]: Self::fraction_internal_digit_separator\n [`exponent_internal_digit_separator`]: Self::exponent_internal_digit_separator\n @@ -435,19 +480,14 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { 
[`required_fraction_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/0cad692/lexical-util/src/format_builder.rs#L1149\n [`required_mantissa_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/47a090d/lexical-util/src/format_builder.rs#L1233\n [`case_sensitive_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L765\n +[`no_unsigned_negative_sign`]: https://github.com/Alexhuszagh/rust-lexical/blob/TODO/lexical-util/src/format_builder.rs#LTODO\n +[`no_mantissa_sign`]: https://github.com/Alexhuszagh/rust-lexical/blob/TODO/lexical-util/src/format_builder.rs#LTODO\n +[`no_exponent_sign`]: https://github.com/Alexhuszagh/rust-lexical/blob/TODO/lexical-util/src/format_builder.rs#LTODO\n [`start_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1650\n [`integer_sign_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1678\n [`integer_consecutive_sign_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1706\n [`exponent_sign_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1728\n [`exponent_consecutive_sign_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1754\n -[`base_prefix_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1777\n -[`base_prefix_leading_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1809\n -[`base_prefix_trailing_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1841\n -[`base_prefix_consecutive_digit_separator`]: 
https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1870\n -[`base_suffix_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1883\n -[`base_suffix_leading_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1916\n -[`base_suffix_trailing_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1939\n -[`base_suffix_consecutive_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1967\n [`integer_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L793\n [`fraction_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L805\n [`exponent_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L817\n @@ -471,6 +511,14 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`case_sensitive_base_suffix`]: Self::case_sensitive_base_suffix [`required_base_prefix`]: Self::required_base_prefix [`required_base_suffix`]: Self::required_base_suffix +[`base_prefix_internal_digit_separator`]: Self::base_prefix_internal_digit_separator\n +[`base_prefix_leading_digit_separator`]: Self::base_prefix_leading_digit_separator\n +[`base_prefix_trailing_digit_separator`]: Self::base_prefix_trailing_digit_separator\n +[`base_prefix_consecutive_digit_separator`]: Self::base_prefix_consecutive_digit_separator\n +[`base_suffix_internal_digit_separator`]: Self::base_suffix_internal_digit_separator\n +[`base_suffix_leading_digit_separator`]: Self::base_suffix_leading_digit_separator\n +[`base_suffix_trailing_digit_separator`]: Self::base_suffix_trailing_digit_separator\n +[`base_suffix_consecutive_digit_separator`]: 
Self::base_suffix_consecutive_digit_separator\n " )] #[cfg_attr( @@ -482,6 +530,14 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`case_sensitive_base_suffix`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L781\n [`required_base_prefix`]: https://github.com/Alexhuszagh/rust-lexical/blob/63f9adf/lexical-util/src/format_builder.rs#L1267\n [`required_base_suffix`]: https://github.com/Alexhuszagh/rust-lexical/blob/63f9adf/lexical-util/src/format_builder.rs#L1291\n +[`base_prefix_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1777\n +[`base_prefix_leading_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1809\n +[`base_prefix_trailing_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1841\n +[`base_prefix_consecutive_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1870\n +[`base_suffix_internal_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1883\n +[`base_suffix_leading_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1916\n +[`base_suffix_trailing_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1939\n +[`base_suffix_consecutive_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1967\n " )] /// @@ -582,6 +638,9 @@ pub struct NumberFormatBuilder { required_mantissa_digits_with_exponent: bool, required_base_prefix: bool, required_base_suffix: bool, + no_unsigned_negative_sign: bool, + no_mantissa_sign: bool, + no_exponent_sign: bool, start_digit_separator: bool, integer_sign_digit_separator: bool, 
integer_consecutive_sign_digit_separator: bool, @@ -657,6 +716,10 @@ impl NumberFormatBuilder { /// `false` /// - [`required_base_prefix`][Self::get_required_base_prefix] - `false` /// - [`required_base_suffix`][Self::get_required_base_suffix] - `false` + /// - [`no_unsigned_negative_sign`][Self::get_no_unsigned_negative_sign] - + /// `true` + /// - [`no_mantissa_sign`][Self::get_no_mantissa_sign] - `false` + /// - [`no_exponent_sign`][Self::get_no_exponent_sign] - `false` /// - [`start_digit_separator`][Self::start_digit_separator] - `true` /// - [`integer_sign_digit_separator`][Self::integer_sign_digit_separator] - /// `false` @@ -718,6 +781,9 @@ impl NumberFormatBuilder { required_mantissa_digits_with_exponent: true, required_base_prefix: false, required_base_suffix: false, + no_unsigned_negative_sign: true, + no_mantissa_sign: false, + no_exponent_sign: false, start_digit_separator: cfg!(feature = "format"), integer_sign_digit_separator: false, integer_consecutive_sign_digit_separator: false, @@ -782,6 +848,9 @@ impl NumberFormatBuilder { required_mantissa_digits_with_exponent: false, required_base_prefix: false, required_base_suffix: false, + no_unsigned_negative_sign: true, + no_mantissa_sign: false, + no_exponent_sign: false, start_digit_separator: true, integer_sign_digit_separator: false, integer_consecutive_sign_digit_separator: false, @@ -1551,6 +1620,83 @@ impl NumberFormatBuilder { self.required_base_suffix } + /// Get if a negative sign before an unsigned integer is not allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. This + /// does not apply to signed integers or floating point numbers. + /// + /// # Examples + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `-12` | ❌ | + /// | `+12` | ✔️ | + /// | `12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Integer + #[inline(always)] + pub const fn get_no_unsigned_negative_sign(&self) -> bool { + self.no_unsigned_negative_sign + } + + /// Get if positive or negative signs before the significant digits are not + /// allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. if + /// enabled, then the type cannot represent negative literal or string + /// values (although they may be computed via mathematical operations). + /// + /// If you only want to disable positive signs, see + /// [`no_positive_mantissa_sign`]. If you wish to disable negative signs + /// on unsigned integers, see [`no_unsigned_negative_sign`]. + /// + /// [`no_positive_mantissa_sign`]: Self::get_no_positive_mantissa_sign + /// [`no_unsigned_negative_sign`]: Self::get_no_unsigned_negative_sign + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `-12` | ❌ | + /// | `+12` | ❌ | + /// | `12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Integer + /// - Parse Float + #[inline(always)] + pub const fn get_no_mantissa_sign(&self) -> bool { + self.no_mantissa_sign + } + + /// Get if positive or negative signs before an exponent are not allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// + /// If you only want to disable positive signs, see + /// [`no_positive_exponent_sign`]. + /// + /// [`no_positive_exponent_sign`]: Self::get_no_positive_exponent_sign + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1.0e-12` | ❌ | + /// | `1.0e+12` | ❌ | + /// | `1.0e12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn get_no_exponent_sign(&self) -> bool { + self.no_exponent_sign + } + /// Get if digit separators are allowed at the absolute start of the number. 
/// /// This modifies the behavior of [`integer_sign_digit_separator`] and @@ -2551,6 +2697,7 @@ impl NumberFormatBuilder { /// assert_eq!(parse_with_options::(b"1x", &PF_OPTS), Ok(1.0)); /// assert_eq!(parse_with_options::(b"1x1", &PF_OPTS), Err(Error::InvalidDigit(2))); /// + /// // TODO: FIXME! This is incorrectly getting the location wrong /// assert_eq!(parse_with_options::(b"0x1", &PI_OPTS), Err(Error::InvalidDigit(2))); /// assert_eq!(parse_with_options::(b"x1", &PI_OPTS), Err(Error::InvalidDigit(0))); /// assert_eq!(parse_with_options::(b"1", &PI_OPTS), Ok(1)); @@ -2789,7 +2936,7 @@ impl NumberFormatBuilder { /// .build_strict(); /// assert_eq!(parse_with_options::(b"1.1", &PF_OPTS), Ok(1.1)); /// assert_eq!(parse_with_options::(b"-1.1", &PF_OPTS), Ok(-1.1)); - /// assert_eq!(parse_with_options::(b"+1.1", &PF_OPTS), Err(Error::InvalidPositiveSign(0))); + /// assert_eq!(parse_with_options::(b"+1.1", &PF_OPTS), Err(Error::InvalidPositiveMantissaSign(0))); /// /// let mut buffer = [0u8; BUFFER_SIZE]; /// assert_eq!(write_with_options::(1.1, &mut buffer, &WF_OPTS), b"1.1"); @@ -2827,7 +2974,7 @@ impl NumberFormatBuilder { /// const FORMAT: u128 = NumberFormatBuilder::new() /// .required_mantissa_sign(true) /// .build_strict(); - /// assert_eq!(parse_with_options::(b"1.1", &PF_OPTS), Err(Error::MissingSign(0))); + /// assert_eq!(parse_with_options::(b"1.1", &PF_OPTS), Err(Error::MissingMantissaSign(0))); /// assert_eq!(parse_with_options::(b"+1.1", &PF_OPTS), Ok(1.1)); /// assert_eq!(parse_with_options::(b"-1.1", &PF_OPTS), Ok(-1.1)); /// @@ -3119,7 +3266,7 @@ impl NumberFormatBuilder { /// assert_eq!(parse::(b"0"), Ok(0)); /// assert_eq!(parse::(b"10"), Ok(10)); /// - /// assert_eq!(parse_with_options::(b"01", &PI_OPTS), Ok(0)); + /// assert_eq!(parse_with_options::(b"01", &PI_OPTS), Err(Error::InvalidLeadingZeros(0))); /// assert_eq!(parse_with_options::(b"+01", &PI_OPTS), Err(Error::InvalidLeadingZeros(1))); /// assert_eq!(parse_with_options::(b"0", 
&PI_OPTS), Ok(0)); /// assert_eq!(parse_with_options::(b"10", &PI_OPTS), Ok(10)); @@ -3349,6 +3496,7 @@ impl NumberFormatBuilder { /// assert_eq!(parse_with_options::(b"1x", &PF_OPTS), Ok(1.0)); /// assert_eq!(parse_with_options::(b"1X", &PF_OPTS), Err(Error::InvalidDigit(1))); /// + /// // TODO: This has the wrong placement /// assert_eq!(parse_with_options::(b"0x1", &PI_OPTS), Err(Error::InvalidDigit(2))); /// assert_eq!(parse_with_options::(b"1", &PI_OPTS), Ok(1)); /// assert_eq!(parse_with_options::(b"1x", &PI_OPTS), Ok(1)); @@ -3532,6 +3680,131 @@ impl NumberFormatBuilder { self } + /// If a negative sign before an unsigned integer is not allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. This + /// does not apply to signed integers or floating point numbers. + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `-12` | ❌ | + /// | `+12` | ✔️ | + /// | `12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Integer + /// + /// + #[inline(always)] + #[cfg(feature = "format")] + pub const fn no_unsigned_negative_sign(mut self, flag: bool) -> Self { + self.no_unsigned_negative_sign = flag; + self + } + + /// If positive or negative signs before the significant digits are not + /// allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. If + /// enabled, then the type cannot represent negative literal or string + /// values (although they may be computed via mathematical operations). + /// + /// If you only want to disable positive signs, see + /// [`no_positive_mantissa_sign`]. If you wish to disable negative signs + /// on unsigned integers, see [`no_unsigned_negative_sign`]. + /// + /// [`no_positive_mantissa_sign`]: Self::no_positive_mantissa_sign + /// [`no_unsigned_negative_sign`]: Self::no_unsigned_negative_sign + /// + /// # Examples + /// + /// | Input | Valid? 
| + /// |:-:|:-:| + /// | `-12` | ❌ | + /// | `+12` | ❌ | + /// | `12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Integer + /// - Parse Float + /// + /// + #[inline(always)] + #[cfg(feature = "format")] + pub const fn no_mantissa_sign(mut self, flag: bool) -> Self { + self.no_mantissa_sign = flag; + self + } + + /// If positive or negative signs before an exponent are not allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. + /// + /// If you only want to disable positive signs, see + /// [`no_positive_exponent_sign`]. + /// + /// [`no_positive_exponent_sign`]: Self::no_positive_exponent_sign + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1.0e-12` | ❌ | + /// | `1.0e+12` | ❌ | + /// | `1.0e12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + /// + /// + #[inline(always)] + #[cfg(feature = "format")] + pub const fn no_exponent_sign(mut self, flag: bool) -> Self { + self.no_exponent_sign = flag; + self + } + /// Set if digit separators are allowed at the absolute start of the number. 
/// /// This modifies the behavior of [`integer_sign_digit_separator`] and @@ -3587,7 +3860,7 @@ impl NumberFormatBuilder { /// const NO_START: u128 = NumberFormatBuilder::new() /// .digit_separator(num::NonZeroU8::new(b'_')) /// .integer_leading_digit_separator(true) - /// .start_digit_separator(true) + /// .start_digit_separator(false) /// .build_strict(); /// assert_eq!(parse_with_options::(b"1", &PI_OPTS), Ok(1)); /// assert_eq!(parse_with_options::(b"_1", &PI_OPTS), Err(Error::InvalidDigit(0))); @@ -3876,6 +4149,7 @@ impl NumberFormatBuilder { /// const NO_START: u128 = NumberFormatBuilder::rebuild(FORMAT) /// .start_digit_separator(false) /// .build_strict(); + /// // TODO: Start digit separator isn't being properly respected /// assert_eq!(parse_with_options::(b"_0d1", &PF_OPTS), Err(Error::InvalidDigit(0))); /// assert_eq!(parse_with_options::(b"_0d1", &PI_OPTS), Err(Error::InvalidDigit(0))); /// ``` @@ -3924,17 +4198,17 @@ impl NumberFormatBuilder { /// .build_strict(); /// assert_eq!(parse_with_options::(b"1", &PF_OPTS), Ok(1.0)); /// assert_eq!(parse_with_options::(b"0d1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"_0d1", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"_0d1", &PF_OPTS), Err(Error::InvalidDigit(0))); /// assert_eq!(parse_with_options::(b"0_d1", &PF_OPTS), Err(Error::InvalidDigit(1))); /// assert_eq!(parse_with_options::(b"0d_1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"0d__1", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0d__1", &PF_OPTS), Err(Error::InvalidDigit(3))); /// /// assert_eq!(parse_with_options::(b"1", &PI_OPTS), Ok(1)); /// assert_eq!(parse_with_options::(b"0d1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"_0d1", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"_0d1", &PI_OPTS), Err(Error::InvalidDigit(0))); /// assert_eq!(parse_with_options::(b"0_d1", &PI_OPTS), 
Err(Error::InvalidDigit(1))); /// assert_eq!(parse_with_options::(b"0d_1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"0d__1", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"0d__1", &PI_OPTS), Err(Error::InvalidDigit(3))); /// ``` /// --> #[inline(always)] @@ -3978,13 +4252,13 @@ impl NumberFormatBuilder { /// .base_prefix_consecutive_digit_separator(true) /// .build_strict(); /// assert_eq!(parse_with_options::(b"0d1", &PF_OPTS), Ok(1.0)); - /// assert_eq!(parse_with_options::(b"_0d1", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"_0d1", &PF_OPTS), Err(Error::InvalidDigit(0))); /// assert_eq!(parse_with_options::(b"0_d1", &PF_OPTS), Err(Error::InvalidDigit(1))); /// assert_eq!(parse_with_options::(b"0d_1", &PF_OPTS), Ok(1.0)); /// assert_eq!(parse_with_options::(b"0d__1", &PF_OPTS), Ok(1.0)); /// /// assert_eq!(parse_with_options::(b"0d1", &PI_OPTS), Ok(1)); - /// assert_eq!(parse_with_options::(b"_0d1", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"_0d1", &PI_OPTS), Err(Error::InvalidDigit(0))); /// assert_eq!(parse_with_options::(b"0_d1", &PI_OPTS), Err(Error::InvalidDigit(1))); /// assert_eq!(parse_with_options::(b"0d_1", &PI_OPTS), Ok(1)); /// assert_eq!(parse_with_options::(b"0d__1", &PI_OPTS), Ok(1)); @@ -4051,14 +4325,15 @@ impl NumberFormatBuilder { /// assert_eq!(parse_with_options::(b"12d", &PF_OPTS), Ok(12.0)); /// assert_eq!(parse_with_options::(b"1_2d", &PF_OPTS), Err(Error::InvalidDigit(1))); /// assert_eq!(parse_with_options::(b"12_d", &PF_OPTS), Ok(12.0)); - /// assert_eq!(parse_with_options::(b"12__d", &PF_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"12d_", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"12__d", &PF_OPTS), Err(Error::InvalidDigit(2))); + /// assert_eq!(parse_with_options::(b"12d_", &PF_OPTS), Err(Error::InvalidDigit(3))); /// /// 
assert_eq!(parse_with_options::(b"12d", &PI_OPTS), Ok(12)); /// assert_eq!(parse_with_options::(b"1_2d", &PI_OPTS), Err(Error::InvalidDigit(1))); /// assert_eq!(parse_with_options::(b"12_d", &PI_OPTS), Ok(12)); - /// assert_eq!(parse_with_options::(b"12__d", &PI_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"12d_", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"12__d", &PI_OPTS), Err(Error::InvalidDigit(2))); + /// // TODO: This is incorrectly using the current placement + /// assert_eq!(parse_with_options::(b"12d_", &PI_OPTS), Err(Error::InvalidDigit(3))); /// ``` /// --> #[inline(always)] @@ -4096,15 +4371,16 @@ impl NumberFormatBuilder { /// .build_strict(); /// assert_eq!(parse_with_options::(b"12d", &PF_OPTS), Ok(12.0)); /// assert_eq!(parse_with_options::(b"1_2d", &PF_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"12_d", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"12_d", &PF_OPTS), Err(Error::InvalidDigit(2))); /// assert_eq!(parse_with_options::(b"12d_", &PF_OPTS), Ok(12.0)); - /// assert_eq!(parse_with_options::(b"12d__", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"12d__", &PF_OPTS), Err(Error::InvalidDigit(4))); /// /// assert_eq!(parse_with_options::(b"12d", &PI_OPTS), Ok(12)); /// assert_eq!(parse_with_options::(b"1_2d", &PI_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"12_d", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"12_d", &PI_OPTS), Err(Error::InvalidDigit(2))); /// assert_eq!(parse_with_options::(b"12d_", &PI_OPTS), Ok(12)); - /// assert_eq!(parse_with_options::(b"12d__", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// // TODO: This is getting the location wrong + /// assert_eq!(parse_with_options::(b"12d__", &PI_OPTS), Err(Error::InvalidDigit(4))); /// ``` /// --> #[inline(always)] @@ -4148,13 +4424,13 @@ impl 
NumberFormatBuilder { /// .build_strict(); /// assert_eq!(parse_with_options::(b"12d", &PF_OPTS), Ok(12.0)); /// assert_eq!(parse_with_options::(b"1_2d", &PF_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"12_d", &PF_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"12_d", &PF_OPTS), Err(Error::InvalidDigit(2))); /// assert_eq!(parse_with_options::(b"12d_", &PF_OPTS), Ok(12.0)); /// assert_eq!(parse_with_options::(b"12d__", &PF_OPTS), Ok(12.0)); /// /// assert_eq!(parse_with_options::(b"12d", &PI_OPTS), Ok(12)); /// assert_eq!(parse_with_options::(b"1_2d", &PI_OPTS), Err(Error::InvalidDigit(1))); - /// assert_eq!(parse_with_options::(b"12_d", &PI_OPTS), Err(Error::InvalidDigit(1))); + /// assert_eq!(parse_with_options::(b"12_d", &PI_OPTS), Err(Error::InvalidDigit(2))); /// assert_eq!(parse_with_options::(b"12d_", &PI_OPTS), Ok(12)); /// assert_eq!(parse_with_options::(b"12d__", &PI_OPTS), Ok(12)); /// ``` @@ -4375,6 +4651,7 @@ impl NumberFormatBuilder { /// assert_eq!(parse_with_options::(b"_+1", &PF_OPTS), Err(Error::InvalidDigit(1))); /// assert_eq!(parse_with_options::(b"+_1", &PF_OPTS), Ok(1.0)); /// assert_eq!(parse_with_options::(b"+0d_1", &PF_OPTS), Ok(1.0)); + /// // TODO: This is incorrectly consuming the base prefix /// assert_eq!(parse_with_options::(b"+_0d1", &PF_OPTS), Err(Error::InvalidDigit(1))); /// /// assert_eq!(parse_with_options::(b"1", &PI_OPTS), Ok(1)); @@ -5035,6 +5312,9 @@ impl NumberFormatBuilder { self.required_mantissa_digits_with_exponent, REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT ; self.required_base_prefix, REQUIRED_BASE_PREFIX ; self.required_base_suffix, REQUIRED_BASE_SUFFIX ; + self.no_unsigned_negative_sign, NO_UNSIGNED_NEGATIVE_SIGN ; + self.no_mantissa_sign, NO_MANTISSA_SIGN ; + self.no_exponent_sign, NO_EXPONENT_SIGN ; self.start_digit_separator, START_DIGIT_SEPARATOR ; self.integer_sign_digit_separator, INTEGER_SIGN_DIGIT_SEPARATOR ; 
self.integer_consecutive_sign_digit_separator, INTEGER_CONSECUTIVE_SIGN_DIGIT_SEPARATOR ; @@ -5155,6 +5435,9 @@ impl NumberFormatBuilder { ), required_base_prefix: has_flag!(format, REQUIRED_BASE_PREFIX), required_base_suffix: has_flag!(format, REQUIRED_BASE_SUFFIX), + no_unsigned_negative_sign: has_flag!(format, NO_UNSIGNED_NEGATIVE_SIGN), + no_mantissa_sign: has_flag!(format, NO_MANTISSA_SIGN), + no_exponent_sign: has_flag!(format, NO_EXPONENT_SIGN), start_digit_separator: has_flag!(format, START_DIGIT_SEPARATOR), integer_sign_digit_separator: has_flag!(format, INTEGER_SIGN_DIGIT_SEPARATOR), integer_consecutive_sign_digit_separator: has_flag!( diff --git a/lexical-util/src/format_flags.rs b/lexical-util/src/format_flags.rs index 27dfeb64..51636ead 100644 --- a/lexical-util/src/format_flags.rs +++ b/lexical-util/src/format_flags.rs @@ -25,7 +25,7 @@ //! //! 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ -//! |e/P|e/S|I/E|F/E|r/P|r/S|M/E| | +//! |e/P|e/S|I/E|F/E|r/P|r/S|M/E|-/U|-/M|-/E| | //! +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ //! //! 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 @@ -63,6 +63,9 @@ //! r/P = Require base prefixes. //! r/S = Require base suffixes. //! M/E = Require mantissa digits with exponent. +//! -/U = No unsigned integer negative sign. +//! -/M = No mantissa positive or negative sign. +//! -/E = No exponent positive or negative sign. //! //! Digit Separator Flags: //! I/I = Integer internal digit separator. @@ -373,6 +376,15 @@ pub const REQUIRED_BASE_SUFFIX: u128 = 1 << 21; /// If any significant digits are required with exponent notation. pub const REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT: u128 = 1 << 22; +/// If a negative sign before an unsigned integer is not allowed. +pub const NO_UNSIGNED_NEGATIVE_SIGN: u128 = 1 << 23; + +/// If positive or negative signs before the significant digits are not allowed. 
+pub const NO_MANTISSA_SIGN: u128 = 1 << 24; + +/// If positive or negative signs before an exponent are not allowed. +pub const NO_EXPONENT_SIGN: u128 = 1 << 25; + // Non-digit separator flags. const _: () = assert!(REQUIRED_INTEGER_DIGITS == 1); check_subsequent_flags!(REQUIRED_INTEGER_DIGITS, REQUIRED_FRACTION_DIGITS); @@ -398,6 +410,9 @@ check_subsequent_flags!(REQUIRED_INTEGER_DIGITS_WITH_EXPONENT, REQUIRED_FRACTION check_subsequent_flags!(REQUIRED_FRACTION_DIGITS_WITH_EXPONENT, REQUIRED_BASE_PREFIX); check_subsequent_flags!(REQUIRED_BASE_PREFIX, REQUIRED_BASE_SUFFIX); check_subsequent_flags!(REQUIRED_BASE_SUFFIX, REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT); +check_subsequent_flags!(REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT, NO_UNSIGNED_NEGATIVE_SIGN); +check_subsequent_flags!(NO_UNSIGNED_NEGATIVE_SIGN, NO_MANTISSA_SIGN); +check_subsequent_flags!(NO_MANTISSA_SIGN, NO_EXPONENT_SIGN); // DIGIT SEPARATOR FLAGS & MASKS // ----------------------------- @@ -624,6 +639,9 @@ pub const FLAG_MASK: u128 = CASE_SENSITIVE_BASE_SUFFIX | REQUIRED_BASE_PREFIX | REQUIRED_BASE_SUFFIX | + NO_UNSIGNED_NEGATIVE_SIGN | + NO_MANTISSA_SIGN | + NO_EXPONENT_SIGN | START_DIGIT_SEPARATOR_FLAG_MASK | ALL_DIGIT_SEPARATOR_FLAG_MASK; @@ -693,6 +711,7 @@ pub const EXPONENT_FLAG_MASK: u128 = REQUIRED_INTEGER_DIGITS_WITH_EXPONENT | REQUIRED_FRACTION_DIGITS_WITH_EXPONENT | REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT | + NO_EXPONENT_SIGN | EXPONENT_INTERNAL_DIGIT_SEPARATOR | EXPONENT_LEADING_DIGIT_SEPARATOR | EXPONENT_TRAILING_DIGIT_SEPARATOR | diff --git a/lexical-util/src/not_feature_format.rs b/lexical-util/src/not_feature_format.rs index 3947cd50..fcf55a4a 100644 --- a/lexical-util/src/not_feature_format.rs +++ b/lexical-util/src/not_feature_format.rs @@ -775,6 +775,102 @@ impl NumberFormat { Self::REQUIRED_BASE_SUFFIX } + /// If a negative sign before an unsigned integer is not allowed. + /// + /// See [`no_unsigned_negative_sign`][Self::no_unsigned_negative_sign]. 
+ pub const NO_UNSIGNED_NEGATIVE_SIGN: bool = true; + + /// If a negative sign before an unsigned integer is not allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. This + /// does not apply to signed integers or floating point numbers. + /// Defaults to [`true`]. + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `-12` | ❌ | + /// | `+12` | ✔️ | + /// | `12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Integer + #[inline(always)] + pub const fn no_unsigned_negative_sign(&self) -> bool { + Self::NO_UNSIGNED_NEGATIVE_SIGN + } + + /// If positive or negative signs before the significant digits are not + /// allowed. + /// + /// See [`no_mantissa_sign`][Self::no_mantissa_sign]. + pub const NO_MANTISSA_SIGN: bool = false; + + /// If positive or negative signs before the significant digits are not + /// allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. If + /// enabled, then the type cannot represent negative literal or string + /// values (although they may be computed via mathematical operations). + /// Defaults to [`false`]. + /// + /// If you only want to disable positive signs, see + /// [`no_positive_mantissa_sign`]. If you wish to disable negative signs + /// on unsigned integers, see [`no_unsigned_negative_sign`]. + /// + /// [`no_positive_mantissa_sign`]: Self::no_positive_mantissa_sign + /// [`no_unsigned_negative_sign`]: Self::no_unsigned_negative_sign + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `-12` | ❌ | + /// | `+12` | ❌ | + /// | `12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Integer + /// - Parse Float + #[inline(always)] + pub const fn no_mantissa_sign(&self) -> bool { + Self::NO_MANTISSA_SIGN + } + + /// If positive or negative signs before an exponent are not allowed. + /// + /// See [`no_exponent_sign`][Self::no_exponent_sign]. 
+ pub const NO_EXPONENT_SIGN: bool = false; + + /// If positive or negative signs before an exponent are not allowed. + /// + /// Can only be modified with [`feature`][crate#features] `format`. Defaults + /// to [`false`]. + /// + /// If you only want to disable positive signs, see + /// [`no_positive_exponent_sign`]. + /// + /// [`no_positive_exponent_sign`]: Self::no_positive_exponent_sign + /// + /// # Examples + /// + /// | Input | Valid? | + /// |:-:|:-:| + /// | `1.0e-12` | ❌ | + /// | `1.0e+12` | ❌ | + /// | `1.0e12` | ✔️ | + /// + /// # Used For + /// + /// - Parse Float + #[inline(always)] + pub const fn no_exponent_sign(&self) -> bool { + Self::NO_EXPONENT_SIGN + } + // DIGIT SEPARATOR FLAGS & MASKS /// If digit separators are allowed at the absolute start of the number. @@ -2095,7 +2191,8 @@ pub(crate) const fn radix_error_impl(format: u128) -> Error { pub(crate) const fn format_error_impl(format: u128) -> Error { let valid_flags = flags::REQUIRED_EXPONENT_DIGITS | flags::REQUIRED_MANTISSA_DIGITS - | flags::REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT; + | flags::REQUIRED_MANTISSA_DIGITS_WITH_EXPONENT + | flags::NO_UNSIGNED_NEGATIVE_SIGN; if !flags::is_valid_radix(flags::mantissa_radix(format)) { Error::InvalidMantissaRadix } else if !flags::is_valid_radix(flags::exponent_base(format)) { From a023beff91357ffb27228f357455dd699808a02e Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Thu, 23 Jan 2025 20:33:31 -0600 Subject: [PATCH 18/18] Add minor patches for the correct indexes on format errors in parsing integers. --- lexical-parse-integer/src/algorithm.rs | 10 +++++++--- lexical-util/src/format_builder.rs | 12 ++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs index 119d5743..cf1ed011 100644 --- a/lexical-parse-integer/src/algorithm.rs +++ b/lexical-parse-integer/src/algorithm.rs @@ -138,7 +138,7 @@ macro_rules! 
on_invalid_digit { if $iter.is_buffer_empty() && $is_end { $into_ok!($value, $iter.buffer_length(), have_any) } else { - $invalid_digit!($value, $iter.cursor(), have_any) + $invalid_digit!($value, $iter.cursor() + 1, have_any) } } else { $invalid_digit!($value, $iter.cursor() + 1, have_any) @@ -332,7 +332,9 @@ macro_rules! parse_1digit_unchecked { Some(v) => v, None => { // This optimizes better for success cases, which is what we want. - // It's an odd hack, but it's tested to work. + // It's an odd hack, but it's tested to work. This **HAS** + // to be used like this to get the correct digit count, even + // if we always increment it later. // SAFETY: Safe since we must have gotten one digit from next. unsafe { $iter.set_cursor($iter.cursor() - 1) }; on_invalid_digit!( @@ -386,7 +388,9 @@ macro_rules! parse_1digit_checked { Some(v) => v, None => { // This optimizes better for success cases, which is what we want. - // It's an odd hack, but it's tested to work. + // It's an odd hack, but it's tested to work. This **HAS** + // to be used like this to get the correct digit count, even + // if we always increment it later. // SAFETY: Safe since we must have gotten one digit from next. 
unsafe { $iter.set_cursor($iter.cursor() - 1) }; on_invalid_digit!( diff --git a/lexical-util/src/format_builder.rs b/lexical-util/src/format_builder.rs index 4c11b47e..00ee1ddd 100644 --- a/lexical-util/src/format_builder.rs +++ b/lexical-util/src/format_builder.rs @@ -480,9 +480,9 @@ const fn unwrap_or_zero(option: OptionU8) -> u8 { [`required_fraction_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/0cad692/lexical-util/src/format_builder.rs#L1149\n [`required_mantissa_digits_with_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/47a090d/lexical-util/src/format_builder.rs#L1233\n [`case_sensitive_exponent`]: https://github.com/Alexhuszagh/rust-lexical/blob/c6c5052/lexical-util/src/format_builder.rs#L765\n -[`no_unsigned_negative_sign`]: https://github.com/Alexhuszagh/rust-lexical/blob/TODO/lexical-util/src/format_builder.rs#LTODO\n -[`no_mantissa_sign`]: https://github.com/Alexhuszagh/rust-lexical/blob/TODO/lexical-util/src/format_builder.rs#LTODO\n -[`no_exponent_sign`]: https://github.com/Alexhuszagh/rust-lexical/blob/TODO/lexical-util/src/format_builder.rs#LTODO\n +[`no_unsigned_negative_sign`]: https://github.com/Alexhuszagh/rust-lexical/blob/36599c3/lexical-util/src/format_builder.rs#L1640\n +[`no_mantissa_sign`]: https://github.com/Alexhuszagh/rust-lexical/blob/36599c3/lexical-util/src/format_builder.rs#L1671\n +[`no_exponent_sign`]: https://github.com/Alexhuszagh/rust-lexical/blob/36599c3/lexical-util/src/format_builder.rs#L1696\n [`start_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1650\n [`integer_sign_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1678\n [`integer_consecutive_sign_digit_separator`]: https://github.com/Alexhuszagh/rust-lexical/blob/27ca418/lexical-util/src/format_builder.rs#L1706\n @@ -2697,9 +2697,8 @@ impl NumberFormatBuilder { /// assert_eq!(parse_with_options::(b"1x", 
&PF_OPTS), Ok(1.0)); /// assert_eq!(parse_with_options::(b"1x1", &PF_OPTS), Err(Error::InvalidDigit(2))); /// - /// // TODO: FIXME! This is incorrectly getting the location wrong /// assert_eq!(parse_with_options::(b"0x1", &PI_OPTS), Err(Error::InvalidDigit(2))); - /// assert_eq!(parse_with_options::(b"x1", &PI_OPTS), Err(Error::InvalidDigit(0))); + /// assert_eq!(parse_with_options::(b"x1", &PI_OPTS), Err(Error::InvalidDigit(1))); /// assert_eq!(parse_with_options::(b"1", &PI_OPTS), Ok(1)); /// assert_eq!(parse_with_options::(b"1x", &PI_OPTS), Ok(1)); /// assert_eq!(parse_with_options::(b"1x1", &PI_OPTS), Err(Error::InvalidDigit(2))); @@ -3496,7 +3495,6 @@ impl NumberFormatBuilder { /// assert_eq!(parse_with_options::(b"1x", &PF_OPTS), Ok(1.0)); /// assert_eq!(parse_with_options::(b"1X", &PF_OPTS), Err(Error::InvalidDigit(1))); /// - /// // TODO: This has the wrong placement /// assert_eq!(parse_with_options::(b"0x1", &PI_OPTS), Err(Error::InvalidDigit(2))); /// assert_eq!(parse_with_options::(b"1", &PI_OPTS), Ok(1)); /// assert_eq!(parse_with_options::(b"1x", &PI_OPTS), Ok(1)); @@ -4332,7 +4330,6 @@ impl NumberFormatBuilder { /// assert_eq!(parse_with_options::(b"1_2d", &PI_OPTS), Err(Error::InvalidDigit(1))); /// assert_eq!(parse_with_options::(b"12_d", &PI_OPTS), Ok(12)); /// assert_eq!(parse_with_options::(b"12__d", &PI_OPTS), Err(Error::InvalidDigit(2))); - /// // TODO: This is incorrectly using the current placement /// assert_eq!(parse_with_options::(b"12d_", &PI_OPTS), Err(Error::InvalidDigit(3))); /// ``` /// --> @@ -4379,7 +4376,6 @@ impl NumberFormatBuilder { /// assert_eq!(parse_with_options::(b"1_2d", &PI_OPTS), Err(Error::InvalidDigit(1))); /// assert_eq!(parse_with_options::(b"12_d", &PI_OPTS), Err(Error::InvalidDigit(2))); /// assert_eq!(parse_with_options::(b"12d_", &PI_OPTS), Ok(12)); - /// // TODO: This is getting the location wrong /// assert_eq!(parse_with_options::(b"12d__", &PI_OPTS), Err(Error::InvalidDigit(4))); /// ``` /// -->