Add in signed overloads for writing integers.

Fixes #191. Fixes apache/datafusion#13686
Alexhuszagh · Dec 8, 2024 · 807dfee · 807dfee
1 parent b6d0eb4
commit 807dfee
Show file tree

Hide file tree

Showing 6 changed files with 153 additions and 15 deletions.
diff --git a/lexical-write-float/src/shared.rs b/lexical-write-float/src/shared.rs
@@ -139,7 +139,7 @@ pub fn write_exponent<const FORMAT: u128>(
     bytes[*cursor] = exponent_character;
     *cursor += 1;
     let positive_exp: u32 = write_exponent_sign::<FORMAT>(bytes, cursor, exp);
-    *cursor += positive_exp.write_exponent::<FORMAT>(&mut bytes[*cursor..]);
+    *cursor += positive_exp.write_exponent_signed::<FORMAT>(&mut bytes[*cursor..]);
 }
 
 /// Detect the notation to use for the float formatter and call the appropriate

diff --git a/lexical-write-integer/src/api.rs b/lexical-write-integer/src/api.rs
@@ -55,15 +55,15 @@ where
         let unsigned = Unsigned::as_cast(value.wrapping_neg());
         buffer[0] = b'-';
         let buffer = &mut buffer[1..];
-        unsigned.write_mantissa::<FORMAT>(buffer) + 1
+        unsigned.write_mantissa_signed::<FORMAT>(buffer) + 1
     } else if cfg!(feature = "format") && format.required_mantissa_sign() {
         let unsigned = Unsigned::as_cast(value);
         buffer[0] = b'+';
         let buffer = &mut buffer[1..];
-        unsigned.write_mantissa::<FORMAT>(buffer) + 1
+        unsigned.write_mantissa_signed::<FORMAT>(buffer) + 1
     } else {
         let unsigned = Unsigned::as_cast(value);
-        unsigned.write_mantissa::<FORMAT>(buffer)
+        unsigned.write_mantissa_signed::<FORMAT>(buffer)
     }
 }
 

diff --git a/lexical-write-integer/src/decimal.rs b/lexical-write-integer/src/decimal.rs
@@ -239,14 +239,15 @@ unsafe impl DecimalCount for usize {
 
 /// Write integer to decimal string.
 pub trait Decimal: DecimalCount {
-    /// # Safety
-    ///
-    /// Safe as long as buffer is at least [`FORMATTED_SIZE`] elements long,
-    /// (or [`FORMATTED_SIZE_DECIMAL`] for decimal), and the radix is valid.
-    ///
-    /// [`FORMATTED_SIZE`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE
-    /// [`FORMATTED_SIZE_DECIMAL`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE_DECIMAL
     fn decimal(self, buffer: &mut [u8]) -> usize;
+
+    /// Specialized overload if the type is signed.
+    ///
+    /// This **SHOULD NOT** be used if the data provided is unsigned.
+    #[inline(always)]
+    fn decimal_signed(self, buffer: &mut [u8]) -> usize {
+        self.decimal(buffer)
+    }
 }
 
 // Implement decimal for type.
@@ -261,14 +262,27 @@ macro_rules! decimal_impl {
     )*);
 }
 
+// TODO: Fix for i64
+
 decimal_impl! {
     u8; from_u8
     u16; from_u16
     u32; from_u32
-    u64; from_u64
     u128; from_u128
 }
 
+impl Decimal for u64 {
+    #[inline(always)]
+    fn decimal(self, buffer: &mut [u8]) -> usize {
+        jeaiii::from_u64(self, buffer)
+    }
+
+    #[inline(always)]
+    fn decimal_signed(self, buffer: &mut [u8]) -> usize {
+        jeaiii::from_i64(self, buffer)
+    }
+}
+
 impl Decimal for usize {
     #[inline(always)]
     fn decimal(self, buffer: &mut [u8]) -> usize {
@@ -281,4 +295,16 @@ impl Decimal for usize {
             _ => unimplemented!(),
         }
     }
+
+    #[inline(always)]
+    fn decimal_signed(self, buffer: &mut [u8]) -> usize {
+        match usize::BITS {
+            8 => (self as u8).decimal_signed(buffer),
+            16 => (self as u16).decimal_signed(buffer),
+            32 => (self as u32).decimal_signed(buffer),
+            64 => (self as u64).decimal_signed(buffer),
+            128 => (self as u128).decimal_signed(buffer),
+            _ => unimplemented!(),
+        }
+    }
 }
diff --git a/lexical-write-integer/src/jeaiii.rs b/lexical-write-integer/src/jeaiii.rs
@@ -297,11 +297,16 @@ pub fn from_u32(n: u32, buffer: &mut [u8]) -> usize {
 /// Optimized jeaiii algorithm for u64.
 #[inline(always)]
 #[allow(clippy::collapsible_else_if)] // reason = "branching is fine-tuned for performance"
-pub fn from_u64(n: u64, buffer: &mut [u8]) -> usize {
+fn from_u64_impl(n: u64, buffer: &mut [u8], is_signed: bool) -> usize {
     // NOTE: Like before, this optimizes better for large and small
     // values if there's a flat comparison with larger values first.
     const FACTOR: u64 = 100_0000_0000;
-    let buffer = &mut buffer[..20];
+    // NOTE: `i64` takes a max of 19 digits, while `u64` takes a max of 20.
+    let buffer = if is_signed {
+        &mut buffer[..19]
+    } else {
+        &mut buffer[..20]
+    };
     if n < 1_0000 {
         // 1 to 4 digits
         if n >= 100 {
@@ -326,7 +331,7 @@ pub fn from_u64(n: u64, buffer: &mut [u8]) -> usize {
             write_digits!(@5-6 buffer, n)
         }
     } else {
-        // 11-20 digits, can do in 2 steps
+        // 11-20 digits, can do in 2 steps (11-19 digits if signed).
         // NOTE: `hi` has to be in `[0, 2^31)`, while `lo` is in `[0, 10^11)`
         // So, we can use our `from_u64_small` for hi. For our `lo`, we always
         // need to write 10 digits. However, the `jeaiii` algorithm is too
@@ -340,6 +345,23 @@ pub fn from_u64(n: u64, buffer: &mut [u8]) -> usize {
     }
 }
 
+/// Optimized jeaiii algorithm for u64.
+#[inline(always)]
+pub fn from_u64(n: u64, buffer: &mut [u8]) -> usize {
+    from_u64_impl(n, buffer, false)
+}
+
+/// Optimized jeaiii algorithm for i64, which must be non-negative.
+///
+/// This value **MUST** have originally been from an `i64` (its magnitude),
+/// since it uses `19` for the bounds check: this will panic if `>= 10^19`
+/// (20 digits) is passed to the function.
+#[inline(always)]
+pub fn from_i64(n: u64, buffer: &mut [u8]) -> usize {
+    debug_assert!(n < 1000_0000_0000_0000_0000u64);
+    from_u64_impl(n, buffer, true)
+}
+
 /// Optimized jeaiii algorithm for u128.
 #[inline(always)]
 #[allow(clippy::collapsible_else_if)] // reason = "branching is fine-tuned for performance"

diff --git a/lexical-write-integer/src/write.rs b/lexical-write-integer/src/write.rs
@@ -21,19 +21,39 @@ macro_rules! write_mantissa {
         fn write_mantissa<const FORMAT: u128>(self, buffer: &mut [u8]) -> usize {
             self.write_integer::<FORMAT, { format::RADIX }, { format::RADIX_SHIFT }>(buffer)
         }
+
+        /// Internal implementation to write significant digits for float writers.
+        #[doc(hidden)]
+        #[inline(always)]
+        fn write_mantissa_signed<const FORMAT: u128>(self, buffer: &mut [u8]) -> usize {
+            self.write_integer_signed::<FORMAT, { format::RADIX }, { format::RADIX_SHIFT }>(buffer)
+        }
     };
 }
 
 /// Define the implementation to write exponent digits.
 macro_rules! write_exponent {
     ($($t:tt)+) => (
+        // NOTE: This should always be signed, but for backwards compatibility as
+        // a precaution we keep the original just in case someone uses the private API.
+
         /// Internal implementation to write exponent digits for float writers.
+        // NOTE: This is not part of the public API.
         #[doc(hidden)]
         #[inline(always)]
+        #[deprecated = "use `write_exponent_signed`, since exponents are always signed."]
         fn write_exponent<const FORMAT: u128>(self, buffer: &mut [u8]) -> usize
         {
             self.write_integer::<FORMAT, { format::EXPONENT_RADIX }, { format::EXPONENT_RADIX_SHIFT }>(buffer)
         }
+
+        /// Internal implementation to write exponent digits for float writers.
+        #[doc(hidden)]
+        #[inline(always)]
+        fn write_exponent_signed<const FORMAT: u128>(self, buffer: &mut [u8]) -> usize
+        {
+            self.write_integer_signed::<FORMAT, { format::EXPONENT_RADIX }, { format::EXPONENT_RADIX_SHIFT }>(buffer)
+        }
     )
 }
 
@@ -56,6 +76,23 @@ pub trait WriteInteger: Compact {
         self.compact(radix, buffer)
     }
 
+    /// Forward write integer parameters to an optimized backend.
+    ///
+    /// This requires a type that was previously signed.
+    ///
+    /// # Preconditions
+    ///
+    /// `self` must be the magnitude of a signed value, so at most `Signed::MAX + 1`.
+    ///
+    /// [`FORMATTED_SIZE_DECIMAL`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE_DECIMAL
+    #[inline(always)]
+    fn write_integer_signed<const FORMAT: u128, const MASK: u128, const SHIFT: i32>(
+        self,
+        buffer: &mut [u8],
+    ) -> usize {
+        self.write_integer::<FORMAT, MASK, SHIFT>(buffer)
+    }
+
     write_mantissa!(Compact);
     write_exponent!(Compact);
 }
@@ -79,6 +116,23 @@ pub trait WriteInteger: Decimal {
         self.decimal(buffer)
     }
 
+    /// Forward write integer parameters to an optimized backend.
+    ///
+    /// This requires a type that was previously signed.
+    ///
+    /// # Preconditions
+    ///
+    /// `self` must be the magnitude of a signed value, so at most `Signed::MAX + 1`.
+    ///
+    /// [`FORMATTED_SIZE_DECIMAL`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE_DECIMAL
+    #[inline(always)]
+    fn write_integer_signed<const __: u128, const ___: u128, const ____: i32>(
+        self,
+        buffer: &mut [u8],
+    ) -> usize {
+        self.decimal_signed(buffer)
+    }
+
     write_mantissa!(Decimal);
     write_exponent!(Decimal);
 }
@@ -107,6 +161,27 @@ pub trait WriteInteger: Decimal + Radix {
         }
     }
 
+    /// Forward write integer parameters to an optimized backend.
+    ///
+    /// This requires a type that was previously signed.
+    ///
+    /// # Preconditions
+    ///
+    /// `self` must be the magnitude of a signed value, so at most `Signed::MAX + 1`.
+    ///
+    /// [`FORMATTED_SIZE_DECIMAL`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE_DECIMAL
+    #[inline(always)]
+    fn write_integer_signed<const FORMAT: u128, const MASK: u128, const SHIFT: i32>(
+        self,
+        buffer: &mut [u8],
+    ) -> usize {
+        if format::radix_from_flags(FORMAT, MASK, SHIFT) == 10 {
+            self.decimal_signed(buffer)
+        } else {
+            self.radix::<FORMAT, MASK, SHIFT>(buffer)
+        }
+    }
+
     write_mantissa!(Decimal + Radix);
     write_exponent!(Decimal + Radix);
 }

diff --git a/lexical-write-integer/tests/decimal_tests.rs b/lexical-write-integer/tests/decimal_tests.rs
@@ -47,6 +47,21 @@ fn u64_decimal_count_test() {
     assert_eq!(u64::decimal_count(u64::MAX), 20);
 }
 
+#[test]
+fn i64_19digit_test() {
+    let mut buffer = [0u8; 19];
+    assert_eq!((5i64 as u64).decimal_signed(&mut buffer), 1);
+    assert_eq!(&buffer[..1], b"5");
+}
+
+#[test]
+#[should_panic]
+fn u64_19digit_test() {
+    let mut buffer = [0u8; 19];
+    assert_eq!(5u64.decimal(&mut buffer), 1);
+    assert_eq!(&buffer[..1], b"5");
+}
+
 #[test]
 fn u128_decimal_count_test() {
     assert_eq!(u128::decimal_count(u128::MAX), 39);