Made s_mp_floor_ilog2 external function and reduced castings

libtom · May 20, 2024 · 463406b · 463406b
1 parent f20c2dd
commit 463406b
Show file tree

Hide file tree

Showing 13 changed files with 62 additions and 51 deletions.
diff --git a/libtommath_VS2008.vcproj b/libtommath_VS2008.vcproj
@@ -844,6 +844,10 @@
 			RelativePath="s_mp_faster_to_radix.c"
 			>
 		</File>
+		<File
+			RelativePath="s_mp_floor_ilog2.c"
+			>
+		</File>
 		<File
 			RelativePath="s_mp_fp_log.c"
 			>

diff --git a/makefile b/makefile
@@ -45,8 +45,8 @@ mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_
 mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \
 mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
 s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \
-s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o \
-s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
+s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \
+s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
 s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
 s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \
 s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \

diff --git a/makefile.mingw b/makefile.mingw
@@ -47,8 +47,8 @@ mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_
 mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \
 mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
 s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \
-s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o \
-s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
+s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \
+s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
 s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
 s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \
 s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \

diff --git a/makefile.msvc b/makefile.msvc
@@ -43,8 +43,8 @@ mp_set_l.obj mp_set_u32.obj mp_set_u64.obj mp_set_ul.obj mp_shrink.obj mp_signed
 mp_sqrtmod_prime.obj mp_sub.obj mp_sub_d.obj mp_submod.obj mp_to_radix.obj mp_to_sbin.obj mp_to_ubin.obj mp_ubin_size.obj \
 mp_unpack.obj mp_warray_free.obj mp_xor.obj mp_zero.obj s_mp_add.obj s_mp_copy_digs.obj s_mp_div_3.obj \
 s_mp_div_recursive.obj s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s_mp_exptmod_fast.obj \
-s_mp_faster_read_radix.obj s_mp_faster_to_radix.obj s_mp_fp_log.obj s_mp_fp_log_d.obj s_mp_get_bit.obj \
-s_mp_invmod.obj s_mp_invmod_odd.obj s_mp_log_2expt.obj s_mp_montgomery_reduce_comba.obj s_mp_mul.obj \
+s_mp_faster_read_radix.obj s_mp_faster_to_radix.obj s_mp_floor_ilog2.obj s_mp_fp_log.obj s_mp_fp_log_d.obj \
+s_mp_get_bit.obj s_mp_invmod.obj s_mp_invmod_odd.obj s_mp_log_2expt.obj s_mp_montgomery_reduce_comba.obj s_mp_mul.obj \
 s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj \
 s_mp_mul_toom.obj s_mp_prime_is_divisible.obj s_mp_prime_tab.obj s_mp_radix_map.obj \
 s_mp_radix_size_overestimate.obj s_mp_rand_platform.obj s_mp_slower_read_radix.obj s_mp_slower_to_radix.obj \

diff --git a/makefile.shared b/makefile.shared
@@ -42,8 +42,8 @@ mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_
 mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \
 mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
 s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \
-s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o \
-s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
+s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \
+s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
 s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
 s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \
 s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \

diff --git a/makefile.unix b/makefile.unix
@@ -48,8 +48,8 @@ mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_
 mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \
 mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
 s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o \
-s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_fp_log.o s_mp_fp_log_d.o s_mp_get_bit.o \
-s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
+s_mp_faster_read_radix.o s_mp_faster_to_radix.o s_mp_floor_ilog2.o s_mp_fp_log.o s_mp_fp_log_d.o \
+s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
 s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
 s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \
 s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_slower_read_radix.o s_mp_slower_to_radix.o \

diff --git a/mp_prime_is_prime.c b/mp_prime_is_prime.c
@@ -3,16 +3,6 @@
 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
 /* SPDX-License-Identifier: Unlicense */
 
-/* portable integer log of two with small footprint */
-static unsigned int s_floor_ilog2(int value)
-{
-   unsigned int r = 0;
-   while ((value >>= 1) != 0) {
-      r++;
-   }
-   return r;
-}
-
 mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result)
 {
    mp_int  b;
@@ -186,7 +176,7 @@ mp_err mp_prime_is_prime(const mp_int *a, int t, bool *result)
        * Hence the ugly type-fiddling in the following code.
        */
       size_a = mp_count_bits(a);
-      mask = (1u << s_floor_ilog2(size_a)) - 1u;
+      mask = (1u << s_mp_floor_ilog2(size_a)) - 1u;
       /*
          Assuming the General Rieman hypothesis (never thought to write that in a
          comment) the upper bound can be lowered to  2*(log a)^2.

diff --git a/mp_to_radix.c b/mp_to_radix.c
@@ -46,7 +46,6 @@ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, i
 
    /* TODO: check if it can be done better */
    if (MP_HAS(S_MP_FASTER_TO_RADIX)) {
-
       if ((err = s_mp_faster_to_radix(&a_bar, str, maxlen, &part_written, radix)) != MP_OKAY)            goto LBL_ERR;
    } else if (MP_HAS(S_MP_SLOWER_TO_RADIX)) {
       char *start = str;
@@ -56,7 +55,7 @@ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, i
       part_written++;
    } else {
       /* TODO: Add an error for "Function not available in this build"?
-               The compiler has no reason to complain complain in that case. */
+               The compiler has no reason to complain in that case. */
       err = MP_ERR;
       goto LBL_ERR;
    }

diff --git a/s_mp_faster_to_radix.c b/s_mp_faster_to_radix.c
@@ -26,20 +26,10 @@ const uint8_t extra_bits[] =  {
 };
 #endif
 
-/* TODO: We could use mp_log_n instead; in both instances it gets used here are singular precomputations. */
-static int32_t s_ilog2(int32_t value)
-{
-   int r = 0;
-   while ((value /= 2) != 0) {
-      r++;
-   }
-   return r;
-}
-
 /* TODO: Use bigint variant mp_expt_n(mp_int *, int,  mp_int*)? */
-static int32_t s_pow(int32_t base, int32_t exponent)
+static int s_pow(int base, int exponent)
 {
-   int32_t result = 1;
+   int result = 1;
    while (exponent != 0) {
       if ((exponent % 2) == 1) {
          result *= base;
@@ -53,26 +43,25 @@ static int32_t s_pow(int32_t base, int32_t exponent)
    return result;
 }
 
-/* DONE: too much castings */
-/* #define MP_COMPUTE_ESS(T, K) ((int)((int32_t)((uint32_t)1 << (T)) * K))  */
 
-static uint32_t s_mp_compute_s(int t, int k)
+static int s_mp_compute_s(int t, int k)
 {
-   uint32_t r = 0u, log2_intmax, log2_k;
+   uint32_t r = 0u;
+   int log2_intmax, log2_k;
 
-   log2_k = (uint32_t)s_ilog2((int32_t) k) + 1u;
-   log2_intmax = (uint32_t)s_ilog2((int32_t) INT_MAX) + 1u;
+   log2_k = (int)s_mp_floor_ilog2(k) + 1;
+   log2_intmax = (int)s_mp_floor_ilog2(INT_MAX) + 1;
 
    /* Rough first check for overflow */
-   if (t > (int)(log2_intmax - log2_k)) {
-      return 0u;
+   if (t > (log2_intmax - log2_k)) {
+      return 0;
    }
 
    r = 1u << t;
    r = r * (uint32_t)k;
 
    /* Final check for overflow */
-   return (r > (MP_MAX_DIGIT_COUNT * MP_DIGIT_BIT)) ? 0u : r;
+   return (r > (MP_MAX_DIGIT_COUNT * MP_DIGIT_BIT)) ? 0 : (int)r;
 }
 
 
@@ -84,6 +73,8 @@ static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_
    mp_err err;
    int Beta;
 
+   /* TODO: Free the memory of P(t) once the level is done? */
+
    if (t < 0) {
       /* Print the string from the number given */
       if ((err = s_mp_slower_to_radix(a, str, part_maxlen, part_written, radix, pad)) != MP_OKAY)        goto LTM_ERR;
@@ -94,6 +85,7 @@ static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_
       } else if (first) {
          /* Largest division, only one time, no reason for Barret division in the first place */
          if ((err = mp_div(a, &P[t], &q, &r)) != MP_OKAY)                                                goto LTM_ERR;
+         /* TODO: we can always release the memory here, it is only needed once. */
       } else {
          /*
             Barrett reduction. A step by step proof can be found at
@@ -154,8 +146,7 @@ static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_
 mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix)
 {
    mp_err err;
-   int32_t n = 0, k, t = 0, steps = 0;
-   int ilog2a, s;
+   int n = 0, k, t = 0, steps = 0, ilog2a, s;
 
    /* Use given buffer directly, no temporary buffers for the individual chunks */
    char **sptr = &str;
@@ -175,10 +166,10 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
 
 
    /* Denominator for the reciprocal: b^y. */
-   n = s_pow((int32_t)radix, (int32_t)s_mp_radix_exponent_y[radix]);
+   n = s_pow(radix, (int)s_mp_radix_exponent_y[radix]);
 
    /* Numerator of the reciprocal: ceil(log_2(n)) */
-   k = s_ilog2(n) + 1;
+   k = (int)s_mp_floor_ilog2(n) + 1;
 
    /* steps = floor(log_2(floor(log_2(a))))*/
    ilog2a = mp_count_bits(a) - 1;
@@ -201,7 +192,7 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
             That would be about 25 (max 31) elements for 32 bit ints, the default size
             of b^y and MP_RADIX_BARRETT_START_MULTIPLICATOR=1   */
 
-   steps  = s_ilog2((int32_t)ilog2a) + 1;
+   steps  = (int)s_mp_floor_ilog2(ilog2a) + 1;
 #ifdef MP_DEBUG_FASTER_TO_RADIX
    fprintf(stderr,"steps = %d\n",t);
 #endif
@@ -292,7 +283,7 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
       if ((radix == 10) && (ilog2a < (1 << 27)) && (!MP_IS_2EXPT(radix))) {
          /* Use a round of Newton-Raphson to compute the next reciprocal */
          /* s = 2^t*k */
-         s = (int)s_mp_compute_s(t, k);
+         s = s_mp_compute_s(t, k);
          /* Overflow, we have enough divisors */
          if (s == 0) {
             break;
@@ -316,7 +307,7 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
          if ((err = mp_incr(&R[t])) != MP_OKAY)                                                          goto LTM_ERR;
       } else {
 #endif
-         s = (int)s_mp_compute_s(t + 1, k);
+         s = s_mp_compute_s(t + 1, k);
          /* Overflow, we have enough divisors */
          if (s == 0) {
             break;

diff --git a/s_mp_floor_ilog2.c b/s_mp_floor_ilog2.c
@@ -0,0 +1,18 @@
+#include "tommath_private.h"
+#ifdef S_MP_FLOOR_ILOG2_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+
+
+/*
+ * Portable integer floor(log_2(value)) with a small footprint.
+ *
+ * value   Number to take the logarithm of.
+ * return  floor(log_2(value)) for value >= 1, and 0 for value == 0.
+ *
+ * The halving is done by division instead of a right shift: C99 division
+ * truncates toward zero, so the loop also terminates for negative input
+ * (yielding floor(log_2(|value|))). A right shift of a negative value is
+ * implementation-defined and, when it is an arithmetic shift, never
+ * reaches zero (-1 >> 1 == -1), which would loop forever.
+ */
+unsigned int s_mp_floor_ilog2(int value)
+{
+   unsigned int r = 0;
+   while ((value /= 2) != 0) {
+      r++;
+   }
+   return r;
+}
+
+
+#endif
diff --git a/sources.cmake b/sources.cmake
@@ -135,6 +135,7 @@ s_mp_exptmod.c
 s_mp_exptmod_fast.c
 s_mp_faster_read_radix.c
 s_mp_faster_to_radix.c
+s_mp_floor_ilog2.c
 s_mp_fp_log.c
 s_mp_fp_log_d.c
 s_mp_get_bit.c

diff --git a/tommath_class.h b/tommath_class.h
@@ -144,6 +144,7 @@
 #   define S_MP_EXPTMOD_FAST_C
 #   define S_MP_FASTER_READ_RADIX_C
 #   define S_MP_FASTER_TO_RADIX_C
+#   define S_MP_FLOOR_ILOG2_C
 #   define S_MP_FP_LOG_C
 #   define S_MP_FP_LOG_D_C
 #   define S_MP_GET_BIT_C
@@ -656,6 +657,7 @@
 #   define MP_RAND_C
 #   define MP_READ_RADIX_C
 #   define MP_SET_C
+#   define S_MP_FLOOR_ILOG2_C
 #   define S_MP_PRIME_IS_DIVISIBLE_C
 #endif
 
@@ -1113,11 +1115,15 @@
 #   define MP_SUB_C
 #   define MP_SUB_D_C
 #   define S_MP_COMPUTE_S_C
+#   define S_MP_FLOOR_ILOG2_C
 #   define S_MP_SLOWER_TO_RADIX_C
 #   define S_MP_TO_RADIX_RECURSIVE_C
 #   define S_MP_ZERO_BUF_C
 #endif
 
+#if defined(S_MP_FLOOR_ILOG2_C)
+#endif
+
 #if defined(S_MP_FP_LOG_C)
 #   define MP_2EXPT_C
 #   define MP_ADD_C

diff --git a/tommath_private.h b/tommath_private.h
@@ -234,6 +234,8 @@ MP_PRIVATE mp_err s_mp_radix_size_overestimate(const mp_int *a, const int radix,
 MP_PRIVATE mp_err s_mp_fp_log(const mp_int *a, mp_int *c) MP_WUR;
 MP_PRIVATE mp_err s_mp_fp_log_d(const mp_int *a, mp_word *c) MP_WUR;
 
+MP_PRIVATE unsigned int s_mp_floor_ilog2(int value);
+
 MP_PRIVATE mp_err s_mp_faster_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) MP_WUR;
 MP_PRIVATE mp_err s_mp_slower_read_radix(mp_int *a, const char *str, size_t start, size_t end, int radix) MP_WUR;
 MP_PRIVATE mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix) MP_WUR;
-Original file line number
+Diff line change
@@ Expand Up / @@ -844,6 +844,10 @@ @@
     			RelativePath="s_mp_faster_to_radix.c"
     			>
     		</File>
+    		<File
+    			RelativePath="s_mp_floor_ilog2.c"
+    			>
+    		</File>
     		<File
     			RelativePath="s_mp_fp_log.c"
     			>
@@ Expand Down @@