Merge branch 'int-str-conversion-limit' into str-conversion-doc-and-test

Mathics3 · Dec 18, 2023 · 6ec1cd8 · 6ec1cd8
2 parents 43e923e + 9ec00f5
commit 6ec1cd8
Show file tree

Hide file tree

Showing 8 changed files with 707 additions and 75 deletions.
diff --git a/admin-tools/pyenv-versions b/admin-tools/pyenv-versions
@@ -5,4 +5,4 @@ if [[ $0 == ${BASH_SOURCE[0]} ]] ; then
     echo "This script should be *sourced* rather than run directly through bash"
     exit 1
 fi
-export PYVERSIONS='3.6.15 3.7.16 pyston-2.3.5 pypy3.9-7.3.11 3.8.16 3.9.16 3.10.10'
+export PYVERSIONS='3.6.15 3.7.16 pyston-2.3.5 pypy3.9-7.3.11 3.8.17 3.9.18 3.10.13 3.11.7'
diff --git a/mathics/builtin/list/constructing.py b/mathics/builtin/list/constructing.py
@@ -221,6 +221,9 @@ class Range(Builtin):
     >> Range[-3, 2]
      = {-3, -2, -1, 0, 1, 2}
 
+    >> Range[5, 1, -2]
+     = {5, 3, 1}
+
     >> Range[1.0, 2.3]
      = {1., 2.}
 
@@ -258,17 +261,22 @@ def eval(self, imin, imax, di, evaluation: Evaluation):
             and isinstance(imax, Integer)
             and isinstance(di, Integer)
         ):
-            result = [Integer(i) for i in range(imin.value, imax.value + 1, di.value)]
+            pm = 1 if di.value >= 0 else -1
+            result = [Integer(i) for i in range(imin.value, imax.value + pm, di.value)]
             return ListExpression(
                 *result, elements_properties=range_list_elements_properties
             )
 
         imin = imin.to_sympy()
         imax = imax.to_sympy()
         di = di.to_sympy()
+
+        def compare_type(a, b):
+            return a <= b if di >= 0 else a >= b
+
         index = imin
         result = []
-        while index <= imax:
+        while compare_type(index, imax):
             evaluation.check_stopped()
             result.append(from_sympy(index))
             index += di

diff --git a/mathics/builtin/system.py b/mathics/builtin/system.py
@@ -36,6 +36,7 @@ class MaxLengthIntStringConversion(Predefined):
     https://docs.python.org/3.11/library/stdtypes.html#int-max-str-digits</url>
     <dl>
       <dt>'$MaxLengthIntStringConversion'
+
       <dd>A system constant that fixes the largest size of the string that can \
           result when converting an 'Integer' value into a 'String'. When the
           'String' is too large, then the middle of the integer contains
@@ -44,37 +45,69 @@ class MaxLengthIntStringConversion(Predefined):
           If set to 0, at your peril there is no bound. Aside from 0, \
           640 is the smallest value allowed.
     </dl>
-
+    
     Although Mathics3 can represent integers of arbitrary size, when it formats \
     the value for display, there can be nonlinear behavior in converting the number to \
     decimal.
 
     Python, in version 3.11 and up, puts a default limit on the size of \
     the number of digits it will allow when converting a big-num
-    integer intto a string.
+    integer into a string.
 
     Show the default value of '$MaxLengthIntStringConversion':
-    >> $MaxLengthIntStringConversion
-     = 7000
+    >> originalvalue = $MaxLengthIntStringConversion
+     = ...
 
-    Set '$MaxLenghtIntStringConversion' to the smallest value allowed:
-    $MaxLengthIntStringConversion = 640
-     = 640
+    Let's consider the number $37$, a two-digit 'Integer'. The length of the
+    'String' resulting from its conversion is
+    >> 37 //ToString//StringLength
+     = 2
+    coinciding with the number of digits.
+
+    For extremely long numbers, the conversion can block the system. To avoid this,
+    converting a very large 'Integer' to a 'String' results in an
+    abbreviated representation of the form $d_1d_2... << omitted >> ... d_{n-1}d_n$.
+
+    For example, consider now $500!$, a $1135$-digit number.
 
     >> 500! //ToString//StringLength
-     = 65
+     = ...
+     
+    Depending on the default value of '$MaxLengthIntStringConversion', the result
+    is not 1135: this is because the number is abbreviated.
+    To get the full representation of the number, '$MaxLengthIntStringConversion'
+    must be set to '0':
 
     >> $MaxLengthIntStringConversion = 0; 500! //ToString//StringLength
      = 1135
 
-    >> $MaxLengthIntStringConversion = 650; 500! //ToString
-     = ...
-
-    Other than 0, Python 3.11 does not accept a value less than 640:
+    Python 3.11 only accepts 0 or an 'Integer' value of at least $640$:
     >> $MaxLengthIntStringConversion = 10
      : 10 is not 0 or an Integer value greater than 640.
      = ...
 
+    Set '$MaxLengthIntStringConversion' to the smallest value allowed:
+    >> $MaxLengthIntStringConversion = 640
+     = ...
+    >> 500! //ToString
+     = ...
+
+    Notice that for Python versions <3.11, '$MaxLengthIntStringConversion'
+    is always set to $0$, meaning that 'Integer' numbers are always converted
+    to their full explicit form.
+
+    By setting a smaller value, the resulting 'String' representation
+    is even shorter:
+    >> $MaxLengthIntStringConversion = 650; 500! //ToString//StringLength
+     = ...
+
+    Notice also that internally, the arithmetic is not affected by this constant:
+    >> a=500!; b=(500! + 10^60); b-a
+     = 1000000000000000000000000000000000000000000000000000000000000
+
+    Restore the value to the default.
+    >> $MaxLengthIntStringConversion = originalvalue;a=.;b=.;
+
     """
 
     attributes = A_CONSTANT

diff --git a/mathics/eval/makeboxes.py b/mathics/eval/makeboxes.py
@@ -82,13 +82,66 @@ def int_to_string_shorter_repr(value: Integer, form: Symbol, max_digits=640):
 
     where n-2k digits are replaced by a placeholder.
     """
+    if max_digits == 0:
+        return String(str(value))
+
+    # Normalize to positive quantities
+    is_negative = value < 0
+    if is_negative:
+        value = -value
+        max_digits = max_digits - 1
+
     # Estimate the number of decimal digits
     num_digits = int(value.bit_length() * 0.3)
-    len_num_digits = len(str(num_digits))
-    len_parts = (max_digits - len_num_digits - 8) // 2
-    msd = str(value // 10 ** (num_digits - len_parts))
-    lsd = str(abs(value) % 10**len_parts)
-    value_str = f"{msd} <<{num_digits - len(lsd)-len(msd)}>> {lsd}"
+
+    # If the estimated number of digits is below the threshold,
+    # return it as it is.
+    if num_digits <= max_digits:
+        if is_negative:
+            return String("-" + str(value))
+        return String(str(value))
+
+    # estimate the size of the placeholder
+    size_placeholder = len(str(num_digits)) + 6
+    # Estimate the number of available decimal places
+    avaliable_digits = max(max_digits - size_placeholder, 0)
+    # how many most significant digits to include:
+    len_msd = (avaliable_digits + 1) // 2
+    # how many least significant digits to include:
+    len_lsd = avaliable_digits - len_msd
+    # Compute the msd.
+    msd = str(value // 10 ** (num_digits - len_msd))
+    if msd == "0":
+        msd = ""
+
+    # If msd has more digits than the expected, it means that
+    # num_digits was wrong.
+    extra_msd_digits = len(msd) - len_msd
+    if extra_msd_digits > 0:
+        # Remove the extra digit and fix the real
+        # number of digits.
+        msd = msd[:len_msd]
+        num_digits = num_digits + 1
+
+    lsd = ""
+    if len_lsd > 0:
+        lsd = str(value % 10 ** (len_lsd))
+        # complete decimal positions in the lsd:
+        lsd = (len_lsd - len(lsd)) * "0" + lsd
+
+    # Now, compute the true number of hidden
+    # decimal places, and build the placeholder
+    remaining = num_digits - len_lsd - len_msd
+    placeholder = f" <<{remaining}>> "
+    # Check if the shortened string is actually
+    # shorter than the full string representation:
+    if len(placeholder) < remaining:
+        value_str = f"{msd}{placeholder}{lsd}"
+    else:
+        value_str = str(value)
+
+    if is_negative:
+        value_str = "-" + value_str
     return String(value_str)