From e12d4f528e4cb54f587691b273f9c2a7b9defde9 Mon Sep 17 00:00:00 2001 From: mmatera Date: Mon, 18 Dec 2023 13:41:16 -0300 Subject: [PATCH 1/2] improving implementation and documentation. Adding test adding tests --- mathics/builtin/system.py | 43 ++++++++++++++++++++----- mathics/eval/makeboxes.py | 63 ++++++++++++++++++++++++++++++++++--- test/eval/test_makeboxes.py | 51 ++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 12 deletions(-) create mode 100644 test/eval/test_makeboxes.py diff --git a/mathics/builtin/system.py b/mathics/builtin/system.py index 040c575aa..505b59373 100644 --- a/mathics/builtin/system.py +++ b/mathics/builtin/system.py @@ -32,30 +32,59 @@ class MaxLengthIntStringConversion(Predefined): """ - :Python 3.11:https://docs.python.org/3.11/library/stdtypes.html#int-max-str-digits + :Python 3.11 Integer string conversion length limitation: + https://docs.python.org/3.11/library/stdtypes.html#int-max-str-digits
'$MaxLengthIntStringConversion' -
A system constant that fixes the largest size of the String resulting from converting - an Integer into a String. +
A system constant that fixes the largest size of the 'String' obtained + from the conversion of an 'Integer' number.
>> originalvalue = $MaxLengthIntStringConversion = ... + + Let's consider the number $37$, a two-digit 'Integer'. The length of the + 'String' resulting from its conversion is + >> 37 //ToString//StringLength + = 2 + coinciding with the number of digits. + + For extremely long numbers, the conversion can block the system. To avoid this, + the conversion of a very large 'Integer' to 'String' results in an + abbreviated representation of the form $d_1d_2... << omitted >> ... d_{n-1}d_n$. + + For example, let's now consider $500!$, a $1135$-digit number. >> 500! //ToString//StringLength = ... + + Depending on the default value of '$MaxLengthIntStringConversion', the result + is not 1135: this is because the number is abbreviated. + To get the full representation of the number, '$MaxLengthIntStringConversion' + must be set to '0': + >> $MaxLengthIntStringConversion = 0; 500! //ToString//StringLength = 1135 - >> $MaxLengthIntStringConversion = 650; 500! //ToString + + Notice that for Python versions <3.11, '$MaxLengthIntStringConversion' + is always set to $0$, meaning that 'Integer' numbers are always converted + to their full explicit form. + + By setting a smaller value, the resulting 'String' representation + is even shorter: + >> $MaxLengthIntStringConversion = 650; 500! //ToString//StringLength = ... - Python 3.11 does not accept values different to 0 or >640: + Notice also that internally, the arithmetic is not affected by this constant: + >> a=500!; b=(500! + 10^60); b-a + = 1000000000000000000000000000000000000000000000000000000000000 + + Python 3.11 does not accept values other than 0 or an 'Integer' $>640$: >> $MaxLengthIntStringConversion = 10 : 10 is not 0 or an Integer value >640. = ... - Restore the value to the default. 
- >> $MaxLengthIntStringConversion = originalvalue; + >> $MaxLengthIntStringConversion = originalvalue;a=.;b=.; """ diff --git a/mathics/eval/makeboxes.py b/mathics/eval/makeboxes.py index 28439385f..30d1f0dcd 100644 --- a/mathics/eval/makeboxes.py +++ b/mathics/eval/makeboxes.py @@ -78,13 +78,66 @@ def int_to_string_shorter_repr(value: Integer, form: Symbol, max_digits=640): where n-2k digits are replaced by a placeholder. """ + if max_digits == 0: + return String(str(value)) + + # Normalize to positive quantities + is_negative = value < 0 + if is_negative: + value = -value + max_digits = max_digits - 1 + # Estimate the number of decimal digits num_digits = int(value.bit_length() * 0.3) - len_num_digits = len(str(num_digits)) - len_parts = (max_digits - len_num_digits - 8) // 2 - msd = str(value // 10 ** (num_digits - len_parts)) - lsd = str(abs(value) % 10**len_parts) - value_str = f"{msd} <<{num_digits - len(lsd)-len(msd)}>> {lsd}" + + # If the estimated number is bellow the threshold, + # return it as it is. + if num_digits <= max_digits: + if is_negative: + return String("-" + str(value)) + return String(str(value)) + + # estimate the size of the placeholder + size_placeholder = len(str(num_digits)) + 6 + # Estimate the number of avaliable decimal places + avaliable_digits = max(max_digits - size_placeholder, 0) + # how many most significative digits include + len_msd = (avaliable_digits + 1) // 2 + # how many least significative digits to include: + len_lsd = avaliable_digits - len_msd + # Compute the msd. + msd = str(value // 10 ** (num_digits - len_msd)) + if msd == "0": + msd = "" + + # If msd has more digits than the expected, it means that + # num_digits was wrong. + extra_msd_digits = len(msd) - len_msd + if extra_msd_digits > 0: + # Remove the extra digit and fix the real + # number of digits. 
+ msd = msd[:len_msd] + num_digits = num_digits + 1 + + lsd = "" + if len_lsd > 0: + lsd = str(value % 10 ** (len_lsd)) + # complete decimal positions in the lsd: + lsd = (len_lsd - len(lsd)) * "0" + lsd + + # Now, compute the true number of hiding + # decimal places, and built the placeholder + remaining = num_digits - len_lsd - len_msd + placeholder = f" <<{remaining}>> " + # Check if the shorten string is actually + # shorter than the full string representation: + if len(placeholder) < remaining: + value_str = f"{msd}{placeholder}{lsd}" + else: + value_str = str(value) + + if is_negative: + value_str = "-" + value_str return String(value_str) diff --git a/test/eval/test_makeboxes.py b/test/eval/test_makeboxes.py new file mode 100644 index 000000000..a0963259e --- /dev/null +++ b/test/eval/test_makeboxes.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +from test.helper import evaluate + +import pytest + +import mathics.core.systemsymbols as SymbolOutputForm +from mathics.eval.makeboxes import int_to_string_shorter_repr + + +@pytest.mark.parametrize( + ("int_expr", "digits", "str_repr"), + [ + ("1234567890", 0, "1234567890"), + ("1234567890", 2, " <<10>> "), + ("1234567890", 9, "1234567890"), + ("1234567890", 10, "1234567890"), + ("9934567890", 10, "9934567890"), + ("1234567890", 11, "1234567890"), + ("1234567890", 20, "1234567890"), + ("-1234567890", 0, "-1234567890"), + ("-1234567890", 2, "- <<10>> "), + ("-1234567890", 9, "-1 <<9>> "), + ("-1234567890", 10, "-1234567890"), + ("-1234567890", 11, "-1234567890"), + ("-9934567890", 11, "-9934567890"), + ("12345678900987654321", 15, "1234 <<13>> 321"), + ("-1234567890", 20, "-1234567890"), + ("12345678900987654321", 0, "12345678900987654321"), + ("12345678900987654321", 2, " <<20>> "), + ("92345678900987654329", 2, " <<20>> "), + ("12345678900987654321", 9, "1 <<19>> "), + ("12345678900987654321", 10, "1 <<18>> 1"), + ("12345678900987654321", 11, "12 <<17>> 1"), + ("12345678900987654321", 20, "12345678900987654321"), 
+ ("-12345678900987654321", 0, "-12345678900987654321"), + ("-12345678900987654321", 2, "- <<20>> "), + ("-12345678900987654321", 9, "- <<20>> "), + ("-12345678900987654321", 10, "-1 <<19>> "), + ("-12345678900987654321", 11, "-1 <<18>> 1"), + ("-12345678900987654321", 15, "-123 <<14>> 321"), + ("-99345678900987654321", 15, "-993 <<14>> 321"), + ("-12345678900987654321", 16, "-1234 <<13>> 321"), + ("-99345678900987654321", 16, "-9934 <<13>> 321"), + ("-12345678900987654321", 20, "-12345678900987654321"), + ], +) +def test_string_conversion_limited_size(int_expr, digits, str_repr): + value = evaluate(int_expr).value + result = int_to_string_shorter_repr(value, SymbolOutputForm, digits) + assert result.value == str_repr, f"{value} -> {digits}-> {result.value}!={str_repr}" From c5735a62d7f8d693c359363c1df025df827b8c2b Mon Sep 17 00:00:00 2001 From: mmatera Date: Mon, 18 Dec 2023 18:46:23 -0300 Subject: [PATCH 2/2] init --- test/eval/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test/eval/__init__.py diff --git a/test/eval/__init__.py b/test/eval/__init__.py new file mode 100644 index 000000000..e69de29bb