keis · malvidin · Nov 30, 2022 · Dec 1, 2022 · keis · Dec 1, 2022
diff --git a/base58/__init__.py b/base58/__init__.py
@@ -1,8 +1,8 @@
-'''Base58 encoding
+"""Base58 encoding
 
 Implementations of Base58 and Base58Check encodings that are compatible
 with the bitcoin network.
-'''
+"""
 
 # This module is based upon base58 snippets found scattered over many bitcoin
 # tools written in python. From what I gather the original source is from a
@@ -11,105 +11,187 @@
 
 from functools import lru_cache
 from hashlib import sha256
-from typing import Mapping, Union
+from typing import Dict, Tuple, Union
+from math import log
 
-__version__ = '2.1.1'
+try:
+    from gmpy2 import mpz
+except ImportError:
+    mpz = None
+
+__version__ = "2.1.1"
 
 # 58 character alphabet used
-BITCOIN_ALPHABET = \
-    b'123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
-RIPPLE_ALPHABET = b'rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz'
+BITCOIN_ALPHABET = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
+RIPPLE_ALPHABET = b"rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz"
 XRP_ALPHABET = RIPPLE_ALPHABET
+_MPZ_ALPHABET = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+POWERS = {
+    45: {2**i: 45 ** (2**i) for i in range(4, 20)},
+    58: {2**i: 58 ** (2**i) for i in range(4, 20)},
+}  # type: Dict[int, Dict[int, int]]
 
 # Retro compatibility
 alphabet = BITCOIN_ALPHABET
 
 
 def scrub_input(v: Union[str, bytes]) -> bytes:
+    # Normalise input to bytes: str is encoded as ASCII, anything else is
+    # returned unchanged.
     if isinstance(v, str):
-        v = v.encode('ascii')
+        v = v.encode("ascii")
 
     return v
 
 
+def _encode_int(i: int, base: int = 58, alphabet: bytes = BITCOIN_ALPHABET) -> bytes:
+    """
+    Convert a non-negative integer to its digit values in ``base``
+
+    The result holds one raw digit value (0 .. base - 1) per byte, most
+    significant digit first; mapping those values onto alphabet characters is
+    left to the caller. Zero yields ``b""``. Integers above ``base**256`` are
+    split in two around a power of ``base`` and each half is converted
+    recursively.
+
+    ``alphabet`` is not used for the conversion; it is only forwarded to the
+    recursive calls.
+
+    Raises ValueError if ``i`` is negative.
+    """
+    if i < 0:
+        # divmod() never brings a negative value to zero, so the digit loop
+        # below would not terminate.
+        raise ValueError("Cannot encode negative integer {!r}".format(i))
+
+    # POWERS[base][n] caches base**n. Bases without a precomputed table get
+    # one on first use instead of failing with KeyError.
+    powers = POWERS.setdefault(base, {})
+    min_val = powers.get(2**8)
+    if min_val is None:
+        powers[2**8] = min_val = base ** (2**8)
+
+    if i <= min_val:
+        string = bytearray()
+        while i:
+            i, idx = divmod(i, base)
+            string.append(idx)
+        return bytes(string[::-1])
+
+    # Split at the largest power-of-two digit count that does not exceed half
+    # of the estimated length of i, so cached powers are reused. The float
+    # estimate only chooses the split point; any positive split is exact.
+    half_len = int(log(i, base)) // 2
+    origlen0 = 2 ** (half_len.bit_length() - 1)
+    split_num = powers.get(origlen0)
+    if split_num is None:
+        powers[origlen0] = split_num = base**origlen0
+    i1, i0 = divmod(i, split_num)
+
+    v1 = _encode_int(i1, base, alphabet)
+    v0 = _encode_int(i0, base, alphabet)
+    # The low half must occupy exactly origlen0 digits: left-pad with zeros.
+    return v1 + b"\0" * (origlen0 - len(v0)) + v0
+
+
+def _mpz_encode(i: int, alphabet: bytes) -> bytes:
+    """
+    Encode an integer to arbitrary base using gmpy2 mpz
+
+    The base is ``len(alphabet)``. mpz renders the number with its own digit
+    characters, which are then translated one-for-one into ``alphabet``.
+    """
+    base = len(alphabet)
+
+    raw: bytes = mpz(i).digits(base).encode()
+    # GMP writes digit values 10..35 as lower-case letters for bases up to 36;
+    # only larger bases use the 0-9A-Za-z ordering stored in _MPZ_ALPHABET.
+    mpz_digits = _MPZ_ALPHABET[:base]
+    if base <= 36:
+        mpz_digits = mpz_digits.lower()
+    tr_bytes = bytes.maketrans(mpz_digits, alphabet)
+    encoded: bytes = raw.translate(tr_bytes)
+
+    return encoded
+
+
 def b58encode_int(
     i: int, default_one: bool = True, alphabet: bytes = BITCOIN_ALPHABET
 ) -> bytes:
     """
     Encode an integer using Base58
     """
-    if not i and default_one:
-        return alphabet[0:1]
-    string = b""
+    # Zero has no digits: emit the alphabet's first character when
+    # default_one is set, otherwise an empty result.
+    if not i:
+        if default_one:
+            return alphabet[0:1]
+        return b""
+    # Use the gmpy2-backed encoder when the optional import succeeded.
+    if mpz:
+        return _mpz_encode(i, alphabet)
+
     base = len(alphabet)
-    while i:
-        i, idx = divmod(i, base)
-        string = alphabet[idx:idx+1] + string
+    # _encode_int returns raw digit values; translate value k to alphabet[k].
+    raw_string = _encode_int(i, base, alphabet)
+    string = raw_string.translate(
+        bytes.maketrans(bytearray(range(len(alphabet))), alphabet)
+    )
+
     return string
 
 
-def b58encode(
-    v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET
-) -> bytes:
+def b58encode(v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET) -> bytes:
     """
     Encode a string using Base58
     """
     v = scrub_input(v)
 
+    # Leading zero bytes vanish in the integer conversion; count them so each
+    # one can be emitted as the alphabet's first character.
     origlen = len(v)
-    v = v.lstrip(b'\0')
+    v = v.lstrip(b"\0")
     newlen = len(v)
 
-    acc = int.from_bytes(v, byteorder='big')  # first byte is most significant
+    acc = int.from_bytes(v, byteorder="big")  # first byte is most significant
 
+    # default_one=False: a zero value adds no digit beyond the prefix above.
     result = b58encode_int(acc, default_one=False, alphabet=alphabet)
     return alphabet[0:1] * (origlen - newlen) + result
 
 
 @lru_cache()
-def _get_base58_decode_map(alphabet: bytes,
-                           autofix: bool) -> Mapping[int, int]:
+def _get_base58_decode_map(alphabet: bytes, autofix: bool) -> Tuple[bytes, bytes]:
+    # Build the (translation table, bytes to delete) pair that is handed to
+    # bytes.translate(); lru_cache keeps one result per (alphabet, autofix).
     invmap = {char: index for index, char in enumerate(alphabet)}
-
+    base = len(alphabet)
     if autofix:
-        groups = [b'0Oo', b'Il1']
+        # Within each group of look-alike characters, when exactly one of
+        # them is in the alphabet, accept the others as aliases for it.
+        groups = [b"0Oo", b"Il1"]
         for group in groups:
             pivots = [c for c in group if c in invmap]
             if len(pivots) == 1:
                 for alternative in group:
                     invmap[alternative] = invmap[pivots[0]]
 
-    return invmap
+    # Every byte value that is not an accepted character.
+    del_chars = bytes(bytearray(x for x in range(256) if x not in invmap))
+
+    if mpz is not None:
+        # Map each accepted character to the digit character mpz itself
+        # produces for that index in this base.
+        mpz_alphabet = "".join([mpz(x).digits(base) for x in invmap.values()]).encode()
+        tr_bytes = bytes.maketrans(bytearray(invmap.keys()), mpz_alphabet)
+        return tr_bytes, del_chars
+
+    # Without gmpy2, map each accepted character to its raw digit value.
+    tr_bytes = bytes.maketrans(bytearray(invmap.keys()), bytearray(invmap.values()))
+    return tr_bytes, del_chars
+
+
+def _decode(data: bytes, min_split: int = 256, base: int = 58) -> int:
+    """
+    Convert digit values in ``base`` to an integer
+
+    ``data`` holds one raw digit value per byte, most significant digit
+    first. Blocks of at most ``min_split`` digits are accumulated directly;
+    longer blocks are split at a power-of-two digit count and both parts are
+    decoded recursively.
+    """
+    # A block of one digit cannot be split further, whatever min_split says.
+    if len(data) <= max(min_split, 1):
+        ret_int = 0
+        for val in data:
+            ret_int = base * ret_int + val
+        return ret_int
+
+    split_len = 2 ** (len(data).bit_length() - 2)
+    # POWERS[base][n] caches base**n. Add missing entries to the per-base
+    # table (creating it for bases without precomputed powers) rather than
+    # replacing the table itself.
+    powers = POWERS.setdefault(base, {})
+    base_pow = powers.get(split_len)
+    if base_pow is None:
+        powers[split_len] = base_pow = base**split_len
+    # min_split and base must be forwarded, otherwise both parts would be
+    # decoded with the default base of 58.
+    high = _decode(data[:-split_len], min_split, base)
+    low = _decode(data[-split_len:], min_split, base)
+    return base_pow * high + low
 
 
 def b58decode_int(
-    v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET, *,
-    autofix: bool = False
+    v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET, *, autofix: bool = False
 ) -> int:
     """
     Decode a Base58 encoded string as an integer
     """
-    if b' ' not in alphabet:
+    if b" " not in alphabet:
         v = v.rstrip()
     v = scrub_input(v)
 
-    map = _get_base58_decode_map(alphabet, autofix=autofix)
-
-    decimal = 0
     base = len(alphabet)
-    try:
-        for char in v:
-            decimal = decimal * base + map[char]
-    except KeyError as e:
-        raise ValueError(
-            "Invalid character {!r}".format(chr(e.args[0]))
-        ) from None
-    return decimal
+    tr_bytes, del_chars = _get_base58_decode_map(alphabet, autofix=autofix)
+    # Translate characters to digits, dropping any byte outside the alphabet.
+    cv = v.translate(tr_bytes, delete=del_chars)
+    # A shorter result means at least one byte was dropped; report the first.
+    if len(v) != len(cv):
+        err_char = chr(next(c for c in v if c in del_chars))
+        raise ValueError("Invalid character {!r}".format(err_char))
+
+    if cv == b"":
+        return 0
+
+    if mpz:
+        try:
+            return int(mpz(cv, base=base))
+        except ValueError:
+            # NOTE(review): re-raised with (cv, base) as the exception args
+            # and no explicit "from" clause -- confirm this is intended.
+            raise ValueError(cv, base)
+
+    # Pure-Python fallback when gmpy2 is unavailable.
+    return _decode(cv, base=base)
 
 
 def b58decode(
-    v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET, *,
-    autofix: bool = False
+    v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET, *, autofix: bool = False
 ) -> bytes:
     """
     Decode a Base58 encoded string
@@ -123,17 +205,10 @@ def b58decode(
 
     acc = b58decode_int(v, alphabet=alphabet, autofix=autofix)
 
-    result = []
-    while acc > 0:
-        acc, mod = divmod(acc, 256)
-        result.append(mod)
-
-    return b'\0' * (origlen - newlen) + bytes(reversed(result))
+    return acc.to_bytes(origlen - newlen + (acc.bit_length() + 7) // 8, "big")
 
 
-def b58encode_check(
-    v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET
-) -> bytes:
+def b58encode_check(v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET) -> bytes:
     """
     Encode a string using Base58 with a 4 character checksum
     """
@@ -144,10 +219,9 @@ def b58encode_check(
 
 
 def b58decode_check(
-    v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET, *,
-    autofix: bool = False
+    v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET, *, autofix: bool = False
 ) -> bytes:
-    '''Decode and verify the checksum of a Base58 encoded string'''
+    """Decode and verify the checksum of a Base58 encoded string"""
 
     result = b58decode(v, alphabet=alphabet, autofix=autofix)
     result, check = result[:-4], result[-4:]

diff --git a/base58/__main__.py b/base58/__main__.py
@@ -20,24 +20,22 @@ def main() -> None:
 
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument(
-        'file',
-        metavar='FILE',
-        nargs='?',
-        type=argparse.FileType('r'),
-        help=(
-            "File to encode or decode. If no file is provided standard "
-            "input is used instead"),
-        default='-')
+        "file",
+        metavar="FILE",
+        nargs="?",
+        type=argparse.FileType("r"),
+        help="File to encode or decode. If no file is provided standard input is used instead",
+        default="-",
+    )
     parser.add_argument(
-        '-d', '--decode',
-        action='store_true',
-        help="decode data instead of encoding")
+        "-d", "--decode", action="store_true", help="decode data instead of encoding"
+    )
     parser.add_argument(
-        '-c', '--check',
-        action='store_true',
-        help=(
-            "calculate a checksum and append to encoded data or verify "
-            "existing checksum when decoding"))
+        "-c",
+        "--check",
+        action="store_true",
+        help="calculate a checksum and append to encoded data or verify existing checksum when decoding",
+    )
 
     args = parser.parse_args()
     fun = _fmap[(args.decode, args.check)]
@@ -52,5 +50,5 @@ def main() -> None:
     stdout.write(result)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/setup.cfg b/setup.cfg
@@ -57,3 +57,5 @@ ignore_missing_imports = True
 [mypy-pytest.*]
 ignore_missing_imports = True
 
+[mypy-gmpy2.*]
+ignore_missing_imports = True