Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Better performance for larger inputs #70

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 126 additions & 52 deletions base58/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
'''Base58 encoding
"""Base58 encoding

Implementations of Base58 and Base58Check encodings that are compatible
with the bitcoin network.
'''
"""

# This module is based upon base58 snippets found scattered over many bitcoin
# tools written in python. From what I gather the original source is from a
Expand All @@ -11,105 +11,187 @@

from functools import lru_cache
from hashlib import sha256
from typing import Mapping, Union
from typing import Dict, Tuple, Union
from math import log

__version__ = '2.1.1'
try:
from gmpy2 import mpz
except ImportError:
mpz = None

__version__ = "2.1.1"

# The 58-character alphabets. XRP_ALPHABET is an alias of RIPPLE_ALPHABET.
BITCOIN_ALPHABET = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
RIPPLE_ALPHABET = b"rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz"
XRP_ALPHABET = RIPPLE_ALPHABET

# Digit characters that `_mpz_encode` slices to the target base and
# translates into the requested alphabet.
_MPZ_ALPHABET = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

# Precomputed powers: POWERS[base][n] == base**n for n = 2**4 .. 2**19.
# Used as split points by the divide-and-conquer encode/decode helpers.
POWERS = {
    radix: {1 << shift: radix ** (1 << shift) for shift in range(4, 20)}
    for radix in (45, 58)
}  # type: Dict[int, Dict[int, int]]

# Retro compatibility
alphabet = BITCOIN_ALPHABET


def scrub_input(v: Union[str, bytes]) -> bytes:
    """
    Return the input as bytes

    A ``str`` is encoded as ASCII (raising ``UnicodeEncodeError`` if it
    contains non-ASCII characters); any other value is returned unchanged.
    """
    if isinstance(v, str):
        return v.encode("ascii")
    return v


def _encode_int(i: int, base: int = 58, alphabet: bytes = BITCOIN_ALPHABET) -> bytes:
    """
    Convert a non-negative integer to its big-endian digit values in ``base``

    The result holds raw digit values (0 .. base - 1), not alphabet
    characters; the caller translates them. ``alphabet`` is accepted for
    signature compatibility and is not used here.

    Values up to ``base**256`` are converted with a plain divmod loop.
    Larger values are split with a single divmod by a cached power of
    ``base`` and both halves are converted recursively.
    """
    # setdefault keeps this working for bases that have no precomputed table.
    powers = POWERS.setdefault(base, {})
    try:
        threshold = powers[2**8]
    except KeyError:
        threshold = powers[2**8] = base ** (2**8)

    if i <= threshold:
        digits = bytearray()
        while i:
            i, idx = divmod(i, base)
            digits.append(idx)
        digits.reverse()
        return bytes(digits)

    # About half of the digit count. Any positive split point yields a
    # correct result, so float rounding inside log() is harmless.
    low_len = int(log(i, base)) // 2
    try:
        split_num = powers[low_len]
    except KeyError:
        # Cached as n -> base**n, the same layout as the precomputed entries.
        split_num = powers[low_len] = base**low_len
    high, low = divmod(i, split_num)

    high_digits = _encode_int(high, base, alphabet)
    low_digits = _encode_int(low, base, alphabet)
    # low < base**low_len, so it has at most low_len digits; restore the
    # leading zero digits that the conversion dropped.
    return high_digits + low_digits.rjust(low_len, b"\0")


def _mpz_encode(i: int, alphabet: bytes) -> bytes:
    """
    Encode an integer to arbitrary base using gmpy2 mpz

    The base is ``len(alphabet)``. gmpy2 renders the digits with its own
    character set, which is then translated into ``alphabet``.
    """
    base = len(alphabet)

    # GMP writes digits above 9 as lower-case letters for bases up to 36 and
    # as upper-case then lower-case letters for bases 37..62, so the source
    # side of the translation table has to follow the same rule.
    mpz_digits = _MPZ_ALPHABET[:base]
    if base <= 36:
        mpz_digits = mpz_digits.lower()

    raw: bytes = mpz(i).digits(base).encode()
    tr_bytes = bytes.maketrans(mpz_digits, alphabet)
    encoded: bytes = raw.translate(tr_bytes)

    return encoded


def b58encode_int(
    i: int, default_one: bool = True, alphabet: bytes = BITCOIN_ALPHABET
) -> bytes:
    """
    Encode an integer using Base58

    Zero encodes to the first alphabet character when ``default_one`` is
    true and to ``b""`` otherwise. Non-zero values are encoded with gmpy2
    when it is importable, else with the pure-Python ``_encode_int``.
    """
    if not i:
        return alphabet[0:1] if default_one else b""
    if mpz is not None:
        return _mpz_encode(i, alphabet)

    base = len(alphabet)
    raw_digits = _encode_int(i, base, alphabet)
    # _encode_int returns digit values 0 .. base - 1; map them onto the
    # alphabet characters in a single pass.
    return raw_digits.translate(bytes.maketrans(bytes(range(base)), alphabet))


def b58encode(v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET) -> bytes:
    """
    Encode a string using Base58

    Each leading zero byte of the input is emitted as one copy of the first
    alphabet character; the remaining bytes are encoded as one big-endian
    integer.
    """
    v = scrub_input(v)

    # Leading zero bytes vanish in the integer conversion, so count them
    # here and put them back as a prefix.
    origlen = len(v)
    v = v.lstrip(b"\0")
    newlen = len(v)

    acc = int.from_bytes(v, byteorder="big")  # first byte is most significant

    result = b58encode_int(acc, default_one=False, alphabet=alphabet)
    return alphabet[0:1] * (origlen - newlen) + result


@lru_cache()
def _get_base58_decode_map(alphabet: bytes, autofix: bool) -> Tuple[bytes, bytes]:
    """
    Build the translation data used to decode text written in ``alphabet``

    Returns ``(tr_bytes, del_chars)``: a 256-entry ``bytes.translate`` table
    and the byte values that are not valid input characters. With gmpy2
    available the table maps each character to the digit character gmpy2
    uses for its value; otherwise it maps to the raw digit value.

    With ``autofix``, for each group of look-alike characters of which
    exactly one member is in the alphabet, every member of the group is
    accepted as that character.
    """
    invmap = {char: index for index, char in enumerate(alphabet)}
    base = len(alphabet)

    if autofix:
        groups = [b"0Oo", b"Il1"]
        for group in groups:
            pivots = [c for c in group if c in invmap]
            if len(pivots) == 1:
                for alternative in group:
                    invmap[alternative] = invmap[pivots[0]]

    # Every byte value without a mapping is treated as invalid input.
    del_chars = bytes(bytearray(x for x in range(256) if x not in invmap))

    if mpz is not None:
        mpz_alphabet = "".join([mpz(x).digits(base) for x in invmap.values()]).encode()
        tr_bytes = bytes.maketrans(bytearray(invmap.keys()), mpz_alphabet)
        return tr_bytes, del_chars

    tr_bytes = bytes.maketrans(bytearray(invmap.keys()), bytearray(invmap.values()))
    return tr_bytes, del_chars


def _decode(data: bytes, min_split: int = 256, base: int = 58) -> int:
    """
    Decode larger data blocks recursively

    ``data`` holds digit values (0 .. base - 1), most significant first.
    Inputs of at most ``min_split`` digits are folded with a simple loop;
    longer inputs are split into a high and a low part that are decoded
    separately and recombined with a cached power of ``base``.
    """
    # Splitting needs at least two digits, whatever min_split says.
    if len(data) <= max(min_split, 1):
        ret_int = 0
        for val in data:
            ret_int = base * ret_int + val
        return ret_int

    # A power of two that is at most half of the length, so both parts are
    # non-empty and the split sizes recur across calls (cache-friendly).
    split_len = 2 ** (len(data).bit_length() - 2)

    # Look up or extend the per-base table; never replace the table itself.
    powers = POWERS.setdefault(base, {})
    try:
        base_pow = powers[split_len]
    except KeyError:
        base_pow = powers[split_len] = base**split_len

    # Pass min_split and base down so sub-blocks use the caller's settings.
    high = _decode(data[:-split_len], min_split, base)
    low = _decode(data[-split_len:], min_split, base)
    return base_pow * high + low


def b58decode_int(
    v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET, *, autofix: bool = False
) -> int:
    """
    Decode a Base58 encoded string as an integer

    Trailing whitespace is stripped unless the alphabet itself contains a
    space. Raises ``ValueError`` naming the first character that is not
    part of the alphabet. Empty input decodes to 0.
    """
    if b" " not in alphabet:
        v = v.rstrip()
    v = scrub_input(v)

    base = len(alphabet)
    tr_bytes, del_chars = _get_base58_decode_map(alphabet, autofix=autofix)
    cv = v.translate(tr_bytes, delete=del_chars)
    # translate() drops invalid bytes, so a shorter result means that at
    # least one input character is not in the alphabet.
    if len(v) != len(cv):
        err_char = chr(next(c for c in v if c in del_chars))
        raise ValueError("Invalid character {!r}".format(err_char))

    if cv == b"":
        return 0

    if mpz is not None:
        try:
            return int(mpz(cv, base=base))
        except ValueError:
            raise ValueError(cv, base)

    return _decode(cv, base=base)


def b58decode(
v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET, *,
autofix: bool = False
v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET, *, autofix: bool = False
) -> bytes:
"""
Decode a Base58 encoded string
Expand All @@ -123,17 +205,10 @@ def b58decode(

acc = b58decode_int(v, alphabet=alphabet, autofix=autofix)

result = []
while acc > 0:
acc, mod = divmod(acc, 256)
result.append(mod)

return b'\0' * (origlen - newlen) + bytes(reversed(result))
return acc.to_bytes(origlen - newlen + (acc.bit_length() + 7) // 8, "big")
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change alone seems to have a pretty good impact and even simplifies the code 😍



def b58encode_check(
v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET
) -> bytes:
def b58encode_check(v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET) -> bytes:
"""
Encode a string using Base58 with a 4 character checksum
"""
Expand All @@ -144,10 +219,9 @@ def b58encode_check(


def b58decode_check(
v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET, *,
autofix: bool = False
v: Union[str, bytes], alphabet: bytes = BITCOIN_ALPHABET, *, autofix: bool = False
) -> bytes:
'''Decode and verify the checksum of a Base58 encoded string'''
"""Decode and verify the checksum of a Base58 encoded string"""

result = b58decode(v, alphabet=alphabet, autofix=autofix)
result, check = result[:-4], result[-4:]
Expand Down
32 changes: 15 additions & 17 deletions base58/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,22 @@ def main() -> None:

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'file',
metavar='FILE',
nargs='?',
type=argparse.FileType('r'),
help=(
"File to encode or decode. If no file is provided standard "
"input is used instead"),
default='-')
"file",
metavar="FILE",
nargs="?",
type=argparse.FileType("r"),
help="File to encode or decode. If no file is provided standard input is used instead",
default="-",
)
parser.add_argument(
'-d', '--decode',
action='store_true',
help="decode data instead of encoding")
"-d", "--decode", action="store_true", help="decode data instead of encoding"
)
parser.add_argument(
'-c', '--check',
action='store_true',
help=(
"calculate a checksum and append to encoded data or verify "
"existing checksum when decoding"))
"-c",
"--check",
action="store_true",
help="calculate a checksum and append to encoded data or verify existing checksum when decoding",
)

args = parser.parse_args()
fun = _fmap[(args.decode, args.check)]
Expand All @@ -52,5 +50,5 @@ def main() -> None:
stdout.write(result)


if __name__ == '__main__':
if __name__ == "__main__":
main()
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,5 @@ ignore_missing_imports = True
[mypy-pytest.*]
ignore_missing_imports = True

[mypy-gmpy2.*]
ignore_missing_imports = True
Loading