diff --git a/setup.py b/setup.py index bbf6e60f55..c251071229 100644 --- a/setup.py +++ b/setup.py @@ -92,6 +92,7 @@ def _global_version(version): python_requires=">=3.10,<4", py_modules=["vyper"], install_requires=[ + "cbor2>=5.4.6,<6", "asttokens>=2.0.5,<3", "pycryptodome>=3.5.1,<4", "semantic-version>=2.10,<3", diff --git a/tests/compiler/test_bytecode_runtime.py b/tests/compiler/test_bytecode_runtime.py index 86eff70a50..9519b03772 100644 --- a/tests/compiler/test_bytecode_runtime.py +++ b/tests/compiler/test_bytecode_runtime.py @@ -1,14 +1,135 @@ -import vyper +import cbor2 +import pytest +import vyper +from vyper.compiler.settings import OptimizationLevel, Settings -def test_bytecode_runtime(): - code = """ +simple_contract_code = """ @external def a() -> bool: return True - """ +""" + +many_functions = """ +@external +def foo1(): + pass + +@external +def foo2(): + pass + +@external +def foo3(): + pass + +@external +def foo4(): + pass + +@external +def foo5(): + pass +""" + +has_immutables = """ +A_GOOD_PRIME: public(immutable(uint256)) + +@external +def __init__(): + A_GOOD_PRIME = 967 +""" + + +def _parse_cbor_metadata(initcode): + metadata_ofst = int.from_bytes(initcode[-2:], "big") + metadata = cbor2.loads(initcode[-metadata_ofst:-2]) + return metadata - out = vyper.compile_code(code, ["bytecode_runtime", "bytecode"]) + +def test_bytecode_runtime(): + out = vyper.compile_code(simple_contract_code, ["bytecode_runtime", "bytecode"]) assert len(out["bytecode"]) > len(out["bytecode_runtime"]) - assert out["bytecode_runtime"][2:] in out["bytecode"][2:] + assert out["bytecode_runtime"].removeprefix("0x") in out["bytecode"].removeprefix("0x") + + +def test_bytecode_signature(): + out = vyper.compile_code(simple_contract_code, ["bytecode_runtime", "bytecode"]) + + runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) + initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) + + metadata = _parse_cbor_metadata(initcode) + runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert runtime_len == len(runtime_code) + assert data_section_lengths == [] + assert immutables_len == 0 + assert compiler == {"vyper": list(vyper.version.version_tuple)} + + +def test_bytecode_signature_dense_jumptable(): + settings = Settings(optimize=OptimizationLevel.CODESIZE) + + out = vyper.compile_code(many_functions, ["bytecode_runtime", "bytecode"], settings=settings) + + runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) + initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) + + metadata = _parse_cbor_metadata(initcode) + runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert runtime_len == len(runtime_code) + assert data_section_lengths == [5, 35] + assert immutables_len == 0 + assert compiler == {"vyper": list(vyper.version.version_tuple)} + + +def test_bytecode_signature_sparse_jumptable(): + settings = Settings(optimize=OptimizationLevel.GAS) + + out = vyper.compile_code(many_functions, ["bytecode_runtime", "bytecode"], settings=settings) + + runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) + initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) + + metadata = _parse_cbor_metadata(initcode) + runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert runtime_len == len(runtime_code) + assert data_section_lengths == [8] + assert immutables_len == 0 + assert compiler == {"vyper": list(vyper.version.version_tuple)} + + +def test_bytecode_signature_immutables(): + out = vyper.compile_code(has_immutables, ["bytecode_runtime", "bytecode"]) + + runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) + initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) + + metadata = _parse_cbor_metadata(initcode) + runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert runtime_len == len(runtime_code) + assert data_section_lengths == [] + assert immutables_len == 32 + assert compiler == {"vyper": list(vyper.version.version_tuple)} + + +# check that deployed bytecode actually matches the cbor metadata +@pytest.mark.parametrize("code", [simple_contract_code, has_immutables, many_functions]) +def test_bytecode_signature_deployed(code, get_contract, w3): + c = get_contract(code) + deployed_code = w3.eth.get_code(c.address) + + initcode = c._classic_contract.bytecode + + metadata = _parse_cbor_metadata(initcode) + runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert compiler == {"vyper": list(vyper.version.version_tuple)} + + # runtime_len includes data sections but not immutables + assert len(deployed_code) == runtime_len + immutables_len diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 69fcbf1f1f..334c5ba613 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -218,7 +218,7 @@ def _build_asm(asm_list): def build_source_map_output(compiler_data: CompilerData) -> OrderedDict: _, line_number_map = compile_ir.assembly_to_evm( - compiler_data.assembly_runtime, insert_vyper_signature=False + compiler_data.assembly_runtime, insert_compiler_metadata=False ) # Sort line_number_map out = OrderedDict() diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 526d2f3253..a1c7342320 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -184,12 +184,12 @@ def assembly_runtime(self) -> list: @cached_property def bytecode(self) -> bytes: - insert_vyper_signature = not self.no_bytecode_metadata - return generate_bytecode(self.assembly, insert_vyper_signature=insert_vyper_signature) + insert_compiler_metadata = not self.no_bytecode_metadata + return generate_bytecode(self.assembly, insert_compiler_metadata=insert_compiler_metadata) @cached_property def bytecode_runtime(self) -> bytes: - return generate_bytecode(self.assembly_runtime, insert_vyper_signature=False) + return generate_bytecode(self.assembly_runtime, insert_compiler_metadata=False) @cached_property def blueprint_bytecode(self) -> bytes: @@ -331,7 +331,7 @@ def _find_nested_opcode(assembly, key): return any(_find_nested_opcode(x, key) for x in sublists) -def generate_bytecode(assembly: list, insert_vyper_signature: bool) -> bytes: +def generate_bytecode(assembly: list, insert_compiler_metadata: bool) -> bytes: """ Generate bytecode from assembly instructions. @@ -345,4 +345,6 @@ def generate_bytecode(assembly: list, insert_vyper_signature: bool) -> bytes: bytes Final compiled bytecode. """ - return compile_ir.assembly_to_evm(assembly, insert_vyper_signature=insert_vyper_signature)[0] + return compile_ir.assembly_to_evm(assembly, insert_compiler_metadata=insert_compiler_metadata)[ + 0 + ] diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index bba3b34515..7a3e97155b 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1,6 +1,9 @@ import copy import functools import math +from dataclasses import dataclass + +import cbor2 from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import OptimizationLevel @@ -507,9 +510,9 @@ def _height_of(witharg): elif code.value == "deploy": memsize = code.args[0].value # used later to calculate _mem_deploy_start ir = code.args[1] - padding = code.args[2].value + immutables_len = code.args[2].value assert isinstance(memsize, int), "non-int memsize" - assert isinstance(padding, int), "non-int padding" + assert isinstance(immutables_len, int), "non-int immutables_len" runtime_begin = mksymbol("runtime_begin") @@ -521,14 +524,14 @@ def _height_of(witharg): o.extend(["_sym_subcode_size", runtime_begin, "_mem_deploy_start", "CODECOPY"]) # calculate the len of runtime code - o.extend(["_OFST", "_sym_subcode_size", padding]) # stack: len + o.extend(["_OFST", "_sym_subcode_size", immutables_len]) # stack: len o.extend(["_mem_deploy_start"]) # stack: len mem_ofst o.extend(["RETURN"]) # since the asm data structures are very primitive, to make sure # assembly_to_evm is able to calculate data offsets correctly, # we pass the memsize via magic opcodes to the subcode - subcode = [_RuntimeHeader(runtime_begin, memsize)] + subcode + subcode = [_RuntimeHeader(runtime_begin, memsize, immutables_len)] + subcode # append the runtime code after the ctor code # `append(...)` call here is intentional. @@ -1051,18 +1054,19 @@ def _length_of_data(assembly): return ret +@dataclass class _RuntimeHeader: - def __init__(self, label, ctor_mem_size): - self.label = label - self.ctor_mem_size = ctor_mem_size + label: str + ctor_mem_size: int + immutables_len: int def __repr__(self): - return f"" + return f"" +@dataclass class _DataHeader: - def __init__(self, label): - self.label = label + label: str def __repr__(self): return f"DATA {self.label}" @@ -1092,21 +1096,21 @@ def _relocate_segments(assembly): # TODO: change API to split assembly_to_evm and assembly_to_source/symbol_maps -def assembly_to_evm(assembly, pc_ofst=0, insert_vyper_signature=False): +def assembly_to_evm(assembly, pc_ofst=0, insert_compiler_metadata=False): bytecode, source_maps, _ = assembly_to_evm_with_symbol_map( - assembly, pc_ofst=pc_ofst, insert_vyper_signature=insert_vyper_signature + assembly, pc_ofst=pc_ofst, insert_compiler_metadata=insert_compiler_metadata ) return bytecode, source_maps -def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature=False): +def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadata=False): """ Assembles assembly into EVM assembly: list of asm instructions pc_ofst: when constructing the source map, the amount to offset all pcs by (no effect until we add deploy code source map) - insert_vyper_signature: whether to append vyper metadata to output + insert_compiler_metadata: whether to append vyper metadata to output (should be true for runtime code) """ line_number_map = { @@ -1122,12 +1126,6 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature= runtime_code, runtime_code_start, runtime_code_end = None, None, None - bytecode_suffix = b"" - if insert_vyper_signature: - # CBOR encoded: {"vyper": [major,minor,patch]} - bytecode_suffix += b"\xa1\x65vyper\x83" + bytes(list(version_tuple)) - bytecode_suffix += len(bytecode_suffix).to_bytes(2, "big") - # to optimize the size of deploy code - we want to use the smallest # PUSH instruction possible which can support all memory symbols # (and also works with linear pass symbol resolution) @@ -1155,6 +1153,9 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature= if runtime_code_end is not None: mem_ofst_size = calc_mem_ofst_size(runtime_code_end + max_mem_ofst) + data_section_lengths = [] + immutables_len = None + # go through the code, resolving symbolic locations # (i.e. JUMPDEST locations) to actual code locations for i, item in enumerate(assembly): @@ -1198,18 +1199,41 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature= # [_OFST, _mem_foo, bar] -> PUSHN (foo+bar) pc -= 1 elif isinstance(item, list) and isinstance(item[0], _RuntimeHeader): + # we are in initcode symbol_map[item[0].label] = pc # add source map for all items in the runtime map t = adjust_pc_maps(runtime_map, pc) for key in line_number_map: line_number_map[key].update(t[key]) + immutables_len = item[0].immutables_len pc += len(runtime_code) + # grab lengths of data sections from the runtime + for t in item: + if isinstance(t, list) and isinstance(t[0], _DataHeader): + data_section_lengths.append(_length_of_data(t)) + elif isinstance(item, list) and isinstance(item[0], _DataHeader): symbol_map[item[0].label] = pc pc += _length_of_data(item) else: pc += 1 + bytecode_suffix = b"" + if insert_compiler_metadata: + # this will hold true when we are in initcode + assert immutables_len is not None + metadata = ( + len(runtime_code), + data_section_lengths, + immutables_len, + {"vyper": version_tuple}, + ) + bytecode_suffix += cbor2.dumps(metadata) + # append the length of the footer, *including* the length + # of the length bytes themselves. + suffix_len = len(bytecode_suffix) + 2 + bytecode_suffix += suffix_len.to_bytes(2, "big") + pc += len(bytecode_suffix) symbol_map["_sym_code_end"] = pc