Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add runtime code layout to initcode #3584

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def _global_version(version):
python_requires=">=3.10,<4",
py_modules=["vyper"],
install_requires=[
"cbor2>=5.4.6,<6",
"asttokens>=2.0.5,<3",
"pycryptodome>=3.5.1,<4",
"semantic-version>=2.10,<3",
Expand Down
133 changes: 127 additions & 6 deletions tests/compiler/test_bytecode_runtime.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,135 @@
import vyper
import cbor2
import pytest

import vyper
from vyper.compiler.settings import OptimizationLevel, Settings

def test_bytecode_runtime():
code = """
simple_contract_code = """
@external
def a() -> bool:
return True
"""
"""

many_functions = """
@external
def foo1():
pass

@external
def foo2():
pass

@external
def foo3():
pass

@external
def foo4():
pass

@external
def foo5():
pass
"""

has_immutables = """
A_GOOD_PRIME: public(immutable(uint256))

@external
def __init__():
A_GOOD_PRIME = 967
"""


def _parse_cbor_metadata(initcode):
    """
    Decode the CBOR metadata footer found at the end of `initcode`.

    The final two bytes of `initcode` are read as a big-endian integer giving
    the footer length, counted from the end of the bytecode and including the
    two length bytes themselves. The footer bytes that precede the length
    field are CBOR-decoded and the decoded object is returned.
    """
    footer_len = int.from_bytes(initcode[-2:], "big")
    # strip the trailing 2-byte length field; the rest is the CBOR payload
    cbor_payload = initcode[-footer_len:-2]
    return cbor2.loads(cbor_payload)

out = vyper.compile_code(code, ["bytecode_runtime", "bytecode"])

def test_bytecode_runtime():
    """Check that the initcode is longer than the runtime bytecode and contains it."""
    compiled = vyper.compile_code(simple_contract_code, ["bytecode_runtime", "bytecode"])
    initcode_hex = compiled["bytecode"]
    runtime_hex = compiled["bytecode_runtime"]

    assert len(initcode_hex) > len(runtime_hex)
    # compare the hex payloads without their leading "0x"
    assert runtime_hex[2:] in initcode_hex[2:]
    assert runtime_hex.removeprefix("0x") in initcode_hex.removeprefix("0x")


def test_bytecode_signature():
    """
    Compile `simple_contract_code` and check the four fields of the CBOR
    metadata footer appended to its initcode.
    """
    compiled = vyper.compile_code(simple_contract_code, ["bytecode_runtime", "bytecode"])

    runtime_hex = compiled["bytecode_runtime"].removeprefix("0x")
    initcode_hex = compiled["bytecode"].removeprefix("0x")
    runtime_code = bytes.fromhex(runtime_hex)
    initcode = bytes.fromhex(initcode_hex)

    # footer layout: (runtime length, data section lengths, immutables length, compiler info)
    runtime_len, data_section_lengths, immutables_len, compiler = _parse_cbor_metadata(initcode)

    assert runtime_len == len(runtime_code)
    assert data_section_lengths == []
    assert immutables_len == 0
    assert compiler == {"vyper": list(vyper.version.version_tuple)}


def test_bytecode_signature_dense_jumptable():
    """
    Compile `many_functions` with `OptimizationLevel.CODESIZE` (the
    dense-jumptable case, per the test name) and check the CBOR metadata
    footer, including the two expected data section lengths.
    """
    codesize_settings = Settings(optimize=OptimizationLevel.CODESIZE)
    compiled = vyper.compile_code(
        many_functions, ["bytecode_runtime", "bytecode"], settings=codesize_settings
    )

    runtime_hex = compiled["bytecode_runtime"].removeprefix("0x")
    initcode_hex = compiled["bytecode"].removeprefix("0x")
    runtime_code = bytes.fromhex(runtime_hex)
    initcode = bytes.fromhex(initcode_hex)

    # footer layout: (runtime length, data section lengths, immutables length, compiler info)
    runtime_len, data_section_lengths, immutables_len, compiler = _parse_cbor_metadata(initcode)

    assert runtime_len == len(runtime_code)
    assert data_section_lengths == [5, 35]
    assert immutables_len == 0
    assert compiler == {"vyper": list(vyper.version.version_tuple)}


def test_bytecode_signature_sparse_jumptable():
    """
    Compile `many_functions` with `OptimizationLevel.GAS` (the
    sparse-jumptable case, per the test name) and check the CBOR metadata
    footer, including the single expected data section length.
    """
    gas_settings = Settings(optimize=OptimizationLevel.GAS)
    compiled = vyper.compile_code(
        many_functions, ["bytecode_runtime", "bytecode"], settings=gas_settings
    )

    runtime_hex = compiled["bytecode_runtime"].removeprefix("0x")
    initcode_hex = compiled["bytecode"].removeprefix("0x")
    runtime_code = bytes.fromhex(runtime_hex)
    initcode = bytes.fromhex(initcode_hex)

    # footer layout: (runtime length, data section lengths, immutables length, compiler info)
    runtime_len, data_section_lengths, immutables_len, compiler = _parse_cbor_metadata(initcode)

    assert runtime_len == len(runtime_code)
    assert data_section_lengths == [8]
    assert immutables_len == 0
    assert compiler == {"vyper": list(vyper.version.version_tuple)}


def test_bytecode_signature_immutables():
    """
    Compile `has_immutables` and check the CBOR metadata footer; the
    immutables length is expected to be 32 for this contract.
    """
    compiled = vyper.compile_code(has_immutables, ["bytecode_runtime", "bytecode"])

    runtime_hex = compiled["bytecode_runtime"].removeprefix("0x")
    initcode_hex = compiled["bytecode"].removeprefix("0x")
    runtime_code = bytes.fromhex(runtime_hex)
    initcode = bytes.fromhex(initcode_hex)

    # footer layout: (runtime length, data section lengths, immutables length, compiler info)
    runtime_len, data_section_lengths, immutables_len, compiler = _parse_cbor_metadata(initcode)

    assert runtime_len == len(runtime_code)
    assert data_section_lengths == []
    assert immutables_len == 32
    assert compiler == {"vyper": list(vyper.version.version_tuple)}


# check that deployed bytecode actually matches the cbor metadata
@pytest.mark.parametrize("code", [simple_contract_code, has_immutables, many_functions])
def test_bytecode_signature_deployed(code, get_contract, w3):
    """
    Deploy `code` and check that the length of the code stored at the
    contract address agrees with the lengths recorded in the initcode's
    CBOR metadata footer.
    """
    contract = get_contract(code)
    onchain_code = w3.eth.get_code(contract.address)

    footer = _parse_cbor_metadata(contract._classic_contract.bytecode)
    # the footer must unpack into exactly four fields
    runtime_len, _data_section_lengths, immutables_len, compiler = footer

    assert compiler == {"vyper": list(vyper.version.version_tuple)}

    # runtime_len includes data sections but not immutables
    expected_len = runtime_len + immutables_len
    assert len(onchain_code) == expected_len
2 changes: 1 addition & 1 deletion vyper/compiler/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def _build_asm(asm_list):

def build_source_map_output(compiler_data: CompilerData) -> OrderedDict:
_, line_number_map = compile_ir.assembly_to_evm(
compiler_data.assembly_runtime, insert_vyper_signature=False
compiler_data.assembly_runtime, insert_compiler_metadata=False
)
# Sort line_number_map
out = OrderedDict()
Expand Down
12 changes: 7 additions & 5 deletions vyper/compiler/phases.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,12 @@ def assembly_runtime(self) -> list:

@cached_property
def bytecode(self) -> bytes:
insert_vyper_signature = not self.no_bytecode_metadata
return generate_bytecode(self.assembly, insert_vyper_signature=insert_vyper_signature)
insert_compiler_metadata = not self.no_bytecode_metadata
return generate_bytecode(self.assembly, insert_compiler_metadata=insert_compiler_metadata)

@cached_property
def bytecode_runtime(self) -> bytes:
return generate_bytecode(self.assembly_runtime, insert_vyper_signature=False)
return generate_bytecode(self.assembly_runtime, insert_compiler_metadata=False)

@cached_property
def blueprint_bytecode(self) -> bytes:
Expand Down Expand Up @@ -331,7 +331,7 @@ def _find_nested_opcode(assembly, key):
return any(_find_nested_opcode(x, key) for x in sublists)


def generate_bytecode(assembly: list, insert_vyper_signature: bool) -> bytes:
def generate_bytecode(assembly: list, insert_compiler_metadata: bool) -> bytes:
"""
Generate bytecode from assembly instructions.
Expand All @@ -345,4 +345,6 @@ def generate_bytecode(assembly: list, insert_vyper_signature: bool) -> bytes:
bytes
Final compiled bytecode.
"""
return compile_ir.assembly_to_evm(assembly, insert_vyper_signature=insert_vyper_signature)[0]
return compile_ir.assembly_to_evm(assembly, insert_compiler_metadata=insert_compiler_metadata)[
0
]
64 changes: 44 additions & 20 deletions vyper/ir/compile_ir.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import copy
import functools
import math
from dataclasses import dataclass

import cbor2

from vyper.codegen.ir_node import IRnode
from vyper.compiler.settings import OptimizationLevel
Expand Down Expand Up @@ -507,9 +510,9 @@ def _height_of(witharg):
elif code.value == "deploy":
memsize = code.args[0].value # used later to calculate _mem_deploy_start
ir = code.args[1]
padding = code.args[2].value
immutables_len = code.args[2].value
assert isinstance(memsize, int), "non-int memsize"
assert isinstance(padding, int), "non-int padding"
assert isinstance(immutables_len, int), "non-int immutables_len"

runtime_begin = mksymbol("runtime_begin")

Expand All @@ -521,14 +524,14 @@ def _height_of(witharg):
o.extend(["_sym_subcode_size", runtime_begin, "_mem_deploy_start", "CODECOPY"])

# calculate the len of runtime code
o.extend(["_OFST", "_sym_subcode_size", padding]) # stack: len
o.extend(["_OFST", "_sym_subcode_size", immutables_len]) # stack: len
o.extend(["_mem_deploy_start"]) # stack: len mem_ofst
o.extend(["RETURN"])

# since the asm data structures are very primitive, to make sure
# assembly_to_evm is able to calculate data offsets correctly,
# we pass the memsize via magic opcodes to the subcode
subcode = [_RuntimeHeader(runtime_begin, memsize)] + subcode
subcode = [_RuntimeHeader(runtime_begin, memsize, immutables_len)] + subcode

# append the runtime code after the ctor code
# `append(...)` call here is intentional.
Expand Down Expand Up @@ -1051,18 +1054,19 @@ def _length_of_data(assembly):
return ret


@dataclass
class _RuntimeHeader:
def __init__(self, label, ctor_mem_size):
self.label = label
self.ctor_mem_size = ctor_mem_size
label: str
ctor_mem_size: int
immutables_len: int

def __repr__(self):
return f"<RUNTIME {self.label} mem @{self.ctor_mem_size}>"
return f"<RUNTIME {self.label} mem @{self.ctor_mem_size} imms @{self.immutables_len}>"


@dataclass
class _DataHeader:
def __init__(self, label):
self.label = label
label: str

def __repr__(self):
return f"DATA {self.label}"
Expand Down Expand Up @@ -1092,21 +1096,21 @@ def _relocate_segments(assembly):


# TODO: change API to split assembly_to_evm and assembly_to_source/symbol_maps
def assembly_to_evm(assembly, pc_ofst=0, insert_vyper_signature=False):
def assembly_to_evm(assembly, pc_ofst=0, insert_compiler_metadata=False):
bytecode, source_maps, _ = assembly_to_evm_with_symbol_map(
assembly, pc_ofst=pc_ofst, insert_vyper_signature=insert_vyper_signature
assembly, pc_ofst=pc_ofst, insert_compiler_metadata=insert_compiler_metadata
)
return bytecode, source_maps


def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature=False):
def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadata=False):
"""
Assembles assembly into EVM
assembly: list of asm instructions
pc_ofst: when constructing the source map, the amount to offset all
pcs by (no effect until we add deploy code source map)
insert_vyper_signature: whether to append vyper metadata to output
insert_compiler_metadata: whether to append vyper metadata to output
(should be true for initcode)
"""
line_number_map = {
Expand All @@ -1122,12 +1126,6 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature=

runtime_code, runtime_code_start, runtime_code_end = None, None, None

bytecode_suffix = b""
if insert_vyper_signature:
# CBOR encoded: {"vyper": [major,minor,patch]}
bytecode_suffix += b"\xa1\x65vyper\x83" + bytes(list(version_tuple))
bytecode_suffix += len(bytecode_suffix).to_bytes(2, "big")

# to optimize the size of deploy code - we want to use the smallest
# PUSH instruction possible which can support all memory symbols
# (and also works with linear pass symbol resolution)
Expand Down Expand Up @@ -1155,6 +1153,9 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature=
if runtime_code_end is not None:
mem_ofst_size = calc_mem_ofst_size(runtime_code_end + max_mem_ofst)

data_section_lengths = []
immutables_len = None

# go through the code, resolving symbolic locations
# (i.e. JUMPDEST locations) to actual code locations
for i, item in enumerate(assembly):
Expand Down Expand Up @@ -1198,18 +1199,41 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature=
# [_OFST, _mem_foo, bar] -> PUSHN (foo+bar)
pc -= 1
elif isinstance(item, list) and isinstance(item[0], _RuntimeHeader):
# we are in initcode
symbol_map[item[0].label] = pc
# add source map for all items in the runtime map
t = adjust_pc_maps(runtime_map, pc)
for key in line_number_map:
line_number_map[key].update(t[key])
immutables_len = item[0].immutables_len
pc += len(runtime_code)
# grab lengths of data sections from the runtime
for t in item:
if isinstance(t, list) and isinstance(t[0], _DataHeader):
data_section_lengths.append(_length_of_data(t))

elif isinstance(item, list) and isinstance(item[0], _DataHeader):
symbol_map[item[0].label] = pc
pc += _length_of_data(item)
else:
pc += 1

bytecode_suffix = b""
if insert_compiler_metadata:
# this will hold true when we are in initcode
assert immutables_len is not None
metadata = (
len(runtime_code),
data_section_lengths,
immutables_len,
{"vyper": version_tuple},
)
bytecode_suffix += cbor2.dumps(metadata)
# append the length of the footer, *including* the length
# of the length bytes themselves.
suffix_len = len(bytecode_suffix) + 2
bytecode_suffix += suffix_len.to_bytes(2, "big")
charles-cooper marked this conversation as resolved.
Show resolved Hide resolved

pc += len(bytecode_suffix)

symbol_map["_sym_code_end"] = pc
Expand Down
Loading