From 257d5e43ad0f4c4a8ae5f700d0e0fea47358164b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 18 Sep 2023 16:21:56 -0400 Subject: [PATCH 01/15] fix raw_call --- vyper/builtins/functions.py | 20 +++++++++++++------- vyper/codegen/ir_node.py | 3 ++- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index 3ec8f69934..f9ce95f8ee 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -1155,14 +1155,20 @@ def build_IR(self, expr, args, kwargs, context): outsize, ] - if delegate_call: - call_op = ["delegatecall", gas, to, *common_call_args] - elif static_call: - call_op = ["staticcall", gas, to, *common_call_args] - else: - call_op = ["call", gas, to, value, *common_call_args] + gas, value = IRnode.from_list(gas), IRnode.from_list(value) + with value.cache_when_complex("_value") as (b1, value), gas.cache_when_complex("_gas") as ( + b2, + gas, + ), to.cache_when_complex("_to") as (b3, to): + if delegate_call: + call_op = ["delegatecall", gas, to, *common_call_args] + elif static_call: + call_op = ["staticcall", gas, to, *common_call_args] + else: + call_op = ["call", gas, to, value, *common_call_args] - call_ir += [call_op] + call_ir += [call_op] + call_ir = b1.resolve(b2.resolve(b3.resolve(call_ir))) # build sequence IR if outsize: diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 6cb0a07281..a266964d4c 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -333,7 +333,8 @@ def gas(self): def is_complex_ir(self): # list of items not to cache. note can add other env variables # which do not change, e.g. calldatasize, coinbase, etc. - do_not_cache = {"~empty", "calldatasize"} + do_not_cache = {"~empty", "calldatasize", "callvalue"} + return ( isinstance(self.value, str) and (self.value.lower() in VALID_IR_MACROS or self.value.upper() in get_ir_opcodes()) From 1dd65e52d7eb4f4111ce7c154a37a1a1639d4332 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 18 Sep 2023 21:25:50 -0400 Subject: [PATCH 02/15] test: raw_call with msg.data buffer clean memory --------- Co-authored-by: Tanguy Rocher --- tests/parser/functions/test_raw_call.py | 120 ++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/tests/parser/functions/test_raw_call.py b/tests/parser/functions/test_raw_call.py index 9c6fba79e7..63252f72e9 100644 --- a/tests/parser/functions/test_raw_call.py +++ b/tests/parser/functions/test_raw_call.py @@ -426,6 +426,126 @@ def baz(_addr: address, should_raise: bool) -> uint256: assert caller.baz(target.address, False) == 3 +# XXX: these test_raw_call_clean_mem* tests depend on variables and +# calling convention writing to memory + + +def test_raw_call_msg_data_clean_mem(get_contract): + # test msize uses clean memory and does not get overwritten by + # any raw_call() arguments + code = """ +identity: constant(address) = 0x0000000000000000000000000000000000000004 + +@external +def foo(): + pass + +@internal +@view +def get_address()->address: + a:uint256 = 121 # 0x79 + return identity +@external +def bar(f: uint256, u: uint256) -> Bytes[100]: + # embed an internal call in the calculation of address + a: Bytes[100] = raw_call(self.get_address(), msg.data, max_outsize=100) + return a + """ + + c = get_contract(code) + assert ( + c.bar(1, 2).hex() == "ae42e951" + "0000000000000000000000000000000000000000000000000000000000000001" + "0000000000000000000000000000000000000000000000000000000000000002" + ) + + +def test_raw_call_clean_mem2(get_contract): + # test msize uses clean memory and does not get overwritten by + # any raw_call() arguments, another way + code = """ +buf: Bytes[100] + +@external +def bar(f: uint256, g: uint256, h: uint256) -> Bytes[100]: + # embed a memory modifying expression in the calculation of address + self.buf = raw_call( + [0x0000000000000000000000000000000000000004,][f-1], + msg.data, + max_outsize=100 + ) + return self.buf + """ + c = get_contract(code) + + assert ( + c.bar(1, 2, 3).hex() == "9309b76e" + "0000000000000000000000000000000000000000000000000000000000000001" + "0000000000000000000000000000000000000000000000000000000000000002" + "0000000000000000000000000000000000000000000000000000000000000003" + ) + + +def test_raw_call_clean_mem_kwargs_value(get_contract): + # test msize uses clean memory and does not get overwritten by + # any raw_call() kwargs + code = """ +buf: Bytes[100] + +@internal +def _value() -> uint256: + x: uint256 = 1 + return x + +@external +def bar(f: uint256) -> Bytes[100]: + # embed a memory modifying expression in the calculation of address + self.buf = raw_call( + 0x0000000000000000000000000000000000000004, + msg.data, + max_outsize=100, + value=self._value() + ) + return self.buf + """ + c = get_contract(code, value=1) + + assert ( + c.bar(13).hex() == "0423a132" + "000000000000000000000000000000000000000000000000000000000000000d" + ) + + +def test_raw_call_clean_mem_kwargs_gas(get_contract): + # test msize uses clean memory and does not get overwritten by + # any raw_call() kwargs + code = """ +buf: Bytes[100] + +@internal +def _gas() -> uint256: + x: uint256 = msg.gas + return x + +@external +def bar(f: uint256) -> Bytes[100]: + # embed a memory modifying expression in the calculation of address + self.buf = raw_call( + 0x0000000000000000000000000000000000000004, + msg.data, + max_outsize=100, + gas=self._gas() + ) + return self.buf + """ + c = get_contract(code, value=1) + + assert ( + c.bar(15).hex() == "0423a132" + "000000000000000000000000000000000000000000000000000000000000000f" + ) + + uncompilable_code = [ ( """ From 9cbbd8d1643953f1d3179a074bba9adf75c2c071 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 19 Sep 2023 12:45:47 -0400 Subject: [PATCH 03/15] force memory effects in some clean_mem tests --- tests/parser/functions/test_raw_call.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/parser/functions/test_raw_call.py b/tests/parser/functions/test_raw_call.py index 63252f72e9..115eed723a 100644 --- a/tests/parser/functions/test_raw_call.py +++ b/tests/parser/functions/test_raw_call.py @@ -492,6 +492,11 @@ def test_raw_call_clean_mem_kwargs_value(get_contract): code = """ buf: Bytes[100] +# add a dummy function to trigger memory expansion in the selector table routine +@external +def foo(): + pass + @internal def _value() -> uint256: x: uint256 = 1 @@ -522,6 +527,11 @@ def test_raw_call_clean_mem_kwargs_gas(get_contract): code = """ buf: Bytes[100] +# add a dummy function to trigger memory expansion in the selector table routine +@external +def foo(): + pass + @internal def _gas() -> uint256: x: uint256 = msg.gas From 4e614fd025119702e3e899791437798fc2ccd331 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 19 Sep 2023 12:56:25 -0400 Subject: [PATCH 04/15] add tests for clean memory in create functions --------- Co-authored-by: Tanguy Rocher --- .../parser/functions/test_create_functions.py | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) diff --git a/tests/parser/functions/test_create_functions.py b/tests/parser/functions/test_create_functions.py index 876d50b27d..a5db0e0bb1 100644 --- a/tests/parser/functions/test_create_functions.py +++ b/tests/parser/functions/test_create_functions.py @@ -431,3 +431,180 @@ def test2(target: address, salt: bytes32) -> address: # test2 = c.test2(b"\x01", salt) # assert HexBytes(test2) == create2_address_of(c.address, salt, vyper_initcode(b"\x01")) # assert_tx_failed(lambda: c.test2(bytecode, salt)) + + + +@pytest.mark.parametrize("blueprint_prefix", [b"", b"\xfe", b"\xfe\71\x00"]) +def test_create_from_blueprint_complex_value( + get_contract, deploy_blueprint_for, w3, blueprint_prefix +): + code = """ +var: uint256 + +@external +@payable +def __init__(x: uint256): + self.var = x + +@external +def foo()-> uint256: + return self.var + """ + + prefix_len = len(blueprint_prefix) + + deployer_code = f""" +created_address: public(address) +x: constant(Bytes[32]) = b"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0c" + +@internal +def foo() -> uint256: + g:uint256 = 42 + return 3 + +@external +@payable +def test(target: address): + self.created_address = create_from_blueprint(target, x, code_offset={prefix_len}, value=self.foo(), raw_args=True) + """ + + foo_contract = get_contract(code, 12) + expected_runtime_code = w3.eth.get_code(foo_contract.address) + + f, FooContract = deploy_blueprint_for(code, initcode_prefix=blueprint_prefix) + + d = get_contract(deployer_code) + + d.test(f.address, transact={"value": 3}) + + test = FooContract(d.created_address()) + assert w3.eth.get_code(test.address) == expected_runtime_code + assert test.foo() == 12 + + +@pytest.mark.parametrize("blueprint_prefix", [b"", b"\xfe", b"\xfe\71\x00"]) +def test_create_from_blueprint_complex_salt_raw_args( + get_contract, deploy_blueprint_for, w3, blueprint_prefix +): + code = """ +var: uint256 + +@external +@payable +def __init__(x: uint256): + self.var = x + +@external +def foo()-> uint256: + return self.var + """ + + prefix_len = len(blueprint_prefix) + deployer_code = f""" +created_address: public(address) + +x: constant(Bytes[32]) = b"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0c" +salt: constant(bytes32) = keccak256("kebab") + +@internal +def foo() -> bytes32: + g:uint256 = 42 + return salt + +@external +@payable +def test(target: address): + self.created_address = create_from_blueprint(target, x, code_offset={prefix_len}, salt=self.foo(), raw_args= True) + """ + + foo_contract = get_contract(code, 12) + expected_runtime_code = w3.eth.get_code(foo_contract.address) + + f, FooContract = deploy_blueprint_for(code, initcode_prefix=blueprint_prefix) + + d = get_contract(deployer_code) + + d.test(f.address, transact={}) + + test = FooContract(d.created_address()) + assert w3.eth.get_code(test.address) == expected_runtime_code + assert test.foo() == 12 + + +@pytest.mark.parametrize("blueprint_prefix", [b"", b"\xfe", b"\xfe\71\x00"]) +def test_create_from_blueprint_complex_salt_no_constructor_args( + get_contract, deploy_blueprint_for, w3, blueprint_prefix +): + code = """ +var: uint256 + +@external +@payable +def __init__(): + self.var = 12 + +@external +def foo()-> uint256: + return self.var + """ + + prefix_len = len(blueprint_prefix) + deployer_code = f""" +created_address: public(address) + +salt: constant(bytes32) = keccak256("kebab") + +@external +@payable +def test(target: address): + self.created_address = create_from_blueprint(target, code_offset={prefix_len}, salt=keccak256(_abi_encode(target))) + """ + + foo_contract = get_contract(code) + expected_runtime_code = w3.eth.get_code(foo_contract.address) + + f, FooContract = deploy_blueprint_for(code, initcode_prefix=blueprint_prefix) + + d = get_contract(deployer_code) + + d.test(f.address, transact={}) + + test = FooContract(d.created_address()) + assert w3.eth.get_code(test.address) == expected_runtime_code + assert test.foo() == 12 + +def test_create_copy_of_complex_kwargs(get_contract, w3): + complex_salt = """ +created_address: public(address) + +@external +def test(target: address) -> address: + self.created_address = create_copy_of(target, salt = keccak256(_abi_encode(target))) + return self.created_address + + """ + + c = get_contract(complex_salt) + bytecode = w3.eth.get_code(c.address) + c.test(c.address, transact={}) + test1 = c.created_address() + assert w3.eth.get_code(test1) == bytecode + + complex_value = """ +created_address: public(address) + +@external +@payable +def test(target: address) -> address: + value: uint256 = 2 + self.created_address = create_copy_of(target, value = [2,2,2][value]) + return self.created_address + + """ + + c = get_contract(complex_value) + bytecode = w3.eth.get_code(c.address) + + c.test(c.address, transact={"value": 2}) + test1 = c.created_address() + assert w3.eth.get_code(test1) == bytecode From bb5636178a092aca722b230898d7910cd7710364 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 19 Sep 2023 12:59:20 -0400 Subject: [PATCH 05/15] fix lint --- .../parser/functions/test_create_functions.py | 39 +++++++++++++++---- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/tests/parser/functions/test_create_functions.py b/tests/parser/functions/test_create_functions.py index a5db0e0bb1..1f5e99f2d3 100644 --- a/tests/parser/functions/test_create_functions.py +++ b/tests/parser/functions/test_create_functions.py @@ -433,7 +433,6 @@ def test2(target: address, salt: bytes32) -> address: # assert_tx_failed(lambda: c.test2(bytecode, salt)) - @pytest.mark.parametrize("blueprint_prefix", [b"", b"\xfe", b"\xfe\71\x00"]) def test_create_from_blueprint_complex_value( get_contract, deploy_blueprint_for, w3, blueprint_prefix @@ -453,9 +452,11 @@ def foo()-> uint256: prefix_len = len(blueprint_prefix) + some_constant = b"\00" * 31 + b"\x0c" + deployer_code = f""" created_address: public(address) -x: constant(Bytes[32]) = b"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0c" +x: constant(Bytes[32]) = {some_constant} @internal def foo() -> uint256: @@ -465,7 +466,13 @@ def foo() -> uint256: @external @payable def test(target: address): - self.created_address = create_from_blueprint(target, x, code_offset={prefix_len}, value=self.foo(), raw_args=True) + self.created_address = create_from_blueprint( + target, + x, + code_offset={prefix_len}, + value=self.foo(), + raw_args=True + ) """ foo_contract = get_contract(code, 12) @@ -499,13 +506,15 @@ def foo()-> uint256: return self.var """ + some_constant = b"\00" * 31 + b"\x0c" prefix_len = len(blueprint_prefix) + deployer_code = f""" created_address: public(address) -x: constant(Bytes[32]) = b"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0c" +x: constant(Bytes[32]) = {some_constant} salt: constant(bytes32) = keccak256("kebab") - + @internal def foo() -> bytes32: g:uint256 = 42 @@ -514,7 +523,13 @@ def foo() -> bytes32: @external @payable def test(target: address): - self.created_address = create_from_blueprint(target, x, code_offset={prefix_len}, salt=self.foo(), raw_args= True) + self.created_address = create_from_blueprint( + target, + x, + code_offset={prefix_len}, + salt=self.foo(), + raw_args= True + ) """ foo_contract = get_contract(code, 12) @@ -557,7 +572,11 @@ def foo()-> uint256: @external @payable def test(target: address): - self.created_address = create_from_blueprint(target, code_offset={prefix_len}, salt=keccak256(_abi_encode(target))) + self.created_address = create_from_blueprint( + target, + code_offset={prefix_len}, + salt=keccak256(_abi_encode(target)) + ) """ foo_contract = get_contract(code) @@ -573,13 +592,17 @@ def test(target: address): assert w3.eth.get_code(test.address) == expected_runtime_code assert test.foo() == 12 + def test_create_copy_of_complex_kwargs(get_contract, w3): complex_salt = """ created_address: public(address) @external def test(target: address) -> address: - self.created_address = create_copy_of(target, salt = keccak256(_abi_encode(target))) + self.created_address = create_copy_of( + target, + salt=keccak256(_abi_encode(target)) + ) return self.created_address """ From a4d3c12c829cfec1dfc938781567e8f7c5b9272c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 10 Aug 2023 20:58:48 -0700 Subject: [PATCH 06/15] add scope_multi abstraction --- vyper/codegen/ir_node.py | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index a266964d4c..dfb19974b7 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -2,6 +2,7 @@ from enum import Enum, auto from functools import cached_property from typing import Any, List, Optional, Tuple, Union +import contextlib from vyper.compiler.settings import VYPER_COLOR_OUTPUT from vyper.evm.address_space import AddrSpace @@ -46,6 +47,47 @@ class Encoding(Enum): # future: packed +# create multiple with scopes if any of the items are complex, to force +# ordering of side effects. +# CMC 2023-08-10 this is horrible! remove this _as soon as_ we have +# real variables in IR (that we can declare without explicit scoping - +# needs liveness analysis). +@contextlib.contextmanager +def scope_multi(ir_nodes, names): + assert len(ir_nodes) == len(names) + + should_scope = any(s._optimized.is_complex_ir for s in ir_nodes) + + class _Builder: + def resolve(self, body): + if not should_scope: + # uses of the variable have already been inlined + return body + + ret = body + # build with scopes from inside-out (hence reversed) + for arg, name in reversed(list(zip(ir_nodes, names))): + ret = ["with", name, arg, ret] + + if isinstance(body, IRnode): + return IRnode.from_list( + ret, typ=body.typ, location=body.location, encoding=body.encoding + ) + else: + return ret + + + b = _Builder() + if should_scope: + ir_vars = tuple(IRnode.from_list( + name, typ=arg.typ, location=arg.location, encoding=arg.encoding + ) for (arg, name) in zip(ir_nodes, names)) + yield b, ir_vars + else: + # inline them + yield b, ir_nodes + + # this creates a magical block which maps to IR `with` class _WithBuilder: def __init__(self, ir_node, name, should_inline=False): From 9c5c7800ea882af330b5c27adb0addcda39f9eb7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 19 Sep 2023 13:48:34 -0400 Subject: [PATCH 07/15] refactor raw_call to use scope_multi --- vyper/builtins/functions.py | 9 +++------ vyper/codegen/ir_node.py | 10 +++++----- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index f9ce95f8ee..b99d0e2748 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -36,7 +36,7 @@ unwrap_location, ) from vyper.codegen.expr import Expr -from vyper.codegen.ir_node import Encoding +from vyper.codegen.ir_node import Encoding, scope_multi from vyper.codegen.keccak256_helper import keccak256_helper from vyper.evm.address_space import MEMORY, STORAGE from vyper.exceptions import ( @@ -1156,10 +1156,7 @@ def build_IR(self, expr, args, kwargs, context): ] gas, value = IRnode.from_list(gas), IRnode.from_list(value) - with value.cache_when_complex("_value") as (b1, value), gas.cache_when_complex("_gas") as ( - b2, - gas, - ), to.cache_when_complex("_to") as (b3, to): + with scope_multi((to, value, gas), ("_to", "_value", "_gas")) as (b1, (to, value, gas)): if delegate_call: call_op = ["delegatecall", gas, to, *common_call_args] elif static_call: @@ -1168,7 +1165,7 @@ def build_IR(self, expr, args, kwargs, context): call_op = ["call", gas, to, value, *common_call_args] call_ir += [call_op] - call_ir = b1.resolve(b2.resolve(b3.resolve(call_ir))) + call_ir = b1.resolve(call_ir) # build sequence IR if outsize: diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index dfb19974b7..51576c4349 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -1,8 +1,8 @@ +import contextlib import re from enum import Enum, auto from functools import cached_property from typing import Any, List, Optional, Tuple, Union -import contextlib from vyper.compiler.settings import VYPER_COLOR_OUTPUT from vyper.evm.address_space import AddrSpace @@ -76,12 +76,12 @@ def resolve(self, body): else: return ret - b = _Builder() if should_scope: - ir_vars = tuple(IRnode.from_list( - name, typ=arg.typ, location=arg.location, encoding=arg.encoding - ) for (arg, name) in zip(ir_nodes, names)) + ir_vars = tuple( + IRnode.from_list(name, typ=arg.typ, location=arg.location, encoding=arg.encoding) + for (arg, name) in zip(ir_nodes, names) + ) yield b, ir_vars else: # inline them From bedb9fd52a9e642f24e861c75eba6e4ae8f4c303 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 19 Sep 2023 15:20:45 -0400 Subject: [PATCH 08/15] add fixes for create memory cleanliness --- vyper/builtins/functions.py | 50 ++++++++++++++++++++++++------------- vyper/codegen/ir_node.py | 36 +++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index b99d0e2748..075dec314f 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -21,6 +21,7 @@ clamp_basetype, clamp_nonzero, copy_bytes, + dummy_node_for_type, ensure_in_memory, eval_once_check, eval_seq, @@ -1592,13 +1593,15 @@ def build_IR(self, expr, context): # CREATE* functions +CREATE2_SENTINEL = dummy_node_for_type(BYTES32_T) + # create helper functions # generates CREATE op sequence + zero check for result -def _create_ir(value, buf, length, salt=None, checked=True): +def _create_ir(value, buf, length, salt, checked=True): args = [value, buf, length] create_op = "create" - if salt is not None: + if salt is not CREATE2_SENTINEL: create_op = "create2" args.append(salt) @@ -1716,8 +1719,9 @@ def build_IR(self, expr, args, kwargs, context): context.check_is_not_constant("use {self._id}", expr) should_use_create2 = "salt" in [kwarg.arg for kwarg in expr.keywords] + if not should_use_create2: - kwargs["salt"] = None + kwargs["salt"] = CREATE2_SENTINEL ir_builder = self._build_create_IR(expr, args, context, **kwargs) @@ -1797,13 +1801,17 @@ def _add_gas_estimate(self, args, should_use_create2): def _build_create_IR(self, expr, args, context, value, salt): target = args[0] - with target.cache_when_complex("create_target") as (b1, target): + # something we can pass to scope_multi + with scope_multi((target, value), ("create_target", "create_value")) as ( + b1, + (target, value), + ), salt.cache_when_complex("create_salt") as (b2, salt): codesize = IRnode.from_list(["extcodesize", target]) msize = IRnode.from_list(["msize"]) - with codesize.cache_when_complex("target_codesize") as ( - b2, - codesize, - ), msize.cache_when_complex("mem_ofst") as (b3, mem_ofst): + with scope_multi((codesize, msize), ("target_codesize", "mem_ofst")) as ( + b3, + (codesize, mem_ofst), + ): ir = ["seq"] # make sure there is actually code at the target @@ -1880,17 +1888,23 @@ def _build_create_IR(self, expr, args, context, value, salt, code_offset, raw_ar # (since the abi encoder could write to fresh memory). # it would be good to not require the memory copy, but need # to evaluate memory safety. - with target.cache_when_complex("create_target") as (b1, target), argslen.cache_when_complex( - "encoded_args_len" - ) as (b2, encoded_args_len), code_offset.cache_when_complex("code_ofst") as (b3, codeofst): - codesize = IRnode.from_list(["sub", ["extcodesize", target], codeofst]) + with scope_multi( + (target, value, argslen, code_offset), + ("create_target", "create_value", "encoded_args_len", "code_ofst"), + ) as (b1, (target, value, encoded_args_len, code_offset)), salt.cache_when_complex( + "create_salt" + ) as ( + b2, + salt, + ): + codesize = IRnode.from_list(["sub", ["extcodesize", target], code_offset]) # copy code to memory starting from msize. we are clobbering # unused memory so it's safe. msize = IRnode.from_list(["msize"], location=MEMORY) - with codesize.cache_when_complex("target_codesize") as ( - b4, - codesize, - ), msize.cache_when_complex("mem_ofst") as (b5, mem_ofst): + with scope_multi((codesize, msize), ("target_codesize", "mem_ofst")) as ( + b3, + (codesize, mem_ofst), + ): ir = ["seq"] # make sure there is code at the target, and that @@ -1910,7 +1924,7 @@ def _build_create_IR(self, expr, args, context, value, salt, code_offset, raw_ar # copy the target code into memory. # layout starting from mem_ofst: # 00...00 (22 0's) | preamble | bytecode - ir.append(["extcodecopy", target, mem_ofst, codeofst, codesize]) + ir.append(["extcodecopy", target, mem_ofst, code_offset, codesize]) ir.append(copy_bytes(add_ofst(mem_ofst, codesize), argbuf, encoded_args_len, bufsz)) @@ -1925,7 +1939,7 @@ def _build_create_IR(self, expr, args, context, value, salt, code_offset, raw_ar ir.append(_create_ir(value, mem_ofst, length, salt)) - return b1.resolve(b2.resolve(b3.resolve(b4.resolve(b5.resolve(ir))))) + return b1.resolve(b2.resolve(b3.resolve(ir))) class _UnsafeMath(BuiltinFunction): diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 51576c4349..b4c584350f 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -47,15 +47,44 @@ class Encoding(Enum): # future: packed -# create multiple with scopes if any of the items are complex, to force -# ordering of side effects. -# CMC 2023-08-10 this is horrible! remove this _as soon as_ we have +# shortcut for chaining multiple cache_when_complex calls +# CMC 2023-08-10 remove this and scope_together _as soon as_ we have # real variables in IR (that we can declare without explicit scoping - # needs liveness analysis). @contextlib.contextmanager def scope_multi(ir_nodes, names): assert len(ir_nodes) == len(names) + builders = [] + scoped_ir_nodes = [] + + class _MultiBuilder: + def resolve(self, body): + # sanity check that it's initialized properly + assert len(builders) == len(ir_nodes) + ret = body + for b in builders: + ret = b.resolve(ret) + return ret + + mb = _MultiBuilder() + + with contextlib.ExitStack() as stack: + for arg, name in zip(ir_nodes, names): + b, ir_node = stack.enter_context(arg.cache_when_complex(name)) + + builders.append(b) + scoped_ir_nodes.append(ir_node) + + yield mb, scoped_ir_nodes + + +# create multiple with scopes if any of the items are complex, to force +# ordering of side effects. +@contextlib.contextmanager +def scope_together(ir_nodes, names): + assert len(ir_nodes) == len(names) + should_scope = any(s._optimized.is_complex_ir for s in ir_nodes) class _Builder: @@ -77,6 +106,7 @@ def resolve(self, body): return ret b = _Builder() + if should_scope: ir_vars = tuple( IRnode.from_list(name, typ=arg.typ, location=arg.location, encoding=arg.encoding) From 2c71523d8621b4bd71a4e3e9bc9d8478ac96bc06 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 19 Sep 2023 15:28:10 -0400 Subject: [PATCH 09/15] add some notes --- tests/parser/functions/test_create_functions.py | 9 +++++++++ tests/parser/functions/test_raw_call.py | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/parser/functions/test_create_functions.py b/tests/parser/functions/test_create_functions.py index 1f5e99f2d3..fa7729d98e 100644 --- a/tests/parser/functions/test_create_functions.py +++ b/tests/parser/functions/test_create_functions.py @@ -433,10 +433,15 @@ def test2(target: address, salt: bytes32) -> address: # assert_tx_failed(lambda: c.test2(bytecode, salt)) +# XXX: these various tests to check the msize allocator for +# create_copy_of and create_from_blueprint depend on calling convention +# and variables writing to memory. think of ways to make more robust to +# changes in calling convention and memory layout @pytest.mark.parametrize("blueprint_prefix", [b"", b"\xfe", b"\xfe\71\x00"]) def test_create_from_blueprint_complex_value( get_contract, deploy_blueprint_for, w3, blueprint_prefix ): + # check msize allocator does not get trampled by value= kwarg code = """ var: uint256 @@ -493,6 +498,7 @@ def test(target: address): def test_create_from_blueprint_complex_salt_raw_args( get_contract, deploy_blueprint_for, w3, blueprint_prefix ): + # test msize allocator does not get trampled by salt= kwarg code = """ var: uint256 @@ -550,6 +556,7 @@ def test(target: address): def test_create_from_blueprint_complex_salt_no_constructor_args( get_contract, deploy_blueprint_for, w3, blueprint_prefix ): + # test msize allocator does not get trampled by salt= kwarg code = """ var: uint256 @@ -594,6 +601,7 @@ def test(target: address): def test_create_copy_of_complex_kwargs(get_contract, w3): + # test msize allocator does not get trampled by salt= kwarg complex_salt = """ created_address: public(address) @@ -613,6 +621,7 @@ def test(target: address) -> address: test1 = c.created_address() assert w3.eth.get_code(test1) == bytecode + # test msize allocator does not get trampled by value= kwarg complex_value = """ created_address: public(address) diff --git a/tests/parser/functions/test_raw_call.py b/tests/parser/functions/test_raw_call.py index 115eed723a..2abb70336f 100644 --- a/tests/parser/functions/test_raw_call.py +++ b/tests/parser/functions/test_raw_call.py @@ -427,7 +427,8 @@ def baz(_addr: address, should_raise: bool) -> uint256: # XXX: these test_raw_call_clean_mem* tests depend on variables and -# calling convention writing to memory +# calling convention writing to memory. think of ways to make more +# robust to changes to calling convention and memory layout. def test_raw_call_msg_data_clean_mem(get_contract): From 4e837a84734e004518b6dd5ab27ef036f7ed0a6c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 19 Sep 2023 17:13:49 -0400 Subject: [PATCH 10/15] update optimizer tests -- callvalue is now considered constant --- tests/compiler/ir/test_optimize_ir.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/compiler/ir/test_optimize_ir.py b/tests/compiler/ir/test_optimize_ir.py index b679e55453..1466166501 100644 --- a/tests/compiler/ir/test_optimize_ir.py +++ b/tests/compiler/ir/test_optimize_ir.py @@ -143,7 +143,9 @@ (["sub", "x", 0], ["x"]), (["sub", "x", "x"], [0]), (["sub", ["sload", 0], ["sload", 0]], None), - (["sub", ["callvalue"], ["callvalue"]], None), + (["sub", ["callvalue"], ["callvalue"]], [0]), + (["sub", ["msize"], ["msize"]], None), + (["sub", ["gas"], ["gas"]], None), (["sub", -1, ["sload", 0]], ["not", ["sload", 0]]), (["mul", "x", 1], ["x"]), (["div", "x", 1], ["x"]), @@ -210,7 +212,9 @@ (["eq", -1, ["add", -(2**255), 2**255 - 1]], [1]), # test compile-time wrapping (["eq", -2, ["add", 2**256 - 1, 2**256 - 1]], [1]), # test compile-time wrapping (["eq", "x", "x"], [1]), - (["eq", "callvalue", "callvalue"], None), + (["eq", "gas", "gas"], None), + (["eq", "msize", "msize"], None), + (["eq", "callvalue", "callvalue"], [1]), (["ne", "x", "x"], [0]), ] From aedb076ef4e08fc7b064450aea56c612910171bf Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 20 Sep 2023 09:01:19 -0700 Subject: [PATCH 11/15] update an IR name Co-authored-by: trocher <43437004+trocher@users.noreply.github.com> --- vyper/builtins/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index 075dec314f..07a3bb1d59 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -1890,7 +1890,7 @@ def _build_create_IR(self, expr, args, context, value, salt, code_offset, raw_ar # to evaluate memory safety. with scope_multi( (target, value, argslen, code_offset), - ("create_target", "create_value", "encoded_args_len", "code_ofst"), + ("create_target", "create_value", "encoded_args_len", "code_offset"), ) as (b1, (target, value, encoded_args_len, code_offset)), salt.cache_when_complex( "create_salt" ) as ( From 59d706079ed3caa061b579ba9f9bcee88e6039f4 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 20 Sep 2023 12:13:26 -0400 Subject: [PATCH 12/15] fix order of evaluation in scope_multi --- tests/parser/functions/test_raw_call.py | 27 +++++++++++++++++++++++++ vyper/codegen/ir_node.py | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/parser/functions/test_raw_call.py b/tests/parser/functions/test_raw_call.py index 2abb70336f..81efe64a18 100644 --- a/tests/parser/functions/test_raw_call.py +++ b/tests/parser/functions/test_raw_call.py @@ -487,6 +487,33 @@ def bar(f: uint256, g: uint256, h: uint256) -> Bytes[100]: ) +def test_raw_call_clean_mem3(get_contract): + # test msize uses clean memory and does not get overwritten by + # any raw_call() arguments, and also test order of evaluation for + # scope_multi + code = """ +buf: Bytes[100] +canary: String[32] + +@internal +def bar() -> address: + self.canary = "bar" + return 0x0000000000000000000000000000000000000004 + +@internal +def goo() -> uint256: + self.canary = "goo" + return 0 + +@external +def foo() -> String[32]: + self.buf = raw_call(self.bar(), msg.data, value = self.goo(), max_outsize=100) + return self.canary + """ + c = get_contract(code) + assert c.foo() == "goo" + + def test_raw_call_clean_mem_kwargs_value(get_contract): # test msize uses clean memory and does not get overwritten by # any raw_call() kwargs diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index b4c584350f..9d6770ce81 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -63,7 +63,7 @@ def resolve(self, body): # sanity check that it's initialized properly assert len(builders) == len(ir_nodes) ret = body - for b in builders: + for b in reversed(builders): ret = b.resolve(ret) return ret From fe9f6eaf9f8b903c936818eb1a3a9674c7d39ee4 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 20 Sep 2023 12:23:16 -0400 Subject: [PATCH 13/15] move salt back into scope_multi --- vyper/builtins/functions.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index 07a3bb1d59..95759372a6 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -1802,14 +1802,13 @@ def _build_create_IR(self, expr, args, context, value, salt): target = args[0] # something we can pass to scope_multi - with scope_multi((target, value), ("create_target", "create_value")) as ( - b1, - (target, value), - ), salt.cache_when_complex("create_salt") as (b2, salt): + with scope_multi( + (target, value, salt), ("create_target", "create_value", "create_salt") + ) as (b1, (target, value, salt)): codesize = IRnode.from_list(["extcodesize", target]) msize = IRnode.from_list(["msize"]) with scope_multi((codesize, msize), ("target_codesize", "mem_ofst")) as ( - b3, + b2, (codesize, mem_ofst), ): ir = ["seq"] @@ -1835,7 +1834,7 @@ def _build_create_IR(self, expr, args, context, value, salt): ir.append(_create_ir(value, buf, buf_len, salt)) - return b1.resolve(b2.resolve(b3.resolve(ir))) + return b1.resolve(b2.resolve(ir)) class CreateFromBlueprint(_CreateBase): @@ -1889,20 +1888,15 @@ def _build_create_IR(self, expr, args, context, value, salt, code_offset, raw_ar # it would be good to not require the memory copy, but need # to evaluate memory safety. with scope_multi( - (target, value, argslen, code_offset), - ("create_target", "create_value", "encoded_args_len", "code_offset"), - ) as (b1, (target, value, encoded_args_len, code_offset)), salt.cache_when_complex( - "create_salt" - ) as ( - b2, - salt, - ): + (target, value, salt, argslen, code_offset), + ("create_target", "create_value", "create_salt", "encoded_args_len", "code_offset"), + ) as (b1, (target, value, salt, encoded_args_len, code_offset)): codesize = IRnode.from_list(["sub", ["extcodesize", target], code_offset]) # copy code to memory starting from msize. we are clobbering # unused memory so it's safe. msize = IRnode.from_list(["msize"], location=MEMORY) with scope_multi((codesize, msize), ("target_codesize", "mem_ofst")) as ( - b3, + b2, (codesize, mem_ofst), ): ir = ["seq"] @@ -1939,7 +1933,7 @@ def _build_create_IR(self, expr, args, context, value, salt, code_offset, raw_ar ir.append(_create_ir(value, mem_ofst, length, salt)) - return b1.resolve(b2.resolve(b3.resolve(ir))) + return b1.resolve(b2.resolve(ir)) class _UnsafeMath(BuiltinFunction): From 3ba2740f3dd1b04cb4ef69a770ca86bb80684310 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 21 Sep 2023 08:43:02 -0400 Subject: [PATCH 14/15] add a note on reads --- vyper/codegen/ir_node.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 9d6770ce81..a253a9278d 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -405,6 +405,8 @@ def gas(self): def is_complex_ir(self): # list of items not to cache. note can add other env variables # which do not change, e.g. calldatasize, coinbase, etc. + # reads (from memory or storage) should not be cached because + # they can have side effects. do_not_cache = {"~empty", "calldatasize", "callvalue"} return ( From 8450c37a743dce030c722debae7a77ba6ffee84b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 21 Sep 2023 08:55:16 -0400 Subject: [PATCH 15/15] fix a note --- vyper/codegen/ir_node.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index a253a9278d..ad4aa76437 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -398,15 +398,13 @@ def _check(condition, err): def gas(self): return self._gas + self.add_gas_estimate - # the IR should be cached. - # TODO make this private. turns out usages are all for the caching - # idiom that cache_when_complex addresses + # the IR should be cached and/or evaluated exactly once @property def is_complex_ir(self): # list of items not to cache. note can add other env variables # which do not change, e.g. calldatasize, coinbase, etc. # reads (from memory or storage) should not be cached because - # they can have side effects. + # they can have or be affected by side effects. do_not_cache = {"~empty", "calldatasize", "callvalue"} return (