-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
workaround cdt's small const memcpy host function calls in EOS VM OC #1016
base: main
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,4 +25,7 @@ using intrinsic_map_t = std::map<std::string, intrinsic_entry>; | |
|
||
const intrinsic_map_t& get_intrinsic_map(); | ||
|
||
static constexpr unsigned minimum_const_memcpy_intrinsic_to_optimize = 1; | ||
static constexpr unsigned maximum_const_memcpy_intrinsic_to_optimize = 128; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. This is a weird place for this, but header files for OC are a little weird because some parts are still compiled with C++17 -- these constants need to be placed in a header file that is consumed by the C++17 code. |
||
}}} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -697,12 +697,14 @@ namespace LLVMJIT | |
llvm::Value* callee; | ||
const FunctionType* calleeType; | ||
bool isExit = false; | ||
bool isMemcpy = false; | ||
if(imm.functionIndex < moduleContext.importedFunctionOffsets.size()) | ||
{ | ||
calleeType = module.types[module.functions.imports[imm.functionIndex].type.index]; | ||
llvm::Value* ic = irBuilder.CreateLoad( emitLiteralPointer((void*)(OFFSET_OF_FIRST_INTRINSIC-moduleContext.importedFunctionOffsets[imm.functionIndex]*8), llvmI64Type->getPointerTo(256)) ); | ||
callee = irBuilder.CreateIntToPtr(ic, asLLVMType(calleeType)->getPointerTo()); | ||
isExit = module.functions.imports[imm.functionIndex].moduleName == "env" && module.functions.imports[imm.functionIndex].exportName == "eosio_exit"; | ||
isMemcpy = module.functions.imports[imm.functionIndex].moduleName == "env" && module.functions.imports[imm.functionIndex].exportName == "memcpy"; | ||
} | ||
else | ||
{ | ||
|
@@ -715,6 +717,28 @@ namespace LLVMJIT | |
auto llvmArgs = (llvm::Value**)alloca(sizeof(llvm::Value*) * calleeType->parameters.size()); | ||
popMultiple(llvmArgs,calleeType->parameters.size()); | ||
|
||
//convert small constant sized memcpy host function calls to a load+store (plus small call to validate non-overlap rule) | ||
if(isMemcpy) { | ||
assert(calleeType->parameters.size() == 3); | ||
if(llvm::ConstantInt* const_memcpy_sz = llvm::dyn_cast<llvm::ConstantInt>(llvmArgs[2]); | ||
const_memcpy_sz && | ||
const_memcpy_sz->getZExtValue() >= minimum_const_memcpy_intrinsic_to_optimize && | ||
const_memcpy_sz->getZExtValue() <= maximum_const_memcpy_intrinsic_to_optimize) { | ||
const unsigned sz_value = const_memcpy_sz->getZExtValue(); | ||
llvm::IntegerType* type_of_memcpy_width = llvm::Type::getIntNTy(context, sz_value*8); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The use of a non-power-of-2 type size here is rather unorthodox, though one would have to imagine something like pushq %rax
decl %gs:-18888
je 83
movl %gs:208, %eax ;;load 4 bytes (208-211)
movq %gs:200, %rcx ;;load 8 bytes (200-207)
movb %gs:212, %dl ;;load 1 byte (212)
movb %dl, %gs:16 ;;store 1 byte (16)
movq %rcx, %gs:4 ;;store 8 bytes (4-11)
movl %eax, %gs:12 ;;store 4 bytes (12-15)
movl $4, %edi
movl $200, %esi
movl $13, %edx
callq *%gs:-21056
incl %gs:-18888
popq %rax
retq
callq *%gs:-18992 |
||
|
||
llvm::Value* load_pointer = coerceByteIndexToPointer(llvmArgs[1],0,type_of_memcpy_width); | ||
llvm::Value* store_pointer = coerceByteIndexToPointer(llvmArgs[0],0,type_of_memcpy_width); | ||
irBuilder.CreateStore(irBuilder.CreateLoad(load_pointer), store_pointer, true); | ||
|
||
emitRuntimeIntrinsic("eosvmoc_internal.check_memcpy_params", | ||
FunctionType::get(ResultType::none,{ValueType::i32,ValueType::i32,ValueType::i32}), | ||
{llvmArgs[0],llvmArgs[1],llvmArgs[2]}); | ||
push(llvmArgs[0]); | ||
return; | ||
} | ||
} | ||
|
||
// Call the function. | ||
auto result = createCall(callee,llvm::ArrayRef<llvm::Value*>(llvmArgs,calleeType->parameters.size())); | ||
if(isExit) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -88,8 +88,9 @@ static intrinsic eosio_exit_intrinsic("env.eosio_exit", IR::FunctionType::get(IR | |
std::integral_constant<std::size_t, find_intrinsic_index("env.eosio_exit")>::value | ||
); | ||
|
||
static void throw_internal_exception(const char* const s) { | ||
*reinterpret_cast<std::exception_ptr*>(eos_vm_oc_get_exception_ptr()) = std::make_exception_ptr(wasm_execution_error(FC_LOG_MESSAGE(error, s))); | ||
template <typename E> | ||
static void throw_internal_exception(const E& e) { | ||
*reinterpret_cast<std::exception_ptr*>(eos_vm_oc_get_exception_ptr()) = std::make_exception_ptr(e); | ||
siglongjmp(*eos_vm_oc_get_jmp_buf(), EOSVMOC_EXIT_EXCEPTION); | ||
__builtin_unreachable(); | ||
} | ||
|
@@ -102,25 +103,42 @@ static void throw_internal_exception(const char* const s) { | |
void name() | ||
|
||
DEFINE_EOSVMOC_TRAP_INTRINSIC(eosvmoc_internal,depth_assert) { | ||
throw_internal_exception("Exceeded call depth maximum"); | ||
throw_internal_exception(wasm_execution_error(FC_LOG_MESSAGE(error, "Exceeded call depth maximum"))); | ||
} | ||
|
||
DEFINE_EOSVMOC_TRAP_INTRINSIC(eosvmoc_internal,div0_or_overflow) { | ||
throw_internal_exception("Division by 0 or integer overflow trapped"); | ||
throw_internal_exception(wasm_execution_error(FC_LOG_MESSAGE(error, "Division by 0 or integer overflow trapped"))); | ||
} | ||
|
||
DEFINE_EOSVMOC_TRAP_INTRINSIC(eosvmoc_internal,indirect_call_mismatch) { | ||
throw_internal_exception("Indirect call function type mismatch"); | ||
throw_internal_exception(wasm_execution_error(FC_LOG_MESSAGE(error, "Indirect call function type mismatch"))); | ||
} | ||
|
||
DEFINE_EOSVMOC_TRAP_INTRINSIC(eosvmoc_internal,indirect_call_oob) { | ||
throw_internal_exception("Indirect call index out of bounds"); | ||
throw_internal_exception(wasm_execution_error(FC_LOG_MESSAGE(error, "Indirect call index out of bounds"))); | ||
} | ||
|
||
DEFINE_EOSVMOC_TRAP_INTRINSIC(eosvmoc_internal,unreachable) { | ||
throw_internal_exception("Unreachable reached"); | ||
throw_internal_exception(wasm_execution_error(FC_LOG_MESSAGE(error, "Unreachable reached"))); | ||
} | ||
|
||
static void eos_vm_oc_check_memcpy_params(int32_t dest, int32_t src, int32_t length) { | ||
//make sure dest & src are zexted when converted from signed 32-bit to signed ptrdiff_t; length should always be small but do it too | ||
const unsigned udest = dest; | ||
const unsigned usrc = src; | ||
const unsigned ulength = length; | ||
|
||
//this must remain the same behavior as the memcpy host function | ||
if((size_t)(std::abs((ptrdiff_t)udest - (ptrdiff_t)usrc)) >= ulength) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. This check compiles down to about 11 instructions but can still consume a substantial amount of CPU (beyond 1% in some cases). Perhaps something more clever can shave off another 0.5%. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. wonder if
might be faster? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Did you try plain There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Both of these are interesting suggestions and worth trying. Though, I'd suggest we keep the existing impl initially as it's most like what already exists (and has already been performance tested at the moment). I'm also curious what would happen if the check was moved completely inline. The obviously bad aspect of this is that it's going to put a lot of pressure on the branch prediction cache. But... there is a non-trivial amount of work just to get the values into the right registers to pass off to a standard ABI function; then again maybe these get pipelined well. Plenty to experiment with here. |
||
return; | ||
throw_internal_exception(overlapping_memory_error(FC_LOG_MESSAGE(error, "memcpy can only accept non-aliasing pointers"))); | ||
} | ||
|
||
static intrinsic check_memcpy_params_intrinsic("eosvmoc_internal.check_memcpy_params", IR::FunctionType::get(IR::ResultType::none,{IR::ValueType::i32,IR::ValueType::i32,IR::ValueType::i32}), | ||
(void*)&eos_vm_oc_check_memcpy_params, | ||
std::integral_constant<std::size_t, find_intrinsic_index("eosvmoc_internal.check_memcpy_params")>::value | ||
); | ||
|
||
struct executor_signal_init { | ||
executor_signal_init() { | ||
struct sigaction sig_action, old_sig_action; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
128 is a bit of a dice roll. I don't have data to know where the exact threshold is between this local copy being faster or slower than the full call off to memcpy. I wouldn't want to go higher than this anyway though, since it bloats the code a little more.