From 6413198034a5fb736453afc920b12835a1709c83 Mon Sep 17 00:00:00 2001 From: Ian Guimaraes Date: Tue, 11 Jun 2024 14:50:09 -0300 Subject: [PATCH 1/3] feat(arguments): calldatacopy base case --- .../src/utils/heuristics/arguments.rs | 66 ++++++++++++++++++- crates/vm/src/core/types.rs | 11 +++- 2 files changed, 72 insertions(+), 5 deletions(-) diff --git a/crates/decompile/src/utils/heuristics/arguments.rs b/crates/decompile/src/utils/heuristics/arguments.rs index ca9a9002..fadb403c 100644 --- a/crates/decompile/src/utils/heuristics/arguments.rs +++ b/crates/decompile/src/utils/heuristics/arguments.rs @@ -46,8 +46,18 @@ pub fn argument_heuristic( // CALLDATACOPY 0x37 => { - // TODO: implement CALLDATACOPY support - trace!("CALLDATACOPY detected; not implemented"); + let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) + / (32 * 3)) // Accounts for dest, source and size + .try_into() + .unwrap_or(usize::MAX); + + function.arguments.entry(arg_index).or_insert_with(|| { + CalldataFrame { + arg_op: state.last_instruction.input_operations[0].to_string(), + mask_size: usize::MAX, // init to MAX because it's a dynamic-size argument + heuristics: HashSet::new(), + } + }); } // AND | OR @@ -256,3 +266,55 @@ pub fn argument_heuristic( Ok(()) } + +#[cfg(test)] +mod tests { + use ethers::types::U256; + use heimdall_vm::core::{ + memory::Memory, + opcodes::Opcode, + stack::Stack, + storage::Storage, + vm::{Instruction, State}, + }; + + use crate::{ + core::analyze::{AnalyzerState, AnalyzerType}, + interfaces::AnalyzedFunction, + }; + + use super::argument_heuristic; + + #[test] + fn test_heuristic_for_calldatacopy() { + let mut function = AnalyzedFunction::new("0x40c10f19", false); + let state = State { + last_instruction: Instruction { + instruction: 0, + opcode_details: Some(Opcode::new(0x37)), + opcode: 0x37, + inputs: vec![U256::from(160), U256::from(36), U256::from(0)], + outputs: vec![], + input_operations: vec![], + output_operations: vec![], + }, + gas_used: 0, + gas_remaining: 0, + stack: Stack::new(), + memory: Memory::new(), + storage: Storage::new(), + events: vec![], + }; + let mut analyzer_state = AnalyzerState { + jumped_conditional: None, + conditional_stack: Vec::new(), + analyzer_type: AnalyzerType::Solidity, + }; + + argument_heuristic(&mut function, &state, &mut analyzer_state).unwrap(); + + assert_eq!(function.arguments.len(), 1); + assert_eq!(function.arguments[&1].mask_size, usize::MAX); + assert_eq!(function.arguments[&1].heuristics.len(), 0); + } +} diff --git a/crates/vm/src/core/types.rs b/crates/vm/src/core/types.rs index 288df378..ec62b3a8 100644 --- a/crates/vm/src/core/types.rs +++ b/crates/vm/src/core/types.rs @@ -1,4 +1,4 @@ -use std::{collections::VecDeque, ops::Range}; +use std::{collections::VecDeque, ops::Range, usize}; use ethers::abi::{AbiEncode, ParamType}; use eyre::{eyre, Result}; @@ -271,6 +271,11 @@ pub fn byte_size_to_type(byte_size: usize) -> (usize, Vec) { match byte_size { 1 => potential_types.push("bool".to_string()), 15..=20 => potential_types.push("address".to_string()), + usize::MAX => { + // if the byte size is usize::MAX, it is a dynamic type + potential_types.push("bytes memory".to_string()); + return (byte_size, potential_types); + } _ => {} } @@ -326,8 +331,8 @@ pub fn get_padding(bytes: &[u8]) -> Padding { // we can avoid doing a full check if any of the following are true: // there are no null bytes OR // neither first nor last byte is a null byte, it is not padded - if null_byte_indices.is_empty() || - null_byte_indices[0] != 0 && null_byte_indices[null_byte_indices.len() - 1] != size - 1 + if null_byte_indices.is_empty() + || null_byte_indices[0] != 0 && null_byte_indices[null_byte_indices.len() - 1] != size - 1 { return Padding::None; } From 3fe3e6bdda32a85f9b4e1016474643ab38651c8a Mon Sep 17 00:00:00 2001 From: Ian Guimaraes Date: Fri, 21 Jun 2024 16:32:37 -0300 Subject: [PATCH 2/3] fix(arguments): calldatacopy tests --- .../decompile/src/utils/heuristics/arguments.rs | 17 ++++++++++++----- crates/vm/src/core/types.rs | 4 ++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/crates/decompile/src/utils/heuristics/arguments.rs b/crates/decompile/src/utils/heuristics/arguments.rs index 9c6453d5..9de86341 100644 --- a/crates/decompile/src/utils/heuristics/arguments.rs +++ b/crates/decompile/src/utils/heuristics/arguments.rs @@ -6,7 +6,7 @@ use heimdall_vm::core::{ types::{byte_size_to_type, convert_bitmask}, vm::State, }; -use tracing::{debug, trace}; +use tracing::debug; use crate::{ core::analyze::{AnalyzerState, AnalyzerType}, @@ -46,11 +46,15 @@ pub fn argument_heuristic( // CALLDATACOPY 0x37 => { - let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) - / (32 * 3)) // Accounts for dest, source and size + let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) / + (32 * 3)) // Accounts for dest, source and size .try_into() .unwrap_or(usize::MAX); + println!("arg key: {}", arg_index); + + print!("{:#?}", state.last_instruction.input_operations); + function.arguments.entry(arg_index).or_insert_with(|| { CalldataFrame { arg_op: state.last_instruction.input_operations[0].to_string(), @@ -271,7 +275,7 @@ mod tests { use ethers::types::U256; use heimdall_vm::core::{ memory::Memory, - opcodes::Opcode, + opcodes::{Opcode, WrappedInput, WrappedOpcode}, stack::Stack, storage::Storage, vm::{Instruction, State}, @@ -294,7 +298,10 @@ mod tests { opcode: 0x37, inputs: vec![U256::from(160), U256::from(36), U256::from(0)], outputs: vec![], - input_operations: vec![], + input_operations: vec![WrappedOpcode { + opcode: Opcode::new(0x60), + inputs: vec![WrappedInput::Raw(U256::from(1))], + }], output_operations: vec![], }, gas_used: 0, diff --git a/crates/vm/src/core/types.rs b/crates/vm/src/core/types.rs index b1de1089..dbd2d8f0 100644 --- a/crates/vm/src/core/types.rs +++ b/crates/vm/src/core/types.rs @@ -186,8 +186,8 @@ pub fn get_padding(bytes: &[u8]) -> Padding { // we can avoid doing a full check if any of the following are true: // there are no null bytes OR // neither first nor last byte is a null byte, it is not padded - if null_byte_indices.is_empty() - || null_byte_indices[0] != 0 && null_byte_indices[null_byte_indices.len() - 1] != size - 1 + if null_byte_indices.is_empty() || + null_byte_indices[0] != 0 && null_byte_indices[null_byte_indices.len() - 1] != size - 1 { return Padding::None; } From 13f79152e6adfe38a09c37f02797511992588930 Mon Sep 17 00:00:00 2001 From: Ian Guimaraes Date: Fri, 21 Jun 2024 16:48:30 -0300 Subject: [PATCH 3/3] chore(arguments): remove logs --- crates/decompile/src/utils/heuristics/arguments.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/decompile/src/utils/heuristics/arguments.rs b/crates/decompile/src/utils/heuristics/arguments.rs index 9de86341..fac88ab1 100644 --- a/crates/decompile/src/utils/heuristics/arguments.rs +++ b/crates/decompile/src/utils/heuristics/arguments.rs @@ -51,10 +51,6 @@ pub fn argument_heuristic( .try_into() .unwrap_or(usize::MAX); - println!("arg key: {}", arg_index); - - print!("{:#?}", state.last_instruction.input_operations); - function.arguments.entry(arg_index).or_insert_with(|| { CalldataFrame { arg_op: state.last_instruction.input_operations[0].to_string(),