diff --git a/crates/decompile/src/utils/heuristics/arguments.rs b/crates/decompile/src/utils/heuristics/arguments.rs
index 2d74cc20..0b82b9f3 100644
--- a/crates/decompile/src/utils/heuristics/arguments.rs
+++ b/crates/decompile/src/utils/heuristics/arguments.rs
@@ -7,7 +7,7 @@ use heimdall_vm::core::{
     types::{byte_size_to_type, convert_bitmask},
     vm::State,
 };
-use tracing::{debug, trace};
+use tracing::debug;
 
 use crate::{
     core::analyze::{AnalyzerState, AnalyzerType},
@@ -48,8 +48,25 @@ pub fn argument_heuristic(
 
         // CALLDATACOPY
         0x37 => {
-            // TODO: implement CALLDATACOPY support
-            trace!("CALLDATACOPY detected; not implemented");
+            // Map the first CALLDATACOPY input to an argument index: subtract 4
+            // (presumably the selector bytes -- confirm) and divide by 96. A quotient that
+            // does not fit in `usize` becomes `usize::MAX`.
+            // NOTE(review): the EVM pops CALLDATACOPY operands as (destOffset, offset, size);
+            // if `inputs` keeps that order, inputs[0] is the memory destination and the
+            // calldata offset is inputs[1] -- confirm which one should drive `arg_index`.
+            let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) /
+                (32 * 3)) // Accounts for dest, source and size
+                .try_into()
+                .unwrap_or(usize::MAX);
+
+            // Only the first sighting creates the frame; an existing entry is kept as is.
+            function.arguments.entry(arg_index).or_insert_with(|| {
+                CalldataFrame {
+                    arg_op: state.last_instruction.input_operations[0].to_string(),
+                    mask_size: usize::MAX, // init to MAX because it's a dynamic-size argument
+                    heuristics: HashSet::new(),
+                }
+            });
         }
 
         // AND | OR
@@ -277,3 +294,61 @@ pub fn argument_heuristic(
 
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use ethers::types::U256;
+    use heimdall_vm::core::{
+        memory::Memory,
+        opcodes::{Opcode, WrappedInput, WrappedOpcode},
+        stack::Stack,
+        storage::Storage,
+        vm::{Instruction, State},
+    };
+
+    use crate::{
+        core::analyze::{AnalyzerState, AnalyzerType},
+        interfaces::AnalyzedFunction,
+    };
+
+    use super::argument_heuristic;
+
+    /// CALLDATACOPY should register a single argument frame with `mask_size == usize::MAX`
+    /// and an empty heuristic set.
+    #[test]
+    fn test_heuristic_for_calldatacopy() {
+        let mut function = AnalyzedFunction::new("0x40c10f19", false);
+        let state = State {
+            last_instruction: Instruction {
+                instruction: 0,
+                opcode_details: Some(Opcode::new(0x37)),
+                opcode: 0x37,
+                inputs: vec![U256::from(160), U256::from(36), U256::from(0)],
+                outputs: vec![],
+                input_operations: vec![WrappedOpcode {
+                    opcode: Opcode::new(0x60),
+                    inputs: vec![WrappedInput::Raw(U256::from(1))],
+                }],
+                output_operations: vec![],
+            },
+            gas_used: 0,
+            gas_remaining: 0,
+            stack: Stack::new(),
+            memory: Memory::new(),
+            storage: Storage::new(),
+            events: vec![],
+        };
+        let mut analyzer_state = AnalyzerState {
+            jumped_conditional: None,
+            conditional_stack: Vec::new(),
+            analyzer_type: AnalyzerType::Solidity,
+        };
+
+        argument_heuristic(&mut function, &state, &mut analyzer_state).unwrap();
+
+        // (160 - 4) / 96 == 1 with integer division, so the frame is stored under key 1.
+        assert_eq!(function.arguments.len(), 1);
+        assert_eq!(function.arguments[&1].mask_size, usize::MAX);
+        assert_eq!(function.arguments[&1].heuristics.len(), 0);
+    }
+}
diff --git a/crates/vm/src/core/types.rs b/crates/vm/src/core/types.rs
index f8058ce9..d588de9f 100644
--- a/crates/vm/src/core/types.rs
+++ b/crates/vm/src/core/types.rs
@@ -131,6 +131,12 @@ pub fn byte_size_to_type(byte_size: usize) -> (usize, Vec<String>) {
     match byte_size {
         1 => potential_types.push("bool".to_string()),
         15..=20 => potential_types.push("address".to_string()),
+        usize::MAX => {
+            // if the byte size is usize::MAX, it is a dynamic type; `usize::MAX` here is the
+            // primitive's associated constant, so no `std::usize` import is required
+            potential_types.push("bytes memory".to_string());
+            return (byte_size, potential_types);
+        }
         _ => {}
     }
 