From 99552008379c09a84e35c22ad7ad7ec069af63ca Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Tue, 3 Sep 2024 16:14:56 -0400 Subject: [PATCH 01/22] x86-symbolic: Setting up a SysV-compatible stack --- x86_symbolic/macaw-x86-symbolic.cabal | 1 + x86_symbolic/src/Data/Macaw/X86/Symbolic.hs | 104 ++++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/x86_symbolic/macaw-x86-symbolic.cabal b/x86_symbolic/macaw-x86-symbolic.cabal index 91b0b5a8..aaa73f19 100644 --- a/x86_symbolic/macaw-x86-symbolic.cabal +++ b/x86_symbolic/macaw-x86-symbolic.cabal @@ -9,6 +9,7 @@ license-file: LICENSE library build-depends: base >= 4.13, + containers, bv-sized >= 1.0.0, crucible >= 0.4, crucible-llvm, diff --git a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs index b36fc71b..ef855552 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs @@ -44,22 +44,28 @@ module Data.Macaw.X86.Symbolic , r15 , getReg , IP, GP, Flag, X87Status, X87Top, X87Tag, FPReg, YMM + , x86SysvStack ) where import Control.Lens ((^.),(%~),(&)) +import qualified Control.Monad as Monad import Control.Monad ( void ) import Control.Monad.IO.Class ( liftIO ) +import qualified Data.BitVector.Sized as BVS import Data.Functor.Identity (Identity(..)) import Data.Kind import Data.Parameterized.Context as Ctx import Data.Parameterized.Map as MapF import Data.Parameterized.TraversableF import Data.Parameterized.TraversableFC +import qualified Data.Sequence as Seq +import Data.Word (Word8) import GHC.TypeLits import qualified Data.Macaw.CFG as M import Data.Macaw.Symbolic import Data.Macaw.Symbolic.Backend +import qualified Data.Macaw.Symbolic.Stack as MSS import qualified Data.Macaw.Types as M import qualified Data.Macaw.X86 as M import qualified Data.Macaw.X86.X86Reg as M @@ -77,6 +83,8 @@ import qualified Lang.Crucible.CFG.Reg as C import qualified Lang.Crucible.CFG.Expr as CE import qualified Lang.Crucible.Simulator as C import qualified Lang.Crucible.Types as C +import qualified Lang.Crucible.LLVM.Bytes as Bytes +import qualified Lang.Crucible.LLVM.DataLayout as CLD import qualified Lang.Crucible.LLVM.MemModel as MM ------------------------------------------------------------------------ @@ -472,6 +480,102 @@ instance GenArchInfo LLVMMemory M.X86_64 where , updateReg = x86UpdateReg } +-- | Helper, not exported +writeBytesBackwards :: + MM.HasPtrWidth w => + C.IsSymBackend sym bak => + (?memOpts :: MM.MemOptions) => + MM.HasLLVMAnn sym => + bak -> + MM.MemImpl sym -> + MM.LLVMPtr sym w -> + Seq.Seq Word8 -> + IO (MM.LLVMPtr sym w, MM.MemImpl sym) +writeBytesBackwards bak mem ptr bytes = do + let sym = C.backendGetSym bak + let i8 = MM.bitvectorType (Bytes.toBytes (1 :: Int)) + z <- WI.natLit sym 0 + one <- WI.bvOne sym ?ptrWidth + let writeByte (p, m) byte = do + p' <- MM.ptrSub sym ?ptrWidth p one + bv <- WI.bvLit sym (C.knownNat @8) (BVS.mkBV C.knownNat (fromIntegral byte)) + m' <- MM.storeRaw bak m p' i8 CLD.noAlignment (MM.LLVMValInt z bv) + pure (p', m') + Monad.foldM writeByte (ptr, mem) (Seq.reverse bytes) + +-- | Create an allocation for the stack for x86_64 with the SysV ABI. +-- +-- On x86_64 with the SysV ABI, the stack grows \"downwards\" from high +-- addresses to low. The end of the stack is initialized with the ELF auxiliary +-- vector (not modelled here), and functions expect the following data to be +-- available above their stack frame (i.e., just above the address in @rsp@): +-- +-- * Their stack-spilled arguments +-- * The return address +-- +-- To model this behavior, this function: +-- +-- 1. Uses 'MSS.createArrayStack' to allocate the stack, which returns a stack +-- allocation and a pointer to its end, minus the space for stack-spilled +-- arguments (see its Haddocks for details) +-- 2. Decrements the pointer to the top of the stack ('MSS.arrayStackTopPtr') by +-- 8 bytes (i.e., the size of a pointer) +-- 3. Writes a symbolic return address to the top of the stack +-- +-- (Unless a concrete list of bytes is passed, in which case the decrement (2) +-- is by the length of that list, and the write (3) is of those bytes.) +-- +-- This setup should enable calling most functions without incident, after the +-- returned pointer is put in @rsp@. Those that expect stack-spilled arguments +-- may end up reading from the (uninitialized) stack slots. +-- +-- Notably, the Microsoft x86_64 ABI differs in the stack setup. See +-- [this link](https://learn.microsoft.com/en-us/cpp/build/stack-usage) +-- for more detailson that ABI. +x86SysvStack :: + C.IsSymBackend sym bak => + (?memOpts :: MM.MemOptions) => + MM.HasLLVMAnn sym => + bak -> + MM.MemImpl sym -> + -- | Initialize the end of the stack to a concrete sequence of bytes if the + -- value is @Just@, otherwise initialize the end of the stack to 8 fresh, + -- symbolic bytes. + Maybe (Seq.Seq Word8) -> + -- | The caller must ensure the size of these is less than the allocation size + MSS.ExtraStackSlots -> + -- | Size of stack allocation + WI.SymExpr sym (WI.BaseBVType 64) -> + IO (MSS.ArrayStack sym 64, MM.MemImpl sym) +x86SysvStack bak mem topBytes slots sz = do + let ptrBytes = 8 + let ?ptrWidth = C.knownNat @64 + + (arrayStack, mem') <- MSS.createArrayStack bak mem slots sz + MSS.ArrayStack _base top _array <- pure arrayStack + + let i8 = MM.bitvectorType (Bytes.toBytes (1 :: Int)) + (top', mem'') <- + case topBytes of + Nothing -> do + -- (2) + let sym = C.backendGetSym bak + ptrSzBv <- WI.bvLit sym ?ptrWidth (BVS.mkBV ?ptrWidth ptrBytes) + top' <- MM.ptrSub sym ?ptrWidth top ptrSzBv + + -- (3) + z <- WI.natLit sym 0 + bv <- WI.freshConstant sym (WI.safeSymbol "x86_64_ret_addr") (WI.BaseBVRepr ?ptrWidth) + let val = MM.LLVMValInt z bv + let storTy = MM.arrayType (fromIntegral ptrBytes) i8 + mem'' <- MM.storeRaw bak mem' top' storTy CLD.noAlignment val + pure (top', mem'') + + Just bytes -> writeBytesBackwards bak mem' top bytes + + let arrayStack' = arrayStack { MSS.arrayStackTopPtr = top' } + return (arrayStack', mem'') + {- Note [Syscalls] While most of the extension functions can be translated directly by embedding them in From ffcab1cd40008ae97c9ce23331295789c9e5cb53 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 4 Sep 2024 11:15:50 -0400 Subject: [PATCH 02/22] x86-symbolic: A module for SysV stack manipulation Begin splitting apart a giant, branching, monolithic stack setup function into individual helpers. Introduce newtypes. --- x86_symbolic/macaw-x86-symbolic.cabal | 1 + x86_symbolic/src/Data/Macaw/X86/Symbolic.hs | 104 ------------- .../src/Data/Macaw/X86/Symbolic/ABI/SysV.hs | 141 ++++++++++++++++++ 3 files changed, 142 insertions(+), 104 deletions(-) create mode 100644 x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs diff --git a/x86_symbolic/macaw-x86-symbolic.cabal b/x86_symbolic/macaw-x86-symbolic.cabal index aaa73f19..1f1930cc 100644 --- a/x86_symbolic/macaw-x86-symbolic.cabal +++ b/x86_symbolic/macaw-x86-symbolic.cabal @@ -30,6 +30,7 @@ library exposed-modules: Data.Macaw.X86.Symbolic Data.Macaw.X86.Crucible + Data.Macaw.X86.Symbolic.ABI.SysV other-modules: Data.Macaw.X86.Symbolic.Panic diff --git a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs index ef855552..b36fc71b 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs @@ -44,28 +44,22 @@ module Data.Macaw.X86.Symbolic , r15 , getReg , IP, GP, Flag, X87Status, X87Top, X87Tag, FPReg, YMM - , x86SysvStack ) where import Control.Lens ((^.),(%~),(&)) -import qualified Control.Monad as Monad import Control.Monad ( void ) import Control.Monad.IO.Class ( liftIO ) -import qualified Data.BitVector.Sized as BVS import Data.Functor.Identity (Identity(..)) import Data.Kind import Data.Parameterized.Context as Ctx import Data.Parameterized.Map as MapF import Data.Parameterized.TraversableF import Data.Parameterized.TraversableFC -import qualified Data.Sequence as Seq -import Data.Word (Word8) import GHC.TypeLits import qualified Data.Macaw.CFG as M import Data.Macaw.Symbolic import Data.Macaw.Symbolic.Backend -import qualified Data.Macaw.Symbolic.Stack as MSS import qualified Data.Macaw.Types as M import qualified Data.Macaw.X86 as M import qualified Data.Macaw.X86.X86Reg as M @@ -83,8 +77,6 @@ import qualified Lang.Crucible.CFG.Reg as C import qualified Lang.Crucible.CFG.Expr as CE import qualified Lang.Crucible.Simulator as C import qualified Lang.Crucible.Types as C -import qualified Lang.Crucible.LLVM.Bytes as Bytes -import qualified Lang.Crucible.LLVM.DataLayout as CLD import qualified Lang.Crucible.LLVM.MemModel as MM ------------------------------------------------------------------------ @@ -480,102 +472,6 @@ instance GenArchInfo LLVMMemory M.X86_64 where , updateReg = x86UpdateReg } --- | Helper, not exported -writeBytesBackwards :: - MM.HasPtrWidth w => - C.IsSymBackend sym bak => - (?memOpts :: MM.MemOptions) => - MM.HasLLVMAnn sym => - bak -> - MM.MemImpl sym -> - MM.LLVMPtr sym w -> - Seq.Seq Word8 -> - IO (MM.LLVMPtr sym w, MM.MemImpl sym) -writeBytesBackwards bak mem ptr bytes = do - let sym = C.backendGetSym bak - let i8 = MM.bitvectorType (Bytes.toBytes (1 :: Int)) - z <- WI.natLit sym 0 - one <- WI.bvOne sym ?ptrWidth - let writeByte (p, m) byte = do - p' <- MM.ptrSub sym ?ptrWidth p one - bv <- WI.bvLit sym (C.knownNat @8) (BVS.mkBV C.knownNat (fromIntegral byte)) - m' <- MM.storeRaw bak m p' i8 CLD.noAlignment (MM.LLVMValInt z bv) - pure (p', m') - Monad.foldM writeByte (ptr, mem) (Seq.reverse bytes) - --- | Create an allocation for the stack for x86_64 with the SysV ABI. --- --- On x86_64 with the SysV ABI, the stack grows \"downwards\" from high --- addresses to low. The end of the stack is initialized with the ELF auxiliary --- vector (not modelled here), and functions expect the following data to be --- available above their stack frame (i.e., just above the address in @rsp@): --- --- * Their stack-spilled arguments --- * The return address --- --- To model this behavior, this function: --- --- 1. Uses 'MSS.createArrayStack' to allocate the stack, which returns a stack --- allocation and a pointer to its end, minus the space for stack-spilled --- arguments (see its Haddocks for details) --- 2. Decrements the pointer to the top of the stack ('MSS.arrayStackTopPtr') by --- 8 bytes (i.e., the size of a pointer) --- 3. Writes a symbolic return address to the top of the stack --- --- (Unless a concrete list of bytes is passed, in which case the decrement (2) --- is by the length of that list, and the write (3) is of those bytes.) --- --- This setup should enable calling most functions without incident, after the --- returned pointer is put in @rsp@. Those that expect stack-spilled arguments --- may end up reading from the (uninitialized) stack slots. --- --- Notably, the Microsoft x86_64 ABI differs in the stack setup. See --- [this link](https://learn.microsoft.com/en-us/cpp/build/stack-usage) --- for more detailson that ABI. -x86SysvStack :: - C.IsSymBackend sym bak => - (?memOpts :: MM.MemOptions) => - MM.HasLLVMAnn sym => - bak -> - MM.MemImpl sym -> - -- | Initialize the end of the stack to a concrete sequence of bytes if the - -- value is @Just@, otherwise initialize the end of the stack to 8 fresh, - -- symbolic bytes. - Maybe (Seq.Seq Word8) -> - -- | The caller must ensure the size of these is less than the allocation size - MSS.ExtraStackSlots -> - -- | Size of stack allocation - WI.SymExpr sym (WI.BaseBVType 64) -> - IO (MSS.ArrayStack sym 64, MM.MemImpl sym) -x86SysvStack bak mem topBytes slots sz = do - let ptrBytes = 8 - let ?ptrWidth = C.knownNat @64 - - (arrayStack, mem') <- MSS.createArrayStack bak mem slots sz - MSS.ArrayStack _base top _array <- pure arrayStack - - let i8 = MM.bitvectorType (Bytes.toBytes (1 :: Int)) - (top', mem'') <- - case topBytes of - Nothing -> do - -- (2) - let sym = C.backendGetSym bak - ptrSzBv <- WI.bvLit sym ?ptrWidth (BVS.mkBV ?ptrWidth ptrBytes) - top' <- MM.ptrSub sym ?ptrWidth top ptrSzBv - - -- (3) - z <- WI.natLit sym 0 - bv <- WI.freshConstant sym (WI.safeSymbol "x86_64_ret_addr") (WI.BaseBVRepr ?ptrWidth) - let val = MM.LLVMValInt z bv - let storTy = MM.arrayType (fromIntegral ptrBytes) i8 - mem'' <- MM.storeRaw bak mem' top' storTy CLD.noAlignment val - pure (top', mem'') - - Just bytes -> writeBytesBackwards bak mem' top bytes - - let arrayStack' = arrayStack { MSS.arrayStackTopPtr = top' } - return (arrayStack', mem'') - {- Note [Syscalls] While most of the extension functions can be translated directly by embedding them in diff --git a/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs b/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs new file mode 100644 index 00000000..8d333653 --- /dev/null +++ b/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs @@ -0,0 +1,141 @@ +{-| +Copyright : (c) Galois, Inc 2024 +Maintainer : Langston Barrett + +On x86_64 with the SysV ABI, the stack grows \"downwards\" from high addresses +to low. The end of the stack is initialized with the ELF auxiliary vector (not +modelled here), and functions expect the following data to be available above +their stack frame (i.e., just above the address in @rsp@), from high addresses +to low: + +* Their stack-spilled arguments +* The return address + +The functions in this module support manipulation of a stack under these +constraints. ABI-compatible machine code translated by Macaw manages the stack +for itself, so this module is primarily helpful for initial setup of the stack, +before starting symbolic execution. + +Helpful links: + +* +* + +TODO: The stack is supposed to be 16-byte aligned before a @call@. + +This module is meant to be imported qualified. +-} + +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE ImplicitParams #-} + +module Data.Macaw.X86.Symbolic.ABI.SysV + ( StackPointer + , getStackPointer + , RetAddr(..) + , freshRetAddr + , writeSpilledArgs + , writeRetAddr + , allocStackFrame + ) where + +-- TODO: sort me! +import qualified Lang.Crucible.Backend as C +import qualified Lang.Crucible.LLVM.MemModel as MM +import qualified Data.Sequence as Seq +import qualified What4.Interface as WI +import qualified Data.BitVector.Sized as BVS +import qualified Lang.Crucible.LLVM.Bytes as Bytes +import qualified Lang.Crucible.LLVM.DataLayout as CLD +import qualified Control.Monad as Monad + +-- | Helper, not exported +ptrBytes :: Integer +ptrBytes = 8 + +ptrRepr :: WI.NatRepr 64 +ptrRepr = WI.knownNat + +-- | A pointer to a SysV-compatible x86_64 stack +newtype StackPointer sym = StackPointer { getStackPointer :: MM.LLVMPtr sym 64 } + +-- | A return address +newtype RetAddr sym = RetAddr { getRetAddr :: MM.LLVMPtr sym 64 } + +-- | Create a fresh, symbolic return address +freshRetAddr :: C.IsSymInterface sym => sym -> IO (RetAddr sym) +freshRetAddr sym = do + bv <- WI.freshConstant sym (WI.safeSymbol "x86_64_ret_addr") (WI.BaseBVRepr ptrRepr) + ptr <- MM.llvmPointer_bv sym bv + pure (RetAddr ptr) + +-- | Write pointer-sized stack-spilled arguments to the stack. +-- +-- SysV specifies that they will be written in reverse order, i.e., the last +-- element of the 'Seq.Seq' will be written to the highest address. +writeSpilledArgs :: + C.IsSymBackend sym bak => + (?memOpts :: MM.MemOptions) => + MM.HasLLVMAnn sym => + bak -> + MM.MemImpl sym -> + StackPointer sym -> + -- | Stack-spilled arguments, in normal order + Seq.Seq (MM.LLVMPtr sym 64) -> + IO (StackPointer sym, MM.MemImpl sym) +writeSpilledArgs bak mem sp spilledArgs = do + let sym = C.backendGetSym bak + eight <- WI.bvLit sym ptrRepr (BVS.mkBV WI.knownNat 8) + let i64 = MM.bitvectorType (Bytes.toBytes (64 :: Int)) + let ?ptrWidth = ptrRepr + let writeWord (StackPointer p, m) arg = do + p' <- MM.ptrSub sym ?ptrWidth p eight + m' <- MM.storeRaw bak m p' i64 CLD.noAlignment (MM.ptrToPtrVal arg) + pure (StackPointer p', m') + Monad.foldM writeWord (sp, mem) (Seq.reverse spilledArgs) + +-- | Write the return address to the stack. +writeRetAddr :: + C.IsSymBackend sym bak => + (?memOpts :: MM.MemOptions) => + MM.HasLLVMAnn sym => + bak -> + MM.MemImpl sym -> + StackPointer sym -> + RetAddr sym -> + IO (StackPointer sym, MM.MemImpl sym) +writeRetAddr bak mem sp retAddr = do + let sym = C.backendGetSym bak + let ?ptrWidth = MM.ptrWidth (getRetAddr retAddr) + ptrSzBv <- WI.bvLit sym ?ptrWidth (BVS.mkBV ?ptrWidth ptrBytes) + top <- MM.ptrSub sym ?ptrWidth (getStackPointer sp) ptrSzBv + let i64 = MM.bitvectorType (Bytes.toBytes (64 :: Int)) + let val = MM.ptrToPtrVal (getRetAddr retAddr) + mem' <- MM.storeRaw bak mem top i64 CLD.noAlignment val + pure (StackPointer top, mem') + +-- | Allocate a single stack frame by decrementing the stack pointer. +-- +-- From high to low addresses: +-- +-- * First, write stack-spilled arguments in reverse order +-- * Then, write the return address +allocStackFrame :: + C.IsSymBackend sym bak => + (?memOpts :: MM.MemOptions) => + MM.HasLLVMAnn sym => + bak -> + MM.MemImpl sym -> + StackPointer sym -> + -- | Stack-spilled arguments, in normal order + Seq.Seq (MM.LLVMPtr sym 64) -> + RetAddr sym -> + IO (StackPointer sym, MM.MemImpl sym) +allocStackFrame bak mem sp spilledArgs retAddr = do + let ?ptrWidth = ptrRepr + (sp', mem') <- writeSpilledArgs bak mem sp spilledArgs + writeRetAddr bak mem' sp' retAddr + +-- TODO: Pushing a stack frame (i.e., manipulating rsp) +-- TODO: Extracting the stack pointer from registers +-- TODO: Allocating the stack From e9475850fa3b7a5a3ccc22bbc957836993195f6e Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 4 Sep 2024 12:26:45 -0400 Subject: [PATCH 03/22] x86-symbolic: More SysV stack setup and manipulation --- .../src/Data/Macaw/X86/Symbolic/ABI/SysV.hs | 96 +++++++++++++++++-- 1 file changed, 87 insertions(+), 9 deletions(-) diff --git a/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs b/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs index 8d333653..c90e880b 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs @@ -32,22 +32,34 @@ This module is meant to be imported qualified. module Data.Macaw.X86.Symbolic.ABI.SysV ( StackPointer , getStackPointer + , stackPointerReg , RetAddr(..) , freshRetAddr + , SpilledArgs(..) , writeSpilledArgs , writeRetAddr , allocStackFrame + , pushStackFrame + , allocStack ) where -- TODO: sort me! +import qualified Control.Lens as Lens import qualified Lang.Crucible.Backend as C +import qualified Lang.Crucible.Simulator as C import qualified Lang.Crucible.LLVM.MemModel as MM import qualified Data.Sequence as Seq +import qualified Data.Macaw.Symbolic as MS +import qualified Data.Macaw.Symbolic.Stack as MSS +import qualified Data.Macaw.X86 as X86 +import qualified Data.Macaw.X86.Symbolic as X86S +import Data.Parameterized.Classes (ixF') import qualified What4.Interface as WI import qualified Data.BitVector.Sized as BVS import qualified Lang.Crucible.LLVM.Bytes as Bytes import qualified Lang.Crucible.LLVM.DataLayout as CLD import qualified Control.Monad as Monad +import qualified Data.Parameterized.Context as Ctx -- | Helper, not exported ptrBytes :: Integer @@ -59,20 +71,37 @@ ptrRepr = WI.knownNat -- | A pointer to a SysV-compatible x86_64 stack newtype StackPointer sym = StackPointer { getStackPointer :: MM.LLVMPtr sym 64 } +-- | A lens for accessing the stack pointer register as a 'StackPointer' +stackPointerReg :: + Lens.Lens' + (Ctx.Assignment (C.RegValue' sym) (MS.MacawCrucibleRegTypes X86.X86_64)) + (StackPointer sym) +stackPointerReg = + Lens.lens + (\regs -> StackPointer (C.unRV (regs Lens.^. ixF' X86S.rsp))) + (\regs v -> regs Lens.& ixF' X86S.rsp Lens..~ C.RV (getStackPointer v)) + -- | A return address newtype RetAddr sym = RetAddr { getRetAddr :: MM.LLVMPtr sym 64 } --- | Create a fresh, symbolic return address +-- | Create a fresh, symbolic return address. freshRetAddr :: C.IsSymInterface sym => sym -> IO (RetAddr sym) freshRetAddr sym = do bv <- WI.freshConstant sym (WI.safeSymbol "x86_64_ret_addr") (WI.BaseBVRepr ptrRepr) ptr <- MM.llvmPointer_bv sym bv pure (RetAddr ptr) +-- | Stack-spilled arguments, in normal order +newtype SpilledArgs sym + = SpilledArgs { getSpilledArgs :: Seq.Seq (MM.LLVMPtr sym 64) } + -- | Write pointer-sized stack-spilled arguments to the stack. -- -- SysV specifies that they will be written in reverse order, i.e., the last -- element of the 'Seq.Seq' will be written to the highest address. +-- +-- Asserts that the stack allocation is writable and large enough to contain the +-- spilled arguments. writeSpilledArgs :: C.IsSymBackend sym bak => (?memOpts :: MM.MemOptions) => @@ -80,8 +109,7 @@ writeSpilledArgs :: bak -> MM.MemImpl sym -> StackPointer sym -> - -- | Stack-spilled arguments, in normal order - Seq.Seq (MM.LLVMPtr sym 64) -> + SpilledArgs sym -> IO (StackPointer sym, MM.MemImpl sym) writeSpilledArgs bak mem sp spilledArgs = do let sym = C.backendGetSym bak @@ -92,9 +120,12 @@ writeSpilledArgs bak mem sp spilledArgs = do p' <- MM.ptrSub sym ?ptrWidth p eight m' <- MM.storeRaw bak m p' i64 CLD.noAlignment (MM.ptrToPtrVal arg) pure (StackPointer p', m') - Monad.foldM writeWord (sp, mem) (Seq.reverse spilledArgs) + Monad.foldM writeWord (sp, mem) (Seq.reverse (getSpilledArgs spilledArgs)) -- | Write the return address to the stack. +-- +-- Asserts that the stack allocation is writable and large enough to contain the +-- return address. writeRetAddr :: C.IsSymBackend sym bak => (?memOpts :: MM.MemOptions) => @@ -120,6 +151,9 @@ writeRetAddr bak mem sp retAddr = do -- -- * First, write stack-spilled arguments in reverse order -- * Then, write the return address +-- +-- Asserts that the stack allocation is writable and large enough to contain the +-- spilled arguments and return address. allocStackFrame :: C.IsSymBackend sym bak => (?memOpts :: MM.MemOptions) => @@ -127,8 +161,7 @@ allocStackFrame :: bak -> MM.MemImpl sym -> StackPointer sym -> - -- | Stack-spilled arguments, in normal order - Seq.Seq (MM.LLVMPtr sym 64) -> + SpilledArgs sym -> RetAddr sym -> IO (StackPointer sym, MM.MemImpl sym) allocStackFrame bak mem sp spilledArgs retAddr = do @@ -136,6 +169,51 @@ allocStackFrame bak mem sp spilledArgs retAddr = do (sp', mem') <- writeSpilledArgs bak mem sp spilledArgs writeRetAddr bak mem' sp' retAddr --- TODO: Pushing a stack frame (i.e., manipulating rsp) --- TODO: Extracting the stack pointer from registers --- TODO: Allocating the stack +-- | Like 'allocStackFrame', but manipulates @rsp@ directly. +-- +-- Asserts that the stack allocation is writable and large enough to contain the +-- spilled arguments and return address. +pushStackFrame :: + C.IsSymBackend sym bak => + (?memOpts :: MM.MemOptions) => + MM.HasLLVMAnn sym => + bak -> + MM.MemImpl sym -> + -- | Assignment of values to registers + Ctx.Assignment (C.RegValue' sym) (MS.MacawCrucibleRegTypes X86.X86_64) -> + SpilledArgs sym -> + RetAddr sym -> + IO + ( Ctx.Assignment (C.RegValue' sym) (MS.MacawCrucibleRegTypes X86.X86_64) + , MM.MemImpl sym + ) +pushStackFrame bak mem regs spilledArgs retAddr = do + let sp = regs Lens.^. stackPointerReg + (sp', mem') <- allocStackFrame bak mem sp spilledArgs retAddr + let regs' = regs Lens.& stackPointerReg Lens..~ sp' + pure (regs', mem') + +-- | Like 'MSS.createArrayStack', but puts the stack pointer in @rsp@ directly. +-- +-- Does not allow allocating stack slots, use 'allocStackFrame' or +-- 'pushStackFrame' for that. +allocStack :: + C.IsSymBackend sym bak => + (?memOpts :: MM.MemOptions) => + MM.HasLLVMAnn sym => + bak -> + MM.MemImpl sym -> + -- | Assignment of values to registers + Ctx.Assignment (C.RegValue' sym) (MS.MacawCrucibleRegTypes X86.X86_64) -> + -- | Size of stack allocation + WI.SymExpr sym (WI.BaseBVType 64) -> + IO + ( Ctx.Assignment (C.RegValue' sym) (MS.MacawCrucibleRegTypes X86.X86_64) + , MM.MemImpl sym + ) +allocStack bak mem regs sz = do + let ?ptrWidth = ptrRepr + let slots = MSS.ExtraStackSlots 0 + (MSS.ArrayStack _base top _arr, mem') <- MSS.createArrayStack bak mem slots sz + let regs' = regs Lens.& stackPointerReg Lens..~ StackPointer top + pure (regs', mem') From 145e0055516c79fac6fcd99459c2d20762b71dde Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 4 Sep 2024 12:27:39 -0400 Subject: [PATCH 04/22] x86-symbolic: Sort imports --- .../src/Data/Macaw/X86/Symbolic/ABI/SysV.hs | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs b/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs index c90e880b..6a71a78c 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs @@ -28,6 +28,7 @@ This module is meant to be imported qualified. {-# LANGUAGE DataKinds #-} {-# LANGUAGE ImplicitParams #-} +{-# LANGUAGE ImportQualifiedPost #-} module Data.Macaw.X86.Symbolic.ABI.SysV ( StackPointer @@ -43,23 +44,22 @@ module Data.Macaw.X86.Symbolic.ABI.SysV , allocStack ) where --- TODO: sort me! -import qualified Control.Lens as Lens -import qualified Lang.Crucible.Backend as C -import qualified Lang.Crucible.Simulator as C -import qualified Lang.Crucible.LLVM.MemModel as MM -import qualified Data.Sequence as Seq -import qualified Data.Macaw.Symbolic as MS -import qualified Data.Macaw.Symbolic.Stack as MSS -import qualified Data.Macaw.X86 as X86 -import qualified Data.Macaw.X86.Symbolic as X86S +import Control.Lens qualified as Lens +import Control.Monad qualified as Monad +import Data.BitVector.Sized qualified as BVS +import Data.Macaw.Symbolic qualified as MS +import Data.Macaw.Symbolic.Stack qualified as MSS +import Data.Macaw.X86 qualified as X86 +import Data.Macaw.X86.Symbolic qualified as X86S import Data.Parameterized.Classes (ixF') -import qualified What4.Interface as WI -import qualified Data.BitVector.Sized as BVS -import qualified Lang.Crucible.LLVM.Bytes as Bytes -import qualified Lang.Crucible.LLVM.DataLayout as CLD -import qualified Control.Monad as Monad -import qualified Data.Parameterized.Context as Ctx +import Data.Parameterized.Context qualified as Ctx +import Data.Sequence qualified as Seq +import Lang.Crucible.Backend qualified as C +import Lang.Crucible.LLVM.Bytes qualified as Bytes +import Lang.Crucible.LLVM.DataLayout qualified as CLD +import Lang.Crucible.LLVM.MemModel qualified as MM +import Lang.Crucible.Simulator qualified as C +import What4.Interface qualified as WI -- | Helper, not exported ptrBytes :: Integer From c68cda07ac30fd67146df11531b74e80939c5fc9 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 4 Sep 2024 15:03:19 -0400 Subject: [PATCH 05/22] symbolic: Remove unused LANGUAGE pragmas --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index 077c8cca..18868481 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE BangPatterns #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} @@ -8,9 +7,9 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeOperators #-} + -- | This module defines common test harness code that can be used in each of the -- architecture-specific backends. -- From f3fd0e0fcff67bd735bc11f1915af00cf737daec Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 4 Sep 2024 15:16:54 -0400 Subject: [PATCH 06/22] symbolic: Small refactoring in `Testing` module --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 97 +++++++++++++++------ 1 file changed, 71 insertions(+), 26 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index 18868481..fdfd35d5 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -7,7 +7,6 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeOperators #-} -- | This module defines common test harness code that can be used in each of the @@ -69,7 +68,6 @@ import qualified Data.Parameterized.Context as Ctx import qualified Data.Parameterized.List as PL import qualified Data.Parameterized.NatRepr as PN import Data.Parameterized.Some ( Some(..) ) -import Data.Proxy ( Proxy(..) ) import qualified Data.Text as Text import qualified Data.Text.Encoding as Text import qualified Data.Text.Encoding.Error as Text @@ -441,6 +439,75 @@ simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmP WSR.Unsat {} -> return (SimulationResult Unsat) WSR.Unknown -> return (SimulationResult Unknown) +discMems :: + BinariesInfo arch -> + V.Vector (MEL.Memory (MC.ArchAddrWidth arch)) +discMems binfo = + let mainInfo = mainBinaryInfo binfo in + let soInfos = sharedLibraryInfos binfo in + V.map (MD.memory . binaryDiscState) (V.cons mainInfo soInfos) + +initialMem :: + ( ext ~ MS.MacawExt arch + , CCE.IsSyntaxExtension ext + , CB.IsSymBackend sym bak + , CLM.HasLLVMAnn sym + , MS.SymArchConstraints arch + , w ~ MC.ArchAddrWidth arch + , 16 <= w + , sym ~ WE.ExprBuilder scope st fs + , bak ~ CBO.OnlineBackend solver scope st fs + , WPO.OnlineSolver solver + , ?memOpts :: CLM.MemOptions + ) => + BinariesInfo arch -> + bak -> + MAI.ArchitectureInfo arch -> + MS.ArchVals arch -> + IO (CLM.MemImpl sym, MS.MemModelConfig p sym arch CLM.Mem) +initialMem binfo bak archInfo archVals = do + let endianness = MSMLazy.toCrucibleEndian (MAI.archEndianness archInfo) + (mem, memPtrTbl) <- + MSM.newMergedGlobalMemoryWith populateRelocation archInfo bak + endianness MSM.ConcreteMutable (discMems binfo) + let mmConf = (MSM.memModelConfig bak memPtrTbl) + { MS.lookupFunctionHandle = lookupFunction archVals binfo + , MS.lookupSyscallHandle = lookupSyscall + , MS.mkGlobalPointerValidityAssertion = validityCheck + } + pure (mem, mmConf) + +lazyInitialMem :: + ( ext ~ MS.MacawExt arch + , CCE.IsSyntaxExtension ext + , CB.IsSymBackend sym bak + , CLM.HasLLVMAnn sym + , MS.SymArchConstraints arch + , w ~ MC.ArchAddrWidth arch + , 16 <= w + , sym ~ WE.ExprBuilder scope st fs + , bak ~ CBO.OnlineBackend solver scope st fs + , WPO.OnlineSolver solver + , ?memOpts :: CLM.MemOptions + , MS.HasMacawLazySimulatorState p sym w + ) => + BinariesInfo arch -> + bak -> + MAI.ArchitectureInfo arch -> + MS.ArchVals arch -> + IO (CLM.MemImpl sym, MS.MemModelConfig p sym arch CLM.Mem) +lazyInitialMem binfo bak archInfo archVals = do + let endianness = MSMLazy.toCrucibleEndian (MAI.archEndianness archInfo) + (mem, memPtrTbl) <- + MSMLazy.newMergedGlobalMemoryWith populateRelocation archInfo bak + endianness MSMLazy.ConcreteMutable (discMems binfo) + let mmConf = (MSMLazy.memModelConfig bak memPtrTbl) + { MS.lookupFunctionHandle = lookupFunction archVals binfo + , MS.lookupSyscallHandle = lookupSyscall + , MS.mkGlobalPointerValidityAssertion = validityCheck + } + pure (mem, mmConf) + -- | Set up the symbolic execution engine with initial states and execute -- -- It returns: @@ -486,10 +553,6 @@ simulateFunction binfo bak execFeatures archInfo archVals halloc mmPreset g = do let symArchFns = MS.archFunctions archVals let crucRegTypes = MS.crucArchRegTypes symArchFns let regsRepr = CT.StructRepr crucRegTypes - let mainInfo = mainBinaryInfo binfo - let soInfos = sharedLibraryInfos binfo - let endianness = MSMLazy.toCrucibleEndian (MAI.archEndianness archInfo) - let mems = V.map (MD.memory . binaryDiscState) (V.cons mainInfo soInfos) -- Initialize memory -- @@ -499,26 +562,8 @@ simulateFunction binfo bak execFeatures archInfo archVals halloc mmPreset g = do let ?ptrWidth = WI.knownRepr (initMem, mmConf) <- case mmPreset of - DefaultMemModel -> do - (initMem, memPtrTbl) <- - MSM.newMergedGlobalMemoryWith populateRelocation (Proxy @arch) bak - endianness MSM.ConcreteMutable mems - let mmConf = (MSM.memModelConfig bak memPtrTbl) - { MS.lookupFunctionHandle = lookupFunction archVals binfo - , MS.lookupSyscallHandle = lookupSyscall - , MS.mkGlobalPointerValidityAssertion = validityCheck - } - pure (initMem, mmConf) - LazyMemModel -> do - (initMem, memPtrTbl) <- - MSMLazy.newMergedGlobalMemoryWith populateRelocation (Proxy @arch) bak - endianness MSMLazy.ConcreteMutable mems - let mmConf = (MSMLazy.memModelConfig bak memPtrTbl) - { MS.lookupFunctionHandle = lookupFunction archVals binfo - , MS.lookupSyscallHandle = lookupSyscall - , MS.mkGlobalPointerValidityAssertion = validityCheck - } - pure (initMem, mmConf) + DefaultMemModel -> initialMem binfo bak archInfo archVals + LazyMemModel -> lazyInitialMem binfo bak archInfo archVals -- Allocate a stack and insert it into memory -- From 0c5ed97e84b1dbf94ea90535239779cfb3f69cd1 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 4 Sep 2024 15:21:07 -0400 Subject: [PATCH 07/22] symbolic: Rename and move a helper function in `Testing` --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index fdfd35d5..50544dd8 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -439,14 +439,6 @@ simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmP WSR.Unsat {} -> return (SimulationResult Unsat) WSR.Unknown -> return (SimulationResult Unknown) -discMems :: - BinariesInfo arch -> - V.Vector (MEL.Memory (MC.ArchAddrWidth arch)) -discMems binfo = - let mainInfo = mainBinaryInfo binfo in - let soInfos = sharedLibraryInfos binfo in - V.map (MD.memory . binaryDiscState) (V.cons mainInfo soInfos) - initialMem :: ( ext ~ MS.MacawExt arch , CCE.IsSyntaxExtension ext @@ -469,7 +461,7 @@ initialMem binfo bak archInfo archVals = do let endianness = MSMLazy.toCrucibleEndian (MAI.archEndianness archInfo) (mem, memPtrTbl) <- MSM.newMergedGlobalMemoryWith populateRelocation archInfo bak - endianness MSM.ConcreteMutable (discMems binfo) + endianness MSM.ConcreteMutable (binariesMems binfo) let mmConf = (MSM.memModelConfig bak memPtrTbl) { MS.lookupFunctionHandle = lookupFunction archVals binfo , MS.lookupSyscallHandle = lookupSyscall @@ -500,7 +492,7 @@ lazyInitialMem binfo bak archInfo archVals = do let endianness = MSMLazy.toCrucibleEndian (MAI.archEndianness archInfo) (mem, memPtrTbl) <- MSMLazy.newMergedGlobalMemoryWith populateRelocation archInfo bak - endianness MSMLazy.ConcreteMutable (discMems binfo) + endianness MSMLazy.ConcreteMutable (binariesMems binfo) let mmConf = (MSMLazy.memModelConfig bak memPtrTbl) { MS.lookupFunctionHandle = lookupFunction archVals binfo , MS.lookupSyscallHandle = lookupSyscall @@ -866,6 +858,14 @@ data BinariesInfo arch = BinariesInfo -- revisited. } +binariesMems :: + BinariesInfo arch -> + V.Vector (MEL.Memory (MC.ArchAddrWidth arch)) +binariesMems binfo = + let mainInfo = mainBinaryInfo binfo in + let soInfos = sharedLibraryInfos binfo in + V.map (MD.memory . binaryDiscState) (V.cons mainInfo soInfos) + -- | Information about an individual binary. data BinaryInfo arch = BinaryInfo { binaryDiscState :: MD.DiscoveryState arch From 820401d12e59e3bf166864823dd5ba9690f85cfb Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 4 Sep 2024 15:25:10 -0400 Subject: [PATCH 08/22] symbolic: Factor out fresh register assignment generator --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index 50544dd8..b6a7c25e 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -302,6 +302,18 @@ mkInitialRegVal archFns sym r = do MT.FloatTypeRepr {} -> error ("Float-typed registers are not supported in the macaw-symbolic test harness: " ++ show regName) MT.VecTypeRepr {} -> error ("Vector-typed registers are not supported in the macaw-symbolic test harness: " ++ show regName) + +freshRegs :: + CB.IsSymInterface sym => + MT.HasRepr (MC.ArchReg arch) MT.TypeRepr => + MS.MacawSymbolicArchFunctions arch -> + sym -> + IO (Ctx.Assignment (CS.RegValue' sym) (MS.CtxToCrucibleType (MS.ArchRegContext arch))) +freshRegs symArchFns sym = + MS.macawAssignToCrucM + (mkInitialRegVal symArchFns sym) + (MS.crucGenRegAssignment symArchFns) + -- | Create a name for the given 'MD.DiscoveryFunInfo' -- -- If the function has no name, just use its address @@ -581,7 +593,7 @@ simulateFunction binfo bak execFeatures archInfo archVals halloc mmPreset g = do -- The functions in the test-suite do not (and should not) rely on accessing -- data in their caller's stack frames, even though that wouldn't cause test -- failures with this setup. - initialRegs <- MS.macawAssignToCrucM (mkInitialRegVal symArchFns sym) (MS.crucGenRegAssignment symArchFns) + initialRegs <- freshRegs symArchFns sym stackInitialOffset <- WI.bvLit sym WI.knownRepr (BVS.mkBV WI.knownRepr mib) sp <- CLM.ptrAdd sym WI.knownRepr stackBasePtr stackInitialOffset let initialRegsEntry = CS.RegEntry regsRepr initialRegs From c65ad3434345e2d0d137b29f2313cafd6309a6dd Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 4 Sep 2024 15:29:27 -0400 Subject: [PATCH 09/22] symbolic: Data type for `(mem, mmConf)` tuple --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index b6a7c25e..32835dbb 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -468,7 +468,7 @@ initialMem :: bak -> MAI.ArchitectureInfo arch -> MS.ArchVals arch -> - IO (CLM.MemImpl sym, MS.MemModelConfig p sym arch CLM.Mem) + IO (InitialMem p sym arch) initialMem binfo bak archInfo archVals = do let endianness = MSMLazy.toCrucibleEndian (MAI.archEndianness archInfo) (mem, memPtrTbl) <- @@ -479,7 +479,7 @@ initialMem binfo bak archInfo archVals = do , MS.lookupSyscallHandle = lookupSyscall , MS.mkGlobalPointerValidityAssertion = validityCheck } - pure (mem, mmConf) + pure (InitialMem mem mmConf) lazyInitialMem :: ( ext ~ MS.MacawExt arch @@ -499,7 +499,7 @@ lazyInitialMem :: bak -> MAI.ArchitectureInfo arch -> MS.ArchVals arch -> - IO (CLM.MemImpl sym, MS.MemModelConfig p sym arch CLM.Mem) + IO (InitialMem p sym arch) lazyInitialMem binfo bak archInfo archVals = do let endianness = MSMLazy.toCrucibleEndian (MAI.archEndianness archInfo) (mem, memPtrTbl) <- @@ -510,7 +510,13 @@ lazyInitialMem binfo bak archInfo archVals = do , MS.lookupSyscallHandle = lookupSyscall , MS.mkGlobalPointerValidityAssertion = validityCheck } - pure (mem, mmConf) + pure (InitialMem mem mmConf) + +data InitialMem p sym arch + = InitialMem + { _initMemMem :: CLM.MemImpl sym + , _initMemMmConf :: MS.MemModelConfig p sym arch CLM.Mem + } -- | Set up the symbolic execution engine with initial states and execute -- @@ -564,7 +570,7 @@ simulateFunction binfo bak execFeatures archInfo archVals halloc mmPreset g = do -- initial contents) and a totally symbolic stack let ?ptrWidth = WI.knownRepr - (initMem, mmConf) <- + (InitialMem initMem mmConf) <- case mmPreset of DefaultMemModel -> initialMem binfo bak archInfo archVals LazyMemModel -> lazyInitialMem binfo bak archInfo archVals From 6fcdb938f078a3271b0f22996e1f7deebc7e8bf1 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 4 Sep 2024 15:34:29 -0400 Subject: [PATCH 10/22] symbolic: Pass initial memory into `simulateFunction` as an argument --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 31 +++++++++++---------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index 32835dbb..2a005a53 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -420,8 +420,19 @@ simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmP MS.mkFunCFG (MS.archFunctions archVals) halloc funName (posFn (binaryPath mainInfo)) dfi let ?recordLLVMAnnotation = \_ _ _ -> return () + + -- Initialize memory + -- + -- This includes both global static memory (taken from the data segment + -- initial contents) and a totally symbolic stack + iMem <- + case mmPreset of + DefaultMemModel -> initialMem binfo bak archInfo archVals + LazyMemModel -> lazyInitialMem binfo bak archInfo archVals + (memVar, stackPointer, execResult) <- - simulateFunction binfo bak execFeatures archInfo archVals halloc mmPreset g + simulateFunction bak execFeatures archVals halloc iMem g + case execResult of CS.TimeoutResult {} -> return SimulationTimeout CS.AbortedResult {} -> return SimulationAborted @@ -546,34 +557,24 @@ simulateFunction :: forall arch sym bak w solver scope st fs ext blocks , WPO.OnlineSolver solver , ?memOpts :: CLM.MemOptions ) - => BinariesInfo arch - -> bak + => bak -> [CS.GenericExecutionFeature sym] - -> MAI.ArchitectureInfo arch -> MS.ArchVals arch -> CFH.HandleAllocator - -> MemModelPreset + -> InitialMem (MS.MacawLazySimulatorState sym w) sym arch -> CCC.CFG ext blocks (Ctx.EmptyCtx Ctx.::> MS.ArchRegStruct arch) (MS.ArchRegStruct arch) -> IO ( CS.GlobalVar CLM.Mem , CLM.LLVMPtr sym w , CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch)) ) -simulateFunction binfo bak execFeatures archInfo archVals halloc mmPreset g = do +simulateFunction bak execFeatures archVals halloc iMem g = do let sym = CB.backendGetSym bak let symArchFns = MS.archFunctions archVals let crucRegTypes = MS.crucArchRegTypes symArchFns let regsRepr = CT.StructRepr crucRegTypes - -- Initialize memory - -- - -- This includes both global static memory (taken from the data segment - -- initial contents) and a totally symbolic stack - + InitialMem initMem mmConf <- pure iMem let ?ptrWidth = WI.knownRepr - (InitialMem initMem mmConf) <- - case mmPreset of - DefaultMemModel -> initialMem binfo bak archInfo archVals - LazyMemModel -> lazyInitialMem binfo bak archInfo archVals -- Allocate a stack and insert it into memory -- From 2372f699da9dc51807adde78df98c2df9af720dc Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 4 Sep 2024 15:45:16 -0400 Subject: [PATCH 11/22] symbolic: Factor out construction of initial registers and stack --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 82 +++++++++++++-------- 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index 2a005a53..53415cfe 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -314,6 +314,55 @@ freshRegs symArchFns sym = (mkInitialRegVal symArchFns sym) (MS.crucGenRegAssignment symArchFns) +-- | Make initial registers, including setting up a stack pointer. +-- +-- The stack pointer points to the middle of the stack allocation. This is a +-- hack. We do this because each architecture has some expected stack layout +-- (e.g., x86_64 expects a return address just above the stack pointer; +-- PPC expects a "back chain"; and AArch32, PPC, and x86_64 all expect +-- stack-spilled arguments above everything else). Setting the stack pointer +-- to the middle of the allocation allows reads of fully symbolic data from +-- above the stack pointer, and this seems to be good enough to make our tests +-- pass. +-- +-- The functions in the test-suite do not (and should not) rely on accessing +-- data in their caller's stack frames, even though that wouldn't cause test +-- failures with this setup. +defaultRegs :: + CB.IsSymBackend sym bak => + CLM.HasPtrWidth (MC.ArchAddrWidth arch) => + (?memOpts :: CLM.MemOptions) => + MS.SymArchConstraints arch => + CLM.HasLLVMAnn sym => + MT.HasRepr (MC.ArchReg arch) MT.TypeRepr => + bak -> + MS.ArchVals arch -> + CLM.MemImpl sym -> + IO + ( CS.RegEntry sym (MS.ArchRegStruct arch) + , CLM.LLVMPtr sym (MC.ArchAddrWidth arch) + , CLM.MemImpl sym + ) +defaultRegs bak archVals mem = do + let sym = CB.backendGetSym bak + + let kib = 1024 + let mib = 1024 * kib + stackSize <- WI.bvLit sym ?ptrWidth (BVS.mkBV ?ptrWidth (2 * mib)) + (MSS.ArrayStack stackBasePtr _stackTopPtr _stackArrayStorage, mem') <- + MSS.createArrayStack bak mem (MSS.ExtraStackSlots 0) stackSize + stackInitialOffset <- WI.bvLit sym ?ptrWidth (BVS.mkBV ?ptrWidth mib) + sp <- CLM.ptrAdd sym ?ptrWidth stackBasePtr stackInitialOffset + + let symArchFns = MS.archFunctions archVals + initialRegs <- freshRegs symArchFns sym + + let crucRegTypes = MS.crucArchRegTypes symArchFns + let regsRepr = CT.StructRepr crucRegTypes + let initialRegsEntry = CS.RegEntry regsRepr initialRegs + let regs = MS.updateReg archVals initialRegsEntry MC.sp_reg sp + pure (regs, sp, mem') + -- | Create a name for the given 'MD.DiscoveryFunInfo' -- -- If the function has no name, just use its address @@ -575,41 +624,12 @@ simulateFunction bak execFeatures archVals halloc iMem g = do InitialMem initMem mmConf <- pure iMem let ?ptrWidth = WI.knownRepr - - -- Allocate a stack and insert it into memory - -- - -- The stack allocation uses the SMT array backed memory model (rather than - -- the Haskell-level memory model) - let kib = 1024 - let mib = 1024 * kib - stackSize <- WI.bvLit sym WI.knownRepr (BVS.mkBV WI.knownRepr (2 * mib)) - (MSS.ArrayStack stackBasePtr _stackTopPtr _stackArrayStorage, mem1) <- - MSS.createArrayStack bak initMem (MSS.ExtraStackSlots 0) stackSize - - -- Make initial registers, including setting up a stack pointer. - -- - -- The stack pointer points to the middle of the stack allocation. This is a - -- hack. We do this because each architecture has some expected stack layout - -- (e.g., x86_64 expects a return address just above the stack pointer; - -- PPC expects a "back chain"; and AArch32, PPC, and x86_64 all expect - -- stack-spilled arguments above everything else). Setting the stack pointer - -- to the middle of the allocation allows reads of fully symbolic data from - -- above the stack pointer, and this seems to be good enough to make our tests - -- pass. - -- - -- The functions in the test-suite do not (and should not) rely on accessing - -- data in their caller's stack frames, even though that wouldn't cause test - -- failures with this setup. - initialRegs <- freshRegs symArchFns sym - stackInitialOffset <- WI.bvLit sym WI.knownRepr (BVS.mkBV WI.knownRepr mib) - sp <- CLM.ptrAdd sym WI.knownRepr stackBasePtr stackInitialOffset - let initialRegsEntry = CS.RegEntry regsRepr initialRegs - let regsWithStack = MS.updateReg archVals initialRegsEntry MC.sp_reg sp + (regs, sp, mem1) <- defaultRegs bak archVals initMem memVar <- CLM.mkMemVar "macaw-symbolic:test-harness:llvm_memory" halloc let lazySimSt = MS.emptyMacawLazySimulatorState let initGlobals = CSG.insertGlobal memVar mem1 CS.emptyGlobals - let arguments = CS.RegMap (Ctx.singleton regsWithStack) + let arguments = CS.RegMap (Ctx.singleton regs) let simAction = CS.runOverrideSim regsRepr (CS.regValue <$> CS.callCFG g arguments) let fnBindings = CFH.insertHandleMap (CCC.cfgHandle g) (CS.UseCFG g (CAP.postdomInfo g)) CFH.emptyHandleMap From 2033a7d20a1299ea96195d5b57f8f813208f2bd2 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 4 Sep 2024 16:02:23 -0400 Subject: [PATCH 12/22] symbolic: Further split up `simulateFunction` --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 40 +++++++++++++++++---- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index 53415cfe..7f8ab884 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -617,18 +617,46 @@ simulateFunction :: forall arch sym bak w solver scope st fs ext blocks , CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch)) ) simulateFunction bak execFeatures archVals halloc iMem g = do + InitialMem initMem mmConf <- pure iMem + let ?ptrWidth = WI.knownRepr + (regs, sp, mem) <- defaultRegs bak archVals initMem + (memVar, res) <- simMacawCfg bak execFeatures archVals halloc mmConf mem regs g + pure (memVar, sp, res) + +-- | Simulate a Macaw CFG, given initial registers and memory +simMacawCfg :: + ( ext ~ MS.MacawExt arch + , CCE.IsSyntaxExtension ext + , CB.IsSymBackend sym bak + , CLM.HasLLVMAnn sym + , MS.SymArchConstraints arch + , w ~ MC.ArchAddrWidth arch + , 16 <= w + , sym ~ WE.ExprBuilder scope st fs + , bak ~ CBO.OnlineBackend solver scope st fs + , WPO.OnlineSolver solver + , ?memOpts :: CLM.MemOptions + ) => + bak -> + [CS.GenericExecutionFeature sym] -> + MS.ArchVals arch -> + CFH.HandleAllocator -> + MS.MemModelConfig (MS.MacawLazySimulatorState sym w) sym arch CLM.Mem -> + CLM.MemImpl sym -> + CS.RegEntry sym (MS.ArchRegStruct arch) -> + CCC.CFG ext blocks (Ctx.EmptyCtx Ctx.::> MS.ArchRegStruct arch) (MS.ArchRegStruct arch) -> + IO ( CS.GlobalVar CLM.Mem + , CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch)) + ) +simMacawCfg bak execFeatures archVals halloc mmConf mem regs g = do let sym = CB.backendGetSym bak let symArchFns = MS.archFunctions archVals let crucRegTypes = MS.crucArchRegTypes symArchFns let regsRepr = CT.StructRepr crucRegTypes - InitialMem initMem mmConf <- pure iMem - let ?ptrWidth = WI.knownRepr - (regs, sp, mem1) <- defaultRegs bak archVals initMem - memVar <- CLM.mkMemVar "macaw-symbolic:test-harness:llvm_memory" halloc let lazySimSt = MS.emptyMacawLazySimulatorState - let initGlobals = CSG.insertGlobal memVar mem1 CS.emptyGlobals + let initGlobals = CSG.insertGlobal memVar mem CS.emptyGlobals let arguments = CS.RegMap (Ctx.singleton regs) let simAction = CS.runOverrideSim regsRepr (CS.regValue <$> CS.callCFG g arguments) @@ -638,7 +666,7 @@ simulateFunction bak execFeatures archVals halloc iMem g = do let ctx = CS.initSimContext bak CLI.llvmIntrinsicTypes halloc IO.stdout (CS.FnBindings fnBindings) extImpl lazySimSt let s0 = CS.InitialState ctx initGlobals CS.defaultAbortHandler regsRepr simAction res <- CS.executeCrucible (fmap CS.genericToExecutionFeature execFeatures) s0 - return (memVar, sp, res) + return (memVar, res) -- | A wrapper around the symbolic backend that allows us to recover the online -- constraints when they are available From be0a57f555eac534f0677d118ac5c92a718df2e4 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Thu, 5 Sep 2024 11:20:59 -0400 Subject: [PATCH 13/22] symbolic: Don't feed the stack pointer to the `ResultExtractor` None of the supported architectures return values via the stack, and tracking the stack pointer needlessly complicates the code. --- macaw-aarch32-symbolic/tests/Main.hs | 2 +- macaw-ppc-symbolic/tests/Main.hs | 2 +- macaw-riscv-symbolic/tests/Main.hs | 2 +- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 28 +++++++-------------- x86_symbolic/tests/Main.hs | 2 +- 5 files changed, 13 insertions(+), 23 deletions(-) diff --git a/macaw-aarch32-symbolic/tests/Main.hs b/macaw-aarch32-symbolic/tests/Main.hs index 9abe7ecc..ca86be36 100644 --- a/macaw-aarch32-symbolic/tests/Main.hs +++ b/macaw-aarch32-symbolic/tests/Main.hs @@ -106,7 +106,7 @@ armResultExtractor :: ( CB.IsSymInterface sym ) => MS.ArchVals MA.ARM -> MST.ResultExtractor sym MA.ARM -armResultExtractor archVals = MST.ResultExtractor $ \regs _sp _mem k -> do +armResultExtractor archVals = MST.ResultExtractor $ \regs _mem k -> do let re = MS.lookupReg archVals regs (MAR.ARMGlobalBV (ASL.knownGlobalRef @"_R0")) k PC.knownRepr (CS.regValue re) diff --git a/macaw-ppc-symbolic/tests/Main.hs b/macaw-ppc-symbolic/tests/Main.hs index 4142b6d1..27f6f85c 100644 --- a/macaw-ppc-symbolic/tests/Main.hs +++ b/macaw-ppc-symbolic/tests/Main.hs @@ -118,7 +118,7 @@ ppcResultExtractor :: ( arch ~ MP.AnyPPC v ) => MS.ArchVals arch -> MST.ResultExtractor sym arch -ppcResultExtractor archVals = MST.ResultExtractor $ \regs _sp _mem k -> do +ppcResultExtractor archVals = MST.ResultExtractor $ \regs _mem k -> do let re = MS.lookupReg archVals regs (MP.PPC_GP (DP.GPR 3)) k PC.knownRepr (CS.regValue re) diff --git a/macaw-riscv-symbolic/tests/Main.hs b/macaw-riscv-symbolic/tests/Main.hs index cb62f6cc..0034d97a 100644 --- a/macaw-riscv-symbolic/tests/Main.hs +++ b/macaw-riscv-symbolic/tests/Main.hs @@ -109,7 +109,7 @@ riscvResultExtractor :: ( arch ~ MR.RISCV rv ) => MS.ArchVals arch -> MST.ResultExtractor sym arch -riscvResultExtractor archVals = MST.ResultExtractor $ \regs _sp _mem k -> do +riscvResultExtractor archVals = MST.ResultExtractor $ \regs _mem k -> do let re = MS.lookupReg archVals regs MR.GPR_A0 k PC.knownRepr (CS.regValue re) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index 7f8ab884..91170713 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -338,11 +338,7 @@ defaultRegs :: bak -> MS.ArchVals arch -> CLM.MemImpl sym -> - IO - ( CS.RegEntry sym (MS.ArchRegStruct arch) - , CLM.LLVMPtr sym (MC.ArchAddrWidth arch) - , CLM.MemImpl sym - ) + IO (CS.RegEntry sym (MS.ArchRegStruct arch) , CLM.MemImpl sym) defaultRegs bak archVals mem = do let sym = CB.backendGetSym bak @@ -361,7 +357,7 @@ defaultRegs bak archVals mem = do let regsRepr = CT.StructRepr crucRegTypes let initialRegsEntry = CS.RegEntry regsRepr initialRegs let regs = MS.updateReg archVals initialRegsEntry MC.sp_reg sp - pure (regs, sp, mem') + pure (regs, mem') -- | Create a name for the given 'MD.DiscoveryFunInfo' -- @@ -479,7 +475,7 @@ simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmP DefaultMemModel -> initialMem binfo bak archInfo archVals LazyMemModel -> lazyInitialMem binfo bak archInfo archVals - (memVar, stackPointer, execResult) <- + (memVar, execResult) <- simulateFunction bak execFeatures archVals halloc iMem g case execResult of @@ -496,7 +492,7 @@ simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmP Just postMem -> pure postMem Nothing -> error $ "simulateAndVerify: Could not find global variable: " ++ Text.unpack (CS.globalName memVar) - withResult (gp L.^. CS.gpValue) stackPointer postMem $ \resRepr val -> do + withResult (gp L.^. CS.gpValue) postMem $ \resRepr val -> do -- The function is assumed to return a value that is intended to be -- True. Try to prove that (by checking the negation for unsat) -- @@ -585,9 +581,7 @@ data InitialMem p sym arch -- 1. The global variable that holds the memory state (allowing for -- inspecting the post-state, which is extracted from the 'CS.ExecResult') -- --- 2. The stack pointer for the function --- --- 3. The result of symbolic execution +-- 2. The result of symbolic execution -- -- Note that the provided 'CLM.MemImpl' is expected to have its global state -- populated as desired. The default loader populates it with concrete (and @@ -613,15 +607,14 @@ simulateFunction :: forall arch sym bak w solver scope st fs ext blocks -> InitialMem (MS.MacawLazySimulatorState sym w) sym arch -> CCC.CFG ext blocks (Ctx.EmptyCtx Ctx.::> MS.ArchRegStruct arch) (MS.ArchRegStruct arch) -> IO ( CS.GlobalVar CLM.Mem - , CLM.LLVMPtr sym w , CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch)) ) simulateFunction bak execFeatures archVals halloc iMem g = do InitialMem initMem mmConf <- pure iMem let ?ptrWidth = WI.knownRepr - (regs, sp, mem) <- defaultRegs bak archVals initMem + (regs, mem) <- defaultRegs bak archVals initMem (memVar, res) <- simMacawCfg bak execFeatures archVals halloc mmConf mem regs g - pure (memVar, sp, res) + pure (memVar, res) -- | Simulate a Macaw CFG, given initial registers and memory simMacawCfg :: @@ -695,10 +688,8 @@ defaultExecFeatures backend = -- | This type wraps up a function that takes the post-state of a symbolic -- execution and extracts the return value from executing that function -- --- The details are architecture specific. Some architectures return values via --- dedicated registers, while others return values on the stack. --- --- This function takes the final register state and the post-state of memory, +-- Which register stores the return value is architecture-specific. This +-- function takes the final register state and the post-state of memory, -- allowing arbitrary access. -- -- Note that the function that clients provide could return any arbitrary @@ -710,7 +701,6 @@ defaultExecFeatures backend = data ResultExtractor sym arch where ResultExtractor :: (forall a . CS.RegEntry sym (MS.ArchRegStruct arch) - -> CLM.LLVMPtr sym (MC.ArchAddrWidth arch) -> CLM.MemImpl sym -> (forall n . (1 <= n) => PN.NatRepr n -> CLM.LLVMPtr sym n -> IO a) -> IO a) diff --git a/x86_symbolic/tests/Main.hs b/x86_symbolic/tests/Main.hs index a165b3f1..8f40ef5f 100644 --- a/x86_symbolic/tests/Main.hs +++ b/x86_symbolic/tests/Main.hs @@ -134,7 +134,7 @@ hasTestPrefix (Some dfi) = do -- Since all test functions must return a value to assert as true, this is -- straightforward to extract x86ResultExtractor :: (CB.IsSymInterface sym) => MS.ArchVals MX.X86_64 -> MST.ResultExtractor sym MX.X86_64 -x86ResultExtractor archVals = MST.ResultExtractor $ \regs _sp _mem k -> do +x86ResultExtractor archVals = MST.ResultExtractor $ \regs _mem k -> do let re = MS.lookupReg archVals regs MX.RAX k PC.knownRepr (CS.regValue re) From ec0b522a11657cf7b1e7fae89ce57a40c31eba77 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Fri, 6 Sep 2024 16:30:48 -0400 Subject: [PATCH 14/22] symbolic: Further split up `simulateAndVerify` --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 85 +++++++++++++-------- 1 file changed, 55 insertions(+), 30 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index 91170713..b6a7a12a 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -455,8 +455,7 @@ simulateAndVerify :: forall arch sym bak ids w solver scope st fs -> MD.DiscoveryFunInfo arch ids -- ^ The function to simulate and verify -> IO SimulationResult -simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmPreset (ResultExtractor withResult) dfi = - let sym = CB.backendGetSym bak in +simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmPreset extractor dfi = MS.withArchConstraints archVals $ do let funName = functionName dfi let mainInfo = mainBinaryInfo binfo @@ -478,34 +477,60 @@ simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmP (memVar, execResult) <- simulateFunction bak execFeatures archVals halloc iMem g - case execResult of - CS.TimeoutResult {} -> return SimulationTimeout - CS.AbortedResult {} -> return SimulationAborted - CS.FinishedResult _simCtx partialRes -> - case partialRes of - CS.PartialRes {} -> return SimulationPartial - CS.TotalRes gp -> do - when ("test_and_verify_" `Text.isPrefixOf` WF.functionName funName) $ - proveGoals bak goalSolver - - postMem <- case CSG.lookupGlobal memVar (gp L.^. CS.gpGlobals) of - Just postMem -> pure postMem - Nothing -> error $ "simulateAndVerify: Could not find global variable: " - ++ Text.unpack (CS.globalName memVar) - withResult (gp L.^. CS.gpValue) postMem $ \resRepr val -> do - -- The function is assumed to return a value that is intended to be - -- True. Try to prove that (by checking the negation for unsat) - -- - -- The result is treated as true if it is not equal to zero - zero <- WI.bvLit sym resRepr (BVS.mkBV resRepr 0) - bv_val <- CLM.projectLLVM_bv bak val - notZero <- WI.bvNe sym bv_val zero - goal <- WI.notPred sym notZero - WS.solver_adapter_check_sat goalSolver sym logger [goal] $ \satRes -> - case satRes of - WSR.Sat {} -> return (SimulationResult Sat) - WSR.Unsat {} -> return (SimulationResult Unsat) - WSR.Unknown -> return (SimulationResult Unknown) + summarizeExecution funName goalSolver logger bak memVar extractor execResult + +summarizeExecution :: + ( CB.IsSymBackend sym bak + , CCE.IsSyntaxExtension (MS.MacawExt arch) + , MM.MemWidth (MC.ArchAddrWidth arch) + , w ~ MC.ArchAddrWidth arch + , 16 <= w + , MT.KnownNat w + , sym ~ WE.ExprBuilder scope st fs + , bak ~ CBO.OnlineBackend solver scope st fs + , WPO.OnlineSolver solver + ) => + WF.FunctionName -> + -- | The solver adapter to use to discharge assertions + WS.SolverAdapter st -> + -- | A logger to (optionally) record solver interaction (for the goal solver) + WS.LogData -> + bak -> + CS.GlobalVar CLM.Mem -> + -- | A function to extract the return value of a function from its post-state + ResultExtractor sym arch -> + CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch)) -> + IO SimulationResult +summarizeExecution funName goalSolver logger bak memVar (ResultExtractor withResult) = + \case + CS.TimeoutResult {} -> return SimulationTimeout + CS.AbortedResult {} -> return SimulationAborted + CS.FinishedResult _simCtx partialRes -> + case partialRes of + CS.PartialRes {} -> return SimulationPartial + CS.TotalRes gp -> do + when ("test_and_verify_" `Text.isPrefixOf` WF.functionName funName) $ + proveGoals bak goalSolver + + postMem <- case CSG.lookupGlobal memVar (gp L.^. CS.gpGlobals) of + Just postMem -> pure postMem + Nothing -> error $ "simulateAndVerify: Could not find global variable: " + ++ Text.unpack (CS.globalName memVar) + withResult (gp L.^. CS.gpValue) postMem $ \resRepr val -> do + let sym = CB.backendGetSym bak + -- The function is assumed to return a value that is intended to be + -- True. Try to prove that (by checking the negation for unsat) + -- + -- The result is treated as true if it is not equal to zero + zero <- WI.bvLit sym resRepr (BVS.mkBV resRepr 0) + bv_val <- CLM.projectLLVM_bv bak val + notZero <- WI.bvNe sym bv_val zero + goal <- WI.notPred sym notZero + WS.solver_adapter_check_sat goalSolver sym logger [goal] $ + \case + WSR.Sat {} -> return (SimulationResult Sat) + WSR.Unsat {} -> return (SimulationResult Unsat) + WSR.Unknown -> return (SimulationResult Unknown) initialMem :: ( ext ~ MS.MacawExt arch From 9f2da79c293534d43950e81eb8e7987ad08f95e4 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Fri, 6 Sep 2024 16:37:01 -0400 Subject: [PATCH 15/22] symbolic: Try proving *all* safety conditions --- macaw-aarch32-symbolic/tests/README.org | 2 +- macaw-ppc-symbolic/tests/README.org | 2 +- macaw-riscv-symbolic/tests/README.org | 2 +- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 15 ++++----------- x86_symbolic/tests/README.org | 2 +- 5 files changed, 8 insertions(+), 15 deletions(-) diff --git a/macaw-aarch32-symbolic/tests/README.org b/macaw-aarch32-symbolic/tests/README.org index e9312e6c..7e90f490 100644 --- a/macaw-aarch32-symbolic/tests/README.org +++ b/macaw-aarch32-symbolic/tests/README.org @@ -1,7 +1,7 @@ * Overview This test suite tests that symbolic execution of AArch32 programs works. It is also a convenient place to test the semantics. The test harness looks for binaries in the ~pass~ and ~fail~ subdirectories. In both cases, it enumerates the functions whose names are prefixed by ~test_~, each of which are symbolically executed with fully symbolic initial states. It treats the return value of each test function as an assertion that it will attempt to prove (i.e., it checks the satisfiability of the assertion). Test cases in ~pass~ are expected to return a value that is always true, while the test cases in ~fail~ are expected to not always be true (but note that the precise models are not inspected). The test harness uses yices for path satisfiability checking and z3 to prove goals. Since path satisfiability checking is enabled, test cases can include loops as long as they symbolically terminate. -In addition to proving that the result of a test case is always true (or not), the test harness will attempt to prove all generated side conditions for tests with names prefixed by ~test_and_verify_~. In both the ~pass~ and ~fail~ directories, these side conditions are expected to pass (or they will cause test failures). +In addition to proving that the result of a test case is always true (or not), the test harness will attempt to prove all generated side conditions. In both the ~pass~ and ~fail~ directories, these side conditions are expected to pass (or they will cause test failures). * Usage diff --git a/macaw-ppc-symbolic/tests/README.org b/macaw-ppc-symbolic/tests/README.org index 5ba19d3e..a8ec97dd 100644 --- a/macaw-ppc-symbolic/tests/README.org +++ b/macaw-ppc-symbolic/tests/README.org @@ -1,7 +1,7 @@ * Overview This test suite tests that symbolic execution of PowerPC programs works. It is also a convenient place to test the semantics. The test harness looks for binaries in the ~pass~ and ~fail~ subdirectories. In both cases, it enumerates the functions whose names are prefixed by ~test_~, each of which are symbolically executed with fully symbolic initial states. It treats the return value of each test function as an assertion that it will attempt to prove (i.e., it checks the satisfiability of the assertion). Test cases in ~pass~ are expected to return a value that is always true, while the test cases in ~fail~ are expected to not always be true (but note that the precise models are not inspected). The test harness uses yices for path satisfiability checking and z3 to prove goals. Since path satisfiability checking is enabled, test cases can include loops as long as they symbolically terminate. -In addition to proving that the result of a test case is always true (or not), the test harness will attempt to prove all generated side conditions for tests with names prefixed by ~test_and_verify_~. In both the ~pass~ and ~fail~ directories, these side conditions are expected to pass (or they will cause test failures). +In addition to proving that the result of a test case is always true (or not), the test harness will attempt to prove all generated side conditions. In both the ~pass~ and ~fail~ directories, these side conditions are expected to pass (or they will cause test failures). * Usage diff --git a/macaw-riscv-symbolic/tests/README.org b/macaw-riscv-symbolic/tests/README.org index 836a5a7b..b74329ac 100644 --- a/macaw-riscv-symbolic/tests/README.org +++ b/macaw-riscv-symbolic/tests/README.org @@ -1,7 +1,7 @@ * Overview This test suite tests that symbolic execution of RISC-V programs works. It is also a convenient place to test the semantics. The test harness looks for binaries in the ~pass~ and ~fail~ subdirectories. In both cases, it enumerates the functions whose names are prefixed by ~test_~, each of which are symbolically executed with fully symbolic initial states. It treats the return value of each test function as an assertion that it will attempt to prove (i.e., it checks the satisfiability of the assertion). Test cases in ~pass~ are expected to return a value that is always true, while the test cases in ~fail~ are expected to not always be true (but note that the precise models are not inspected). The test harness uses yices for path satisfiability checking and z3 to prove goals. Since path satisfiability checking is enabled, test cases can include loops as long as they symbolically terminate. -In addition to proving that the result of a test case is always true (or not), the test harness will attempt to prove all generated side conditions for tests with names prefixed by ~test_and_verify_~. In both the ~pass~ and ~fail~ directories, these side conditions are expected to pass (or they will cause test failures). +In addition to proving that the result of a test case is always true (or not), the test harness will attempt to prove all generated side conditions. In both the ~pass~ and ~fail~ directories, these side conditions are expected to pass (or they will cause test failures). * Usage diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index b6a7a12a..055c92d6 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -42,7 +42,6 @@ module Data.Macaw.Symbolic.Testing ( import qualified Control.Exception as X import qualified Control.Lens as L import Control.Lens ( (&), (%~) ) -import Control.Monad ( when ) import Control.Monad.Except ( runExceptT ) import qualified Data.Bits as Bits import qualified Data.BitVector.Sized as BVS @@ -415,11 +414,7 @@ proveGoals bak goalSolver = do -- architecture-specific primitives). -- -- In addition to symbolically executing the function to produce a Sat/Unsat --- result, this function will attempt to verify all generate side conditions if --- the name of the function being simulated begins with @test_and_verify_@ (as --- opposed to just @test@). This is necessary for testing aspects of the --- semantics that involve the generated side conditions, rather than just the --- final result. +-- result, this function will attempt to verify all generated side conditions. simulateAndVerify :: forall arch sym bak ids w solver scope st fs . ( CB.IsSymBackend sym bak , CCE.IsSyntaxExtension (MS.MacawExt arch) @@ -477,7 +472,7 @@ simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmP (memVar, execResult) <- simulateFunction bak execFeatures archVals halloc iMem g - summarizeExecution funName goalSolver logger bak memVar extractor execResult + summarizeExecution goalSolver logger bak memVar extractor execResult summarizeExecution :: ( CB.IsSymBackend sym bak @@ -490,7 +485,6 @@ summarizeExecution :: , bak ~ CBO.OnlineBackend solver scope st fs , WPO.OnlineSolver solver ) => - WF.FunctionName -> -- | The solver adapter to use to discharge assertions WS.SolverAdapter st -> -- | A logger to (optionally) record solver interaction (for the goal solver) @@ -501,7 +495,7 @@ summarizeExecution :: ResultExtractor sym arch -> CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch)) -> IO SimulationResult -summarizeExecution funName goalSolver logger bak memVar (ResultExtractor withResult) = +summarizeExecution goalSolver logger bak memVar (ResultExtractor withResult) = \case CS.TimeoutResult {} -> return SimulationTimeout CS.AbortedResult {} -> return SimulationAborted @@ -509,8 +503,7 @@ summarizeExecution funName goalSolver logger bak memVar (ResultExtractor withRes case partialRes of CS.PartialRes {} -> return SimulationPartial CS.TotalRes gp -> do - when ("test_and_verify_" `Text.isPrefixOf` WF.functionName funName) $ - proveGoals bak goalSolver + proveGoals bak goalSolver postMem <- case CSG.lookupGlobal memVar (gp L.^. CS.gpGlobals) of Just postMem -> pure postMem diff --git a/x86_symbolic/tests/README.org b/x86_symbolic/tests/README.org index c8b73bf2..1d46a150 100644 --- a/x86_symbolic/tests/README.org +++ b/x86_symbolic/tests/README.org @@ -1,7 +1,7 @@ * Overview This test suite tests that symbolic execution of x86_64 programs works. It is also a convenient place to test the semantics. The test harness looks for binaries in the ~pass~ and ~fail~ subdirectories. In both cases, it enumerates the functions whose names are prefixed by ~test_~, each of which are symbolically executed with fully symbolic initial states. It treats the return value of each test function as an assertion that it will attempt to prove (i.e., it checks the satisfiability of the assertion). Test cases in ~pass~ are expected to return a value that is always true, while the test cases in ~fail~ are expected to not always be true (but note that the precise models are not inspected). The test harness uses yices for path satisfiability checking and z3 to prove goals. Since path satisfiability checking is enabled, test cases can include loops as long as they symbolically terminate. -In addition to proving that the result of a test case is always true (or not), the test harness will attempt to prove all generated side conditions for tests with names prefixed by ~test_and_verify_~. In both the ~pass~ and ~fail~ directories, these side conditions are expected to pass (or they will cause test failures). +In addition to proving that the result of a test case is always true (or not), the test harness will attempt to prove all generated side conditions. In both the ~pass~ and ~fail~ directories, these side conditions are expected to pass (or they will cause test failures). * Usage From ce96ba911259efe8e64ef9eaf4af2fe257d13c0c Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 11 Sep 2024 13:50:10 -0400 Subject: [PATCH 16/22] symbolic: Factor out simulation of discovered functions --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 51 +++++++++++++++++---- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index 055c92d6..a221a1e5 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -452,12 +452,7 @@ simulateAndVerify :: forall arch sym bak ids w solver scope st fs -> IO SimulationResult simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmPreset extractor dfi = MS.withArchConstraints archVals $ do - let funName = functionName dfi - let mainInfo = mainBinaryInfo binfo halloc <- CFH.newHandleAllocator - CCC.SomeCFG g <- - MS.mkFunCFG (MS.archFunctions archVals) halloc funName (posFn (binaryPath mainInfo)) dfi - let ?recordLLVMAnnotation = \_ _ _ -> return () -- Initialize memory @@ -470,7 +465,7 @@ simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmP LazyMemModel -> lazyInitialMem binfo bak archInfo archVals (memVar, execResult) <- - simulateFunction bak execFeatures archVals halloc iMem g + simulateFunction bak execFeatures archVals halloc iMem binfo dfi summarizeExecution goalSolver logger bak memVar extractor execResult @@ -605,7 +600,7 @@ data InitialMem p sym arch -- populated as desired. The default loader populates it with concrete (and -- mutable) values taken from the data segment of the binary (as well as the -- immutable contents of the text segment). -simulateFunction :: forall arch sym bak w solver scope st fs ext blocks +simulateFunction :: forall arch sym bak w solver scope st fs ext ids . ( ext ~ MS.MacawExt arch , CCE.IsSyntaxExtension ext , CB.IsSymBackend sym bak @@ -623,17 +618,53 @@ simulateFunction :: forall arch sym bak w solver scope st fs ext blocks -> MS.ArchVals arch -> CFH.HandleAllocator -> InitialMem (MS.MacawLazySimulatorState sym w) sym arch - -> CCC.CFG ext blocks (Ctx.EmptyCtx Ctx.::> MS.ArchRegStruct arch) (MS.ArchRegStruct arch) + -> BinariesInfo arch + -> MD.DiscoveryFunInfo arch ids -> IO ( CS.GlobalVar CLM.Mem , CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch)) ) -simulateFunction bak execFeatures archVals halloc iMem g = do +simulateFunction bak execFeatures archVals halloc iMem binfo dfi = do InitialMem initMem mmConf <- pure iMem let ?ptrWidth = WI.knownRepr (regs, mem) <- defaultRegs bak archVals initMem - (memVar, res) <- simMacawCfg bak execFeatures archVals halloc mmConf mem regs g + (memVar, res) <- simDiscoveredFunction bak execFeatures archVals halloc mmConf mem regs binfo dfi pure (memVar, res) +-- | Simulate a discovered Macaw function, given initial registers and memory +simDiscoveredFunction :: + ( ext ~ MS.MacawExt arch + , CCE.IsSyntaxExtension ext + , CB.IsSymBackend sym bak + , CLM.HasLLVMAnn sym + , MS.SymArchConstraints arch + , w ~ MC.ArchAddrWidth arch + , 16 <= w + , sym ~ WE.ExprBuilder scope st fs + , bak ~ CBO.OnlineBackend solver scope st fs + , WPO.OnlineSolver solver + , ?memOpts :: CLM.MemOptions + ) => + bak -> + [CS.GenericExecutionFeature sym] -> + MS.ArchVals arch -> + CFH.HandleAllocator -> + MS.MemModelConfig (MS.MacawLazySimulatorState sym w) sym arch CLM.Mem -> + CLM.MemImpl sym -> + CS.RegEntry sym (MS.ArchRegStruct arch) -> + BinariesInfo arch -> + MD.DiscoveryFunInfo arch ids -> + -- ^ The function to simulate + IO ( CS.GlobalVar CLM.Mem + , CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch)) + ) +simDiscoveredFunction bak execFeatures archVals halloc mmConf mem regs binfo dfi = do + let funName = functionName dfi + let mainInfo = mainBinaryInfo binfo + let pos = posFn (binaryPath mainInfo) + CCC.SomeCFG g <- + MS.mkFunCFG (MS.archFunctions archVals) halloc funName pos dfi + simMacawCfg bak execFeatures archVals halloc mmConf mem regs g + -- | Simulate a Macaw CFG, given initial registers and memory simMacawCfg :: ( ext ~ MS.MacawExt arch From 672e0c5497f64c06195e3ab311f0ef92c8ab1616 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 11 Sep 2024 14:15:07 -0400 Subject: [PATCH 17/22] symbolic: Generalize and simplify `simMacawCfg` --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 30 ++++++++++----------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index a221a1e5..637c53ad 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -663,7 +663,8 @@ simDiscoveredFunction bak execFeatures archVals halloc mmConf mem regs binfo dfi let pos = posFn (binaryPath mainInfo) CCC.SomeCFG g <- MS.mkFunCFG (MS.archFunctions archVals) halloc funName pos dfi - simMacawCfg bak execFeatures archVals halloc mmConf mem regs g + let regMap = CS.RegMap (Ctx.singleton regs) + simMacawCfg bak execFeatures archVals halloc mmConf mem regMap g -- | Simulate a Macaw CFG, given initial registers and memory simMacawCfg :: @@ -685,30 +686,27 @@ simMacawCfg :: CFH.HandleAllocator -> MS.MemModelConfig (MS.MacawLazySimulatorState sym w) sym arch CLM.Mem -> CLM.MemImpl sym -> - CS.RegEntry sym (MS.ArchRegStruct arch) -> - CCC.CFG ext blocks (Ctx.EmptyCtx Ctx.::> MS.ArchRegStruct arch) (MS.ArchRegStruct arch) -> + CS.RegMap sym argTys -> + CCC.CFG ext blocks argTys retTy -> IO ( CS.GlobalVar CLM.Mem - , CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch)) + , CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym retTy) ) -simMacawCfg bak execFeatures archVals halloc mmConf mem regs g = do +simMacawCfg bak execFeatures archVals halloc mmConf mem args g = do let sym = CB.backendGetSym bak - let symArchFns = MS.archFunctions archVals - let crucRegTypes = MS.crucArchRegTypes symArchFns - let regsRepr = CT.StructRepr crucRegTypes memVar <- CLM.mkMemVar "macaw-symbolic:test-harness:llvm_memory" halloc let lazySimSt = MS.emptyMacawLazySimulatorState let initGlobals = CSG.insertGlobal memVar mem CS.emptyGlobals - let arguments = CS.RegMap (Ctx.singleton regs) - let simAction = CS.runOverrideSim regsRepr (CS.regValue <$> CS.callCFG g arguments) + let simAction = CS.runOverrideSim (CCC.cfgReturnType g) (CS.regValue <$> CS.callCFG g args) let fnBindings = CFH.insertHandleMap (CCC.cfgHandle g) (CS.UseCFG g (CAP.postdomInfo g)) CFH.emptyHandleMap - MS.withArchEval archVals sym $ \archEvalFn -> do - let extImpl = MS.macawExtensions archEvalFn memVar mmConf - let ctx = CS.initSimContext bak CLI.llvmIntrinsicTypes halloc IO.stdout (CS.FnBindings fnBindings) extImpl lazySimSt - let s0 = CS.InitialState ctx initGlobals CS.defaultAbortHandler regsRepr simAction - res <- CS.executeCrucible (fmap CS.genericToExecutionFeature execFeatures) s0 - return (memVar, res) + extImpl <- + MS.withArchEval archVals sym $ \archEvalFn -> + pure (MS.macawExtensions archEvalFn memVar mmConf) + let ctx = CS.initSimContext bak CLI.llvmIntrinsicTypes halloc IO.stdout (CS.FnBindings fnBindings) extImpl lazySimSt + let s0 = CS.InitialState ctx initGlobals CS.defaultAbortHandler (CCC.cfgReturnType g) simAction + res <- CS.executeCrucible (fmap CS.genericToExecutionFeature execFeatures) s0 + return (memVar, res) -- | A wrapper around the symbolic backend that allows us to recover the online -- constraints when they are available From d005b9c20a1f530c83ba01c91d3c26b46ab419ea Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 11 Sep 2024 14:24:01 -0400 Subject: [PATCH 18/22] symbolic: More refactoring of testing code --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 47 ++++++++++----------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index 637c53ad..d981026a 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -469,7 +469,7 @@ simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmP summarizeExecution goalSolver logger bak memVar extractor execResult -summarizeExecution :: +summarizeExecution :: ( CB.IsSymBackend sym bak , CCE.IsSyntaxExtension (MS.MacawExt arch) , MM.MemWidth (MC.ArchAddrWidth arch) @@ -624,11 +624,11 @@ simulateFunction :: forall arch sym bak w solver scope st fs ext ids , CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch)) ) simulateFunction bak execFeatures archVals halloc iMem binfo dfi = do - InitialMem initMem mmConf <- pure iMem let ?ptrWidth = WI.knownRepr + InitialMem initMem mmConf <- pure iMem (regs, mem) <- defaultRegs bak archVals initMem - (memVar, res) <- simDiscoveredFunction bak execFeatures archVals halloc mmConf mem regs binfo dfi - pure (memVar, res) + let iMem' = InitialMem mem mmConf + simDiscoveredFunction bak execFeatures archVals halloc iMem' regs binfo dfi -- | Simulate a discovered Macaw function, given initial registers and memory simDiscoveredFunction :: @@ -648,8 +648,7 @@ simDiscoveredFunction :: [CS.GenericExecutionFeature sym] -> MS.ArchVals arch -> CFH.HandleAllocator -> - MS.MemModelConfig (MS.MacawLazySimulatorState sym w) sym arch CLM.Mem -> - CLM.MemImpl sym -> + InitialMem (MS.MacawLazySimulatorState sym w) sym arch -> CS.RegEntry sym (MS.ArchRegStruct arch) -> BinariesInfo arch -> MD.DiscoveryFunInfo arch ids -> @@ -657,14 +656,22 @@ simDiscoveredFunction :: IO ( CS.GlobalVar CLM.Mem , CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym (MS.ArchRegStruct arch)) ) -simDiscoveredFunction bak execFeatures archVals halloc mmConf mem regs binfo dfi = do +simDiscoveredFunction bak execFeatures archVals halloc iMem regs binfo dfi = do + let sym = CB.backendGetSym bak + InitialMem mem mmConf <- pure iMem + memVar <- CLM.mkMemVar "macaw-symbolic:test-harness:llvm_memory" halloc + extImpl <- + MS.withArchEval archVals sym $ \archEvalFn -> + pure (MS.macawExtensions archEvalFn memVar mmConf) + let funName = functionName dfi let mainInfo = mainBinaryInfo binfo let pos = posFn (binaryPath mainInfo) CCC.SomeCFG g <- MS.mkFunCFG (MS.archFunctions archVals) halloc funName pos dfi let regMap = CS.RegMap (Ctx.singleton regs) - simMacawCfg bak execFeatures archVals halloc mmConf mem regMap g + res <- simMacawCfg bak execFeatures halloc extImpl memVar mem regMap g + pure (memVar, res) -- | Simulate a Macaw CFG, given initial registers and memory simMacawCfg :: @@ -682,31 +689,21 @@ simMacawCfg :: ) => bak -> [CS.GenericExecutionFeature sym] -> - MS.ArchVals arch -> CFH.HandleAllocator -> - MS.MemModelConfig (MS.MacawLazySimulatorState sym w) sym arch CLM.Mem -> + CS.ExtensionImpl (MS.MacawLazySimulatorState sym w) sym (MS.MacawExt arch) -> + CS.GlobalVar CLM.Mem -> CLM.MemImpl sym -> CS.RegMap sym argTys -> CCC.CFG ext blocks argTys retTy -> - IO ( CS.GlobalVar CLM.Mem - , CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym retTy) - ) -simMacawCfg bak execFeatures archVals halloc mmConf mem args g = do - let sym = CB.backendGetSym bak - - memVar <- CLM.mkMemVar "macaw-symbolic:test-harness:llvm_memory" halloc + IO (CS.ExecResult (MS.MacawLazySimulatorState sym w) sym ext (CS.RegEntry sym retTy)) +simMacawCfg bak execFeatures halloc extImpl memVar mem args g = do let lazySimSt = MS.emptyMacawLazySimulatorState - let initGlobals = CSG.insertGlobal memVar mem CS.emptyGlobals - let simAction = CS.runOverrideSim (CCC.cfgReturnType g) (CS.regValue <$> CS.callCFG g args) - let fnBindings = CFH.insertHandleMap (CCC.cfgHandle g) (CS.UseCFG g (CAP.postdomInfo g)) CFH.emptyHandleMap - extImpl <- - MS.withArchEval archVals sym $ \archEvalFn -> - pure (MS.macawExtensions archEvalFn memVar mmConf) let ctx = CS.initSimContext bak CLI.llvmIntrinsicTypes halloc IO.stdout (CS.FnBindings fnBindings) extImpl lazySimSt + let initGlobals = CSG.insertGlobal memVar mem CS.emptyGlobals + let simAction = CS.runOverrideSim (CCC.cfgReturnType g) (CS.regValue <$> CS.callCFG g args) let s0 = CS.InitialState ctx initGlobals CS.defaultAbortHandler (CCC.cfgReturnType g) simAction - res <- CS.executeCrucible (fmap CS.genericToExecutionFeature execFeatures) s0 - return (memVar, res) + CS.executeCrucible (fmap CS.genericToExecutionFeature execFeatures) s0 -- | A wrapper around the symbolic backend that allows us to recover the online -- constraints when they are available From b37e90782eb97c9f094ea86ddc3a8b8441cb41ff Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 11 Sep 2024 14:35:10 -0400 Subject: [PATCH 19/22] symbolic: Remove redundant constraints in testing code --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index d981026a..d3e5359c 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -600,7 +600,7 @@ data InitialMem p sym arch -- populated as desired. The default loader populates it with concrete (and -- mutable) values taken from the data segment of the binary (as well as the -- immutable contents of the text segment). -simulateFunction :: forall arch sym bak w solver scope st fs ext ids +simulateFunction :: forall arch sym bak w ext ids . ( ext ~ MS.MacawExt arch , CCE.IsSyntaxExtension ext , CB.IsSymBackend sym bak @@ -608,9 +608,6 @@ simulateFunction :: forall arch sym bak w solver scope st fs ext ids , MS.SymArchConstraints arch , w ~ MC.ArchAddrWidth arch , 16 <= w - , sym ~ WE.ExprBuilder scope st fs - , bak ~ CBO.OnlineBackend solver scope st fs - , WPO.OnlineSolver solver , ?memOpts :: CLM.MemOptions ) => bak @@ -637,11 +634,6 @@ simDiscoveredFunction :: , CB.IsSymBackend sym bak , CLM.HasLLVMAnn sym , MS.SymArchConstraints arch - , w ~ MC.ArchAddrWidth arch - , 16 <= w - , sym ~ WE.ExprBuilder scope st fs - , bak ~ CBO.OnlineBackend solver scope st fs - , WPO.OnlineSolver solver , ?memOpts :: CLM.MemOptions ) => bak -> @@ -678,14 +670,6 @@ simMacawCfg :: ( ext ~ MS.MacawExt arch , CCE.IsSyntaxExtension ext , CB.IsSymBackend sym bak - , CLM.HasLLVMAnn sym - , MS.SymArchConstraints arch - , w ~ MC.ArchAddrWidth arch - , 16 <= w - , sym ~ WE.ExprBuilder scope st fs - , bak ~ CBO.OnlineBackend solver scope st fs - , WPO.OnlineSolver solver - , ?memOpts :: CLM.MemOptions ) => bak -> [CS.GenericExecutionFeature sym] -> From edc9635fe7962396ca352e6faa9e3e0b53d82c80 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 11 Sep 2024 14:55:04 -0400 Subject: [PATCH 20/22] x86-symbolic: Use SysV-compatible stack setup in test suite Fix a logic bug (bytes, not bits!) along the way --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 12 +++- x86_symbolic/macaw-x86-symbolic.cabal | 1 + .../src/Data/Macaw/X86/Symbolic/ABI/SysV.hs | 2 +- x86_symbolic/tests/Main.hs | 61 ++++++++++++++++++- 4 files changed, 70 insertions(+), 6 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index d3e5359c..1ef6e5b4 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -29,6 +29,12 @@ module Data.Macaw.Symbolic.Testing ( MemModelPreset(..), describeMemModelPreset, toAddrSymMap, + freshRegs, + InitialMem(..), + initialMem, + lazyInitialMem, + simDiscoveredFunction, + summarizeExecution, -- * Execution features SomeBackend(..), defaultExecFeatures, @@ -337,7 +343,7 @@ defaultRegs :: bak -> MS.ArchVals arch -> CLM.MemImpl sym -> - IO (CS.RegEntry sym (MS.ArchRegStruct arch) , CLM.MemImpl sym) + IO (CS.RegEntry sym (MS.ArchRegStruct arch), CLM.MemImpl sym) defaultRegs bak archVals mem = do let sym = CB.backendGetSym bak @@ -583,8 +589,8 @@ lazyInitialMem binfo bak archInfo archVals = do data InitialMem p sym arch = InitialMem - { _initMemMem :: CLM.MemImpl sym - , _initMemMmConf :: MS.MemModelConfig p sym arch CLM.Mem + { initMemMem :: CLM.MemImpl sym + , initMemMmConf :: MS.MemModelConfig p sym arch CLM.Mem } -- | Set up the symbolic execution engine with initial states and execute diff --git a/x86_symbolic/macaw-x86-symbolic.cabal b/x86_symbolic/macaw-x86-symbolic.cabal index 1f1930cc..89397ce6 100644 --- a/x86_symbolic/macaw-x86-symbolic.cabal +++ b/x86_symbolic/macaw-x86-symbolic.cabal @@ -46,6 +46,7 @@ test-suite macaw-x86-symbolic-tests hs-source-dirs: tests build-depends: base >= 4, + bv-sized, bytestring, containers, crucible, diff --git a/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs b/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs index 6a71a78c..65655054 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Symbolic/ABI/SysV.hs @@ -140,7 +140,7 @@ writeRetAddr bak mem sp retAddr = do let ?ptrWidth = MM.ptrWidth (getRetAddr retAddr) ptrSzBv <- WI.bvLit sym ?ptrWidth (BVS.mkBV ?ptrWidth ptrBytes) top <- MM.ptrSub sym ?ptrWidth (getStackPointer sp) ptrSzBv - let i64 = MM.bitvectorType (Bytes.toBytes (64 :: Int)) + let i64 = MM.bitvectorType (Bytes.toBytes (8 :: Int)) let val = MM.ptrToPtrVal (getRetAddr retAddr) mem' <- MM.storeRaw bak mem top i64 CLD.noAlignment val pure (StackPointer top, mem') diff --git a/x86_symbolic/tests/Main.hs b/x86_symbolic/tests/Main.hs index 8f40ef5f..84a3a185 100644 --- a/x86_symbolic/tests/Main.hs +++ b/x86_symbolic/tests/Main.hs @@ -5,9 +5,11 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeOperators #-} module Main (main) where import Control.Lens ( (^.) ) +import qualified Data.BitVector.Sized as BVS import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BS8 import qualified Data.ElfEdit as Elf @@ -18,6 +20,7 @@ import qualified Data.Parameterized.Classes as PC import qualified Data.Parameterized.Nonce as PN import Data.Parameterized.Some ( Some(..) ) import Data.Proxy ( Proxy(..) ) +import qualified Data.Sequence as Seq import qualified Prettyprinter as PP import System.FilePath ( (), (<.>) ) import qualified System.FilePath.Glob as SFG @@ -34,6 +37,7 @@ import qualified Data.Macaw.Symbolic.Testing as MST import qualified Data.Macaw.X86 as MX import Data.Macaw.X86.Symbolic () import qualified Data.Macaw.X86.Symbolic as MXS +import qualified Data.Macaw.X86.Symbolic.ABI.SysV as SysV import qualified What4.Config as WC import qualified What4.Interface as WI import qualified What4.ProblemFeatures as WPF @@ -41,12 +45,16 @@ import qualified What4.Solver as WS import qualified Lang.Crucible.Backend as CB import qualified Lang.Crucible.Backend.Online as CBO +import qualified Lang.Crucible.FunctionHandle as CFH import qualified Lang.Crucible.Simulator as CS +import qualified Lang.Crucible.Types as CT import qualified Lang.Crucible.LLVM.MemModel as LLVM import qualified What4.FloatMode as W4 import qualified What4.Expr.Builder as W4 import qualified Data.Parameterized.Context as Ctx +import qualified What4.Protocol.Online as WPO +import qualified Lang.Crucible.CFG.Extension as CCE -- | A Tasty option to tell us to save SMT queries and responses to /tmp for debugging purposes data SaveSMT = SaveSMT Bool @@ -140,6 +148,46 @@ x86ResultExtractor archVals = MST.ResultExtractor $ \regs _mem k -> do data MacawX86SymbolicTest t = MacawX86SymbolicTest +setupRegsAndMem :: + ( ext ~ MS.MacawExt MX.X86_64 + , CCE.IsSyntaxExtension ext + , CB.IsSymBackend sym bak + , LLVM.HasLLVMAnn sym + , sym ~ W4.ExprBuilder scope st fs + , bak ~ CBO.OnlineBackend solver scope st fs + , WPO.OnlineSolver solver + , ?memOpts :: LLVM.MemOptions + , MS.HasMacawLazySimulatorState p sym 64 + ) => + bak -> + MS.GenArchVals MS.LLVMMemory MX.X86_64 -> + MST.MemModelPreset -> + MST.BinariesInfo MX.X86_64 -> + IO ( CS.RegEntry sym (MS.ArchRegStruct MX.X86_64) + , MST.InitialMem p sym MX.X86_64 + ) +setupRegsAndMem bak archVals mmPreset binariesInfo = do + let sym = CB.backendGetSym bak + MST.InitialMem initMem mmConf <- + case mmPreset of + MST.DefaultMemModel -> MST.initialMem binariesInfo bak MX.x86_64_linux_info archVals + MST.LazyMemModel -> MST.lazyInitialMem binariesInfo bak MX.x86_64_linux_info archVals + + let symArchFns = MS.archFunctions archVals + initRegs <- MST.freshRegs symArchFns sym + let kib = 1024 + let mib = 1024 * kib + stackSize <- WI.bvLit sym CT.knownNat (BVS.mkBV CT.knownNat mib) + (regs, mem) <- SysV.allocStack bak initMem initRegs stackSize + retAddr <- SysV.freshRetAddr sym + let spilled = SysV.SpilledArgs Seq.Empty + (regs', mem') <- SysV.pushStackFrame bak mem regs spilled retAddr + let crucRegTypes = MS.crucArchRegTypes symArchFns + let regsEntry = CS.RegEntry (CT.StructRepr crucRegTypes) regs' + let iMem = MST.InitialMem mem' mmConf + pure (regsEntry, iMem) + + mkSymExTest :: MST.SimulationResult -> MST.MemModelPreset -> FilePath -> TT.TestTree mkSymExTest expected mmPreset exePath = TT.askOption $ \saveSMT@(SaveSMT _) -> TT.askOption $ \saveMacaw@(SaveMacaw _) -> TTH.testCaseSteps exePath $ \step -> do bytes <- BS.readFile exePath @@ -171,8 +219,17 @@ mkSymExTest expected mmPreset exePath = TT.askOption $ \saveSMT@(SaveSMT _) -> T let extract = x86ResultExtractor archVals logger <- makeGoalLogger saveSMT solver name exePath let ?memOpts = LLVM.defaultMemOptions - simRes <- MST.simulateAndVerify solver logger bak execFeatures MX.x86_64_linux_info archVals binariesInfo mmPreset extract dfi - TTH.assertEqual "AssertionResult" expected simRes + + MS.withArchConstraints archVals $ do + halloc <- CFH.newHandleAllocator + let ?recordLLVMAnnotation = \_ _ _ -> return () + + (regs, iMem) <- setupRegsAndMem bak archVals mmPreset binariesInfo + (memVar, execResult) <- + MST.simDiscoveredFunction bak execFeatures archVals halloc iMem regs binariesInfo dfi + + simRes <- MST.summarizeExecution solver logger bak memVar extract execResult + TTH.assertEqual "AssertionResult" expected simRes writeMacawIR :: (MC.ArchConstraints arch) => SaveMacaw -> String -> M.DiscoveryFunInfo arch ids -> IO () writeMacawIR (SaveMacaw sm) name dfi From f80b275f9c9fba54c7841cda2ce4782d4ab0a78c Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 11 Sep 2024 17:58:29 -0400 Subject: [PATCH 21/22] symbolic: Sort imports --- x86_symbolic/tests/Main.hs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/x86_symbolic/tests/Main.hs b/x86_symbolic/tests/Main.hs index 84a3a185..f92bcdb7 100644 --- a/x86_symbolic/tests/Main.hs +++ b/x86_symbolic/tests/Main.hs @@ -17,6 +17,7 @@ import qualified Data.Foldable as F import qualified Data.Map as Map import Data.Maybe ( mapMaybe ) import qualified Data.Parameterized.Classes as PC +import qualified Data.Parameterized.Context as Ctx import qualified Data.Parameterized.Nonce as PN import Data.Parameterized.Some ( Some(..) ) import Data.Proxy ( Proxy(..) ) @@ -39,23 +40,20 @@ import Data.Macaw.X86.Symbolic () import qualified Data.Macaw.X86.Symbolic as MXS import qualified Data.Macaw.X86.Symbolic.ABI.SysV as SysV import qualified What4.Config as WC +import qualified What4.Expr.Builder as W4 import qualified What4.Interface as WI import qualified What4.ProblemFeatures as WPF +import qualified What4.Protocol.Online as WPO import qualified What4.Solver as WS import qualified Lang.Crucible.Backend as CB import qualified Lang.Crucible.Backend.Online as CBO +import qualified Lang.Crucible.CFG.Extension as CCE import qualified Lang.Crucible.FunctionHandle as CFH import qualified Lang.Crucible.Simulator as CS import qualified Lang.Crucible.Types as CT import qualified Lang.Crucible.LLVM.MemModel as LLVM -import qualified What4.FloatMode as W4 -import qualified What4.Expr.Builder as W4 -import qualified Data.Parameterized.Context as Ctx -import qualified What4.Protocol.Online as WPO -import qualified Lang.Crucible.CFG.Extension as CCE - -- | A Tasty option to tell us to save SMT queries and responses to /tmp for debugging purposes data SaveSMT = SaveSMT Bool deriving (Eq, Show) From e1886a8892a26fe0e22e2f83d8b21f3d94a17651 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Wed, 11 Sep 2024 20:19:43 -0400 Subject: [PATCH 22/22] symbolic: Address review comments --- symbolic/src/Data/Macaw/Symbolic/Testing.hs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic/Testing.hs b/symbolic/src/Data/Macaw/Symbolic/Testing.hs index 1ef6e5b4..3f33ffc5 100644 --- a/symbolic/src/Data/Macaw/Symbolic/Testing.hs +++ b/symbolic/src/Data/Macaw/Symbolic/Testing.hs @@ -475,6 +475,11 @@ simulateAndVerify goalSolver logger bak execFeatures archInfo archVals binfo mmP summarizeExecution goalSolver logger bak memVar extractor execResult +-- | Post-process the results of symbolic execution +-- +-- In particular, turn a 'CS.ExecResult' into a 'SimulationResult'. If +-- simulation succeeded, uses the 'ResultExtractor' to assert that the executed +-- function returned a nonzero value. summarizeExecution :: ( CB.IsSymBackend sym bak , CCE.IsSyntaxExtension (MS.MacawExt arch) @@ -587,9 +592,14 @@ lazyInitialMem binfo bak archInfo archVals = do } pure (InitialMem mem mmConf) +-- | Initial program memory and memory configuration +-- +-- Returned by 'initialMem' and 'lazyInitialMem'. data InitialMem p sym arch = InitialMem - { initMemMem :: CLM.MemImpl sym + { -- | Initial contents of memory, before symbolic execution + initMemMem :: CLM.MemImpl sym + -- | Memory model configuration , initMemMmConf :: MS.MemModelConfig p sym arch CLM.Mem } @@ -628,7 +638,7 @@ simulateFunction :: forall arch sym bak w ext ids ) simulateFunction bak execFeatures archVals halloc iMem binfo dfi = do let ?ptrWidth = WI.knownRepr - InitialMem initMem mmConf <- pure iMem + let InitialMem initMem mmConf = iMem (regs, mem) <- defaultRegs bak archVals initMem let iMem' = InitialMem mem mmConf simDiscoveredFunction bak execFeatures archVals halloc iMem' regs binfo dfi @@ -656,7 +666,7 @@ simDiscoveredFunction :: ) simDiscoveredFunction bak execFeatures archVals halloc iMem regs binfo dfi = do let sym = CB.backendGetSym bak - InitialMem mem mmConf <- pure iMem + let InitialMem mem mmConf = iMem memVar <- CLM.mkMemVar "macaw-symbolic:test-harness:llvm_memory" halloc extImpl <- MS.withArchEval archVals sym $ \archEvalFn ->