From 7d06753371659c533d6857bbdd088d23f16c0a8a Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Tue, 17 Dec 2024 10:18:02 -0800 Subject: [PATCH 001/106] Add aarch64 detection This makes angrop run on aarch64 binaries, but it's not able to do anything useful yet. --- angrop/arch.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/angrop/arch.py b/angrop/arch.py index e4dbc3e..07d235a 100644 --- a/angrop/arch.py +++ b/angrop/arch.py @@ -94,7 +94,11 @@ def block_make_sense(self, block): if insn.insn.mnemonic[-2:] in arm_conditional_postfix: return False return True - + +class AARCH64(ROPArch): + def __init__(self, project, kernel_mode=False): + super().__init__(project, kernel_mode=kernel_mode) + class MIPS(ROPArch): def __init__(self, project, kernel_mode=False): super().__init__(project, kernel_mode=kernel_mode) @@ -109,6 +113,8 @@ def get_arch(project, kernel_mode=False): return AMD64(project, kernel_mode=mode) elif name.startswith('ARM'): return ARM(project, kernel_mode=mode) + elif name == 'AARCH64': + return AARCH64(project, kernel_mode=mode) elif name.startswith('MIPS'): return MIPS(project, kernel_mode=mode) else: From f5ca80f80b27161a10e322517949e4dcaec7a354 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Tue, 17 Dec 2024 21:48:35 -0800 Subject: [PATCH 002/106] Basic aarch64 register setting This fixes the handling of gadgets where the address of the next gadget has to be inserted in the middle instead of just concatenated to the end. angrop is now able to set registers if there is a gadget that loads into the register from the stack, and it can also chain gadgets together as long as each gadget ends with a jump to an address loaded from the stack. --- angrop/chain_builder/builder.py | 3 +++ angrop/chain_builder/reg_setter.py | 5 ++--- angrop/rop_chain.py | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index bad51be..84684dd 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -189,6 +189,9 @@ def _build_reg_setting_chain(self, gadgets, modifiable_memory_range, register_di if not sym_word.variables.intersection(c.variables): continue var_name = set(c.variables - sym_word.variables).pop() + if var_name.startswith('next_addr_'): + var = rop_utils.cast_rop_value(test_symbolic_state.solver.BVS('next_pc', self.project.arch.bits), self.project) + break if var_name not in var_dict: continue var = var_dict[var_name] diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index cee6f17..29cfd2f 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -56,9 +56,8 @@ def verify(self, chain, preserve_regs, registers): # the next pc must come from the stack if len(state.regs.pc.variables) != 1: return False - if not set(state.regs.pc.variables).pop().startswith("symbolic_stack"): - return False - return True + pc_var = set(state.regs.pc.variables).pop() + return pc_var.startswith("symbolic_stack") or pc_var.startswith("next_pc") def _maybe_fix_jump_chain(self, chain, preserve_regs): all_changed_regs = set() diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index b4880ee..a6b8f0a 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -136,8 +136,8 @@ def _concretize_chain_values(self, constraints=None, timeout=None, preserve_next """ concretize chain values with a timeout """ - if self.next_pc_idx() is not None: - return (self + self._rop.chain_builder.shift(self._p.arch.bytes))._concretize_chain_values(constraints=constraints, timeout=timeout, preserve_next_pc=preserve_next_pc) + # if self.next_pc_idx() is not None: + # return (self + self._rop.chain_builder.shift(self._p.arch.bytes))._concretize_chain_values(constraints=constraints, timeout=timeout, preserve_next_pc=preserve_next_pc) if timeout is None: timeout = self._timeout values = rop_utils.timeout(timeout)(self.__concretize_chain_values)(constraints=constraints) From cecb6d063522726715d8924f45c4986b7e8a5553 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 18 Dec 2024 23:15:29 -0800 Subject: [PATCH 003/106] Start implementing RiscyROP gadget chaining --- angrop/chain_builder/reg_setter.py | 66 ++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 29cfd2f..30b90c1 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -1,6 +1,7 @@ import heapq import logging from collections import defaultdict +from typing import Iterable, Iterator import claripy from angr.errors import SimUnsatError @@ -8,6 +9,7 @@ from .builder import Builder from .. import rop_utils from ..rop_chain import RopChain +from ..rop_gadget import RopGadget from ..errors import RopException l = logging.getLogger("angrop.chain_builder.reg_setter") @@ -508,3 +510,67 @@ def _check_if_sufficient_partial_control(self, gadget, reg, value): return False return True return False + + def _backwards_recursive_search( + self, + gadgets: Iterable[RopGadget], + registers: set[str], + current_chain: list[RopGadget] = [], + preserve_regs: set[str] = set(), + modifiable_memory_range: tuple[int, int] | None = None + ) -> Iterator[list[RopGadget]]: + """Recursively build ROP chains starting from the end using the RiscyROP algorithm.""" + # Base case. + if not registers: + yield current_chain[::-1] + return + + for gadget in gadgets: + if not gadget.changed_regs.isdisjoint(preserve_regs): + continue + remaining_regs = self._get_remaining_regs(gadget, registers) + if remaining_regs is None: + continue + current_chain.append(gadget) + yield from self._backwards_recursive_search(gadgets, remaining_regs, current_chain, preserve_regs, modifiable_memory_range) + current_chain.pop() + + def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str] | None: + """ + Get the registers that still need to be controlled after prepending a gadget. + + Returns None if this gadget cannot be used. + """ + # Check if the gadget sets any registers that we need. + if gadget.popped_regs.isdisjoint(registers) and not any( + reg_move.to_reg in registers and reg_move.bits == self.project.arch.bits + for reg_move in gadget.reg_moves + ): + return None + + remaining_regs = set() + + for reg in registers: + if reg in gadget.popped_regs: + continue + new_reg = reg + for reg_move in gadget.reg_moves: + if reg_move.to_reg == reg: + if reg_move.bits != self.project.arch.bits: + # Register is only partially overwritten. + return None + new_reg = reg_move.from_reg + break + if new_reg in remaining_regs: + # Conflict, can't put two different values in the same register. + return None + remaining_regs.add(new_reg) + + if gadget.transit_type == 'jmp_reg': + # I don't know what's the difference between these two so just error if they're different. + assert gadget.jump_reg == gadget.pc_reg + if gadget.jump_reg in remaining_regs: + return None + remaining_regs.add(gadget.jump_reg) + + return remaining_regs From f920724bcbf2243684383eff048500eae1f91c5b Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 19 Dec 2024 16:18:11 -0800 Subject: [PATCH 004/106] Check for constrained writes to target registers --- angrop/chain_builder/reg_setter.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 30b90c1..0b90cb3 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -561,6 +561,10 @@ def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str return None new_reg = reg_move.from_reg break + else: + # Check if the gadget changes the register in some other way. + if reg in gadget.changed_regs: + return None if new_reg in remaining_regs: # Conflict, can't put two different values in the same register. return None From 98a167105f88089f64986a2e0bde010babf50e16 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 19 Dec 2024 23:00:36 -0800 Subject: [PATCH 005/106] Implement concrete chain generation --- angrop/chain_builder/reg_setter.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 0b90cb3..6cf7fa4 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -578,3 +578,28 @@ def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str remaining_regs.add(gadget.jump_reg) return remaining_regs + + def _build_concrete_chain(self, gadgets: list[RopGadget], registers: dict[str, int], next_pc: int) -> list[int]: + """ + Build a concrete ROP chain from a list of gadgets. + + Return a list of stack values, not including the address of the first gadget. + """ + stack_len = sum(g.stack_change for g in gadgets) // self.project.arch.bytes + init_state = rop_utils.make_symbolic_state(self.project, self.arch.reg_set, stack_gsize=stack_len) + state = init_state + for gadget in gadgets: + state.solver.add(state.ip == gadget.addr) + state = rop_utils.step_to_unconstrained_successor(self.project, state) + state.solver.add(state.ip == next_pc) + for reg, val in registers.items(): + state.solver.add(state.registers.load(reg) == val) + return [ + state.solver.eval( + init_state.stack_read( + i * self.project.arch.bytes, + self.project.arch.bytes, + ) + ) + for i in range(stack_len) + ] From 9869f9a05540439f1b17c24dae185098a55bae68 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sun, 22 Dec 2024 22:10:27 -0800 Subject: [PATCH 006/106] Handle gadgets with conditional branches --- angrop/chain_builder/reg_setter.py | 13 ++- angrop/gadget_finder/__init__.py | 25 ++++-- angrop/gadget_finder/gadget_analyzer.py | 108 ++++++++++++------------ angrop/rop_gadget.py | 5 ++ 4 files changed, 85 insertions(+), 66 deletions(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 6cf7fa4..2576d43 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -577,6 +577,10 @@ def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str return None remaining_regs.add(gadget.jump_reg) + if not gadget.constraint_regs.isdisjoint(remaining_regs): + return None + remaining_regs |= gadget.constraint_regs + return remaining_regs def _build_concrete_chain(self, gadgets: list[RopGadget], registers: dict[str, int], next_pc: int) -> list[int]: @@ -590,7 +594,14 @@ def _build_concrete_chain(self, gadgets: list[RopGadget], registers: dict[str, i state = init_state for gadget in gadgets: state.solver.add(state.ip == gadget.addr) - state = rop_utils.step_to_unconstrained_successor(self.project, state) + for addr in gadget.bbl_addrs[1:]: + succ = state.step() + succ_states = [state for state in succ.successors if state.solver.is_true(state.ip == addr)] + assert len(succ_states) == 1 + state = succ_states[0] + succ = state.step() + assert len(succ.unconstrained_successors) == 1 + state = succ.unconstrained_successors[0] state.solver.add(state.ip == next_pc) for reg, val in registers.items(): state.solver.add(state.registers.load(reg) == val) diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index 6737c4a..d19cf6d 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -1,5 +1,6 @@ import re import logging +import itertools from multiprocessing import Pool from collections import defaultdict @@ -145,15 +146,23 @@ def get_duplicates(self): return {k:v for k,v in cache.items() if len(v) >= 2} def find_gadgets(self, processes=4, show_progress=True): - gadgets = [] self._cache = {} initargs = (self.gadget_analyzer,) - with Pool(processes=processes, initializer=_set_global_gadget_analyzer, initargs=initargs) as pool: - it = pool.imap_unordered(run_worker, self._addresses_to_check_with_caching(show_progress), chunksize=5) - for gadget in it: - if gadget is not None: - gadgets.append(gadget) + with Pool( + processes=processes, + initializer=_set_global_gadget_analyzer, + initargs=initargs, + ) as pool: + gadgets = list( + itertools.chain.from_iterable( + pool.imap_unordered( + run_worker, + self._addresses_to_check_with_caching(show_progress), + chunksize=5, + ) + ) + ) return sorted(gadgets, key=lambda x: x.addr), self.get_duplicates() @@ -164,9 +173,7 @@ def find_gadgets_single_threaded(self, show_progress=True): assert self.gadget_analyzer is not None for addr in self._addresses_to_check_with_caching(show_progress): - gadget = self.gadget_analyzer.analyze_gadget(addr) - if gadget is not None: - gadgets.append(gadget) + gadgets.extend(self.gadget_analyzer.analyze_gadget(addr)) return sorted(gadgets, key=lambda x: x.addr), self.get_duplicates() diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index ea1b2ac..2aecaa4 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -42,60 +42,62 @@ def __init__(self, project, fast_mode, kernel_mode=False, arch=None, stack_gsize @rop_utils.timeout(3) def analyze_gadget(self, addr): """ - :param addr: address to analyze for a gadget - :return: a RopGadget instance + :param addr: address to analyze for gadgets + :return: a list of RopGadget instances """ l.info("Analyzing 0x%x", addr) # Step 1: first check if the block makes sense if not self._block_make_sense(addr): - return None + return [] - try: - # Step 2: make sure the gadget can lead to a *controlled* unconstrained state within 2 steps - # TODO: shall we make the step number configurable? - if not self._can_reach_unconstrained(addr): - l.debug("... cannot get to unconstrained successor according to static analysis") - return None + init_state = self._state.copy() + init_state.ip = addr + simgr = self.project.factory.simulation_manager(init_state, save_unconstrained=True) + simgr.run(n=3) - init_state, final_state = self._reach_unconstrained_or_syscall(addr) + gadgets = [] - if not self._valid_state(init_state, final_state): - return None + for final_state in simgr.unconstrained: + try: + if not self._valid_state(init_state, final_state): + continue - ctrl_type = self._check_for_control_type(init_state, final_state) - if not ctrl_type: - # for example, jump outside of the controllable region - l.debug("... cannot maintain the control flow hijacking primitive after executing the gadget") - return None + ctrl_type = self._check_for_control_type(init_state, final_state) + if not ctrl_type: + # for example, jump outside of the controllable region + l.debug("... cannot maintain the control flow hijacking primitive after executing the gadget") + continue - # Step 3: gadget effect analysis - l.debug("... analyzing rop potential of block") - gadget = self._create_gadget(addr, init_state, final_state, ctrl_type) - if not gadget: - return None + # Step 3: gadget effect analysis + l.debug("... analyzing rop potential of block") + gadget = self._create_gadget(addr, init_state, final_state, ctrl_type) + if not gadget: + continue - # Step 4: filter out bad gadgets - # too many mem accesses, it can only be done after gadget creation - # specifically, memory access analysis - if gadget.num_mem_access > self.arch.max_sym_mem_access: - l.debug("... too many symbolic memory accesses") - return None + # Step 4: filter out bad gadgets + # too many mem accesses, it can only be done after gadget creation + # specifically, memory access analysis + if gadget.num_mem_access > self.arch.max_sym_mem_access: + l.debug("... too many symbolic memory accesses") + continue - except RopException as e: - l.debug("... %s", e) - return None - except (claripy.errors.ClaripySolverInterruptError, claripy.errors.ClaripyZ3Error, ValueError): - return None - except (claripy.ClaripyFrontendError, angr.engines.vex.claripy.ccall.CCallMultivaluedException) as e: - l.warning("... claripy error: %s", e) - return None - except Exception as e:# pylint:disable=broad-except - l.exception(e) - return None + l.debug("... Appending gadget!") + gadgets.append(gadget) - l.debug("... Appending gadget!") - return gadget + except RopException as e: + l.debug("... %s", e) + continue + except (claripy.errors.ClaripySolverInterruptError, claripy.errors.ClaripyZ3Error, ValueError): + continue + except (claripy.ClaripyFrontendError, angr.engines.vex.claripy.ccall.CCallMultivaluedException) as e: + l.warning("... claripy error: %s", e) + continue + except Exception as e:# pylint:disable=broad-except + l.exception(e) + continue + + return gadgets def _valid_state(self, init_state, final_state): if self._change_arch_state(init_state, final_state): @@ -289,21 +291,7 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type): # for jmp_reg gadget, record the jump target register if transit_type == "jmp_reg": - state = self._state.copy() - insns = self.project.factory.block(addr).capstone.insns - if state.project.arch.name.startswith("MIPS"): - idx = -2 # delayed slot - else: - idx = -1 - if len(insns) < abs(idx): - return None - jump_inst_addr = insns[idx].address - state.ip = jump_inst_addr - succ = rop_utils.step_to_unconstrained_successor(self.project, state=state) - jump_reg = list(succ.ip.variables)[0].split('_', 1)[1].rsplit('-')[0] - pc_reg = list(final_state.ip.variables)[0].split('_', 1)[1].rsplit('-')[0] - gadget.pc_reg = pc_reg - gadget.jump_reg = jump_reg + gadget.pc_reg = gadget.jump_reg = list(final_state.ip.variables)[0].split('_', 1)[1].rsplit('-')[0] # compute sp change l.debug("... computing sp change") @@ -343,6 +331,14 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type): l.debug("... mem access with no addr dependencies") return None + # Store block address list for gadgets with conditional branches + gadget.bbl_addrs = list(final_state.history.bbl_addrs) + + for constraint in final_state.history.jump_guards: + for var in constraint.variables: + if var.startswith("sreg_"): + gadget.constraint_regs.add(var.split('_', 1)[1].split('-', 1)[0]) + return gadget def _analyze_concrete_regs(self, init_state, final_state, gadget): diff --git a/angrop/rop_gadget.py b/angrop/rop_gadget.py index dcbc804..a820d32 100644 --- a/angrop/rop_gadget.py +++ b/angrop/rop_gadget.py @@ -122,6 +122,11 @@ def __init__(self, addr): # when pc_offset==stack_change-arch_bytes, transit_type is basically ret self.pc_offset = None + # List of basic block addresses for gadgets with conditional branches + self.bbl_addrs = [] + # Registers that affect path constraints + self.constraint_regs = set() + @property def num_mem_access(self): return len(self.mem_reads) + len(self.mem_writes) + len(self.mem_changes) From e118ec09987c916464168e70664d6670c92da142 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Mon, 23 Dec 2024 15:17:41 -0800 Subject: [PATCH 007/106] Prevent infinite recursion --- angrop/chain_builder/reg_setter.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 2576d43..24969c4 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -517,14 +517,24 @@ def _backwards_recursive_search( registers: set[str], current_chain: list[RopGadget] = [], preserve_regs: set[str] = set(), - modifiable_memory_range: tuple[int, int] | None = None + modifiable_memory_range: tuple[int, int] | None = None, + visited: set[tuple[str, ...]] | None = None, ) -> Iterator[list[RopGadget]]: """Recursively build ROP chains starting from the end using the RiscyROP algorithm.""" + if visited is None: + visited = set() + # Base case. if not registers: yield current_chain[::-1] return + # Stop if we've seen the same set of registers before to prevent infinite recursion. + reg_tuple = tuple(sorted(registers)) + if reg_tuple in visited: + return + visited.add(reg_tuple) + for gadget in gadgets: if not gadget.changed_regs.isdisjoint(preserve_regs): continue @@ -532,9 +542,11 @@ def _backwards_recursive_search( if remaining_regs is None: continue current_chain.append(gadget) - yield from self._backwards_recursive_search(gadgets, remaining_regs, current_chain, preserve_regs, modifiable_memory_range) + yield from self._backwards_recursive_search(gadgets, remaining_regs, current_chain, preserve_regs, modifiable_memory_range, visited) current_chain.pop() + visited.remove(reg_tuple) + def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str] | None: """ Get the registers that still need to be controlled after prepending a gadget. From fb5417c14d6e732969946b39d54b0eaf6e80da6f Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Tue, 24 Dec 2024 15:38:00 -0800 Subject: [PATCH 008/106] Support printing gadgets containing jumps --- angrop/rop_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/angrop/rop_utils.py b/angrop/rop_utils.py index 086741e..92ebe89 100644 --- a/angrop/rop_utils.py +++ b/angrop/rop_utils.py @@ -12,9 +12,8 @@ def addr_to_asmstring(project, addr): return "; ".join(["%s %s" %(i.mnemonic, i.op_str) for i in block.capstone.insns]) def gadget_to_asmstring(project, gadget): - if not gadget.block_length: - return "" - return addr_to_asmstring(project, gadget.addr) + return "; ".join(addr_to_asmstring(project, addr) for addr in gadget.bbl_addrs) + def get_ast_dependency(ast): """ From 30d277f3d1d948898cdf5c97599ca63853a3a662 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Tue, 24 Dec 2024 16:44:32 -0800 Subject: [PATCH 009/106] Fix default argument mutation bug --- angrop/chain_builder/reg_setter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 24969c4..75ae467 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -515,12 +515,15 @@ def _backwards_recursive_search( self, gadgets: Iterable[RopGadget], registers: set[str], - current_chain: list[RopGadget] = [], + current_chain: list[RopGadget] | None = None, preserve_regs: set[str] = set(), modifiable_memory_range: tuple[int, int] | None = None, visited: set[tuple[str, ...]] | None = None, ) -> Iterator[list[RopGadget]]: """Recursively build ROP chains starting from the end using the RiscyROP algorithm.""" + if current_chain is None: + current_chain = [] + if visited is None: visited = set() From 45ba31781d81502c7d4feaf5884464960d5bd148 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Tue, 24 Dec 2024 16:45:55 -0800 Subject: [PATCH 010/106] Optimize gadget search order Prioritize gadgets that result in fewer register dependencies and gadgets with less instructions. --- angrop/chain_builder/reg_setter.py | 8 ++++++++ angrop/gadget_finder/gadget_analyzer.py | 1 + angrop/rop_gadget.py | 2 ++ 3 files changed, 11 insertions(+) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 75ae467..70e7589 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -538,12 +538,20 @@ def _backwards_recursive_search( return visited.add(reg_tuple) + potential_next_gadgets = [] + for gadget in gadgets: if not gadget.changed_regs.isdisjoint(preserve_regs): continue remaining_regs = self._get_remaining_regs(gadget, registers) if remaining_regs is None: continue + potential_next_gadgets.append((gadget, remaining_regs)) + + # Sort gadgets by number of remaining registers and instruction count + potential_next_gadgets.sort(key=lambda g: (len(g[1]), g[0].isn_count)) + + for gadget, remaining_regs in potential_next_gadgets: current_chain.append(gadget) yield from self._backwards_recursive_search(gadgets, remaining_regs, current_chain, preserve_regs, modifiable_memory_range, visited) current_chain.pop() diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 2aecaa4..6faac4e 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -333,6 +333,7 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type): # Store block address list for gadgets with conditional branches gadget.bbl_addrs = list(final_state.history.bbl_addrs) + gadget.isn_count = sum(self.project.factory.block(addr).instructions for addr in gadget.bbl_addrs) for constraint in final_state.history.jump_guards: for var in constraint.variables: diff --git a/angrop/rop_gadget.py b/angrop/rop_gadget.py index a820d32..be78e7f 100644 --- a/angrop/rop_gadget.py +++ b/angrop/rop_gadget.py @@ -126,6 +126,8 @@ def __init__(self, addr): self.bbl_addrs = [] # Registers that affect path constraints self.constraint_regs = set() + # Instruction count to estimate complexity + self.isn_count = None @property def num_mem_access(self): From d6bcae259c6a39583e8829162468c21d4f9fabbb Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Tue, 24 Dec 2024 16:54:49 -0800 Subject: [PATCH 011/106] Limit maximum chain length --- angrop/chain_builder/reg_setter.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 70e7589..48e0e96 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -519,6 +519,7 @@ def _backwards_recursive_search( preserve_regs: set[str] = set(), modifiable_memory_range: tuple[int, int] | None = None, visited: set[tuple[str, ...]] | None = None, + max_length: int = 10, ) -> Iterator[list[RopGadget]]: """Recursively build ROP chains starting from the end using the RiscyROP algorithm.""" if current_chain is None: @@ -532,6 +533,9 @@ def _backwards_recursive_search( yield current_chain[::-1] return + if max_length == 0: + return + # Stop if we've seen the same set of registers before to prevent infinite recursion. reg_tuple = tuple(sorted(registers)) if reg_tuple in visited: @@ -553,7 +557,15 @@ def _backwards_recursive_search( for gadget, remaining_regs in potential_next_gadgets: current_chain.append(gadget) - yield from self._backwards_recursive_search(gadgets, remaining_regs, current_chain, preserve_regs, modifiable_memory_range, visited) + yield from self._backwards_recursive_search( + gadgets, + remaining_regs, + current_chain, + preserve_regs, + modifiable_memory_range, + visited, + max_length - 1, + ) current_chain.pop() visited.remove(reg_tuple) From 9c4f93b6e30632e324ef371f56dfe262a13359ea Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sat, 28 Dec 2024 11:56:46 -0800 Subject: [PATCH 012/106] Increase max block size for aarch64 --- angrop/arch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/angrop/arch.py b/angrop/arch.py index 07d235a..c8a872f 100644 --- a/angrop/arch.py +++ b/angrop/arch.py @@ -98,6 +98,7 @@ def block_make_sense(self, block): class AARCH64(ROPArch): def __init__(self, project, kernel_mode=False): super().__init__(project, kernel_mode=kernel_mode) + self.max_block_size = 4 * 15 class MIPS(ROPArch): def __init__(self, project, kernel_mode=False): From f3d7642f845b67b0fa562e25c0e2ab3c71756970 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sat, 28 Dec 2024 11:58:22 -0800 Subject: [PATCH 013/106] Work around memory leak issue There appears to be some kind of memory leak in angr that causes the memory usage to keep going up during gadget finding. This periodically restarts the worker processes to work around that. --- angrop/gadget_finder/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index d19cf6d..ac2cbf1 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -153,6 +153,7 @@ def find_gadgets(self, processes=4, show_progress=True): processes=processes, initializer=_set_global_gadget_analyzer, initargs=initargs, + maxtasksperchild=256, ) as pool: gadgets = list( itertools.chain.from_iterable( From 525534f2721929aca9dacfa250490315252901f1 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sat, 28 Dec 2024 15:44:03 -0800 Subject: [PATCH 014/106] Catch timeout exceptions when analyzing gadgets --- angrop/gadget_finder/gadget_analyzer.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 6faac4e..366500e 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -39,12 +39,18 @@ def __init__(self, project, fast_mode, kernel_mode=False, arch=None, stack_gsize fast_mode=self._fast_mode) self._concrete_sp = self._state.solver.eval(self._state.regs.sp) - @rop_utils.timeout(3) def analyze_gadget(self, addr): """ :param addr: address to analyze for gadgets :return: a list of RopGadget instances """ + try: + return self._analyze_gadget(addr) + except RopException: # Timeout + return [] + + @rop_utils.timeout(3) + def _analyze_gadget(self, addr): l.info("Analyzing 0x%x", addr) # Step 1: first check if the block makes sense From 406dd0934ddc067acdbeb715fd01f847748c17e4 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sat, 28 Dec 2024 15:51:47 -0800 Subject: [PATCH 015/106] Avoid gadgets with hooked addresses We don't want to go into SimProcedures when finding gadgets since those are probably not useful. --- angrop/gadget_finder/gadget_analyzer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 366500e..cf12756 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -60,7 +60,12 @@ def _analyze_gadget(self, addr): init_state = self._state.copy() init_state.ip = addr simgr = self.project.factory.simulation_manager(init_state, save_unconstrained=True) - simgr.run(n=3) + simgr.run( + n=3, + filter_func=lambda state: simgr.DROP + if state.ip.concrete and self.project.is_hooked(state.ip.concrete_value) + else None, + ) gadgets = [] From 73bd54c797d2a182b5c1e9e0d85e858b5ae918eb Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sat, 28 Dec 2024 16:16:30 -0800 Subject: [PATCH 016/106] Ensure path constraints are controllable --- angrop/gadget_finder/gadget_analyzer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index cf12756..5c2e9bc 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -350,6 +350,9 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type): for var in constraint.variables: if var.startswith("sreg_"): gadget.constraint_regs.add(var.split('_', 1)[1].split('-', 1)[0]) + elif not var.startswith("symbolic_stack_"): + l.debug("... path constraint not controlled by registers and stack") + return None return gadget From b8831d163fc018e3d00d2e2ff0d1da40d5774e19 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sat, 28 Dec 2024 16:36:10 -0800 Subject: [PATCH 017/106] Remove timeout message Some timeouts are normal such as when finding gadgets, so we shouldn't always print a message. It's also not that useful to print a timeout message without more information about where it came from so it makes more sense to do this in exception handlers instead. --- angrop/rop_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/angrop/rop_utils.py b/angrop/rop_utils.py index 92ebe89..60e6bae 100644 --- a/angrop/rop_utils.py +++ b/angrop/rop_utils.py @@ -344,7 +344,6 @@ def step_to_unconstrained_successor(project, state, max_steps=2, allow_simproced def timeout(seconds_before_timeout): def decorate(f): def handler(signum, frame):# pylint:disable=unused-argument - print("[angrop] Timeout") raise RopException("[angrop] Timeout!") def new_f(*args, **kwargs): old = signal.signal(signal.SIGALRM, handler) From fd4fbdf5fbbfbb194becdc5ab2048a0b68a64a41 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sat, 28 Dec 2024 16:59:19 -0800 Subject: [PATCH 018/106] Restart worker processes more frequently --- angrop/gadget_finder/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index ac2cbf1..4a28ca4 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -153,7 +153,7 @@ def find_gadgets(self, processes=4, show_progress=True): processes=processes, initializer=_set_global_gadget_analyzer, initargs=initargs, - maxtasksperchild=256, + maxtasksperchild=64, ) as pool: gadgets = list( itertools.chain.from_iterable( From deeb817a0045f229accb2adeb098298590fdd8e6 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sat, 28 Dec 2024 17:01:52 -0800 Subject: [PATCH 019/106] Don't print timeout exceptions when gadget finding SimSolverModeError gets raised when there's a timeout in claripy. --- angrop/gadget_finder/gadget_analyzer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 5c2e9bc..718f4e6 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -104,6 +104,8 @@ def _analyze_gadget(self, addr): except (claripy.ClaripyFrontendError, angr.engines.vex.claripy.ccall.CCallMultivaluedException) as e: l.warning("... claripy error: %s", e) continue + except angr.errors.SimSolverModeError: + continue except Exception as e:# pylint:disable=broad-except l.exception(e) continue From 668bec19209fa1f02026799dd6bed10355fa1d02 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sun, 29 Dec 2024 15:47:02 -0800 Subject: [PATCH 020/106] Constrain memory access addresses If gadgets access memory outside of the stack, make sure the addresses are valid so that it doesn't crash. --- angrop/chain_builder/reg_setter.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 48e0e96..31c839b 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -1,4 +1,5 @@ import heapq +import itertools import logging from collections import defaultdict from typing import Iterable, Iterator @@ -547,6 +548,14 @@ def _backwards_recursive_search( for gadget in gadgets: if not gadget.changed_regs.isdisjoint(preserve_regs): continue + # Skip gadgets with non-constant memory accesses if we don't have memory that can be safely accessed. + if modifiable_memory_range is None and any( + mem_access.addr_constant is None + for mem_access in itertools.chain( + gadget.mem_changes, gadget.mem_reads, gadget.mem_writes + ) + ): + continue remaining_regs = self._get_remaining_regs(gadget, registers) if remaining_regs is None: continue @@ -616,9 +625,21 @@ def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str return None remaining_regs |= gadget.constraint_regs + for mem_access in itertools.chain(gadget.mem_changes, gadget.mem_reads, gadget.mem_writes): + for reg in mem_access.addr_dependencies: + if reg in remaining_regs: + return None + remaining_regs.add(reg) + return remaining_regs - def _build_concrete_chain(self, gadgets: list[RopGadget], registers: dict[str, int], next_pc: int) -> list[int]: + def _build_concrete_chain( + self, + gadgets: list[RopGadget], + registers: dict[str, int], + next_pc: int, + modifiable_memory_range: tuple[int, int] | None, + ) -> list[int]: """ Build a concrete ROP chain from a list of gadgets. @@ -640,6 +661,12 @@ def _build_concrete_chain(self, gadgets: list[RopGadget], registers: dict[str, i state.solver.add(state.ip == next_pc) for reg, val in registers.items(): state.solver.add(state.registers.load(reg) == val) + for action in state.history.actions: + if action.type == 'mem' and action.addr.ast.symbolic: + if modifiable_memory_range is None: + raise RopException("Symbolic memory address without modifiable memory range") + state.solver.add(action.addr.ast >= modifiable_memory_range[0]) + state.solver.add(action.addr.ast < modifiable_memory_range[1]) return [ state.solver.eval( init_state.stack_read( From 0acd6f3d1b7b98d4616fe5b80feb0fc5df4a8c96 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sun, 29 Dec 2024 21:45:27 -0800 Subject: [PATCH 021/106] Catch exceptions from initial symbolic execution --- angrop/gadget_finder/gadget_analyzer.py | 29 +++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 718f4e6..127f5e3 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -57,15 +57,26 @@ def _analyze_gadget(self, addr): if not self._block_make_sense(addr): return [] - init_state = self._state.copy() - init_state.ip = addr - simgr = self.project.factory.simulation_manager(init_state, save_unconstrained=True) - simgr.run( - n=3, - filter_func=lambda state: simgr.DROP - if state.ip.concrete and self.project.is_hooked(state.ip.concrete_value) - else None, - ) + try: + init_state = self._state.copy() + init_state.ip = addr + simgr = self.project.factory.simulation_manager(init_state, save_unconstrained=True) + simgr.run( + n=3, + filter_func=lambda state: simgr.DROP + if state.ip.concrete and self.project.is_hooked(state.ip.concrete_value) + else None, + ) + except (claripy.errors.ClaripySolverInterruptError, claripy.errors.ClaripyZ3Error, ValueError): + return [] + except (claripy.ClaripyFrontendError, angr.engines.vex.claripy.ccall.CCallMultivaluedException) as e: + l.warning("... claripy error: %s", e) + return [] + except angr.errors.SimSolverModeError: + return [] + except Exception as e:# pylint:disable=broad-except + l.exception(e) + return [] gadgets = [] From 7cba3135d14404b34a97af3bfacd668ecc61615b Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sun, 29 Dec 2024 22:03:29 -0800 Subject: [PATCH 022/106] Avoid printing timeout exceptions --- angrop/errors.py | 4 ++++ angrop/gadget_finder/gadget_analyzer.py | 8 ++++++-- angrop/rop_utils.py | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/angrop/errors.py b/angrop/errors.py index 99939b8..c88eca5 100644 --- a/angrop/errors.py +++ b/angrop/errors.py @@ -4,3 +4,7 @@ class RegNotFoundException(Exception): class RopException(Exception): pass + + +class RopTimeoutException(RopException): + pass diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 127f5e3..b604c91 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -8,7 +8,7 @@ from .. import rop_utils from ..arch import get_arch, X86 from ..rop_gadget import RopGadget, RopMemAccess, RopRegMove, PivotGadget, SyscallGadget -from ..errors import RopException, RegNotFoundException +from ..errors import RopException, RegNotFoundException, RopTimeoutException l = logging.getLogger("angrop.gadget_analyzer") @@ -46,7 +46,7 @@ def analyze_gadget(self, addr): """ try: return self._analyze_gadget(addr) - except RopException: # Timeout + except RopTimeoutException: return [] @rop_utils.timeout(3) @@ -74,6 +74,8 @@ def _analyze_gadget(self, addr): return [] except angr.errors.SimSolverModeError: return [] + except RopTimeoutException: + return [] except Exception as e:# pylint:disable=broad-except l.exception(e) return [] @@ -107,6 +109,8 @@ def _analyze_gadget(self, addr): l.debug("... Appending gadget!") gadgets.append(gadget) + except RopTimeoutException: + return gadgets except RopException as e: l.debug("... %s", e) continue diff --git a/angrop/rop_utils.py b/angrop/rop_utils.py index 60e6bae..d555ab9 100644 --- a/angrop/rop_utils.py +++ b/angrop/rop_utils.py @@ -4,7 +4,7 @@ import angr import claripy -from .errors import RegNotFoundException, RopException +from .errors import RegNotFoundException, RopException, RopTimeoutException from .rop_value import RopValue def addr_to_asmstring(project, addr): @@ -344,7 +344,7 @@ def step_to_unconstrained_successor(project, state, max_steps=2, allow_simproced def timeout(seconds_before_timeout): def decorate(f): def handler(signum, frame):# pylint:disable=unused-argument - raise RopException("[angrop] Timeout!") + raise RopTimeoutException("[angrop] Timeout!") def new_f(*args, **kwargs): old = signal.signal(signal.SIGALRM, handler) old_time_left = signal.alarm(seconds_before_timeout) From 619ccf8ec83f8f6113eefadf6b88ad02b7780f84 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sun, 29 Dec 2024 22:04:53 -0800 Subject: [PATCH 023/106] Set initial ip properly when concretizing chain Turns out that project.factory.blank_state() initializes the ip to the entry point instead of making it unconstrained. --- angrop/chain_builder/reg_setter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 31c839b..1e46d91 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -647,6 +647,7 @@ def _build_concrete_chain( """ stack_len = sum(g.stack_change for g in gadgets) // self.project.arch.bytes init_state = rop_utils.make_symbolic_state(self.project, self.arch.reg_set, stack_gsize=stack_len) + init_state.ip = init_state.solver.BVS("init_ip", self.project.arch.bits) state = init_state for gadget in gadgets: state.solver.add(state.ip == gadget.addr) From 56fe645091914582dfbf113f17c30a8caa130b82 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sun, 29 Dec 2024 22:16:02 -0800 Subject: [PATCH 024/106] Update gadget comparison for gadgets with jumps gadget.block_length is only the size of the first block. --- angrop/rop_gadget.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/angrop/rop_gadget.py b/angrop/rop_gadget.py index be78e7f..a289477 100644 --- a/angrop/rop_gadget.py +++ b/angrop/rop_gadget.py @@ -160,7 +160,7 @@ def reg_set_better_than(self, other): if len(self.changed_regs) >= len(other.changed_regs) and \ self.stack_change <= other.stack_change and \ self.num_mem_access <= other.num_mem_access and \ - self.block_length <= other.block_length: + self.isn_count <= other.isn_count: return True return False From 1b33cfc63725d2973dfa35a198b1b41ceccf80d2 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sun, 29 Dec 2024 22:19:39 -0800 Subject: [PATCH 025/106] Remove constrained registers from popped_regs set If a gadget pops a value into a register but the value is used in a branch condition or memory access address, we can't fully control it so we remove it from the set of popped registers. This also makes gadget.constraint_regs include registers that affect memory access addresses. --- angrop/chain_builder/reg_setter.py | 6 ----- angrop/gadget_finder/gadget_analyzer.py | 29 +++++++++++++++++++------ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 1e46d91..b87cf9c 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -625,12 +625,6 @@ def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str return None remaining_regs |= gadget.constraint_regs - for mem_access in itertools.chain(gadget.mem_changes, gadget.mem_reads, gadget.mem_writes): - for reg in mem_access.addr_dependencies: - if reg in remaining_regs: - return None - remaining_regs.add(reg) - return remaining_regs def _build_concrete_chain( diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index b604c91..22fe4ac 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -363,13 +363,28 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type): gadget.bbl_addrs = list(final_state.history.bbl_addrs) gadget.isn_count = sum(self.project.factory.block(addr).instructions for addr in gadget.bbl_addrs) - for constraint in final_state.history.jump_guards: - for var in constraint.variables: - if var.startswith("sreg_"): - gadget.constraint_regs.add(var.split('_', 1)[1].split('-', 1)[0]) - elif not var.startswith("symbolic_stack_"): - l.debug("... path constraint not controlled by registers and stack") - return None + constraint_vars = { + var + for constraint in final_state.history.jump_guards + for var in constraint.variables + } + + for action in final_state.history.actions: + if action.type == 'mem': + constraint_vars |= action.addr.variables + + for var in constraint_vars: + if var.startswith("sreg_"): + gadget.constraint_regs.add(var.split('_', 1)[1].split('-', 1)[0]) + elif not var.startswith("symbolic_stack_"): + l.debug("... constraint not controlled by registers and stack") + return None + + gadget.popped_regs = { + reg + for reg in gadget.popped_regs + if final_state.registers.load(reg).variables.isdisjoint(constraint_vars) + } return gadget From eeb3d4ca5223c787785a1983c5264adc80666757 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sun, 29 Dec 2024 23:05:48 -0800 Subject: [PATCH 026/106] Prune search tree Backtrack if we've already seen an equivalent chain that is not longer than our current chain. --- angrop/chain_builder/reg_setter.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index b87cf9c..29a9569 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -519,7 +519,7 @@ def _backwards_recursive_search( current_chain: list[RopGadget] | None = None, preserve_regs: set[str] = set(), modifiable_memory_range: tuple[int, int] | None = None, - visited: set[tuple[str, ...]] | None = None, + visited: dict[tuple[str, ...], int] | None = None, max_length: int = 10, ) -> Iterator[list[RopGadget]]: """Recursively build ROP chains starting from the end using the RiscyROP algorithm.""" @@ -527,21 +527,21 @@ def _backwards_recursive_search( current_chain = [] if visited is None: - visited = set() + visited = {} # Base case. if not registers: yield current_chain[::-1] return - if max_length == 0: + if len(current_chain) >= max_length: return # Stop if we've seen the same set of registers before to prevent infinite recursion. reg_tuple = tuple(sorted(registers)) - if reg_tuple in visited: + if visited.get(reg_tuple, max_length) <= len(current_chain): return - visited.add(reg_tuple) + visited[reg_tuple] = len(current_chain) potential_next_gadgets = [] @@ -573,12 +573,10 @@ def _backwards_recursive_search( preserve_regs, modifiable_memory_range, visited, - max_length - 1, + max_length, ) current_chain.pop() - visited.remove(reg_tuple) - def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str] | None: """ Get the registers that still need to be controlled after prepending a gadget. From b39128242e217aab1ea46bfc2e83c3e6014433e1 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Sun, 29 Dec 2024 23:10:59 -0800 Subject: [PATCH 027/106] Add register weight heuristic Estimate how hard it is to set a register by counting the number of gadgets that pop the register, and prioritize gadgets that result in target registers which are easier to set. --- angrop/chain_builder/reg_setter.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 29a9569..becf1cf 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -1,7 +1,7 @@ import heapq import itertools import logging -from collections import defaultdict +from collections import defaultdict, Counter from typing import Iterable, Iterator import claripy @@ -23,11 +23,20 @@ def __init__(self, chain_builder): super().__init__(chain_builder) self._reg_setting_gadgets = None self.hard_chain_cache = None + # Estimate of how difficult it is to set each register. + self._reg_weights = None self.update() def update(self): self._reg_setting_gadgets = self._filter_gadgets(self.chain_builder.gadgets) self.hard_chain_cache = {} + reg_pops = Counter() + for gadget in self._reg_setting_gadgets: + reg_pops.update(gadget.popped_regs) + self._reg_weights = { + reg: 5 if reg_pops[reg] == 0 else 2 if reg_pops[reg] == 1 else 1 + for reg in self.arch.reg_set + } def verify(self, chain, preserve_regs, registers): """ @@ -562,7 +571,7 @@ def _backwards_recursive_search( potential_next_gadgets.append((gadget, remaining_regs)) # Sort gadgets by number of remaining registers and instruction count - potential_next_gadgets.sort(key=lambda g: (len(g[1]), g[0].isn_count)) + potential_next_gadgets.sort(key=lambda g: (sum(self._reg_weights[reg] for reg in g[1]), g[0].isn_count)) for gadget, remaining_regs in potential_next_gadgets: current_chain.append(gadget) From 6b35828d2f924c7345697a9458862ea4e77b5b5b Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Mon, 30 Dec 2024 15:39:01 -0800 Subject: [PATCH 028/106] Limit number of instructions in gadgets --- angrop/gadget_finder/gadget_analyzer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 22fe4ac..f50cfca 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -63,6 +63,7 @@ def _analyze_gadget(self, addr): simgr = self.project.factory.simulation_manager(init_state, save_unconstrained=True) simgr.run( n=3, + num_inst=30, filter_func=lambda state: simgr.DROP if state.ip.concrete and self.project.is_hooked(state.ip.concrete_value) else None, From 25728b10560bdad874f305565744cb129e675e89 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Mon, 30 Dec 2024 22:50:08 -0800 Subject: [PATCH 029/106] Handle two regs being popped with the same value If two registers are popped from the same location on the stack, we can control either one of them but not both. --- angrop/chain_builder/reg_setter.py | 6 ++++++ angrop/gadget_finder/gadget_analyzer.py | 5 +++++ angrop/rop_gadget.py | 3 +++ 3 files changed, 14 insertions(+) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index becf1cf..e27307d 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -600,9 +600,15 @@ def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str return None remaining_regs = set() + stack_dependencies = set() for reg in registers: if reg in gadget.popped_regs: + vars = gadget.popped_reg_vars[reg] + if not vars.isdisjoint(stack_dependencies): + # Two registers are popped from the same location on the stack. + return None + stack_dependencies |= vars continue new_reg = reg for reg_move in gadget.reg_moves: diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index f50cfca..3f3df6b 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -387,6 +387,11 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type): if final_state.registers.load(reg).variables.isdisjoint(constraint_vars) } + gadget.popped_reg_vars = { + reg: final_state.registers.load(reg).variables + for reg in gadget.popped_regs + } + return gadget def _analyze_concrete_regs(self, init_state, final_state, gadget): diff --git a/angrop/rop_gadget.py b/angrop/rop_gadget.py index a289477..f275ed3 100644 --- a/angrop/rop_gadget.py +++ b/angrop/rop_gadget.py @@ -100,6 +100,9 @@ def __init__(self, addr): # register effect information self.changed_regs = set() self.popped_regs = set() + # Stores the stack variables that each register depends on. + # Used to check for cases where two registers are popped from the same location. + self.popped_reg_vars = {} self.concrete_regs = {} self.reg_dependencies = {} # like rax might depend on rbx, rcx self.reg_controllers = {} # like rax might be able to be controlled by rbx (for any value of rcx) From 04b453baa222962ac6c2ccb48561c6011a23e27c Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Tue, 31 Dec 2024 16:17:58 -0800 Subject: [PATCH 030/106] Disable slow gadget filtering code The gadget filtering code in RegSetter and RegMover tries to compare every gadget with every other gadget, so it doesn't scale well when there are a large number of gadgets. Things seem to work fine without the filtering so I'm removing it for now. --- angrop/chain_builder/reg_mover.py | 2 +- angrop/chain_builder/reg_setter.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/angrop/chain_builder/reg_mover.py b/angrop/chain_builder/reg_mover.py index 215bc25..8dd765b 100644 --- a/angrop/chain_builder/reg_mover.py +++ b/angrop/chain_builder/reg_mover.py @@ -20,7 +20,7 @@ def __init__(self, chain_builder): self.update() def update(self): - self._reg_moving_gadgets = self._filter_gadgets(self.chain_builder.gadgets) + self._reg_moving_gadgets = self.chain_builder.gadgets def verify(self, chain, preserve_regs, registers): """ diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index e27307d..eb392c4 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -28,7 +28,7 @@ def __init__(self, chain_builder): self.update() def update(self): - self._reg_setting_gadgets = self._filter_gadgets(self.chain_builder.gadgets) + self._reg_setting_gadgets = self.chain_builder.gadgets self.hard_chain_cache = {} reg_pops = Counter() for gadget in self._reg_setting_gadgets: From 5954db0082329ada6e7c66f616750f9c3f3d39a4 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Tue, 31 Dec 2024 16:22:57 -0800 Subject: [PATCH 031/106] Integrate RiscyROP chaining algorithm into angrop This replaces the angrop gadget chaining algorithm with the backwards DFS from RiscyROP. The chain building code is modified to support gadgets with conditional branches, and remove the assumption that gadget addresses appear in the chain in the order in which the gadgets are executed. It also no longer relies on concrete value comparisons or inspecting the solver constraints to determine whether values in the resulting chain are gadget addresses or target values provided by the user. --- angrop/chain_builder/builder.py | 174 +++++++++++++++++------------ angrop/chain_builder/reg_setter.py | 73 +++++------- angrop/rop_chain.py | 25 ++++- 3 files changed, 150 insertions(+), 122 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index 84684dd..6b18fdc 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -6,6 +6,7 @@ from .. import rop_utils from ..errors import RopException +from ..rop_gadget import RopGadget from ..rop_value import RopValue from ..rop_chain import RopChain @@ -102,104 +103,131 @@ def _get_ptr_to_null(self): return None @rop_utils.timeout(2) - def _build_reg_setting_chain(self, gadgets, modifiable_memory_range, register_dict, stack_change): + def _build_reg_setting_chain( + self, gadgets, modifiable_memory_range, register_dict, stack_change + ): """ This function figures out the actual values needed in the chain for a particular set of gadgets and register values This is done by stepping a symbolic state through each gadget then constraining the final registers to the values that were requested - FIXME: trim this disgusting function """ # emulate a 'pop pc' of the first gadget - test_symbolic_state = self.make_sim_state(gadgets[0].addr) - - addrs = [g.addr for g in gadgets] - addrs.append(test_symbolic_state.solver.BVS("next_addr", self.project.arch.bits)) + test_symbolic_state = rop_utils.make_symbolic_state( + self.project, + self.arch.reg_set, + stack_gsize=stack_change // self.project.arch.bytes, + ) + rop_utils.make_reg_symbolic(test_symbolic_state, self.arch.base_pointer) + test_symbolic_state.ip = test_symbolic_state.stack_pop() + test_symbolic_state.solver._solver.timeout = 5000 + + # Maps each stack variable to the RopValue or RopGadget that should be placed there. + stack_var_to_value = {} + + def map_stack_var(ast, value): + if len(ast.variables) != 1: + raise RopException("Target value not controlled by a single variable") + var = next(iter(ast.variables)) + if not var.startswith("symbolic_stack_"): + raise RopException("Target value not controlled by the stack") + stack_var_to_value[var] = value arch_bytes = self.project.arch.bytes state = test_symbolic_state - # step through each gadget - # for each gadget, constrain memory addresses and add constraints for the successor - for addr in addrs[1:]: - succ = rop_utils.step_to_unconstrained_successor(self.project, state) - state.add_constraints(succ.regs.ip == addr) - # constrain reads/writes - for a in succ.log.actions: - if a.type == "mem" and a.addr.ast.symbolic: - if modifiable_memory_range is None: - raise RopException("Symbolic memory address when there shouldnt have been") - test_symbolic_state.add_constraints(a.addr.ast >= modifiable_memory_range[0]) - test_symbolic_state.add_constraints(a.addr.ast < modifiable_memory_range[1]) - test_symbolic_state.add_constraints(succ.regs.ip == addr) - # get to the unconstrained successor - state = rop_utils.step_to_unconstrained_successor(self.project, state) - - # re-adjuest the stack pointer - sp = test_symbolic_state.regs.sp - sp -= arch_bytes - bytes_per_pop = arch_bytes + # Step through each gadget and constrain the ip. + for gadget in gadgets: + map_stack_var(state.ip, gadget) + state.solver.add(state.ip == gadget.addr) + for addr in gadget.bbl_addrs[1:]: + succ = state.step() + succ_states = [ + state + for state in succ.successors + if state.solver.is_true(state.ip == addr) + ] + if len(succ_states) != 1: + raise RopException( + "Zero or multiple states match address of next block" + ) + state = succ_states[0] + succ = state.step() + if succ.flat_successors or len(succ.unconstrained_successors) != 1: + raise RopException( + "Executing gadget doesn't result in a single unconstrained state" + ) + state = succ.unconstrained_successors[0] + + # Record the variable that controls the final ip. + next_pc_val = rop_utils.cast_rop_value( + test_symbolic_state.solver.BVS("next_pc", self.project.arch.bits), + self.project, + ) + map_stack_var(state.ip, next_pc_val) + + # Constrain final register values. + for reg, val in register_dict.items(): + var = state.registers.load(reg) + map_stack_var(var, val) + state.solver.add(var == val) + + # Constrain memory access addresses. + for action in state.history.actions: + if action.type == action.MEM and action.addr.symbolic: + if modifiable_memory_range is None: + raise RopException( + "Symbolic memory address without modifiable memory range" + ) + state.solver.add(action.addr.ast >= modifiable_memory_range[0]) + state.solver.add(action.addr.ast < modifiable_memory_range[1]) + + test_symbolic_state.solver.add(*state.solver.constraints) - # constrain the final registers - rebase_state = test_symbolic_state.copy() - var_dict = {} - for r, v in register_dict.items(): - var = claripy.BVS(r, self.project.arch.bits) - var_name = var._encoded_name.decode() - var_dict[var_name] = v - test_symbolic_state.add_constraints(state.registers.load(r) == var) - test_symbolic_state.add_constraints(var == v.data) + bytes_per_pop = arch_bytes # constrain the "filler" values if self.roparg_filler is not None: - for i in range(stack_change // bytes_per_pop): - sym_word = test_symbolic_state.memory.load(sp + bytes_per_pop*i, bytes_per_pop, - endness=self.project.arch.memory_endness) + for offset in range(0, stack_change, bytes_per_pop): + sym_word = test_symbolic_state.stack_read(offset, bytes_per_pop) # check if we can constrain val to be the roparg_filler - if test_symbolic_state.solver.satisfiable((sym_word == self.roparg_filler,)) and \ - rebase_state.solver.satisfiable((sym_word == self.roparg_filler,)): + if test_symbolic_state.solver.satisfiable( + (sym_word == self.roparg_filler,) + ): # constrain the val to be the roparg_filler test_symbolic_state.add_constraints(sym_word == self.roparg_filler) - rebase_state.add_constraints(sym_word == self.roparg_filler) # create the ropchain - chain = RopChain(self.project, self, state=test_symbolic_state.copy(), - badbytes=self.badbytes) + chain = RopChain( + self.project, self, state=test_symbolic_state.copy(), badbytes=self.badbytes + ) # iterate through the stack values that need to be in the chain - # HACK: handle jump register separately because of angrop's broken - # assumptions on x86's ret behavior - if gadgets[-1].transit_type == 'jmp_reg': - stack_change += arch_bytes - for i in range(stack_change // bytes_per_pop): - sym_word = test_symbolic_state.memory.load(sp + bytes_per_pop*i, bytes_per_pop, - endness=self.project.arch.memory_endness) - val = test_symbolic_state.solver.eval(sym_word) - if len(gadgets) > 0 and val == gadgets[0].addr: - chain.add_gadget(gadgets[0]) - gadgets = gadgets[1:] - else: - # propagate the initial RopValue provided by users to preserve info like rebase - var = sym_word - for c in test_symbolic_state.solver.constraints: - if len(c.variables) != 2: # it is always xx == yy - continue - if not sym_word.variables.intersection(c.variables): - continue - var_name = set(c.variables - sym_word.variables).pop() - if var_name.startswith('next_addr_'): - var = rop_utils.cast_rop_value(test_symbolic_state.solver.BVS('next_pc', self.project.arch.bits), self.project) + for offset in range(-bytes_per_pop, stack_change, bytes_per_pop): + sym_word = test_symbolic_state.stack_read(offset, bytes_per_pop) + assert len(sym_word.variables) == 1 + sym_var = next(iter(sym_word.variables)) + if sym_var in stack_var_to_value: + val = stack_var_to_value[sym_var] + if isinstance(val, RopGadget): + chain.add_gadget(val, append_addr_only=True) + else: + # HACK: Because angrop appears to have originally been written + # with assumptions around x86 ret gadgets, the target of the final jump + # is not included in the chain if it is the last value. + if ( + offset == stack_change - bytes_per_pop + and sym_var is next_pc_val + ): break - if var_name not in var_dict: - continue - var = var_dict[var_name] - break - chain.add_value(var) - - if len(gadgets) > 0: - raise RopException("Didnt find all gadget addresses, something must've broke") + chain.add_value(val) + else: + chain.add_value(sym_word) + + chain.set_gadgets(gadgets) + return chain def _get_fill_val(self): diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index eb392c4..224ece0 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -45,7 +45,13 @@ def verify(self, chain, preserve_regs, registers): """ state = chain.exec() for reg, val in registers.items(): - chain_str = '\n-----\n'.join([str(self.project.factory.block(g.addr).capstone)for g in chain._gadgets]) + chain_str = "\n-----\n".join( + "\n".join( + str(self.project.factory.block(addr).capstone) + for addr in g.bbl_addrs + ) + for g in chain._gadgets + ) bv = getattr(state.regs, reg) for act in state.history.actions.hardcopy: if act.type not in ("mem", "reg"): @@ -71,19 +77,7 @@ def verify(self, chain, preserve_regs, registers): pc_var = set(state.regs.pc.variables).pop() return pc_var.startswith("symbolic_stack") or pc_var.startswith("next_pc") - def _maybe_fix_jump_chain(self, chain, preserve_regs): - all_changed_regs = set() - for g in chain._gadgets[:-1]: - all_changed_regs.update(g.changed_regs) - jump_reg = chain._gadgets[-1].jump_reg - if jump_reg in all_changed_regs: - return chain - shifter = self.chain_builder._shifter.shift(self.project.arch.bytes) - next_ip = rop_utils.cast_rop_value(shifter._gadgets[0].addr, self.project) - new = self.run(preserve_regs=preserve_regs, **{jump_reg: next_ip}) - return new + chain - - def run(self, modifiable_memory_range=None, use_partial_controllers=False, preserve_regs=None, **registers): + def run(self, modifiable_memory_range=None, use_partial_controllers=False, preserve_regs=None, max_length=10, **registers): if len(registers) == 0: return RopChain(self.project, None, badbytes=self.badbytes) @@ -97,31 +91,28 @@ def run(self, modifiable_memory_range=None, use_partial_controllers=False, pres for x in registers: registers[x] = rop_utils.cast_rop_value(registers[x], self.project) - gadgets = self._find_relevant_gadgets(**registers) - - chains = [] - - # find the chain provided by the graph search algorithm - best_chain, _, _ = self._find_reg_setting_gadgets(modifiable_memory_range, - use_partial_controllers, - preserve_regs=preserve_regs, - **registers) - if best_chain: - chains += [best_chain] - - # find chains using BFS based on pops - chains += self._find_all_candidate_chains(gadgets, preserve_regs.copy(), **registers) - - for gadgets in chains: - chain_str = '\n-----\n'.join([str(self.project.factory.block(g.addr).capstone)for g in gadgets]) + for gadgets in self._backwards_recursive_search( + self._reg_setting_gadgets, + set(registers), + current_chain=[], + preserve_regs=preserve_regs, + modifiable_memory_range=modifiable_memory_range, + visited={}, + max_length=max_length, + ): + chain_str = "\n-----\n".join( + "\n".join( + str(self.project.factory.block(addr).capstone) + for addr in g.bbl_addrs + ) + for g in gadgets + ) l.debug("building reg_setting chain with chain:\n%s", chain_str) stack_change = sum(x.stack_change for x in gadgets) try: chain = self._build_reg_setting_chain(gadgets, modifiable_memory_range, registers, stack_change) chain._concretize_chain_values(timeout=len(chain._values)*3) - if chain._gadgets[-1].transit_type == 'jmp_reg': - chain = self._maybe_fix_jump_chain(chain, preserve_regs) if self.verify(chain, preserve_regs, registers): #self._chain_cache[reg_tuple].append(gadgets) return chain @@ -525,19 +516,13 @@ def _backwards_recursive_search( self, gadgets: Iterable[RopGadget], registers: set[str], - current_chain: list[RopGadget] | None = None, - preserve_regs: set[str] = set(), - modifiable_memory_range: tuple[int, int] | None = None, - visited: dict[tuple[str, ...], int] | None = None, - max_length: int = 10, + current_chain: list[RopGadget], + preserve_regs: set[str], + modifiable_memory_range: tuple[int, int] | None, + visited: dict[tuple[str, ...], int], + max_length: int, ) -> Iterator[list[RopGadget]]: """Recursively build ROP chains starting from the end using the RiscyROP algorithm.""" - if current_chain is None: - current_chain = [] - - if visited is None: - visited = {} - # Base case. if not registers: yield current_chain[::-1] diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index a6b8f0a..a255ad2 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -1,5 +1,6 @@ from . import rop_utils from .errors import RopException +from .rop_gadget import RopGadget from .rop_value import RopValue CHAIN_TIMEOUT_DEFAULT = 3 @@ -63,8 +64,20 @@ def add_value(self, value): self._values.append(value) self.payload_len += self._p.arch.bytes - def add_gadget(self, gadget): - self._gadgets.append(gadget) + def add_gadget(self, gadget, append_addr_only=False): + # angrop was originally written with the assumption that gadget addresses + # appear in the chain in the same order in which the gadgets are executed. + # This is not always true when there are gadgets that end with a jump to + # an address from a register instead of the stack. + # For example, if the ROP chain has three gadgets A, B, and C where gadget + # B ends with a jump to some register, gadget A would have to load the + # address of gadget C into the register before jumping to gadget B. + # Therefore, the address of gadget C might need to be placed before the + # address of gadget B. + # The append_addr_only argument and the set_gadgets method below were added + # to support chains like this without breaking the existing API. + if not append_addr_only: + self._gadgets.append(gadget) value = gadget.addr if self._pie: @@ -73,12 +86,14 @@ def add_gadget(self, gadget): if self._pie: value._rebase = True - idx = self.next_pc_idx() - if idx is None: + if append_addr_only or (idx := self.next_pc_idx()) is None: self.add_value(value) else: self._values[idx] = value + def set_gadgets(self, gadgets: list[RopGadget]): + self._gadgets = gadgets + def add_constraint(self, cons): """ helpful if the chain contains variables @@ -245,7 +260,7 @@ def exec(self, max_steps=None, timeout=None): for value, _ in reversed(concrete_vals[1:]): state.stack_push(value) if max_steps is None: - max_steps = len(self._gadgets)*2 + max_steps = sum(len(gadget.bbl_addrs) for gadget in self._gadgets) return rop_utils.step_to_unconstrained_successor(self._p, state, max_steps=max_steps, allow_simprocedures=True) From 5c51b397aa302466eb72f4e23e008eaef66fe51c Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Tue, 31 Dec 2024 19:30:49 -0800 Subject: [PATCH 032/106] Reject gadgets with pc_offset >= stack_change The gadget chaining code assumes that gadgets don't read values from the stack past where the stack pointer points after the gadget is executed. --- angrop/gadget_finder/gadget_analyzer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 3f3df6b..6cfb715 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -337,6 +337,8 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type): if type(gadget) is not PivotGadget and transit_type in ['pop_pc', 'ret']: idx = list(final_state.ip.variables)[0].split('_')[2] gadget.pc_offset = int(idx) * self.project.arch.bytes + if gadget.pc_offset >= gadget.stack_change: + return None l.info("... checking for controlled regs") self._check_reg_changes(final_state, init_state, gadget) From c196262a8ed178229c41daf10e1d11bfe59bcb7c Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 1 Jan 2025 10:57:32 -0800 Subject: [PATCH 033/106] Make gadget analyzer API backwards compatible Gadgets with conditional branches are now supported, but existing code might not be able to handle conditional branches or multiple gadgets from the same address. This makes the gadget analyzer return a single gadget to emulate the previous behavior unless the new allow_conditional_branches option is enabled. --- angrop/gadget_finder/__init__.py | 4 ++-- angrop/gadget_finder/gadget_analyzer.py | 29 ++++++++++++++++++++----- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index 4a28ca4..3b08e05 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -37,7 +37,7 @@ def _set_global_gadget_analyzer(rop_gadget_analyzer): _disable_loggers() def run_worker(addr): - return _global_gadget_analyzer.analyze_gadget(addr) + return _global_gadget_analyzer.analyze_gadget(addr, allow_conditional_branches=True) class GadgetFinder: """ @@ -174,7 +174,7 @@ def find_gadgets_single_threaded(self, show_progress=True): assert self.gadget_analyzer is not None for addr in self._addresses_to_check_with_caching(show_progress): - gadgets.extend(self.gadget_analyzer.analyze_gadget(addr)) + gadgets.extend(self.gadget_analyzer.analyze_gadget(addr, allow_conditional_branches=True)) return sorted(gadgets, key=lambda x: x.addr), self.get_duplicates() diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 6cfb715..c23979d 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -39,18 +39,32 @@ def __init__(self, project, fast_mode, kernel_mode=False, arch=None, stack_gsize fast_mode=self._fast_mode) self._concrete_sp = self._state.solver.eval(self._state.regs.sp) - def analyze_gadget(self, addr): + def analyze_gadget(self, addr, allow_conditional_branches=False): """ + Find gadgets at the given address. + + Support for gadgets with conditional branches can be enabled using the + allow_conditional_branches option, which is False by default for + compatibility with existing code that can't handle these gadgets. + Returns a list of gadgets when allow_conditional_branches is enabled, + and a single gadget or None when it is disabled. + :param addr: address to analyze for gadgets - :return: a list of RopGadget instances + :param allow_conditional_branches: whether to allow gadgets with conditional branches + :return: a list of RopGadget instances or a single RopGadget instance """ try: - return self._analyze_gadget(addr) + gadgets = self._analyze_gadget(addr, allow_conditional_branches) except RopTimeoutException: - return [] + return [] if allow_conditional_branches else None + if allow_conditional_branches: + return gadgets + else: + assert len(gadgets) <= 1 + return gadgets[0] if gadgets else None @rop_utils.timeout(3) - def _analyze_gadget(self, addr): + def _analyze_gadget(self, addr, allow_conditional_branches): l.info("Analyzing 0x%x", addr) # Step 1: first check if the block makes sense @@ -81,6 +95,11 @@ def _analyze_gadget(self, addr): l.exception(e) return [] + if not allow_conditional_branches and ( + simgr.active or simgr.deadended or len(simgr.unconstrained) != 1 + ): + return [] + gadgets = [] for final_state in simgr.unconstrained: From ce24ef9dbfbfc458a286150e255772f85b489bca Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 1 Jan 2025 13:27:44 -0800 Subject: [PATCH 034/106] Add function for printing assembly code of chains --- angrop/rop_chain.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index a255ad2..9e408b5 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -276,3 +276,9 @@ def copy(self): def __str__(self): return self.payload_code() + + def print_gadget_asm(self): + for gadget in self._gadgets: + for addr in gadget.bbl_addrs: + self._p.factory.block(addr).capstone.pp() + print() From 6f3509e12d37a1af0681a4e4d34fc3ec1a8504c7 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 1 Jan 2025 13:28:58 -0800 Subject: [PATCH 035/106] Add usage document for the new code --- README-RiscyROP.md | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 README-RiscyROP.md diff --git a/README-RiscyROP.md b/README-RiscyROP.md new file mode 100644 index 0000000..d39ca04 --- /dev/null +++ b/README-RiscyROP.md @@ -0,0 +1,56 @@ +# RiscyROP Usage + +## z3 Memory Usage + +Unfortunately there appears to be some kind of memory leak issue involving z3 that causes the memory usage to keep increasing during gadget finding. +With the latest z3 version the memory usage will increase to several GB per thread very quickly, but with older versions like 4.12.6.0 it's not as bad and the workaround I implemented that periodically restarts the worker processes is enough to keep the memory usage below 1.5 GB per thread. + +## Finding Gadgets + +Disable angrop's `fast_mode` setting when initializing the project, otherwise you will get very few gadgets. +The new gadget analyzer is a lot slower than angrop's original implementation. +You'll probably want to increase the number of processes from the default of 4, but make sure you have enough memory. +On my machine, it takes around 30 minutes to an hour to find gadgets in nginx and glibc with 16 processes. + +```python +import angr, angrop +p = angr.Project("some_binary", auto_load_libs=False) +rop = p.analyses.ROP(fast_mode=False) +rop.find_gadgets(16) +``` + +Since gadget finding takes a while, you can save the gadgets and load them later so that you don't have to run the gadget finder again. + +```python +rop.save_gadgets("gadgets") +rop.load_gadgets("gadgets") +``` + +## Chain Building + +Building register setting chains should work well, but building other types of chains might not work since integration of the new algorithms with the existing angrop features isn't fully complete. +On large binaries like glibc the new algorithm can set most if not all of the argument registers. +You can set the `modifiable_memory_range` argument to a range of addresses that can be safely accessed. +This will allow the chain builder to use gadgets that access memory outside of the stack, and it will ensure that the addresses are within the given range. +The maximum chain length defaults to 10 gadgets, which might not be enough if the number of registers is large. + +```python +chain = rop.set_regs(x0=1, x1=2, x2=3, x3=4, x4=5, x5=6, x6=7, x7=8, x30=42, modifiable_memory_range=(0x1000, 0x2000), max_length=15) +chain.print_gadget_asm() +chain.print_payload_code() +``` + +The address of the first gadget is placed at the beginning of the chain since all of the existing code assumes this is the case, but you might have to put it somewhere else depending on how you enter the chain. +For example, the initial gadget address would probably have to be placed further up the stack if return addresses are stored at the beginning of the stack frame instead of the end. +Similarly, the address that you want the last gadget to jump to might have to be placed somewhere in the middle of the chain instead of right after the chain. +`chain.next_pc_idx()` tells you which value in the chain should be replaced with the desired address if this is the case. + +If things aren't working, you might want to enable debug logging: + +```python +import logging +logging.getLogger('angrop.chain_builder.reg_setter').setLevel('DEBUG') +``` + +If the chain builder finds a sequence of gadgets that should work but it encounters an error when concretizing the chain, it will try a different sequence of gadgets. +However, this should rarely happen with the new algorithm. From f66479724856eac5f5136876de68ca317a4b62ae Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 1 Jan 2025 16:17:37 -0800 Subject: [PATCH 036/106] Fix syscall gadget detection --- angrop/gadget_finder/gadget_analyzer.py | 32 ++++++++++++++++++------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index c23979d..2ebf498 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -75,13 +75,17 @@ def _analyze_gadget(self, addr, allow_conditional_branches): init_state = self._state.copy() init_state.ip = addr simgr = self.project.factory.simulation_manager(init_state, save_unconstrained=True) - simgr.run( - n=3, - num_inst=30, - filter_func=lambda state: simgr.DROP - if state.ip.concrete and self.project.is_hooked(state.ip.concrete_value) - else None, - ) + + def filter(state): + if state.ip.concrete and self.project.is_hooked(state.addr): + # We don't want to go into SimProcedures. + return simgr.DROP + if rop_utils.is_in_kernel(self.project, state): + return "syscall" + return None + + simgr.run(n=4, num_inst=30, filter_func=filter) + except (claripy.errors.ClaripySolverInterruptError, claripy.errors.ClaripyZ3Error, ValueError): return [] except (claripy.ClaripyFrontendError, angr.engines.vex.claripy.ccall.CCallMultivaluedException) as e: @@ -95,14 +99,18 @@ def _analyze_gadget(self, addr, allow_conditional_branches): l.exception(e) return [] + final_states = list(simgr.unconstrained) + if "syscall" in simgr.stashes: + final_states.extend(self._try_stepping_past_syscall(state) for state in simgr.syscall) + if not allow_conditional_branches and ( - simgr.active or simgr.deadended or len(simgr.unconstrained) != 1 + simgr.active or simgr.deadended or len(final_states) != 1 ): return [] gadgets = [] - for final_state in simgr.unconstrained: + for final_state in final_states: try: if not self._valid_state(init_state, final_state): continue @@ -282,6 +290,12 @@ def _reach_unconstrained_or_syscall(self, addr): return init_state, state2 return init_state, final_state + def _try_stepping_past_syscall(self, state): + try: + return rop_utils.step_to_unconstrained_successor(self.project, state, max_steps=3) + except Exception: # pylint: disable=broad-exception-caught + return state + def _identify_transit_type(self, final_state, ctrl_type): # FIXME: not always jump, could be call as well if ctrl_type == 'register': From ce772b66b427544440bf416c2231a28fa31c08dd Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 1 Jan 2025 19:01:40 -0800 Subject: [PATCH 037/106] Remove broad except clauses in gadget finder We don't want to catch things like KeyboardInterrupt, so only catch angr errors instead. --- angrop/gadget_finder/gadget_analyzer.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 2ebf498..ff8937b 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -91,13 +91,10 @@ def filter(state): except (claripy.ClaripyFrontendError, angr.engines.vex.claripy.ccall.CCallMultivaluedException) as e: l.warning("... claripy error: %s", e) return [] - except angr.errors.SimSolverModeError: + except (angr.errors.AngrError, angr.errors.AngrRuntimeError, angr.errors.SimError): return [] except RopTimeoutException: return [] - except Exception as e:# pylint:disable=broad-except - l.exception(e) - return [] final_states = list(simgr.unconstrained) if "syscall" in simgr.stashes: @@ -147,10 +144,7 @@ def filter(state): except (claripy.ClaripyFrontendError, angr.engines.vex.claripy.ccall.CCallMultivaluedException) as e: l.warning("... claripy error: %s", e) continue - except angr.errors.SimSolverModeError: - continue - except Exception as e:# pylint:disable=broad-except - l.exception(e) + except (angr.errors.AngrError, angr.errors.AngrRuntimeError, angr.errors.SimError): continue return gadgets From 538941b27a41349d3b2a30a1bdf3ac9961432d39 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 1 Jan 2025 19:05:16 -0800 Subject: [PATCH 038/106] Move some code that was in a loop for no reason --- angrop/chain_builder/reg_setter.py | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 224ece0..8a79458 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -43,31 +43,31 @@ def verify(self, chain, preserve_regs, registers): given a potential chain, verify whether the chain can set the registers correctly by symbolically execute the chain """ + chain_str = "\n-----\n".join( + "\n".join( + str(self.project.factory.block(addr).capstone) + for addr in g.bbl_addrs + ) + for g in chain._gadgets + ) state = chain.exec() + for act in state.history.actions.hardcopy: + if act.type not in ("mem", "reg"): + continue + if act.type == 'mem': + if act.addr.ast.variables: + l.exception("memory access outside stackframe\n%s\n", chain_str) + return False + if act.type == 'reg' and act.action == 'write': + # get the full name of the register + offset = act.offset + offset -= act.offset % self.project.arch.bytes + reg_name = self.project.arch.translate_register_name(offset) + if reg_name in preserve_regs: + l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation - 1.", chain_str) + return False for reg, val in registers.items(): - chain_str = "\n-----\n".join( - "\n".join( - str(self.project.factory.block(addr).capstone) - for addr in g.bbl_addrs - ) - for g in chain._gadgets - ) bv = getattr(state.regs, reg) - for act in state.history.actions.hardcopy: - if act.type not in ("mem", "reg"): - continue - if act.type == 'mem': - if act.addr.ast.variables: - l.exception("memory access outside stackframe\n%s\n", chain_str) - return False - if act.type == 'reg' and act.action == 'write': - # get the full name of the register - offset = act.offset - offset -= act.offset % self.project.arch.bytes - reg_name = self.project.arch.translate_register_name(offset) - if reg_name in preserve_regs: - l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation - 1.", chain_str) - return False if bv.symbolic or state.solver.eval(bv != val.data): l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation - 2.", chain_str) return False From 59ae8bb8e470f4945490302a0a02be0daca27467 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 1 Jan 2025 19:06:32 -0800 Subject: [PATCH 039/106] Fix stack size when building the chain The stack needs to have one extra value initialized to account for the address of the first gadget being popped off. --- angrop/chain_builder/builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index 6b18fdc..9a01bac 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -117,7 +117,7 @@ def _build_reg_setting_chain( test_symbolic_state = rop_utils.make_symbolic_state( self.project, self.arch.reg_set, - stack_gsize=stack_change // self.project.arch.bytes, + stack_gsize=stack_change // self.project.arch.bytes + 1, ) rop_utils.make_reg_symbolic(test_symbolic_state, self.arch.base_pointer) test_symbolic_state.ip = test_symbolic_state.stack_pop() From d59bcf66772d0f69ca1dd861f35eff208cc31ead Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 1 Jan 2025 19:10:33 -0800 Subject: [PATCH 040/106] Try to minimize the payload size When choosing gadgets, break ties by choosing the gadget with the smaller stack change to minimize the size of the payload. --- angrop/chain_builder/reg_setter.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 8a79458..beb8092 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -555,8 +555,14 @@ def _backwards_recursive_search( continue potential_next_gadgets.append((gadget, remaining_regs)) - # Sort gadgets by number of remaining registers and instruction count - potential_next_gadgets.sort(key=lambda g: (sum(self._reg_weights[reg] for reg in g[1]), g[0].isn_count)) + # Sort gadgets by number of remaining registers, instruction count, and stack change + potential_next_gadgets.sort( + key=lambda g: ( + sum(self._reg_weights[reg] for reg in g[1]), + g[0].isn_count, + g[0].stack_change, + ) + ) for gadget, remaining_regs in potential_next_gadgets: current_chain.append(gadget) From b37ceab6a67f81650ab390d4eba184db85ec0e0e Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 1 Jan 2025 19:16:12 -0800 Subject: [PATCH 041/106] Fix bug in compatibility workaround --- angrop/chain_builder/builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index 9a01bac..29c02f8 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -219,7 +219,7 @@ def map_stack_var(ast, value): # is not included in the chain if it is the last value. if ( offset == stack_change - bytes_per_pop - and sym_var is next_pc_val + and val is next_pc_val ): break chain.add_value(val) From bf727018004b5c2b380748b484ff94b9e642cfa0 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 1 Jan 2025 20:17:11 -0800 Subject: [PATCH 042/106] Document memory leak workaround --- angrop/gadget_finder/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index 3b08e05..f266dc6 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -153,6 +153,8 @@ def find_gadgets(self, processes=4, show_progress=True): processes=processes, initializer=_set_global_gadget_analyzer, initargs=initargs, + # There is some kind of memory leak issue involving z3, + # so we periodically restart the worker processes. maxtasksperchild=64, ) as pool: gadgets = list( From db8a56f20f2eb22c24257cb3cf37abd86844916f Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Wed, 1 Jan 2025 23:14:07 -0800 Subject: [PATCH 043/106] Disable slow gadget filtering See reasoning in commit f47ceddbadab1bbc7edd52d4a6a8a7984504d1d2. --- angrop/chain_builder/pivot.py | 2 +- angrop/chain_builder/shifter.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/angrop/chain_builder/pivot.py b/angrop/chain_builder/pivot.py index c7e6f5a..4a714bc 100644 --- a/angrop/chain_builder/pivot.py +++ b/angrop/chain_builder/pivot.py @@ -34,7 +34,7 @@ def __init__(self, chain_builder): self.update() def update(self): - self._pivot_gadgets = self._filter_gadgets(self.chain_builder.pivot_gadgets) + self._pivot_gadgets = self.chain_builder.pivot_gadgets def pivot(self, thing): if thing.is_register: diff --git a/angrop/chain_builder/shifter.py b/angrop/chain_builder/shifter.py index e8199ed..4cf2750 100644 --- a/angrop/chain_builder/shifter.py +++ b/angrop/chain_builder/shifter.py @@ -18,7 +18,7 @@ def __init__(self, chain_builder): self.update() def update(self): - self.shift_gadgets = self._filter_gadgets(self.chain_builder.gadgets) + self.shift_gadgets = self.chain_builder.gadgets def verify_shift(self, chain, length, preserve_regs): arch_bytes = self.project.arch.bytes From c01bb327e38ba5072e0e290a2913dd3537c562e8 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 00:27:43 -0800 Subject: [PATCH 044/106] Increase chain building timeout Long chains can take a couple of seconds to build. --- angrop/chain_builder/builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index 29c02f8..a404f54 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -102,7 +102,7 @@ def _get_ptr_to_null(self): return addr return None - @rop_utils.timeout(2) + @rop_utils.timeout(8) def _build_reg_setting_chain( self, gadgets, modifiable_memory_range, register_dict, stack_change ): From f69ef3acd14903ac8b3949e02ebe1ced91134daf Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 00:28:53 -0800 Subject: [PATCH 045/106] Only use conditional branch gadgets in RegSetter The other chain builders can't handle conditional branches. --- angrop/chain_builder/mem_changer.py | 2 ++ angrop/chain_builder/mem_writer.py | 2 ++ angrop/chain_builder/pivot.py | 6 +++++- angrop/chain_builder/reg_mover.py | 6 +++++- angrop/chain_builder/shifter.py | 6 +++++- angrop/gadget_finder/gadget_analyzer.py | 8 +++++++- angrop/rop_gadget.py | 1 + 7 files changed, 27 insertions(+), 4 deletions(-) diff --git a/angrop/chain_builder/mem_changer.py b/angrop/chain_builder/mem_changer.py index 20d7852..8894e20 100644 --- a/angrop/chain_builder/mem_changer.py +++ b/angrop/chain_builder/mem_changer.py @@ -48,6 +48,8 @@ def _set_regs(self, *args, **kwargs): def _get_all_mem_change_gadgets(gadgets): possible_gadgets = set() for g in gadgets: + if g.has_conditional_branch: + continue if len(g.mem_reads) + len(g.mem_writes) > 0 or len(g.mem_changes) != 1: continue if g.stack_change <= 0: diff --git a/angrop/chain_builder/mem_writer.py b/angrop/chain_builder/mem_writer.py index 349e036..649eded 100644 --- a/angrop/chain_builder/mem_writer.py +++ b/angrop/chain_builder/mem_writer.py @@ -31,6 +31,8 @@ def _set_regs(self, *args, **kwargs): def _get_all_mem_write_gadgets(gadgets): possible_gadgets = set() for g in gadgets: + if g.has_conditional_branch: + continue if len(g.mem_reads) + len(g.mem_changes) > 0 or len(g.mem_writes) != 1: continue if g.stack_change <= 0: diff --git a/angrop/chain_builder/pivot.py b/angrop/chain_builder/pivot.py index 4a714bc..1cb5311 100644 --- a/angrop/chain_builder/pivot.py +++ b/angrop/chain_builder/pivot.py @@ -34,7 +34,11 @@ def __init__(self, chain_builder): self.update() def update(self): - self._pivot_gadgets = self.chain_builder.pivot_gadgets + self._pivot_gadgets = [ + gadget + for gadget in self.chain_builder.pivot_gadgets + if not gadget.has_conditional_branch + ] def pivot(self, thing): if thing.is_register: diff --git a/angrop/chain_builder/reg_mover.py b/angrop/chain_builder/reg_mover.py index 8dd765b..880345a 100644 --- a/angrop/chain_builder/reg_mover.py +++ b/angrop/chain_builder/reg_mover.py @@ -20,7 +20,11 @@ def __init__(self, chain_builder): self.update() def update(self): - self._reg_moving_gadgets = self.chain_builder.gadgets + self._reg_moving_gadgets = [ + gadget + for gadget in self.chain_builder.gadgets + if not gadget.has_conditional_branch + ] def verify(self, chain, preserve_regs, registers): """ diff --git a/angrop/chain_builder/shifter.py b/angrop/chain_builder/shifter.py index 4cf2750..f48a0d7 100644 --- a/angrop/chain_builder/shifter.py +++ b/angrop/chain_builder/shifter.py @@ -18,7 +18,11 @@ def __init__(self, chain_builder): self.update() def update(self): - self.shift_gadgets = self.chain_builder.gadgets + self.shift_gadgets = [ + gadget + for gadget in self.chain_builder.gadgets + if not gadget.has_conditional_branch + ] def verify_shift(self, chain, length, preserve_regs): arch_bytes = self.project.arch.bytes diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index ff8937b..caa6070 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -61,7 +61,11 @@ def analyze_gadget(self, addr, allow_conditional_branches=False): return gadgets else: assert len(gadgets) <= 1 - return gadgets[0] if gadgets else None + return ( + gadgets[0] + if gadgets and not gadgets[0].has_conditional_branch + else None + ) @rop_utils.timeout(3) def _analyze_gadget(self, addr, allow_conditional_branches): @@ -399,6 +403,8 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type): for var in constraint.variables } + gadget.has_conditional_branch = len(constraint_vars) > 0 + for action in final_state.history.actions: if action.type == 'mem': constraint_vars |= action.addr.variables diff --git a/angrop/rop_gadget.py b/angrop/rop_gadget.py index f275ed3..74b192b 100644 --- a/angrop/rop_gadget.py +++ b/angrop/rop_gadget.py @@ -131,6 +131,7 @@ def __init__(self, addr): self.constraint_regs = set() # Instruction count to estimate complexity self.isn_count = None + self.has_conditional_branch = None @property def num_mem_access(self): From 24e03f727c1898fda39ca7158a613c05e9850eae Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 10:50:55 -0800 Subject: [PATCH 046/106] Fix start address calculation The previous code incorrectly skips the first address if it is aligned. --- angrop/gadget_finder/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index f266dc6..b9755d1 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -289,7 +289,7 @@ def _addresses_to_check(self): yield addr+offset for segment in self._get_executable_ranges(): l.debug("Analyzing segment with address range: 0x%x, 0x%x", segment.min_addr, segment.max_addr) - start = segment.min_addr + (alignment - segment.min_addr % alignment) + start = alignment * ((segment.min_addr + alignment - 1) // alignment) for addr in range(start, start+segment.memsize, alignment): yield addr+offset From 2d7b59dc290904a2e681689d10595e5b36320231 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 10:52:15 -0800 Subject: [PATCH 047/106] Add tests for new chaining algorithm --- tests/test_chainbuilder.py | 83 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/tests/test_chainbuilder.py b/tests/test_chainbuilder.py index a9fdfd9..47a8522 100644 --- a/tests/test_chainbuilder.py +++ b/tests/test_chainbuilder.py @@ -374,6 +374,89 @@ def test_pop_pc_syscall_chain(): assert state.regs.rdi.concrete_value == 0x41414141 assert 0 not in state.posix.fd +def test_aarch64_basic_reg_setting(): + proj = angr.load_shellcode( + """ + mov x0, x29 + ldp x29, x30, [sp], #0x10 + ret + """, + "aarch64", + load_address=0x400000, + auto_load_libs=False, + ) + rop = proj.analyses.ROP(fast_mode=False, only_check_near_rets=False) + rop.find_gadgets_single_threaded(show_progress=False) + chain = rop.set_regs(x0=0x41414141) + state = chain.exec() + assert state.regs.x0.concrete_value == 0x41414141 + +def test_aarch64_jump_reg(): + proj = angr.load_shellcode( + """ + ldp x0, x4, [sp, #0x10] + ldp x29, x30, [sp], #0x20 + ret + mov x1, x29 + br x4 + """, + "aarch64", + load_address=0x400000, + auto_load_libs=False, + ) + rop = proj.analyses.ROP(fast_mode=False, only_check_near_rets=False) + rop.find_gadgets_single_threaded(show_progress=False) + chain = rop.set_regs(x0=0x41414141, x1=0x42424242) + state = chain.exec() + assert state.regs.x0.concrete_value == 0x41414141 + assert state.regs.x1.concrete_value == 0x42424242 + +def test_aarch64_cond_branch(): + proj = angr.load_shellcode( + """ + ldp x0, x1, [sp, #0x10] + ldp x29, x30, [sp], #0x20 + ret + ldr x2, [sp, #0x10] + add x0, x0, #0x42 + cmp x0, x1 + b.ne .ret + ldp x29, x30, [sp], #0x20 + .ret: + ret + """, + "aarch64", + load_address=0x400000, + auto_load_libs=False, + ) + rop = proj.analyses.ROP(fast_mode=False, only_check_near_rets=False) + rop.find_gadgets_single_threaded(show_progress=False) + chain = rop.set_regs(x2=0x41414141) + state = chain.exec() + assert state.regs.x2.concrete_value == 0x41414141 + +def test_aarch64_mem_access(): + proj = angr.load_shellcode( + """ + ldp x0, x1, [sp, #0x10] + str x1, [x1] + ldp x29, x30, [sp], #0x20 + ret + """, + "aarch64", + load_address=0x400000, + auto_load_libs=False, + ) + rop = proj.analyses.ROP(fast_mode=False, only_check_near_rets=False) + rop.find_gadgets_single_threaded(show_progress=False) + chain = rop.set_regs(x0=0x41414141, modifiable_memory_range=(0x1000, 0x2000)) + state = chain.exec() + assert state.regs.x0.concrete_value == 0x41414141 + for action in state.history.actions: + if action.type == action.MEM and action.action == action.WRITE: + assert action.addr.ast.concrete_value >= 0x1000 + assert action.addr.ast.concrete_value < 0x2000 + def run_all(): functions = globals() all_functions = {x:y for x, y in functions.items() if x.startswith('test_')} From 21b5a79f10035ba70d69da79910ce79f9b07cd28 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 11:22:43 -0800 Subject: [PATCH 048/106] Set max execution steps for func call test The chain doesn't know how many steps the fake function gadget needs so we have to override the max steps calculation. --- tests/test_chainbuilder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_chainbuilder.py b/tests/test_chainbuilder.py index 47a8522..1f44a91 100644 --- a/tests/test_chainbuilder.py +++ b/tests/test_chainbuilder.py @@ -19,7 +19,7 @@ def test_x86_64_func_call(): rop.save_gadgets(cache_path) chain = rop.func_call('puts', [0x402704]) + rop.func_call('puts', [0x402704]) - state = chain.exec() + state = chain.exec(max_steps=8) assert state.posix.dumps(1) == b'Enter username: \nEnter username: \n' def test_i386_func_call(): @@ -105,7 +105,7 @@ def test_preserve_regs(): chain1 = rop.set_regs(rdi=0x402715) chain2 = rop.func_call('puts', [0x402704], preserve_regs=['rdi']) chain = chain1+chain2 - state = chain.exec() + state = chain.exec(max_steps=5) assert state.posix.dumps(1) == b'Failed to parse username.\n' def test_i386_mem_write(): From 9fda99ffff3c35cd1d4d57e32f3331fc2088d69b Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 11:25:26 -0800 Subject: [PATCH 049/106] Fix gadget filtering I accidentally broke some chain builders when trying to remove the slow filtering loops that compare every gadget with every other gadget. --- angrop/chain_builder/reg_mover.py | 32 ++++++++++++-------------- angrop/chain_builder/shifter.py | 38 ++++++++++++++++--------------- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/angrop/chain_builder/reg_mover.py b/angrop/chain_builder/reg_mover.py index 880345a..9c92369 100644 --- a/angrop/chain_builder/reg_mover.py +++ b/angrop/chain_builder/reg_mover.py @@ -20,11 +20,7 @@ def __init__(self, chain_builder): self.update() def update(self): - self._reg_moving_gadgets = [ - gadget - for gadget in self.chain_builder.gadgets - if not gadget.has_conditional_branch - ] + self._reg_moving_gadgets = self._filter_gadgets(self.chain_builder.gadgets) def verify(self, chain, preserve_regs, registers): """ @@ -126,21 +122,21 @@ def _filter_gadgets(gadgets): """ filter gadgets having the same effect """ - gadgets = set(gadgets) + gadgets = {g for g in gadgets if not g.has_conditional_branch} # first: filter out gadgets that don't do register move gadgets = set(x for x in gadgets if x.reg_moves) - # second: remove gadgets that are strictly worse than some others - skip = set({}) - while True: - to_remove = set({}) - for g in gadgets-skip: - to_remove.update({x for x in gadgets-{g} if g.reg_move_better_than(x)}) - if to_remove: - break - skip.add(g) - if not to_remove: - break - gadgets -= to_remove + # # second: remove gadgets that are strictly worse than some others + # skip = set({}) + # while True: + # to_remove = set({}) + # for g in gadgets-skip: + # to_remove.update({x for x in gadgets-{g} if g.reg_move_better_than(x)}) + # if to_remove: + # break + # skip.add(g) + # if not to_remove: + # break + # gadgets -= to_remove # third: remove gadgets that only move from itself to itself, it is not helpful # for exploitation new_gadgets = set(x for x in gadgets if any(y.from_reg != y.to_reg for y in x.reg_moves)) diff --git a/angrop/chain_builder/shifter.py b/angrop/chain_builder/shifter.py index f48a0d7..5828d5d 100644 --- a/angrop/chain_builder/shifter.py +++ b/angrop/chain_builder/shifter.py @@ -18,11 +18,7 @@ def __init__(self, chain_builder): self.update() def update(self): - self.shift_gadgets = [ - gadget - for gadget in self.chain_builder.gadgets - if not gadget.has_conditional_branch - ] + self.shift_gadgets = self._filter_gadgets(self.chain_builder.gadgets) def verify_shift(self, chain, length, preserve_regs): arch_bytes = self.project.arch.bytes @@ -124,21 +120,27 @@ def _filter_gadgets(self, gadgets): filter gadgets having the same effect """ # we don't like gadgets with any memory accesses or jump gadgets - gadgets = [x for x in gadgets if x.num_mem_access == 0 and x.transit_type != 'jmp_reg'] + gadgets = [ + x + for x in gadgets + if x.num_mem_access == 0 + and x.transit_type != "jmp_reg" + and not x.has_conditional_branch + ] # now do the standard filtering - gadgets = set(gadgets) - skip = set({}) - while True: - to_remove = set({}) - for g in gadgets-skip: - to_remove.update({x for x in gadgets-{g} if self.better_than(g, x)}) - if to_remove: - break - skip.add(g) - if not to_remove: - break - gadgets -= to_remove + # gadgets = set(gadgets) + # skip = set({}) + # while True: + # to_remove = set({}) + # for g in gadgets-skip: + # to_remove.update({x for x in gadgets-{g} if self.better_than(g, x)}) + # if to_remove: + # break + # skip.add(g) + # if not to_remove: + # break + # gadgets -= to_remove d = defaultdict(list) for g in gadgets: From 44ae030d27620750c8bee151ecf1d6cf6173ad32 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 16:15:32 -0800 Subject: [PATCH 050/106] Make new chain builder work with RegMover --- angrop/chain_builder/builder.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index a404f54..73001a0 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -171,8 +171,14 @@ def map_stack_var(ast, value): # Constrain final register values. for reg, val in register_dict.items(): var = state.registers.load(reg) - map_stack_var(var, val) - state.solver.add(var == val) + if val.is_register: + if var.op != "BVS" or not next(iter(var.variables)).startswith( + f"sreg_{val.reg_name}-" + ): + raise RopException("Register wasn't moved correctly") + else: + map_stack_var(var, val) + state.solver.add(var == val) # Constrain memory access addresses. for action in state.history.actions: From c37cde99e913149a1a1dbd70bed3262fa3354d50 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 16:20:06 -0800 Subject: [PATCH 051/106] Set max steps for func call tests See explanation in commit 3da7f4810af6f5e0171679398fc6f55853aecd4f. --- tests/test_chainbuilder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_chainbuilder.py b/tests/test_chainbuilder.py index 1f44a91..b2f7fb7 100644 --- a/tests/test_chainbuilder.py +++ b/tests/test_chainbuilder.py @@ -49,16 +49,16 @@ def test_arm_func_call(): proj.hook_symbol('write', angr.SIM_PROCEDURES['posix']['write']()) chain1 = rop.func_call("write", [1, 0x4E15F0, 9]) - state = chain1.exec() + state = chain1.exec(max_steps=8) assert state.posix.dumps(1) == b'malloc.c\x00' proj.hook_symbol('puts', angr.SIM_PROCEDURES['libc']['puts']()) chain2 = rop.func_call("puts", [0x4E15F0]) - state = chain2.exec() + state = chain2.exec(max_steps=8) assert state.posix.dumps(1) == b'malloc.c\n' chain = chain1 + chain2 - state = chain.exec() + state = chain.exec(max_steps=8) assert state.posix.dumps(1) == b'malloc.c\x00malloc.c\n' def test_i386_syscall(): From eb959b25fb1fab8ef19217932e0593132eec21ed Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 16:24:47 -0800 Subject: [PATCH 052/106] Set chain execution max steps conservatively Setting the maximum number of steps to the total number of blocks doesn't work with the fake gadgets used in function call chains, so use the old default of twice the number of gadgets if it's higher. --- angrop/rop_chain.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index 9e408b5..2180e79 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -260,7 +260,10 @@ def exec(self, max_steps=None, timeout=None): for value, _ in reversed(concrete_vals[1:]): state.stack_push(value) if max_steps is None: - max_steps = sum(len(gadget.bbl_addrs) for gadget in self._gadgets) + max_steps = max( + sum(len(gadget.bbl_addrs) for gadget in self._gadgets), + 2 * len(self._gadgets), + ) return rop_utils.step_to_unconstrained_successor(self._p, state, max_steps=max_steps, allow_simprocedures=True) From afbe7ab35b4ab5a30d9faeaa890ee39e0e800e96 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 16:34:17 -0800 Subject: [PATCH 053/106] Prioritize minimizing payload size This makes the behavior closer to the previous algorithm which tries to find the chain with the smallest payload size. --- angrop/chain_builder/reg_setter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index beb8092..cce89ff 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -555,12 +555,12 @@ def _backwards_recursive_search( continue potential_next_gadgets.append((gadget, remaining_regs)) - # Sort gadgets by number of remaining registers, instruction count, and stack change + # Sort gadgets by number of remaining registers, stack change, and instruction count potential_next_gadgets.sort( key=lambda g: ( sum(self._reg_weights[reg] for reg in g[1]), - g[0].isn_count, g[0].stack_change, + g[0].isn_count, ) ) From ff77b330aab1a038e5f8c26a7b7ced91b94740c4 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 21:30:11 -0800 Subject: [PATCH 054/106] Update tests for conditional branches The gadget comparison code has to handle multiple gadgets at the same address due to conditional branches. --- tests/test_rop.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_rop.py b/tests/test_rop.py index 8191b01..305d0b7 100644 --- a/tests/test_rop.py +++ b/tests/test_rop.py @@ -72,7 +72,9 @@ def compare_gadgets(test_gadgets, known_gadgets): # check that each of the expected gadget addrs was found as a gadget # if it wasn't the best way to debug is to run: # angrop.gadget_analyzer.l.setLevel("DEBUG"); rop._gadget_analyzer.analyze_gadget(addr) - test_gadget_dict = {g.addr: g for g in test_gadgets} + test_gadget_dict = {} + for g in test_gadgets: + test_gadget_dict.setdefault(g.addr, []).append(g) found_addrs = set(g.addr for g in test_gadgets) for g in known_gadgets: @@ -83,7 +85,13 @@ def compare_gadgets(test_gadgets, known_gadgets): # check gadgets for g in known_gadgets: - assert_gadgets_equal(g, test_gadget_dict[g.addr]) + matching_gadgets = [ + test_gadget + for test_gadget in test_gadget_dict[g.addr] + if test_gadget.bbl_addrs == g.bbl_addrs + ] + assert len(matching_gadgets) == 1 + assert_gadgets_equal(g, matching_gadgets[0]) def execute_chain(project, chain): From 1a013b345c1cfc8eaa9bd4fb94abd930972a00ab Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 22:12:23 -0800 Subject: [PATCH 055/106] Make chain jump to address after it if possible This preserves the previous behavior where if the chain ends with a jump to some address in the middle of the chain, the concretization function would attempt to append a no-op gadget so that the chain jumps to the address immediately after it instead. --- angrop/rop_chain.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index 2180e79..431080d 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -151,8 +151,17 @@ def _concretize_chain_values(self, constraints=None, timeout=None, preserve_next """ concretize chain values with a timeout """ - # if self.next_pc_idx() is not None: - # return (self + self._rop.chain_builder.shift(self._p.arch.bytes))._concretize_chain_values(constraints=constraints, timeout=timeout, preserve_next_pc=preserve_next_pc) + if self.next_pc_idx() is not None: + try: + return ( + self + self._rop.chain_builder.shift(self._p.arch.bytes) + )._concretize_chain_values( + constraints=constraints, + timeout=timeout, + preserve_next_pc=preserve_next_pc, + ) + except RopException: + pass if timeout is None: timeout = self._timeout values = rop_utils.timeout(timeout)(self.__concretize_chain_values)(constraints=constraints) From 3d4cb0ad97f7b9bd66ca7609d5e3b86affe3da19 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 22:15:16 -0800 Subject: [PATCH 056/106] Update tests to check chain.next_pc_idx() If chain.next_pc_idx() is not None, the address to jump to after the chain should be placed at that index instead of after the chain. In cases like this the chain concretization function will attempt to append a no-op gadget so that the chain jumps to the address immediately after it, but this is not always possible on architectures without a stack return instruction. --- tests/test_rop.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_rop.py b/tests/test_rop.py index 305d0b7..d0ecf56 100644 --- a/tests/test_rop.py +++ b/tests/test_rop.py @@ -96,7 +96,13 @@ def compare_gadgets(test_gadgets, known_gadgets): def execute_chain(project, chain): s = project.factory.blank_state() - s.memory.store(s.regs.sp, chain.payload_str() + b"AAAAAAAAA") + s.memory.store(s.regs.sp, chain.payload_str()) + goal_idx = chain.next_pc_idx() + s.memory.store( + s.regs.sp + + (chain.payload_len if goal_idx is None else goal_idx * project.arch.bytes), + b"A" * project.arch.bytes, + ) s.ip = s.stack_pop() p = project.factory.simulation_manager(s) goal_addr = 0x4141414141414141 % (1 << project.arch.bits) From 435da697e54ad62c3b4a6b71125605a9425a9968 Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 22:47:30 -0800 Subject: [PATCH 057/106] Use block.pp() instead of block.capstone.pp() --- angrop/rop_chain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index 431080d..57b0f76 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -292,5 +292,5 @@ def __str__(self): def print_gadget_asm(self): for gadget in self._gadgets: for addr in gadget.bbl_addrs: - self._p.factory.block(addr).capstone.pp() + self._p.factory.block(addr).pp() print() From f28974ce3f347f3e59fe4422793e751d55024c6c Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Thu, 2 Jan 2025 23:08:10 -0800 Subject: [PATCH 058/106] Rewrite test to not rely on exact chain Tests should check if the chains do what we want, and they should not rely on the chain matching a fixed sequence of gadgets since there can be multiple chains that do the same thing. --- tests/test_rop.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tests/test_rop.py b/tests/test_rop.py index d0ecf56..a726dad 100644 --- a/tests/test_rop.py +++ b/tests/test_rop.py @@ -203,16 +203,21 @@ def test_roptest_x86_64(): r.find_gadgets_single_threaded(show_progress=False) c = r.execve(path=b"/bin/sh") - # verifying this is a giant pain, partially because the binary is so tiny, and there's no code beyond the syscall - assert len(c._gadgets) == 8 - - # verify the chain is valid - chain_addrs = [ g.addr for g in c._gadgets ] - assert chain_addrs[1] in [0x4000b2, 0x4000bd] - assert chain_addrs[5] in [0x4000b2, 0x4000bd] - chain_addrs[1] = 0x4000b2 - chain_addrs[5] = 0x4000b2 - assert chain_addrs == [ 0x4000b0, 0x4000b2, 0x4000b4, 0x4000b0, 0x4000bb, 0x4000b2, 0x4000bf, 0x4000c1 ] + state = p.factory.blank_state() + state.memory.store(state.regs.sp, c.payload_str()) + state.ip = state.stack_pop() + + # Step to the syscall. + while state.block(num_inst=1).disassembly.insns[0].mnemonic != 'syscall': + succ = state.step() + assert len(succ.flat_successors) == 1 + assert not succ.unconstrained_successors + state = succ.flat_successors[0] + + assert state.solver.is_true(state.memory.load(state.regs.rdi, 8) == b'/bin/sh\0') + assert state.regs.rsi.concrete_value == 0 + assert state.regs.rdx.concrete_value == 0 + assert state.regs.rax.concrete_value == 0x3b def test_roptest_mips(): proj = angr.Project(os.path.join(public_bin_location, "mipsel/darpa_ping"), auto_load_libs=False) From aa9b7b8db821e9b560e1c19b45d36ee41018e25e Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Fri, 3 Jan 2025 00:54:35 -0800 Subject: [PATCH 059/106] Add previous BFS algorithm for concrete values The BFS algorithm is able to use gadgets that set registers to concrete values which the new algorithm doesn't support yet. This tries the BFS algorithm first since it's fast while the new algorithm can take a long time if it can't find a chain. --- angrop/chain_builder/builder.py | 3 +++ angrop/chain_builder/reg_setter.py | 23 +++++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index 73001a0..7dcf882 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -176,6 +176,9 @@ def map_stack_var(ast, value): f"sreg_{val.reg_name}-" ): raise RopException("Register wasn't moved correctly") + elif not var.symbolic and not val.symbolic: + if var.concrete_value != val.concreted: + raise RopException("Register set to incorrect value") else: map_stack_var(var, val) state.solver.add(var == val) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index cce89ff..4edd966 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -91,14 +91,21 @@ def run(self, modifiable_memory_range=None, use_partial_controllers=False, pres for x in registers: registers[x] = rop_utils.cast_rop_value(registers[x], self.project) - for gadgets in self._backwards_recursive_search( - self._reg_setting_gadgets, - set(registers), - current_chain=[], - preserve_regs=preserve_regs, - modifiable_memory_range=modifiable_memory_range, - visited={}, - max_length=max_length, + for gadgets in itertools.chain( + self._find_all_candidate_chains( + self._find_relevant_gadgets(**registers), + preserve_regs.copy(), + **registers, + ), + self._backwards_recursive_search( + self._reg_setting_gadgets, + set(registers), + current_chain=[], + preserve_regs=preserve_regs, + modifiable_memory_range=modifiable_memory_range, + visited={}, + max_length=max_length, + ), ): chain_str = "\n-----\n".join( "\n".join( From 4db9728d78a27cea5256ac81b12d7ddcaeea1ffd Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Fri, 3 Jan 2025 10:12:44 -0800 Subject: [PATCH 060/106] Fix is_in_kernel check The previous code can throw exceptions if the address is not in any object and there is already a function in rop_utils for this. --- angrop/gadget_finder/gadget_analyzer.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index caa6070..9fd2f09 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -236,13 +236,7 @@ def _block_make_sense(self, addr): return True def is_in_kernel(self, state): - ip = state.ip - if not ip.symbolic: - obj = self.project.loader.find_object_containing(ip.concrete_value) - if obj.binary == 'cle##kernel': - return True - return False - return False + return rop_utils.is_in_kernel(self.project, state) def _can_reach_unconstrained(self, addr, max_steps=2): """ From f671363f15ead720af068ae4e17787c6d690b0fd Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Fri, 3 Jan 2025 21:22:39 -0800 Subject: [PATCH 061/106] Fix exception during gadget finding act.data.ast can be a floating-point expression instead of a bitvector, which would cause the == operator to return False instead of a claripy expression. --- angrop/gadget_finder/gadget_analyzer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 9fd2f09..ff4ff55 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -553,7 +553,11 @@ def _check_for_control_type(self, init_state, final_state): saved_ip_addr = None for act in final_state.history.actions: if act.type == 'mem' and act.action == 'read': - if act.size == self.project.arch.bits and not (act.data.ast == ip).symbolic: + if ( + act.size == self.project.arch.bits + and isinstance(act.data.ast, claripy.ast.BV) + and not (act.data.ast == ip).symbolic + ): if init_state.solver.eval(act.data.ast == ip): saved_ip_addr = act.addr.ast break From 01e054aafe63e186c271133ebdea3162b1adc43a Mon Sep 17 00:00:00 2001 From: Alexander Zhang Date: Fri, 3 Jan 2025 22:54:33 -0800 Subject: [PATCH 062/106] Remove unused function I rewrote the existing Builder._build_reg_setting_chain() function instead. --- angrop/chain_builder/reg_setter.py | 45 ------------------------------ 1 file changed, 45 deletions(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 4edd966..43d04e6 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -637,48 +637,3 @@ def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str remaining_regs |= gadget.constraint_regs return remaining_regs - - def _build_concrete_chain( - self, - gadgets: list[RopGadget], - registers: dict[str, int], - next_pc: int, - modifiable_memory_range: tuple[int, int] | None, - ) -> list[int]: - """ - Build a concrete ROP chain from a list of gadgets. - - Return a list of stack values, not including the address of the first gadget. - """ - stack_len = sum(g.stack_change for g in gadgets) // self.project.arch.bytes - init_state = rop_utils.make_symbolic_state(self.project, self.arch.reg_set, stack_gsize=stack_len) - init_state.ip = init_state.solver.BVS("init_ip", self.project.arch.bits) - state = init_state - for gadget in gadgets: - state.solver.add(state.ip == gadget.addr) - for addr in gadget.bbl_addrs[1:]: - succ = state.step() - succ_states = [state for state in succ.successors if state.solver.is_true(state.ip == addr)] - assert len(succ_states) == 1 - state = succ_states[0] - succ = state.step() - assert len(succ.unconstrained_successors) == 1 - state = succ.unconstrained_successors[0] - state.solver.add(state.ip == next_pc) - for reg, val in registers.items(): - state.solver.add(state.registers.load(reg) == val) - for action in state.history.actions: - if action.type == 'mem' and action.addr.ast.symbolic: - if modifiable_memory_range is None: - raise RopException("Symbolic memory address without modifiable memory range") - state.solver.add(action.addr.ast >= modifiable_memory_range[0]) - state.solver.add(action.addr.ast < modifiable_memory_range[1]) - return [ - state.solver.eval( - init_state.stack_read( - i * self.project.arch.bytes, - self.project.arch.bytes, - ) - ) - for i in range(stack_len) - ] From 027a929ee97227355c2d71a416428452c83c14d7 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 15 Jan 2025 15:12:53 -0700 Subject: [PATCH 063/106] fix symbolic data handling --- angrop/chain_builder/reg_setter.py | 2 +- angrop/rop_chain.py | 13 ++++++++++++- tests/test_chainbuilder.py | 20 ++++++++++++++++++++ tests/test_rop.py | 1 + 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 43d04e6..ecfa4c3 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -68,7 +68,7 @@ def verify(self, chain, preserve_regs, registers): return False for reg, val in registers.items(): bv = getattr(state.regs, reg) - if bv.symbolic or state.solver.eval(bv != val.data): + if (val.symbolic != bv.symbolic) or state.solver.eval(bv != val.data): l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation - 2.", chain_str) return False # the next pc must come from the stack diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index 57b0f76..534a0b3 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -264,9 +264,20 @@ def exec(self, max_steps=None, timeout=None): state.solver.reload_solver([]) # remove constraints state.regs.pc = self._values[0].concreted concrete_vals = self._concretize_chain_values(timeout=timeout, preserve_next_pc=True) + + # when the chain data includes symbolic values, we need to replace the concrete values + # with the user's symbolic data + values = concrete_vals + for idx, val in enumerate(self._values): + if not val.symbolic: + continue + if all(var.startswith("symbolic_stack") for var in val.ast.variables): + continue + values[idx] = (val.data, val.rebase) + # the assumption is that the first value in the chain is a code address # it sounds like a reasonable assumption to me. But I can be wrong. - for value, _ in reversed(concrete_vals[1:]): + for value, _ in reversed(values[1:]): state.stack_push(value) if max_steps is None: max_steps = max( diff --git a/tests/test_chainbuilder.py b/tests/test_chainbuilder.py index b2f7fb7..7f668b1 100644 --- a/tests/test_chainbuilder.py +++ b/tests/test_chainbuilder.py @@ -2,11 +2,31 @@ import angr import angrop # pylint: disable=unused-import +import claripy from angrop.rop_value import RopValue BIN_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "binaries") CACHE_DIR = os.path.join(BIN_DIR, 'tests_data', 'angrop_gadgets_cache') +def test_symbolic_data(): + cache_path = os.path.join(CACHE_DIR, "amd64_glibc_2.19") + proj = angr.Project(os.path.join(BIN_DIR, "tests", "x86_64", "libc.so.6"), auto_load_libs=False) + rop = proj.analyses.ROP() + + if os.path.exists(cache_path): + rop.load_gadgets(cache_path) + else: + rop.find_gadgets() + rop.save_gadgets(cache_path) + + var1 = claripy.BVS("var1", proj.arch.bits) + var2 = claripy.BVS("var2", proj.arch.bits) + chain = rop.set_regs(rax=var1, rbx=var2) + + state = chain.exec() + assert state.solver.satisfiable(extra_constraints=[state.regs.rax != var1]) is False + assert state.solver.satisfiable(extra_constraints=[state.regs.rbx != var2]) is False + def test_x86_64_func_call(): cache_path = os.path.join(CACHE_DIR, "1after909") proj = angr.Project(os.path.join(BIN_DIR, "tests", "x86_64", "1after909"), auto_load_libs=False) diff --git a/tests/test_rop.py b/tests/test_rop.py index a726dad..3309984 100644 --- a/tests/test_rop.py +++ b/tests/test_rop.py @@ -236,6 +236,7 @@ def run_all(): all_functions = dict([x for x in functions.items() if x[0].startswith('test_')]) for f in sorted(all_functions.keys()): if hasattr(all_functions[f], '__call__'): + print(f) all_functions[f]() From 851b3e7d5b0a6c4fb74616b475c839f4eeb775f5 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 15 Jan 2025 17:21:14 -0700 Subject: [PATCH 064/106] do static analysis first before symbolic execution for performance purpose --- angrop/gadget_finder/gadget_analyzer.py | 62 ++++++++++++++++--------- tests/test_chainbuilder.py | 1 + tests/test_find_gadgets.py | 17 +++---- 3 files changed, 46 insertions(+), 34 deletions(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index ff4ff55..92ac062 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -1,3 +1,4 @@ +import ctypes import logging from collections import defaultdict @@ -67,17 +68,15 @@ def analyze_gadget(self, addr, allow_conditional_branches=False): else None ) - @rop_utils.timeout(3) - def _analyze_gadget(self, addr, allow_conditional_branches): - l.info("Analyzing 0x%x", addr) - - # Step 1: first check if the block makes sense - if not self._block_make_sense(addr): - return [] + def _step_to_gadget_stopping_states(self, init_state): + """ + Currently, the following scenarios are considered as stopping states: + 1. unconstrained (e.g. ret) + 2. invokes syscall (e.g. syscall) + for gadgets invoking syscalls, we will try to step over it to find gadgets such as "syscall; ret" + """ try: - init_state = self._state.copy() - init_state.ip = addr simgr = self.project.factory.simulation_manager(init_state, save_unconstrained=True) def filter(state): @@ -88,7 +87,7 @@ def filter(state): return "syscall" return None - simgr.run(n=4, num_inst=30, filter_func=filter) + simgr.run(n=2, filter_func=filter) except (claripy.errors.ClaripySolverInterruptError, claripy.errors.ClaripyZ3Error, ValueError): return [] @@ -104,9 +103,25 @@ def filter(state): if "syscall" in simgr.stashes: final_states.extend(self._try_stepping_past_syscall(state) for state in simgr.syscall) - if not allow_conditional_branches and ( - simgr.active or simgr.deadended or len(final_states) != 1 - ): + bad_states = simgr.active + simgr.deadended + + return final_states, bad_states + + @rop_utils.timeout(3) + def _analyze_gadget(self, addr, allow_conditional_branches): + l.info("Analyzing 0x%x", addr) + + # Step 1: first statically check if the block can reach stopping states + # static analysis is much faster + if not self._can_reach_stopping_states(addr, allow_conditional_branches): + return [] + + # Step 2: get all potential successor states + init_state = self._state.copy() + init_state.ip = addr + final_states, bad_states = self._step_to_gadget_stopping_states(init_state) + + if not allow_conditional_branches and (bad_states or len(final_states) != 1): return [] gadgets = [] @@ -150,6 +165,8 @@ def filter(state): continue except (angr.errors.AngrError, angr.errors.AngrRuntimeError, angr.errors.SimError): continue + except ctypes.ArgumentError as e: + continue return gadgets @@ -238,31 +255,30 @@ def _block_make_sense(self, addr): def is_in_kernel(self, state): return rop_utils.is_in_kernel(self.project, state) - def _can_reach_unconstrained(self, addr, max_steps=2): + def _can_reach_stopping_states(self, addr, allow_conditional_branches, max_steps=2): """ Use static analysis to check whether the address can lead to unconstrained targets It is much faster than directly doing symbolic execution on the addr """ + if not self._block_make_sense(addr): + return False + b = self.project.factory.block(addr) constant_jump_targets = list(b.vex.constant_jump_targets) if not constant_jump_targets: return True - # we drop block that have more than 1 jump targets - # technically, this check make us miss some gadgets that have a branch that is never satisfiable - # but it is what we need to pay for performance - if len(constant_jump_targets) > 1: + if not allow_conditional_branches and len(constant_jump_targets) > 1: return False if max_steps == 0: return False - target_block_addr = constant_jump_targets[0] - if not self._block_make_sense(target_block_addr): - return False - - return self._can_reach_unconstrained(target_block_addr, max_steps-1) + for target_block_addr in constant_jump_targets: + if self._can_reach_stopping_states(target_block_addr, max_steps-1): + return True + return False def _reach_unconstrained_or_syscall(self, addr): init_state = self._state.copy() diff --git a/tests/test_chainbuilder.py b/tests/test_chainbuilder.py index 7f668b1..a38d54b 100644 --- a/tests/test_chainbuilder.py +++ b/tests/test_chainbuilder.py @@ -482,6 +482,7 @@ def run_all(): all_functions = {x:y for x, y in functions.items() if x.startswith('test_')} for f in sorted(all_functions.keys()): if hasattr(all_functions[f], '__call__'): + print(f) all_functions[f]() if __name__ == "__main__": diff --git a/tests/test_find_gadgets.py b/tests/test_find_gadgets.py index 325f1dc..28309cf 100644 --- a/tests/test_find_gadgets.py +++ b/tests/test_find_gadgets.py @@ -155,6 +155,10 @@ def test_shift_gadget(): assert all(gadget_exists(rop, x) for x in [0x454e75, 0x5622d5, 0x490058]) def test_i386_syscall(): + """ + in 32-bit world, syscall instruction is only valid for AMD CPUs, we consider it invalid in angrop for + better portability, see https://github.com/angr/angrop/issues/104 + """ # pylint: disable=pointless-string-statement proj = angr.Project(os.path.join(tests_dir, "i386", "angrop_syscall_test"), auto_load_libs=False) @@ -162,22 +166,13 @@ def test_i386_syscall(): """ 804918c int 0x80 """ - """ - 8049195 mov esp, 0x804c038 - 804919a ret - """ - assert all(gadget_exists(rop, x) for x in [0x804918c, 0x8049195]) + assert all(gadget_exists(rop, x) for x in [0x804918c]) """ 8049189 syscall """ - - """ - 804918f mov esp, 0x804c020 - 8049194 ret - """ - assert all(not gadget_exists(rop, x) for x in [0x8049189, 0x804918f]) + assert all(not gadget_exists(rop, x) for x in [0x8049189]) def test_gadget_timeout(): # pylint: disable=pointless-string-statement From 1901cdf5e5580c21e54b2890c8e08877e9f760da Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 15 Jan 2025 17:30:02 -0700 Subject: [PATCH 065/106] oops --- angrop/gadget_finder/gadget_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 92ac062..b5699e0 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -276,7 +276,7 @@ def _can_reach_stopping_states(self, addr, allow_conditional_branches, max_steps return False for target_block_addr in constant_jump_targets: - if self._can_reach_stopping_states(target_block_addr, max_steps-1): + if self._can_reach_stopping_states(target_block_addr, allow_conditional_branches, max_steps-1): return True return False From 0150c4b869821dd8b042b756a2315124a5cb481a Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 15 Jan 2025 17:43:54 -0700 Subject: [PATCH 066/106] better handling syscall register after symbolic execution --- angrop/gadget_finder/gadget_analyzer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index b5699e0..19052de 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -88,6 +88,7 @@ def filter(state): return None simgr.run(n=2, filter_func=filter) + simgr.move(from_stash='active', to_stash='syscall', filter_func=lambda s: rop_utils.is_in_kernel(self.project, s)) except (claripy.errors.ClaripySolverInterruptError, claripy.errors.ClaripyZ3Error, ValueError): return [] From aba70061ed039f9747843e0eec067982da72af83 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 15 Jan 2025 18:01:05 -0700 Subject: [PATCH 067/106] fix issues with gadget finder --- angrop/gadget_finder/__init__.py | 6 +++--- angrop/gadget_finder/gadget_analyzer.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index b9755d1..3b9031b 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -132,9 +132,9 @@ def analyze_gadget_list(self, addr_list, processes=4, show_progress=True): with Pool(processes=processes, initializer=_set_global_gadget_analyzer, initargs=initargs) as pool: it = pool.imap_unordered(run_worker, iterable, chunksize=1) - for gadget in it: - if gadget is not None: - gadgets.append(gadget) + for gs in it: + if gs: + gadgets += gs return sorted(gadgets, key=lambda x: x.addr) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 19052de..975d8a0 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -91,14 +91,14 @@ def filter(state): simgr.move(from_stash='active', to_stash='syscall', filter_func=lambda s: rop_utils.is_in_kernel(self.project, s)) except (claripy.errors.ClaripySolverInterruptError, claripy.errors.ClaripyZ3Error, ValueError): - return [] + return [], [] except (claripy.ClaripyFrontendError, angr.engines.vex.claripy.ccall.CCallMultivaluedException) as e: l.warning("... claripy error: %s", e) - return [] + return [], [] except (angr.errors.AngrError, angr.errors.AngrRuntimeError, angr.errors.SimError): - return [] + return [], [] except RopTimeoutException: - return [] + return [], [] final_states = list(simgr.unconstrained) if "syscall" in simgr.stashes: @@ -250,6 +250,8 @@ def _block_make_sense(self, addr): return False except AttributeError: return False + except KeyError: + return False return True From 5d374ef3c2aeb749b58ae0aa889db86ce22d2489 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 15 Jan 2025 18:07:44 -0700 Subject: [PATCH 068/106] fix analyze_gadget_list test case --- tests/test_find_gadgets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_find_gadgets.py b/tests/test_find_gadgets.py index 28309cf..1f5c48f 100644 --- a/tests/test_find_gadgets.py +++ b/tests/test_find_gadgets.py @@ -193,7 +193,7 @@ def local_multiprocess_analyze_gadget_list(): 0x4005d8 bad instruction """ gadgets = rop.analyze_gadget_list([0x4006d8, 0x4005d8, 0x400864]) - assert len(gadgets[1]) == 2 + assert len(gadgets) == 2 assert gadgets[0].addr == 0x4006d8 assert gadgets[1].addr == 0x400864 From a1fdccc5ddb25572db675cb4bb9c1942c2cf3d3e Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 16 Jan 2025 12:32:09 -0700 Subject: [PATCH 069/106] fix function calling in i386 --- angrop/chain_builder/__init__.py | 4 ++-- angrop/chain_builder/func_caller.py | 2 +- angrop/chain_builder/shifter.py | 29 ++++++++++++++++++++++--- angrop/rop_chain.py | 33 ++++++++++++++++------------- tests/test_chainbuilder.py | 25 ++++++++++++++++++++++ 5 files changed, 72 insertions(+), 21 deletions(-) diff --git a/angrop/chain_builder/__init__.py b/angrop/chain_builder/__init__.py index 6b88434..fe5a9fa 100644 --- a/angrop/chain_builder/__init__.py +++ b/angrop/chain_builder/__init__.py @@ -135,13 +135,13 @@ def execve(self, path=None, path_addr=None): return None return self._sys_caller.execve(path=path, path_addr=path_addr) - def shift(self, length, preserve_regs=None): + def shift(self, length, preserve_regs=None, next_pc_idx=-1): """ build a rop chain to shift the stack to a specific value :param length: the length of sp you want to shift :param preserve_regs: set of registers to preserve, e.g. ('eax', 'ebx') """ - return self._shifter.shift(length, preserve_regs=preserve_regs) + return self._shifter.shift(length, preserve_regs=preserve_regs, next_pc_idx=next_pc_idx) def retsled(self, size, preserve_regs=None): """ diff --git a/angrop/chain_builder/func_caller.py b/angrop/chain_builder/func_caller.py index 5ea7951..a69f988 100644 --- a/angrop/chain_builder/func_caller.py +++ b/angrop/chain_builder/func_caller.py @@ -66,7 +66,7 @@ def _func_call(self, func_gadget, cc, args, extra_regs=None, preserve_regs=None, # 1. handle stack arguments # 2. handle function return address to maintain the control flow if stack_arguments: - cleaner = self.chain_builder.shift((len(stack_arguments)+1)*arch_bytes) # +1 for itself + cleaner = self.chain_builder.shift((len(stack_arguments)+1)*arch_bytes, next_pc_idx=-1) chain.add_gadget(cleaner._gadgets[0]) for arg in stack_arguments: chain.add_value(arg) diff --git a/angrop/chain_builder/shifter.py b/angrop/chain_builder/shifter.py index 5828d5d..b49894a 100644 --- a/angrop/chain_builder/shifter.py +++ b/angrop/chain_builder/shifter.py @@ -1,6 +1,7 @@ import logging from collections import defaultdict +from .. import rop_utils from .builder import Builder from ..rop_chain import RopChain from ..errors import RopException @@ -64,7 +65,12 @@ def same_effect(g1, g2): return False return True - def shift(self, length, preserve_regs=None): + def shift(self, length, preserve_regs=None, next_pc_idx=-1): + """ + length: how many bytes to shift + preserve_regs: what registers not to clobber + next_pc_idx: where is the next pc, e.g for ret, it is -1 + """ preserve_regs = set(preserve_regs) if preserve_regs else set() arch_bytes = self.project.arch.bytes @@ -75,14 +81,31 @@ def shift(self, length, preserve_regs=None): raise RopException("Encounter a shifting request that requires chaining multiple shifting gadgets " + "together which is not support atm. Plz create an issue on GitHub " + "so we can add the support!") + g_cnt = length // arch_bytes + next_pc_idx = (next_pc_idx % g_cnt + g_cnt) % g_cnt # support negative indexing for g in self.shift_gadgets[length]: if preserve_regs.intersection(g.changed_regs): continue + if next_pc_idx == g_cnt-1: + if g.transit_type != 'ret': + continue + else: + if g.transit_type != 'pop_pc': + continue + if g.pc_offset != next_pc_idx*arch_bytes: + continue try: chain = RopChain(self.project, self.chain_builder) chain.add_gadget(g) - for _ in range(g.stack_change//arch_bytes-1): - chain.add_value(self._get_fill_val()) + for idx in range(g_cnt): + if idx != next_pc_idx: + chain.add_value(self._get_fill_val()) + else: + next_pc_val = rop_utils.cast_rop_value( + chain._blank_state.solver.BVS("next_pc", self.project.arch.bits), + self.project, + ) + chain.add_value(next_pc_val) if self.verify_shift(chain, length, preserve_regs): return chain except RopException: diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index 534a0b3..2c18ab3 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -11,12 +11,12 @@ class RopChain: """ cls_timeout = CHAIN_TIMEOUT_DEFAULT - def __init__(self, project, rop, state=None, badbytes=None): + def __init__(self, project, builder, state=None, badbytes=None): """ """ self._p = project self._pie = self._p.loader.main_object.pic - self._rop = rop + self._builder = builder self._gadgets = [] self._values = [] @@ -147,19 +147,22 @@ def __concretize_chain_values(self, constraints=None): return concrete_vals - def _concretize_chain_values(self, constraints=None, timeout=None, preserve_next_pc=False): + def _concretize_chain_values(self, constraints=None, timeout=None, preserve_next_pc=False, append_shift=False): """ concretize chain values with a timeout """ - if self.next_pc_idx() is not None: + if self.next_pc_idx() is not None and append_shift: try: - return ( - self + self._rop.chain_builder.shift(self._p.arch.bytes) - )._concretize_chain_values( - constraints=constraints, - timeout=timeout, - preserve_next_pc=preserve_next_pc, - ) + # the following line is the final touch for chains ending with retn-style + # gadget to make sure that the next_pc is at the end of the chain + chain = self + self._builder.chain_builder.shift(self._p.arch.bytes) + values = chain._concretize_chain_values( + constraints=constraints, + timeout=timeout, + preserve_next_pc=preserve_next_pc, + append_shift=False, + ) + return values except RopException: pass if timeout is None: @@ -181,7 +184,7 @@ def payload_str(self, constraints=None, base_addr=None, timeout=None): if base_addr is None: base_addr = self._p.loader.main_object.mapped_base test_state = self._blank_state.copy() - concrete_vals = self._concretize_chain_values(constraints, timeout=timeout) + concrete_vals = self._concretize_chain_values(constraints, timeout=timeout, append_shift=True) for value, rebased in reversed(concrete_vals): if rebased: test_state.stack_push(value - self._p.loader.main_object.mapped_base + base_addr) @@ -230,7 +233,7 @@ def payload_code(self, constraints=None, print_instructions=True, timeout=None): payload = "" payload += 'chain = b""\n' - concrete_vals = self._concretize_chain_values(constraints, timeout=timeout) + concrete_vals = self._concretize_chain_values(constraints, timeout=timeout, append_shift=True) for value, rebased in concrete_vals: instruction_code = "" @@ -263,7 +266,7 @@ def exec(self, max_steps=None, timeout=None): state = self._blank_state.copy() state.solver.reload_solver([]) # remove constraints state.regs.pc = self._values[0].concreted - concrete_vals = self._concretize_chain_values(timeout=timeout, preserve_next_pc=True) + concrete_vals = self._concretize_chain_values(timeout=timeout, preserve_next_pc=True, append_shift=False) # when the chain data includes symbolic values, we need to replace the concrete values # with the user's symbolic data @@ -288,7 +291,7 @@ def exec(self, max_steps=None, timeout=None): allow_simprocedures=True) def copy(self): - cp = RopChain(self._p, self._rop) + cp = RopChain(self._p, self._builder) cp._gadgets = list(self._gadgets) cp._values = list(self._values) cp.payload_len = self.payload_len diff --git a/tests/test_chainbuilder.py b/tests/test_chainbuilder.py index a38d54b..0605920 100644 --- a/tests/test_chainbuilder.py +++ b/tests/test_chainbuilder.py @@ -4,6 +4,7 @@ import angrop # pylint: disable=unused-import import claripy from angrop.rop_value import RopValue +from angrop.errors import RopException BIN_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "binaries") CACHE_DIR = os.path.join(BIN_DIR, 'tests_data', 'angrop_gadgets_cache') @@ -394,6 +395,30 @@ def test_pop_pc_syscall_chain(): assert state.regs.rdi.concrete_value == 0x41414141 assert 0 not in state.posix.fd +def test_retn_i386_call_chain(): + cache_path = os.path.join(CACHE_DIR, "bronze_ropchain") + proj = angr.Project(os.path.join(BIN_DIR, "tests", "i386", "bronze_ropchain"), auto_load_libs=False) + rop = proj.analyses.ROP() + + if os.path.exists(cache_path): + rop.load_gadgets(cache_path) + else: + rop.find_gadgets() + rop.save_gadgets(cache_path) + + # force to use 'retn 0xc' to clean up function arguments + g = rop.analyze_gadget(0x809d9fb) + rop._chain_builder._shifter.shift_gadgets = {g.stack_change: [g]} + + rop.func_call('write', [1, 0x80AC5E8, 17], needs_return=False) + + chain = None + try: + chain = rop.func_call('write', [1, 0x80AC5E8, 17]) + except RopException: + pass + assert chain is None + def test_aarch64_basic_reg_setting(): proj = angr.load_shellcode( """ From 6b0b5355649c83bd1e14753df8bd5de0a5034318 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 16 Jan 2025 15:02:32 -0700 Subject: [PATCH 070/106] fix function calls for lr-based calling convention --- angrop/chain_builder/func_caller.py | 7 +++++-- angrop/chain_builder/reg_setter.py | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/angrop/chain_builder/func_caller.py b/angrop/chain_builder/func_caller.py index a69f988..45a2539 100644 --- a/angrop/chain_builder/func_caller.py +++ b/angrop/chain_builder/func_caller.py @@ -43,12 +43,15 @@ def _func_call(self, func_gadget, cc, args, extra_regs=None, preserve_regs=None, stack_arguments = args[len(cc.ARG_REGS):] # set register arguments + if needs_return and isinstance(cc.RETURN_ADDR, SimRegArg) and cc.RETURN_ADDR.reg_name != 'ip_at_syscall': + reg_name = cc.RETURN_ADDR.reg_name + preserve_regs.add(reg_name) registers = {} if extra_regs is None else extra_regs for arg, reg in zip(register_arguments, cc.ARG_REGS): registers[reg] = arg for reg in preserve_regs: registers.pop(reg, None) - chain = self.chain_builder.set_regs(**registers) + chain = self.chain_builder.set_regs(**registers, preserve_regs=preserve_regs) # invoke the function chain.add_gadget(func_gadget) @@ -66,7 +69,7 @@ def _func_call(self, func_gadget, cc, args, extra_regs=None, preserve_regs=None, # 1. handle stack arguments # 2. handle function return address to maintain the control flow if stack_arguments: - cleaner = self.chain_builder.shift((len(stack_arguments)+1)*arch_bytes, next_pc_idx=-1) + cleaner = self.chain_builder.shift((len(stack_arguments)+1)*arch_bytes, next_pc_idx=-1, preserve_regs=preserve_regs) chain.add_gadget(cleaner._gadgets[0]) for arg in stack_arguments: chain.add_value(arg) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index ecfa4c3..5de0987 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -163,8 +163,8 @@ def _recursively_find_chains(self, gadgets, chain, preserve_regs, todo_regs, har continue if g.changed_regs.intersection(hard_preserve_regs): continue - destory_regs = g.changed_regs.intersection(preserve_regs) - if destory_regs - set_regs: + clobbered_regs = g.changed_regs.intersection(preserve_regs) + if clobbered_regs - set_regs: continue new_preserve = preserve_regs.copy() new_preserve.update(set_regs) From 9bd01db0e3a010ff03e0813d1222d3e2aee0a1dd Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 16 Jan 2025 20:18:50 -0700 Subject: [PATCH 071/106] make gadget filtering 10x faster --- angrop/chain_builder/builder.py | 46 +++++++++++++++++ angrop/chain_builder/mem_changer.py | 1 - angrop/chain_builder/mem_writer.py | 1 - angrop/chain_builder/pivot.py | 36 +++---------- angrop/chain_builder/reg_mover.py | 43 ++++++++-------- angrop/chain_builder/reg_setter.py | 80 +++++++++++++++++++++-------- angrop/chain_builder/shifter.py | 40 +++++---------- angrop/chain_builder/sys_caller.py | 1 - angrop/rop_gadget.py | 46 ----------------- 9 files changed, 147 insertions(+), 147 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index 7dcf882..89a0ed5 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -1,6 +1,7 @@ import struct from abc import abstractmethod from functools import cmp_to_key +from collections import defaultdict import claripy @@ -245,6 +246,51 @@ def _get_fill_val(self): else: return claripy.BVS("filler", self.project.arch.bits) + @abstractmethod + def _same_effect(self, g1, g2): + raise NotImplementedError("_same_effect is not implemented!") + + @abstractmethod + def _better_than(self, g1, g2): + raise NotImplementedError("_better_than is not implemented!") + + def same_effect(self, g1, g2): + return self._same_effect(g1, g2) + + def better_than(self, g1, g2): + if not self.same_effect(g1, g2): + return False + return self._better_than(g1, g2) + + def __filter_gadgets(self, gadgets): + """ + remove any gadgets that are strictly worse than others + FIXME: make all gadget filtering logic like what we do in reg_setter, which is correct and way more faster + """ + gadgets = set(gadgets) + bests = set() + while gadgets: + g1 = gadgets.pop() + # check if nothing is better than g1 + for g2 in gadgets: + if self._better_than(g2, g1): + break + else: + bests.add(g1) + return bests + + def _filter_gadgets(self, gadgets): + bests = set() + gadgets = set(gadgets) + while gadgets: + g0 = gadgets.pop() + equal_class = {g for g in gadgets if self._same_effect(g0, g)} + equal_class.add(g0) + bests = bests.union(self.__filter_gadgets(equal_class)) + + gadgets -= equal_class + return bests + @abstractmethod def update(self): raise NotImplementedError("each Builder class should have an `update` method!") diff --git a/angrop/chain_builder/mem_changer.py b/angrop/chain_builder/mem_changer.py index 8894e20..8e4e03b 100644 --- a/angrop/chain_builder/mem_changer.py +++ b/angrop/chain_builder/mem_changer.py @@ -18,7 +18,6 @@ def __init__(self, chain_builder): super().__init__(chain_builder) self._mem_change_gadgets = None self._mem_add_gadgets = None - self.update() def update(self): self._mem_change_gadgets = self._get_all_mem_change_gadgets(self.chain_builder.gadgets) diff --git a/angrop/chain_builder/mem_writer.py b/angrop/chain_builder/mem_writer.py index 649eded..6ef06f8 100644 --- a/angrop/chain_builder/mem_writer.py +++ b/angrop/chain_builder/mem_writer.py @@ -19,7 +19,6 @@ class MemWriter(Builder): def __init__(self, chain_builder): super().__init__(chain_builder) self._mem_write_gadgets = None - self.update() def update(self): self._mem_write_gadgets = self._get_all_mem_write_gadgets(self.chain_builder.gadgets) diff --git a/angrop/chain_builder/pivot.py b/angrop/chain_builder/pivot.py index 1cb5311..5ac2edb 100644 --- a/angrop/chain_builder/pivot.py +++ b/angrop/chain_builder/pivot.py @@ -31,14 +31,9 @@ class Pivot(Builder): def __init__(self, chain_builder): super().__init__(chain_builder) self._pivot_gadgets = None - self.update() def update(self): - self._pivot_gadgets = [ - gadget - for gadget in self.chain_builder.pivot_gadgets - if not gadget.has_conditional_branch - ] + self._pivot_gadgets = self.filter_gadgets(self.chain_builder.pivot_gadgets) def pivot(self, thing): if thing.is_register: @@ -115,8 +110,7 @@ def pivot_reg(self, reg_val): raise RopException(f"Fail to pivot the stack to {reg}!") - @staticmethod - def same_effect(g1, g2): + def _same_effect(self, g1, g2): if g1.sp_controllers != g2.sp_controllers: return False if g1.stack_change != g2.stack_change: @@ -125,9 +119,7 @@ def same_effect(g1, g2): return False return True - def better_than(self, g1, g2): - if not self.same_effect(g1, g2): - return False + def _better_than(self, g1, g2): if g1.num_mem_access > g2.num_mem_access: return False if not g1.changed_regs.issubset(g2.changed_regs): @@ -136,21 +128,7 @@ def better_than(self, g1, g2): return False return True - def _filter_gadgets(self, gadgets): - """ - filter gadgets having the same effect - """ - gadgets = set(gadgets) - skip = set({}) - while True: - to_remove = set({}) - for g in gadgets-skip: - to_remove.update({x for x in gadgets-{g} if self.better_than(g, x)}) - if to_remove: - break - skip.add(g) - if not to_remove: - break - gadgets -= to_remove - gadgets = sorted(gadgets, key=functools.cmp_to_key(cmp)) - return gadgets + def filter_gadgets(self, gadgets): + gadgets = [x for x in gadgets if not x.has_conditional_branch] + gadgets = self._filter_gadgets(gadgets) + return sorted(gadgets, key=functools.cmp_to_key(cmp)) diff --git a/angrop/chain_builder/reg_mover.py b/angrop/chain_builder/reg_mover.py index 9c92369..4903003 100644 --- a/angrop/chain_builder/reg_mover.py +++ b/angrop/chain_builder/reg_mover.py @@ -17,10 +17,9 @@ class RegMover(Builder): def __init__(self, chain_builder): super().__init__(chain_builder) self._reg_moving_gadgets = None - self.update() def update(self): - self._reg_moving_gadgets = self._filter_gadgets(self.chain_builder.gadgets) + self._reg_moving_gadgets = self.filter_gadgets(self.chain_builder.gadgets) def verify(self, chain, preserve_regs, registers): """ @@ -117,28 +116,13 @@ def run(self, preserve_regs=None, **registers): raise RopException("Couldn't move registers :(") - @staticmethod - def _filter_gadgets(gadgets): + def filter_gadgets(self, gadgets): """ filter gadgets having the same effect """ - gadgets = {g for g in gadgets if not g.has_conditional_branch} - # first: filter out gadgets that don't do register move - gadgets = set(x for x in gadgets if x.reg_moves) - # # second: remove gadgets that are strictly worse than some others - # skip = set({}) - # while True: - # to_remove = set({}) - # for g in gadgets-skip: - # to_remove.update({x for x in gadgets-{g} if g.reg_move_better_than(x)}) - # if to_remove: - # break - # skip.add(g) - # if not to_remove: - # break - # gadgets -= to_remove - # third: remove gadgets that only move from itself to itself, it is not helpful - # for exploitation + # first: filter out gadgets that don't do register move or have conditional branches + gadgets = {g for g in gadgets if not g.has_conditional_branch and g.reg_moves} + gadgets = self._filter_gadgets(gadgets) new_gadgets = set(x for x in gadgets if any(y.from_reg != y.to_reg for y in x.reg_moves)) return new_gadgets @@ -153,3 +137,20 @@ def _find_relevant_gadgets(self, moves): if moves.intersection(set(g.reg_moves)): gadgets.add(g) return gadgets + + def _same_effect(self, g1, g2): + """ + having the same register moving effect compared to the other gadget + """ + if set(g1.reg_moves) != set(g2.reg_moves): + return False + if g1.reg_dependencies != g2.reg_dependencies: + return False + return True + + def _better_than(self, g1, g2): + if g1.stack_change <= g2.stack_change and \ + g1.num_mem_access <= g2.num_mem_access and \ + g1.isn_count <= g2.isn_count: + return True + return False diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 5de0987..42bc274 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -25,10 +25,9 @@ def __init__(self, chain_builder): self.hard_chain_cache = None # Estimate of how difficult it is to set each register. self._reg_weights = None - self.update() def update(self): - self._reg_setting_gadgets = self.chain_builder.gadgets + self._reg_setting_gadgets = self.filter_gadgets(self.chain_builder.gadgets) self.hard_chain_cache = {} reg_pops = Counter() for gadget in self._reg_setting_gadgets: @@ -247,25 +246,6 @@ def _find_all_candidate_chains(self, gadgets, preserve_regs, **registers): set(registers.keys()), preserve_regs) return self._sort_chains(chains) - @staticmethod - def _filter_gadgets(gadgets): - """ - filter gadgets having the same effect - """ - gadgets = set(gadgets) - skip = set({}) - while True: - to_remove = set({}) - for g in gadgets-skip: - to_remove.update({x for x in gadgets-{g} if g.reg_set_better_than(x)}) - if to_remove: - break - skip.add(g) - if not to_remove: - break - gadgets -= to_remove - return gadgets - @staticmethod def _tuple_to_gadgets(data, reg_tuple): """ @@ -637,3 +617,61 @@ def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str remaining_regs |= gadget.constraint_regs return remaining_regs + + def __filter_gadgets(self, gadgets): + d = defaultdict(list) + for g in gadgets: + key = (len(g.changed_regs), g.stack_change, g.num_mem_access, g.isn_count) + d[key].append(g) + if len(d) == 0: + return set() + if len(d) == 1: + return {gadgets.pop()} + + keys = set(d.keys()) + bests = set() + while keys: + k1 = keys.pop() + # check if nothing is better than k1 + for k2 in keys: + # if k2 is better than k1 + if all(k2[i] <= k1[i] for i in range(4)): + break + else: + bests.add(k1) + + gadgets = set() + for key, val in d.items(): + if key not in bests: + continue + gadgets = gadgets.union(val) + return gadgets + + def _same_effect(self, g1, g2): + if g1.popped_regs != g2.popped_regs: + return False + if g1.concrete_regs != g2.concrete_regs: + return False + if g1.reg_dependencies != g2.reg_dependencies: + return False + if g1.transit_type != g2.transit_type: + return False + return True + + def _filter_gadgets(self, gadgets): + bests = set() + gadgets = set(gadgets) + while gadgets: + g0 = gadgets.pop() + equal_class = {g for g in gadgets if self._same_effect(g0, g)} + equal_class.add(g0) + bests = bests.union(self.__filter_gadgets(equal_class)) + + gadgets -= equal_class + return bests + + def filter_gadgets(self, gadgets): + """ + filter gadgets having the same effect + """ + return self._filter_gadgets(gadgets) \ No newline at end of file diff --git a/angrop/chain_builder/shifter.py b/angrop/chain_builder/shifter.py index b49894a..f14821b 100644 --- a/angrop/chain_builder/shifter.py +++ b/angrop/chain_builder/shifter.py @@ -16,10 +16,9 @@ def __init__(self, chain_builder): super().__init__(chain_builder) self.shift_gadgets = None - self.update() def update(self): - self.shift_gadgets = self._filter_gadgets(self.chain_builder.gadgets) + self.shift_gadgets = self.filter_gadgets(self.chain_builder.gadgets) def verify_shift(self, chain, length, preserve_regs): arch_bytes = self.project.arch.bytes @@ -57,14 +56,6 @@ def verify_retsled(self, chain, size, preserve_regs): return False return True - @staticmethod - def same_effect(g1, g2): - if g1.stack_change != g2.stack_change: - return False - if g1.transit_type != g2.transit_type: - return False - return True - def shift(self, length, preserve_regs=None, next_pc_idx=-1): """ length: how many bytes to shift @@ -133,12 +124,19 @@ def retsled(self, size, preserve_regs=None): raise RopException(f"Failed to create a ret-sled sp for {size:#x} bytes while preserving {preserve_regs}") - def better_than(self, g1, g2): - if not self.same_effect(g1, g2): + def _same_effect(self, g1, g2): + if g1.stack_change != g2.stack_change: + return False + if g1.transit_type != g2.transit_type: return False - return g1.changed_regs.issubset(g2.changed_regs) + if g1.changed_regs != g2.changed_regs: # needed for preserve_regs + return False + return True + + def _better_than(self, g1, g2): + return False - def _filter_gadgets(self, gadgets): + def filter_gadgets(self, gadgets): """ filter gadgets having the same effect """ @@ -151,19 +149,7 @@ def _filter_gadgets(self, gadgets): and not x.has_conditional_branch ] - # now do the standard filtering - # gadgets = set(gadgets) - # skip = set({}) - # while True: - # to_remove = set({}) - # for g in gadgets-skip: - # to_remove.update({x for x in gadgets-{g} if self.better_than(g, x)}) - # if to_remove: - # break - # skip.add(g) - # if not to_remove: - # break - # gadgets -= to_remove + gadgets = self._filter_gadgets(gadgets) d = defaultdict(list) for g in gadgets: diff --git a/angrop/chain_builder/sys_caller.py b/angrop/chain_builder/sys_caller.py index f3fbfb1..eb85097 100644 --- a/angrop/chain_builder/sys_caller.py +++ b/angrop/chain_builder/sys_caller.py @@ -43,7 +43,6 @@ def __init__(self, chain_builder): super().__init__(chain_builder) self.syscall_gadgets = None - self.update() @staticmethod def supported_os(os): diff --git a/angrop/rop_gadget.py b/angrop/rop_gadget.py index 74b192b..d01f467 100644 --- a/angrop/rop_gadget.py +++ b/angrop/rop_gadget.py @@ -141,52 +141,6 @@ def has_symbolic_access(self): accesses = set(self.mem_reads + self.mem_writes + self.mem_changes) return any(x.is_symbolic_access() for x in accesses) - def reg_set_same_effect(self, other): - """ - having the same register setting effect compared to the other gadget - """ - if self.popped_regs != other.popped_regs: - return False - if self.concrete_regs != other.concrete_regs: - return False - if self.reg_dependencies != other.reg_dependencies: - return False - if self.transit_type != other.transit_type: - return False - return True - - def reg_set_better_than(self, other): - """ - whether this gadget is strictly better than the other in terms of register setting effect - """ - if not self.reg_set_same_effect(other): - return False - if len(self.changed_regs) >= len(other.changed_regs) and \ - self.stack_change <= other.stack_change and \ - self.num_mem_access <= other.num_mem_access and \ - self.isn_count <= other.isn_count: - return True - return False - - def reg_move_same_effect(self, other): - """ - having the same register moving effect compared to the other gadget - """ - if set(self.reg_moves) != set(other.reg_moves): - return False - if self.reg_dependencies != other.reg_dependencies: - return False - return True - - def reg_move_better_than(self, other): - if not self.reg_move_same_effect(other): - return False - if self.stack_change <= other.stack_change and \ - self.num_mem_access <= other.num_mem_access and \ - self.block_length <= other.block_length: - return True - return False - def __str__(self): s = "Gadget %#x\n" % self.addr s += "Stack change: %#x\n" % self.stack_change From 1f25a18532f21269f9bc3ed931b1dfd452487b20 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Fri, 17 Jan 2025 15:14:52 -0700 Subject: [PATCH 072/106] print bad chains for easier debugging --- angrop/rop_chain.py | 15 +++++++++++++-- tests/test_find_gadgets.py | 6 ++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index 2c18ab3..cc51175 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -1,3 +1,5 @@ +import logging + from . import rop_utils from .errors import RopException from .rop_gadget import RopGadget @@ -5,6 +7,8 @@ CHAIN_TIMEOUT_DEFAULT = 3 +l = logging.getLogger("angrop.chain_builder.reg_setter") + class RopChain: """ This class holds rop chains returned by the rop chain building methods such as rop.set_regs() @@ -287,8 +291,15 @@ def exec(self, max_steps=None, timeout=None): sum(len(gadget.bbl_addrs) for gadget in self._gadgets), 2 * len(self._gadgets), ) - return rop_utils.step_to_unconstrained_successor(self._p, state, max_steps=max_steps, - allow_simprocedures=True) + try: + state = rop_utils.step_to_unconstrained_successor(self._p, state, max_steps=max_steps, + allow_simprocedures=True) + except RopException as e: + code = self.payload_code(print_instructions=True) + l.error("The following chain fails to execute!") + l.error(code) + raise e + return state def copy(self): cp = RopChain(self._p, self._builder) diff --git a/tests/test_find_gadgets.py b/tests/test_find_gadgets.py index 1f5c48f..6f4bc4e 100644 --- a/tests/test_find_gadgets.py +++ b/tests/test_find_gadgets.py @@ -197,6 +197,12 @@ def local_multiprocess_analyze_gadget_list(): assert gadgets[0].addr == 0x4006d8 assert gadgets[1].addr == 0x400864 +def test_bad_gadgets(): + proj = angr.Project(os.path.join(tests_dir, "armel", "libc-2.31.so"), auto_load_libs=False) + rop = proj.analyses.ROP(fast_mode=False, only_check_near_rets=False, is_thumb=True) + g = rop.analyze_gadget(0x44cc95) + assert g is None + def run_all(): functions = globals() all_functions = {x:y for x, y in functions.items() if x.startswith('test_')} From 7e2f62e3145a47b966faa0f4131b4b691e7146b9 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Fri, 17 Jan 2025 16:05:27 -0700 Subject: [PATCH 073/106] fix gadget filtering --- angrop/chain_builder/builder.py | 2 +- angrop/chain_builder/reg_setter.py | 3 +-- tests/test_find_gadgets.py | 8 +++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index 89a0ed5..d361326 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -272,7 +272,7 @@ def __filter_gadgets(self, gadgets): while gadgets: g1 = gadgets.pop() # check if nothing is better than g1 - for g2 in gadgets: + for g2 in bests|gadgets: if self._better_than(g2, g1): break else: diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 42bc274..f0a5c57 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -633,13 +633,12 @@ def __filter_gadgets(self, gadgets): while keys: k1 = keys.pop() # check if nothing is better than k1 - for k2 in keys: + for k2 in bests|keys: # if k2 is better than k1 if all(k2[i] <= k1[i] for i in range(4)): break else: bests.add(k1) - gadgets = set() for key, val in d.items(): if key not in bests: diff --git a/tests/test_find_gadgets.py b/tests/test_find_gadgets.py index 6f4bc4e..4812aff 100644 --- a/tests/test_find_gadgets.py +++ b/tests/test_find_gadgets.py @@ -197,11 +197,13 @@ def local_multiprocess_analyze_gadget_list(): assert gadgets[0].addr == 0x4006d8 assert gadgets[1].addr == 0x400864 -def test_bad_gadgets(): +def test_gadget_filtering(): proj = angr.Project(os.path.join(tests_dir, "armel", "libc-2.31.so"), auto_load_libs=False) rop = proj.analyses.ROP(fast_mode=False, only_check_near_rets=False, is_thumb=True) - g = rop.analyze_gadget(0x44cc95) - assert g is None + rop.analyze_gadget(0x42bca5) + rop.analyze_gadget(0x42c3c1) + rop.chain_builder.update() + assert len(rop.chain_builder._reg_setter._reg_setting_gadgets) == 1 def run_all(): functions = globals() From f5cb48e5b05987185e1872af321314daa6b3ebe7 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Fri, 17 Jan 2025 18:16:28 -0700 Subject: [PATCH 074/106] implement gadget.pp() --- angrop/gadget_finder/__init__.py | 16 ++++++++++++++-- angrop/rop.py | 5 ++++- angrop/rop_gadget.py | 8 +++++++- angrop/rop_utils.py | 3 --- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index 3b9031b..768ca90 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -119,7 +119,10 @@ def _initialize_gadget_analyzer(self): kernel_mode=self.kernel_mode, stack_gsize=self.stack_gsize) def analyze_gadget(self, addr): - return self.gadget_analyzer.analyze_gadget(addr) + g = self.gadget_analyzer.analyze_gadget(addr) + if g: + g.project = self.project + return g def analyze_gadget_list(self, addr_list, processes=4, show_progress=True): gadgets = [] @@ -136,6 +139,9 @@ def analyze_gadget_list(self, addr_list, processes=4, show_progress=True): if gs: gadgets += gs + for g in gadgets: + g.project = self.project + return sorted(gadgets, key=lambda x: x.addr) def get_duplicates(self): @@ -145,7 +151,7 @@ def get_duplicates(self): cache = self._cache return {k:v for k,v in cache.items() if len(v) >= 2} - def find_gadgets(self, processes=4, show_progress=True): + def find_gadgets(self, processes=16, show_progress=True): self._cache = {} initargs = (self.gadget_analyzer,) @@ -167,6 +173,9 @@ def find_gadgets(self, processes=4, show_progress=True): ) ) + for g in gadgets: + g.project = self.project + return sorted(gadgets, key=lambda x: x.addr), self.get_duplicates() def find_gadgets_single_threaded(self, show_progress=True): @@ -178,6 +187,9 @@ def find_gadgets_single_threaded(self, show_progress=True): for addr in self._addresses_to_check_with_caching(show_progress): gadgets.extend(self.gadget_analyzer.analyze_gadget(addr, allow_conditional_branches=True)) + for g in gadgets: + g.project = self.project + return sorted(gadgets, key=lambda x: x.addr), self.get_duplicates() def _block_has_ip_relative(self, addr, bl): diff --git a/angrop/rop.py b/angrop/rop.py index 9798afa..285f105 100644 --- a/angrop/rop.py +++ b/angrop/rop.py @@ -143,11 +143,14 @@ def find_gadgets_single_threaded(self, show_progress=True): return self.rop_gadgets def _get_cache_tuple(self): - return (self._all_gadgets, self._duplicates) + all_gadgets = [x for x in self._all_gadgets] + for g in all_gadgets: g.project = None + return (all_gadgets, self._duplicates) def _load_cache_tuple(self, tup): self._all_gadgets = tup[0] self._duplicates = tup[1] + for g in self._all_gadgets: g.project = self.project self._screen_gadgets() def save_gadgets(self, path): diff --git a/angrop/rop_gadget.py b/angrop/rop_gadget.py index d01f467..f07afc9 100644 --- a/angrop/rop_gadget.py +++ b/angrop/rop_gadget.py @@ -1,3 +1,5 @@ +from .rop_utils import addr_to_asmstring + class RopMemAccess: """Holds information about memory accesses Attributes: @@ -93,6 +95,7 @@ class RopGadget: Gadget objects """ def __init__(self, addr): + self.project = None self.addr = addr self.block_length = None self.stack_change = None @@ -141,6 +144,9 @@ def has_symbolic_access(self): accesses = set(self.mem_reads + self.mem_writes + self.mem_changes) return any(x.is_symbolic_access() for x in accesses) + def pp(self): + print("; ".join(addr_to_asmstring(self.project, addr) for addr in self.bbl_addrs)) + def __str__(self): s = "Gadget %#x\n" % self.addr s += "Stack change: %#x\n" % self.stack_change @@ -198,7 +204,7 @@ def __repr__(self): return "" % self.addr def copy(self): - out = RopGadget(self.addr) + out = RopGadget(self.project, self.addr) out.addr = self.addr out.changed_regs = set(self.changed_regs) out.popped_regs = set(self.popped_regs) diff --git a/angrop/rop_utils.py b/angrop/rop_utils.py index d555ab9..d063a6f 100644 --- a/angrop/rop_utils.py +++ b/angrop/rop_utils.py @@ -11,9 +11,6 @@ def addr_to_asmstring(project, addr): block = project.factory.block(addr) return "; ".join(["%s %s" %(i.mnemonic, i.op_str) for i in block.capstone.insns]) -def gadget_to_asmstring(project, gadget): - return "; ".join(addr_to_asmstring(project, addr) for addr in gadget.bbl_addrs) - def get_ast_dependency(ast): """ From 40fc842640318906493dbd6d561b4d2d7aadeffa Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Fri, 17 Jan 2025 18:45:03 -0700 Subject: [PATCH 075/106] better pretty printing for gadget and chain --- angrop/chain_builder/func_caller.py | 5 +++-- angrop/rop_chain.py | 30 +++++++++++++++++++++++------ angrop/rop_gadget.py | 18 ++++++++++++++++- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/angrop/chain_builder/func_caller.py b/angrop/chain_builder/func_caller.py index 45a2539..d23c50c 100644 --- a/angrop/chain_builder/func_caller.py +++ b/angrop/chain_builder/func_caller.py @@ -7,7 +7,7 @@ from .builder import Builder from .. import rop_utils from ..errors import RopException -from ..rop_gadget import RopGadget +from ..rop_gadget import FunctionGadget l = logging.getLogger(__name__) @@ -95,6 +95,7 @@ def func_call(self, address, args, **kwargs): :param needs_return: whether to continue the ROP after invoking the function :return: a RopChain which invokes the function with the arguments """ + symbol = None # is it a symbol? if isinstance(address, str): symbol = address @@ -110,7 +111,7 @@ def func_call(self, address, args, **kwargs): self.project.arch.name, platform=self.project.simos.name if self.project.simos is not None else None, )(self.project.arch) - func_gadget = RopGadget(address) + func_gadget = FunctionGadget(address, symbol) func_gadget.stack_change = self.project.arch.bytes func_gadget.pc_offset = 0 return self._func_call(func_gadget, cc, args, **kwargs) diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index cc51175..1a7557b 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -210,6 +210,12 @@ def payload_bv(self): sp = test_state.regs.sp return test_state.memory.load(sp, self.payload_len) + def addr_to_asmstring(self, addr): + for g in self._gadgets: + if g.addr == addr: + return g.dstr() + return "" + def find_symbol(self, addr): plt = self._p.loader.find_plt_stub_name(addr) if plt: @@ -248,7 +254,7 @@ def payload_code(self, constraints=None, print_instructions=True, timeout=None): if symbol: instruction_code = f"\t# {symbol}" else: - asmstring = rop_utils.addr_to_asmstring(self._p, value) + asmstring = self.addr_to_asmstring(value) if asmstring != "": instruction_code = "\t# " + asmstring @@ -314,8 +320,20 @@ def copy(self): def __str__(self): return self.payload_code() - def print_gadget_asm(self): - for gadget in self._gadgets: - for addr in gadget.bbl_addrs: - self._p.factory.block(addr).pp() - print() + def dstr(self): + res = '' + for v in self._values: + if v.symbolic: + res += f"{v}\n" + continue + for g in self._gadgets: + if g.addr == v.concreted: + res += f"{g.addr:#x}: {g.dstr()}\n" + break + else: + res += f"{v.concreted:#x}\n" + + return res + + def pp(self): + print(self.dstr()) \ No newline at end of file diff --git a/angrop/rop_gadget.py b/angrop/rop_gadget.py index f07afc9..f751e4e 100644 --- a/angrop/rop_gadget.py +++ b/angrop/rop_gadget.py @@ -144,8 +144,11 @@ def has_symbolic_access(self): accesses = set(self.mem_reads + self.mem_writes + self.mem_changes) return any(x.is_symbolic_access() for x in accesses) + def dstr(self): + return "; ".join(addr_to_asmstring(self.project, addr) for addr in self.bbl_addrs) + def pp(self): - print("; ".join(addr_to_asmstring(self.project, addr) for addr in self.bbl_addrs)) + print(self.dstr()) def __str__(self): s = "Gadget %#x\n" % self.addr @@ -291,3 +294,16 @@ def copy(self): new.makes_syscall = self.makes_syscall new.starts_with_syscall = self.starts_with_syscall return new + +class FunctionGadget(RopGadget): + """ + a function call + """ + def __init__(self, addr, symbol): + super().__init__(addr) + self.symbol = symbol + + def dstr(self): + if self.symbol: + return f"<{self.symbol}>" + return f"" \ No newline at end of file From e8c57245ea1263db025e266fbc36b21d680eaeb5 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Tue, 21 Jan 2025 11:41:30 -0700 Subject: [PATCH 076/106] make _add_gadget_value a separate function --- angrop/chain_builder/builder.py | 2 +- angrop/rop_chain.py | 30 ++++++++++++++---------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index d361326..66d9bda 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -222,7 +222,7 @@ def map_stack_var(ast, value): if sym_var in stack_var_to_value: val = stack_var_to_value[sym_var] if isinstance(val, RopGadget): - chain.add_gadget(val, append_addr_only=True) + chain._add_gadget_value(val) else: # HACK: Because angrop appears to have originally been written # with assumptions around x86 ret gadgets, the target of the final jump diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index 1a7557b..f50c4c4 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -68,21 +68,10 @@ def add_value(self, value): self._values.append(value) self.payload_len += self._p.arch.bytes - def add_gadget(self, gadget, append_addr_only=False): - # angrop was originally written with the assumption that gadget addresses - # appear in the chain in the same order in which the gadgets are executed. - # This is not always true when there are gadgets that end with a jump to - # an address from a register instead of the stack. - # For example, if the ROP chain has three gadgets A, B, and C where gadget - # B ends with a jump to some register, gadget A would have to load the - # address of gadget C into the register before jumping to gadget B. - # Therefore, the address of gadget C might need to be placed before the - # address of gadget B. - # The append_addr_only argument and the set_gadgets method below were added - # to support chains like this without breaking the existing API. - if not append_addr_only: - self._gadgets.append(gadget) - + def _add_gadget_value(self, gadget): + """ + create a RopValue for the gadget's address and add it to chain._values + """ value = gadget.addr if self._pie: value -= self._p.loader.main_object.mapped_base @@ -90,11 +79,20 @@ def add_gadget(self, gadget, append_addr_only=False): if self._pie: value._rebase = True - if append_addr_only or (idx := self.next_pc_idx()) is None: + if (idx := self.next_pc_idx()) is None: self.add_value(value) else: self._values[idx] = value + def add_gadget(self, gadget): + # angrop was originally written with the assumption that gadget addresses + # appear in the chain in the same order in which the gadgets are executed. + # This is not always true when there are gadgets that end with a jump to + # an address from a register instead of the stack. + # For example, when we do `ret` in aarch64 + self._add_gadget_value(gadget) + self._gadgets.append(gadget) + def set_gadgets(self, gadgets: list[RopGadget]): self._gadgets = gadgets From 202497278352cd9bf24a3632f8653f5b1ba9bbc4 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Tue, 21 Jan 2025 15:43:13 -0700 Subject: [PATCH 077/106] use dstr and pp for chain/gadget printing --- angrop/chain_builder/builder.py | 11 +++++------ angrop/chain_builder/pivot.py | 3 +-- angrop/chain_builder/reg_mover.py | 4 ++-- angrop/chain_builder/reg_setter.py | 22 +++++----------------- angrop/chain_builder/shifter.py | 4 ++-- angrop/rop_chain.py | 16 ++++++++-------- 6 files changed, 23 insertions(+), 37 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index 66d9bda..04d140c 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -203,16 +203,15 @@ def map_stack_var(ast, value): for offset in range(0, stack_change, bytes_per_pop): sym_word = test_symbolic_state.stack_read(offset, bytes_per_pop) # check if we can constrain val to be the roparg_filler - if test_symbolic_state.solver.satisfiable( - (sym_word == self.roparg_filler,) - ): + if test_symbolic_state.solver.satisfiable([sym_word == self.roparg_filler]): # constrain the val to be the roparg_filler test_symbolic_state.add_constraints(sym_word == self.roparg_filler) # create the ropchain - chain = RopChain( - self.project, self, state=test_symbolic_state.copy(), badbytes=self.badbytes - ) + chain = RopChain(self.project, + self, + state=test_symbolic_state.copy(), + badbytes=self.badbytes) # iterate through the stack values that need to be in the chain for offset in range(-bytes_per_pop, stack_change, bytes_per_pop): diff --git a/angrop/chain_builder/pivot.py b/angrop/chain_builder/pivot.py index 5ac2edb..bce2da6 100644 --- a/angrop/chain_builder/pivot.py +++ b/angrop/chain_builder/pivot.py @@ -102,8 +102,7 @@ def pivot_reg(self, reg_val): if len(variables) == 1 and variables.pop().startswith(f'reg_{reg}'): return chain else: - insts = [str(self.project.factory.block(g.addr).capstone) for g in chain._gadgets] - chain_str = '\n-----\n'.join(insts) + chain_str = chain.dstr() l.exception("Somehow angrop thinks\n%s\ncan be use for stack pivoting", chain_str) except Exception: # pylint: disable=broad-exception-caught continue diff --git a/angrop/chain_builder/reg_mover.py b/angrop/chain_builder/reg_mover.py index 4903003..7aee6b5 100644 --- a/angrop/chain_builder/reg_mover.py +++ b/angrop/chain_builder/reg_mover.py @@ -26,11 +26,11 @@ def verify(self, chain, preserve_regs, registers): given a potential chain, verify whether the chain can move the registers correctly by symbolically execute the chain """ + chain_str = chain.dstr() state = chain.exec() for reg, val in registers.items(): bv = getattr(state.regs, reg) if bv.depth != 1 or val.reg_name not in bv._encoded_name.decode(): - chain_str = '\n-----\n'.join([str(self.project.factory.block(g.addr).capstone)for g in chain._gadgets]) l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation.", chain_str) return False for act in state.history.actions.hardcopy: @@ -103,7 +103,7 @@ def run(self, preserve_regs=None, **registers): # now see whether any of the chain candidates can work for gadgets in chains: - chain_str = '\n-----\n'.join([str(self.project.factory.block(g.addr).capstone)for g in gadgets]) + chain_str = "\n".join(g.dstr() for g in gadgets) l.debug("building reg_setting chain with chain:\n%s", chain_str) stack_change = sum(x.stack_change for x in gadgets) try: diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index f0a5c57..15994ac 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -42,13 +42,7 @@ def verify(self, chain, preserve_regs, registers): given a potential chain, verify whether the chain can set the registers correctly by symbolically execute the chain """ - chain_str = "\n-----\n".join( - "\n".join( - str(self.project.factory.block(addr).capstone) - for addr in g.bbl_addrs - ) - for g in chain._gadgets - ) + chain_str = chain.dstr() state = chain.exec() for act in state.history.actions.hardcopy: if act.type not in ("mem", "reg"): @@ -63,14 +57,14 @@ def verify(self, chain, preserve_regs, registers): offset -= act.offset % self.project.arch.bytes reg_name = self.project.arch.translate_register_name(offset) if reg_name in preserve_regs: - l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation - 1.", chain_str) + l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation - 1.\ntarget registers: %s", chain_str, registers) return False for reg, val in registers.items(): bv = getattr(state.regs, reg) if (val.symbolic != bv.symbolic) or state.solver.eval(bv != val.data): - l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation - 2.", chain_str) + l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation - 2.\ntarget registers: %s", chain_str, registers) return False - # the next pc must come from the stack + # the next pc must come from the stack or just marked as the next_pc if len(state.regs.pc.variables) != 1: return False pc_var = set(state.regs.pc.variables).pop() @@ -106,13 +100,7 @@ def run(self, modifiable_memory_range=None, use_partial_controllers=False, pres max_length=max_length, ), ): - chain_str = "\n-----\n".join( - "\n".join( - str(self.project.factory.block(addr).capstone) - for addr in g.bbl_addrs - ) - for g in gadgets - ) + chain_str = "\n".join(g.dstr() for g in gadgets) l.debug("building reg_setting chain with chain:\n%s", chain_str) stack_change = sum(x.stack_change for x in gadgets) try: diff --git a/angrop/chain_builder/shifter.py b/angrop/chain_builder/shifter.py index f14821b..92465c7 100644 --- a/angrop/chain_builder/shifter.py +++ b/angrop/chain_builder/shifter.py @@ -33,7 +33,7 @@ def verify_shift(self, chain, length, preserve_regs): offset -= act.offset % self.project.arch.bytes reg_name = self.project.arch.translate_register_name(offset) if reg_name in preserve_regs: - chain_str = '\n-----\n'.join([str(self.project.factory.block(g.addr).capstone)for g in chain._gadgets]) + chain_str = chain.dstr() l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation.", chain_str) return False return True @@ -51,7 +51,7 @@ def verify_retsled(self, chain, size, preserve_regs): if reg_name == self.arch.stack_pointer: continue if reg_name in preserve_regs: - chain_str = '\n-----\n'.join([str(self.project.factory.block(g.addr).capstone)for g in chain._gadgets]) + chain_str = chain.dstr() l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation.", chain_str) return False return True diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index f50c4c4..9ebfe05 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -291,10 +291,7 @@ def exec(self, max_steps=None, timeout=None): for value, _ in reversed(values[1:]): state.stack_push(value) if max_steps is None: - max_steps = max( - sum(len(gadget.bbl_addrs) for gadget in self._gadgets), - 2 * len(self._gadgets), - ) + max_steps = sum(len(gadget.bbl_addrs) for gadget in self._gadgets) try: state = rop_utils.step_to_unconstrained_successor(self._p, state, max_steps=max_steps, allow_simprocedures=True) @@ -320,17 +317,20 @@ def __str__(self): def dstr(self): res = '' + bs = self._p.arch.bytes + prefix_len = bs*2+2 + prefix = " "*prefix_len for v in self._values: if v.symbolic: - res += f"{v}\n" + res += prefix + f" {v.ast}\n" continue for g in self._gadgets: if g.addr == v.concreted: - res += f"{g.addr:#x}: {g.dstr()}\n" + fmt = f"%#0{prefix_len}x" + res += fmt % g.addr + f": {g.dstr()}\n" break else: - res += f"{v.concreted:#x}\n" - + res += prefix + f" {v.concreted:#x}\n" return res def pp(self): From 30347470c71b1b9e1c44efb3c4a6a091c9289989 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Tue, 21 Jan 2025 15:43:43 -0700 Subject: [PATCH 078/106] fix a bug in chain constraint collection --- angrop/chain_builder/builder.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index 04d140c..a3e9793 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -140,9 +140,11 @@ def map_stack_var(ast, value): state = test_symbolic_state # Step through each gadget and constrain the ip. + # save the constraints into test_symbolic_state for gadget in gadgets: map_stack_var(state.ip, gadget) state.solver.add(state.ip == gadget.addr) + test_symbolic_state.solver.add(state.ip == gadget.addr) for addr in gadget.bbl_addrs[1:]: succ = state.step() succ_states = [ @@ -162,6 +164,9 @@ def map_stack_var(ast, value): ) state = succ.unconstrained_successors[0] + if len(state.solver.eval_upto(state.ip, 2)) < 2: + raise RopException("The final pc is not unconstrained!") + # Record the variable that controls the final ip. next_pc_val = rop_utils.cast_rop_value( test_symbolic_state.solver.BVS("next_pc", self.project.arch.bits), From 356959f99536d3b71a84bc5d990237a1b733d3b6 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Tue, 21 Jan 2025 15:45:45 -0700 Subject: [PATCH 079/106] remove RiscyROP.md --- README-RiscyROP.md | 56 ---------------------------------------------- 1 file changed, 56 deletions(-) delete mode 100644 README-RiscyROP.md diff --git a/README-RiscyROP.md b/README-RiscyROP.md deleted file mode 100644 index d39ca04..0000000 --- a/README-RiscyROP.md +++ /dev/null @@ -1,56 +0,0 @@ -# RiscyROP Usage - -## z3 Memory Usage - -Unfortunately there appears to be some kind of memory leak issue involving z3 that causes the memory usage to keep increasing during gadget finding. -With the latest z3 version the memory usage will increase to several GB per thread very quickly, but with older versions like 4.12.6.0 it's not as bad and the workaround I implemented that periodically restarts the worker processes is enough to keep the memory usage below 1.5 GB per thread. - -## Finding Gadgets - -Disable angrop's `fast_mode` setting when initializing the project, otherwise you will get very few gadgets. -The new gadget analyzer is a lot slower than angrop's original implementation. -You'll probably want to increase the number of processes from the default of 4, but make sure you have enough memory. -On my machine, it takes around 30 minutes to an hour to find gadgets in nginx and glibc with 16 processes. - -```python -import angr, angrop -p = angr.Project("some_binary", auto_load_libs=False) -rop = p.analyses.ROP(fast_mode=False) -rop.find_gadgets(16) -``` - -Since gadget finding takes a while, you can save the gadgets and load them later so that you don't have to run the gadget finder again. - -```python -rop.save_gadgets("gadgets") -rop.load_gadgets("gadgets") -``` - -## Chain Building - -Building register setting chains should work well, but building other types of chains might not work since integration of the new algorithms with the existing angrop features isn't fully complete. -On large binaries like glibc the new algorithm can set most if not all of the argument registers. -You can set the `modifiable_memory_range` argument to a range of addresses that can be safely accessed. -This will allow the chain builder to use gadgets that access memory outside of the stack, and it will ensure that the addresses are within the given range. -The maximum chain length defaults to 10 gadgets, which might not be enough if the number of registers is large. - -```python -chain = rop.set_regs(x0=1, x1=2, x2=3, x3=4, x4=5, x5=6, x6=7, x7=8, x30=42, modifiable_memory_range=(0x1000, 0x2000), max_length=15) -chain.print_gadget_asm() -chain.print_payload_code() -``` - -The address of the first gadget is placed at the beginning of the chain since all of the existing code assumes this is the case, but you might have to put it somewhere else depending on how you enter the chain. -For example, the initial gadget address would probably have to be placed further up the stack if return addresses are stored at the beginning of the stack frame instead of the end. -Similarly, the address that you want the last gadget to jump to might have to be placed somewhere in the middle of the chain instead of right after the chain. -`chain.next_pc_idx()` tells you which value in the chain should be replaced with the desired address if this is the case. - -If things aren't working, you might want to enable debug logging: - -```python -import logging -logging.getLogger('angrop.chain_builder.reg_setter').setLevel('DEBUG') -``` - -If the chain builder finds a sequence of gadgets that should work but it encounters an error when concretizing the chain, it will try a different sequence of gadgets. -However, this should rarely happen with the new algorithm. From ce4c3410d3bd39d3927deb7c95fe6e07dea51e9e Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Tue, 21 Jan 2025 17:53:03 -0700 Subject: [PATCH 080/106] clean up reg_setter --- angrop/chain_builder/builder.py | 11 +- angrop/chain_builder/mem_changer.py | 2 +- angrop/chain_builder/mem_writer.py | 4 +- angrop/chain_builder/reg_setter.py | 326 +++++++++++++++------------- angrop/rop_chain.py | 12 +- tests/test_chainbuilder.py | 3 + 6 files changed, 187 insertions(+), 171 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index a3e9793..0a03a8d 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -226,15 +226,16 @@ def map_stack_var(ast, value): if sym_var in stack_var_to_value: val = stack_var_to_value[sym_var] if isinstance(val, RopGadget): - chain._add_gadget_value(val) + # this is special, we know this won't be "next_pc", so don't try + # to take "next_pc"'s position + value = RopValue(val.addr, self.project) + value.rebase_analysis(chain=chain) + chain.add_value(value) else: # HACK: Because angrop appears to have originally been written # with assumptions around x86 ret gadgets, the target of the final jump # is not included in the chain if it is the last value. - if ( - offset == stack_change - bytes_per_pop - and val is next_pc_val - ): + if offset == stack_change - bytes_per_pop and val is next_pc_val: break chain.add_value(val) else: diff --git a/angrop/chain_builder/mem_changer.py b/angrop/chain_builder/mem_changer.py index 8e4e03b..c718970 100644 --- a/angrop/chain_builder/mem_changer.py +++ b/angrop/chain_builder/mem_changer.py @@ -96,7 +96,7 @@ def add_to_mem(self, addr, value, data_size=None): # get the data from trying to set all the registers registers = dict((reg, 0x41) for reg in self.chain_builder.arch.reg_set) l.debug("getting reg data for mem adds") - _, _, reg_data = self.chain_builder._reg_setter._find_reg_setting_gadgets(max_stack_change=0x50, **registers) + _, _, reg_data = self.chain_builder._reg_setter.find_candidate_chains_graph_search(max_stack_change=0x50, **registers) l.debug("trying mem_add gadgets") # filter out gadgets that certainly cannot be used for add_mem diff --git a/angrop/chain_builder/mem_writer.py b/angrop/chain_builder/mem_writer.py index 6ef06f8..67ccab4 100644 --- a/angrop/chain_builder/mem_writer.py +++ b/angrop/chain_builder/mem_writer.py @@ -52,7 +52,7 @@ def _gen_mem_write_gadgets(self, string_data): registers = dict((reg, 0x41) for reg in self.arch.reg_set) l.debug("getting reg data for mem writes") reg_setter = self.chain_builder._reg_setter - _, _, reg_data = reg_setter._find_reg_setting_gadgets(max_stack_change=0x50, **registers) + _, _, reg_data = reg_setter.find_candidate_chains_graph_search(max_stack_change=0x50, **registers) l.debug("trying mem_write gadgets") # limit the maximum size of the chain @@ -79,7 +79,7 @@ def _gen_mem_write_gadgets(self, string_data): use_partial_controllers = True l.warning("Trying to use partial controllers for memory write") l.debug("getting reg data for mem writes") - _, _, reg_data = self.chain_builder._reg_setter._find_reg_setting_gadgets(max_stack_change=0x50, + _, _, reg_data = self.chain_builder._reg_setter.find_candidate_chains_graph_search(max_stack_change=0x50, use_partial_controllers=True, **registers) l.debug("trying mem_write gadgets") diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 15994ac..32f6521 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -16,12 +16,9 @@ l = logging.getLogger("angrop.chain_builder.reg_setter") class RegSetter(Builder): - """ - TODO: get rid of Salls's code - """ def __init__(self, chain_builder): super().__init__(chain_builder) - self._reg_setting_gadgets = None + self._reg_setting_gadgets = None # all the gadgets that can set registers self.hard_chain_cache = None # Estimate of how difficult it is to set each register. self._reg_weights = None @@ -84,28 +81,13 @@ def run(self, modifiable_memory_range=None, use_partial_controllers=False, pres for x in registers: registers[x] = rop_utils.cast_rop_value(registers[x], self.project) - for gadgets in itertools.chain( - self._find_all_candidate_chains( - self._find_relevant_gadgets(**registers), - preserve_regs.copy(), - **registers, - ), - self._backwards_recursive_search( - self._reg_setting_gadgets, - set(registers), - current_chain=[], - preserve_regs=preserve_regs, - modifiable_memory_range=modifiable_memory_range, - visited={}, - max_length=max_length, - ), - ): + for gadgets in self.iterate_candidate_chains(modifiable_memory_range, preserve_regs, max_length, registers): chain_str = "\n".join(g.dstr() for g in gadgets) l.debug("building reg_setting chain with chain:\n%s", chain_str) stack_change = sum(x.stack_change for x in gadgets) try: chain = self._build_reg_setting_chain(gadgets, modifiable_memory_range, - registers, stack_change) + registers, stack_change) chain._concretize_chain_values(timeout=len(chain._values)*3) if self.verify(chain, preserve_regs, registers): #self._chain_cache[reg_tuple].append(gadgets) @@ -115,124 +97,35 @@ def run(self, modifiable_memory_range=None, use_partial_controllers=False, pres raise RopException("Couldn't set registers :(") - def _find_relevant_gadgets(self, **registers): - """ - find gadgets that may pop/load/change requested registers - exclude gadgets that do symbolic memory access - """ - gadgets = set({}) - for g in self._reg_setting_gadgets: - if g.has_symbolic_access(): - continue - for reg in registers: - if reg in g.popped_regs: - gadgets.add(g) - if reg in g.changed_regs: - gadgets.add(g) - if reg in g.reg_dependencies.keys(): - gadgets.add(g) - if reg in g.concrete_regs.keys(): - gadgets.add(g) - return gadgets - - def _recursively_find_chains(self, gadgets, chain, preserve_regs, todo_regs, hard_preserve_regs): - """ - preserve_regs: soft preservation, can be overwritten as long as it gets back to control - hard_preserve_regs: cannot touch these regs at all - """ - if not todo_regs: - return [chain] - - todo_list = [] - for g in gadgets: - set_regs = g.popped_regs.intersection(todo_regs) - if not set_regs: - continue - if g.changed_regs.intersection(hard_preserve_regs): - continue - clobbered_regs = g.changed_regs.intersection(preserve_regs) - if clobbered_regs - set_regs: - continue - new_preserve = preserve_regs.copy() - new_preserve.update(set_regs) - new_chain = chain.copy() - new_chain.append(g) - todo_list.append((new_chain, new_preserve, todo_regs-set_regs, hard_preserve_regs)) - - res = [] - for todo in todo_list: - res += self._recursively_find_chains(gadgets, *todo) - return res - - @staticmethod - def _find_concrete_chains(gadgets, registers): - chains = [] - for g in gadgets: - for reg, val in registers.items(): - if reg in g.concrete_regs and g.concrete_regs[reg] == val: - chains.append([g]) - return chains - - def _find_add_chain(self, gadgets, reg, val): - """ - find one chain to set one single register to a specific value using concrete values only through add/dec - """ - val = rop_utils.cast_rop_value(val, self.project) - concrete_setter_gadgets = [ x for x in gadgets if reg in x.concrete_regs ] - delta_gadgets = [ x for x in gadgets if len(x.reg_dependencies) == 1 and reg in x.reg_dependencies\ - and len(x.reg_dependencies[reg]) == 1 and reg in x.reg_dependencies[reg]] - for g1 in concrete_setter_gadgets: - for g2 in delta_gadgets: - try: - chain = self._build_reg_setting_chain([g1, g2], False, # pylint:disable=too-many-function-args - {reg: val}, g1.stack_change+g2.stack_change) - state = chain.exec() - bv = state.registers.load(reg) - if bv.symbolic: - continue - if state.solver.eval(bv == val.data): - return [g1, g2] - except Exception:# pylint:disable=broad-except - pass - return None - - def _find_all_candidate_chains(self, gadgets, preserve_regs, **registers): - """ - 1. find gadgets that set concrete values to the target values, such as xor eax, eax to set eax to 0 - 2. find all pop only chains by BFS search - TODO: handle moves - """ - # get the list of regs that cannot be popped (call it hard_regs) - hard_regs = [reg for reg, val in registers.items() if self._word_contain_badbyte(val)] - if len(hard_regs) > 1: - l.error("too many registers contain bad bytes! bail out! %s", registers) - return [] - - # if hard_regs exists, try to use concrete values to craft the value - hard_chain = [] - if hard_regs and not registers[hard_regs[0]].symbolic: - reg = hard_regs[0] - val = registers[reg].concreted - key = (reg, val) - if key in self.hard_chain_cache: - hard_chain = self.hard_chain_cache[key] - else: - hard_chains = self._find_concrete_chains(gadgets, {reg: val}) - if hard_chains: - hard_chain = hard_chains[0] - else: - hard_chain = self._find_add_chain(gadgets, reg, val) - self.hard_chain_cache[key] = hard_chain # we cache the result even if it fails - if not hard_chain: - l.error("Fail to set register: %s to: %#x", reg, val) - return [] - registers.pop(reg) - - preserve_regs.update(hard_regs) - # use the original pop techniques to set other registers - chains = self._recursively_find_chains(gadgets, hard_chain, preserve_regs, - set(registers.keys()), preserve_regs) - return self._sort_chains(chains) + def iterate_candidate_chains(self, modifiable_memory_range, preserve_regs, max_length, registers): + # algorithm1 + gadgets, _, _ = self.find_candidate_chains_graph_search(modifiable_memory_range=modifiable_memory_range, + preserve_regs=preserve_regs.copy(), + **registers) + if gadgets: + yield gadgets + + # algorithm2 + gadgets_list = self.find_candidate_chains_pop_only_bfs_search( + self._find_relevant_gadgets(**registers), + preserve_regs.copy(), + **registers) + for gadgets in gadgets_list: + yield gadgets + + # algorithm3 + for gadgets in self.find_candidate_chains_backwards_recursive_search( + self._reg_setting_gadgets, + set(registers), + current_chain=[], + preserve_regs=preserve_regs.copy(), + modifiable_memory_range=modifiable_memory_range, + visited={}, + max_length=max_length): + yield gadgets + return + + #### Chain Building Algorithm 1: fast but unreliable graph-based search #### @staticmethod def _tuple_to_gadgets(data, reg_tuple): @@ -270,7 +163,7 @@ def _verify_chain(chain, regs): # todo allow specify initial regs # todo memcopy(from_addr, to_addr, len) # todo handle "leave" then try to do a mem write on chess from codegate-finals - def _find_reg_setting_gadgets(self, modifiable_memory_range=None, use_partial_controllers=False, + def find_candidate_chains_graph_search(self, modifiable_memory_range=None, use_partial_controllers=False, max_stack_change=None, preserve_regs=None, **registers): """ Finds a list of gadgets which set the desired registers @@ -487,7 +380,130 @@ def _check_if_sufficient_partial_control(self, gadget, reg, value): return True return False - def _backwards_recursive_search( + #### Chain Building Algorithm 2: pop-only BFS search #### + + def _find_relevant_gadgets(self, **registers): + """ + find gadgets that may pop/load/change requested registers + exclude gadgets that do symbolic memory access + """ + gadgets = set({}) + for g in self._reg_setting_gadgets: + if g.has_symbolic_access(): + continue + for reg in registers: + if reg in g.popped_regs: + gadgets.add(g) + if reg in g.changed_regs: + gadgets.add(g) + if reg in g.reg_dependencies.keys(): + gadgets.add(g) + if reg in g.concrete_regs.keys(): + gadgets.add(g) + return gadgets + + @staticmethod + def _find_concrete_chains(gadgets, registers): + chains = [] + for g in gadgets: + for reg, val in registers.items(): + if reg in g.concrete_regs and g.concrete_regs[reg] == val: + chains.append([g]) + return chains + + def find_candidate_chains_pop_only_bfs_search(self, gadgets, preserve_regs, **registers): + """ + 1. find gadgets that set concrete values to the target values, such as xor eax, eax to set eax to 0 + 2. find all pop only chains by BFS search + TODO: handle moves + """ + # get the list of regs that cannot be popped (call it hard_regs) + hard_regs = [reg for reg, val in registers.items() if self._word_contain_badbyte(val)] + if len(hard_regs) > 1: + l.error("too many registers contain bad bytes! bail out! %s", registers) + return [] + + # if hard_regs exists, try to use concrete values to craft the value + hard_chain = [] + if hard_regs and not registers[hard_regs[0]].symbolic: + reg = hard_regs[0] + val = registers[reg].concreted + key = (reg, val) + if key in self.hard_chain_cache: + hard_chain = self.hard_chain_cache[key] + else: + hard_chains = self._find_concrete_chains(gadgets, {reg: val}) + if hard_chains: + hard_chain = hard_chains[0] + else: + hard_chain = self._find_add_chain(gadgets, reg, val) + self.hard_chain_cache[key] = hard_chain # we cache the result even if it fails + if not hard_chain: + l.error("Fail to set register: %s to: %#x", reg, val) + return [] + registers.pop(reg) + + preserve_regs.update(hard_regs) + # use the original pop techniques to set other registers + chains = self._recursively_find_chains(gadgets, hard_chain, preserve_regs, + set(registers.keys()), preserve_regs) + return self._sort_chains(chains) + + def _find_add_chain(self, gadgets, reg, val): + """ + find one chain to set one single register to a specific value using concrete values only through add/dec + """ + val = rop_utils.cast_rop_value(val, self.project) + concrete_setter_gadgets = [ x for x in gadgets if reg in x.concrete_regs ] + delta_gadgets = [ x for x in gadgets if len(x.reg_dependencies) == 1 and reg in x.reg_dependencies\ + and len(x.reg_dependencies[reg]) == 1 and reg in x.reg_dependencies[reg]] + for g1 in concrete_setter_gadgets: + for g2 in delta_gadgets: + try: + chain = self._build_reg_setting_chain([g1, g2], False, # pylint:disable=too-many-function-args + {reg: val}, g1.stack_change+g2.stack_change) + state = chain.exec() + bv = state.registers.load(reg) + if bv.symbolic: + continue + if state.solver.eval(bv == val.data): + return [g1, g2] + except Exception:# pylint:disable=broad-except + pass + return None + + def _recursively_find_chains(self, gadgets, chain, preserve_regs, todo_regs, hard_preserve_regs): + """ + preserve_regs: soft preservation, can be overwritten as long as it gets back to control + hard_preserve_regs: cannot touch these regs at all + """ + if not todo_regs: + return [chain] + + todo_list = [] + for g in gadgets: + set_regs = g.popped_regs.intersection(todo_regs) + if not set_regs: + continue + if g.changed_regs.intersection(hard_preserve_regs): + continue + clobbered_regs = g.changed_regs.intersection(preserve_regs) + if clobbered_regs - set_regs: + continue + new_preserve = preserve_regs.copy() + new_preserve.update(set_regs) + new_chain = chain.copy() + new_chain.append(g) + todo_list.append((new_chain, new_preserve, todo_regs-set_regs, hard_preserve_regs)) + + res = [] + for todo in todo_list: + res += self._recursively_find_chains(gadgets, *todo) + return res + + #### Chain Building Algorithm 3: RiscyROP's backwards search #### + + def find_candidate_chains_backwards_recursive_search( self, gadgets: Iterable[RopGadget], registers: set[str], @@ -541,7 +557,7 @@ def _backwards_recursive_search( for gadget, remaining_regs in potential_next_gadgets: current_chain.append(gadget) - yield from self._backwards_recursive_search( + yield from self.find_candidate_chains_backwards_recursive_search( gadgets, remaining_regs, current_chain, @@ -606,7 +622,13 @@ def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str return remaining_regs - def __filter_gadgets(self, gadgets): + #### Gadget Filtering #### + + def _filter_gadgets(self, gadgets): + """ + group gadgets by features and drop lesser groups + """ + # gadget grouping d = defaultdict(list) for g in gadgets: key = (len(g.changed_regs), g.stack_change, g.num_mem_access, g.isn_count) @@ -616,6 +638,7 @@ def __filter_gadgets(self, gadgets): if len(d) == 1: return {gadgets.pop()} + # only keep the best groups keys = set(d.keys()) bests = set() while keys: @@ -627,6 +650,8 @@ def __filter_gadgets(self, gadgets): break else: bests.add(k1) + + # turn groups back to gadgets gadgets = set() for key, val in d.items(): if key not in bests: @@ -645,20 +670,17 @@ def _same_effect(self, g1, g2): return False return True - def _filter_gadgets(self, gadgets): + def filter_gadgets(self, gadgets): + """ + process gadgets based their effects + """ bests = set() gadgets = set(gadgets) while gadgets: g0 = gadgets.pop() equal_class = {g for g in gadgets if self._same_effect(g0, g)} equal_class.add(g0) - bests = bests.union(self.__filter_gadgets(equal_class)) + bests = bests.union(self._filter_gadgets(equal_class)) gadgets -= equal_class - return bests - - def filter_gadgets(self, gadgets): - """ - filter gadgets having the same effect - """ - return self._filter_gadgets(gadgets) \ No newline at end of file + return bests \ No newline at end of file diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index 9ebfe05..4c6f15c 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -68,10 +68,7 @@ def add_value(self, value): self._values.append(value) self.payload_len += self._p.arch.bytes - def _add_gadget_value(self, gadget): - """ - create a RopValue for the gadget's address and add it to chain._values - """ + def add_gadget(self, gadget): value = gadget.addr if self._pie: value -= self._p.loader.main_object.mapped_base @@ -84,13 +81,6 @@ def _add_gadget_value(self, gadget): else: self._values[idx] = value - def add_gadget(self, gadget): - # angrop was originally written with the assumption that gadget addresses - # appear in the chain in the same order in which the gadgets are executed. - # This is not always true when there are gadgets that end with a jump to - # an address from a register instead of the stack. - # For example, when we do `ret` in aarch64 - self._add_gadget_value(gadget) self._gadgets.append(gadget) def set_gadgets(self, gadgets: list[RopGadget]): diff --git a/tests/test_chainbuilder.py b/tests/test_chainbuilder.py index 0605920..1c724ed 100644 --- a/tests/test_chainbuilder.py +++ b/tests/test_chainbuilder.py @@ -68,6 +68,9 @@ def test_arm_func_call(): rop.find_gadgets() rop.save_gadgets(cache_path) + chain = rop.set_regs(lr=0x41414141) + assert sum(g.stack_change for g in chain._gadgets) <= 12 + proj.hook_symbol('write', angr.SIM_PROCEDURES['posix']['write']()) chain1 = rop.func_call("write", [1, 0x4E15F0, 9]) state = chain1.exec(max_steps=8) From 01c85718538524e50b7ee728d71e2f9de474e5fa Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Tue, 21 Jan 2025 18:12:05 -0700 Subject: [PATCH 081/106] execute more steps for function execution --- tests/test_chainbuilder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_chainbuilder.py b/tests/test_chainbuilder.py index 1c724ed..0de2755 100644 --- a/tests/test_chainbuilder.py +++ b/tests/test_chainbuilder.py @@ -73,16 +73,16 @@ def test_arm_func_call(): proj.hook_symbol('write', angr.SIM_PROCEDURES['posix']['write']()) chain1 = rop.func_call("write", [1, 0x4E15F0, 9]) - state = chain1.exec(max_steps=8) + state = chain1.exec(max_steps=100) assert state.posix.dumps(1) == b'malloc.c\x00' proj.hook_symbol('puts', angr.SIM_PROCEDURES['libc']['puts']()) chain2 = rop.func_call("puts", [0x4E15F0]) - state = chain2.exec(max_steps=8) + state = chain2.exec(max_steps=100) assert state.posix.dumps(1) == b'malloc.c\n' chain = chain1 + chain2 - state = chain.exec(max_steps=8) + state = chain.exec(max_steps=100) assert state.posix.dumps(1) == b'malloc.c\x00malloc.c\n' def test_i386_syscall(): From efb95b83eb98f92bb07af6e3785ebe4682ef344a Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Tue, 21 Jan 2025 19:43:04 -0700 Subject: [PATCH 082/106] fix a few minor bugs in mem_writer --- angrop/chain_builder/mem_writer.py | 97 +++++++++++++----------------- 1 file changed, 42 insertions(+), 55 deletions(-) diff --git a/angrop/chain_builder/mem_writer.py b/angrop/chain_builder/mem_writer.py index 67ccab4..ff658cb 100644 --- a/angrop/chain_builder/mem_writer.py +++ b/angrop/chain_builder/mem_writer.py @@ -19,9 +19,11 @@ class MemWriter(Builder): def __init__(self, chain_builder): super().__init__(chain_builder) self._mem_write_gadgets = None + self._good_mem_write_gadgets = None def update(self): self._mem_write_gadgets = self._get_all_mem_write_gadgets(self.chain_builder.gadgets) + self._good_mem_write_gadgets = set() def _set_regs(self, *args, **kwargs): return self.chain_builder._reg_setter.run(*args, **kwargs) @@ -45,62 +47,45 @@ def _gen_mem_write_gadgets(self, string_data): # create a dict of bytes per write to gadgets # assume we need intersection of addr_dependencies and data_dependencies to be 0 # TODO could allow mem_reads as long as we control the address? - possible_gadgets = self._mem_write_gadgets - while possible_gadgets: - # get the data from trying to set all the registers - registers = dict((reg, 0x41) for reg in self.arch.reg_set) - l.debug("getting reg data for mem writes") - reg_setter = self.chain_builder._reg_setter - _, _, reg_data = reg_setter.find_candidate_chains_graph_search(max_stack_change=0x50, **registers) - l.debug("trying mem_write gadgets") + # generate from the cache first + if self._good_mem_write_gadgets: + for g in self._good_mem_write_gadgets: + yield g + + possible_gadgets = self._mem_write_gadgets.copy() - self._good_mem_write_gadgets + # use the graph-search to gain a rough idea about (stack_change, register setting) + registers = dict((reg, 0x41) for reg in self.arch.reg_set) + l.debug("getting reg data for mem writes") + reg_setter = self.chain_builder._reg_setter + _, _, reg_data = reg_setter.find_candidate_chains_graph_search(max_stack_change=0x50, **registers) + l.debug("trying mem_write gadgets") + + # find a write gadget that induces the smallest stack_change + while possible_gadgets: # limit the maximum size of the chain best_stack_change = 0x400 best_gadget = None - use_partial_controllers = False - for t, vals in reg_data.items(): - if vals[1] >= best_stack_change: + # regs: according to the graph search, what registers can be controlled + # vals[1]: stack_change to set those registers + for regs, vals in reg_data.items(): + reg_set_stack_change = vals[1] + if reg_set_stack_change >= best_stack_change: continue for g in possible_gadgets: mem_write = g.mem_writes[0] - if (set(mem_write.addr_dependencies) | set(mem_write.data_dependencies)).issubset(set(t)): - stack_change = g.stack_change + vals[1] - bytes_per_write = mem_write.data_size // 8 - num_writes = (len(string_data) + bytes_per_write - 1)//bytes_per_write - stack_change *= num_writes - if stack_change < best_stack_change: - best_gadget = g - best_stack_change = stack_change - - # try again using partial_controllers - best_stack_change = 0x400 - if best_gadget is None: - use_partial_controllers = True - l.warning("Trying to use partial controllers for memory write") - l.debug("getting reg data for mem writes") - _, _, reg_data = self.chain_builder._reg_setter.find_candidate_chains_graph_search(max_stack_change=0x50, - use_partial_controllers=True, - **registers) - l.debug("trying mem_write gadgets") - for t, vals in reg_data.items(): - if vals[1] >= best_stack_change: + if not (mem_write.addr_dependencies | mem_write.data_dependencies).issubset(regs): continue - for g in possible_gadgets: - mem_write = g.mem_writes[0] - # we need the addr to not be partially controlled - if (set(mem_write.addr_dependencies) | set(mem_write.data_dependencies)).issubset(set(t)) and \ - len(set(mem_write.addr_dependencies) & vals[3]) == 0: - stack_change = g.stack_change + vals[1] - # only one byte at a time - bytes_per_write = 1 - num_writes = (len(string_data) + bytes_per_write - 1)//bytes_per_write - stack_change *= num_writes - if stack_change < best_stack_change: - best_gadget = g - best_stack_change = stack_change - - yield best_gadget, use_partial_controllers + stack_change = g.stack_change + reg_set_stack_change + bytes_per_write = mem_write.data_size // 8 + num_writes = (len(string_data) + bytes_per_write - 1)//bytes_per_write + stack_change *= num_writes + if stack_change < best_stack_change: + best_gadget = g + best_stack_change = stack_change + + yield best_gadget possible_gadgets.remove(best_gadget) @rop_utils.timeout(5) @@ -132,15 +117,13 @@ def _write_to_mem(self, addr, string_data, fill_byte=b"\xff"):# pylint:disable=i :param fill_byte: a byte to use to fill up the string if necessary :return: a rop chain """ - - gen = self._gen_mem_write_gadgets(string_data) - gadget, use_partial_controllers = next(gen, (None, None)) - while gadget: + for gadget in self._gen_mem_write_gadgets(string_data): try: - return self._try_write_to_mem(gadget, use_partial_controllers, addr, string_data, fill_byte) + chain = self._try_write_to_mem(gadget, False, addr, string_data, fill_byte) + self._good_mem_write_gadgets.add(gadget) + return chain except (RopException, angr.errors.SimEngineError, angr.errors.SimUnsatError): pass - gadget, use_partial_controllers = next(gen, (None, None)) raise RopException("Fail to write data to memory :(") @@ -165,9 +148,12 @@ def write_to_mem(self, addr, data, fill_byte=b"\xff"): if x not in self.badbytes: e += bytes([x]) else: - elems.append(e) + if e: + elems.append(e) elems.append(bytes([x])) e = b'' + if e: + elems.append(e) # do the write offset = 0 @@ -175,7 +161,7 @@ def write_to_mem(self, addr, data, fill_byte=b"\xff"): for elem in elems: ptr = addr + offset if self._word_contain_badbyte(ptr): - raise RopException(f"{ptr:#x} contains bad byte!") + raise RopException(f"{ptr} contains bad byte!") if len(elem) != 1 or ord(elem) not in self.badbytes: chain += self._write_to_mem(ptr, elem, fill_byte=fill_byte) offset += len(elem) @@ -262,6 +248,7 @@ def _write_to_mem_with_gadget(self, gadget, addr_val, data, use_partial_controll sim_data = state.memory.load(addr_val.data, len(data)) if not state.solver.eval(sim_data == data): raise RopException("memory write fails") + # the next pc must come from the stack if len(state.regs.pc.variables) != 1: raise RopException("must have only one pc variable") From d97eba265b78cad9038c3744424beeabd5620daf Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Tue, 21 Jan 2025 20:35:29 -0700 Subject: [PATCH 083/106] make aarch64 support only_check_near_rets --- angrop/arch.py | 3 ++- angrop/gadget_finder/__init__.py | 8 ++++---- angrop/gadget_finder/gadget_analyzer.py | 23 ++++------------------- 3 files changed, 10 insertions(+), 24 deletions(-) diff --git a/angrop/arch.py b/angrop/arch.py index c8a872f..daa150a 100644 --- a/angrop/arch.py +++ b/angrop/arch.py @@ -98,7 +98,8 @@ def block_make_sense(self, block): class AARCH64(ROPArch): def __init__(self, project, kernel_mode=False): super().__init__(project, kernel_mode=kernel_mode) - self.max_block_size = 4 * 15 + self.ret_insts = {b'\xc0\x03_\xd6'} + self.max_block_size = self.alignment * 15 class MIPS(ROPArch): def __init__(self, project, kernel_mode=False): diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index 768ca90..361b5d9 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -14,7 +14,7 @@ from . import gadget_analyzer from ..arch import get_arch from ..errors import RopException -from ..arch import ARM, X86, AMD64 +from ..arch import ARM, X86, AMD64, AARCH64 l = logging.getLogger(__name__) @@ -53,8 +53,8 @@ def __init__(self, project, fast_mode=None, only_check_near_rets=True, max_block self.kernel_mode = kernel_mode self.stack_gsize = stack_gsize - if only_check_near_rets and not isinstance(self.arch, (X86, AMD64)): - l.warning("only_check_near_rets only makes sense for i386/amd64, setting it to False") + if only_check_near_rets and not isinstance(self.arch, (X86, AMD64, AARCH64)): + l.warning("only_check_near_rets only makes sense for i386/amd64/aarch64, setting it to False") self.only_check_near_rets = False # override parameters @@ -362,7 +362,7 @@ def _get_ret_locations_by_string(self): :return: all the locations in the binary with a ret instruction """ if not self.arch.ret_insts: - raise RopException("Only have ret strings for i386 and x86_64") + raise RopException("Only have ret strings for i386/x86_64/aarch64") return self._get_locations_by_strings(self.arch.ret_insts) def _get_syscall_locations_by_string(self): diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 975d8a0..5b67e98 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -283,24 +283,6 @@ def _can_reach_stopping_states(self, addr, allow_conditional_branches, max_steps return True return False - def _reach_unconstrained_or_syscall(self, addr): - init_state = self._state.copy() - init_state.ip = addr - - # it will raise errors if angr fails to step the state - final_state = rop_utils.step_to_unconstrained_successor(self.project, state=init_state, stop_at_syscall=True) - - if self.is_in_kernel(final_state): - state = final_state.copy() - try: - succ = self.project.factory.successors(state) - state = succ.flat_successors[0] - state2 = rop_utils.step_to_unconstrained_successor(self.project, state=state) - except Exception: # pylint: disable=broad-exception-caught - return init_state, final_state - return init_state, state2 - return init_state, final_state - def _try_stepping_past_syscall(self, state): try: return rop_utils.step_to_unconstrained_successor(self.project, state, max_steps=3) @@ -350,6 +332,9 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type): # create the gadget if ctrl_type == 'syscall' or self._does_syscall(final_state): + # gadgets that do syscall and pivoting are too complicated + if self._does_pivot(final_state): + return None gadget = SyscallGadget(addr=addr) gadget.makes_syscall = self._does_syscall(final_state) gadget.starts_with_syscall = self._starts_with_syscall(addr) @@ -663,7 +648,6 @@ def _compute_sp_change(self, init_state, final_state, gadget): raise RopException("SP has multiple dependencies") if len(dependencies) == 0 and sp_change.symbolic: raise RopException("SP change is uncontrolled") - assert self.arch.base_pointer not in dependencies if len(dependencies) == 0 and not sp_change.symbolic: stack_changes = [init_state.solver.eval(sp_change)] @@ -678,6 +662,7 @@ def _compute_sp_change(self, init_state, final_state, gadget): gadget.stack_change = stack_changes[0] elif type(gadget) is PivotGadget: + # FIXME: step_to_unconstrained_successor is not compatible with conditional_branches final_state = rop_utils.step_to_unconstrained_successor(self.project, state=init_state, precise_action=True) dependencies = self._get_reg_dependencies(final_state, "sp") last_sp = None From f06ebe7961abaf30693971fa63698bb9c281ec95 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 10:49:08 -0700 Subject: [PATCH 084/106] building memory accesses is very slow, let's check the number of accesses beforing building them --- angrop/gadget_finder/gadget_analyzer.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 5b67e98..04e8371 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -144,13 +144,6 @@ def _analyze_gadget(self, addr, allow_conditional_branches): if not gadget: continue - # Step 4: filter out bad gadgets - # too many mem accesses, it can only be done after gadget creation - # specifically, memory access analysis - if gadget.num_mem_access > self.arch.max_sym_mem_access: - l.debug("... too many symbolic memory accesses") - continue - l.debug("... Appending gadget!") gadgets.append(gadget) @@ -384,7 +377,9 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type): # check mem accesses l.debug("... analyzing mem accesses") - self._analyze_mem_access(final_state, init_state, gadget) + if not self._analyze_mem_access(final_state, init_state, gadget): + l.debug("... too many symbolic memory accesses") + return None for m_access in gadget.mem_writes + gadget.mem_reads + gadget.mem_changes: if not m_access.is_valid(): @@ -901,6 +896,9 @@ def _analyze_mem_access(self, final_state, init_state, gadget): for m in d[addr]: all_mem_actions.remove(m) + if len(all_mem_actions) + len(gadget.mem_changes) > self.arch.max_sym_mem_access: + return False + # step 3: add all left memory actions to either read/write memory accesses stashes for a in all_mem_actions: mem_access = self._build_mem_access(a, gadget, init_state, final_state) @@ -908,6 +906,7 @@ def _analyze_mem_access(self, final_state, init_state, gadget): gadget.mem_reads.append(mem_access) if a.action == "write": gadget.mem_writes.append(mem_access) + return True def _starts_with_syscall(self, addr): """ From d92873b2263fac5efc5cbba67c7a353bf4ad7784 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 11:27:15 -0700 Subject: [PATCH 085/106] add extra_constraints instead of copying states, this way we can better utilize the solve caches in z3 --- angrop/gadget_finder/gadget_analyzer.py | 7 ++--- angrop/rop_utils.py | 36 +++++++++++++------------ 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 04e8371..2fa7059 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -609,7 +609,6 @@ def _check_if_stack_controls_ast(self, ast, initial_state, gadget_stack_change=N # this is an annoying problem but this code should handle it # prefilter - # FIXME: this check is kinda weird, what if it is rax+rbx? if len(ast.variables) != 1 or not list(ast.variables)[0].startswith("symbolic_stack"): return False @@ -617,12 +616,10 @@ def _check_if_stack_controls_ast(self, ast, initial_state, gadget_stack_change=N if gadget_stack_change is not None: stack_bytes_length = min(max(gadget_stack_change, 0), stack_bytes_length) concrete_stack = claripy.BVV(b"B" * stack_bytes_length) - concrete_stack_s = initial_state.copy() - concrete_stack_s.add_constraints( - initial_state.memory.load(initial_state.regs.sp, stack_bytes_length) == concrete_stack) + const = initial_state.memory.load(initial_state.regs.sp, stack_bytes_length) == concrete_stack test_constraint = ast != test_val # stack must have set the register and it must be able to set the register to all 1's or all 0's - ans = not concrete_stack_s.solver.satisfiable(extra_constraints=(test_constraint,)) and \ + ans = not initial_state.solver.satisfiable(extra_constraints=(const, test_constraint,)) and \ rop_utils.fast_unconstrained_check(initial_state, ast) return ans diff --git a/angrop/rop_utils.py b/angrop/rop_utils.py index d063a6f..41c008f 100644 --- a/angrop/rop_utils.py +++ b/angrop/rop_utils.py @@ -29,45 +29,45 @@ def get_ast_dependency(ast): return dependencies -def get_ast_controllers(test_state, ast, reg_deps): +def get_ast_controllers(state, ast, reg_deps): """ looks for registers that we can make symbolic then the ast can be "anything" - :param test_state: the input state + :param state: the input state :param ast: the ast of which we are trying to analyze controllers :param reg_deps: All registers which it depends on :return: A set of register names which can control the ast """ - test_val = 0x4141414141414141 % (2 << test_state.arch.bits) + test_val = 0x4141414141414141 % (2 << state.arch.bits) controllers = [] if not ast.symbolic: return controllers # make sure it can't be symbolic if all the registers are constrained - constrained_copy = test_state.copy() + constraints = [] for reg in reg_deps: - if not constrained_copy.registers.load(reg).symbolic: + if not state.registers.load(reg).symbolic: continue - constrained_copy.add_constraints(constrained_copy.registers.load(reg) == test_val) - if len(constrained_copy.solver.eval_upto(ast, 2)) > 1: + constraints.append(state.registers.load(reg) == test_val) + if len(state.solver.eval_upto(ast, 2, extra_constraints=constraints)) > 1: return controllers for reg in reg_deps: - constrained_copy = test_state.copy() + extra_constraints = [] for r in [a for a in reg_deps if a != reg]: # for bp and registers that might be set - if not constrained_copy.registers.load(r).symbolic: + if not state.registers.load(r).symbolic: continue - constrained_copy.add_constraints(constrained_copy.registers.load(r) == test_val) + extra_constraints.append(state.registers.load(r) == test_val) - if unconstrained_check(constrained_copy, ast): + if unconstrained_check(state, ast, extra_constraints=extra_constraints): controllers.append(reg) return controllers -def unconstrained_check(state, ast): +def unconstrained_check(state, ast, extra_constraints=None): """ Attempts to check if an ast is completely unconstrained :param state: the state to use @@ -82,15 +82,17 @@ def unconstrained_check(state, ast): # chars need to be able to be different test_val_4 = int(("1001"*2 + "1010"*2 + "1011"*2 + "1100"*2 + "1101"*2 + "1110"*2 + "1110"*2 + "0001"*2), 2) \ % (1 << size) - if not state.solver.satisfiable(extra_constraints=(ast == test_val_0,)): + extra = extra_constraints if extra_constraints is not None else [] + + if not state.solver.satisfiable(extra_constraints= extra + [ast == test_val_0]): return False - if not state.solver.satisfiable(extra_constraints=(ast == test_val_1,)): + if not state.solver.satisfiable(extra_constraints= extra + [ast == test_val_1]): return False - if not state.solver.satisfiable(extra_constraints=(ast == test_val_2,)): + if not state.solver.satisfiable(extra_constraints= extra + [ast == test_val_2]): return False - if not state.solver.satisfiable(extra_constraints=(ast == test_val_3,)): + if not state.solver.satisfiable(extra_constraints= extra + [ast == test_val_3]): return False - if not state.solver.satisfiable(extra_constraints=(ast == test_val_4,)): + if not state.solver.satisfiable(extra_constraints= extra + [ast == test_val_4]): return False return True From 818f32b31d4627e39a99a0a1c7a4803deda6b152 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 11:55:54 -0700 Subject: [PATCH 086/106] filter out syscall gadgets that we don't control sysnum for --- angrop/gadget_finder/gadget_analyzer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 2fa7059..5ec0447 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -102,6 +102,9 @@ def filter(state): final_states = list(simgr.unconstrained) if "syscall" in simgr.stashes: + cc = angr.SYSCALL_CC[self.project.arch.name]["default"](self.project.arch) + sysnum_is_constrained = lambda s: not cc.syscall_num(s).symbolic or not rop_utils.fast_unconstrained_check(s, cc.syscall_num(s)) + simgr.move(from_stash='syscall', to_stash='deadended', filter_func=sysnum_is_constrained) final_states.extend(self._try_stepping_past_syscall(state) for state in simgr.syscall) bad_states = simgr.active + simgr.deadended From 3a6128ec9d93acadd0bfb4d3f65ccc78c2bddf60 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 12:32:11 -0700 Subject: [PATCH 087/106] do static analysis before stepping to reduce overhead --- angrop/gadget_finder/gadget_analyzer.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 5ec0447..6536f23 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -80,9 +80,13 @@ def _step_to_gadget_stopping_states(self, init_state): simgr = self.project.factory.simulation_manager(init_state, save_unconstrained=True) def filter(state): - if state.ip.concrete and self.project.is_hooked(state.addr): + if not state.ip.concrete: + return None + if self.project.is_hooked(state.addr): # We don't want to go into SimProcedures. return simgr.DROP + if not self._block_make_sense(state.addr): + return simgr.DROP if rop_utils.is_in_kernel(self.project, state): return "syscall" return None @@ -102,6 +106,7 @@ def filter(state): final_states = list(simgr.unconstrained) if "syscall" in simgr.stashes: + # for syscallgadget, the syscall number needs to be controlled, or there is no point cc = angr.SYSCALL_CC[self.project.arch.name]["default"](self.project.arch) sysnum_is_constrained = lambda s: not cc.syscall_num(s).symbolic or not rop_utils.fast_unconstrained_check(s, cc.syscall_num(s)) simgr.move(from_stash='syscall', to_stash='deadended', filter_func=sysnum_is_constrained) @@ -170,10 +175,6 @@ def _analyze_gadget(self, addr, allow_conditional_branches): def _valid_state(self, init_state, final_state): if self._change_arch_state(init_state, final_state): return False - for addr in final_state.history.bbl_addrs: - b = final_state.project.factory.block(addr) - if not self.arch.block_make_sense(b): - return False return True def _change_arch_state(self, init_state, final_state): From 288c4a382b262ccde5f8ac1c74234394608b6a25 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 14:48:58 -0700 Subject: [PATCH 088/106] make allow_conditional_branches under fast_mode --- angrop/gadget_finder/__init__.py | 9 +++++++-- angrop/gadget_finder/gadget_analyzer.py | 5 ++++- angrop/rop.py | 9 ++++----- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index 361b5d9..805e208 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -37,7 +37,12 @@ def _set_global_gadget_analyzer(rop_gadget_analyzer): _disable_loggers() def run_worker(addr): - return _global_gadget_analyzer.analyze_gadget(addr, allow_conditional_branches=True) + res = _global_gadget_analyzer.analyze_gadget(addr) + if res is None: + return [] + if isinstance(res, list): + return res + return [res] class GadgetFinder: """ @@ -185,7 +190,7 @@ def find_gadgets_single_threaded(self, show_progress=True): assert self.gadget_analyzer is not None for addr in self._addresses_to_check_with_caching(show_progress): - gadgets.extend(self.gadget_analyzer.analyze_gadget(addr, allow_conditional_branches=True)) + gadgets.extend(self.gadget_analyzer.analyze_gadget(addr)) for g in gadgets: g.project = self.project diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 6536f23..2322a10 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -26,6 +26,7 @@ def __init__(self, project, fast_mode, kernel_mode=False, arch=None, stack_gsize self.project = project self.arch = get_arch(project, kernel_mode=kernel_mode) if arch is None else arch self._fast_mode = fast_mode + self._allow_conditional_branches = not self._fast_mode # initial state that others are based off, all analysis should copy the state first and work on # the copied state @@ -40,7 +41,7 @@ def __init__(self, project, fast_mode, kernel_mode=False, arch=None, stack_gsize fast_mode=self._fast_mode) self._concrete_sp = self._state.solver.eval(self._state.regs.sp) - def analyze_gadget(self, addr, allow_conditional_branches=False): + def analyze_gadget(self, addr, allow_conditional_branches=None): """ Find gadgets at the given address. @@ -54,6 +55,8 @@ def analyze_gadget(self, addr, allow_conditional_branches=False): :param allow_conditional_branches: whether to allow gadgets with conditional branches :return: a list of RopGadget instances or a single RopGadget instance """ + if allow_conditional_branches is None: + allow_conditional_branches = self._allow_conditional_branches try: gadgets = self._analyze_gadget(addr, allow_conditional_branches) except RopTimeoutException: diff --git a/angrop/rop.py b/angrop/rop.py index 285f105..d4cae91 100644 --- a/angrop/rop.py +++ b/angrop/rop.py @@ -10,15 +10,13 @@ l = logging.getLogger('angrop.rop') -# todo what if we have mov eax, [rsp+0x20]; ret (cache would need to know where it is or at least a min/max) -# todo what if we have pop eax; mov ebx, eax; need to encode that we cannot set them to different values class ROP(Analysis): """ This class is a semantic aware rop gadget finder It is a work in progress, so don't be surprised if something doesn't quite work After calling find_gadgets(), find_gadgets_single_threaded() or load_gadgets(), - self.gadgets, self.stack_pivots, and self._duplicates is populated. + self.rop_gadgets, self.pivot_gadgets, self.syscall_gadgets are populated. Additionally, all public methods from ChainBuilder are copied into ROP. """ @@ -29,8 +27,9 @@ def __init__(self, only_check_near_rets=True, max_block_size=None, max_sym_mem_a :param only_check_near_rets: If true we skip blocks that are not near rets :param max_block_size: limits the size of blocks considered, longer blocks are less likely to be good rop gadgets so we limit the size we consider - :param fast_mode: if set to True sets options to run fast, if set to False sets options to find more gadgets - if set to None makes a decision based on the size of the binary + :param fast_mode: True/False, if set to None makes a decision based on the size of the binary + if True, skip gadgets with conditonal_branches, floating point operations, jumps + allow smaller gadget size :param is_thumb: execute ROP chain in thumb mode. Only makes difference on ARM architecture. angrop does not switch mode within a rop chain :param kernel_mode: find kernel mode gadgets From 3b8d6ce088fedba12e998f9ef8e9b0134ff5bf4f Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 15:03:05 -0700 Subject: [PATCH 089/106] retore gadget.project after caching the gadgets --- angrop/rop.py | 1 + 1 file changed, 1 insertion(+) diff --git a/angrop/rop.py b/angrop/rop.py index d4cae91..1350eb6 100644 --- a/angrop/rop.py +++ b/angrop/rop.py @@ -159,6 +159,7 @@ def save_gadgets(self, path): """ with open(path, "wb") as f: pickle.dump(self._get_cache_tuple(), f) + for g in self._all_gadgets: g.project = self.project def load_gadgets(self, path): """ From c1a1abe35ce49b1bdf98033e42fb97ecc3cce5d9 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 15:09:38 -0700 Subject: [PATCH 090/106] use instruction count for gadget comparison --- angrop/chain_builder/pivot.py | 2 +- angrop/chain_builder/shifter.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/angrop/chain_builder/pivot.py b/angrop/chain_builder/pivot.py index bce2da6..23bd93e 100644 --- a/angrop/chain_builder/pivot.py +++ b/angrop/chain_builder/pivot.py @@ -123,7 +123,7 @@ def _better_than(self, g1, g2): return False if not g1.changed_regs.issubset(g2.changed_regs): return False - if g1.block_length > g2.block_length: + if g1.isn_count > g2.isn_count: return False return True diff --git a/angrop/chain_builder/shifter.py b/angrop/chain_builder/shifter.py index 92465c7..1483f91 100644 --- a/angrop/chain_builder/shifter.py +++ b/angrop/chain_builder/shifter.py @@ -129,12 +129,16 @@ def _same_effect(self, g1, g2): return False if g1.transit_type != g2.transit_type: return False - if g1.changed_regs != g2.changed_regs: # needed for preserve_regs - return False return True def _better_than(self, g1, g2): - return False + if g1.num_mem_access > g2.num_mem_access: + return False + if not g1.changed_regs.issubset(g2.changed_regs): + return False + if g1.isn_count > g2.isn_count: + return False + return True def filter_gadgets(self, gadgets): """ From 546ad79ee2cf101315e4a5228862fd40ceb55ffa Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 15:29:06 -0700 Subject: [PATCH 091/106] check whether block makes sense or not after checking syscall --- angrop/gadget_finder/gadget_analyzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 2322a10..0cc92bd 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -88,10 +88,10 @@ def filter(state): if self.project.is_hooked(state.addr): # We don't want to go into SimProcedures. return simgr.DROP - if not self._block_make_sense(state.addr): - return simgr.DROP if rop_utils.is_in_kernel(self.project, state): return "syscall" + if not self._block_make_sense(state.addr): + return simgr.DROP return None simgr.run(n=2, filter_func=filter) From 008bcc6023bce032f4398ba7a081d308e1720709 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 16:00:42 -0700 Subject: [PATCH 092/106] cache syscall block's next block as well, or we may miss some good syscall gadgets --- angrop/arch.py | 1 - angrop/gadget_finder/__init__.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/angrop/arch.py b/angrop/arch.py index daa150a..b15778e 100644 --- a/angrop/arch.py +++ b/angrop/arch.py @@ -99,7 +99,6 @@ class AARCH64(ROPArch): def __init__(self, project, kernel_mode=False): super().__init__(project, kernel_mode=kernel_mode) self.ret_insts = {b'\xc0\x03_\xd6'} - self.max_block_size = self.alignment * 15 class MIPS(ROPArch): def __init__(self, project, kernel_mode=False): diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index 805e208..567318c 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -247,8 +247,10 @@ def _addresses_to_check_with_caching(self, show_progress=True): def block_hash(self, block):# pylint:disable=no-self-use """ a hash to uniquely identify a simple block - TODO: block.bytes is too primitive """ + if block.vex.jumpkind == 'Ijk_Sys_syscall': + next_block = self.project.factory.block(block.addr+block.size) + return block.bytes + next_block.bytes return block.bytes def _get_executable_ranges(self): From 376dc1a44c31bbf8c4e5fd7eddf163e6124498f6 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 17:18:05 -0700 Subject: [PATCH 093/106] don't map data on stack, we may perform modifications on the stack data so the mapping will lose track of those modifications --- angrop/chain_builder/builder.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index 0a03a8d..8eadd18 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -137,14 +137,12 @@ def map_stack_var(ast, value): arch_bytes = self.project.arch.bytes - state = test_symbolic_state + state = test_symbolic_state.copy() # Step through each gadget and constrain the ip. - # save the constraints into test_symbolic_state for gadget in gadgets: map_stack_var(state.ip, gadget) state.solver.add(state.ip == gadget.addr) - test_symbolic_state.solver.add(state.ip == gadget.addr) for addr in gadget.bbl_addrs[1:]: succ = state.step() succ_states = [ @@ -186,8 +184,7 @@ def map_stack_var(ast, value): if var.concrete_value != val.concreted: raise RopException("Register set to incorrect value") else: - map_stack_var(var, val) - state.solver.add(var == val) + state.solver.add(var == val.data) # Constrain memory access addresses. for action in state.history.actions: @@ -199,6 +196,7 @@ def map_stack_var(ast, value): state.solver.add(action.addr.ast >= modifiable_memory_range[0]) state.solver.add(action.addr.ast < modifiable_memory_range[1]) + # now import the constraints from the state that has reached the end of the ropchain test_symbolic_state.solver.add(*state.solver.constraints) bytes_per_pop = arch_bytes From 1542030fca745a97f7fc9979cea11131146c98be Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 17:42:11 -0700 Subject: [PATCH 094/106] support memory writing when the transit_type is not ret --- angrop/chain_builder/mem_writer.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/angrop/chain_builder/mem_writer.py b/angrop/chain_builder/mem_writer.py index ff658cb..88fd439 100644 --- a/angrop/chain_builder/mem_writer.py +++ b/angrop/chain_builder/mem_writer.py @@ -53,7 +53,7 @@ def _gen_mem_write_gadgets(self, string_data): for g in self._good_mem_write_gadgets: yield g - possible_gadgets = self._mem_write_gadgets.copy() - self._good_mem_write_gadgets + possible_gadgets = {g for g in self._mem_write_gadgets.copy() if g.transit_type != 'jmp_reg'} - self._good_mem_write_gadgets # use the graph-search to gain a rough idea about (stack_change, register setting) registers = dict((reg, 0x41) for reg in self.arch.reg_set) @@ -240,7 +240,22 @@ def _write_to_mem_with_gadget(self, gadget, addr_val, data, use_partial_controll chain.add_gadget(gadget) bytes_per_pop = self.project.arch.bytes - for _ in range(gadget.stack_change // bytes_per_pop - 1): + pc_offset = None + if gadget.transit_type == 'pop_pc': + pc_offset = gadget.pc_offset + elif gadget.transit_type == 'ret': + pc_offset = gadget.stack_change - 8 + else: + raise ValueError(f"Unknown gadget transit_type: {gadget.transit_type}") + + for idx in range(gadget.stack_change // bytes_per_pop): + if idx == pc_offset//8: + next_pc_val = rop_utils.cast_rop_value( + chain._blank_state.solver.BVS("next_pc", self.project.arch.bits), + self.project, + ) + chain.add_value(next_pc_val) + continue chain.add_value(self._get_fill_val()) # verify the write actually works @@ -249,9 +264,9 @@ def _write_to_mem_with_gadget(self, gadget, addr_val, data, use_partial_controll if not state.solver.eval(sim_data == data): raise RopException("memory write fails") - # the next pc must come from the stack + # the next pc must be in our control if len(state.regs.pc.variables) != 1: raise RopException("must have only one pc variable") - if not set(state.regs.pc.variables).pop().startswith("symbolic_stack"): - raise RopException("the next pc not from the stack") + if not set(state.regs.pc.variables).pop().startswith("next_pc_"): + raise RopException("the next pc is not in our control!") return chain From 33a026eb10bbc038e2d15f732d905590a171367a Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 17:42:44 -0700 Subject: [PATCH 095/106] make max_block_size configured differently for each arch --- angrop/arch.py | 16 ++++++++++++++-- angrop/gadget_finder/__init__.py | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/angrop/arch.py b/angrop/arch.py index b15778e..d1c37f1 100644 --- a/angrop/arch.py +++ b/angrop/arch.py @@ -8,8 +8,9 @@ def __init__(self, project, kernel_mode=False): self.kernel_mode = kernel_mode self.max_sym_mem_access = 4 self.alignment = project.arch.instruction_alignment - self.max_block_size = self.alignment * 8 self.reg_set = self._get_reg_set() + self.max_block_size = None + self.fast_mode_max_block_size = None a = project.arch self.stack_pointer = a.register_names[a.sp_offset] @@ -36,7 +37,8 @@ def block_make_sense(self, block): class X86(ROPArch): def __init__(self, project, kernel_mode=False): super().__init__(project, kernel_mode=kernel_mode) - self.max_block_size = 20 # X86 and AMD64 have alignment of 1, 8 bytes is certainly not good enough + self.max_block_size = 20 + self.fast_mode_max_block_size = 12 self.syscall_insts = {b"\xcd\x80"} # int 0x80 self.ret_insts = {b"\xc2", b"\xc3", b"\xca", b"\xcb"} self.segment_regs = {"cs", "ds", "es", "fs", "gs", "ss"} @@ -78,14 +80,20 @@ def __init__(self, project, kernel_mode=False): super().__init__(project, kernel_mode=kernel_mode) self.is_thumb = False # by default, we don't use thumb mode self.alignment = self.project.arch.bytes + self.max_block_size = self.alignment * 8 + self.fast_mode_max_block_size = self.alignment * 6 def set_thumb(self): self.is_thumb = True self.alignment = 2 + self.max_block_size = self.alignment * 8 + self.fast_mode_max_block_size = self.alignment * 6 def set_arm(self): self.is_thumb = False self.alignment = self.project.arch.bytes + self.max_block_size = self.alignment * 8 + self.fast_mode_max_block_size = self.alignment * 6 def block_make_sense(self, block): # disable conditional jumps, for now @@ -99,11 +107,15 @@ class AARCH64(ROPArch): def __init__(self, project, kernel_mode=False): super().__init__(project, kernel_mode=kernel_mode) self.ret_insts = {b'\xc0\x03_\xd6'} + self.max_block_size = self.alignment * 10 + self.fast_mode_max_block_size = self.alignment * 6 class MIPS(ROPArch): def __init__(self, project, kernel_mode=False): super().__init__(project, kernel_mode=kernel_mode) self.alignment = self.project.arch.bytes + self.max_block_size = self.alignment * 8 + self.fast_mode_max_block_size = self.alignment * 6 def get_arch(project, kernel_mode=False): name = project.arch.name diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index 567318c..467f5f6 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -112,7 +112,7 @@ def _initialize_gadget_analyzer(self): else: self.fast_mode = False if self.fast_mode: - self.arch.max_block_size = 12 + self.arch.max_block_size = self.arch.fast_mode_max_block_size self.arch.max_sym_mem_access = 1 # Recalculate num addresses to check based on fast_mode settings num_to_check = self._num_addresses_to_check() From 358dad0972245ff8bcf235503269ab75695694ee Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 18:13:23 -0700 Subject: [PATCH 096/106] remove jump_reg, now there is only pc_reg --- angrop/chain_builder/reg_setter.py | 6 ++---- angrop/gadget_finder/gadget_analyzer.py | 6 +----- angrop/rop_gadget.py | 3 --- tests/test_gadgets.py | 2 +- 4 files changed, 4 insertions(+), 13 deletions(-) diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 32f6521..9046d08 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -610,11 +610,9 @@ def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str remaining_regs.add(new_reg) if gadget.transit_type == 'jmp_reg': - # I don't know what's the difference between these two so just error if they're different. - assert gadget.jump_reg == gadget.pc_reg - if gadget.jump_reg in remaining_regs: + if gadget.pc_reg in remaining_regs: return None - remaining_regs.add(gadget.jump_reg) + remaining_regs.add(gadget.pc_reg) if not gadget.constraint_regs.isdisjoint(remaining_regs): return None diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 0cc92bd..192053f 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -109,10 +109,6 @@ def filter(state): final_states = list(simgr.unconstrained) if "syscall" in simgr.stashes: - # for syscallgadget, the syscall number needs to be controlled, or there is no point - cc = angr.SYSCALL_CC[self.project.arch.name]["default"](self.project.arch) - sysnum_is_constrained = lambda s: not cc.syscall_num(s).symbolic or not rop_utils.fast_unconstrained_check(s, cc.syscall_num(s)) - simgr.move(from_stash='syscall', to_stash='deadended', filter_func=sysnum_is_constrained) final_states.extend(self._try_stepping_past_syscall(state) for state in simgr.syscall) bad_states = simgr.active + simgr.deadended @@ -349,7 +345,7 @@ def _create_gadget(self, addr, init_state, final_state, ctrl_type): # for jmp_reg gadget, record the jump target register if transit_type == "jmp_reg": - gadget.pc_reg = gadget.jump_reg = list(final_state.ip.variables)[0].split('_', 1)[1].rsplit('-')[0] + gadget.pc_reg = list(final_state.ip.variables)[0].split('_', 1)[1].rsplit('-')[0] # compute sp change l.debug("... computing sp change") diff --git a/angrop/rop_gadget.py b/angrop/rop_gadget.py index f751e4e..b38d750 100644 --- a/angrop/rop_gadget.py +++ b/angrop/rop_gadget.py @@ -121,8 +121,6 @@ def __init__(self, addr): # utilize gadgets like `call qword ptr [rax+rbx]` because we have the dependency information. # transition information, i.e. how to pass the control flow to the next gadget self.transit_type = None - # TODO: what's the difference between jump_reg and pc_reg? - self.jump_reg = None self.pc_reg = None # pc_offset is exclusively used when transit_type is "pop_pc", # when pc_offset==stack_change-arch_bytes, transit_type is basically ret @@ -221,7 +219,6 @@ def copy(self): out.reg_moves = list(self.reg_moves) out.block_length = self.block_length out.transit_type = self.transit_type - out.jump_reg = self.jump_reg out.pc_reg = self.pc_reg return out diff --git a/tests/test_gadgets.py b/tests/test_gadgets.py index 6d46b16..52937b5 100644 --- a/tests/test_gadgets.py +++ b/tests/test_gadgets.py @@ -40,7 +40,7 @@ def test_jump_gadget(): jump_gadgets = [x for x in rop._all_gadgets if x.transit_type == "jmp_reg"] assert len(jump_gadgets) > 0 - jump_regs = [x.jump_reg for x in jump_gadgets] + jump_regs = [x.pc_reg for x in jump_gadgets] assert 't9' in jump_regs assert 'ra' in jump_regs From fd8efe124ab45f09d7cbe37e97467cdf0a7d4149 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Wed, 22 Jan 2025 19:11:42 -0700 Subject: [PATCH 097/106] oops, not every arch is 64bit --- angrop/chain_builder/mem_writer.py | 17 ++++++- tests/test_chainbuilder.py | 16 +++++++ tests/test_find_gadgets.py | 12 +++++ tests/test_rop.py | 76 +++++++++++++++++++++--------- 4 files changed, 97 insertions(+), 24 deletions(-) diff --git a/angrop/chain_builder/mem_writer.py b/angrop/chain_builder/mem_writer.py index 88fd439..e5922a3 100644 --- a/angrop/chain_builder/mem_writer.py +++ b/angrop/chain_builder/mem_writer.py @@ -43,6 +43,17 @@ def _get_all_mem_write_gadgets(gadgets): possible_gadgets.add(g) return possible_gadgets + def _better_than(self, g1, g2): + if g1.stack_change > g2.stack_change: + return False + if g1.num_mem_access > g2.num_mem_access: + return False + if g1.isn_count > g2.isn_count: + return False + if not g1.changed_regs.issubset(g2.changed_regs): + return False + return True + def _gen_mem_write_gadgets(self, string_data): # create a dict of bytes per write to gadgets # assume we need intersection of addr_dependencies and data_dependencies to be 0 @@ -71,7 +82,7 @@ def _gen_mem_write_gadgets(self, string_data): # vals[1]: stack_change to set those registers for regs, vals in reg_data.items(): reg_set_stack_change = vals[1] - if reg_set_stack_change >= best_stack_change: + if reg_set_stack_change > best_stack_change: continue for g in possible_gadgets: mem_write = g.mem_writes[0] @@ -84,6 +95,8 @@ def _gen_mem_write_gadgets(self, string_data): if stack_change < best_stack_change: best_gadget = g best_stack_change = stack_change + if stack_change == best_stack_change and self._better_than(g, best_gadget): + best_gadget = g yield best_gadget possible_gadgets.remove(best_gadget) @@ -244,7 +257,7 @@ def _write_to_mem_with_gadget(self, gadget, addr_val, data, use_partial_controll if gadget.transit_type == 'pop_pc': pc_offset = gadget.pc_offset elif gadget.transit_type == 'ret': - pc_offset = gadget.stack_change - 8 + pc_offset = gadget.stack_change - bytes_per_pop else: raise ValueError(f"Unknown gadget transit_type: {gadget.transit_type}") diff --git a/tests/test_chainbuilder.py b/tests/test_chainbuilder.py index 0de2755..4781a95 100644 --- a/tests/test_chainbuilder.py +++ b/tests/test_chainbuilder.py @@ -336,6 +336,22 @@ def test_shifter(): state = chain.exec() assert state.regs.sp.concrete_value == init_sp + 0x40 + proj.arch.bytes + # aarch64 + cache_path = os.path.join(CACHE_DIR, "aarch64_glibc_2.19") + proj = angr.Project(os.path.join(BIN_DIR, "tests", "aarch64", "libc.so.6"), auto_load_libs=False) + rop = proj.analyses.ROP(fast_mode=True, only_check_near_rets=False) + + if os.path.exists(cache_path): + rop.load_gadgets(cache_path) + else: + rop.find_gadgets() + rop.save_gadgets(cache_path) + + chain = rop.shift(0x10) + init_sp = chain._blank_state.regs.sp.concrete_value - len(chain._values) * proj.arch.bytes + state = chain.exec() + assert state.regs.sp.concrete_value == init_sp + 0x10 + proj.arch.bytes + def test_retsled(): # i386 cache_path = os.path.join(CACHE_DIR, "i386_glibc_2.35") diff --git a/tests/test_find_gadgets.py b/tests/test_find_gadgets.py index 4812aff..799381b 100644 --- a/tests/test_find_gadgets.py +++ b/tests/test_find_gadgets.py @@ -205,6 +205,18 @@ def test_gadget_filtering(): rop.chain_builder.update() assert len(rop.chain_builder._reg_setter._reg_setting_gadgets) == 1 +def test_aarch64_svc(): + proj = angr.Project(os.path.join(tests_dir, "aarch64", "libc.so.6"), auto_load_libs=False) + rop = proj.analyses.ROP(fast_mode=True, only_check_near_rets=False) + g = rop.analyze_gadget(0x0000000000463820) + assert g is not None + +def test_aarch64_reg_setter(): + proj = angr.Project(os.path.join(tests_dir, "aarch64", "libc.so.6"), auto_load_libs=False) + rop = proj.analyses.ROP(fast_mode=True, only_check_near_rets=False) + g = rop.analyze_gadget(0x00000000004c29a0) + assert g is not None + def run_all(): functions = globals() all_functions = {x:y for x, y in functions.items() if x.startswith('test_')} diff --git a/tests/test_rop.py b/tests/test_rop.py index 3309984..19836f3 100644 --- a/tests/test_rop.py +++ b/tests/test_rop.py @@ -112,6 +112,42 @@ def execute_chain(project, chain): return p.one_active +def verify_execve_chain(chain): + state = chain._blank_state.copy() + proj = state.project + state.memory.store(state.regs.sp, chain.payload_str()) + state.ip = state.stack_pop() + + # step to the system call + simgr = proj.factory.simgr(state) + while simgr.active: + assert len(simgr.active) == 1 + state = simgr.active[0] + obj = proj.loader.find_object_containing(state.ip.concrete_value) + if obj and obj.binary == 'cle##kernel': + break + simgr.step() + + # verify the syscall arguments + state = simgr.active[0] + cc = angr.SYSCALL_CC[proj.arch.name]["default"](proj.arch) + assert cc.syscall_num(state).concrete_value == 0x3b + ptr = state.registers.load(cc.ARG_REGS[0]) + assert state.solver.is_true(state.memory.load(ptr, 8) == b'/bin/sh\0') + assert state.registers.load(cc.ARG_REGS[1]).concrete_value == 0 + assert state.registers.load(cc.ARG_REGS[2]).concrete_value == 0 + +def test_roptest_mips(): + proj = angr.Project(os.path.join(public_bin_location, "mipsel/darpa_ping"), auto_load_libs=False) + rop = proj.analyses.ROP() + rop.find_gadgets_single_threaded(show_progress=False) + + chain = rop.set_regs(s0=0x41414141, s1=0x42424242, v0=0x43434343) + result_state = execute_chain(proj, chain) + assert result_state.solver.eval(result_state.regs.s0) == 0x41414141 + assert result_state.solver.eval(result_state.regs.s1) == 0x42424242 + assert result_state.solver.eval(result_state.regs.v0) == 0x43434343 + def test_rop_x86_64(): b = angr.Project(os.path.join(public_bin_location, "x86_64/datadep_test"), auto_load_libs=False) @@ -202,34 +238,30 @@ def test_roptest_x86_64(): r = p.analyses.ROP(only_check_near_rets=False) r.find_gadgets_single_threaded(show_progress=False) c = r.execve(path=b"/bin/sh") + verify_execve_chain(c) - state = p.factory.blank_state() - state.memory.store(state.regs.sp, c.payload_str()) - state.ip = state.stack_pop() +def test_roptest_aarch64(): + cache_path = os.path.join(test_data_location, "aarch64_glibc_2.19") + proj = angr.Project(os.path.join(public_bin_location, "aarch64", "libc.so.6"), auto_load_libs=False) + rop = proj.analyses.ROP(fast_mode=True, only_check_near_rets=False) - # Step to the syscall. - while state.block(num_inst=1).disassembly.insns[0].mnemonic != 'syscall': - succ = state.step() - assert len(succ.flat_successors) == 1 - assert not succ.unconstrained_successors - state = succ.flat_successors[0] + rop.analyze_gadget(0x4b7ca8) + rop.analyze_gadget(0x4ebad4) - assert state.solver.is_true(state.memory.load(state.regs.rdi, 8) == b'/bin/sh\0') - assert state.regs.rsi.concrete_value == 0 - assert state.regs.rdx.concrete_value == 0 - assert state.regs.rax.concrete_value == 0x3b + chain = rop.set_regs(x0=0x41414141) + assert chain is not None -def test_roptest_mips(): - proj = angr.Project(os.path.join(public_bin_location, "mipsel/darpa_ping"), auto_load_libs=False) - rop = proj.analyses.ROP() - rop.find_gadgets_single_threaded(show_progress=False) + if os.path.exists(cache_path): + rop.load_gadgets(cache_path) + else: + rop.find_gadgets() + rop.save_gadgets(cache_path) - chain = rop.set_regs(s0=0x41414141, s1=0x42424242, v0=0x43434343) - result_state = execute_chain(proj, chain) - assert result_state.solver.eval(result_state.regs.s0) == 0x41414141 - assert result_state.solver.eval(result_state.regs.s1) == 0x42424242 - assert result_state.solver.eval(result_state.regs.v0) == 0x43434343 + chain = rop.write_to_mem(0x41414140, b'AAAAAAA') + assert chain is not None + chain = rop.execve(path=b'/bin/sh') + verify_execve_chain(chain) def run_all(): functions = globals() From f5a3cf9ef861a201621b3c36c135b2bb0a36ee71 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 23 Jan 2025 16:25:36 -0700 Subject: [PATCH 098/106] rebalance add/sub in ropvalue ast so we can track symbolic user ropvalue correctly --- angrop/chain_builder/builder.py | 47 ++++++++++++++++++++++++++++++++ angrop/gadget_finder/__init__.py | 7 +++-- angrop/rop.py | 10 +++++-- 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index 8eadd18..ede3c48 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -103,6 +103,45 @@ def _get_ptr_to_null(self): return addr return None + def _ast_contains_stack_data(self, ast): + vs = ast.variables + return len(vs) == 1 and list(vs)[0].startswith('symbolic_stack_') + + def _rebalance_ast(self, lhs, rhs): + """ + we know that lhs (stack content with modification) == rhs (user ropvalue) + since user ropvalue may be symbolic, we need to present the stack content using the user ropvalue and store it + on stack so that users can eval on their own ropvalue and get the correct solves + TODO: currently, we only support add/sub + """ + assert self._ast_contains_stack_data(lhs) + while lhs.depth != 1: + match lhs.op: + case "__add__" | "__sub__": + arg0 = lhs.args[0] + arg1 = lhs.args[1] + flag = self._ast_contains_stack_data(arg0) + op = lhs.op + if flag: + lhs = arg0 + other = arg1 + else: + lhs = arg1 + other = arg0 + if op == "__add__": + rhs -= other + elif flag: + rhs += other + else: + rhs = other - rhs + case "Reverse": + lhs = lhs.args[0] + rhs = claripy.Reverse(rhs) + case _: + raise ValueError(f"{lhs.op} cannot be rebalanced at the moment. plz create an issue!") + assert self._ast_contains_stack_data(lhs) + return lhs, rhs + @rop_utils.timeout(8) def _build_reg_setting_chain( self, gadgets, modifiable_memory_range, register_dict, stack_change @@ -185,6 +224,14 @@ def map_stack_var(ast, value): raise RopException("Register set to incorrect value") else: state.solver.add(var == val.data) + lhs, rhs = self._rebalance_ast(var, val.data) + rhs = claripy.Reverse(rhs) + ropvalue = val.copy() + if val.rebase: + ropvalue._value = rhs - ropvalue._code_base + else: + ropvalue._value = rhs + map_stack_var(lhs, ropvalue) # Constrain memory access addresses. for action in state.history.actions: diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index 467f5f6..c0b2fbe 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -125,7 +125,10 @@ def _initialize_gadget_analyzer(self): def analyze_gadget(self, addr): g = self.gadget_analyzer.analyze_gadget(addr) - if g: + if isinstance(g, list): + for x in g: + x.project = self.project + elif g: g.project = self.project return g @@ -156,7 +159,7 @@ def get_duplicates(self): cache = self._cache return {k:v for k,v in cache.items() if len(v) >= 2} - def find_gadgets(self, processes=16, show_progress=True): + def find_gadgets(self, processes=4, show_progress=True): self._cache = {} initargs = (self.gadget_analyzer,) diff --git a/angrop/rop.py b/angrop/rop.py index 1350eb6..32281a9 100644 --- a/angrop/rop.py +++ b/angrop/rop.py @@ -96,9 +96,15 @@ def _screen_gadgets(self): def analyze_gadget(self, addr): g = self.gadget_finder.analyze_gadget(addr) - if g: + if g is None: + return g + + if isinstance(g, list): + self._all_gadgets += g + else: self._all_gadgets.append(g) - self._screen_gadgets() + + self._screen_gadgets() return g def analyze_gadget_list(self, addr_list, processes=4, show_progress=True): From c7e2e0426d70bc439910b7e63d51f0e763827552 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 23 Jan 2025 16:54:41 -0700 Subject: [PATCH 099/106] distinguish between analyze_gadget and analyze_addr --- angrop/gadget_finder/__init__.py | 16 ++++++++++------ angrop/rop.py | 24 ++++++++++++++++++------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index c0b2fbe..cfcd5be 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -1,8 +1,8 @@ import re import logging import itertools +from functools import partial from multiprocessing import Pool -from collections import defaultdict import tqdm @@ -36,8 +36,11 @@ def _set_global_gadget_analyzer(rop_gadget_analyzer): _global_gadget_analyzer = rop_gadget_analyzer _disable_loggers() -def run_worker(addr): - res = _global_gadget_analyzer.analyze_gadget(addr) +def run_worker(addr, allow_cond_branch=None): + if allow_cond_branch is None: + res = _global_gadget_analyzer.analyze_gadget(addr) + else: + res = _global_gadget_analyzer.analyze_gadget(addr, allow_conditional_branches=allow_cond_branch) if res is None: return [] if isinstance(res, list): @@ -123,8 +126,8 @@ def _initialize_gadget_analyzer(self): self._gadget_analyzer = gadget_analyzer.GadgetAnalyzer(self.project, self.fast_mode, arch=self.arch, kernel_mode=self.kernel_mode, stack_gsize=self.stack_gsize) - def analyze_gadget(self, addr): - g = self.gadget_analyzer.analyze_gadget(addr) + def analyze_gadget(self, addr, allow_conditional_branches=None): + g = self.gadget_analyzer.analyze_gadget(addr, allow_conditional_branches=allow_conditional_branches) if isinstance(g, list): for x in g: x.project = self.project @@ -141,8 +144,9 @@ def analyze_gadget_list(self, addr_list, processes=4, show_progress=True): iterable = tqdm.tqdm(iterable=iterable, smoothing=0, total=len(addr_list), desc="ROP", maxinterval=0.5, dynamic_ncols=True) + func = partial(run_worker, allow_cond_branch=False) with Pool(processes=processes, initializer=_set_global_gadget_analyzer, initargs=initargs) as pool: - it = pool.imap_unordered(run_worker, iterable, chunksize=1) + it = pool.imap_unordered(func, iterable, chunksize=1) for gs in it: if gs: gadgets += gs diff --git a/angrop/rop.py b/angrop/rop.py index 32281a9..0e12c87 100644 --- a/angrop/rop.py +++ b/angrop/rop.py @@ -94,16 +94,28 @@ def _screen_gadgets(self): self.chain_builder.syscall_gadgets = self.syscall_gadgets self.chain_builder.update() + def analyze_addr(self, addr): + """ + return a list of gadgets that starts from addr + this is possible because of conditional branches + """ + gs = self.gadget_finder.analyze_gadget(addr, allow_conditional_branches=True) + if not gs: + return gs + self._all_gadgets += gs + self._screen_gadgets() + return gs + def analyze_gadget(self, addr): - g = self.gadget_finder.analyze_gadget(addr) + """ + return a gadget or None, it filters out gadgets containing conditional_branches + if you'd like those, use analyze_addr + """ + g = self.gadget_finder.analyze_gadget(addr, allow_conditional_branches=False) if g is None: return g - if isinstance(g, list): - self._all_gadgets += g - else: - self._all_gadgets.append(g) - + self._all_gadgets.append(g) self._screen_gadgets() return g From 79a3bc1567c5ca2594e68cca25f560ea1765f8e4 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 23 Jan 2025 17:02:22 -0700 Subject: [PATCH 100/106] kyle, it is 2025, and 32bit system is still a thing --- angrop/chain_builder/mem_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/angrop/chain_builder/mem_writer.py b/angrop/chain_builder/mem_writer.py index e5922a3..5e57a94 100644 --- a/angrop/chain_builder/mem_writer.py +++ b/angrop/chain_builder/mem_writer.py @@ -262,7 +262,7 @@ def _write_to_mem_with_gadget(self, gadget, addr_val, data, use_partial_controll raise ValueError(f"Unknown gadget transit_type: {gadget.transit_type}") for idx in range(gadget.stack_change // bytes_per_pop): - if idx == pc_offset//8: + if idx == pc_offset//bytes_per_pop: next_pc_val = rop_utils.cast_rop_value( chain._blank_state.solver.BVS("next_pc", self.project.arch.bits), self.project, From 3d204f6bc78756ebaf22a4a016ed1de7be0e902e Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 23 Jan 2025 17:08:42 -0700 Subject: [PATCH 101/106] improve the roptest_aarch64 --- tests/test_rop.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_rop.py b/tests/test_rop.py index 19836f3..a72e032 100644 --- a/tests/test_rop.py +++ b/tests/test_rop.py @@ -2,6 +2,7 @@ import angr import angrop # pylint: disable=unused-import import pickle +import claripy import logging l = logging.getLogger("angrop.tests.test_rop") @@ -248,8 +249,11 @@ def test_roptest_aarch64(): rop.analyze_gadget(0x4b7ca8) rop.analyze_gadget(0x4ebad4) - chain = rop.set_regs(x0=0x41414141) + data = claripy.BVS("data", 64) + chain = rop.set_regs(x0=data) assert chain is not None + chain._blank_state.solver.add(data == 0x41414141) + assert b'\xe1\x3eAA' in chain.payload_str() if os.path.exists(cache_path): rop.load_gadgets(cache_path) From ecff7104a236e167c3b74e82fcd713f08db0b41c Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 23 Jan 2025 17:16:38 -0700 Subject: [PATCH 102/106] handle cases where syscall is the last instruction in an executable segment --- angrop/gadget_finder/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index cfcd5be..f7cc628 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -251,12 +251,16 @@ def _addresses_to_check_with_caching(self, show_progress=True): continue yield a - def block_hash(self, block):# pylint:disable=no-self-use + def block_hash(self, block): """ a hash to uniquely identify a simple block """ if block.vex.jumpkind == 'Ijk_Sys_syscall': - next_block = self.project.factory.block(block.addr+block.size) + next_addr = block.addr + block.size + obj = self.project.loader.find_object_containing(next_addr) + if not obj: + return block.bytes + next_block = self.project.factory.block(next_addr) return block.bytes + next_block.bytes return block.bytes From 2435bd6aae85b1085bd9f2a08f2b74b1bc28e8d3 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 23 Jan 2025 18:13:45 -0700 Subject: [PATCH 103/106] don't leave rebase as None --- angrop/chain_builder/mem_writer.py | 2 ++ angrop/rop_chain.py | 3 +-- tests/test_chainbuilder.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/angrop/chain_builder/mem_writer.py b/angrop/chain_builder/mem_writer.py index 5e57a94..e771c9a 100644 --- a/angrop/chain_builder/mem_writer.py +++ b/angrop/chain_builder/mem_writer.py @@ -244,8 +244,10 @@ def _write_to_mem_with_gadget(self, gadget, addr_val, data, use_partial_controll var_names.remove(name) if reg in var_names.pop(): var = RopValue(var, self.project) + var._rebase = False if addr_val._rebase: var.rebase_ptr() + var._rebase = True break reg_vals[reg] = var diff --git a/angrop/rop_chain.py b/angrop/rop_chain.py index 4c6f15c..7b352ea 100644 --- a/angrop/rop_chain.py +++ b/angrop/rop_chain.py @@ -73,8 +73,7 @@ def add_gadget(self, gadget): if self._pie: value -= self._p.loader.main_object.mapped_base value = RopValue(value, self._p) - if self._pie: - value._rebase = True + value._rebase = self._pie is True if (idx := self.next_pc_idx()) is None: self.add_value(value) diff --git a/tests/test_chainbuilder.py b/tests/test_chainbuilder.py index 4781a95..b1118c4 100644 --- a/tests/test_chainbuilder.py +++ b/tests/test_chainbuilder.py @@ -161,17 +161,17 @@ def test_ropvalue(): rop.save_gadgets(cache_path) chain = rop.write_to_mem(0x800000, b"/bin/sh\x00") - assert sum(not x._rebase for x in chain._values) == 4 # 4 values + assert sum(x._rebase is False for x in chain._values) == 4 # 4 values value = RopValue(0x800000, proj) value._rebase = False chain = rop.write_to_mem(value, b"/bin/sh\x00") - assert sum(not x._rebase for x in chain._values) == 4 # 4 values + assert sum(x._rebase is False for x in chain._values) == 4 # 4 values value = RopValue(0x800000, proj) value.rebase_ptr() chain = rop.write_to_mem(value, b"/bin/sh\x00") - assert sum(not x._rebase for x in chain._values) == 2 # 2 values + assert sum(x._rebase is False for x in chain._values) == 2 # 4 values def test_reg_move(): cache_path = os.path.join(CACHE_DIR, "bronze_ropchain") From 42e35ac2bb6da7041b0e4045fa65be57004a8700 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 23 Jan 2025 18:32:09 -0700 Subject: [PATCH 104/106] linting --- angrop/chain_builder/builder.py | 6 ++--- angrop/chain_builder/func_caller.py | 3 ++- angrop/chain_builder/mem_changer.py | 3 ++- angrop/chain_builder/mem_writer.py | 16 +++++++----- angrop/chain_builder/reg_setter.py | 33 ++++++++++++++----------- angrop/chain_builder/sys_caller.py | 5 ++-- angrop/gadget_finder/gadget_analyzer.py | 9 ++++--- angrop/rop.py | 11 ++++++--- angrop/rop_gadget.py | 5 ++-- angrop/rop_utils.py | 2 +- tests/test_chainbuilder.py | 3 ++- 11 files changed, 56 insertions(+), 40 deletions(-) diff --git a/angrop/chain_builder/builder.py b/angrop/chain_builder/builder.py index ede3c48..c7f5171 100644 --- a/angrop/chain_builder/builder.py +++ b/angrop/chain_builder/builder.py @@ -1,7 +1,6 @@ import struct from abc import abstractmethod from functools import cmp_to_key -from collections import defaultdict import claripy @@ -103,7 +102,8 @@ def _get_ptr_to_null(self): return addr return None - def _ast_contains_stack_data(self, ast): + @staticmethod + def _ast_contains_stack_data(ast): vs = ast.variables return len(vs) == 1 and list(vs)[0].startswith('symbolic_stack_') @@ -323,7 +323,7 @@ def __filter_gadgets(self, gadgets): g1 = gadgets.pop() # check if nothing is better than g1 for g2 in bests|gadgets: - if self._better_than(g2, g1): + if self._better_than(g2, g1): #pylint: disable=arguments-out-of-order break else: bests.add(g1) diff --git a/angrop/chain_builder/func_caller.py b/angrop/chain_builder/func_caller.py index d23c50c..f554371 100644 --- a/angrop/chain_builder/func_caller.py +++ b/angrop/chain_builder/func_caller.py @@ -69,7 +69,8 @@ def _func_call(self, func_gadget, cc, args, extra_regs=None, preserve_regs=None, # 1. handle stack arguments # 2. handle function return address to maintain the control flow if stack_arguments: - cleaner = self.chain_builder.shift((len(stack_arguments)+1)*arch_bytes, next_pc_idx=-1, preserve_regs=preserve_regs) + shift_bytes = (len(stack_arguments)+1)*arch_bytes + cleaner = self.chain_builder.shift(shift_bytes, next_pc_idx=-1, preserve_regs=preserve_regs) chain.add_gadget(cleaner._gadgets[0]) for arg in stack_arguments: chain.add_value(arg) diff --git a/angrop/chain_builder/mem_changer.py b/angrop/chain_builder/mem_changer.py index c718970..7af6e3c 100644 --- a/angrop/chain_builder/mem_changer.py +++ b/angrop/chain_builder/mem_changer.py @@ -96,7 +96,8 @@ def add_to_mem(self, addr, value, data_size=None): # get the data from trying to set all the registers registers = dict((reg, 0x41) for reg in self.chain_builder.arch.reg_set) l.debug("getting reg data for mem adds") - _, _, reg_data = self.chain_builder._reg_setter.find_candidate_chains_graph_search(max_stack_change=0x50, **registers) + _, _, reg_data = self.chain_builder._reg_setter.find_candidate_chains_graph_search(max_stack_change=0x50, + **registers) l.debug("trying mem_add gadgets") # filter out gadgets that certainly cannot be used for add_mem diff --git a/angrop/chain_builder/mem_writer.py b/angrop/chain_builder/mem_writer.py index e771c9a..d6977ee 100644 --- a/angrop/chain_builder/mem_writer.py +++ b/angrop/chain_builder/mem_writer.py @@ -61,10 +61,10 @@ def _gen_mem_write_gadgets(self, string_data): # generate from the cache first if self._good_mem_write_gadgets: - for g in self._good_mem_write_gadgets: - yield g + yield from self._good_mem_write_gadgets - possible_gadgets = {g for g in self._mem_write_gadgets.copy() if g.transit_type != 'jmp_reg'} - self._good_mem_write_gadgets + possible_gadgets = {g for g in self._mem_write_gadgets.copy() if g.transit_type != 'jmp_reg'} + possible_gadgets -= self._good_mem_write_gadgets # already yield these # use the graph-search to gain a rough idea about (stack_change, register setting) registers = dict((reg, 0x41) for reg in self.arch.reg_set) @@ -98,8 +98,11 @@ def _gen_mem_write_gadgets(self, string_data): if stack_change == best_stack_change and self._better_than(g, best_gadget): best_gadget = g - yield best_gadget - possible_gadgets.remove(best_gadget) + if best_gadget: + possible_gadgets.remove(best_gadget) + yield best_gadget + else: + break @rop_utils.timeout(5) def _try_write_to_mem(self, gadget, use_partial_controllers, addr, string_data, fill_byte): @@ -251,7 +254,8 @@ def _write_to_mem_with_gadget(self, gadget, addr_val, data, use_partial_controll break reg_vals[reg] = var - chain = self._set_regs(use_partial_controllers=use_partial_controllers, **reg_vals) + + chain = self._set_regs(**reg_vals) chain.add_gadget(gadget) bytes_per_pop = self.project.arch.bytes diff --git a/angrop/chain_builder/reg_setter.py b/angrop/chain_builder/reg_setter.py index 9046d08..eccd7c4 100644 --- a/angrop/chain_builder/reg_setter.py +++ b/angrop/chain_builder/reg_setter.py @@ -16,6 +16,12 @@ l = logging.getLogger("angrop.chain_builder.reg_setter") class RegSetter(Builder): + """ + a chain builder that aims to set registers using different algorithms + 1. algo1: graph-search, fast, not reliable + 2. algo2: pop-only bfs search, fast, reliable, can generate chains to bypass bad-bytes + 3. algo3: riscy-rop inspired backward search, slow, can utilize gadgets containing conditional branches + """ def __init__(self, chain_builder): super().__init__(chain_builder) self._reg_setting_gadgets = None # all the gadgets that can set registers @@ -54,12 +60,14 @@ def verify(self, chain, preserve_regs, registers): offset -= act.offset % self.project.arch.bytes reg_name = self.project.arch.translate_register_name(offset) if reg_name in preserve_regs: - l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation - 1.\ntarget registers: %s", chain_str, registers) + l.exception("Somehow angrop thinks\n%s\ncan be used for the chain generation-1.\nregisters: %s", + chain_str, registers) return False for reg, val in registers.items(): bv = getattr(state.regs, reg) if (val.symbolic != bv.symbolic) or state.solver.eval(bv != val.data): - l.exception("Somehow angrop thinks \n%s\n can be used for the chain generation - 2.\ntarget registers: %s", chain_str, registers) + l.exception("Somehow angrop thinks\n%s\ncan be used for the chain generation-2.\nregisters: %s", + chain_str, registers) return False # the next pc must come from the stack or just marked as the next_pc if len(state.regs.pc.variables) != 1: @@ -67,7 +75,7 @@ def verify(self, chain, preserve_regs, registers): pc_var = set(state.regs.pc.variables).pop() return pc_var.startswith("symbolic_stack") or pc_var.startswith("next_pc") - def run(self, modifiable_memory_range=None, use_partial_controllers=False, preserve_regs=None, max_length=10, **registers): + def run(self, modifiable_memory_range=None, preserve_regs=None, max_length=10, **registers): if len(registers) == 0: return RopChain(self.project, None, badbytes=self.badbytes) @@ -106,24 +114,20 @@ def iterate_candidate_chains(self, modifiable_memory_range, preserve_regs, max_l yield gadgets # algorithm2 - gadgets_list = self.find_candidate_chains_pop_only_bfs_search( + yield from self.find_candidate_chains_pop_only_bfs_search( self._find_relevant_gadgets(**registers), preserve_regs.copy(), **registers) - for gadgets in gadgets_list: - yield gadgets # algorithm3 - for gadgets in self.find_candidate_chains_backwards_recursive_search( + yield from self.find_candidate_chains_backwards_recursive_search( self._reg_setting_gadgets, set(registers), current_chain=[], preserve_regs=preserve_regs.copy(), modifiable_memory_range=modifiable_memory_range, visited={}, - max_length=max_length): - yield gadgets - return + max_length=max_length) #### Chain Building Algorithm 1: fast but unreliable graph-based search #### @@ -137,6 +141,7 @@ def _tuple_to_gadgets(data, reg_tuple): curr_tuple = reg_tuple else: gadgets_reverse = reg_tuple[2] + curr_tuple = () while curr_tuple != (): gadgets_reverse.append(data[curr_tuple][2]) curr_tuple = data[curr_tuple][0] @@ -586,11 +591,11 @@ def _get_remaining_regs(self, gadget: RopGadget, registers: set[str]) -> set[str for reg in registers: if reg in gadget.popped_regs: - vars = gadget.popped_reg_vars[reg] - if not vars.isdisjoint(stack_dependencies): + reg_vars = gadget.popped_reg_vars[reg] + if not reg_vars.isdisjoint(stack_dependencies): # Two registers are popped from the same location on the stack. return None - stack_dependencies |= vars + stack_dependencies |= reg_vars continue new_reg = reg for reg_move in gadget.reg_moves: @@ -681,4 +686,4 @@ def filter_gadgets(self, gadgets): bests = bests.union(self._filter_gadgets(equal_class)) gadgets -= equal_class - return bests \ No newline at end of file + return bests diff --git a/angrop/chain_builder/sys_caller.py b/angrop/chain_builder/sys_caller.py index eb85097..2c1ecb9 100644 --- a/angrop/chain_builder/sys_caller.py +++ b/angrop/chain_builder/sys_caller.py @@ -51,8 +51,7 @@ def supported_os(os): def update(self): self.syscall_gadgets = self._filter_gadgets(self.chain_builder.syscall_gadgets) - @staticmethod - def _filter_gadgets(gadgets): + def _filter_gadgets(self, gadgets): return sorted(gadgets, key=functools.cmp_to_key(cmp)) def _try_invoke_execve(self, path_addr): @@ -169,7 +168,7 @@ def key_func(x): try: return self._func_call(gadget, cc, args, extra_regs=extra_regs, needs_return=needs_return, preserve_regs=preserve_regs, **kwargs) - except Exception: # pylint: disable=broad-exception-caught + except Exception: # pylint:disable=broad-exception-caught continue raise RopException(f"Fail to invoke syscall {syscall_num} with arguments: {args}!") diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 192053f..4bd555c 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -82,7 +82,7 @@ def _step_to_gadget_stopping_states(self, init_state): try: simgr = self.project.factory.simulation_manager(init_state, save_unconstrained=True) - def filter(state): + def filter_func(state): if not state.ip.concrete: return None if self.project.is_hooked(state.addr): @@ -94,8 +94,9 @@ def filter(state): return simgr.DROP return None - simgr.run(n=2, filter_func=filter) - simgr.move(from_stash='active', to_stash='syscall', filter_func=lambda s: rop_utils.is_in_kernel(self.project, s)) + simgr.run(n=2, filter_func=filter_func) + simgr.move(from_stash='active', to_stash='syscall', + filter_func=lambda s: rop_utils.is_in_kernel(self.project, s)) except (claripy.errors.ClaripySolverInterruptError, claripy.errors.ClaripyZ3Error, ValueError): return [], [] @@ -282,7 +283,7 @@ def _can_reach_stopping_states(self, addr, allow_conditional_branches, max_steps def _try_stepping_past_syscall(self, state): try: return rop_utils.step_to_unconstrained_successor(self.project, state, max_steps=3) - except Exception: # pylint: disable=broad-exception-caught + except Exception: # pylint:disable=broad-exception-caught return state def _identify_transit_type(self, final_state, ctrl_type): diff --git a/angrop/rop.py b/angrop/rop.py index 0e12c87..2645467 100644 --- a/angrop/rop.py +++ b/angrop/rop.py @@ -160,14 +160,16 @@ def find_gadgets_single_threaded(self, show_progress=True): return self.rop_gadgets def _get_cache_tuple(self): - all_gadgets = [x for x in self._all_gadgets] - for g in all_gadgets: g.project = None + all_gadgets = self._all_gadgets + for g in all_gadgets: + g.project = None return (all_gadgets, self._duplicates) def _load_cache_tuple(self, tup): self._all_gadgets = tup[0] self._duplicates = tup[1] - for g in self._all_gadgets: g.project = self.project + for g in self._all_gadgets: + g.project = self.project self._screen_gadgets() def save_gadgets(self, path): @@ -177,7 +179,8 @@ def save_gadgets(self, path): """ with open(path, "wb") as f: pickle.dump(self._get_cache_tuple(), f) - for g in self._all_gadgets: g.project = self.project + for g in self._all_gadgets: + g.project = self.project def load_gadgets(self, path): """ diff --git a/angrop/rop_gadget.py b/angrop/rop_gadget.py index b38d750..53ba85a 100644 --- a/angrop/rop_gadget.py +++ b/angrop/rop_gadget.py @@ -205,7 +205,8 @@ def __repr__(self): return "" % self.addr def copy(self): - out = RopGadget(self.project, self.addr) + out = RopGadget(self.addr) + out.project = self.project out.addr = self.addr out.changed_regs = set(self.changed_regs) out.popped_regs = set(self.popped_regs) @@ -303,4 +304,4 @@ def __init__(self, addr, symbol): def dstr(self): if self.symbol: return f"<{self.symbol}>" - return f"" \ No newline at end of file + return f"" diff --git a/angrop/rop_utils.py b/angrop/rop_utils.py index 41c008f..235955e 100644 --- a/angrop/rop_utils.py +++ b/angrop/rop_utils.py @@ -152,7 +152,7 @@ def _asts_must_be_equal(state, ast1, ast2): return True -def fast_uninitialized_filler(name, addr, size, state): +def fast_uninitialized_filler(_, addr, size, state): return state.solver.BVS("uninitialized" + hex(addr), size, explicit_name=True) diff --git a/tests/test_chainbuilder.py b/tests/test_chainbuilder.py index b1118c4..4c17d1c 100644 --- a/tests/test_chainbuilder.py +++ b/tests/test_chainbuilder.py @@ -1,8 +1,9 @@ import os +import claripy + import angr import angrop # pylint: disable=unused-import -import claripy from angrop.rop_value import RopValue from angrop.errors import RopException From 38f9ab431a4f9b154b10ea62a9a14ff9b64a0fe0 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Fri, 24 Jan 2025 14:08:20 -0700 Subject: [PATCH 105/106] update README --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 428efb2..4f8135f 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ Supported architectures: * x86/x64 * ARM * MIPS +* AARCH64 It should be relatively easy to support other architectures that are supported by `angr`. If you'd like to use `angrop` on other architectures, please create an issue and we will look into it :) @@ -138,8 +139,6 @@ Allow strings to be passed as arguments to func_call(), which are then written t Add a function for open, read, write (for ctf's) -Allow using of angr objects such as BVV, BVS to make using symbolic values easy - The segment analysis for finding executable addresses seems to break on non-elf binaries often, such as PE files, kernel modules. Allow setting constraints on the generated chain e.g. bytes that are valid. From bd0b2339aa540e5fb2675706a15f9f30962e6a80 Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Fri, 24 Jan 2025 14:57:52 -0700 Subject: [PATCH 106/106] make pyright happy --- angrop/chain_builder/mem_writer.py | 4 ++-- angrop/chain_builder/pivot.py | 2 +- angrop/chain_builder/sys_caller.py | 6 +++--- angrop/gadget_finder/__init__.py | 23 ++++++++++++++++------- angrop/gadget_finder/gadget_analyzer.py | 6 ++++-- angrop/rop.py | 13 ++++++++----- angrop/rop_gadget.py | 14 ++++++++------ angrop/rop_utils.py | 10 ++++++---- 8 files changed, 48 insertions(+), 30 deletions(-) diff --git a/angrop/chain_builder/mem_writer.py b/angrop/chain_builder/mem_writer.py index d6977ee..5df1582 100644 --- a/angrop/chain_builder/mem_writer.py +++ b/angrop/chain_builder/mem_writer.py @@ -18,8 +18,8 @@ class MemWriter(Builder): """ def __init__(self, chain_builder): super().__init__(chain_builder) - self._mem_write_gadgets = None - self._good_mem_write_gadgets = None + self._mem_write_gadgets: set = None # type: ignore + self._good_mem_write_gadgets: set = None # type: ignore def update(self): self._mem_write_gadgets = self._get_all_mem_write_gadgets(self.chain_builder.gadgets) diff --git a/angrop/chain_builder/pivot.py b/angrop/chain_builder/pivot.py index 23bd93e..52c1d29 100644 --- a/angrop/chain_builder/pivot.py +++ b/angrop/chain_builder/pivot.py @@ -30,7 +30,7 @@ class Pivot(Builder): """ def __init__(self, chain_builder): super().__init__(chain_builder) - self._pivot_gadgets = None + self._pivot_gadgets: list = None # type: ignore def update(self): self._pivot_gadgets = self.filter_gadgets(self.chain_builder.pivot_gadgets) diff --git a/angrop/chain_builder/sys_caller.py b/angrop/chain_builder/sys_caller.py index 2c1ecb9..e8ac8d3 100644 --- a/angrop/chain_builder/sys_caller.py +++ b/angrop/chain_builder/sys_caller.py @@ -42,16 +42,16 @@ class SysCaller(FuncCaller): def __init__(self, chain_builder): super().__init__(chain_builder) - self.syscall_gadgets = None + self.syscall_gadgets: list = None # type: ignore @staticmethod def supported_os(os): return "unix" in os.lower() def update(self): - self.syscall_gadgets = self._filter_gadgets(self.chain_builder.syscall_gadgets) + self.syscall_gadgets = self.filter_gadgets(self.chain_builder.syscall_gadgets) - def _filter_gadgets(self, gadgets): + def filter_gadgets(self, gadgets) -> list: # pylint: disable=no-self-use return sorted(gadgets, key=functools.cmp_to_key(cmp)) def _try_invoke_execve(self, path_addr): diff --git a/angrop/gadget_finder/__init__.py b/angrop/gadget_finder/__init__.py index f7cc628..2324d28 100644 --- a/angrop/gadget_finder/__init__.py +++ b/angrop/gadget_finder/__init__.py @@ -21,7 +21,7 @@ logging.getLogger('pyvex.lifting').setLevel("ERROR") -_global_gadget_analyzer = None +_global_gadget_analyzer: gadget_analyzer.GadgetAnalyzer = None # type: ignore # disable loggers in each worker def _disable_loggers(): @@ -71,13 +71,16 @@ def __init__(self, project, fast_mode=None, only_check_near_rets=True, max_block if max_sym_mem_access: self.arch.max_sym_mem_access = max_sym_mem_access if is_thumb: - self.arch.set_thumb() + assert isinstance(self.arch, ARM), "is_thumb is only compatible with ARM binaries!" + arch: ARM = self.arch + arch.set_thumb() # internal stuff - self._ret_locations = None - self._syscall_locations = None - self._cache = None # cache seen blocks, dict(block_hash => sets of addresses) - self._gadget_analyzer = None + self._ret_locations: list = None # type: ignore + self._syscall_locations: list = None # type: ignore + # cache seen blocks, dict(block_hash => sets of addresses) + self._cache: dict = None # type: ignore + self._gadget_analyzer: gadget_analyzer.GadgetAnalyzer = None # type: ignore self._executable_ranges = None # silence annoying loggers @@ -197,7 +200,13 @@ def find_gadgets_single_threaded(self, show_progress=True): assert self.gadget_analyzer is not None for addr in self._addresses_to_check_with_caching(show_progress): - gadgets.extend(self.gadget_analyzer.analyze_gadget(addr)) + res = self.gadget_analyzer.analyze_gadget(addr) + if res is None: + continue + if isinstance(res, list): + gadgets.extend(res) + continue + gadgets.append(res) for g in gadgets: g.project = self.project diff --git a/angrop/gadget_finder/gadget_analyzer.py b/angrop/gadget_finder/gadget_analyzer.py index 4bd555c..9aae7a7 100644 --- a/angrop/gadget_finder/gadget_analyzer.py +++ b/angrop/gadget_finder/gadget_analyzer.py @@ -41,7 +41,7 @@ def __init__(self, project, fast_mode, kernel_mode=False, arch=None, stack_gsize fast_mode=self._fast_mode) self._concrete_sp = self._state.solver.eval(self._state.regs.sp) - def analyze_gadget(self, addr, allow_conditional_branches=None): + def analyze_gadget(self, addr, allow_conditional_branches=None) -> list[RopGadget] | RopGadget | None: """ Find gadgets at the given address. @@ -662,7 +662,7 @@ def _compute_sp_change(self, init_state, final_state, gadget): final_state = rop_utils.step_to_unconstrained_successor(self.project, state=init_state, precise_action=True) dependencies = self._get_reg_dependencies(final_state, "sp") last_sp = None - init_sym_sp = None + init_sym_sp: frozenset = None # type: ignore prev_act = None for act in final_state.history.actions: if act.type == 'reg' and act.action == 'write' and act.storage == self.arch.stack_pointer: @@ -677,6 +677,8 @@ def _compute_sp_change(self, init_state, final_state, gadget): else: gadget.stack_change = 0 + assert init_sym_sp is not None, "there is no sybmolic sp, how does the pivoting work?" + # if is popped from stack, we need to compensate for the popped sp value on the stack # if it is a pop, then sp comes from stack and the previous action must be a mem read # and the data is the new sp diff --git a/angrop/rop.py b/angrop/rop.py index 2645467..3780d21 100644 --- a/angrop/rop.py +++ b/angrop/rop.py @@ -1,6 +1,7 @@ import pickle import inspect import logging +from typing import cast from angr import Analysis, register_analysis @@ -38,8 +39,9 @@ def __init__(self, only_check_near_rets=True, max_block_size=None, max_sym_mem_a """ # private list of RopGadget's - self._all_gadgets = [] # all types of gadgets - self._duplicates = None # all equivalent gadgets (with the same instructions) + self._all_gadgets: list[RopGadget] = [] # all types of gadgets + # all equivalent gadgets (with the same instructions) + self._duplicates: dict = None # type: ignore # public list of RopGadget's self.rop_gadgets = [] # gadgets used for ROP, like pop rax; ret @@ -99,7 +101,8 @@ def analyze_addr(self, addr): return a list of gadgets that starts from addr this is possible because of conditional branches """ - gs = self.gadget_finder.analyze_gadget(addr, allow_conditional_branches=True) + res = self.gadget_finder.analyze_gadget(addr, allow_conditional_branches=True) + gs:list[RopGadget]|None = cast(list[RopGadget]|None, res) if not gs: return gs self._all_gadgets += gs @@ -111,10 +114,10 @@ def analyze_gadget(self, addr): return a gadget or None, it filters out gadgets containing conditional_branches if you'd like those, use analyze_addr """ - g = self.gadget_finder.analyze_gadget(addr, allow_conditional_branches=False) + res = self.gadget_finder.analyze_gadget(addr, allow_conditional_branches=False) + g = cast(RopGadget|None, res) if g is None: return g - self._all_gadgets.append(g) self._screen_gadgets() return g diff --git a/angrop/rop_gadget.py b/angrop/rop_gadget.py index 53ba85a..7564eac 100644 --- a/angrop/rop_gadget.py +++ b/angrop/rop_gadget.py @@ -1,3 +1,4 @@ +from angr import Project from .rop_utils import addr_to_asmstring class RopMemAccess: @@ -95,10 +96,10 @@ class RopGadget: Gadget objects """ def __init__(self, addr): - self.project = None + self.project: Project = None # type: ignore self.addr = addr self.block_length = None - self.stack_change = None + self.stack_change: int = None # type: ignore # register effect information self.changed_regs = set() @@ -120,7 +121,7 @@ def __init__(self, addr): # it is just a register. With the register setting framework, we will be able to # utilize gadgets like `call qword ptr [rax+rbx]` because we have the dependency information. # transition information, i.e. how to pass the control flow to the next gadget - self.transit_type = None + self.transit_type: str = None # type: ignore self.pc_reg = None # pc_offset is exclusively used when transit_type is "pop_pc", # when pc_offset==stack_change-arch_bytes, transit_type is basically ret @@ -131,8 +132,8 @@ def __init__(self, addr): # Registers that affect path constraints self.constraint_regs = set() # Instruction count to estimate complexity - self.isn_count = None - self.has_conditional_branch = None + self.isn_count: int = None # type: ignore + self.has_conditional_branch: bool = None # type: ignore @property def num_mem_access(self): @@ -205,7 +206,7 @@ def __repr__(self): return "" % self.addr def copy(self): - out = RopGadget(self.addr) + out = self.__class__(self.addr) out.project = self.project out.addr = self.addr out.changed_regs = set(self.changed_regs) @@ -255,6 +256,7 @@ def __repr__(self): return f"" def copy(self): + new = super().copy() new.stack_change_after_pivot = self.stack_change_after_pivot new.sp_reg_controllers = set(self.sp_reg_controllers) diff --git a/angrop/rop_utils.py b/angrop/rop_utils.py index 235955e..40fc45a 100644 --- a/angrop/rop_utils.py +++ b/angrop/rop_utils.py @@ -3,6 +3,7 @@ import angr import claripy +from angr.engines.successors import SimSuccessors from .errors import RegNotFoundException, RopException, RopTimeoutException from .rop_value import RopValue @@ -12,7 +13,7 @@ def addr_to_asmstring(project, addr): return "; ".join(["%s %s" %(i.mnemonic, i.op_str) for i in block.capstone.insns]) -def get_ast_dependency(ast): +def get_ast_dependency(ast) -> set: """ ast must be created from a symbolic state where registers values are named "sreg_REG-" looks for registers that if we make the register symbolic then the ast becomes symbolic @@ -29,7 +30,7 @@ def get_ast_dependency(ast): return dependencies -def get_ast_controllers(state, ast, reg_deps): +def get_ast_controllers(state, ast, reg_deps) -> set: """ looks for registers that we can make symbolic then the ast can be "anything" :param state: the input state @@ -40,7 +41,7 @@ def get_ast_controllers(state, ast, reg_deps): test_val = 0x4141414141414141 % (2 << state.arch.bits) - controllers = [] + controllers = set() if not ast.symbolic: return controllers @@ -62,7 +63,7 @@ def get_ast_controllers(state, ast, reg_deps): extra_constraints.append(state.registers.load(r) == test_val) if unconstrained_check(state, ast, extra_constraints=extra_constraints): - controllers.append(reg) + controllers.add(reg) return controllers @@ -309,6 +310,7 @@ def step_to_unconstrained_successor(project, state, max_steps=2, allow_simproced # nums state.options.add(angr.options.BYPASS_UNSUPPORTED_SYSCALL) + succ: SimSuccessors = None # type: ignore if not precise_action: succ = project.factory.successors(state) if stop_at_syscall and succ.flat_successors: