Skip to content

Commit

Permalink
Merge pull request #27 from quarkslab/upgrade-pypcode
Browse files Browse the repository at this point in the history
update pypcode to use version >=2.0.0
  • Loading branch information
RobinDavid authored Mar 20, 2024
2 parents 5cbf4be + a155da2 commit 3f9e327
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 138 deletions.
162 changes: 32 additions & 130 deletions bindings/python/quokka/backends/pypcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def get_pypcode_context(
Arguments:
arch: Quokka program architecture
endian: Architecture endianness
Raises:
PypcodeError: if the conversion for arch is not found
Expand All @@ -74,7 +75,7 @@ def get_pypcode_context(
quokka.analysis.ArchARM64: "AARCH64:LE:64:v8A",
quokka.analysis.ArchARMThumb: "ARM:LE:32:v8T",
quokka.analysis.ArchMIPS: "MIPS:LE:32:default",
quokka.analysis.ArchMIPS: "MIPS:LE:64:default",
quokka.analysis.ArchMIPS64: "MIPS:LE:64:default",
quokka.analysis.ArchPPC: "PowerPC:LE:32:default",
quokka.analysis.ArchPPC64: "PowerPC:LE:64:default",
}
Expand All @@ -93,105 +94,6 @@ def get_pypcode_context(
return pypcode.Context(pcode_arch)


def equality(self: pypcode.ContextObj, other: Any) -> bool:
    """Tell whether `other` holds the same field values as `self`

    This function is meant to be attached (monkey patched) as `__eq__` on
    pypcode classes. The fields to compare are taken from `__slots__`, and the
    "cobj" slot is left out of the comparison.

    Arguments:
        self: First object
        other: Other variable

    Returns:
        True if `other` is an instance of the class of `self` and every
        compared slot is equal on both objects, False otherwise
    """
    # Objects of an unrelated type are never equal
    if not isinstance(other, self.__class__):
        return False

    for slot in self.__slots__:
        if slot == "cobj":
            continue
        if not (getattr(other, slot) == getattr(self, slot)):
            return False

    return True


def object_hash(obj: pypcode.ContextObj) -> int:
    """Create a hash value for a pypcode object

    This allows pypcode objects to be stored in sets (or used as dict keys).
    The hash is computed from every field listed in `__slots__` except "cobj",
    so two objects with equal values in those fields get the same hash.

    Arguments:
        obj: Object to hash

    Raises:
        TypeError: if `obj` is not a pypcode.ContextObj

    Returns:
        An integer for the hash
    """
    # Explicit check instead of `assert`, which is stripped when running with -O
    if not isinstance(obj, pypcode.ContextObj):
        raise TypeError(
            f"Cannot hash object of type {type(obj).__name__}: "
            "expected a pypcode.ContextObj"
        )

    # Hash a tuple of the fields rather than summing their hashes: a sum is
    # order-insensitive, so objects whose values are swapped between two
    # fields would always collide.
    return hash(
        tuple(getattr(obj, attr) for attr in obj.__slots__ if attr != "cobj")
    )


# Monkey patch the pypcode classes below so that their instances are compared with
# `equality` (slot by slot) and hashed with `object_hash`, which allows them to be
# stored in sets.
pypcode.Varnode.__eq__ = equality
pypcode.Varnode.__hash__ = object_hash

pypcode.AddrSpace.__eq__ = equality
pypcode.AddrSpace.__hash__ = object_hash

pypcode.PcodeOp.__eq__ = equality
pypcode.PcodeOp.__hash__ = object_hash


def combine_instructions(
    block: quokka.Block, translated_instructions: Sequence[pypcode.Translation]
) -> List[pypcode.PcodeOp]:
    """Match the Pypcode translations with the Quokka instructions of a block

    IDA and Ghidra do not always split the bytes in the same way. For example, a
    prefix (such as LOCK) is decoded as 2 instructions by Ghidra but as a single
    one by IDA. For each Quokka instruction, translations are consumed until
    their cumulated length covers the instruction size.

    The Pcode operations matched with an instruction are also stored in its
    `_pcode_insts` attribute.

    Arguments:
        block: Quokka block
        translated_instructions: Translated instructions by Pypcode

    Raises:
        PypcodeError: if the translations are exhausted before every instruction
            is covered, or if a translation overshoots the instruction size

    Returns:
        A list of Pypcode statements
    """
    pending = iter(translated_instructions)
    combined: List[pypcode.PcodeOp] = []

    for quokka_inst in block.instructions:
        quokka_inst._pcode_insts = []
        bytes_left: int = quokka_inst.size

        # Keep consuming translations until the whole instruction is covered
        while bytes_left > 0:
            try:
                chunk: pypcode.Translation = next(pending)
            except StopIteration as exc:
                logger.error(
                    "Disassembly discrepancy between Pypcode / IDA: missing inst"
                )
                raise quokka.PypcodeError(
                    f"Decoding error for block at 0x{block.start:x}"
                ) from exc

            bytes_left = bytes_left - chunk.length
            quokka_inst._pcode_insts.extend(chunk.ops)

            # The translation spans more bytes than the Quokka instruction
            if bytes_left < 0:
                logger.error(
                    "Disassembly discrepancy between Pypcode / IDA: sizes mismatch"
                )
                raise quokka.PypcodeError(
                    f"Decoding error for block at 0x{block.start:x}"
                )

            combined += list(chunk.ops)

    return combined


def update_pypcode_context(program: quokka.Program, is_thumb: bool) -> pypcode.Context:
"""Return an appropriate pypcode context for the decoding
Expand Down Expand Up @@ -246,19 +148,22 @@ def pypcode_decode_block(block: quokka.Block) -> List[pypcode.PcodeOp]:
block.program, first_instruction.thumb
)

# Translate
translation = context.translate(
code=block.bytes,
base=block.start,
max_inst=0,
)

if translation.error:
logger.error(translation.error.explain)
raise quokka.PypcodeError(f"Decoding error for block at 0x{block.start:x}")
try:
# Translate
translation = context.translate(
block.bytes, # buf
block.start, # base_address
0, # max_bytes
0, # max_instructions
)
return translation.ops

pcode_instructions = combine_instructions(block, translation.instructions)
return pcode_instructions
except pypcode.BadDataError as e:
logger.error(e)
raise quokka.PypcodeError(f"Decoding error for block at 0x{block.start:x} (BadDataError)")
except pypcode.UnimplError as e:
logger.error(e)
raise quokka.PypcodeError(f"Decoding error for block at 0x{block.start:x} (UnimplError)")


def pypcode_decode_instruction(
Expand All @@ -268,7 +173,7 @@ def pypcode_decode_instruction(
This will return the list of Pcode operations done for the instruction.
Note that a (binary) instruction is expected to have several pcode instructions
associated.
associated. When decoding a single instruction IMARK instructions are excluded!
Arguments:
inst: Instruction to translate
Expand All @@ -281,22 +186,19 @@ def pypcode_decode_instruction(
"""

context: pypcode.Context = update_pypcode_context(inst.program, inst.thumb)
translation = context.translate(
code=inst.bytes,
base=inst.address,
max_inst=1,
)

if not translation.error:

instructions = translation.instructions
if len(instructions) > 1:
logger.warning("Mismatch of instruction size IDA/Pypcode")

instructions = list(
itertools.chain.from_iterable(inst.ops for inst in instructions)
try:
translation = context.translate(
inst.bytes, # buf
inst.address, # base_address
0, # max_bytes
1, # max_instructions
)
return instructions

logger.error(translation.error.explain)
raise quokka.PypcodeError("Unable to decode instruction")
return [x for x in translation.ops if x.opcode != pypcode.OpCode.IMARK]

except pypcode.BadDataError as e:
logger.error(e)
raise quokka.PypcodeError(f"Unable to decode instruction (BadDataError)")
except pypcode.UnimplError as e:
logger.error(e)
raise quokka.PypcodeError(f"Unable to decode instruction (UnimplError)")
2 changes: 1 addition & 1 deletion bindings/python/quokka/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def bytes(self) -> bytes:

return block_bytes

@property
@cached_property
def pcode_insts(self) -> List[pypcode.PcodeOp]:
"""Generate PCode instructions for the block
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ test = [
"pytest-mock",
"pytest-cov",
"coverage[toml]",
"pypcode>=1.1.1",
"pypcode>=2.0.0",
]
pypcode = ["pypcode>=1.1.1"]
pypcode = ["pypcode>=2.0.0"]
doc = [
"mkdocs",
"mkdocs-material",
Expand All @@ -45,7 +45,7 @@ dev = [
"mypy",
"mypy-protobuf",
"nox",
"pypcode>=1.1.1",
"pypcode>=2.0.0",
]

[tool.setuptools]
Expand Down
8 changes: 4 additions & 4 deletions tests/python/tests/backends/test_pypcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@
def test_pypcode_context():

context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchX86)
assert context.lang.id == "x86:LE:32:default"
assert context.language.id == "x86:LE:32:default"

context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchX64)
assert context.lang.id == "x86:LE:64:default"
assert context.language.id == "x86:LE:64:default"

context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchARM64)
assert context.lang.id == "AARCH64:LE:64:v8A"
assert context.language.id == "AARCH64:LE:64:v8A"

context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchARM)
assert context.lang.id == "ARM:LE:32:v8"
assert context.language.id == "ARM:LE:32:v8"

with pytest.raises(quokka.PypcodeError):
pypcode_backend.get_pypcode_context(quokka.analysis.QuokkaArch)
Expand Down

0 comments on commit 3f9e327

Please sign in to comment.