Skip to content

Commit

Permalink
Fix README and dev setup issues
Browse files Browse the repository at this point in the history
  • Loading branch information
jonschz committed Oct 27, 2024
1 parent f56fc94 commit 6f5a020
Show file tree
Hide file tree
Showing 20 changed files with 145 additions and 61 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
# Python
build
*.egg-info/
*.pyc
*.pyo
__pycache__/
dist
.venv/

# VS Code
.vscode/

# General
ghidra_import.log
16 changes: 13 additions & 3 deletions ghidra_scripts/README.md → reccmp/ghidra_scripts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,28 @@ The scripts in this directory provide additional functionality in Ghidra, e.g. i

## Setup

There are two ways to install this script:
- Case 1: You only want to use this script in a decompilation project.
- Create a virtual environment inside your _decompilation project_ and install `reccmp` there. Make sure to _not_ install `reccmp` in editable mode (`pip install -e`).
- Your decompilation project needs to have a valid `reccmp-build.yml` next to its `reccmp-project.yml`. This is a known limitation.
- Case 2: You want to actively develop this script.
- Create a virtual env in _this repository_ in a top level directory called `.venv` and install this project there (`pip install .` or `pip install -e .`).
- Copy `dev_config.example.json` to `dev_config.json` and configure a decompilation target.

### Ghidrathon
Since these scripts and their dependencies are written in Python 3, [Ghidrathon](https://github.com/mandiant/Ghidrathon) must be installed first. Follow its installation instructions and install a recent build (these scripts were tested with Python 3.12 and Ghidrathon v4.0.0).

### Script Directory
This step differs slightly depending on your setup.

- In Ghidra, _Open Window -> Script Manager_.
- Click the _Manage Script Directories_ button on the top right.
- Click the _Add_ (Plus icon) button and select this file's parent directory.
- Click the _Add_ (Plus icon) button.
- Case 1: Select `Lib/site-packages/reccmp/ghidra_scripts` (Windows) or `lib/python3.x/site-packages/reccmp/ghidra_scripts` (Linux/macOS) inside your _project's_ virtual environment.
- Case 2: Select `<this repository's root>/reccmp/ghidra_scripts`. (Don't select anything within this repository's venv, especially if you have installed this project in editable mode).
- Close the window and click the _Refresh_ button.
- This script should now be available under the folder _LEGO1_.

### Virtual environment
As of now, there must be a Python virtual environment set up under `$REPOSITORY_ROOT/.venv`, and the dependencies of `isledecomp` must be installed there, see [here](../README.md#tooling).

## Development
- Type hints for Ghidra (optional): Download a recent release from https://github.com/VDOO-Connected-Trust/ghidra-pyi-generator,
Expand Down
File renamed without changes.
3 changes: 3 additions & 0 deletions reccmp/ghidra_scripts/dev_config.example.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"projectDir": "<path to a directory containing a reccmp-build.yml>"
}
3 changes: 3 additions & 0 deletions reccmp/ghidra_scripts/dev_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"projectDir": "C:\\Users\\Jonathan\\vscode-workspace\\isle2"
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# pyright: reportMissingModuleSource=false

import importlib
import json
import logging.handlers
import sys
import logging
Expand Down Expand Up @@ -52,7 +53,7 @@ def reload_module(module: str):

reload_module("lego_util.statistics")
reload_module("lego_util.globals")
from lego_util.globals import GLOBALS, SupportedModules
from lego_util.globals import GLOBALS


def setup_logging():
Expand All @@ -61,13 +62,9 @@ def setup_logging():
# formatter = logging.Formatter("%(name)s %(levelname)-8s %(message)s") # use this to identify loggers
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(formatter)
file_handler = logging.FileHandler(
Path(__file__).absolute().parent.joinpath("import.log"), mode="w"
)
file_handler.setFormatter(formatter)
logging.root.setLevel(GLOBALS.loglevel)
logging.root.addHandler(stdout_handler)
logging.root.addHandler(file_handler)

logger.info("Starting import...")


Expand All @@ -94,15 +91,14 @@ def get_repository_root():
return Path(__file__).absolute().parent.parent.parent


def add_python_path(path: str):
def add_python_path(path: Path):
"""
Scripts in Ghidra are executed from the tools/ghidra_scripts directory. We need to add
a few more paths to the Python path so we can import the other libraries.
"""
venv_path = get_repository_root().joinpath(path)
logger.info("Adding %s to Python Path", venv_path)
assert venv_path.exists()
sys.path.insert(1, str(venv_path))
logger.info("Adding %s to Python Path", path)
assert path.exists()
sys.path.insert(1, str(path))


# We need to quote the types here because they might not exist when running without Ghidra
Expand Down Expand Up @@ -190,30 +186,87 @@ def log_and_track_failure(
)


def main():
if GLOBALS.running_from_ghidra:
origfile_name = getProgramFile().getName()

if origfile_name == "LEGO1.DLL":
GLOBALS.module = SupportedModules.LEGO1
elif origfile_name in ["LEGO1D.DLL", "BETA10.DLL"]:
GLOBALS.module = SupportedModules.BETA10
else:
raise Lego1Exception(
f"Unsupported file name in import script: {origfile_name}"
)

logger.info("Importing file: %s", GLOBALS.module.orig_filename())

repo_root = get_repository_root()
origfile_path = repo_root.joinpath("legobin").joinpath(
GLOBALS.module.orig_filename()
def find_and_add_venv_to_pythonpath():
    """
    Extend the Python path so that `reccmp` and its dependencies can be imported.

    Two setups are handled, in this order:

    1. Installed setup: this script is located somewhere below a `site-packages`
       directory, e.g. `.venv/Lib/site-packages/reccmp/ghidra_scripts/import_[...].py`.
       That `site-packages` directory is added and nothing else is done.
    2. Development setup: this script is run from the reccmp repository. The directory
       three levels above this file is added first, followed by the `site-packages`
       directory of the closest `.venv` found in any ancestor directory.

    A warning is logged if no virtual environment could be located.
    """
    # Anchor the path before walking upwards. Iterating over `parents` is finite
    # for any path and does not depend on `Path.is_mount()`.
    script_path = Path(__file__).absolute()

    # Add the virtual environment if we are in one
    for ancestor in script_path.parents:
        if ancestor.name == "site-packages":
            add_python_path(ancestor)
            return

    # Development setup: Running from the reccmp repository. The dependencies must be installed in a venv with name `.venv`.

    # This one is needed when the reccmp project is installed in editable mode and we are running directly from the source
    add_python_path(script_path.parent.parent.parent)

    # Now we add the virtual environment where the dependencies need to be installed
    for ancestor in script_path.parents:
        venv_dir = ancestor / ".venv"
        if not venv_dir.is_dir():
            continue

        # Windows venvs use `Lib/site-packages`, POSIX venvs use `lib/pythonX.Y/site-packages`
        site_packages_candidates = [
            venv_dir / "Lib" / "site-packages",
            *sorted(venv_dir.glob("lib/python*/site-packages")),
        ]
        for site_packages in site_packages_candidates:
            if site_packages.is_dir():
                add_python_path(site_packages)
                return
        # A `.venv` without a usable `site-packages` is skipped; keep looking further up.

    logger.warning(
        "No virtual environment was found. This script might fail to find dependencies."
    )
build_directory = repo_root.joinpath(GLOBALS.module.build_dir_name())
recompiledfile_name = f"{GLOBALS.module.recomp_filename_without_extension()}.DLL"
recompiledfile_path = build_directory.joinpath(recompiledfile_name)
pdbfile_name = f"{GLOBALS.module.recomp_filename_without_extension()}.PDB"
pdbfile_path = build_directory.joinpath(pdbfile_name)


def find_build_target() -> "RecCmpBuiltTarget":
    """
    Locate the reccmp project and return the target that matches the current program.

    The project is first searched starting from this script's directory. If that fails,
    a `dev_config.json` next to this script is consulted and its `projectDir` entry is
    used as the project directory instead (development setup).

    As a side effect, a log file `ghidra_import.log` is set up next to the project
    config file, and `GLOBALS.target_name` is set to the name of the current program
    when running inside Ghidra.

    Known issue: In order to use this script, `reccmp-build.yml` must be located in the same directory as `reccmp-project.yml`.

    Raises:
        RecCmpProjectNotFoundException: No project was found and no usable
            `dev_config.json` exists.

    Exits the interpreter with status 1 if no configured target matches the file name.
    """

    # Same anchoring as `get_repository_root()`, and computed only once
    script_dir = Path(__file__).absolute().parent

    try:
        project = RecCmpBuiltProject.from_directory(script_dir)
    except RecCmpProjectNotFoundException as e:
        # Figure out if we are in a debugging scenario
        debug_config_file = script_dir / "dev_config.json"
        if not debug_config_file.exists():
            raise RecCmpProjectNotFoundException(
                f"Cannot find a reccmp project under {script_dir} (missing {RECCMP_PROJECT_CONFIG}/{RECCMP_BUILD_CONFIG})"
            ) from e

        # JSON is UTF-8 by specification; do not depend on the locale encoding
        with debug_config_file.open(encoding="utf-8") as infile:
            debug_config = json.load(infile)

        try:
            project_dir = Path(debug_config["projectDir"])
        except (KeyError, TypeError) as config_error:
            # Report a malformed dev config in terms the user can act on
            raise RecCmpProjectNotFoundException(
                f"{debug_config_file} must be a JSON object with a 'projectDir' entry"
            ) from config_error

        project = RecCmpBuiltProject.from_directory(project_dir)

    # Set up logfile next to the project config file
    file_handler = logging.FileHandler(
        project.project_config_path.parent.joinpath("ghidra_import.log"), mode="w"
    )
    # Reuse the formatter of the first root handler, if one has been installed
    if logging.root.handlers:
        file_handler.setFormatter(logging.root.handlers[0].formatter)
    logging.root.addHandler(file_handler)

    if GLOBALS.running_from_ghidra:
        GLOBALS.target_name = getProgramFile().getName()

    matching_targets = [
        t for t in project.targets.values() if t.filename == GLOBALS.target_name
    ]

    if not matching_targets:
        logger.error("No target with file name '%s' is configured", GLOBALS.target_name)
        sys.exit(1)

    if len(matching_targets) > 1:
        logger.warning(
            "Found multiple targets for file name '%s'. Using the first one.",
            GLOBALS.target_name,
        )

    return matching_targets[0]


def main():
target = find_build_target()

logger.info("Importing file: %s", target.original_path)

if not GLOBALS.verbose:
logging.getLogger("isledecomp.bin").setLevel(logging.WARNING)
Expand All @@ -223,11 +276,11 @@ def main():
logging.getLogger("isledecomp.cvdump.symbols").setLevel(logging.WARNING)

logger.info("Starting comparison")
with Bin(str(origfile_path), find_str=True) as origfile, Bin(
str(recompiledfile_path)
with Bin(target.original_path, find_str=True) as origfile, Bin(
target.recompiled_path
) as recompfile:
isle_compare = IsleCompare(
origfile, recompfile, str(pdbfile_path), str(repo_root)
origfile, recompfile, target.recompiled_pdb, target.source_root
)

logger.info("Comparison complete.")
Expand All @@ -246,20 +299,22 @@ def main():
# sys.path is not reset after running the script, so we should restore it
sys_path_backup = sys.path.copy()
try:
# make modules installed in the venv available in Ghidra
add_python_path(".venv/Lib/site-packages")
# This one is needed when isledecomp is installed in editable mode in the venv
add_python_path("tools/isledecomp")
find_and_add_venv_to_pythonpath()

import setuptools # pylint: disable=unused-import # required to fix a distutils issue in Python 3.12

reload_module("isledecomp")
# Packages are imported down here because reccmp's dependencies are only available after the venv was added to the pythonpath
from reccmp.project.common import RECCMP_BUILD_CONFIG, RECCMP_PROJECT_CONFIG
from reccmp.project.detect import RecCmpBuiltProject, RecCmpBuiltTarget
from reccmp.project.error import RecCmpProjectNotFoundException

reload_module("reccmp.isledecomp")
from reccmp.isledecomp import Bin

reload_module("isledecomp.compare")
reload_module("reccmp.isledecomp.compare")
from reccmp.isledecomp.compare import Compare as IsleCompare

reload_module("isledecomp.compare.db")
reload_module("reccmp.isledecomp.compare.db")

reload_module("lego_util.exceptions")
from lego_util.exceptions import Lego1Exception
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ def sanitize_name(name: str) -> str:
# Importing function names like `FUN_10001234` into BETA10 can be confusing
# because Ghidra's auto-generated functions look exactly the same.
# Therefore, such function names are replaced by `LEGO1_10001234` in the BETA10 import.
if GLOBALS.module == SupportedModules.BETA10:

# FIXME: The identification here is a crutch - we need a more reusable solution for this scenario
if GLOBALS.target_name.upper() == "BETA10.DLL":
new_name = re.sub(r"FUN_([0-9a-f]{8})", r"LEGO1_\1", new_name)

if "<" in name:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ def build_dir_name(self):
class Globals:
verbose: bool
loglevel: int
module: SupportedModules
# TODO: Add a more permanent solution here.
# For example: A custom function prefix per target that defaults to `FUN`
target_name: str = "LEGO1.DLL"
running_from_ghidra: bool = False
# statistics
statistics: Statistics = field(default_factory=Statistics)
Expand All @@ -38,5 +40,4 @@ class Globals:
verbose=False,
# loglevel=logging.INFO,
loglevel=logging.DEBUG,
module=SupportedModules.LEGO1, # this default value will be used when run outside of Ghidra
)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
5 changes: 3 additions & 2 deletions reccmp/isledecomp/bin.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from pathlib import Path
import struct
import bisect
from functools import cached_property
Expand Down Expand Up @@ -111,9 +112,9 @@ class Bin:

# pylint: disable=too-many-instance-attributes

def __init__(self, filename: str, find_str: bool = False) -> None:
def __init__(self, filename: Path | str, find_str: bool = False) -> None:
logger.debug('Parsing headers of "%s"... ', filename)
self.filename = filename
self.filename = str(filename)
self.view: memoryview = None
self.imagebase = None
self.entry = None
Expand Down
7 changes: 4 additions & 3 deletions reccmp/isledecomp/compare/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import logging
import difflib
from pathlib import Path
import struct
import uuid
from dataclasses import dataclass
Expand Down Expand Up @@ -67,12 +68,12 @@ def lookup(addr: int, size: int) -> Optional[bytes]:
class Compare:
# pylint: disable=too-many-instance-attributes
def __init__(
self, orig_bin: IsleBin, recomp_bin: IsleBin, pdb_file: str, code_dir: str
self, orig_bin: IsleBin, recomp_bin: IsleBin, pdb_file: Path | str, code_dir: Path | str
):
self.orig_bin = orig_bin
self.recomp_bin = recomp_bin
self.pdb_file = pdb_file
self.code_dir = code_dir
self.pdb_file = str(pdb_file)
self.code_dir = str(code_dir)
# Controls whether we dump the asm output to a file
self.debug: bool = False
self.runid: str = uuid.uuid4().hex[:8]
Expand Down
8 changes: 4 additions & 4 deletions reccmp/project/detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def find_filename_recursively(directory: Path, filename: str) -> typing.Optional
class RecCmpProject:
def __init__(
self,
project_config_path: typing.Optional[Path],
project_config_path: Path,
):
self.project_config_path = project_config_path
self.targets: dict[str, RecCmpTarget] = {}
Expand Down Expand Up @@ -118,9 +118,9 @@ def from_directory(cls, directory: Path) -> typing.Optional["RecCmpProject"]:
class RecCmpBuiltProject:
def __init__(
self,
project_config_path: typing.Optional[Path],
user_config: typing.Optional[Path],
build_config: typing.Optional[Path],
project_config_path: Path,
user_config: Path,
build_config: Path,
):
self.project_config_path = project_config_path
self.user_config = user_config
Expand Down
2 changes: 1 addition & 1 deletion reccmp/tools/stackcmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def main():
target.recompiled_path
) as recompfile:
isle_compare = IsleCompare(
origfile, recompfile, target.pdb_path, target.source_root
origfile, recompfile, target.recompiled_pdb, target.source_root
)

if args.loglevel == logging.DEBUG:
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ install_requires = file:requirements.txt

[options.packages.find]
exclude =
reccmp.tests.*
tests*

[options.entry_points]
console_scripts =
Expand Down

0 comments on commit 6f5a020

Please sign in to comment.