From 3d56ed23d1268ab210d67b0b84576bc4ef76c79a Mon Sep 17 00:00:00 2001 From: Zion Leonahenahe Basque Date: Wed, 1 Nov 2023 11:22:38 -0700 Subject: [PATCH] Verify numbers produced in pipeline verification (#6) * Better verification scripts * Allow old versions * Update number checking * Update * Remove uneeded CI files * bump * Add binutils * Update the README --- .github/workflows/python-app.yml | 6 +- README.md | 38 +++++++--- sailreval/__init__.py | 2 +- sailreval/decompilers/angr_dec.py | 10 ++- scripts/ci_verify_pipeline.sh | 35 ---------- scripts/verify_pipeline.sh | 3 +- tests/ci/angr_dream_example.c | 64 ----------------- tests/ci/angr_dream_example.linemaps | 60 ---------------- tests/ci/angr_dream_example.toml | 100 --------------------------- tests/ci/angr_sailr_example.c | 63 ----------------- tests/ci/angr_sailr_example.linemaps | 58 ---------------- tests/ci/angr_sailr_example.toml | 100 --------------------------- tests/test_pipeline.py | 70 +++++++++++++++++++ 13 files changed, 110 insertions(+), 499 deletions(-) delete mode 100755 scripts/ci_verify_pipeline.sh delete mode 100644 tests/ci/angr_dream_example.c delete mode 100644 tests/ci/angr_dream_example.linemaps delete mode 100644 tests/ci/angr_dream_example.toml delete mode 100644 tests/ci/angr_sailr_example.c delete mode 100644 tests/ci/angr_sailr_example.linemaps delete mode 100644 tests/ci/angr_sailr_example.toml create mode 100644 tests/test_pipeline.py diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index ef3cc4e..7bfd9d8 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -28,11 +28,11 @@ jobs: - name: Install dependencies run: | pip install --upgrade pip - sudo apt-get update && sudo apt-get install unzip openjdk-19-jdk graphviz-dev -y + sudo apt-get update && sudo apt-get install gcc make binutils unzip openjdk-19-jdk graphviz-dev -y pip3 install -e . - pip3 install angr + pip3 install angr pytest - name: Run verification tests run: | # a hack to allow for docker containers in github actions and ARM export JAVA_OPTS="-Djdk.lang.Process.launchMechanism=vfork" - ./scripts/ci_verify_pipeline.sh \ No newline at end of file + pytest tests/test_pipeline.py \ No newline at end of file diff --git a/README.md b/README.md index f917524..e2a8758 100755 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ The SAILR evaluation pipeline, `sailreval`, is a tool for measuring various aspects of decompilation quality. This evaluation pipeline was originally developed for the USENIX 2024 paper ["Ahoy SAILR! There is No Need to DREAM of C: A Compiler-Aware Structuring Algorithm for Binary Decompilation"](https://www.zionbasque.com/files/publications/sailr_usenix24.pdf). It supports 26 different C packages from Debian, -for compiling, decompiling and measuring. Currently, angr, Hex-Rays (IDA Pro), and Ghidra are supported as decompilers. +for compiling, decompiling, and measuring. Currently, angr, Hex-Rays (IDA Pro), and Ghidra are supported as decompilers. If you are only looking to use the SAILR version of angr, then jump to the [using SAILR on angr](#using-sailr-on-angr-decompiler) section. @@ -23,6 +23,7 @@ If you are only looking to use the SAILR version of angr, then jump to the [usin ## Overview: +This repo contains the `sailreval` Python package and information about the SAILR paper artifacts. `sailreval` is the Python package that contains all the code for running the evaluation pipeline. 
`sailreval` evaluates the quality of decompilation by comparing it to the original source code. This evaluation is done in four phases: @@ -35,34 +36,49 @@ Each phase requires the phase directly before it to have run, however, you can skip phases if you already have the required files. For example, you can skip compilation phase if you already have the object files and preprocessed source. ## Installation -`sailreval` can be used in two ways: locally or in a docker container. If you plan on reproducing the results from the paper, -or using pre-set decompilers, then you should use the docker container. -Run the setup script to install the dependencies: +The `sailreval` package can be used in two ways: locally or in a Docker container. +If you plan on reproducing the results of the SAILR paper, or using some pre-packaged decompiler like Ghidra, then you +will need both. Below are two methods for installing: one is heavy (Docker and local) and one is light (local only). +Make sure you have Docker installed on your system. + +### Install Script (Recommended) +On Linux and macOS: ```bash ./setup.sh ``` -This will install the Python package locally and build the docker container. If you know you don't want to use the docker -container, then you can directly install the Python package with `pip3 insatll .`. Note: you need `graphviz` on your system. +This will build the Docker container, install system dependencies, and install the Python package locally. + +### Only Python Package +If you want to use only local decompilers, and you have the build dependencies installed for your compiled project, you +can install the Python package without the Docker container. For an example of this use case, see +our [CI runner](./.github/workflows/python-app.yml). +```bash +pip3 install -e . +``` +Note: you will need to install the system dependencies for the Python project yourself, listed in our [CI runner](./.github/workflows/python-app.yml). +The package is also available on PyPI, so remote installation works as well. + +### Install Verification Verify the installation by running: ```bash ./scripts/verify_pipeline.sh ``` -If your installation is correct, you should see some final output like: -``` +This will use both the Docker container and your local install to run the pipeline. +If you installed correctly, you should see some final output like: +```md # Evaluation Data ## Stats Layout: ('sum', 'mean', 'median') ### O2 -Metric | source | angr_sailr | angr_dream +Metric | source | angr_sailr | angr_dream ---------- | ----------- | ----------- | ----------- -cfged | 0/0/0.0 | 14/1.75/2.0 | 34/4.25/2.0 +gotos | 1/0.12/0.0 | 1/0.12/0.0 | 0/0/0.0 ... ``` - ## Usage After installation, if you used the script normally (i.e. the docker install), then you can use the `docker-eval.sh` script which is a proxy to the `eval.py` script, but inside the container.
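For reference, the four pipeline stages the README describes map onto eval.py invocations like the following; this is a sketch lifted from the verification scripts in this patch, using the bundled example_project target, with CORES set to the desired parallelism (the scripts use $(nproc --all)). With the Docker install, docker-eval.sh proxies the same arguments to eval.py inside the container.

    ./eval.py --compile example_project --cores "$CORES"
    ./eval.py --decompile example_project --cores "$CORES" --use-dec source
    ./eval.py --measure example_project --use-metric gotos cfged bools func_calls --use-dec source angr_sailr angr_dream --cores "$CORES"
    ./eval.py --summarize-targets example_project --use-dec source angr_sailr angr_dream --use-metric gotos cfged bools func_calls --show-stats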
diff --git a/sailreval/__init__.py b/sailreval/__init__.py index 6498b2a..7f30ba1 100755 --- a/sailreval/__init__.py +++ b/sailreval/__init__.py @@ -1,4 +1,4 @@ -__version__ = "1.1.0" +__version__ = "1.2.0" # create loggers import logging diff --git a/sailreval/decompilers/angr_dec.py b/sailreval/decompilers/angr_dec.py index 34d5815..6d07d41 100755 --- a/sailreval/decompilers/angr_dec.py +++ b/sailreval/decompilers/angr_dec.py @@ -279,11 +279,17 @@ def generate_linemaps(dec, codegen, base_addr=0x400000): return base_addr = dec.project.loader.main_object.image_base_delta - + if hasattr(dec, "unmodified_clinic_graph"): + nodes = dec.unmodified_clinic_graph.nodes + else: + l.warning(f"You are likely using an older version of angr that has no unmodified_clinic_graph." + f" Using clinic_graph instead, results will be less accurate...") + nodes = dec.clinic.cc_graph.nodes + # get the mapping of the original AIL graph mapping = defaultdict(set) ail_node_addr_map = { - node.addr: node for node in dec.unmodified_clinic_graph.nodes + node.addr: node for node in nodes } for addr, ail_block in ail_node_addr_map.items(): # get instructions of this block diff --git a/scripts/ci_verify_pipeline.sh b/scripts/ci_verify_pipeline.sh deleted file mode 100755 index 442df13..0000000 --- a/scripts/ci_verify_pipeline.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -PROJECT_DIR="$SCRIPT_DIR/.." -EXAMPLE_PROJECT_DIR="$SCRIPT_DIR/../tests/example_project" -RESULTS_DIR="$SCRIPT_DIR/../results/O2" -CORES="$(nproc --all)" - -echo "Installing dependencies..." -sudo apt-get update && sudo apt-get install -y gcc make strip - -echo "[+] Checking if results directory exists..." -if [ ! -d "$RESULTS_DIR" ]; then - echo "[+] Creating results directory..." - mkdir -p "$RESULTS_DIR" -fi - -echo "[+] Running pipeline in $PROJECT_DIR..." -cd "$PROJECT_DIR" || exit 1 - -echo "[+] Running compilation pipeline..." && \ -# runs on the default opts, which is O2 -./eval.py --compile example_project --cores "$CORES" && \ -echo "[+] Running decompilation pipeline (only for source)..." && \ -./eval.py --decompile example_project --cores "$CORES" --use-dec source && \ -echo "[+] Copying decompilation..." && \ -mkdir -p "$PROJECT_DIR"/results/O2/example_project/sailr_decompiled && \ -cp "$PROJECT_DIR"/tests/ci/angr* "$PROJECT_DIR"/results/O2/example_project/sailr_decompiled && \ -echo "[+] Running measurement pipeline (gotos, bools, calls, cfged)..." && \ -./eval.py --measure example_project --use-metric gotos cfged bools func_calls --use-dec source angr_sailr angr_dream --cores "$CORES" && \ -echo "[+] Running aggregation pipeline..." && \ -./eval.py --summarize-targets example_project --use-dec source angr_sailr angr_dream --use-metric gotos cfged bools func_calls --show-stats && \ -# cleanup -rm -rf "$RESULTS_DIR/example_project" && \ -echo "[+] The pipeline has successfully finished!" || (echo "[!] Pipeline failed, check the last stage it was in to figure out where!" && exit 1) diff --git a/scripts/verify_pipeline.sh b/scripts/verify_pipeline.sh index 2f4c607..e82f070 100755 --- a/scripts/verify_pipeline.sh +++ b/scripts/verify_pipeline.sh @@ -2,7 +2,6 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" PROJECT_DIR="$SCRIPT_DIR/.." 
-EXAMPLE_PROJECT_DIR="$SCRIPT_DIR/../tests/example_project" RESULTS_DIR="$SCRIPT_DIR/../results/O2" CORES="$(nproc --all)" @@ -28,5 +27,5 @@ echo "[+] Running measurement pipeline (gotos, bools, calls, cfged)..." && \ echo "[+] Running aggregation pipeline..." && \ ./eval.py --summarize-targets example_project --use-dec source angr_sailr angr_dream --use-metric gotos cfged bools func_calls --show-stats && \ # cleanup -#rm -rf "$RESULTS_DIR/example_project" && \ +rm -rf "$RESULTS_DIR/example_project" && \ echo "[+] The pipeline has successfully finished!" || (echo "[!] Pipeline failed, check the last stage it was in to figure out where!" && exit 1) diff --git a/tests/ci/angr_dream_example.c b/tests/ci/angr_dream_example.c deleted file mode 100644 index f094bf3..0000000 --- a/tests/ci/angr_dream_example.c +++ /dev/null @@ -1,64 +0,0 @@ -void complete_job() { - unsigned long long v1; // rax - - v1 = puts("checking..."); - return; -} - -void next_job() { - unsigned long long v1; // rax - - puts("next_job"); - v1 = 1; - return; -} - -void refresh_jobs() { - unsigned long long v1; // rax - - puts("refresh_jobs"); - v1 = 2; - return; -} - -void fast_unlock() { - unsigned long long v1; // rax - - puts("fast_unlock"); - v1 = 4; - return; -} - -void log_workers() { - unsigned long long v1; // rax - - v1 = puts("log_workers"); - return; -} - -long long schedule_job(unsigned long a0, unsigned long long a1, unsigned long a2) { - if (a0 && a1) { - complete_job(); - if (EARLY_EXIT != a2) { - next_job(); - refresh_jobs(); - } - } - if (!a0 || !a1) - refresh_jobs(); - if (a1 || EARLY_EXIT != a2) - fast_unlock(); - complete_job(); - log_workers(); - return job_status(a1); -} - -long long job_status(unsigned long a0) { - puts("job_status"); - return a0; -} - -int main(unsigned long a0, struct struct_0 *a1[3]) { - return schedule_job(a1[0]->field_0, a1[1]->field_0, a1[2]->field_0); -} - diff --git a/tests/ci/angr_dream_example.linemaps b/tests/ci/angr_dream_example.linemaps deleted file mode 100644 index d11ff37..0000000 --- a/tests/ci/angr_dream_example.linemaps +++ /dev/null @@ -1,60 +0,0 @@ -[complete_job] -1 = [ 96,] -2 = [ 96,] -3 = [ 96,] -4 = [ 96, 100, 107,] -5 = [ 96, 107, 100,] - -[next_job] -1 = [ 0,] -2 = [ 0,] -3 = [ 0,] -4 = [ 0, 4, 8, 15,] -5 = [ 20,] -6 = [ 25, 29,] - -[refresh_jobs] -1 = [ 32,] -2 = [ 32,] -3 = [ 32,] -4 = [ 32, 36, 40, 47,] -5 = [ 52,] -6 = [ 57, 61,] - -[fast_unlock] -1 = [ 64,] -2 = [ 64,] -3 = [ 64,] -4 = [ 64, 68, 72, 79,] -5 = [ 84,] -6 = [ 89, 93,] - -[log_workers] -1 = [ 112,] -2 = [ 112,] -3 = [ 112,] -4 = [ 112, 116, 123,] -5 = [ 112, 123, 116,] - -[schedule_job] -1 = [ 160,] -2 = [ 160, 164, 165, 167, 168, 172, 174,] -3 = [ 180, 182, 184,] -4 = [ 195, 189,] -5 = [ 197, 199,] -6 = [ 204, 206,] -10 = [ 224, 226,] -11 = [ 195, 189,] -12 = [ 211, 213,] -13 = [ 235, 237,] -14 = [ 242, 244,] -15 = [ 257,] - -[job_status] -1 = [ 128,] -2 = [ 128, 132, 134, 137, 144,] -3 = [ 149, 152, 154,] - -[main] -1 = [ 352,] -2 = [ 380,] diff --git a/tests/ci/angr_dream_example.toml b/tests/ci/angr_dream_example.toml deleted file mode 100644 index ff670ac..0000000 --- a/tests/ci/angr_dream_example.toml +++ /dev/null @@ -1,100 +0,0 @@ -[complete_job] -loc = 7 -gotos = 0 -dec_time = 0.02066183090209961 - -[next_job] -loc = 8 -gotos = 0 -dec_time = 0.03953123092651367 - -[refresh_jobs] -loc = 8 -gotos = 0 -dec_time = 0.0382227897644043 - -[fast_unlock] -loc = 8 -gotos = 0 -dec_time = 0.03956198692321777 - -[log_workers] -loc = 7 -gotos = 0 -dec_time = 0.020367145538330078 - 
-[schedule_job] -loc = 17 -gotos = 0 -dec_time = 0.4368276596069336 - -[job_status] -loc = 5 -gotos = 0 -dec_time = 0.05095648765563965 - -[main] -loc = 4 -gotos = 0 -dec_time = 0.060524702072143555 - -[complete_job.bools] -ands = 0 -ors = 0 - -[complete_job.func_calls] -puts = 1 - -[next_job.bools] -ands = 0 -ors = 0 - -[next_job.func_calls] -puts = 1 - -[refresh_jobs.bools] -ands = 0 -ors = 0 - -[refresh_jobs.func_calls] -puts = 1 - -[fast_unlock.bools] -ands = 0 -ors = 0 - -[fast_unlock.func_calls] -puts = 1 - -[log_workers.bools] -ands = 0 -ors = 0 - -[log_workers.func_calls] -puts = 1 - -[schedule_job.bools] -ands = 1 -ors = 2 - -[schedule_job.func_calls] -complete_job = 2 -next_job = 1 -refresh_jobs = 2 -fast_unlock = 1 -log_workers = 1 -job_status = 1 - -[job_status.bools] -ands = 0 -ors = 0 - -[job_status.func_calls] -puts = 1 - -[main.bools] -ands = 0 -ors = 0 - -[main.func_calls] -schedule_job = 1 diff --git a/tests/ci/angr_sailr_example.c b/tests/ci/angr_sailr_example.c deleted file mode 100644 index 597cb7d..0000000 --- a/tests/ci/angr_sailr_example.c +++ /dev/null @@ -1,63 +0,0 @@ -void complete_job() { - unsigned long long v1; // rax - - v1 = puts("checking..."); - return; -} - -void next_job() { - unsigned long long v1; // rax - - puts("next_job"); - v1 = 1; - return; -} - -void refresh_jobs() { - unsigned long long v1; // rax - - puts("refresh_jobs"); - v1 = 2; - return; -} - -void fast_unlock() { - unsigned long long v1; // rax - - puts("fast_unlock"); - v1 = 4; - return; -} - -void log_workers() { - unsigned long long v1; // rax - - v1 = puts("log_workers"); - return; -} - -long long schedule_job(unsigned long a0, unsigned long long a1, unsigned long a2) { - if (a0 && a1) { - complete_job(); - if (EARLY_EXIT == a2) - goto LABEL_4000eb; - next_job(); - } - refresh_jobs(); - if (a1 || a1) - fast_unlock(); -LABEL_4000eb: - complete_job(); - log_workers(); - return job_status(a1); -} - -long long job_status(unsigned long a0) { - puts("job_status"); - return a0; -} - -int main(unsigned long a0, struct struct_0 *a1[3]) { - return schedule_job(a1[0]->field_0, a1[1]->field_0, a1[2]->field_0); -} - diff --git a/tests/ci/angr_sailr_example.linemaps b/tests/ci/angr_sailr_example.linemaps deleted file mode 100644 index 76275b3..0000000 --- a/tests/ci/angr_sailr_example.linemaps +++ /dev/null @@ -1,58 +0,0 @@ -[complete_job] -1 = [ 96,] -2 = [ 96,] -3 = [ 96,] -4 = [ 96, 100, 107,] -5 = [ 96, 107, 100,] - -[next_job] -1 = [ 0,] -2 = [ 0,] -3 = [ 0,] -4 = [ 0, 4, 8, 15,] -5 = [ 20,] -6 = [ 25, 29,] - -[refresh_jobs] -1 = [ 32,] -2 = [ 32,] -3 = [ 32,] -4 = [ 32, 36, 40, 47,] -5 = [ 52,] -6 = [ 57, 61,] - -[fast_unlock] -1 = [ 64,] -2 = [ 64,] -3 = [ 64,] -4 = [ 64, 68, 72, 79,] -5 = [ 84,] -6 = [ 89, 93,] - -[log_workers] -1 = [ 112,] -2 = [ 112,] -3 = [ 112,] -4 = [ 112, 116, 123,] -5 = [ 112, 123, 116,] - -[schedule_job] -1 = [ 160,] -3 = [ 180, 182, 184,] -4 = [ 195, 189,] -5 = [ 195, 189,] -6 = [ 197, 199,] -8 = [ 204, 206,] -10 = [ 211, 213,] -12 = [ 235, 237,] -13 = [ 242, 244,] -14 = [ 257,] - -[job_status] -1 = [ 128,] -2 = [ 128, 132, 134, 137, 144,] -3 = [ 149, 152, 154,] - -[main] -1 = [ 352,] -2 = [ 380,] diff --git a/tests/ci/angr_sailr_example.toml b/tests/ci/angr_sailr_example.toml deleted file mode 100644 index 38c107f..0000000 --- a/tests/ci/angr_sailr_example.toml +++ /dev/null @@ -1,100 +0,0 @@ -[complete_job] -loc = 7 -gotos = 0 -dec_time = 0.03466939926147461 - -[next_job] -loc = 8 -gotos = 0 -dec_time = 0.04835772514343262 - -[refresh_jobs] -loc = 8 -gotos = 0 
-dec_time = 0.04874897003173828 - -[fast_unlock] -loc = 8 -gotos = 0 -dec_time = 0.0477290153503418 - -[log_workers] -loc = 7 -gotos = 0 -dec_time = 0.029893875122070312 - -[schedule_job] -loc = 16 -gotos = 1 -dec_time = 0.6433112621307373 - -[job_status] -loc = 5 -gotos = 0 -dec_time = 0.06193256378173828 - -[main] -loc = 4 -gotos = 0 -dec_time = 0.07178354263305664 - -[complete_job.bools] -ands = 0 -ors = 0 - -[complete_job.func_calls] -puts = 1 - -[next_job.bools] -ands = 0 -ors = 0 - -[next_job.func_calls] -puts = 1 - -[refresh_jobs.bools] -ands = 0 -ors = 0 - -[refresh_jobs.func_calls] -puts = 1 - -[fast_unlock.bools] -ands = 0 -ors = 0 - -[fast_unlock.func_calls] -puts = 1 - -[log_workers.bools] -ands = 0 -ors = 0 - -[log_workers.func_calls] -puts = 1 - -[schedule_job.bools] -ands = 1 -ors = 1 - -[schedule_job.func_calls] -complete_job = 2 -next_job = 1 -refresh_jobs = 1 -fast_unlock = 1 -log_workers = 1 -job_status = 1 - -[job_status.bools] -ands = 0 -ors = 0 - -[job_status.func_calls] -puts = 1 - -[main.bools] -ands = 0 -ors = 0 - -[main.func_calls] -schedule_job = 1 diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py new file mode 100644 index 0000000..17bbe9a --- /dev/null +++ b/tests/test_pipeline.py @@ -0,0 +1,70 @@ +import unittest +from pathlib import Path +import subprocess +import os + +import toml + +from sailreval import SAILR_DECOMPILERS, SAILR_METRICS + +TESTS_DIR = Path(__file__).parent.absolute() +SCRIPTS_DIR = (TESTS_DIR.parent / "scripts").absolute() + + +def undockerify_script(script_path: Path, output_path: Path) -> Path: + with open(script_path, "r") as f: + script = f.read() + + script = script.replace("sudo ./docker-eval.sh", "./eval.py") + with open(output_path, "w") as f: + f.write(script) + + os.chmod(str(output_path), os.stat(str(output_path)).st_mode | 0o111) + return output_path + + +class TestPipeline(unittest.TestCase): + def test_eval_script(self): + """ + Tests every stage in the eval.py script as described in the README. + """ + opt_level = "O2" + project = "example_project" + decompilers = [SAILR_DECOMPILERS.SOURCE_CODE, SAILR_DECOMPILERS.ANGR_SAILR, SAILR_DECOMPILERS.ANGR_DREAM] + metrics = [SAILR_METRICS.GOTO_COUNT, SAILR_METRICS.BOOLEAN_COUNT, SAILR_METRICS.FUNC_CALLS, SAILR_METRICS.CFGED] + + # take the verification script and replace docker instances (for CI run) + eval_script_copy = undockerify_script(SCRIPTS_DIR / "verify_pipeline.sh", TESTS_DIR / "verify_pipeline.sh") + # run the eval script like normal uses would run it (without docker) + ret = subprocess.run([f"{eval_script_copy}"], shell=True, cwd=TESTS_DIR) + assert ret.returncode == 0 + + # verify the output + results_file = Path("results.toml") + assert results_file.exists() + with open(results_file, "r") as f: + results = toml.load(f) + + project_results = results[f"{opt_level}/{project}"] + for dec in decompilers: + assert dec in project_results + for metric in metrics: + # since we are requesting --show-stats in eval, we need to check for the sum instead of normal + # metric name; normally, this suffix is not needed. 
+ assert f"{metric}_sum" in project_results[dec] + + # make sure real numbers occurred + assert project_results[SAILR_DECOMPILERS.SOURCE_CODE][f"{SAILR_METRICS.GOTO_COUNT}_sum"] == 1 + for dec in decompilers: + assert project_results[dec][f"{SAILR_METRICS.FUNC_CALLS}_sum"] > 0 + + # verify we had the same number of functions as in source (8) + metadata = project_results["metadata"] + assert metadata["total_unique_functions_in_src"] == 8 + assert metadata["total_unique_functions_in_all_metrics"] == 8 + + # cleanup: + eval_script_copy.unlink() + results_file.unlink() + Path("full_summary.md").unlink() + Path("summary.md").unlink()