0vercl0k · m4drat · Oct 12, 2023
diff --git a/.gitignore b/.gitignore
@@ -36,4 +36,8 @@ src/wtf/fuzzer_*
 src/build/
 src/build_msvc/
 src/out
+src/.cache
 targets/
+__pycache__/
+
+compile_commands.*
diff --git a/README.md b/README.md
@@ -34,7 +34,7 @@ The best way to try the features out is to work with the [fuzzer_hevd](src/wtf/f
 
 ### Starting a server node
 
-The server is basically the brain and keeps track of all the state: the  aggregated code-coverage, the corpus, it generates and distributes the test-cases to client.
+The server is basically the brain and keeps track of all the state: the aggregated code-coverage and the corpus. It also generates and distributes the test-cases to the clients.
 
 This is how you might choose to launch a server node:
 
@@ -284,6 +284,7 @@ In this section I briefly mention various differences between the execution back
 
 ### bochscpu
 - ✅ Full system code-coverage (edge coverage available via `--edges`),
+- ✅ LAF/Compcov assisted coverage collection (available via `--compcov` and `--laf` options),
 - ✅ Demand-paging,
 - ✅ Timeout is the number of instructions which is very precise,
 - ✅ Full execution traces are supported,

diff --git a/scripts/alternative.yaml b/scripts/alternative.yaml
@@ -0,0 +1,59 @@
+title: wtf-laf-config
+seed: 1337
+target-dir: .
+
+master:
+  runs: 999000000
+  max_len: 120000
+  name: rizin
+  inputs: inputs
+  outputs: outputs
+
+nodes:
+  bochs-laf-compcov:
+    backend: bochscpu
+    edges: 1
+    compcov: 1
+    laf: user
+    laf-allowed-ranges: 0x7FF7FE680000-0x7FF7FF405000
+    name: rizin
+    limit: 900000
+
+  bochs-laf:
+    backend: bochscpu
+    edges: 1
+    compcov: 0
+    laf: user
+    laf-allowed-ranges: 0x7FF7FE680000-0x7FF7FF405000
+    name: rizin
+    limit: 900000
+
+  kvm-none-0:
+    backend: kvm
+    name: rizin
+    limit: 2
+
+  kvm-none-1:
+    backend: kvm
+    name: rizin
+    limit: 2
+
+  kvm-none-2:
+    backend: kvm
+    name: rizin
+    limit: 2
+
+  kvm-none-3:
+    backend: kvm
+    name: rizin
+    limit: 2
+
+  kvm-none-4:
+    backend: kvm
+    name: rizin
+    limit: 2
+
+  kvm-none-5:
+    backend: kvm
+    name: rizin
+    limit: 2
diff --git a/scripts/analyze-experiments.py b/scripts/analyze-experiments.py
@@ -0,0 +1,183 @@
+import argparse as ap
+import pathlib
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+import yaml
+
+
+def load_single_result(result_file: pathlib.Path) -> pd.DataFrame:
+    dataframe = pd.read_json(result_file)["stats"]
+    relative_timestamps = [entry["relative_timestamp"] for entry in dataframe]
+    coverage = [entry["coverage"] for entry in dataframe]
+    crashes = [entry["crashes"] for entry in dataframe]
+    execs_sec = [entry["execs_sec"] for entry in dataframe]
+    corpus_size = [entry["corpus_size"] for entry in dataframe]
+    dataframe = pd.DataFrame(
+        {
+            f"relative_timestamp-{result_file.stem}": relative_timestamps,
+            f"coverage-{result_file.stem}": coverage,
+            f"crashes-{result_file.stem}": crashes,
+            f"execs_sec-{result_file.stem}": execs_sec,
+            f"corpus_size-{result_file.stem}": corpus_size,
+        }
+    )
+    return dataframe
+
+
+def load_results(results_dir: pathlib.Path) -> pd.DataFrame:
+    dataframe = pd.DataFrame()
+    for result in results_dir.glob("*.json"):
+        result_df = load_single_result(result)
+        dataframe = pd.concat([dataframe, result_df], axis=1)
+
+    return dataframe
+
+
+def load_experiment_config(exp_config: pathlib.Path) -> tuple:
+    with open(exp_config, "r", encoding="ascii") as conf_stream:
+        try:
+            config = yaml.safe_load(conf_stream)
+        except yaml.YAMLError as exc:
+            print(exc)
+            exit(1)
+
+    results_dir = pathlib.Path(config.get("results-dir", "experiment-results")).resolve()
+    base_config = pathlib.Path(config.get("base-config", None)).stem
+    alternative_config = pathlib.Path(config.get("alternative-config", None)).stem
+
+    return (results_dir, base_config, alternative_config)
+
+
+def plot_results(results: pd.DataFrame, base: str, alternative: str):
+    """Plot mean/std curves for two configurations and a coverage box plot.
+
+    For each of ``coverage``, ``execs_sec`` and ``corpus_size`` a new figure is
+    created. Columns whose name matches ``<metric>-bb-coverage-.*-<base>``
+    (respectively ``...-<alternative>``) are reduced row-wise to a mean and a
+    standard deviation, which are drawn as a line with a shaded band. A final
+    figure holds a box plot of the coverage columns of both configurations at
+    a single row. All figures are displayed by the closing ``plt.show()``.
+
+    Args:
+        results: Frame whose column names embed the metric and the run name
+            (columns are selected purely by matching their names).
+        base: Name fragment selecting the base configuration's columns.
+        alternative: Name fragment selecting the alternative configuration's
+            columns.
+
+    NOTE(review): ``base`` and ``alternative`` are interpolated unescaped into
+    an unanchored pattern given to ``str.contains``; if one name is a suffix of
+    the other (e.g. "laf" vs "no-laf") both selections would presumably
+    overlap -- confirm against the actual result file names.
+    """
+    sns.set_theme(style="darkgrid")
+
+    # Row window used for every curve: rows [10, -22) are kept, i.e. the first
+    # 10 and the last 22 rows are left out.
+    IGNORE_FIRST_N = 10
+    IGNORE_LAST_N = -22
+
+    for graph_type in ("coverage", "execs_sec", "corpus_size"):
+        # Row-wise mean and standard deviation across the base columns.
+        base_mean = results.iloc[
+            IGNORE_FIRST_N:IGNORE_LAST_N,
+            results.columns.str.contains(f"{graph_type}-bb-coverage-.*-{base}"),
+        ].mean(axis=1)
+        base_std = results.iloc[
+            IGNORE_FIRST_N:IGNORE_LAST_N,
+            results.columns.str.contains(f"{graph_type}-bb-coverage-.*-{base}"),
+        ].std(axis=1)
+
+        # Same reduction for the alternative columns.
+        alt_mean = results.iloc[
+            IGNORE_FIRST_N:IGNORE_LAST_N,
+            results.columns.str.contains(f"{graph_type}-bb-coverage-.*-{alternative}"),
+        ].mean(axis=1)
+        alt_std = results.iloc[
+            IGNORE_FIRST_N:IGNORE_LAST_N,
+            results.columns.str.contains(f"{graph_type}-bb-coverage-.*-{alternative}"),
+        ].std(axis=1)
+
+        # Average relative timestamp over all matching timestamp columns (both
+        # configurations) at the rows IGNORE_FIRST_N and IGNORE_LAST_N.
+        start_time = results.iloc[
+            IGNORE_FIRST_N,
+            results.columns.str.contains("relative_timestamp-bb-coverage-.*-.*"),
+        ].mean()
+        end_time = results.iloc[
+            IGNORE_LAST_N,
+            results.columns.str.contains("relative_timestamp-bb-coverage-.*-.*"),
+        ].mean()
+
+        plt.figure(dpi=150)
+        ax = plt.subplot()
+        # X axis shows hours:minutes with a major tick every 30 minutes.
+        ax.xaxis.set_major_formatter(mpl.dates.DateFormatter("%H:%M"))
+        ax.xaxis.set_major_locator(mpl.dates.MinuteLocator(interval=30))
+        ax.set_xlabel("time")
+        # Pad the visible range by 400 seconds on each side.
+        ax.set_xlim(
+            pd.to_datetime(start_time - 400, unit="s"), pd.to_datetime(end_time + 400, unit="s")
+        )
+        plt.xticks(rotation=45)
+
+        # Evenly spaced x values between the averaged start and end times, one
+        # per row of the mean series (the per-run timestamps are not used).
+        timestamps = pd.date_range(
+            start=pd.to_datetime(start_time, unit="s"),
+            end=pd.to_datetime(end_time, unit="s"),
+            periods=len(base_mean),
+        )
+
+        # Legend labels are fixed strings; they do not follow `base` and
+        # `alternative`.
+        ax.plot(
+            timestamps,
+            base_mean,
+            label="no-laf",
+            linewidth=2,
+        )
+        ax.plot(
+            timestamps,
+            alt_mean,
+            label="laf",
+            linestyle="--",
+            linewidth=2,
+        )
+        ax.legend()
+        ax.set(title=graph_type)
+
+        # Shaded band of one standard deviation around each mean curve.
+        ax.fill_between(
+            timestamps,
+            base_mean - base_std,
+            base_mean + base_std,
+            alpha=0.2,
+        )
+        ax.fill_between(
+            timestamps,
+            alt_mean - alt_std,
+            alt_mean + alt_std,
+            alpha=0.2,
+        )
+
+    # Index label of the 30th row from the end; the box plot compares coverage
+    # at that single row.
+    COV_ENTRY_N = results.index[-30]
+
+    plt.figure()
+    sns.boxplot(
+        data=pd.DataFrame(
+            {
+                "no-laf": results.loc[
+                    COV_ENTRY_N,
+                    results.columns.str.contains(f"coverage-bb-coverage-.*-{base}"),
+                ],
+                "laf": results.loc[
+                    COV_ENTRY_N,
+                    results.columns.str.contains(f"coverage-bb-coverage-.*-{alternative}"),
+                ],
+            }
+        ),
+    )
+
+    # Display every figure created above.
+    plt.show()
+
+
+def main():
+    parser = ap.ArgumentParser()
+    parser.add_argument(
+        "-b",
+        "--base",
+        type=str,
+        help="Name of the base configuration (e.g. no-laf)",
+    )
+    parser.add_argument(
+        "-a",
+        "--alternative",
+        type=str,
+        help="Name of the alternative configuration (e.g. laf)",
+    )
+    parser.add_argument(
+        "results",
+        type=pathlib.Path,
+        help="Path to the results directory",
+    )
+    args = parser.parse_args()
+
+    base_name = args.base
+    alternative_name = args.alternative
+    results_dir = args.results
+
+    results = load_results(results_dir, base_name, alternative_name)
+    plot_results(results, base_name, alternative_name)
+
+
+# Run the command-line entry point only when executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/scripts/base.yaml b/scripts/base.yaml
@@ -0,0 +1,57 @@
+title: wtf-laf-config
+seed: 1337
+target-dir: .
+
+master:
+  runs: 999000000
+  max_len: 120000
+  name: rizin
+  inputs: inputs
+  outputs: outputs
+
+nodes:
+  bochs-laf-compcov:
+    backend: bochscpu
+    edges: 1
+    compcov: 0
+    laf: 0
+    name: rizin
+    limit: 900000
+
+  bochs-laf:
+    backend: bochscpu
+    edges: 1
+    compcov: 0
+    laf: 0
+    name: rizin
+    limit: 900000
+
+  kvm-none-0:
+    backend: kvm
+    name: rizin
+    limit: 2
+
+  kvm-none-1:
+    backend: kvm
+    name: rizin
+    limit: 2
+
+  kvm-none-2:
+    backend: kvm
+    name: rizin
+    limit: 2
+
+  kvm-none-3:
+    backend: kvm
+    name: rizin
+    limit: 2
+
+  kvm-none-4:
+    backend: kvm
+    name: rizin
+    limit: 2
+
+  kvm-none-5:
+    backend: kvm
+    name: rizin
+    limit: 2
diff --git a/scripts/experiment.yaml b/scripts/experiment.yaml
@@ -0,0 +1,9 @@
+title: laf-vs-nolaf
+
+round-duration: 21600 # 6 hours
+rounds: 5
+results-dir: laf-vs-nolaf-results
+cov-instructions-limit: 900000
+
+base-config: base.yaml
+alternative-config: alternative.yaml
diff --git a/scripts/gen_coveragefile_binja.py b/scripts/gen_coveragefile_binja.py
@@ -9,6 +9,7 @@
 def generate_coverage_file(bv):
     # bv.file.filename: 'C:/path/to/binary.bndb'
     name = Path(bv.file.filename).stem
+    name = name.replace("-", "_")
 
     bb_list = []
 

diff --git a/scripts/gen_coveragefile_ghidra.py b/scripts/gen_coveragefile_ghidra.py
@@ -19,7 +19,7 @@
     block = block_iterator.next()
 
 json_object = {
-    'name': program_name,
+    'name': program_name.replace("-", "_"),
     'addresses': address_list
 }
 

diff --git a/scripts/gen_coveragefile_ida.py b/scripts/gen_coveragefile_ida.py
@@ -6399,7 +6399,7 @@ def main():
                 addrs.add(rva)
 
     cov = {
-        'name': filepath.with_suffix('').name,
+        'name': filepath.with_suffix('').name.replace("-", "_"),
         'addresses': sorted(addrs)
     }