Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added experimental Compcov/LAF support for the Bochs backend #186

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,8 @@ src/wtf/fuzzer_*
src/build/
src/build_msvc/
src/out
src/.cache
targets/
__pycache__/

compile_commands.*
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ The best way to try the features out is to work with the [fuzzer_hevd](src/wtf/f

### Starting a server node

The server is basically the brain and keeps track of all the state: the aggregated code-coverage, the corpus, it generates and distributes the test-cases to client.
The server is basically the brain and keeps track of all the state: the aggregated code-coverage and the corpus. It also generates and distributes the test-cases to the clients.

This is how you might choose to launch a server node:

Expand Down Expand Up @@ -284,6 +284,7 @@ In this section I briefly mention various differences between the execution back

### bochscpu
- ✅ Full system code-coverage (edge coverage available via `--edges`),
- ✅ LAF/Compcov assisted coverage collection (available via `--compcov` and `--laf` options),
- ✅ Demand-paging,
- ✅ Timeout is the number of instructions which is very precise,
- ✅ Full execution traces are supported,
Expand Down
59 changes: 59 additions & 0 deletions scripts/alternative.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
title: wtf-laf-config
seed: 1337
target-dir: .

master:
runs: 999000000
max_len: 120000
name: rizin
inputs: inputs
outputs: outputs

nodes:
bochs-laf-compcov:
backend: bochscpu
edges: 1
compcov: 1
laf: user
laf-allowed-ranges: 0x7FF7FE680000-0x7FF7FF405000
name: rizin
limit: 900000

bochs-laf:
backend: bochscpu
edges: 1
compcov: 0
laf: user
laf-allowed-ranges: 0x7FF7FE680000-0x7FF7FF405000
name: rizin
limit: 900000

kvm-none-0:
backend: kvm
name: rizin
limit: 2

kvm-none-1:
backend: kvm
name: rizin
limit: 2

kvm-none-2:
backend: kvm
name: rizin
limit: 2

kvm-none-3:
backend: kvm
name: rizin
limit: 2

kvm-none-4:
backend: kvm
name: rizin
limit: 2

kvm-none-5:
backend: kvm
name: rizin
limit: 2
183 changes: 183 additions & 0 deletions scripts/analyze-experiments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
import argparse as ap
import pathlib
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import yaml


def load_single_result(result_file: pathlib.Path) -> pd.DataFrame:
    """Flatten the ``stats`` entries of one JSON result file into a frame.

    Each entry under the document's ``stats`` key contributes one row. The
    returned frame has one column per metric, and every column name is the
    metric name followed by ``-`` and the stem of ``result_file`` so that
    frames from different files can later sit side by side.

    Args:
        result_file: Path to a JSON result file with a ``stats`` key.

    Returns:
        A frame with the ``relative_timestamp``, ``coverage``, ``crashes``,
        ``execs_sec`` and ``corpus_size`` columns of that file.
    """
    run_name = result_file.stem
    samples = pd.read_json(result_file)["stats"]

    # Order matters: it determines the column order of the returned frame.
    metric_names = (
        "relative_timestamp",
        "coverage",
        "crashes",
        "execs_sec",
        "corpus_size",
    )

    columns = {}
    for metric in metric_names:
        columns[f"{metric}-{run_name}"] = [sample[metric] for sample in samples]

    return pd.DataFrame(columns)


def load_results(results_dir: pathlib.Path) -> pd.DataFrame:
    """Load every ``*.json`` result file of a directory into one wide frame.

    Each file is loaded with ``load_single_result`` and the per-file frames
    are placed side by side (``axis=1``), so the result has one group of
    metric columns per result file.

    Args:
        results_dir: Directory that is searched (non-recursively) for
            ``*.json`` files.

    Returns:
        The column-wise concatenation of all per-file frames, or an empty
        frame when the directory contains no ``*.json`` file.
    """
    # Sort the paths so the column order does not depend on the order in
    # which the filesystem happens to enumerate the directory.
    frames = [
        load_single_result(result_path)
        for result_path in sorted(results_dir.glob("*.json"))
    ]

    # pd.concat() raises on an empty list; keep returning an empty frame.
    if not frames:
        return pd.DataFrame()

    # A single concat avoids re-copying the accumulated frame once per file.
    return pd.concat(frames, axis=1)


def load_experiment_config(exp_config: pathlib.Path) -> tuple:
    """Read an experiment description and extract what the analysis needs.

    Args:
        exp_config: Path to the experiment YAML file. It must contain the
            ``base-config`` and ``alternative-config`` keys; ``results-dir``
            is optional and defaults to ``experiment-results``.

    Returns:
        A ``(results_dir, base_config, alternative_config)`` tuple where
        ``results_dir`` is an absolute ``pathlib.Path`` and the two
        configuration entries are the stems (file names without suffix) of
        the configured files.

    Raises:
        SystemExit: If the file is not valid YAML, is not a mapping at the
            top level, or lacks one of the required keys.
    """
    # UTF-8 is a superset of ASCII, so files that loaded before still load.
    with open(exp_config, "r", encoding="utf-8") as conf_stream:
        try:
            config = yaml.safe_load(conf_stream)
        except yaml.YAMLError as exc:
            # SystemExit with a message prints it to stderr and exits with
            # status 1, without relying on the site-provided exit() helper.
            raise SystemExit(f"{exp_config}: invalid YAML: {exc}") from exc

    # An empty file parses to None and a list/scalar document has no .get().
    if not isinstance(config, dict):
        raise SystemExit(f"{exp_config}: expected a YAML mapping at the top level")

    required_keys = ("base-config", "alternative-config")
    missing = [key for key in required_keys if config.get(key) is None]
    if missing:
        # Without this check pathlib.Path(None) fails with an opaque TypeError.
        raise SystemExit(
            f"{exp_config}: missing required key(s): {', '.join(missing)}"
        )

    results_dir = pathlib.Path(config.get("results-dir", "experiment-results")).resolve()
    base_config = pathlib.Path(config["base-config"]).stem
    alternative_config = pathlib.Path(config["alternative-config"]).stem

    return (results_dir, base_config, alternative_config)


def plot_results(results: pd.DataFrame, base: str, alternative: str) -> None:
    """Plot the base and the alternative configuration against each other.

    For each of the ``coverage``, ``execs_sec`` and ``corpus_size`` metrics a
    figure is drawn that shows, for both configurations, the row-wise mean of
    the matching columns as a line and one standard deviation around it as a
    shaded band. A last figure shows a box plot of the coverage values of
    both configurations taken from a single row. All figures are then
    displayed with ``plt.show()``.

    Args:
        results: Frame whose column names are matched against
            ``<metric>-bb-coverage-.*-<config>`` patterns.
        base: Configuration name inserted into the pattern for the first
            series (always labelled "no-laf" in the plots).
        alternative: Configuration name inserted into the pattern for the
            second series (always labelled "laf" in the plots).
    """
    sns.set_theme(style="darkgrid")

    # Row window used for the line plots: rows before position 10 and the
    # last 22 rows are left out.
    IGNORE_FIRST_N = 10
    IGNORE_LAST_N = -22

    # NOTE(review): the column patterns below are unanchored regular
    # expressions (Series.str.contains) and `base` / `alternative` are
    # inserted unescaped. A name that is a suffix of the other one (e.g.
    # "laf" vs. "no-laf") would therefore select both sets of columns;
    # confirm against the real result file names.
    for graph_type in ("coverage", "execs_sec", "corpus_size"):
        # Mean / standard deviation across all matching columns, per row.
        base_mean = results.iloc[
            IGNORE_FIRST_N:IGNORE_LAST_N,
            results.columns.str.contains(f"{graph_type}-bb-coverage-.*-{base}"),
        ].mean(axis=1)
        base_std = results.iloc[
            IGNORE_FIRST_N:IGNORE_LAST_N,
            results.columns.str.contains(f"{graph_type}-bb-coverage-.*-{base}"),
        ].std(axis=1)

        alt_mean = results.iloc[
            IGNORE_FIRST_N:IGNORE_LAST_N,
            results.columns.str.contains(f"{graph_type}-bb-coverage-.*-{alternative}"),
        ].mean(axis=1)
        alt_std = results.iloc[
            IGNORE_FIRST_N:IGNORE_LAST_N,
            results.columns.str.contains(f"{graph_type}-bb-coverage-.*-{alternative}"),
        ].std(axis=1)

        # Average relative timestamp (over every matching column, i.e. both
        # configurations) at the first and last row of the window.
        start_time = results.iloc[
            IGNORE_FIRST_N,
            results.columns.str.contains("relative_timestamp-bb-coverage-.*-.*"),
        ].mean()
        end_time = results.iloc[
            IGNORE_LAST_N,
            results.columns.str.contains("relative_timestamp-bb-coverage-.*-.*"),
        ].mean()

        plt.figure(dpi=150)
        ax = plt.subplot()
        ax.xaxis.set_major_formatter(mpl.dates.DateFormatter("%H:%M"))
        ax.xaxis.set_major_locator(mpl.dates.MinuteLocator(interval=30))
        ax.set_xlabel("time")
        # Pad the x-axis by 400 seconds on both sides of the plotted window.
        ax.set_xlim(
            pd.to_datetime(start_time - 400, unit="s"), pd.to_datetime(end_time + 400, unit="s")
        )
        plt.xticks(rotation=45)

        # The x values are evenly spaced between the averaged start and end
        # times, one per plotted row, rather than the recorded per-sample
        # timestamps.
        timestamps = pd.date_range(
            start=pd.to_datetime(start_time, unit="s"),
            end=pd.to_datetime(end_time, unit="s"),
            periods=len(base_mean),
        )

        ax.plot(
            timestamps,
            base_mean,
            label="no-laf",
            linewidth=2,
        )
        ax.plot(
            timestamps,
            alt_mean,
            label="laf",
            linestyle="--",
            linewidth=2,
        )
        ax.legend()
        ax.set(title=graph_type)

        # Shaded bands: mean +/- one standard deviation for each series.
        ax.fill_between(
            timestamps,
            base_mean - base_std,
            base_mean + base_std,
            alpha=0.2,
        )
        ax.fill_between(
            timestamps,
            alt_mean - alt_std,
            alt_mean + alt_std,
            alpha=0.2,
        )

    # Index label of the 30th row from the end; the box plot compares the
    # coverage values of both configurations at that row. Raises IndexError
    # when `results` has fewer than 30 rows.
    COV_ENTRY_N = results.index[-30]

    plt.figure()
    sns.boxplot(
        data=pd.DataFrame(
            {
                "no-laf": results.loc[
                    COV_ENTRY_N,
                    results.columns.str.contains(f"coverage-bb-coverage-.*-{base}"),
                ],
                "laf": results.loc[
                    COV_ENTRY_N,
                    results.columns.str.contains(f"coverage-bb-coverage-.*-{alternative}"),
                ],
            }
        ),
    )

    plt.show()


def main():
    """Parse the command line, load the result files and plot them.

    The positional ``results`` argument is the directory holding the JSON
    result files; ``--base`` and ``--alternative`` are the configuration
    names used to pick the matching result columns when plotting.
    """
    parser = ap.ArgumentParser()
    parser.add_argument(
        "-b",
        "--base",
        type=str,
        help="Name of the base configuration (e.g. no-laf)",
    )
    parser.add_argument(
        "-a",
        "--alternative",
        type=str,
        help="Name of the alternative configuration (e.g. laf)",
    )
    parser.add_argument(
        "results",
        type=pathlib.Path,
        help="Path to the results directory",
    )
    args = parser.parse_args()

    base_name = args.base
    alternative_name = args.alternative
    results_dir = args.results

    # load_results() only takes the results directory; the configuration
    # names are needed by plot_results() alone. Passing them to
    # load_results() raised a TypeError before anything was plotted.
    results = load_results(results_dir)
    plot_results(results, base_name, alternative_name)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
57 changes: 57 additions & 0 deletions scripts/base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
title: wtf-laf-config
seed: 1337
target-dir: .

master:
runs: 999000000
max_len: 120000
name: rizin
inputs: inputs
outputs: outputs

nodes:
bochs-laf-compcov:
backend: bochscpu
edges: 1
compcov: 0
laf: 0
name: rizin
limit: 900000

bochs-laf:
backend: bochscpu
edges: 1
compcov: 0
laf: 0
name: rizin
limit: 900000

kvm-none-0:
backend: kvm
name: rizin
limit: 2

kvm-none-1:
backend: kvm
name: rizin
limit: 2

kvm-none-2:
backend: kvm
name: rizin
limit: 2

kvm-none-3:
backend: kvm
name: rizin
limit: 2

kvm-none-4:
backend: kvm
name: rizin
limit: 2

kvm-none-5:
backend: kvm
name: rizin
limit: 2
9 changes: 9 additions & 0 deletions scripts/experiment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
title: laf-vs-nolaf

round-duration: 21600 # 6 hours
rounds: 5
results-dir: laf-vs-nolaf-results
cov-instructions-limit: 900000

base-config: base.yaml
alternative-config: alternative.yaml
1 change: 1 addition & 0 deletions scripts/gen_coveragefile_binja.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
def generate_coverage_file(bv):
# bv.file.filename: 'C:/path/to/binary.bndb'
name = Path(bv.file.filename).stem
name = name.replace("-", "_")

bb_list = []

Expand Down
2 changes: 1 addition & 1 deletion scripts/gen_coveragefile_ghidra.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
block = block_iterator.next()

json_object = {
'name': program_name,
'name': program_name.replace("-", "_"),
'addresses': address_list
}

Expand Down
2 changes: 1 addition & 1 deletion scripts/gen_coveragefile_ida.py
Original file line number Diff line number Diff line change
Expand Up @@ -6399,7 +6399,7 @@ def main():
addrs.add(rva)

cov = {
'name': filepath.with_suffix('').name,
'name': filepath.with_suffix('').name.replace("-", "_"),
'addresses': sorted(addrs)
}

Expand Down
Loading
Loading