Skip to content

Commit

Permalink
Add the Hu CFGED algorithm for eval (#17)
Browse files (browse the repository at this point in the history)
* Fix some pipeline errors

* Add the Hu CFGED algorithm for eval

* fix
  • Loading branch information
mahaloz authored Nov 26, 2024
1 parent ae5069a commit 2c3c6b0
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 4 deletions.
1 change: 1 addition & 0 deletions sailreval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class SAILR_METRICS:
GED_MAX = "ged_max"
GED_EXACT = "ged_exact"
CFGED = "cfged"
HU_CFGED = "hu_cfged"
GRAPH_SIZE = "graph_size"
BLOCK_COUNT = "block_count"
DEC_TIME = "dec_time"
Expand Down
6 changes: 4 additions & 2 deletions sailreval/decompilers/angr_dec.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def angr_decompile(
LoweredSwitchSimplifier, ReturnDeduplicator, ReturnDuplicatorLow, ReturnDuplicatorHigh, CrossJumpReverter,
ConstPropOptReverter, DuplicationReverter, FlipBooleanCmp, ITERegionConverter
)
from angr.analyses.decompiler.presets import DECOMPILATION_PRESETS
from cle.backends.coff import Coff

# setup a CFG with Calling Conventions recovered
Expand Down Expand Up @@ -123,7 +124,7 @@ def angr_decompile(
LoweredSwitchSimplifier, ReturnDeduplicator, ReturnDuplicatorLow, ReturnDuplicatorHigh, CrossJumpReverter,
ConstPropOptReverter, DuplicationReverter, FlipBooleanCmp, ITERegionConverter
]
all_optimization_passes = angr.analyses.decompiler.optimization_passes.get_default_optimization_passes(
all_optimization_passes = DECOMPILATION_PRESETS["full"].get_optimization_passes(
"AMD64", "linux", disable_opts=[] if use_deoptimizers else deoptimizers
)
if is_windows and LoweredSwitchSimplifier in all_optimization_passes:
Expand Down Expand Up @@ -434,7 +435,8 @@ def handle_CFunctionCall(self, obj: CFunctionCall):
func_call_counts[obj.callee_func.name] += 1
return super().handle_CFunctionCall(obj)

FunctionCallCounter.handle(codegen.cfunc)
call_counter = FunctionCallCounter()
call_counter.handle(codegen.cfunc)
func_call_counts = dict(func_call_counts)
if not func_call_counts:
func_call_counts = {"__empty__": 0}
Expand Down
3 changes: 2 additions & 1 deletion sailreval/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .nesting_count import count_nesting_ifs
from .duplication import count_func_calls
from .total_bools import count_if_bools
from .ged_to_source import ged_upperbound_score, cfg_edit_distance, ged_max_score, block_count, ged_exact_score, graph_size
from .ged_to_source import ged_upperbound_score, cfg_edit_distance, ged_max_score, block_count, ged_exact_score, graph_size, hu_cfged_score
from .post_metrics import norm_cfged, zero_cfged, funcs_w_goto, goto_func_cfged

from sailreval import SAILR_METRICS
Expand Down Expand Up @@ -36,6 +36,7 @@
SAILR_METRICS.GED_UPPERBOUND: ged_upperbound_score,
SAILR_METRICS.GED_MAX: ged_max_score,
SAILR_METRICS.GED_EXACT: ged_exact_score,
SAILR_METRICS.HU_CFGED: hu_cfged_score,

# Important Metrics
SAILR_METRICS.GOTO_COUNT: count_total_gotos,
Expand Down
20 changes: 19 additions & 1 deletion sailreval/metrics/ged_to_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from pathlib import Path
from typing import Dict

from cfgutils.similarity import ged_max, ged_upperbound, ged_exact
from pyjoern.mapping import correct_decompiler_mappings, read_line_maps
from cfgutils.similarity import ged_max, ged_upperbound, ged_exact, hu_cfged
from cfgutils.similarity import cfg_edit_distance as _cfg_edit_distance
import networkx as nx

Expand Down Expand Up @@ -160,3 +161,20 @@ def block_count(
return None

return len(dec_cfg.nodes)


#
# More special case GED algorithms
#

def hu_cfged_score(
    func_name, client, source_cfgs: Dict[str, nx.DiGraph] = None, dec_cfgs: Dict[str, nx.DiGraph] = None,
    decompiler=None, binary_path=None, **kwargs
):
    """
    Score a decompiled function's CFG against its source CFG using ``hu_cfged``.

    :param func_name: name of the function whose graphs are compared
    :param client: forwarded unchanged to ``_verify_has_valid_graphs``
    :param source_cfgs: mapping of function name to source CFG (may be None)
    :param dec_cfgs: mapping of function name to decompiled CFG (may be None)
    :param decompiler: name of the decompiler; the literal ``"source"`` short-circuits to 0.0
    :param binary_path: forwarded unchanged to ``_verify_has_valid_graphs``
    :param kwargs: accepted and ignored so all metric functions can share one call signature
                   (presumably -- confirm against the metric dispatcher)
    :return: ``0.0`` when ``decompiler == "source"``; ``None`` when either graph could not be
             obtained; otherwise whatever ``hu_cfged(dec_cfg, source_cfg)`` returns
    """
    # Comparing source against itself: report zero without looking up any graphs.
    if decompiler == "source":
        return 0.0

    graph_pair = _verify_has_valid_graphs(
        func_name, client, source_cfgs, dec_cfgs, decompiler, binary_path
    )
    src_graph, dec_graph = graph_pair

    # A missing graph on either side means the metric is undefined for this function.
    if src_graph is None or dec_graph is None:
        return None

    # Argument order matters to the callee: decompiled graph first, source graph second.
    return hu_cfged(dec_graph, src_graph)

0 comments on commit 2c3c6b0

Please sign in to comment.