diff --git a/sailreval/__init__.py b/sailreval/__init__.py index f04db21..70ab574 100755 --- a/sailreval/__init__.py +++ b/sailreval/__init__.py @@ -22,6 +22,7 @@ class SAILR_METRICS: GED_MAX = "ged_max" GED_EXACT = "ged_exact" CFGED = "cfged" + HU_CFGED = "hu_cfged" GRAPH_SIZE = "graph_size" BLOCK_COUNT = "block_count" DEC_TIME = "dec_time" diff --git a/sailreval/decompilers/angr_dec.py b/sailreval/decompilers/angr_dec.py index e08cc97..93cf670 100755 --- a/sailreval/decompilers/angr_dec.py +++ b/sailreval/decompilers/angr_dec.py @@ -93,6 +93,7 @@ def angr_decompile( LoweredSwitchSimplifier, ReturnDeduplicator, ReturnDuplicatorLow, ReturnDuplicatorHigh, CrossJumpReverter, ConstPropOptReverter, DuplicationReverter, FlipBooleanCmp, ITERegionConverter ) + from angr.analyses.decompiler.presets import DECOMPILATION_PRESETS from cle.backends.coff import Coff # setup a CFG with Calling Conventions recovered @@ -123,7 +124,7 @@ def angr_decompile( LoweredSwitchSimplifier, ReturnDeduplicator, ReturnDuplicatorLow, ReturnDuplicatorHigh, CrossJumpReverter, ConstPropOptReverter, DuplicationReverter, FlipBooleanCmp, ITERegionConverter ] - all_optimization_passes = angr.analyses.decompiler.optimization_passes.get_default_optimization_passes( + all_optimization_passes = DECOMPILATION_PRESETS["full"].get_optimization_passes( "AMD64", "linux", disable_opts=[] if use_deoptimizers else deoptimizers ) if is_windows and LoweredSwitchSimplifier in all_optimization_passes: @@ -434,7 +435,8 @@ def handle_CFunctionCall(self, obj: CFunctionCall): func_call_counts[obj.callee_func.name] += 1 return super().handle_CFunctionCall(obj) - FunctionCallCounter.handle(codegen.cfunc) + call_counter = FunctionCallCounter() + call_counter.handle(codegen.cfunc) func_call_counts = dict(func_call_counts) if not func_call_counts: func_call_counts = {"__empty__": 0} diff --git a/sailreval/metrics/__init__.py b/sailreval/metrics/__init__.py index c57f719..519e374 100755 --- a/sailreval/metrics/__init__.py +++ b/sailreval/metrics/__init__.py @@ -7,7 +7,7 @@ from .nesting_count import count_nesting_ifs from .duplication import count_func_calls from .total_bools import count_if_bools -from .ged_to_source import ged_upperbound_score, cfg_edit_distance, ged_max_score, block_count, ged_exact_score, graph_size +from .ged_to_source import ged_upperbound_score, cfg_edit_distance, ged_max_score, block_count, ged_exact_score, graph_size, hu_cfged_score from .post_metrics import norm_cfged, zero_cfged, funcs_w_goto, goto_func_cfged from sailreval import SAILR_METRICS @@ -36,6 +36,7 @@ SAILR_METRICS.GED_UPPERBOUND: ged_upperbound_score, SAILR_METRICS.GED_MAX: ged_max_score, SAILR_METRICS.GED_EXACT: ged_exact_score, + SAILR_METRICS.HU_CFGED: hu_cfged_score, # Important Metrics SAILR_METRICS.GOTO_COUNT: count_total_gotos, diff --git a/sailreval/metrics/ged_to_source.py b/sailreval/metrics/ged_to_source.py index 39590c3..61185d5 100644 --- a/sailreval/metrics/ged_to_source.py +++ b/sailreval/metrics/ged_to_source.py @@ -2,7 +2,8 @@ from pathlib import Path from typing import Dict -from cfgutils.similarity import ged_max, ged_upperbound, ged_exact +from pyjoern.mapping import correct_decompiler_mappings, read_line_maps +from cfgutils.similarity import ged_max, ged_upperbound, ged_exact, hu_cfged from cfgutils.similarity import cfg_edit_distance as _cfg_edit_distance import networkx as nx @@ -160,3 +161,20 @@ def block_count( return None return len(dec_cfg.nodes) + + +# +# More special case GED algorithms +# + +def hu_cfged_score( + func_name, client, source_cfgs: Dict[str, nx.DiGraph] = None, dec_cfgs: Dict[str, nx.DiGraph] = None, + decompiler=None, binary_path=None, **kwargs +): + if decompiler == "source": + return float(0) + source_cfg, dec_cfg = _verify_has_valid_graphs(func_name, client, source_cfgs, dec_cfgs, decompiler, binary_path) + if source_cfg is None or dec_cfg is None: + return None + + return hu_cfged(dec_cfg, source_cfg)