diff --git a/librz/analysis/op.c b/librz/analysis/op.c
index 25e963d468c..85feaaea2a0 100644
--- a/librz/analysis/op.c
+++ b/librz/analysis/op.c
@@ -94,7 +94,20 @@ static int defaultCycles(RzAnalysisOp *op) {
 	}
 }
 
-RZ_API int rz_analysis_op(RzAnalysis *analysis, RzAnalysisOp *op, ut64 addr, const ut8 *data, int len, RzAnalysisOpMask mask) {
+/**
+ * \brief Disassemble the given \p data at \p addr to an RzAnalysisOp.
+ * Note: \p op will be set to an invalid operation in case of failure.
+ *
+ * \param analysis The RzAnalysis to use.
+ * \param op An _uninitialized_ RzAnalysisOp to save the result into.
+ * \param addr The address the data is located at.
+ * \param data The buffer with the bytes to disassemble.
+ * \param len Length of the \p data in bytes.
+ * \param mask Which analysis details should be disassembled.
+ *
+ * \return The number of bytes which were disassembled. -1 in case of failure.
+ */
+RZ_API int rz_analysis_op(RZ_NONNULL RzAnalysis *analysis, RZ_OUT RzAnalysisOp *op, ut64 addr, const ut8 *data, int len, RzAnalysisOpMask mask) {
 	rz_return_val_if_fail(analysis && op && len > 0, -1);
 
 	rz_analysis_op_init(op);
diff --git a/librz/core/cgraph.c b/librz/core/cgraph.c
index 5448e7f3fa9..cd18807cc45 100644
--- a/librz/core/cgraph.c
+++ b/librz/core/cgraph.c
@@ -9,6 +9,9 @@
 #include
 #include
 #include "core_private.h"
+#include
+#include
+#include
 #include
 #include
 #include
@@ -492,6 +495,9 @@ RZ_API RZ_OWN RzGraph /**/ *rz_core_graph(RzCore *core, RzCor
 	case RZ_CORE_GRAPH_TYPE_ICFG:
 		graph = rz_core_graph_icfg(core);
 		break;
+	case RZ_CORE_GRAPH_TYPE_CFG:
+		graph = rz_core_graph_cfg(core, addr);
+		break;
 	case RZ_CORE_GRAPH_TYPE_DIFF:
 	default:
 		rz_warn_if_reached();
@@ -868,3 +874,167 @@ RZ_API RZ_OWN RzGraph /**/ *rz_core_graph_icfg(RZ_NONNULL RzC
 	ht_uu_free(graph_idx);
 	return graph;
 }
+
+/**
+ * \brief Check if \p op ends a path of the CFG (illegal, return or unknown instruction).
+ */
+static inline bool is_leaf_op(const RzAnalysisOp *op) {
+	return (op->type & RZ_ANALYSIS_OP_TYPE_MASK) == RZ_ANALYSIS_OP_TYPE_ILL ||
+		(op->type & RZ_ANALYSIS_OP_TYPE_MASK) == RZ_ANALYSIS_OP_TYPE_RET ||
+		(op->type & RZ_ANALYSIS_OP_TYPE_MASK) == RZ_ANALYSIS_OP_TYPE_UNK;
+}
+
+/**
+ * \brief Check if \p op is a call instruction.
+ */
+static inline bool is_call(const RzAnalysisOp *op) {
+	return (op->type & RZ_ANALYSIS_OP_TYPE_MASK) == RZ_ANALYSIS_OP_TYPE_CALL;
+}
+
+/**
+ * \brief Check if \p op is a jump which does not have the conditional hint set.
+ */
+static inline bool is_uncond_jump(const RzAnalysisOp *op) {
+	return (op->type & RZ_ANALYSIS_OP_TYPE_MASK) == RZ_ANALYSIS_OP_TYPE_JMP &&
+		!((op->type & RZ_ANALYSIS_OP_HINT_MASK) & RZ_ANALYSIS_OP_TYPE_COND);
+}
+
+/**
+ * \brief Check if the instruction following \p op must not be linked to \p op.
+ * This is the case for unconditional jumps and for every non-call instruction with \c fail set.
+ */
+static inline bool ignore_next_instr(const RzAnalysisOp *op) {
+	// Except calls, everything which has set fail.
+	return is_uncond_jump(op) || (op->fail != UT64_MAX && !is_call(op));
+}
+
+/**
+ * \brief Add an edge to the graph and update \p to_visit vector and the \p nodes_visited hash table.
+ *
+ * \param graph The graph to work on.
+ * \param to_visit The vector with addresses to visit.
+ * \param nodes_visited The hash table holding already visited addresses and their node indices in the graph.
+ * \param from The address of the parent node.
+ * \param to The address of the target node of the edge.
+ *
+ * \return true On success.
+ * \return false On failure.
+ */
+static bool add_edge_to_cfg(RZ_NONNULL RzGraph /**/ *graph, RZ_NONNULL RzVector /**/ *to_visit, RZ_NONNULL HtUU *nodes_visited, ut64 from, ut64 to) {
+	rz_return_val_if_fail(graph && to_visit && nodes_visited, false);
+	bool visited = false;
+	ut64 from_idx = ht_uu_find(nodes_visited, from, &visited);
+	if (!visited && from != to) {
+		RZ_LOG_ERROR("'from' node should have been added before. 0x%" PFMT64x " -> 0x%" PFMT64x "\n", from, to);
+		return false;
+	}
+
+	// NOTE(review): a node is added here even if `to` is already in nodes_visited, so join points
+	// and back-edges presumably end in a second node for the same address. Confirm this is intended.
+	RzGraphNode *to_node = rz_graph_add_node_info(graph, rz_str_newf("0x%" PFMT64x, to), NULL, to);
+	if (!to_node) {
+		RZ_LOG_ERROR("Could not add node at 0x%" PFMT64x "\n", to);
+		return false;
+	}
+	if (from == to) {
+		// Self-loop: let the edge start at the node which was just added.
+		from_idx = to_node->idx;
+	}
+
+	// Only the visited flag of the target matters here, the returned index is not needed.
+	ht_uu_find(nodes_visited, to, &visited);
+	if (from != to && !visited) {
+		// The target node wasn't visited before. Otherwise this is a back-edge.
+		rz_vector_push(to_visit, &to);
+	}
+
+	ht_uu_insert(nodes_visited, to, to_node->idx);
+	rz_graph_add_edge(graph, rz_graph_get_node(graph, from_idx), to_node);
+	return true;
+}
+
+/**
+ * \brief Get the procedural control flow graph (CFG) at an address.
+ * Calls are not followed.
+ *
+ * \param core The current core.
+ * \param addr The CFG entry point.
+ *
+ * \return The CFG at address \p addr or NULL in case of failure.
+ */
+RZ_API RZ_OWN RzGraph /**/ *rz_core_graph_cfg(RZ_NONNULL RzCore *core, ut64 addr) {
+	rz_return_val_if_fail(core && core->analysis && core->io, NULL);
+	RzGraph *graph = rz_graph_new();
+	if (!graph) {
+		return NULL;
+	}
+
+	// Visited instructions. Indexed by instruction address, value is index in graph.
+	HtUU *nodes_visited = ht_uu_new0();
+	// Addresses to visit.
+	RzVector *to_visit = rz_vector_new(sizeof(ut64), NULL, NULL);
+	RzGraphNode *entry = NULL;
+	if (!nodes_visited || !to_visit) {
+		RZ_LOG_ERROR("Could not generate CFG at 0x%" PFMT64x ". Allocation failed.\n", addr);
+		goto error;
+	}
+
+	// Add entry node
+	entry = rz_graph_add_node_info(graph, rz_str_newf("0x%" PFMT64x, addr), NULL, addr);
+	if (!entry) {
+		RZ_LOG_ERROR("Could not add node at 0x%" PFMT64x "\n", addr);
+		goto error;
+	}
+	ht_uu_insert(nodes_visited, addr, entry->idx);
+	rz_vector_push(to_visit, &addr);
+
+	while (rz_vector_len(to_visit) > 0) {
+		ut64 cur_addr = 0;
+		rz_vector_pop(to_visit, &cur_addr);
+
+		ut8 buf[64] = { 0 };
+		if (rz_io_nread_at(core->io, cur_addr, buf, sizeof(buf)) < 0) {
+			RZ_LOG_ERROR("Could not generate CFG at 0x%" PFMT64x ". rz_io_nread_at() failed at 0x%" PFMT64x ".\n", addr, cur_addr);
+			goto error;
+		}
+
+		RzAnalysisOp op = { 0 };
+		int disas_bytes = rz_analysis_op(core->analysis, &op, cur_addr, buf, sizeof(buf), RZ_ANALYSIS_OP_MASK_DISASM);
+		if (disas_bytes <= 0 || is_leaf_op(&op)) {
+			// A leaf. It was added before to the graph by the parent node.
+			rz_analysis_op_fini(&op);
+			continue;
+		}
+
+		// Calls are not followed, so only the targets of non-call instructions get an edge.
+		bool ok = true;
+		if (!is_call(&op)) {
+			if (op.jump != UT64_MAX) {
+				ok = add_edge_to_cfg(graph, to_visit, nodes_visited, cur_addr, op.jump);
+			}
+			if (ok && op.fail != UT64_MAX) {
+				ok = add_edge_to_cfg(graph, to_visit, nodes_visited, cur_addr, op.fail);
+			}
+		}
+		if (ok && !ignore_next_instr(&op)) {
+			// Add next instruction
+			ok = add_edge_to_cfg(graph, to_visit, nodes_visited, cur_addr, cur_addr + disas_bytes);
+		}
+
+		// Release the op on every path, including the failing ones.
+		rz_analysis_op_fini(&op);
+		if (!ok) {
+			goto error;
+		}
+	}
+
+fini:
+	rz_vector_free(to_visit);
+	ht_uu_free(nodes_visited);
+	return graph;
+
+error:
+	rz_graph_free(graph);
+	graph = NULL;
+	goto fini;
+}
diff --git a/librz/core/cmd/cmd_analysis.c b/librz/core/cmd/cmd_analysis.c
index 207aef8737c..084af25bd6d 100644
--- a/librz/core/cmd/cmd_analysis.c
+++ b/librz/core/cmd/cmd_analysis.c
@@ -4706,6 +4706,11 @@ RZ_IPI RzCmdStatus rz_analysis_graph_icfg_handler(RzCore *core, int argc, const
 	return bool2status(rz_core_graph_print(core, core->offset, RZ_CORE_GRAPH_TYPE_ICFG, format));
 }
 
+RZ_IPI RzCmdStatus rz_analysis_graph_cfg_handler(RzCore *core, int argc, const char **argv) {
+	const RzCoreGraphFormat format = rz_core_graph_format_from_string(argv[1]);
+	return bool2status(rz_core_graph_print(core, core->offset, RZ_CORE_GRAPH_TYPE_CFG, format));
+}
+
 RZ_IPI RzCmdStatus rz_analysis_graph_custom_handler(RzCore *core, int argc, const char **argv) {
 	const RzCoreGraphFormat format = rz_core_graph_format_from_string(argv[1]);
 	return bool2status(rz_core_agraph_print(core, format));
diff --git 
a/librz/core/cmd_descs/cmd_analysis.yaml b/librz/core/cmd_descs/cmd_analysis.yaml index bebc3a4176d..6c8b903f11d 100644 --- a/librz/core/cmd_descs/cmd_analysis.yaml +++ b/librz/core/cmd_descs/cmd_analysis.yaml @@ -1068,6 +1068,14 @@ commands: type: RZ_CMD_ARG_TYPE_CHOICES default_value: "ascii" choices_cb: rz_analysis_graph_format_choices + - name: agF + summary: Control flow graph (without calls) + cname: analysis_graph_cfg + args: + - name: format + type: RZ_CMD_ARG_TYPE_CHOICES + default_value: "ascii" + choices_cb: rz_analysis_graph_format_choices - name: agf summary: Basic blocks function graph cname: analysis_graph_bb_function diff --git a/librz/core/cmd_descs/cmd_descs.c b/librz/core/cmd_descs/cmd_descs.c index ffa81ce09fc..3d0f769e802 100644 --- a/librz/core/cmd_descs/cmd_descs.c +++ b/librz/core/cmd_descs/cmd_descs.c @@ -216,6 +216,7 @@ static const RzCmdDescArg analysis_graph_dataref_global_args[2]; static const RzCmdDescArg analysis_graph_callgraph_function_args[2]; static const RzCmdDescArg analysis_graph_callgraph_global_args[2]; static const RzCmdDescArg analysis_graph_icfg_args[2]; +static const RzCmdDescArg analysis_graph_cfg_args[2]; static const RzCmdDescArg analysis_graph_bb_function_args[2]; static const RzCmdDescArg analysis_graph_imports_args[2]; static const RzCmdDescArg analysis_graph_refs_args[2]; @@ -4092,6 +4093,21 @@ static const RzCmdDescHelp analysis_graph_icfg_help = { .args = analysis_graph_icfg_args, }; +static const RzCmdDescArg analysis_graph_cfg_args[] = { + { + .name = "format", + .type = RZ_CMD_ARG_TYPE_CHOICES, + .default_value = "ascii", + .choices.choices_cb = rz_analysis_graph_format_choices, + + }, + { 0 }, +}; +static const RzCmdDescHelp analysis_graph_cfg_help = { + .summary = "Control flow graph (without calls)", + .args = analysis_graph_cfg_args, +}; + static const RzCmdDescArg analysis_graph_bb_function_args[] = { { .name = "format", @@ -19385,6 +19401,9 @@ RZ_IPI void rzshell_cmddescs_init(RzCore *core) { RzCmdDesc 
*analysis_graph_icfg_cd = rz_cmd_desc_argv_new(core->rcmd, ag_cd, "agCi", rz_analysis_graph_icfg_handler, &analysis_graph_icfg_help); rz_warn_if_fail(analysis_graph_icfg_cd); + RzCmdDesc *analysis_graph_cfg_cd = rz_cmd_desc_argv_new(core->rcmd, ag_cd, "agF", rz_analysis_graph_cfg_handler, &analysis_graph_cfg_help); + rz_warn_if_fail(analysis_graph_cfg_cd); + RzCmdDesc *analysis_graph_bb_function_cd = rz_cmd_desc_argv_new(core->rcmd, ag_cd, "agf", rz_analysis_graph_bb_function_handler, &analysis_graph_bb_function_help); rz_warn_if_fail(analysis_graph_bb_function_cd); diff --git a/librz/core/cmd_descs/cmd_descs.h b/librz/core/cmd_descs/cmd_descs.h index 1154a712536..2acb59d9e4e 100644 --- a/librz/core/cmd_descs/cmd_descs.h +++ b/librz/core/cmd_descs/cmd_descs.h @@ -463,6 +463,8 @@ RZ_IPI RzCmdStatus rz_analysis_graph_callgraph_function_handler(RzCore *core, in RZ_IPI RzCmdStatus rz_analysis_graph_callgraph_global_handler(RzCore *core, int argc, const char **argv); // "agCi" RZ_IPI RzCmdStatus rz_analysis_graph_icfg_handler(RzCore *core, int argc, const char **argv); +// "agF" +RZ_IPI RzCmdStatus rz_analysis_graph_cfg_handler(RzCore *core, int argc, const char **argv); // "agf" RZ_IPI RzCmdStatus rz_analysis_graph_bb_function_handler(RzCore *core, int argc, const char **argv); // "agi" diff --git a/librz/core/core.c b/librz/core/core.c index 3a431ba63c1..9bca0ef3d2a 100644 --- a/librz/core/core.c +++ b/librz/core/core.c @@ -892,7 +892,7 @@ static const char *rizin_argv[] = { "aei", "aeim", "aeip", "aek", "aek-", "aeli", "aelir", "aep?", "aep", "aep-", "aepc", "aets?", "aets+", "aets-", "aes", "aesp", "aesb", "aeso", "aesou", "aess", "aesu", "aesue", "aetr", "aex", "aF", - "ag?", "ag", "aga", "agA", "agc", "agC", "agCi", "agd", "agf", "agi", "agr", "agR", "agx", "agg", "ag-", + "ag?", "ag", "aga", "agA", "agc", "agC", "agCi", "agF", "agd", "agf", "agi", "agr", "agR", "agx", "agg", "ag-", "agn?", "agn", "agn-", "age?", "age", "age-", "agl", "agfl", "ah?", "ah", "ah.", 
"ah-", "ah*", "aha", "ahb", "ahc", "ahe", "ahf", "ahh", "ahi?", "ahi", "ahj", "aho", diff --git a/librz/include/rz_analysis.h b/librz/include/rz_analysis.h index 02a7e89dca6..3ae3fd503af 100644 --- a/librz/include/rz_analysis.h +++ b/librz/include/rz_analysis.h @@ -1596,7 +1596,7 @@ RZ_API bool rz_analysis_op_fini(RzAnalysisOp *op); RZ_API int rz_analysis_op_reg_delta(RzAnalysis *analysis, ut64 addr, const char *name); RZ_API bool rz_analysis_op_is_eob(RzAnalysisOp *op); RZ_API RzList /**/ *rz_analysis_op_list_new(void); -RZ_API int rz_analysis_op(RzAnalysis *analysis, RzAnalysisOp *op, ut64 addr, const ut8 *data, int len, RzAnalysisOpMask mask); +RZ_API int rz_analysis_op(RZ_NONNULL RzAnalysis *analysis, RZ_OUT RzAnalysisOp *op, ut64 addr, const ut8 *data, int len, RzAnalysisOpMask mask); RZ_API RzAnalysisOp *rz_analysis_op_hexstr(RzAnalysis *analysis, ut64 addr, const char *hexstr); RZ_API char *rz_analysis_op_to_string(RzAnalysis *analysis, RzAnalysisOp *op); diff --git a/librz/include/rz_core.h b/librz/include/rz_core.h index d7c7753a733..893ac01d290 100644 --- a/librz/include/rz_core.h +++ b/librz/include/rz_core.h @@ -772,6 +772,7 @@ typedef enum { RZ_CORE_GRAPH_TYPE_NORMAL, ///< Normal graph RZ_CORE_GRAPH_TYPE_IL, ///< RzIL graph RZ_CORE_GRAPH_TYPE_ICFG, ///< Inter-procedual control flow graph + RZ_CORE_GRAPH_TYPE_CFG, ///< control flow graph (without calls) RZ_CORE_GRAPH_TYPE_UNK ///< Unknown graph } RzCoreGraphType; @@ -785,6 +786,7 @@ RZ_API RZ_OWN RzGraph /**/ *rz_core_graph_line(RzCore *core, RZ_API RZ_OWN RzGraph /**/ *rz_core_graph_il(RZ_NONNULL RzCore *core, ut64 addr); RZ_API RZ_OWN RzGraph /**/ *rz_core_graph(RzCore *core, RzCoreGraphType type, ut64 addr); RZ_API RZ_OWN RzGraph /**/ *rz_core_graph_icfg(RZ_NONNULL RzCore *core); +RZ_API RZ_OWN RzGraph /**/ *rz_core_graph_cfg(RZ_NONNULL RzCore *core, ut64 addr); RZ_API RzCoreGraphFormat rz_core_graph_format_from_string(RZ_NULLABLE const char *x); RZ_API RzCoreGraphType 
rz_core_graph_type_from_string(RZ_NULLABLE const char *x); diff --git a/test/db/cmd/cmd_graph b/test/db/cmd/cmd_graph index 1528ea45429..b340252a8f9 100644 --- a/test/db/cmd/cmd_graph +++ b/test/db/cmd/cmd_graph @@ -431,3 +431,337 @@ EXPECT=<