diff --git a/README.md b/README.md index 3e35da1..706f58d 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,12 @@ For a colourless output : cargo run --bin sierra-decompiler --no-color ``` +It is also possible to get a verbose output with more information : + +``` +cargo run --bin sierra-decompiler --verbose +``` + #### Print the contract's Control-Flow Graph ``` diff --git a/doc/images/decompiler-output.png b/doc/images/decompiler-output.png index bd934c9..aff60dd 100644 Binary files a/doc/images/decompiler-output.png and b/doc/images/decompiler-output.png differ diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 30c21aa..b7a9d34 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -9,6 +9,8 @@ edition = "2021" cairo-lang-sierra = "~2.6.3" colored = "2.1.0" graphviz-rust = "0.9.0" +lazy_static = "1.4.0" +regex = "1.10.4" [dev-dependencies] serde_json = "1.0.116" diff --git a/lib/examples/generate_cfg.rs b/lib/examples/generate_cfg.rs index ca6cc29..ea361c2 100644 --- a/lib/examples/generate_cfg.rs +++ b/lib/examples/generate_cfg.rs @@ -7,7 +7,7 @@ fn main() { let program = SierraProgram::new(content); // Decompile the Sierra programs - let mut decompiler = program.decompiler(); + let mut decompiler = program.decompiler(false); decompiler.decompile(false); // Generate & print the dot graph diff --git a/lib/examples/parse_sierra_file.rs b/lib/examples/parse_sierra_file.rs index f91f917..5c00479 100644 --- a/lib/examples/parse_sierra_file.rs +++ b/lib/examples/parse_sierra_file.rs @@ -6,8 +6,11 @@ fn main() { // Init a new SierraProgram with the .sierra file content let program = SierraProgram::new(content); + // Don't use the verbose output + let verbose_output = false; + // Decompile the Sierra program - let mut decompiler = program.decompiler(); + let mut decompiler = program.decompiler(verbose_output); // Print the decompiled program with use_color=true parameter // You can disable colored output by passing use_color=false diff --git 
a/lib/examples/parse_starknet_file.rs b/lib/examples/parse_starknet_file.rs index 6df155c..e8d6540 100644 --- a/lib/examples/parse_starknet_file.rs +++ b/lib/examples/parse_starknet_file.rs @@ -18,8 +18,11 @@ fn main() { // Init a new SierraProgram with the deserialized sierra file content let program = SierraProgram::new(prog_sierra_string); + // Don't use the verbose output + let verbose_output = false; + // Decompile the Sierra program - let mut decompiler = program.decompiler(); + let mut decompiler = program.decompiler(verbose_output); // Print the decompiled program with use_color=true parameter // You can disable colored output by passing use_color=false diff --git a/lib/src/decompiler/cfg.rs b/lib/src/decompiler/cfg.rs index 5d1ad3f..8c1f053 100644 --- a/lib/src/decompiler/cfg.rs +++ b/lib/src/decompiler/cfg.rs @@ -206,7 +206,7 @@ impl<'a> ControlFlowGraph { statement.raw_statement() ); } - + dot_graph += &format!( "\t\t\"{}\" [label=\"{}\" shape=\"box\" style=\"{}\" fillcolor=\"{}\" color=\"{}\" fontname=\"{}\" margin=\"{}\"];\n", block.name, diff --git a/lib/src/decompiler/decompiler.rs b/lib/src/decompiler/decompiler.rs index 11e9b01..37ac50a 100644 --- a/lib/src/decompiler/decompiler.rs +++ b/lib/src/decompiler/decompiler.rs @@ -25,16 +25,20 @@ pub struct Decompiler<'a> { printed_blocks: Vec, /// The function we are currently working on current_function: Option>, + /// Enable / disable the verbose output + /// Some statements are not included in the regular output to improve the readability + verbose: bool, } impl<'a> Decompiler<'a> { - pub fn new(sierra_program: &'a SierraProgram) -> Self { + pub fn new(sierra_program: &'a SierraProgram, verbose: bool) -> Self { Decompiler { sierra_program, functions: Vec::new(), indentation: 1, printed_blocks: Vec::new(), current_function: None, + verbose: verbose, } } @@ -346,17 +350,23 @@ impl<'a> Decompiler<'a> { .iter() .map(|block| { self.indentation = 1; // Reset indentation after processing each block - let 
result = self.basic_block_recursive(block); - result + self.basic_block_recursive(block) }) .collect::() } else { String::new() }; + // Define bold braces for function body enclosure + let bold_brace_open = "{".blue().bold(); + let bold_brace_close = "}".blue().bold(); + // Combine prototype and body into a formatted string let purple_comment = format!("// Function {}", index + 1).purple(); - format!("{}\n{} {{\n{}}}", purple_comment, prototype, body) + format!( + "{}\n{} {}\n{}{}", // Added bold braces around the function body + purple_comment, prototype, bold_brace_open, body, bold_brace_close + ) }) .collect(); @@ -368,6 +378,10 @@ impl<'a> Decompiler<'a> { fn basic_block_recursive(&mut self, block: &BasicBlock) -> String { let mut basic_blocks_str = String::new(); + // Define bold braces once for use in formatting + let bold_brace_open = "{".blue().bold(); + let bold_brace_close = "}".blue().bold(); + // Add the root basic block basic_blocks_str += &self.basic_block_to_string(block); @@ -375,7 +389,7 @@ impl<'a> Decompiler<'a> { for edge in &block.edges { // If branch if edge.edge_type == EdgeType::ConditionalTrue { - // Indentate the if block + // Indent the if block self.indentation += 1; if let Some(edge_basic_block) = self @@ -406,13 +420,18 @@ impl<'a> Decompiler<'a> { .find(|b| edge.destination == b.start_offset) { if !self.printed_blocks.contains(edge_basic_block) { - // end of if block + // End of if block self.indentation -= 1; - basic_blocks_str += - &("\t".repeat(self.indentation as usize) + "} else {\n"); + basic_blocks_str += &format!( + "{}{} else {}{}\n", + "\t".repeat(self.indentation as usize), + bold_brace_close, + bold_brace_open, + "\t".repeat(self.indentation as usize) + ); - // Indentate the else block + // Indent the else block self.indentation += 1; basic_blocks_str += &self.basic_block_recursive(edge_basic_block); @@ -423,7 +442,11 @@ impl<'a> Decompiler<'a> { self.indentation -= 1; if !basic_blocks_str.is_empty() { - basic_blocks_str 
+= &("\t".repeat(self.indentation as usize) + "}\n"); + basic_blocks_str += &format!( + "{}{}\n", + "\t".repeat(self.indentation as usize), + bold_brace_close + ); } } } @@ -445,6 +468,9 @@ impl<'a> Decompiler<'a> { let mut decompiled_basic_block = String::new(); let indentation = "\t".repeat(self.indentation as usize); + // Define the bold brace + let bold_brace_open = "{".blue().bold(); + // Append each statement to the string block for statement in &block.statements { // If condition @@ -453,8 +479,12 @@ impl<'a> Decompiler<'a> { let function_name = &conditional_branch.function; let function_arguments = conditional_branch.parameters.join(", "); decompiled_basic_block += &format!( - "{}if ({}({}) == 0) {{\n", - indentation, function_name, function_arguments + "{}if ({}({}) == 0) {}{}\n", + indentation, + function_name, + function_arguments, + bold_brace_open, + "\t".repeat(self.indentation as usize + 1) // Adjust for nested content indentation ); } } @@ -465,8 +495,11 @@ impl<'a> Decompiler<'a> { } // Default case else { - decompiled_basic_block += - &format!("{}{}\n", indentation, statement.formatted_statement()); + // Add the formatted statements to the block + // Some statements are only included in the verbose output + if let Some(formatted_statement) = statement.formatted_statement(self.verbose) { + decompiled_basic_block += &format!("{}{}\n", indentation, formatted_statement); + } } } diff --git a/lib/src/decompiler/function.rs b/lib/src/decompiler/function.rs index 5ac73ee..3c656b8 100644 --- a/lib/src/decompiler/function.rs +++ b/lib/src/decompiler/function.rs @@ -1,4 +1,6 @@ use colored::*; +use lazy_static::lazy_static; +use regex::Regex; use cairo_lang_sierra::program::BranchTarget; use cairo_lang_sierra::program::GenFunction; @@ -10,6 +12,27 @@ use crate::decompiler::cfg::SierraConditionalBranch; use crate::extract_parameters; use crate::parse_element_name; +lazy_static! 
{ + /// These libfunc id patterns are blacklisted from the regular (non-verbose) decompiler output + /// to make it more readable + /// + /// We use lazy_static for performance reasons + + // Variable drop + static ref DROP_REGEX: Regex = Regex::new(r"drop(<.*>)?").unwrap(); + // Store temporary variable + static ref STORE_TEMP_REGEX: Regex = Regex::new(r"store_temp(<.*>)?").unwrap(); + + /// These are libfunc id patterns whose representation in the decompiler output can be improved + + // User defined function call + static ref FUNCTION_CALL_REGEX: Regex = Regex::new(r"function_call<(.*)>").unwrap(); + // Arithmetic operations + static ref ADDITION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_add").unwrap(); + static ref SUBSTRACTION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_sub").unwrap(); + static ref MULTIPLICATION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_mul").unwrap(); +} + /// A struct representing a statement #[derive(Debug, Clone)] pub struct SierraStatement { @@ -43,7 +66,7 @@ impl SierraStatement { /// Formats the statement as a string /// We try to format them in a way that is as similar as possible to the Cairo syntax - pub fn formatted_statement(&self) -> String { + pub fn formatted_statement(&self, verbose: bool) -> Option<String> { match &self.statement { // Return statements GenStatement::Return(vars) => { @@ -56,18 +79,17 @@ impl SierraStatement { formatted.push_str(&format!("v{}", var.id)); } formatted.push_str(")"); - formatted + Some(formatted) } - // Function calls & variables assignments + // Invocation statements GenStatement::Invocation(invocation) => { - // Function name in blue - let libfunc_id_str = parse_element_name!(invocation.libfunc_id).blue(); + let libfunc_id = parse_element_name!(invocation.libfunc_id); + if !Self::is_function_allowed(&libfunc_id, verbose) { + return None; // Skip formatting if function is not allowed + } + let 
libfunc_id_str = libfunc_id.blue(); - // Function parameters let parameters = extract_parameters!(invocation.args); - let parameters_str = parameters.join(", "); - - // Assigned variables let assigned_variables = extract_parameters!(&invocation .branches .first() @@ -79,17 +101,104 @@ impl SierraStatement { String::new() }; - // Format the string based on the presence of assigned variables - if !assigned_variables.is_empty() { - format!( + if STORE_TEMP_REGEX.is_match(&libfunc_id) + && assigned_variables_str == parameters.join(", ") + // Print the redundant store_temp in the verbose output + && !verbose + { + return None; // Do not format if it's a redundant store_temp + } + + Some(Self::invocation_formatting( + &assigned_variables_str, + &libfunc_id_str, + &parameters, + )) + } + } + } + + /// Checks if the given function name is allowed to be included in the formatted statement + fn is_function_allowed(function_name: &str, verbose: bool) -> bool { + // We allow every function in the verbose output + if verbose { + return true; + } + + match function_name { + "branch_align" + | "disable_ap_tracking" + | "finalize_locals" + | "revoke_ap_tracking" + | "get_builtin_costs" => false, + _ => { + // Check blacklisted functions patterns + if DROP_REGEX.is_match(function_name) { + false + } else { + true + } + } + } + } + + /// Formats an invocation statement + fn invocation_formatting( + assigned_variables_str: &str, + libfunc_id_str: &str, + parameters: &[String], + ) -> String { + // Join parameters for general use + let parameters_str = parameters.join(", "); + + // Handling user-defined function calls + if let Some(caps) = FUNCTION_CALL_REGEX.captures(libfunc_id_str) { + if let Some(inner_func) = caps.get(1) { + let formatted_func = inner_func.as_str(); + if !assigned_variables_str.is_empty() { + return format!( "{} = {}({})", - assigned_variables_str, libfunc_id_str, parameters_str - ) + assigned_variables_str, + formatted_func.blue(), + parameters_str + ); } else { - 
format!("{}({})", libfunc_id_str, parameters_str) + return format!("{}({})", formatted_func.blue(), parameters_str); } } } + + // Handling arithmetic operations + let operator = if ADDITION_REGEX.is_match(libfunc_id_str) { + "+" + } else if SUBSTRACTION_REGEX.is_match(libfunc_id_str) { + "-" + } else if MULTIPLICATION_REGEX.is_match(libfunc_id_str) { + "*" + } else { + // Return default formatting if no special formatting is applicable + return if !assigned_variables_str.is_empty() { + format!( + "{} = {}({})", + assigned_variables_str, + libfunc_id_str.blue(), + parameters_str + ) + } else { + format!("{}({})", libfunc_id_str.blue(), parameters_str) + }; + }; + + // Format arithmetic operations more explicitly + format!( + "{} = {}", + assigned_variables_str, + parameters + .iter() + .map(|p| p.as_str()) + .collect::>() + .join(&format!(" {} ", operator)) + ) } /// Return the raw statement, as in the original sierra file diff --git a/lib/src/sierra_program.rs b/lib/src/sierra_program.rs index e70c3d3..2b462d3 100644 --- a/lib/src/sierra_program.rs +++ b/lib/src/sierra_program.rs @@ -29,7 +29,7 @@ impl SierraProgram { } /// Decompiles the Sierra program and returns a `Decompiler` instance - pub fn decompiler(&self) -> Decompiler { - Decompiler::new(self) + pub fn decompiler(&self, verbose: bool) -> Decompiler { + Decompiler::new(self, verbose) } } diff --git a/lib/tests/test_decompiler.rs b/lib/tests/test_decompiler.rs index 43fad56..d24c176 100644 --- a/lib/tests/test_decompiler.rs +++ b/lib/tests/test_decompiler.rs @@ -8,8 +8,62 @@ fn test_decompiler_output() { // Init a new SierraProgram with the .sierra file content let program = SierraProgram::new(content); + // Don't use the verbose output + let verbose_output = false; + + // Decompile the Sierra program + let mut decompiler = program.decompiler(verbose_output); + + // Decompile the sierra program with a colorless output + let use_color = false; + let decompiler_output = decompiler.decompile(use_color); + + 
let expected_output = r#"type felt252 +type Const +type NonZero + +libfunc disable_ap_tracking +libfunc dup +libfunc felt252_is_zero +libfunc branch_align +libfunc drop +libfunc store_temp +libfunc drop> +libfunc felt252_add +libfunc const_as_immediate> +libfunc felt252_sub +libfunc function_call + +// Function 1 +func examples::fib::fib (v0: felt252, v1: felt252, v2: felt252) -> (felt252) { + v2, v3 = dup(v2) + if (felt252_is_zero(v3) == 0) { + v1, v5 = dup(v1) + v6 = v0 + v5 + v7 = const_as_immediate>() + v8 = v2 - v7 + v9 = user@examples::fib::fib(v1, v6, v8) + return (v9) + } else { + return (v0) + } +}"#; + assert_eq!(decompiler_output, expected_output); +} + +#[test] +fn test_decompiler_verbose_output() { + // Read file content + let content = include_str!("../../examples/sierra/fib.sierra").to_string(); + + // Init a new SierraProgram with the .sierra file content + let program = SierraProgram::new(content); + + // Use the verbose output + let verbose_output = true; + // Decompile the Sierra program - let mut decompiler = program.decompiler(); + let mut decompiler = program.decompiler(verbose_output); // Decompile the sierra program with a colorless output let use_color = false; @@ -35,19 +89,19 @@ libfunc function_call func examples::fib::fib (v0: felt252, v1: felt252, v2: felt252) -> (felt252) { disable_ap_tracking() v2, v3 = dup(v2) - if (felt252_is_zero(v3) == 0) { + if (felt252_is_zero(v3) == 0) { branch_align() drop>(v4) v1, v5 = dup(v1) - v6 = felt252_add(v0, v5) + v6 = v0 + v5 v7 = const_as_immediate>() - v8 = felt252_sub(v2, v7) + v8 = v2 - v7 v1 = store_temp(v1) v6 = store_temp(v6) v8 = store_temp(v8) - v9 = function_call(v1, v6, v8) + v9 = user@examples::fib::fib(v1, v6, v8) return (v9) - } else { + } else { branch_align() drop(v1) drop(v2) diff --git a/lib/tests/test_dotgraph.rs b/lib/tests/test_dotgraph.rs index e49fcfc..bab3e69 100644 --- a/lib/tests/test_dotgraph.rs +++ b/lib/tests/test_dotgraph.rs @@ -8,8 +8,11 @@ fn 
test_dogtgraph_cfg_output() { // Init a new SierraProgram with the .sierra file content let program = SierraProgram::new(content); + // Don't use the verbose output + let verbose_output = false; + // Decompile the Sierra program - let mut decompiler = program.decompiler(); + let mut decompiler = program.decompiler(verbose_output); // Decompile the sierra program with a colorless output let use_color = false; diff --git a/sierra-decompiler/src/main.rs b/sierra-decompiler/src/main.rs index f7db042..414082f 100644 --- a/sierra-decompiler/src/main.rs +++ b/sierra-decompiler/src/main.rs @@ -28,6 +28,10 @@ struct Args { /// Output directory for the CFG file #[clap(long, default_value = "./output_cfg")] cfg_output: PathBuf, + + /// Enable verbose decompiler output + #[clap(short, long, default_value = "false")] + verbose: bool, } fn main() { @@ -49,7 +53,7 @@ fn main() { // Color output by default and if CFG is not enabled to avoid bugs in the SVG output let colored_output = !args.no_color ^ args.cfg; - let mut decompiler = program.decompiler(); + let mut decompiler = program.decompiler(args.verbose); let decompiled_code = decompiler.decompile(colored_output); if args.cfg {