Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify decompiler output #7

Merged
merged 8 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ For a colourless output :
cargo run --bin sierra-decompiler <sierra file> --no-color
```

It is also possible to get a verbose output with more information :

```
cargo run --bin sierra-decompiler <sierra file> --verbose
```

#### Print the contract's Control-Flow Graph

```
Expand Down
Binary file modified doc/images/decompiler-output.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ edition = "2021"
cairo-lang-sierra = "~2.6.3"
colored = "2.1.0"
graphviz-rust = "0.9.0"
lazy_static = "1.4.0"
regex = "1.10.4"

[dev-dependencies]
serde_json = "1.0.116"
Expand Down
2 changes: 1 addition & 1 deletion lib/examples/generate_cfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ fn main() {
let program = SierraProgram::new(content);

// Decompile the Sierra programs
let mut decompiler = program.decompiler();
let mut decompiler = program.decompiler(false);
decompiler.decompile(false);

// Generate & print the dot graph
Expand Down
5 changes: 4 additions & 1 deletion lib/examples/parse_sierra_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ fn main() {
// Init a new SierraProgram with the .sierra file content
let program = SierraProgram::new(content);

// Don't use the verbose output
let verbose_output = false;

// Decompile the Sierra program
let mut decompiler = program.decompiler();
let mut decompiler = program.decompiler(verbose_output);

// Print the decompiled program with use_color=true parameter
// You can disable colored output by passing use_color=false
Expand Down
5 changes: 4 additions & 1 deletion lib/examples/parse_starknet_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@ fn main() {
// Init a new SierraProgram with the deserialized sierra file content
let program = SierraProgram::new(prog_sierra_string);

// Don't use the verbose output
let verbose_output = false;

// Decompile the Sierra program
let mut decompiler = program.decompiler();
let mut decompiler = program.decompiler(verbose_output);

// Print the decompiled program with use_color=true parameter
// You can disable colored output by passing use_color=false
Expand Down
2 changes: 1 addition & 1 deletion lib/src/decompiler/cfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ impl<'a> ControlFlowGraph {
statement.raw_statement()
);
}

dot_graph += &format!(
"\t\t\"{}\" [label=\"{}\" shape=\"box\" style=\"{}\" fillcolor=\"{}\" color=\"{}\" fontname=\"{}\" margin=\"{}\"];\n",
block.name,
Expand Down
61 changes: 47 additions & 14 deletions lib/src/decompiler/decompiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,20 @@ pub struct Decompiler<'a> {
printed_blocks: Vec<BasicBlock>,
/// The function we are currently working on
current_function: Option<Function<'a>>,
/// Enable / disable the verbose output
/// Some statements are not included in the regular output to improve the readability
verbose: bool,
}

impl<'a> Decompiler<'a> {
pub fn new(sierra_program: &'a SierraProgram) -> Self {
/// Creates a decompiler for the given Sierra program.
///
/// The decompiler starts with no functions, an indentation level of 1, no printed
/// blocks and no current function. `verbose` is stored as-is in the `verbose` field.
pub fn new(sierra_program: &'a SierraProgram, verbose: bool) -> Self {
    Decompiler {
        sierra_program,
        functions: Vec::new(),
        indentation: 1,
        printed_blocks: Vec::new(),
        current_function: None,
        // Field-init shorthand instead of the redundant `verbose: verbose`
        verbose,
    }
}

Expand Down Expand Up @@ -346,17 +350,23 @@ impl<'a> Decompiler<'a> {
.iter()
.map(|block| {
self.indentation = 1; // Reset indentation after processing each block
let result = self.basic_block_recursive(block);
result
self.basic_block_recursive(block)
})
.collect::<String>()
} else {
String::new()
};

// Define bold braces for function body enclosure
let bold_brace_open = "{".blue().bold();
let bold_brace_close = "}".blue().bold();

// Combine prototype and body into a formatted string
let purple_comment = format!("// Function {}", index + 1).purple();
format!("{}\n{} {{\n{}}}", purple_comment, prototype, body)
format!(
"{}\n{} {}\n{}{}", // Added bold braces around the function body
purple_comment, prototype, bold_brace_open, body, bold_brace_close
)
})
.collect();

Expand All @@ -368,14 +378,18 @@ impl<'a> Decompiler<'a> {
fn basic_block_recursive(&mut self, block: &BasicBlock) -> String {
let mut basic_blocks_str = String::new();

// Define bold braces once for use in formatting
let bold_brace_open = "{".blue().bold();
let bold_brace_close = "}".blue().bold();

// Add the root basic block
basic_blocks_str += &self.basic_block_to_string(block);

// Add the edges
for edge in &block.edges {
// If branch
if edge.edge_type == EdgeType::ConditionalTrue {
// Indentate the if block
// Indent the if block
self.indentation += 1;

if let Some(edge_basic_block) = self
Expand Down Expand Up @@ -406,13 +420,18 @@ impl<'a> Decompiler<'a> {
.find(|b| edge.destination == b.start_offset)
{
if !self.printed_blocks.contains(edge_basic_block) {
// end of if block
// End of if block
self.indentation -= 1;

basic_blocks_str +=
&("\t".repeat(self.indentation as usize) + "} else {\n");
basic_blocks_str += &format!(
"{}{} else {}{}\n",
"\t".repeat(self.indentation as usize),
bold_brace_close,
bold_brace_open,
"\t".repeat(self.indentation as usize)
);

// Indentate the else block
// Indent the else block
self.indentation += 1;

basic_blocks_str += &self.basic_block_recursive(edge_basic_block);
Expand All @@ -423,7 +442,11 @@ impl<'a> Decompiler<'a> {
self.indentation -= 1;

if !basic_blocks_str.is_empty() {
basic_blocks_str += &("\t".repeat(self.indentation as usize) + "}\n");
basic_blocks_str += &format!(
"{}{}\n",
"\t".repeat(self.indentation as usize),
bold_brace_close
);
}
}
}
Expand All @@ -445,6 +468,9 @@ impl<'a> Decompiler<'a> {
let mut decompiled_basic_block = String::new();
let indentation = "\t".repeat(self.indentation as usize);

// Define the bold brace
let bold_brace_open = "{".blue().bold();

// Append each statement to the string block
for statement in &block.statements {
// If condition
Expand All @@ -453,8 +479,12 @@ impl<'a> Decompiler<'a> {
let function_name = &conditional_branch.function;
let function_arguments = conditional_branch.parameters.join(", ");
decompiled_basic_block += &format!(
"{}if ({}({}) == 0) {{\n",
indentation, function_name, function_arguments
"{}if ({}({}) == 0) {}{}\n",
indentation,
function_name,
function_arguments,
bold_brace_open,
"\t".repeat(self.indentation as usize + 1) // Adjust for nested content indentation
);
}
}
Expand All @@ -465,8 +495,11 @@ impl<'a> Decompiler<'a> {
}
// Default case
else {
decompiled_basic_block +=
&format!("{}{}\n", indentation, statement.formatted_statement());
// Add the formatted statements to the block
// Some statements are only included in the verbose output
if let Some(formatted_statement) = statement.formatted_statement(self.verbose) {
decompiled_basic_block += &format!("{}{}\n", indentation, formatted_statement);
}
}
}

Expand Down
139 changes: 124 additions & 15 deletions lib/src/decompiler/function.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use colored::*;
use lazy_static::lazy_static;
use regex::Regex;

use cairo_lang_sierra::program::BranchTarget;
use cairo_lang_sierra::program::GenFunction;
Expand All @@ -10,6 +12,27 @@ use crate::decompiler::cfg::SierraConditionalBranch;
use crate::extract_parameters;
use crate::parse_element_name;

lazy_static! {
    // The regexes below are wrapped in lazy_static so that each pattern is compiled
    // only once instead of every time a statement is formatted.

    // Libfunc id patterns that are blacklisted from the regular (non-verbose)
    // decompiler output to make it more readable.

    /// Variable drop: `drop` or `drop<...>`.
    ///
    /// The pattern is anchored on both ends: `Regex::is_match` searches anywhere in the
    /// haystack, so an unanchored `drop` would also match any libfunc id that merely
    /// contains "drop" (for example a `function_call<...>` whose callee name contains it).
    static ref DROP_REGEX: Regex = Regex::new(r"^drop(<.*>)?$").unwrap();
    /// Store temporary variable: `store_temp` or `store_temp<...>` (anchored for the
    /// same reason as `DROP_REGEX`).
    static ref STORE_TEMP_REGEX: Regex = Regex::new(r"^store_temp(<.*>)?$").unwrap();

    // Libfunc id patterns whose representation in the decompiler output can be improved.

    /// User defined function call; capture group 1 is the text between the angle brackets.
    static ref FUNCTION_CALL_REGEX: Regex = Regex::new(r"function_call<(.*)>").unwrap();
    /// Arithmetic addition on `felt` / `u` types of width 8, 16, 32, 64, 128 or 252.
    static ref ADDITION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_add").unwrap();
    /// Arithmetic subtraction on the same types.
    static ref SUBSTRACTION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_sub").unwrap();
    /// Arithmetic multiplication on the same types.
    static ref MULTIPLICATION_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)(_overflowing)?_mul").unwrap();
}

/// A struct representing a statement
#[derive(Debug, Clone)]
pub struct SierraStatement {
Expand Down Expand Up @@ -43,7 +66,7 @@ impl SierraStatement {

/// Formats the statement as a string
/// We try to format them in a way that is as similar as possible to the Cairo syntax
pub fn formatted_statement(&self) -> String {
pub fn formatted_statement(&self, verbose: bool) -> Option<String> {
match &self.statement {
// Return statements
GenStatement::Return(vars) => {
Expand All @@ -56,18 +79,17 @@ impl SierraStatement {
formatted.push_str(&format!("v{}", var.id));
}
formatted.push_str(")");
formatted
Some(formatted)
}
// Function calls & variables assignments
// Invocation statements
GenStatement::Invocation(invocation) => {
// Function name in blue
let libfunc_id_str = parse_element_name!(invocation.libfunc_id).blue();
let libfunc_id = parse_element_name!(invocation.libfunc_id);
if !Self::is_function_allowed(&libfunc_id, verbose) {
return None; // Skip formatting if function is not allowed
}
let libfunc_id_str = libfunc_id.blue();

// Function parameters
let parameters = extract_parameters!(invocation.args);
let parameters_str = parameters.join(", ");

// Assigned variables
let assigned_variables = extract_parameters!(&invocation
.branches
.first()
Expand All @@ -79,17 +101,104 @@ impl SierraStatement {
String::new()
};

// Format the string based on the presence of assigned variables
if !assigned_variables.is_empty() {
format!(
if STORE_TEMP_REGEX.is_match(&libfunc_id)
&& assigned_variables_str == parameters.join(", ")
// Print the redundant store_temp in the verbose output
&& !verbose
{
return None; // Do not format if it's a redundant store_temp
}

Some(Self::invocation_formatting(
&assigned_variables_str,
&libfunc_id_str,
&parameters,
))
}
}
}

/// Checks if the given function name is allowed to be included in the formatted statement.
///
/// Returns `true` for every name when `verbose` is set. Otherwise returns `false` for the
/// explicitly listed libfuncs below and for any name matching `DROP_REGEX`, and `true`
/// for everything else.
fn is_function_allowed(function_name: &str, verbose: bool) -> bool {
    // We allow every function in the verbose output
    if verbose {
        return true;
    }

    // Libfuncs that are always hidden from the regular output
    let is_blacklisted_name = matches!(
        function_name,
        "branch_align"
            | "disable_ap_tracking"
            | "finalize_locals"
            | "revoke_ap_tracking"
            | "get_builtin_costs"
    );

    // Check blacklisted functions patterns
    !is_blacklisted_name && !DROP_REGEX.is_match(function_name)
}

/// Formats an invocation statement
///
/// * `assigned_variables_str` - the assigned variables, already joined into one string
///   (may be empty)
/// * `libfunc_id_str` - the libfunc id of the invocation
/// * `parameters` - the invocation arguments, one string per argument
///
/// Three output shapes are produced:
/// * ids matching `FUNCTION_CALL_REGEX` are printed as a call to the captured inner
///   function name (in blue) instead of the `function_call<...>` wrapper
/// * ids matching one of the arithmetic patterns are printed as an infix expression,
///   the parameters being joined with ` + `, ` - ` or ` * `
/// * everything else is printed as `libfunc(params)` with the libfunc id in blue
///
/// In the first and last case the `vars = ` prefix is only emitted when
/// `assigned_variables_str` is not empty.
fn invocation_formatting(
assigned_variables_str: &str,
libfunc_id_str: &str,
parameters: &[String],
) -> String {
// Join parameters for general use
let parameters_str = parameters.join(", ");

// Handling user-defined function calls
if let Some(caps) = FUNCTION_CALL_REGEX.captures(libfunc_id_str) {
// Capture group 1 is the text between the angle brackets of `function_call<...>`
if let Some(inner_func) = caps.get(1) {
let formatted_func = inner_func.as_str();
if !assigned_variables_str.is_empty() {
return format!(
"{} = {}({})",
assigned_variables_str, libfunc_id_str, parameters_str
)
assigned_variables_str,
formatted_func.blue(),
parameters_str
);
} else {
format!("{}({})", libfunc_id_str, parameters_str)
return format!("{}({})", formatted_func.blue(), parameters_str);
}
}
}

// Handling arithmetic operations
// The patterns are tested in order: addition, then subtraction, then multiplication
let operator = if ADDITION_REGEX.is_match(libfunc_id_str) {
"+"
} else if SUBSTRACTION_REGEX.is_match(libfunc_id_str) {
"-"
} else if MULTIPLICATION_REGEX.is_match(libfunc_id_str) {
"*"
} else {
// Return default formatting if no special formatting is applicable
return if !assigned_variables_str.is_empty() {
format!(
"{} = {}({})",
assigned_variables_str,
libfunc_id_str.blue(),
parameters_str
)
} else {
format!("{}({})", libfunc_id_str.blue(), parameters_str)
};
};

// Format arithmetic operations more explicitly
// NOTE(review): unlike the branches above, this always emits the `<vars> = ` prefix,
// even when `assigned_variables_str` is empty - confirm this cannot happen for
// arithmetic libfuncs
format!(
"{} = {}",
assigned_variables_str,
parameters
.iter()
.map(|p| p.as_str())
.collect::<Vec<_>>()
.join(&format!(" {} ", operator))
)
}

/// Return the raw statement, as in the original sierra file
Expand Down
Loading
Loading