Add stark prover (#552)
* Add stark prover

* Fmt

* Fix updated code

* Add Cairo prover

* Fmt

* Re add docs

* Add cache to CI

* Add build for cairo0 programs

* Add fuzzer

* Fix ci

* Fix ci

* Fix CI requirements

* Fix CI, no python in mac

* Install python requirements after checkout

* Re add reqs

* CI remove install reqs

* Temporary disable check bench

* Re add check cargo bench

* Add coverage

* Nextest ci
MauroToscano authored Sep 19, 2023
1 parent b394a6a commit c2e45d0
Showing 103 changed files with 14,699 additions and 11 deletions.
28 changes: 24 additions & 4 deletions .github/workflows/ci.yaml
@@ -82,26 +82,45 @@ jobs:
with:
toolchain: stable
components: clippy

- name: Set up cargo cache
uses: Swatinem/rust-cache@v2

- name: Python3 build
uses: actions/setup-python@v4
with:
python-version: "3.9"
cache: "pip"

- name: Install cairo-lang toolchain and dependencies
run: pip install -r requirements.txt

- name: Install testing tools
uses: taiki-e/install-action@v2
with:
tool: cargo-nextest,cargo-llvm-cov

- name: Run tests with no std
run: cargo test --package lambdaworks-math --no-default-features
- name: Install cargo-llvm-cov
uses: taiki-e/install-action@cargo-llvm-cov

- name: Run tests and generate code coverage
run: cargo llvm-cov --lcov --output-path lcov.info
run: make coverage

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: lcov.info
fail_ci_if_error: true

test_macos:
name: Test (macOS, Apple silicon)
runs-on: [self-hosted, macOS]
env:
CARGO_TERM_COLOR: always
steps:
- uses: actions/checkout@v3

- name: Rustup toolchain install
uses: dtolnay/rust-toolchain@stable
with:
@@ -110,5 +129,6 @@ jobs:

- name: Run clippy
run: make clippy-metal

- name: Run tests
run: make test-metal
10 changes: 6 additions & 4 deletions Cargo.toml
@@ -1,6 +1,7 @@
[workspace]
members = ["math", "crypto", "gpu", "benches", "provers/plonk"]
members = ["math", "crypto", "gpu", "benches", "provers/plonk", "provers/stark", "provers/cairo"]
exclude = ["ensure-no_std"]
resolver = "2"

[workspace.package]
version = "0.1.3"
@@ -10,9 +11,10 @@ repository = "https://github.com/lambdaclass/lambdaworks"

[workspace.dependencies]
iai-callgrind = "0.3.1"
lambdaworks-crypto = { path = "./crypto", version = "0.1.3" }
lambdaworks-gpu = { path = "./gpu", version = "0.1.3" }
lambdaworks-math = { path = "./math", version = "0.1.3" }
lambdaworks-crypto = { path = "./crypto" }
lambdaworks-gpu = { path = "./gpu" }
lambdaworks-math = { path = "./math" }
stark-platinum-prover = { path = "./provers/stark" }

[profile.bench]
lto = true
29 changes: 26 additions & 3 deletions Makefile
@@ -1,6 +1,21 @@
.PHONY: test clippy docker-shell nix-shell benchmarks benchmark docs build-cuda build-metal clippy-metal test-metal
.PHONY: test clippy docker-shell nix-shell benchmarks benchmark docs build-cuda build-metal clippy-metal test-metal coverage clean

test:
ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))

CAIRO0_PROGRAMS_DIR=provers/cairo/cairo_programs/cairo0
CAIRO0_PROGRAMS:=$(wildcard $(CAIRO0_PROGRAMS_DIR)/*.cairo)
COMPILED_CAIRO0_PROGRAMS:=$(patsubst $(CAIRO0_PROGRAMS_DIR)/%.cairo, $(CAIRO0_PROGRAMS_DIR)/%.json, $(CAIRO0_PROGRAMS))

# Rule to compile Cairo programs for testing purposes.
# If the `cairo-lang` toolchain is installed, programs will be compiled with it.
# Otherwise, the cairo_compile docker image will be used
# When using the docker version, be sure to build the image using `make docker_build_cairo_compiler`.
$(CAIRO0_PROGRAMS_DIR)/%.json: $(CAIRO0_PROGRAMS_DIR)/%.cairo
@echo "Compiling Cairo program..."
@cairo-compile --cairo_path="$(CAIRO0_PROGRAMS_DIR)" $< --output $@ 2> /dev/null --proof_mode || \
docker run --rm -v $(ROOT_DIR)/$(CAIRO0_PROGRAMS_DIR):/pwd/$(CAIRO0_PROGRAMS_DIR) cairo --proof_mode /pwd/$< > $@

test: $(COMPILED_CAIRO0_PROGRAMS)
cargo test

clippy:
@@ -30,14 +45,17 @@ benchmark:
flamegraph_stark:
CARGO_PROFILE_BENCH_DEBUG=true cargo flamegraph --root --bench stark_benchmarks -- --bench

coverage: $(COMPILED_CAIRO0_PROGRAMS)
cargo llvm-cov nextest --lcov --output-path lcov.info

METAL_DIR = math/src/gpu/metal
build-metal:
xcrun -sdk macosx metal $(METAL_DIR)/all.metal -o $(METAL_DIR)/lib.metallib

clippy-metal:
cargo clippy --workspace --all-targets -F metal -- -D warnings

test-metal:
test-metal: $(COMPILED_CAIRO0_PROGRAMS)
cargo test -F metal

CUDA_DIR = math/src/gpu/cuda/shaders
@@ -58,6 +76,11 @@ build-cuda:
docs:
cd docs && mdbook serve --open

STARK_DESERIALIZE_FUZZER = deserialize_stark_proof
proof-deserializer-fuzzer:
cd fuzz/deserialize_stark_proof
cargo +nightly fuzz run --fuzz-dir . $(STARK_DESERIALIZE_FUZZER)

FUZZER = field_from_hex
run-no-gpu-fuzzer:
cd fuzz/no_gpu_fuzz
16 changes: 16 additions & 0 deletions docs/src/SUMMARY.md
@@ -10,3 +10,19 @@
- [Protocol](./plonk/protocol.md)
- [Implementation](./plonk/implementation.md)
- [Circuit API](./plonk/constraint_system.md)

- [STARKs](./starks/starks.md)
- [Recap](./starks/recap.md)
- [Protocol overview](./starks/protocol_overview.md)
- [Protocol](./starks/protocol.md)
- [Implementation](./starks/implementation.md)
- [High Level API](./starks/api.md)
- [Under the hood](./starks/under_the_hood.md)

- [Cairo](./starks/cairo.md)
- [Trace Formal Description](./starks/cairo_trace_succinct.md)
- [Trace Detailed Description](./starks/cairo_trace_descriptive.md)
- [RAP](./starks/cairo_rap.md)
- [Virtual Columns](./starks/virtual_cols.md)
- [Built-ins](./starks/builtins.md)
- [CLI](./starks/cairo_cli.md)
160 changes: 160 additions & 0 deletions docs/src/starks/api.md
@@ -0,0 +1,160 @@
# High level API: Fibonacci example

Let's go over the main test we use for our prover, where we compute a STARK proof for a fibonacci trace with 8 rows and then verify it.

```rust
fn test_prove_fib() {
let trace = simple_fibonacci::fibonacci_trace([FE::from(1), FE::from(1)], 8);
let proof_options = ProofOptions::default_test_options();

let pub_inputs = FibonacciPublicInputs {
a0: FE::one(),
a1: FE::one(),
};

let proof = prove::<F, FibonacciAIR<F>>(&trace, &pub_inputs, &proof_options).unwrap();
assert!(verify::<F, FibonacciAIR<F>>(&proof, &pub_inputs, &proof_options));
}
```

The proving system revolves around the `prove` function, which takes a trace, public inputs and proof options and generates a proof, and a `verify` function, which takes the generated proof, the public inputs and the proof options, outputting `true` when the proof verifies correctly and `false` otherwise. Note that the public inputs and proof options must be the same for both: the public inputs are shared by the Cairo runner with both the prover and the verifier, and the proof options should have been agreed on by the two parties beforehand.

Below we go over the main things involved in this code.

## AIR

To prove the integrity of a fibonacci trace, we first need to define what it means for a trace to be valid. As we've talked about in the recap, this involves defining an `AIR` for our computation where we specify both the boundary and transition constraints for a fibonacci sequence.

In code, this is done through the `AIR` trait. Implementing `AIR` requires defining a couple methods, but the two most important ones are `boundary_constraints` and `compute_transition`, which encode the boundary and transition constraints of our computation.


### Boundary Constraints
For our Fibonacci `AIR`, boundary constraints look like this:

```rust
fn boundary_constraints(
&self,
_rap_challenges: &Self::RAPChallenges,
) -> BoundaryConstraints<Self::Field> {
let a0 = BoundaryConstraint::new_simple(0, self.pub_inputs.a0.clone());
let a1 = BoundaryConstraint::new_simple(1, self.pub_inputs.a1.clone());

BoundaryConstraints::from_constraints(vec![a0, a1])
}
```

The `BoundaryConstraint` struct represents a specific boundary constraint, meaning "column `i` at row `j` should be equal to `x`". In this case, because we have only one column, we are using the `new_simple` method to simply say

- Row `0` should equal the public input `a0`, which in the typical fibonacci is set to 1.
- Row `1` should equal the public input `a1`, which in the typical fibonacci is set to 1.

In the case of multiple columns, the `new` method exists so you can also specify the column number.

After instantiating each of these constraints, we return all of them through the struct `BoundaryConstraints`.

### Transition Constraints

The way we specify our fibonacci transition constraint looks like this:

```rust
fn compute_transition(
&self,
frame: &air::frame::Frame<Self::Field>,
_rap_challenges: &Self::RAPChallenges,
) -> Vec<FieldElement<Self::Field>> {
let first_row = frame.get_row(0);
let second_row = frame.get_row(1);
let third_row = frame.get_row(2);

vec![third_row[0] - second_row[0] - first_row[0]]
}
```

It's not completely obvious why this is how we chose to express transition constraints, so let's talk a little about it.

What we need to specify in this method is the relationship that has to hold between the current step of computation and the previous ones. For this, we get a `Frame` as an argument. This is a struct holding the current step (i.e. the current row of the trace) and all previous ones needed to encode our constraint. In our case, this is the current row and the two previous ones. To access rows we use the `get_row` method. The current step is always the last row (in our case `2`), with the others coming before it.

In our `compute_transition` method we get the three rows we need and return

```rust
third_row[0] - second_row[0] - first_row[0]
```

which is the value that needs to be zero for our constraint to hold. Because we support multiple transition constraints, we actually return a vector with one value per constraint, so the first element holds the first constraint value and so on.

## TraceTable

After defining our AIR, we create our specific trace to prove against it.

```rust
let trace = fibonacci_trace([FE17::new(1), FE17::new(1)], 4);

let trace_table = TraceTable {
table: trace.clone(),
num_cols: 1,
};
```

`TraceTable` is the struct holding execution traces. The `num_cols` field says how many columns the trace has, while the `table` field is a `Vec` holding the actual values of the trace in row-major order. This means that if the trace looks like this

```
| 1 | 2 |
| 3 | 4 |
| 5 | 6 |
```

then its corresponding `TraceTable` is

```rust
let trace_table = TraceTable {
table: vec![1, 2, 3, 4, 5, 6],
num_cols: 2,
};
```

In our example, `fibonacci_trace` is just a helper function we use to generate the fibonacci trace with `4` rows and `[1, 1]` as the first two values.
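As a sketch of what such a helper might look like (the real one works over lambdaworks field elements; the plain `u64` arithmetic and the exact signature here are assumptions for illustration only):

```rust
// Hypothetical sketch of the fibonacci trace helper: builds a single-column,
// row-major trace of the requested length from two initial values.
fn fibonacci_trace(initial: [u64; 2], rows: usize) -> Vec<u64> {
    let mut trace = vec![initial[0], initial[1]];
    while trace.len() < rows {
        let n = trace.len();
        // Each new row is the sum of the two previous ones.
        trace.push(trace[n - 1] + trace[n - 2]);
    }
    trace
}

fn main() {
    // A 4-row trace starting with [1, 1].
    let trace = fibonacci_trace([1, 1], 4);
    assert_eq!(trace, vec![1, 1, 2, 3]);
    println!("{:?}", trace);
}
```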

## AIR Context

After specifying our constraints and trace, the only thing left to do is provide a few parameters related to the STARK protocol and our `AIR`. These specify things such as the number of columns of the trace and proof configuration, among others. They are all encapsulated in the `AirContext` struct, which in our example we instantiate like this:

```rust
let context = AirContext {
options: ProofOptions {
blowup_factor: 2,
fri_number_of_queries: 1,
coset_offset: 3,
},
trace_columns: trace_table.n_cols,
transition_degrees: vec![1],
transition_exemptions: vec![2],
transition_offsets: vec![0, 1, 2],
num_transition_constraints: 1,
};
```

Let's go over each of them:

- `options` requires a `ProofOptions` struct holding specific parameters related to the STARK protocol to be used when proving. They are:
- The `blowup_factor` used for the trace LDE extension, a parameter related to the security of the protocol.
- The number of queries performed by the verifier when doing `FRI`, also related to security.
- The `offset` used for the LDE coset. This depends on the field being used for the STARK proof.
- `trace_columns` is the number of columns of the trace.
- `transition_degrees` holds the degree of each transition constraint.
- `transition_exemptions` is a `Vec` which tells us, for each column, the number of rows at the end of the trace to which the transition constraints should not apply. In the example, the transition constraints won't apply on the last two rows of the trace.
- `transition_offsets` holds the indexes that define a frame for our `AIR`. In our fibonacci case, these are `[0, 1, 2]` because we need the current row and the two previous ones to define our transition constraint.
- `num_transition_constraints` simply says how many transition constraints our `AIR` has.

## Proving execution

Having defined all of the above, proving our fibonacci example amounts to instantiating the necessary structs and then calling `prove`, passing the trace, public inputs and proof options. We use a simple hasher implementation called `TestHasher` to handle Merkle proof building.

```rust
let proof = prove(&trace_table, &pub_inputs, &proof_options);
```

Verifying is then done by passing the proof of execution along with the same `AIR` to the `verify` function.

```rust
assert!(verify(&proof, &pub_inputs, &proof_options));
```
27 changes: 27 additions & 0 deletions docs/src/starks/builtins.md
@@ -0,0 +1,27 @@
# Builtins

We can understand a built-in as a small machine that we can use to efficiently prove a subprogram. For example, it may prove a hash, like Poseidon or Keccak, verify a signature, or check that some variable is in a range, at a lower cost than implementing the same logic with plain Cairo VM instructions.

For each subprogram we want to prove, we will have a machine with its own set of constraints in the prover. Let's take the Range Check built-in as an example. This built-in enforces that a value $X$ is between $0$ and $2^{128}$.

The logic behind the built-in is pretty straightforward. We split $X$ into 8 parts. So we will say that $X = X_{0} + X_{1} * 2^{16} + X_{2} * 2^{32} + X_{3} * 2^{48} + ... + X_{7} * 2^{112}$

Then we require that each limb is in the range $0 \leq X_{i} < 2^{16}$. The idea here is to reuse the Range Check constraint that checks that the offsets are between $0$ and $2^{16}$. If we can decompose the number into eight limbs of 16 bits and no more limbs are needed, it follows that the number is less than $2^{128}$.
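The decomposition can be sketched concretely. The snippet below is a toy illustration only: plain `u128` arithmetic stands in for field elements, and the helper names are made up for this example.

```rust
// Split X into eight 16-bit limbs, little-endian: X = sum of limbs[i] * 2^(16*i).
fn decompose(x: u128) -> [u128; 8] {
    let mut limbs = [0u128; 8];
    for i in 0..8 {
        limbs[i] = (x >> (16 * i)) & 0xffff;
    }
    limbs
}

// Recombine the limbs back into the original value.
fn recompose(limbs: &[u128; 8]) -> u128 {
    limbs.iter().rev().fold(0, |acc, &limb| (acc << 16) | limb)
}

fn main() {
    let x: u128 = (1 << 100) + 12345;
    let limbs = decompose(x);
    // Each limb is constrained to 0 <= limb < 2^16 ...
    assert!(limbs.iter().all(|&l| l < 1 << 16));
    // ... and the eight limbs reconstruct X, which implies X < 2^128.
    assert_eq!(recompose(&limbs), x);
}
```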

The missing ingredient is how we make sure that each value $X$ that should be constrained by the built-in is actually constrained.

The process starts with the VM designating special memory positions for the built-in. You can think of this as a way for the VM to communicate with the specific built-in machine through shared memory.

The VM won't save any instructions associated with how the built-in arrives at its result and will assume the output is correct. You can think of this as an I/O device in any computer, which works in a similar fashion: the VM delegates the work to an external device and takes the result from memory.

Knowing which specific positions of the memory are used by the built-in, the prover can add more constraints that enforce the calculations of the built-in were done correctly. Let's see how it's done.

In the constraint system of the VM, we will treat every memory cell associated with the built-in as any other, that is, as a pair of address and value with the usual constraints. Additionally, we will add more constraints that are specific to the built-in.

Let's say we have multiple values $x_{i}$, such that each $x_{i}$ needs to be range checked by the built-in. Let each value be stored in a memory address $m_{i}$. Let the initial expected memory position for the range check built-in be $r_{0}$. Here $r_{0}$ is a value known and a public input.

We then need to enforce that $m_{0} = r_{0}$, and that $m_{i+1} = m_{i} + 1$ for the built-in cells. These constraints have to be put on top of the constraints used by the memory, and that's the key to all of this. If these constraints weren't in place, there would be no enforced link between the built-in and the VM, which would lead to security issues.
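These two address constraints can be sketched as a plain check. The function name and types below are illustrative, not the prover's actual API; in the real system the same conditions are enforced polynomially over the trace columns.

```rust
// Check that the built-in's memory addresses start at the public input r0
// and are contiguous (m[0] == r0, m[i+1] == m[i] + 1).
fn builtin_addresses_ok(addresses: &[u64], r0: u64) -> bool {
    if addresses.is_empty() || addresses[0] != r0 {
        return false;
    }
    addresses.windows(2).all(|w| w[1] == w[0] + 1)
}

fn main() {
    assert!(builtin_addresses_ok(&[10, 11, 12, 13], 10));
    assert!(!builtin_addresses_ok(&[10, 12, 13], 10)); // a gap breaks the link
    assert!(!builtin_addresses_ok(&[11, 12], 10));     // wrong start address
}
```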

As one last detail, since the memory cells share the same constraints, and we only add extra ones for the built-in cells, we can treat the built-in cells as a subcolumn. In that case, we can assign one cell to the built-in every $N$ memory cells, giving a ratio that will be observable in the layout.

This gives a better relationship between the number of cells used for the VM and for the built-in, yielding an improvement in performance.
1 change: 1 addition & 0 deletions docs/src/starks/cairo.md
@@ -0,0 +1 @@
# Cairo
1 change: 1 addition & 0 deletions docs/src/starks/cairo_cli.md
@@ -0,0 +1 @@
# CLI
45 changes: 45 additions & 0 deletions docs/src/starks/cairo_rap.md
@@ -0,0 +1,45 @@
## Extended columns

The verifier sends challenges $\alpha, z \in \mathbb{F}$ (or the prover samples them from the transcript). Additional columns are added to incorporate the memory constraints. To define them the prover follows these steps:
1. Stack the rows of the submatrix of $T$ defined by the columns `pc, dst_addr, op0_addr, op1_addr` into a vector `a` of length $2^{n+2}$ (this means that the first entries of `a` are `pc[0], dst_addr[0], op0_addr[0], op1_addr[0], pc[1], dst_addr[1],...`).
1. Stack the rows of the submatrix defined by the columns `inst, dst, op0, op1` into a vector `v` of length $2^{n+2}$.
1. Define $M_{\text{Mem}}\in\mathbb{F}^{2^{n+2}\times 2}$ to be the matrix with columns $a$, $v$.
1. Define $M_{\text{MemRepl}}\in\mathbb{F}^{2^{n+2}\times 2}$ to be the matrix that's equal to $M_{\text{Mem}}$ in the first $2^{n+2} - L_{\text{pub}}$ rows, and its last $L_{\text{pub}}$ entries are the addresses and values of the actual public memory (program code).
1. Sort $M_{\text{MemRepl}}$ by the first column in increasing order. The result is a matrix $M_{\text{MemReplSorted}}$ of size $2^{n+2}\times 2$. Denote its columns by $a'$ and $v'$.
1. Compute the vector $p$ of size $2^{n+2}$ with entries
$$ p_i := \prod_{j=0}^i\frac{z - (a_j' + \alpha v_j')}{z - (a_j + \alpha v_j)}$$
1. Reshape the matrix $M_{\text{MemReplSorted}}$ into a $2^n\times 8$ matrix in row-major order. Reshape the vector $p$ into a $2^n \times 4$ matrix in row-major order.
1. Concatenate these two matrices. The result is a matrix $M_\text{MemRAP2}$ of size $2^n \times 12$.
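The permutation argument behind the $p$ column can be sketched concretely. The Rust snippet below is a toy illustration under stated assumptions: it uses a small Mersenne prime field ($p = 2^{61} - 1$, not the field Cairo actually uses) and made-up helper names. Each $(a_i, v_i)$ pair is compressed as $a_i + \alpha v_i$, and the running product telescopes to $1$ exactly when the sorted column is a permutation of the original.

```rust
// Toy prime field: p = 2^61 - 1 (a Mersenne prime), for illustration only.
const P: u128 = (1 << 61) - 1;

fn mul(a: u128, b: u128) -> u128 { (a % P) * (b % P) % P }
fn sub(a: u128, b: u128) -> u128 { (a % P + P - b % P) % P }

// Square-and-multiply exponentiation mod P.
fn pow(mut b: u128, mut e: u128) -> u128 {
    let mut acc = 1;
    while e > 0 {
        if e & 1 == 1 { acc = mul(acc, b); }
        b = mul(b, b);
        e >>= 1;
    }
    acc
}

// Fermat inverse, valid because P is prime.
fn inv(a: u128) -> u128 { pow(a, P - 2) }

// Compress an (address, value) pair into a single field element.
fn compress(a: u128, v: u128, alpha: u128) -> u128 { (a + mul(alpha, v)) % P }

// Build the running-product column p_i over the original and sorted pairs.
fn permutation_column(
    orig: &[(u128, u128)],
    sorted: &[(u128, u128)],
    z: u128,
    alpha: u128,
) -> Vec<u128> {
    let mut acc = 1;
    let mut col = Vec::with_capacity(orig.len());
    for (&(a, v), &(a_s, v_s)) in orig.iter().zip(sorted) {
        let num = sub(z, compress(a_s, v_s, alpha));
        let den = sub(z, compress(a, v, alpha));
        acc = mul(acc, mul(num, inv(den)));
        col.push(acc);
    }
    col
}

fn main() {
    let orig = vec![(3, 30), (1, 10), (2, 20)];
    let mut sorted = orig.clone();
    sorted.sort(); // sort by address (first component)
    let col = permutation_column(&orig, &sorted, 123456789, 987654321);
    // When `sorted` is a permutation of `orig`, the product telescopes to 1.
    assert_eq!(*col.last().unwrap(), 1);
}
```

The same cumulative-product idea is reused below for the range check columns, with the sorted offsets $b'$ playing the role of the sorted memory pairs.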

The verifier sends challenge $z' \in \mathbb{F}$. Further columns are added to incorporate the range check constraints following these steps:

1. Stack the rows of the submatrix of $T$ defined by the columns in the group `offsets` into a vector $b$ of length $3\cdot 2^n$.
1. Sort the values of $b$ in increasing order. Let $b'$ be the result.
1. Compute the vector $p'$ of size $3\cdot 2^n$ with entries
$$ p_i' := \prod_{j=0}^i\frac{z' - b_j'}{z' - b_j}$$
1. Reshape $b'$ and $p'$ into matrices of size $2^n \times 3$ each and concatenate them into a matrix $M_\text{RangeCheckRAP2}$ of size $2^n \times 6$.
1. Concatenate $M_\text{MemRAP2}$ and $M_\text{RangeCheckRAP2}$ into a matrix $M_\text{RAP2}$ of size $2^n \times 18$.


Using the notation described at the beginning, $m'=33$, $m''=18$ and $m=52$. These are, respectively, the number of columns of the first and second parts of the RAP, and the total number of columns.


Putting it all together, the final layout of the trace is the following:

```
A. flags (16) : Decoded instruction flags
B. res (1) : Res value
C. pointers (2) : Temporary memory pointers (ap and fp)
D. mem_a (4) : Memory addresses (pc, dst_addr, op0_addr, op1_addr)
E. mem_v (4) : Memory values (inst, dst, op0, op1)
F. offsets (3) : (off_dst, off_op0, off_op1)
G. derived (3) : (t0, t1, mul)
H. mem_a' (4) : Sorted memory addresses
I. mem_v' (4) : Sorted memory values
J. mem_p (4) : Memory permutation argument columns
K. offsets_b' (3) : Sorted offset columns
L. offsets_p' (3) : Range check permutation argument columns
A B C D E F G H I J K L
|xxxxxxxxxxxxxxxx|x|xx|xxxx|xxxx|xxx|xxx|xxxx|xxxx|xxxx|xxx|xxx|
```