RISC-V machine: Use dynamic VADCOP (#1683)

Builds on #1687 Fixed #1572 With this PR, we are using dynamic VADCOP in the RISC-V zk-VM. There were a few smaller fixes needed to make this work. In summary, the changes are as follows: - We set the degree the main machine to `None`, and all fixed lookup machines to the appropriate size. As a consequence, the CPU, all block machines & memory have a dynamic size. - As a consequence, I had to adjust some tests (set the size of all machines, so they can still be run with monolithic provers) *and* was able to remove the `Memory_<size>` machines 🎉 - With the main machine being of flexible size, the prover can chose for how long to run it. We run it for `1 << (MAX_DEGREE_LOG - 2)` steps and compute the bootloader inputs accordingly. With this choice, we can guarantee that the register memory (which can be up to 4x larger than the main machine) does not run out of rows. Note that while we do access `MAX_DEGREE_LOG` in a bunch of places now, this will go away once #1667 is merged, which will allow us to configure the degree range in ASM and for each machine individually. ### Example: ```bash export MAX_LOG_DEGREE=18 cargo run -r --bin powdr-rs compile riscv/tests/riscv_data/many_chunks -o output --continuations cargo run -r --bin powdr-rs execute output/many_chunks.asm -o output --continuations -w cargo run -r --features plonky3,halo2 prove output/many_chunks.asm -d output/chunk_0 --field gl --backend plonky3-composite ``` This leads to the following output: ``` == Proving machine: main (size 65536), stage 0 ==> Proof stage computed in 1.918317417s == Proving machine: main__rom (size 8192), stage 0 ==> Proof stage computed in 45.847375ms == Proving machine: main_binary (size 1024), stage 0 ==> Proof stage computed in 27.718416ms == Proving machine: main_bit2 (size 4), stage 0 ==> Proof stage computed in 15.280667ms == Proving machine: main_bit6 (size 64), stage 0 ==> Proof stage computed in 17.449875ms == Proving machine: main_bit7 (size 128), stage 0 ==> Proof stage computed in 20.717834ms == Proving machine: main_bootloader_inputs (size 262144), stage 0 ==> Proof stage computed in 524.013375ms == Proving machine: main_byte (size 256), stage 0 ==> Proof stage computed in 17.280167ms == Proving machine: main_byte2 (size 65536), stage 0 ==> Proof stage computed in 164.709625ms == Proving machine: main_byte_binary (size 262144), stage 0 ==> Proof stage computed in 504.743917ms == Proving machine: main_byte_compare (size 65536), stage 0 ==> Proof stage computed in 169.881542ms == Proving machine: main_byte_shift (size 65536), stage 0 ==> Proof stage computed in 146.235916ms == Proving machine: main_memory (size 32768), stage 0 ==> Proof stage computed in 326.522167ms == Proving machine: main_poseidon_gl (size 16384), stage 0 ==> Proof stage computed in 1.324662625s == Proving machine: main_regs (size 262144), stage 0 ==> Proof stage computed in 2.009408667s == Proving machine: main_shift (size 32), stage 0 ==> Proof stage computed in 13.71825ms == Proving machine: main_split_gl (size 16384), stage 0 ==> Proof stage computed in 108.019334ms Proof generation took 7.364567s Proof size: 8432928 bytes Writing output/chunk_0/many_chunks_proof.bin. ``` Note that `main_bootloader_inputs` is still equal to the maximum size, we should fix that in a following PR!
powdr-labs · Aug 15, 2024 · 34fdbd1 · 34fdbd1
1 parent 440e555
commit 34fdbd1
Show file tree

Hide file tree

Showing 13 changed files with 59 additions and 286 deletions.
diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml
@@ -11,7 +11,7 @@ on:
 env:
   CARGO_TERM_COLOR: always
   POWDR_GENERATE_PROOFS: "true"
-  MAX_DEGREE_LOG: "10"
+  MAX_DEGREE_LOG: "20"
 
 jobs:
   build:

diff --git a/executor/src/witgen/generator.rs b/executor/src/witgen/generator.rs
@@ -98,6 +98,7 @@ impl<'a, T: FieldElement> Machine<'a, T> for Generator<'a, T> {
 impl<'a, T: FieldElement> Generator<'a, T> {
     pub fn new(
         name: String,
+        degree: DegreeType,
         fixed_data: &'a FixedData<'a, T>,
         connecting_identities: &BTreeMap<u64, &'a Identity<T>>,
         identities: Vec<&'a Identity<T>>,
@@ -107,7 +108,7 @@ impl<'a, T: FieldElement> Generator<'a, T> {
         let data = FinalizableData::new(&witnesses);
 
         Self {
-            degree: fixed_data.common_degree(&witnesses),
+            degree,
             connecting_identities: connecting_identities.clone(),
             name,
             fixed_data,
@@ -214,6 +215,7 @@ impl<'a, T: FieldElement> Generator<'a, T> {
 
         let mut processor = VmProcessor::new(
             self.name().to_string(),
+            self.degree,
             RowIndex::from_degree(row_offset, self.degree),
             self.fixed_data,
             &self.identities,

diff --git a/executor/src/witgen/machines/machine_extractor.rs b/executor/src/witgen/machines/machine_extractor.rs
@@ -167,6 +167,7 @@ pub fn split_out_machines<'a, T: FieldElement>(
                 .unwrap();
             machines.push(KnownMachine::Vm(Generator::new(
                 name_with_type("Vm"),
+                fixed.common_degree(&machine_witnesses),
                 fixed,
                 &connecting_identities,
                 machine_identities,

diff --git a/executor/src/witgen/mod.rs b/executor/src/witgen/mod.rs
@@ -238,8 +238,17 @@ impl<'a, 'b, T: FieldElement> WitnessGenerator<'a, 'b, T> {
         };
 
         let generator = (!base_witnesses.is_empty()).then(|| {
+            let main_size = fixed
+                .common_set_degree(&base_witnesses)
+                // In the dynamic VADCOP setting, we assume that some machines
+                // (e.g. register memory) may take up to 4x the number of rows
+                // of the main machine. By running the main machine only 1/4 of
+                // of the steps, we ensure that no secondary machine will run out
+                // of rows.
+                .unwrap_or(1 << (*MAX_DEGREE_LOG - 2));
             let mut generator = Generator::new(
                 "Main Machine".to_string(),
+                main_size,
                 &fixed,
                 &BTreeMap::new(), // No connecting identities
                 base_identities,
@@ -375,25 +384,18 @@ impl<'a, T: FieldElement> FixedData<'a, T> {
                     poly.array_elements()
                         .map(|(name, poly_id)| {
                             let external_values = external_witness_values.remove(name.as_str());
-                            if let Some(external_values) = &external_values {
-                                if external_values.len() != poly.degree.unwrap() as usize {
-                                    log::debug!(
-                                        "External witness values for column {} were only partially provided \
-                                        (length is {} but the degree is {})",
-                                        name,
-                                        external_values.len(),
-                                        poly.degree.unwrap()
-                                    );
-                                }
-                            }
                             // Remove any hint for witness columns of a later stage
                             // (because it might reference a challenge that is not available yet)
-                            let value = if poly.stage.unwrap_or_default() <= stage.into() { value.as_ref() } else { None };
+                            let value = if poly.stage.unwrap_or_default() <= stage.into() {
+                                value.as_ref()
+                            } else {
+                                None
+                            };
                             WitnessColumn::new(poly_id.id as usize, &name, value, external_values)
                         })
                         .collect::<Vec<_>>()
                 },
-        ));
+            ));
 
         if !external_witness_values.is_empty() {
             let available_columns = witness_cols

diff --git a/executor/src/witgen/vm_processor.rs b/executor/src/witgen/vm_processor.rs
@@ -66,17 +66,17 @@ pub struct VmProcessor<'a, 'b, 'c, T: FieldElement, Q: QueryCallback<T>> {
 }
 
 impl<'a, 'b, 'c, T: FieldElement, Q: QueryCallback<T>> VmProcessor<'a, 'b, 'c, T, Q> {
+    #[allow(clippy::too_many_arguments)]
     pub fn new(
         machine_name: String,
+        degree: DegreeType,
         row_offset: RowIndex,
         fixed_data: &'a FixedData<'a, T>,
         identities: &[&'a Identity<T>],
         witnesses: &'c HashSet<PolyID>,
         data: FinalizableData<T>,
         mutable_state: &'c mut MutableState<'a, 'b, T, Q>,
     ) -> Self {
-        let degree = fixed_data.common_degree(witnesses);
-
         let (identities_with_next, identities_without_next): (Vec<_>, Vec<_>) = identities
             .iter()
             .partition(|identity| identity.contains_next_ref());

diff --git a/riscv/src/code_gen.rs b/riscv/src/code_gen.rs
@@ -132,14 +132,11 @@ pub fn translate_program<F: FieldElement>(
     with_bootloader: bool,
 ) -> String {
     // Do this in a separate function to avoid most of the code being generic on F.
-    let (initial_mem, instructions, degree) =
-        translate_program_impl(program, runtime, with_bootloader);
+    let (initial_mem, instructions) = translate_program_impl(program, runtime, with_bootloader);
 
-    let degree_log = degree.ilog2();
     riscv_machine(
         runtime,
-        degree,
-        &preamble::<F>(runtime, degree_log.into(), with_bootloader),
+        &preamble::<F>(runtime, with_bootloader),
         initial_mem,
         instructions,
     )
@@ -149,7 +146,7 @@ fn translate_program_impl(
     mut program: impl RiscVProgram,
     runtime: &Runtime,
     with_bootloader: bool,
-) -> (Vec<String>, Vec<String>, u64) {
+) -> (Vec<String>, Vec<String>) {
     let mut initial_mem = Vec::new();
     let mut data_code = Vec::new();
     for MemEntry { label, addr, value } in program.take_initial_mem() {
@@ -291,36 +288,19 @@ fn translate_program_impl(
     }
     statements.extend(runtime.ecall_handler());
 
-    // The program ROM needs to fit the degree, so we use the next power of 2.
-    let degree = statements.len().ilog2() + 1;
-    let degree = std::cmp::max(degree, 18);
-    log::info!("Inferred degree 2^{degree}");
-
-    // In practice, these are the lengths of single proofs that we want to support.
-    // Reasoning:
-    // - 18: is the lower bound for the Binary and Shift machines.
-    // - 20: revm's ROM does not fit in 2^19.
-    // - >20: may be needed in the future.
-    // This is an assert for now, but could be a compiler warning or error.
-    // TODO note that if the degree is higher than 18 we might need mux machines for Binary and
-    // Shift.
-    assert!((18..=20).contains(&degree));
-    let degree = 1 << degree;
-
-    (initial_mem, statements, degree)
+    (initial_mem, statements)
 }
 
 fn riscv_machine(
     runtime: &Runtime,
-    degree: u64,
     preamble: &str,
     initial_memory: Vec<String>,
     program: Vec<String>,
 ) -> String {
     format!(
         r#"
 {}
-machine Main with degree: {degree} {{
+machine Main {{
 {}
 
 {}
@@ -346,7 +326,7 @@ let initial_memory: (fe, fe)[] = [
     )
 }
 
-fn preamble<T: FieldElement>(runtime: &Runtime, degree: u64, with_bootloader: bool) -> String {
+fn preamble<T: FieldElement>(runtime: &Runtime, with_bootloader: bool) -> String {
     let bootloader_preamble_if_included = if with_bootloader {
         bootloader_preamble()
     } else {
@@ -391,8 +371,7 @@ fn preamble<T: FieldElement>(runtime: &Runtime, degree: u64, with_bootloader: bo
         + &memory(with_bootloader)
         + r#"
     // =============== Register memory =======================
-"# + format!("std::machines::memory::Memory_{} regs(byte2);", degree + 2)
-        .as_str()
+"# + "std::machines::memory::Memory regs(byte2);"
         + r#"
     // Get the value in register Y.
     instr get_reg Y -> X link ~> X = regs.mload(Y, STEP);

diff --git a/riscv/src/continuations.rs b/riscv/src/continuations.rs
@@ -7,7 +7,7 @@ use powdr_ast::{
     asm_analysis::AnalysisASMFile,
     parsed::{asm::parse_absolute_path, Expression, Number, PilStatement},
 };
-use powdr_executor::constant_evaluator::get_uniquely_sized;
+use powdr_executor::constant_evaluator::MAX_DEGREE_LOG;
 use powdr_number::FieldElement;
 use powdr_pipeline::Pipeline;
 use powdr_riscv_executor::{get_main_machine, Elem, ExecutionTrace, MemoryState, ProfilerOptions};
@@ -74,22 +74,20 @@ where
 {
     let num_chunks = bootloader_inputs.len();
 
+    // The size of the main machine is dynamic, so we need to chose a size.
+    // We chose a size 4x smaller than the maximum size, so that we can guarantee
+    // that the register memory does not run out of rows.
+    // TODO: After #1667 ("Support degree ranges") is merged, this can be set in ASM
+    //       and we can just use the maximum size here.
+    let length = 1 << (*MAX_DEGREE_LOG - 2);
+
     log::info!("Computing fixed columns...");
-    let fixed_cols = pipeline.compute_fixed_cols().unwrap();
+    pipeline.compute_fixed_cols().unwrap();
 
     // Advance the pipeline to the optimized PIL stage, so that it doesn't need to be computed
     // in every chunk.
     pipeline.compute_optimized_pil().unwrap();
 
-    // TODO hacky way to find the degree of the main machine, fix.
-    let length = get_uniquely_sized(&fixed_cols)
-        .unwrap()
-        .iter()
-        .find(|(col, _)| col == "main.STEP")
-        .unwrap()
-        .1
-        .len() as u64;
-
     bootloader_inputs
         .into_iter()
         .enumerate()
@@ -263,26 +261,10 @@ pub fn rust_continuations_dry_run<F: FieldElement>(
     let mut proven_trace = first_real_execution_row;
     let mut chunk_index = 0;
 
-    let length = program
-        .machines()
-        .fold(None, |acc, (_, m)| acc.or(m.degree.clone()))
-        .unwrap();
-
-    let length: usize = match length {
-        Expression::Number(
-            _,
-            Number {
-                value: length,
-                type_: None,
-            },
-        ) => length.try_into().unwrap(),
-        e => unimplemented!(
-            "degree {e} is not supported in continuations as we don't have an evaluator yet"
-        ),
-    };
+    let length = 1 << (*MAX_DEGREE_LOG - 2);
 
     loop {
-        log::info!("\nRunning chunk {}...", chunk_index);
+        log::info!("\nRunning chunk {} for {} steps...", chunk_index, length);
 
         log::info!("Building bootloader inputs for chunk {}...", chunk_index);
         let mut accessed_pages = BTreeSet::new();

diff --git a/std/machines/binary.asm b/std/machines/binary.asm
@@ -5,7 +5,8 @@ use std::utils::unchanged_until;
 // Binary for single bytes using an exhaustive table
 machine ByteBinary with
     latch: latch,
-    operation_id: operation_id
+    operation_id: operation_id,
+    degree: 262144
 {
     operation run<0> P_operation, P_A, P_B -> P_C;