From 14079450cb29f48f7d9519b8232abbe6b8b79dee Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Wed, 7 Aug 2024 18:22:59 -0400 Subject: [PATCH 01/18] feat: cache lookup commitments (optional) --- halo2_proofs/Cargo.toml | 13 +- halo2_proofs/src/plonk/lookup/prover.rs | 47 ++++++ halo2_proofs/src/plonk/mv_lookup/prover.rs | 35 ++++- halo2_proofs/src/plonk/prover.rs | 158 ++++++++++++++++++--- halo2_proofs/src/poly.rs | 1 + halo2_proofs/src/poly/commitment.rs | 23 ++- 6 files changed, 248 insertions(+), 29 deletions(-) diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index 83adc2e834..1d62e93851 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -59,15 +59,16 @@ blake2b_simd = "1" # MSRV 1.66.0 sha3 = "0.9.1" rand_chacha = "0.3" maybe-rayon = { version = "0.1.1"} -lazy_static = { version = "1", optional = true } env_logger = "0.10.0" rustc-hash = "2.0.0" - +lazy_static = "1.4.0" # GPU Icicle integration icicle = { git = "https://github.com/ingonyama-zk/icicle.git", branch = "rust/large-bucket-factor-msm", optional = true } rustacuda = { version = "0.1", optional = true } -serde = { version = "1", optional = true, features = ["derive"] } serde_derive = { version = "1", optional = true} +bincode = { version = "1.3.3", default_features = false } +serde = { version = "1.0.126", features = ["derive"] } + # Developer tooling dependencies plotters = { version = "0.3.0", default-features = false, optional = true } @@ -88,7 +89,7 @@ serde_json = "1" getrandom = { version = "0.2", features = ["js"] } [features] -default = ["batch", "bits"] +default = ["batch", "bits", "derive_serde"] dev-graph = ["plotters", "tabbycat"] test-dev-graph = [ "dev-graph", @@ -102,10 +103,10 @@ thread-safe-region = [] sanity-checks = [] batch = ["rand_core/getrandom"] circuit-params = [] -counter = ["lazy_static"] +counter = [] icicle_gpu = ["icicle", "rustacuda"] mv-lookup = [] -cost-estimator = ["serde", 
"serde_derive"] +cost-estimator = ["serde_derive"] derive_serde = ["halo2curves/derive_serde"] parallel-poly-read = [] diff --git a/halo2_proofs/src/plonk/lookup/prover.rs b/halo2_proofs/src/plonk/lookup/prover.rs index a122944f9d..b27b451fff 100644 --- a/halo2_proofs/src/plonk/lookup/prover.rs +++ b/halo2_proofs/src/plonk/lookup/prover.rs @@ -3,7 +3,9 @@ use super::super::{ ProvingKey, }; use super::Argument; +use crate::helpers::SerdeCurveAffine; use crate::plonk::evaluation::evaluate; +use crate::SerdeFormat; use crate::{ arithmetic::{eval_polynomial, parallelize, CurveAffine}, poly::{ @@ -23,6 +25,7 @@ use maybe_rayon::prelude::{ }; use maybe_rayon::slice::ParallelSliceMut; use rand_core::RngCore; + use std::{ collections::BTreeMap, iter, @@ -49,6 +52,49 @@ pub(in crate::plonk) struct Committed { permuted_table_blind: Blind, pub(in crate::plonk) product_poly: Polynomial, product_blind: Blind, + pub(in crate::plonk) commitment: C, +} + +impl Committed { + pub fn write( + &self, + writer: &mut W, + format: SerdeFormat, + ) -> std::io::Result<()> + where + ::ScalarExt: crate::helpers::SerdePrimeField, + { + self.permuted_input_poly.write(writer, format)?; + self.permuted_input_blind.write(writer, format)?; + self.permuted_table_poly.write(writer, format)?; + self.permuted_table_blind.write(writer, format)?; + self.product_poly.write(writer, format)?; + self.product_blind.write(writer, format)?; + + self.commitment.write(writer, format) + } + + pub fn read(reader: &mut R, format: SerdeFormat) -> std::io::Result + where + ::ScalarExt: crate::helpers::SerdePrimeField, + { + let permuted_input_poly = Polynomial::read(reader, format)?; + let permuted_input_blind = Blind::read(reader, format)?; + let permuted_table_poly = Polynomial::read(reader, format)?; + let permuted_table_blind = Blind::read(reader, format)?; + let product_poly = Polynomial::read(reader, format)?; + let product_blind = Blind::read(reader, format)?; + let commitment = C::read(reader, format)?; + 
Ok(Committed { + permuted_input_poly, + permuted_input_blind, + permuted_table_poly, + permuted_table_blind, + product_poly, + product_blind, + commitment, + }) + } } pub(in crate::plonk) struct Evaluated { @@ -306,6 +352,7 @@ impl Permuted { permuted_table_blind: self.permuted_table_blind, product_poly: z, product_blind, + commitment: product_commitment, }) } } diff --git a/halo2_proofs/src/plonk/mv_lookup/prover.rs b/halo2_proofs/src/plonk/mv_lookup/prover.rs index 49aa77bc2d..b907cc8970 100644 --- a/halo2_proofs/src/plonk/mv_lookup/prover.rs +++ b/halo2_proofs/src/plonk/mv_lookup/prover.rs @@ -2,7 +2,9 @@ use super::super::{ circuit::Expression, ChallengeBeta, ChallengeTheta, ChallengeX, Error, ProvingKey, }; use super::Argument; +use crate::helpers::SerdeCurveAffine; use crate::plonk::evaluation::evaluate; +use crate::SerdeFormat; use crate::{ arithmetic::{eval_polynomial, parallelize, CurveAffine}, poly::{ @@ -11,7 +13,6 @@ use crate::{ }, transcript::{EncodedChallenge, TranscriptWrite}, }; - use ff::WithSmallOrderMulGroup; use group::{ ff::{BatchInvert, Field}, @@ -39,6 +40,37 @@ pub(in crate::plonk) struct Prepared { pub(in crate::plonk) struct Committed { pub(in crate::plonk) m_poly: Polynomial, pub(in crate::plonk) phi_poly: Polynomial, + pub(in crate::plonk) commitment: C, +} + +impl Committed { + pub fn write( + &self, + writer: &mut W, + format: SerdeFormat, + ) -> std::io::Result<()> + where + ::ScalarExt: crate::helpers::SerdePrimeField, + { + self.m_poly.write(writer, format)?; + self.phi_poly.write(writer, format)?; + self.commitment.write(writer, format) + } + + pub fn read(reader: &mut R, format: SerdeFormat) -> std::io::Result + where + ::ScalarExt: crate::helpers::SerdePrimeField, + { + let m_poly = Polynomial::read(reader, format)?; + let phi_poly = Polynomial::read(reader, format)?; + let commitment = C::read(reader, format)?; + + Ok(Committed { + m_poly, + phi_poly, + commitment, + }) + } } pub(in crate::plonk) struct Evaluated { @@ -362,6 
+394,7 @@ impl Prepared { Ok(Committed { m_poly: pk.vk.domain.lagrange_to_coeff(self.m_values), phi_poly: pk.vk.domain.lagrange_to_coeff(phi), + commitment: phi_commitment, }) } } diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index e617311eec..17a26b3122 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -1,5 +1,6 @@ use ff::{Field, FromUniformBytes, WithSmallOrderMulGroup}; use group::Curve; +use halo2curves::serde::SerdeObject; use rand_core::RngCore; use rustc_hash::FxBuildHasher; use rustc_hash::FxHashMap as HashMap; @@ -7,6 +8,14 @@ use rustc_hash::FxHashSet as HashSet; use std::collections::BTreeSet; use std::iter; use std::ops::RangeTo; +use std::time::Instant; + +lazy_static::lazy_static! { + /// an optional directory to read and write the lookup table cache + static ref LOOKUP_COMMITMENT_CACHE: Option = std::env::var("LOOKUP_COMMITMENT_CACHE") + .ok() + .map(std::path::PathBuf::from); +} use super::{ circuit::{ @@ -59,7 +68,8 @@ pub fn create_proof< transcript: &mut T, ) -> Result<(), Error> where - Scheme::Scalar: WithSmallOrderMulGroup<3> + FromUniformBytes<64>, + Scheme::Scalar: WithSmallOrderMulGroup<3> + FromUniformBytes<64> + SerdeObject, + Scheme::Curve: SerdeObject, { #[cfg(feature = "counter")] { @@ -81,8 +91,10 @@ where } } + let start = Instant::now(); // Hash verification key into transcript pk.vk.hash_into(transcript)?; + log::trace!("Hashing verification key: {:?}", start.elapsed()); let domain = &pk.vk.domain; let mut meta = ConstraintSystem::default(); @@ -100,6 +112,7 @@ where pub instance_polys: Vec>, } + let start = Instant::now(); let instance: Vec> = instances .iter() .map(|instance| -> Result, Error> { @@ -154,6 +167,7 @@ where }) }) .collect::, _>>()?; + log::trace!("Instance preparation: {:?}", start.elapsed()); #[derive(Clone)] struct AdviceSingle { @@ -306,6 +320,7 @@ where } } + let start = Instant::now(); let (advice, challenges) = { let mut advice = vec![ 
AdviceSingle:: { @@ -320,6 +335,7 @@ where let unusable_rows_start = params.n() as usize - (meta.blinding_factors() + 1); for current_phase in pk.vk.cs.phases() { + let _start = Instant::now(); let column_indices = meta .advice_column_phase .iter() @@ -336,6 +352,7 @@ where for ((circuit, advice), instances) in circuits.iter().zip(advice.iter_mut()).zip(instances) { + let _start = Instant::now(); let mut witness = WitnessCollection { k: params.k(), current_phase, @@ -351,6 +368,7 @@ where _marker: std::marker::PhantomData, }; + let _start = Instant::now(); // Synthesize the circuit to obtain the witness and other information. ConcreteCircuit::FloorPlanner::synthesize( &mut witness, @@ -359,6 +377,7 @@ where meta.constants.clone(), )?; + let _start = Instant::now(); let mut advice_values = batch_invert_assigned::( witness .advice @@ -374,6 +393,7 @@ where .collect(), ); + let _start = Instant::now(); // Add blinding factors to advice columns for (column_index, advice_values) in column_indices.iter().zip(&mut advice_values) { if !witness.unblinded_advice.contains(column_index) { @@ -387,6 +407,7 @@ where } } + let _start = Instant::now(); // Compute commitments to advice column polynomials let blinds: Vec<_> = column_indices .iter() @@ -412,6 +433,7 @@ where let advice_commitments = advice_commitments; drop(advice_commitments_projective); + let _start = Instant::now(); for commitment in &advice_commitments { transcript.write_point(*commitment)?; } @@ -439,10 +461,14 @@ where (advice, challenges) }; + log::trace!("Advice preparation: {:?}", start.elapsed()); // Sample theta challenge for keeping lookup columns linearly independent + let start = Instant::now(); let theta: ChallengeTheta<_> = transcript.squeeze_challenge_scalar(); + log::trace!("Theta challenge: {:?}", start.elapsed()); + let start = Instant::now(); #[cfg(feature = "mv-lookup")] let lookups: Vec>> = instance .iter() @@ -498,14 +524,20 @@ where .collect() }) .collect::, _>>()?; + log::trace!("Lookup 
preparation: {:?}", start.elapsed()); // Sample beta challenge + let start = Instant::now(); let beta: ChallengeBeta<_> = transcript.squeeze_challenge_scalar(); + log::trace!("Beta challenge: {:?}", start.elapsed()); // Sample gamma challenge + let start = Instant::now(); let gamma: ChallengeGamma<_> = transcript.squeeze_challenge_scalar(); + log::trace!("Gamma challenge: {:?}", start.elapsed()); // Commit to permutations. + let start = Instant::now(); let permutations: Vec> = instance .iter() .zip(advice.iter()) @@ -524,31 +556,82 @@ where ) }) .collect::, _>>()?; + log::trace!("Permutation commitment: {:?}", start.elapsed()); #[cfg(feature = "mv-lookup")] - let lookups: Vec>> = lookups - .into_iter() - .map(|lookups| -> Result, _> { - // Construct and commit to products for each lookup - lookups - .into_iter() - .map(|lookup| lookup.commit_grand_sum(pk, params, beta, &mut rng, transcript)) - .collect::, _>>() - }) - .collect::, _>>()?; + let commit_lookups = || -> Result>>, _> { + lookups + .into_iter() + .map(|lookups| -> Result, _> { + // Construct and commit to products for each lookup + lookups + .into_iter() + .map(|lookup| lookup.commit_grand_sum(pk, params, beta, &mut rng, transcript)) + .collect::, _>>() + }) + .collect::, _>>() + }; #[cfg(not(feature = "mv-lookup"))] - let lookups: Vec>> = lookups - .into_iter() - .map(|lookups| -> Result, _> { - // Construct and commit to products for each lookup - lookups - .into_iter() - .map(|lookup| lookup.commit_product(pk, params, beta, gamma, &mut rng, transcript)) - .collect::, _>>() - }) - .collect::, _>>()?; + let commit_lookups = || -> Result>>, _> { + lookups + .into_iter() + .map(|lookups| -> Result, _> { + // Construct and commit to products for each lookup + lookups + .into_iter() + .map(|lookup| { + lookup.commit_product(pk, params, beta, gamma, &mut rng, transcript) + }) + .collect::, _>>() + }) + .collect::, _>>() + }; + + let start = Instant::now(); + // if LOOKUP_COMMITMENT_CACHE is set, try to load 
the lookup commitments from the cache + let lookups = match LOOKUP_COMMITMENT_CACHE.clone() { + Some(cache_path) => { + let cache_path = cache_path.join("lookup_commitments"); + if cache_path.exists() { + log::info!("Loading lookup commitments from cache"); + let cache_file = std::fs::File::open(cache_path)?; + let mut cache_reader = std::io::BufReader::new(cache_file); + let lookups = (0..instance.len()) + .map(|_| { + (0..pk.vk.cs.lookups.len()) + .map(|_| { + let c = lookup::prover::Committed::read( + &mut cache_reader, + crate::SerdeFormat::RawBytesUnchecked, + )?; + transcript.write_point(c.commitment)?; + Ok(c) + }) + .collect::>>() + }) + .collect::, _>>()?; + Ok(lookups) + } else { + log::info!("Caching lookup commitments"); + let cache = commit_lookups()?; + let cache_file = std::fs::File::create(cache_path)?; + let mut cache_writer = std::io::BufWriter::new(cache_file); + for lookup in &cache { + for commitment in lookup { + commitment + .write(&mut cache_writer, crate::SerdeFormat::RawBytesUnchecked)?; + } + } + Ok(cache) + } + } + None => commit_lookups(), + }?; + + log::trace!("Lookup commitment: {:?}", start.elapsed()); + let start = Instant::now(); let shuffles: Vec>> = instance .iter() .zip(advice.iter()) @@ -576,14 +659,20 @@ where .collect::, _>>() }) .collect::, _>>()?; + log::trace!("Shuffle commitment: {:?}", start.elapsed()); + let start = Instant::now(); // Commit to the vanishing argument's random polynomial for blinding h(x_3) let vanishing = vanishing::Argument::commit(params, domain, &mut rng, transcript)?; + log::trace!("Vanishing commitment: {:?}", start.elapsed()); // Obtain challenge for keeping all separate gates linearly independent + let start = Instant::now(); let y: ChallengeY<_> = transcript.squeeze_challenge_scalar(); + log::trace!("Y challenge: {:?}", start.elapsed()); // Calculate the advice polys + let start = Instant::now(); let advice: Vec> = advice .into_iter() .map( @@ -601,8 +690,10 @@ where }, ) .collect(); + 
log::trace!("Advice calculation: {:?}", start.elapsed()); // Evaluate the h(X) polynomial + let start = Instant::now(); let h_poly = pk.ev.evaluate_h( pk, &advice @@ -622,13 +713,19 @@ where &shuffles, &permutations, ); + log::trace!("H(X) evaluation: {:?}", start.elapsed()); // Construct the vanishing argument's h(X) commitments + let start = Instant::now(); let vanishing = vanishing.construct(params, domain, h_poly, &mut rng, transcript)?; + log::trace!("Vanishing construction: {:?}", start.elapsed()); + let start = Instant::now(); let x: ChallengeX<_> = transcript.squeeze_challenge_scalar(); let xn = x.pow([params.n()]); + log::trace!("X challenge: {:?}", start.elapsed()); + let start = Instant::now(); if P::QUERY_INSTANCE { // Compute and hash instance evals for each circuit instance for instance in instance.iter() { @@ -650,7 +747,9 @@ where } } } + log::trace!("Instance evaluation: {:?}", start.elapsed()); + let start = Instant::now(); // Compute and hash advice evals for each circuit instance for advice in advice.iter() { // Evaluate polynomials at omega^i x @@ -670,7 +769,9 @@ where transcript.write_scalar(*eval)?; } } + log::trace!("Advice evaluation: {:?}", start.elapsed()); + let start = Instant::now(); // Compute and hash fixed evals (shared across all circuit instances) let fixed_evals: Vec<_> = meta .fixed_queries @@ -679,24 +780,34 @@ where eval_polynomial(&pk.fixed_polys[column.index()], domain.rotate_omega(*x, at)) }) .collect(); + log::trace!("Fixed evaluation: {:?}", start.elapsed()); // Hash each fixed column evaluation + let start = Instant::now(); for eval in fixed_evals.iter() { transcript.write_scalar(*eval)?; } + log::trace!("Fixed evaluation hashing: {:?}", start.elapsed()); + let start = Instant::now(); let vanishing = vanishing.evaluate(x, xn, domain, transcript)?; + log::trace!("Vanishing evaluation: {:?}", start.elapsed()); // Evaluate common permutation data + let start = Instant::now(); pk.permutation.evaluate(x, transcript)?; + 
log::trace!("Permutation evaluation: {:?}", start.elapsed()); // Evaluate the permutations, if any, at omega^i x. + let start = Instant::now(); let permutations: Vec> = permutations .into_iter() .map(|permutation| -> Result<_, _> { permutation.construct().evaluate(pk, x, transcript) }) .collect::, _>>()?; + log::trace!("Permutation evaluation: {:?}", start.elapsed()); // Evaluate the lookups, if any, at omega^i x. + let start = Instant::now(); let lookups: Vec>> = lookups .into_iter() .map(|lookups| -> Result, _> { @@ -706,8 +817,10 @@ where .collect::, _>>() }) .collect::, _>>()?; + log::trace!("Lookup evaluation: {:?}", start.elapsed()); // Evaluate the shuffles, if any, at omega^i x. + let start = Instant::now(); let shuffles: Vec>> = shuffles .into_iter() .map(|shuffles| -> Result, _> { @@ -717,7 +830,9 @@ where .collect::, _>>() }) .collect::, _>>()?; + log::trace!("Shuffle evaluation: {:?}", start.elapsed()); + let start = Instant::now(); let instances = instance .iter() .zip(advice.iter()) @@ -767,6 +882,7 @@ where .chain(pk.permutation.open(x)) // We query the h(X) polynomial at x .chain(vanishing.open(x)); + log::trace!("Open queries: {:?}", start.elapsed()); #[cfg(feature = "counter")] { diff --git a/halo2_proofs/src/poly.rs b/halo2_proofs/src/poly.rs index 753b65eb82..cd022cff12 100644 --- a/halo2_proofs/src/poly.rs +++ b/halo2_proofs/src/poly.rs @@ -9,6 +9,7 @@ use crate::SerdeFormat; use group::ff::{BatchInvert, Field}; #[cfg(feature = "parallel-poly-read")] use maybe_rayon::{iter::ParallelIterator, prelude::ParallelSliceMut}; + use std::fmt::Debug; use std::io; use std::marker::PhantomData; diff --git a/halo2_proofs/src/poly/commitment.rs b/halo2_proofs/src/poly/commitment.rs index ebc26fe9c3..9398750611 100644 --- a/halo2_proofs/src/poly/commitment.rs +++ b/halo2_proofs/src/poly/commitment.rs @@ -3,11 +3,12 @@ use super::{ strategy::Guard, Coeff, LagrangeCoeff, Polynomial, }; -use crate::poly::Error; use crate::transcript::{EncodedChallenge, 
TranscriptRead, TranscriptWrite}; +use crate::{helpers::SerdePrimeField, poly::Error}; use ff::Field; use halo2curves::CurveAffine; use rand_core::RngCore; + use std::{ fmt::Debug, io::{self}, @@ -197,6 +198,26 @@ impl Default for Blind { } } +impl Blind { + /// Writes polynomial to buffer using `SerdePrimeField::write`. + pub(crate) fn write( + &self, + writer: &mut W, + format: crate::SerdeFormat, + ) -> std::io::Result<()> { + self.0.write(writer, format)?; + Ok(()) + } + + /// Reads polynomial from buffer using `SerdePrimeField::read`. + pub(crate) fn read( + reader: &mut R, + format: crate::SerdeFormat, + ) -> std::io::Result { + Ok(Blind(F::read(reader, format)?)) + } +} + impl Blind { /// Given `rng` creates new blinding scalar pub fn new(rng: &mut R) -> Self { From ef2607e622ab438bb58b8b98f0d2d2c95197a6cc Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Wed, 7 Aug 2024 23:36:18 -0400 Subject: [PATCH 02/18] chore: parallelize inv calc for mv --- halo2_proofs/src/plonk/evaluation.rs | 32 ++++++++++++++--- halo2_proofs/src/plonk/prover.rs | 53 ++++++++++++++-------------- 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index afd1fd3ea6..440b1530be 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -15,6 +15,10 @@ use crate::{ }; use group::ff::{Field, PrimeField, WithSmallOrderMulGroup}; +use maybe_rayon::iter::IndexedParallelIterator; +use maybe_rayon::iter::IntoParallelRefIterator; +use maybe_rayon::iter::IntoParallelRefMutIterator; +use maybe_rayon::iter::ParallelIterator; use super::{shuffle, ConstraintSystem, Expression}; @@ -387,29 +391,35 @@ impl Evaluator { let l_active_row = &pk.l_active_row; let p = &pk.vk.cs.permutation; + let start = std::time::Instant::now(); // Calculate the advice and instance cosets let advice: Vec>> = advice_polys .iter() .map(|advice_polys| { 
advice_polys - .iter() + .par_iter() .map(|poly| domain.coeff_to_extended(poly.clone())) .collect() }) .collect(); + log::info!(" - Advice cosets: {:?}", start.elapsed()); + + let start = std::time::Instant::now(); let instance: Vec>> = instance_polys .iter() .map(|instance_polys| { instance_polys - .iter() + .par_iter() .map(|poly| domain.coeff_to_extended(poly.clone())) .collect() }) .collect(); + log::info!(" - Instance cosets: {:?}", start.elapsed()); let mut values = domain.empty_extended(); // Core expression evaluations + let num_threads = multicore::current_num_threads(); for ((((advice, instance), lookups), shuffles), permutation) in advice .iter() @@ -419,6 +429,7 @@ impl Evaluator { .zip(permutations.iter()) { // Custom gates + let start = std::time::Instant::now(); multicore::scope(|scope| { let chunk_size = (size + num_threads - 1) / num_threads; for (thread_idx, values) in values.chunks_mut(chunk_size).enumerate() { @@ -446,8 +457,10 @@ impl Evaluator { }); } }); + log::info!(" - Custom gates: {:?}", start.elapsed()); // Permutations + let start = std::time::Instant::now(); let sets = &permutation.sets; if !sets.is_empty() { let blinding_factors = pk.vk.cs.blinding_factors(); @@ -528,14 +541,16 @@ impl Evaluator { } }); } + log::info!(" - Permutations: {:?}", start.elapsed()); + let start = std::time::Instant::now(); // For lookups, compute inputs_inv_sum = ∑ 1 / (f_i(X) + α) // The outer vector has capacity self.lookups.len() // The middle vector has capacity domain.extended_len() // The inner vector has capacity #[cfg(feature = "mv-lookup")] let inputs_inv_sum: Vec>> = lookups - .iter() + .par_iter() .enumerate() .map(|(n, _)| { let (inputs_lookup_evaluator, _) = &self.lookups[n]; @@ -550,8 +565,8 @@ impl Evaluator { // For each compressed input column, evaluate at ω^i and add beta // This is a vector of length self.lookups[n].0.len() let inputs_values: Vec = inputs_lookup_evaluator - .iter() - .zip(inputs_eval_data.iter_mut()) + .par_iter() + 
.zip(inputs_eval_data.par_iter_mut()) .map(|(input_lookup_evaluator, input_eval_data)| { input_lookup_evaluator.evaluate( input_eval_data, @@ -586,7 +601,11 @@ impl Evaluator { inputs_inv_sums }) .collect(); + #[cfg(feature = "mv-lookup")] + log::info!(" - Lookups inv sum: {:?}", start.elapsed()); + #[cfg(feature = "mv-lookup")] + let start = std::time::Instant::now(); // Lookups #[cfg(feature = "mv-lookup")] for (n, lookup) in lookups.iter().enumerate() { @@ -769,8 +788,10 @@ impl Evaluator { } }); } + log::info!(" - Lookups constraints: {:?}", start.elapsed()); // Shuffle constraints + let start = std::time::Instant::now(); for (n, shuffle) in shuffles.iter().enumerate() { let product_coset = pk.vk.domain.coeff_to_extended(shuffle.product_poly.clone()); @@ -831,6 +852,7 @@ impl Evaluator { } }); } + log::info!(" - Shuffle constraints: {:?}", start.elapsed()); } values } diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index 17a26b3122..51898962ee 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -1,6 +1,7 @@ use ff::{Field, FromUniformBytes, WithSmallOrderMulGroup}; use group::Curve; use halo2curves::serde::SerdeObject; + use rand_core::RngCore; use rustc_hash::FxBuildHasher; use rustc_hash::FxHashMap as HashMap; @@ -94,7 +95,7 @@ where let start = Instant::now(); // Hash verification key into transcript pk.vk.hash_into(transcript)?; - log::trace!("Hashing verification key: {:?}", start.elapsed()); + log::info!("Hashing verification key: {:?}", start.elapsed()); let domain = &pk.vk.domain; let mut meta = ConstraintSystem::default(); @@ -167,7 +168,7 @@ where }) }) .collect::, _>>()?; - log::trace!("Instance preparation: {:?}", start.elapsed()); + log::info!("Instance preparation: {:?}", start.elapsed()); #[derive(Clone)] struct AdviceSingle { @@ -461,12 +462,12 @@ where (advice, challenges) }; - log::trace!("Advice preparation: {:?}", start.elapsed()); + log::info!("Advice preparation: {:?}", 
start.elapsed()); // Sample theta challenge for keeping lookup columns linearly independent let start = Instant::now(); let theta: ChallengeTheta<_> = transcript.squeeze_challenge_scalar(); - log::trace!("Theta challenge: {:?}", start.elapsed()); + log::info!("Theta challenge: {:?}", start.elapsed()); let start = Instant::now(); #[cfg(feature = "mv-lookup")] @@ -524,17 +525,17 @@ where .collect() }) .collect::, _>>()?; - log::trace!("Lookup preparation: {:?}", start.elapsed()); + log::info!("Lookup preparation: {:?}", start.elapsed()); // Sample beta challenge let start = Instant::now(); let beta: ChallengeBeta<_> = transcript.squeeze_challenge_scalar(); - log::trace!("Beta challenge: {:?}", start.elapsed()); + log::info!("Beta challenge: {:?}", start.elapsed()); // Sample gamma challenge let start = Instant::now(); let gamma: ChallengeGamma<_> = transcript.squeeze_challenge_scalar(); - log::trace!("Gamma challenge: {:?}", start.elapsed()); + log::info!("Gamma challenge: {:?}", start.elapsed()); // Commit to permutations. 
let start = Instant::now(); @@ -556,7 +557,7 @@ where ) }) .collect::, _>>()?; - log::trace!("Permutation commitment: {:?}", start.elapsed()); + log::info!("Permutation commitment: {:?}", start.elapsed()); #[cfg(feature = "mv-lookup")] let commit_lookups = || -> Result>>, _> { @@ -629,7 +630,7 @@ where None => commit_lookups(), }?; - log::trace!("Lookup commitment: {:?}", start.elapsed()); + log::info!("Lookup commitment: {:?}", start.elapsed()); let start = Instant::now(); let shuffles: Vec>> = instance @@ -659,17 +660,17 @@ where .collect::, _>>() }) .collect::, _>>()?; - log::trace!("Shuffle commitment: {:?}", start.elapsed()); + log::info!("Shuffle commitment: {:?}", start.elapsed()); let start = Instant::now(); // Commit to the vanishing argument's random polynomial for blinding h(x_3) let vanishing = vanishing::Argument::commit(params, domain, &mut rng, transcript)?; - log::trace!("Vanishing commitment: {:?}", start.elapsed()); + log::info!("Vanishing commitment: {:?}", start.elapsed()); // Obtain challenge for keeping all separate gates linearly independent let start = Instant::now(); let y: ChallengeY<_> = transcript.squeeze_challenge_scalar(); - log::trace!("Y challenge: {:?}", start.elapsed()); + log::info!("Y challenge: {:?}", start.elapsed()); // Calculate the advice polys let start = Instant::now(); @@ -690,7 +691,7 @@ where }, ) .collect(); - log::trace!("Advice calculation: {:?}", start.elapsed()); + log::info!("Advice calculation: {:?}", start.elapsed()); // Evaluate the h(X) polynomial let start = Instant::now(); @@ -713,17 +714,17 @@ where &shuffles, &permutations, ); - log::trace!("H(X) evaluation: {:?}", start.elapsed()); + log::info!("H(X) evaluation: {:?}", start.elapsed()); // Construct the vanishing argument's h(X) commitments let start = Instant::now(); let vanishing = vanishing.construct(params, domain, h_poly, &mut rng, transcript)?; - log::trace!("Vanishing construction: {:?}", start.elapsed()); + log::info!("Vanishing construction: 
{:?}", start.elapsed()); let start = Instant::now(); let x: ChallengeX<_> = transcript.squeeze_challenge_scalar(); let xn = x.pow([params.n()]); - log::trace!("X challenge: {:?}", start.elapsed()); + log::info!("X challenge: {:?}", start.elapsed()); let start = Instant::now(); if P::QUERY_INSTANCE { @@ -747,7 +748,7 @@ where } } } - log::trace!("Instance evaluation: {:?}", start.elapsed()); + log::info!("Instance evaluation: {:?}", start.elapsed()); let start = Instant::now(); // Compute and hash advice evals for each circuit instance @@ -769,7 +770,7 @@ where transcript.write_scalar(*eval)?; } } - log::trace!("Advice evaluation: {:?}", start.elapsed()); + log::info!("Advice evaluation: {:?}", start.elapsed()); let start = Instant::now(); // Compute and hash fixed evals (shared across all circuit instances) @@ -780,23 +781,23 @@ where eval_polynomial(&pk.fixed_polys[column.index()], domain.rotate_omega(*x, at)) }) .collect(); - log::trace!("Fixed evaluation: {:?}", start.elapsed()); + log::info!("Fixed evaluation: {:?}", start.elapsed()); // Hash each fixed column evaluation let start = Instant::now(); for eval in fixed_evals.iter() { transcript.write_scalar(*eval)?; } - log::trace!("Fixed evaluation hashing: {:?}", start.elapsed()); + log::info!("Fixed evaluation hashing: {:?}", start.elapsed()); let start = Instant::now(); let vanishing = vanishing.evaluate(x, xn, domain, transcript)?; - log::trace!("Vanishing evaluation: {:?}", start.elapsed()); + log::info!("Vanishing evaluation: {:?}", start.elapsed()); // Evaluate common permutation data let start = Instant::now(); pk.permutation.evaluate(x, transcript)?; - log::trace!("Permutation evaluation: {:?}", start.elapsed()); + log::info!("Permutation evaluation: {:?}", start.elapsed()); // Evaluate the permutations, if any, at omega^i x. 
let start = Instant::now(); @@ -804,7 +805,7 @@ where .into_iter() .map(|permutation| -> Result<_, _> { permutation.construct().evaluate(pk, x, transcript) }) .collect::, _>>()?; - log::trace!("Permutation evaluation: {:?}", start.elapsed()); + log::info!("Permutation evaluation: {:?}", start.elapsed()); // Evaluate the lookups, if any, at omega^i x. let start = Instant::now(); @@ -817,7 +818,7 @@ where .collect::, _>>() }) .collect::, _>>()?; - log::trace!("Lookup evaluation: {:?}", start.elapsed()); + log::info!("Lookup evaluation: {:?}", start.elapsed()); // Evaluate the shuffles, if any, at omega^i x. let start = Instant::now(); @@ -830,7 +831,7 @@ where .collect::, _>>() }) .collect::, _>>()?; - log::trace!("Shuffle evaluation: {:?}", start.elapsed()); + log::info!("Shuffle evaluation: {:?}", start.elapsed()); let start = Instant::now(); let instances = instance @@ -882,7 +883,7 @@ where .chain(pk.permutation.open(x)) // We query the h(X) polynomial at x .chain(vanishing.open(x)); - log::trace!("Open queries: {:?}", start.elapsed()); + log::info!("Open queries: {:?}", start.elapsed()); #[cfg(feature = "counter")] { From c629df8667387082fb39fcdacd7e76712e9c9709 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Wed, 7 Aug 2024 23:41:41 -0400 Subject: [PATCH 03/18] trace logs --- halo2_proofs/src/plonk/evaluation.rs | 14 +++---- halo2_proofs/src/plonk/prover.rs | 56 ++++++++++++++-------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index 440b1530be..9798366c89 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -402,7 +402,7 @@ impl Evaluator { .collect() }) .collect(); - log::info!(" - Advice cosets: {:?}", start.elapsed()); + log::trace!(" - Advice cosets: {:?}", start.elapsed()); let start = std::time::Instant::now(); let instance: Vec>> = instance_polys @@ -414,7 +414,7 
@@ impl Evaluator { .collect() }) .collect(); - log::info!(" - Instance cosets: {:?}", start.elapsed()); + log::trace!(" - Instance cosets: {:?}", start.elapsed()); let mut values = domain.empty_extended(); @@ -457,7 +457,7 @@ impl Evaluator { }); } }); - log::info!(" - Custom gates: {:?}", start.elapsed()); + log::trace!(" - Custom gates: {:?}", start.elapsed()); // Permutations let start = std::time::Instant::now(); @@ -541,7 +541,7 @@ impl Evaluator { } }); } - log::info!(" - Permutations: {:?}", start.elapsed()); + log::trace!(" - Permutations: {:?}", start.elapsed()); let start = std::time::Instant::now(); // For lookups, compute inputs_inv_sum = ∑ 1 / (f_i(X) + α) @@ -602,7 +602,7 @@ impl Evaluator { }) .collect(); #[cfg(feature = "mv-lookup")] - log::info!(" - Lookups inv sum: {:?}", start.elapsed()); + log::trace!(" - Lookups inv sum: {:?}", start.elapsed()); #[cfg(feature = "mv-lookup")] let start = std::time::Instant::now(); @@ -788,7 +788,7 @@ impl Evaluator { } }); } - log::info!(" - Lookups constraints: {:?}", start.elapsed()); + log::trace!(" - Lookups constraints: {:?}", start.elapsed()); // Shuffle constraints let start = std::time::Instant::now(); @@ -852,7 +852,7 @@ impl Evaluator { } }); } - log::info!(" - Shuffle constraints: {:?}", start.elapsed()); + log::trace!(" - Shuffle constraints: {:?}", start.elapsed()); } values } diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index 51898962ee..d7e9dffc63 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -95,7 +95,7 @@ where let start = Instant::now(); // Hash verification key into transcript pk.vk.hash_into(transcript)?; - log::info!("Hashing verification key: {:?}", start.elapsed()); + log::trace!("Hashing verification key: {:?}", start.elapsed()); let domain = &pk.vk.domain; let mut meta = ConstraintSystem::default(); @@ -168,7 +168,7 @@ where }) }) .collect::, _>>()?; - log::info!("Instance preparation: {:?}", start.elapsed()); 
+ log::trace!("Instance preparation: {:?}", start.elapsed()); #[derive(Clone)] struct AdviceSingle { @@ -462,12 +462,12 @@ where (advice, challenges) }; - log::info!("Advice preparation: {:?}", start.elapsed()); + log::trace!("Advice preparation: {:?}", start.elapsed()); // Sample theta challenge for keeping lookup columns linearly independent let start = Instant::now(); let theta: ChallengeTheta<_> = transcript.squeeze_challenge_scalar(); - log::info!("Theta challenge: {:?}", start.elapsed()); + log::trace!("Theta challenge: {:?}", start.elapsed()); let start = Instant::now(); #[cfg(feature = "mv-lookup")] @@ -525,17 +525,17 @@ where .collect() }) .collect::, _>>()?; - log::info!("Lookup preparation: {:?}", start.elapsed()); + log::trace!("Lookup preparation: {:?}", start.elapsed()); // Sample beta challenge let start = Instant::now(); let beta: ChallengeBeta<_> = transcript.squeeze_challenge_scalar(); - log::info!("Beta challenge: {:?}", start.elapsed()); + log::trace!("Beta challenge: {:?}", start.elapsed()); // Sample gamma challenge let start = Instant::now(); let gamma: ChallengeGamma<_> = transcript.squeeze_challenge_scalar(); - log::info!("Gamma challenge: {:?}", start.elapsed()); + log::trace!("Gamma challenge: {:?}", start.elapsed()); // Commit to permutations. 
let start = Instant::now(); @@ -557,7 +557,7 @@ where ) }) .collect::, _>>()?; - log::info!("Permutation commitment: {:?}", start.elapsed()); + log::trace!("Permutation commitment: {:?}", start.elapsed()); #[cfg(feature = "mv-lookup")] let commit_lookups = || -> Result>>, _> { @@ -595,7 +595,7 @@ where Some(cache_path) => { let cache_path = cache_path.join("lookup_commitments"); if cache_path.exists() { - log::info!("Loading lookup commitments from cache"); + log::trace!("Loading lookup commitments from cache"); let cache_file = std::fs::File::open(cache_path)?; let mut cache_reader = std::io::BufReader::new(cache_file); let lookups = (0..instance.len()) @@ -614,7 +614,7 @@ where .collect::, _>>()?; Ok(lookups) } else { - log::info!("Caching lookup commitments"); + log::trace!("Caching lookup commitments"); let cache = commit_lookups()?; let cache_file = std::fs::File::create(cache_path)?; let mut cache_writer = std::io::BufWriter::new(cache_file); @@ -630,7 +630,7 @@ where None => commit_lookups(), }?; - log::info!("Lookup commitment: {:?}", start.elapsed()); + log::trace!("Lookup commitment: {:?}", start.elapsed()); let start = Instant::now(); let shuffles: Vec>> = instance @@ -660,17 +660,17 @@ where .collect::, _>>() }) .collect::, _>>()?; - log::info!("Shuffle commitment: {:?}", start.elapsed()); + log::trace!("Shuffle commitment: {:?}", start.elapsed()); let start = Instant::now(); // Commit to the vanishing argument's random polynomial for blinding h(x_3) let vanishing = vanishing::Argument::commit(params, domain, &mut rng, transcript)?; - log::info!("Vanishing commitment: {:?}", start.elapsed()); + log::trace!("Vanishing commitment: {:?}", start.elapsed()); // Obtain challenge for keeping all separate gates linearly independent let start = Instant::now(); let y: ChallengeY<_> = transcript.squeeze_challenge_scalar(); - log::info!("Y challenge: {:?}", start.elapsed()); + log::trace!("Y challenge: {:?}", start.elapsed()); // Calculate the advice polys let 
start = Instant::now(); @@ -691,7 +691,7 @@ where }, ) .collect(); - log::info!("Advice calculation: {:?}", start.elapsed()); + log::trace!("Advice calculation: {:?}", start.elapsed()); // Evaluate the h(X) polynomial let start = Instant::now(); @@ -714,17 +714,17 @@ where &shuffles, &permutations, ); - log::info!("H(X) evaluation: {:?}", start.elapsed()); + log::trace!("H(X) evaluation: {:?}", start.elapsed()); // Construct the vanishing argument's h(X) commitments let start = Instant::now(); let vanishing = vanishing.construct(params, domain, h_poly, &mut rng, transcript)?; - log::info!("Vanishing construction: {:?}", start.elapsed()); + log::trace!("Vanishing construction: {:?}", start.elapsed()); let start = Instant::now(); let x: ChallengeX<_> = transcript.squeeze_challenge_scalar(); let xn = x.pow([params.n()]); - log::info!("X challenge: {:?}", start.elapsed()); + log::trace!("X challenge: {:?}", start.elapsed()); let start = Instant::now(); if P::QUERY_INSTANCE { @@ -748,7 +748,7 @@ where } } } - log::info!("Instance evaluation: {:?}", start.elapsed()); + log::trace!("Instance evaluation: {:?}", start.elapsed()); let start = Instant::now(); // Compute and hash advice evals for each circuit instance @@ -770,7 +770,7 @@ where transcript.write_scalar(*eval)?; } } - log::info!("Advice evaluation: {:?}", start.elapsed()); + log::trace!("Advice evaluation: {:?}", start.elapsed()); let start = Instant::now(); // Compute and hash fixed evals (shared across all circuit instances) @@ -781,23 +781,23 @@ where eval_polynomial(&pk.fixed_polys[column.index()], domain.rotate_omega(*x, at)) }) .collect(); - log::info!("Fixed evaluation: {:?}", start.elapsed()); + log::trace!("Fixed evaluation: {:?}", start.elapsed()); // Hash each fixed column evaluation let start = Instant::now(); for eval in fixed_evals.iter() { transcript.write_scalar(*eval)?; } - log::info!("Fixed evaluation hashing: {:?}", start.elapsed()); + log::trace!("Fixed evaluation hashing: {:?}", 
start.elapsed()); let start = Instant::now(); let vanishing = vanishing.evaluate(x, xn, domain, transcript)?; - log::info!("Vanishing evaluation: {:?}", start.elapsed()); + log::trace!("Vanishing evaluation: {:?}", start.elapsed()); // Evaluate common permutation data let start = Instant::now(); pk.permutation.evaluate(x, transcript)?; - log::info!("Permutation evaluation: {:?}", start.elapsed()); + log::trace!("Permutation evaluation: {:?}", start.elapsed()); // Evaluate the permutations, if any, at omega^i x. let start = Instant::now(); @@ -805,7 +805,7 @@ where .into_iter() .map(|permutation| -> Result<_, _> { permutation.construct().evaluate(pk, x, transcript) }) .collect::, _>>()?; - log::info!("Permutation evaluation: {:?}", start.elapsed()); + log::trace!("Permutation evaluation: {:?}", start.elapsed()); // Evaluate the lookups, if any, at omega^i x. let start = Instant::now(); @@ -818,7 +818,7 @@ where .collect::, _>>() }) .collect::, _>>()?; - log::info!("Lookup evaluation: {:?}", start.elapsed()); + log::trace!("Lookup evaluation: {:?}", start.elapsed()); // Evaluate the shuffles, if any, at omega^i x. 
let start = Instant::now(); @@ -831,7 +831,7 @@ where .collect::, _>>() }) .collect::, _>>()?; - log::info!("Shuffle evaluation: {:?}", start.elapsed()); + log::trace!("Shuffle evaluation: {:?}", start.elapsed()); let start = Instant::now(); let instances = instance @@ -883,7 +883,7 @@ where .chain(pk.permutation.open(x)) // We query the h(X) polynomial at x .chain(vanishing.open(x)); - log::info!("Open queries: {:?}", start.elapsed()); + log::trace!("Open queries: {:?}", start.elapsed()); #[cfg(feature = "counter")] { From f7743559c1123d0a700da9f88acb039ff293d10f Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Thu, 8 Aug 2024 18:13:23 -0400 Subject: [PATCH 04/18] feat: precalc mv-lookup cosets --- halo2_proofs/src/plonk/evaluation.rs | 29 ++++++++++++- halo2_proofs/src/plonk/mv_lookup/prover.rs | 14 ++++++- halo2_proofs/src/plonk/prover.rs | 47 +--------------------- 3 files changed, 40 insertions(+), 50 deletions(-) diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index 9798366c89..1749570900 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -604,6 +604,20 @@ impl Evaluator { #[cfg(feature = "mv-lookup")] log::trace!(" - Lookups inv sum: {:?}", start.elapsed()); + #[cfg(feature = "mv-lookup")] + let mut cosets: Vec<_> = { + let domain = &pk.vk.domain; + lookups + .par_iter() + .map(|lookup| { + ( + domain.coeff_to_extended(lookup.phi_poly.clone()), + domain.coeff_to_extended(lookup.m_poly.clone()), + ) + }) + .collect() + }; + #[cfg(feature = "mv-lookup")] let start = std::time::Instant::now(); // Lookups @@ -612,8 +626,9 @@ impl Evaluator { // Polynomials required for this lookup. // Calculated here so these only have to be kept in memory for the short time // they are actually needed. 
- let phi_coset = pk.vk.domain.coeff_to_extended(lookup.phi_poly.clone()); - let m_coset = pk.vk.domain.coeff_to_extended(lookup.m_poly.clone()); + let start = std::time::Instant::now(); + + let (phi_coset, m_coset) = &cosets.remove(0); // Lookup constraints /* @@ -624,6 +639,7 @@ impl Evaluator { = (τ(X) * Π(φ_i(X)) * ∑ 1/(φ_i(X))) - Π(φ_i(X)) * m(X) = Π(φ_i(X)) * (τ(X) * ∑ 1/(φ_i(X)) - m(X)) */ + let start = std::time::Instant::now(); parallelize(&mut values, |values, start| { let (inputs_lookup_evaluator, table_lookup_evaluator) = &self.lookups[n]; let mut inputs_eval_data: Vec<_> = inputs_lookup_evaluator @@ -711,6 +727,7 @@ impl Evaluator { *value = *value * y + (lhs - rhs) * l_active_row[idx]; } }); + log::trace!(" - Lookups constraints: {:?}", start.elapsed()); } #[cfg(not(feature = "mv-lookup"))] @@ -719,16 +736,23 @@ impl Evaluator { // Polynomials required for this lookup. // Calculated here so these only have to be kept in memory for the short time // they are actually needed. 
+ let start = std::time::Instant::now(); let product_coset = pk.vk.domain.coeff_to_extended(lookup.product_poly.clone()); + log::trace!(" - Product coset: {:?}", start.elapsed()); + let start = std::time::Instant::now(); let permuted_input_coset = pk .vk .domain .coeff_to_extended(lookup.permuted_input_poly.clone()); + log::trace!(" - Permuted input coset: {:?}", start.elapsed()); + let start = std::time::Instant::now(); let permuted_table_coset = pk .vk .domain .coeff_to_extended(lookup.permuted_table_poly.clone()); + log::trace!(" - Permuted table coset: {:?}", start.elapsed()); + let start = std::time::Instant::now(); // Lookup constraints parallelize(&mut values, |values, start| { let lookup_evaluator = &self.lookups[n]; @@ -787,6 +811,7 @@ impl Evaluator { * l_active_row[idx]); } }); + log::trace!(" - Lookups constraints: {:?}", start.elapsed()); } log::trace!(" - Lookups constraints: {:?}", start.elapsed()); diff --git a/halo2_proofs/src/plonk/mv_lookup/prover.rs b/halo2_proofs/src/plonk/mv_lookup/prover.rs index b907cc8970..62a47d39a1 100644 --- a/halo2_proofs/src/plonk/mv_lookup/prover.rs +++ b/halo2_proofs/src/plonk/mv_lookup/prover.rs @@ -18,11 +18,11 @@ use group::{ ff::{BatchInvert, Field}, Curve, }; +use rustc_hash::FxHashMap as HashMap; use rand_core::RngCore; use std::{ - collections::BTreeMap, iter, ops::{Mul, MulAssign}, }; @@ -124,26 +124,33 @@ impl> Argument { compressed_expression }; + let start = std::time::Instant::now(); // Get values of input expressions involved in the lookup and compress them let compressed_inputs_expressions: Vec<_> = self .inputs_expressions .iter() .map(|input_expressions| compress_expressions(input_expressions)) .collect(); + log::trace!("compressed_inputs_expressions {:?}", start.elapsed()); // Get values of table expressions involved in the lookup and compress them + let start = std::time::Instant::now(); let compressed_table_expression = compress_expressions(&self.table_expressions); + 
log::trace!("compressed_table_expression {:?}", start.elapsed()); let blinding_factors = pk.vk.cs.blinding_factors(); // compute m(X) - let table_index_value_mapping: BTreeMap, usize> = compressed_table_expression + let start = std::time::Instant::now(); + let table_index_value_mapping: HashMap, usize> = compressed_table_expression .iter() .take(params.n() as usize - blinding_factors - 1) .enumerate() .map(|(i, &x)| (x.to_repr().as_ref().to_owned(), i)) .collect(); + log::trace!("table_index_value_mapping {:?}", start.elapsed()); + let start = std::time::Instant::now(); let m_values: Vec = { use std::sync::atomic::{AtomicU64, Ordering}; let m_values: Vec = (0..params.n()).map(|_| AtomicU64::new(0)).collect(); @@ -174,6 +181,7 @@ impl> Argument { .map(|mi| F::from(mi.load(Ordering::Relaxed))) .collect() }; + log::trace!("m_values {:?}", start.elapsed()); let m_values = pk.vk.domain.lagrange_from_vec(m_values); #[cfg(feature = "sanity-checks")] @@ -218,8 +226,10 @@ impl> Argument { } // commit to m(X) + let start = std::time::Instant::now(); let blind = Blind(C::Scalar::ZERO); let m_commitment = params.commit_lagrange(&m_values, blind).to_affine(); + log::trace!("m_commitment {:?}", start.elapsed()); // write commitment of m(X) to transcript transcript.write_point(m_commitment)?; diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index d7e9dffc63..53977daf63 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -11,13 +11,6 @@ use std::iter; use std::ops::RangeTo; use std::time::Instant; -lazy_static::lazy_static! 
{ - /// an optional directory to read and write the lookup table cache - static ref LOOKUP_COMMITMENT_CACHE: Option = std::env::var("LOOKUP_COMMITMENT_CACHE") - .ok() - .map(std::path::PathBuf::from); -} - use super::{ circuit::{ sealed::{self}, @@ -590,45 +583,7 @@ where }; let start = Instant::now(); - // if LOOKUP_COMMITMENT_CACHE is set, try to load the lookup commitments from the cache - let lookups = match LOOKUP_COMMITMENT_CACHE.clone() { - Some(cache_path) => { - let cache_path = cache_path.join("lookup_commitments"); - if cache_path.exists() { - log::trace!("Loading lookup commitments from cache"); - let cache_file = std::fs::File::open(cache_path)?; - let mut cache_reader = std::io::BufReader::new(cache_file); - let lookups = (0..instance.len()) - .map(|_| { - (0..pk.vk.cs.lookups.len()) - .map(|_| { - let c = lookup::prover::Committed::read( - &mut cache_reader, - crate::SerdeFormat::RawBytesUnchecked, - )?; - transcript.write_point(c.commitment)?; - Ok(c) - }) - .collect::>>() - }) - .collect::, _>>()?; - Ok(lookups) - } else { - log::trace!("Caching lookup commitments"); - let cache = commit_lookups()?; - let cache_file = std::fs::File::create(cache_path)?; - let mut cache_writer = std::io::BufWriter::new(cache_file); - for lookup in &cache { - for commitment in lookup { - commitment - .write(&mut cache_writer, crate::SerdeFormat::RawBytesUnchecked)?; - } - } - Ok(cache) - } - } - None => commit_lookups(), - }?; + let lookups = commit_lookups()?; log::trace!("Lookup commitment: {:?}", start.elapsed()); From 65205fab554f5b24eb872f9ea2a62daca9052ec9 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Fri, 9 Aug 2024 12:45:57 -0400 Subject: [PATCH 05/18] chore: precompute coset feature flag --- halo2_proofs/Cargo.toml | 1 + halo2_proofs/src/plonk/evaluation.rs | 56 +++++++++++++++++++--------- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index 
1d62e93851..d411e94cd4 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -109,6 +109,7 @@ mv-lookup = [] cost-estimator = ["serde_derive"] derive_serde = ["halo2curves/derive_serde"] parallel-poly-read = [] +precompute-coset = [] [lib] bench = false diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index 1749570900..2319cdacc5 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -604,7 +604,7 @@ impl Evaluator { #[cfg(feature = "mv-lookup")] log::trace!(" - Lookups inv sum: {:?}", start.elapsed()); - #[cfg(feature = "mv-lookup")] + #[cfg(all(feature = "mv-lookup", feature = "precompute-coset"))] let mut cosets: Vec<_> = { let domain = &pk.vk.domain; lookups @@ -628,8 +628,14 @@ impl Evaluator { // they are actually needed. let start = std::time::Instant::now(); + #[cfg(feature = "precompute-coset")] let (phi_coset, m_coset) = &cosets.remove(0); + #[cfg(not(feature = "precompute-coset"))] + let phi_coset = pk.vk.domain.coeff_to_extended(lookup.phi_poly.clone()); + #[cfg(not(feature = "precompute-coset"))] + let m_coset = pk.vk.domain.coeff_to_extended(lookup.m_poly.clone()); + // Lookup constraints /* φ_i(X) = f_i(X) + α @@ -730,27 +736,44 @@ impl Evaluator { log::trace!(" - Lookups constraints: {:?}", start.elapsed()); } + #[cfg(all(not(feature = "mv-lookup"), feature = "precompute-coset"))] + let mut cosets: Vec<_> = { + let domain = &pk.vk.domain; + lookups + .par_iter() + .map(|lookup| { + ( + domain.coeff_to_extended(lookup.product_poly.clone()), + domain.coeff_to_extended(lookup.permuted_input_poly.clone()), + domain.coeff_to_extended(lookup.permuted_table_poly.clone()), + ) + }) + .collect() + }; + #[cfg(not(feature = "mv-lookup"))] // Lookups for (n, lookup) in lookups.iter().enumerate() { // Polynomials required for this lookup. // Calculated here so these only have to be kept in memory for the short time // they are actually needed. 
- let start = std::time::Instant::now(); - let product_coset = pk.vk.domain.coeff_to_extended(lookup.product_poly.clone()); - log::trace!(" - Product coset: {:?}", start.elapsed()); - let start = std::time::Instant::now(); - let permuted_input_coset = pk - .vk - .domain - .coeff_to_extended(lookup.permuted_input_poly.clone()); - log::trace!(" - Permuted input coset: {:?}", start.elapsed()); - let start = std::time::Instant::now(); - let permuted_table_coset = pk - .vk - .domain - .coeff_to_extended(lookup.permuted_table_poly.clone()); - log::trace!(" - Permuted table coset: {:?}", start.elapsed()); + + #[cfg(feature = "precompute-coset")] + let (product_coset, permuted_input_coset, permuted_table_coset) = &cosets.remove(0); + + #[cfg(not(feature = "precompute-coset"))] + let (product_coset, permuted_input_coset, permuted_table_coset) = { + let product_coset = pk.vk.domain.coeff_to_extended(lookup.product_poly.clone()); + let permuted_input_coset = pk + .vk + .domain + .coeff_to_extended(lookup.permuted_input_poly.clone()); + let permuted_table_coset = pk + .vk + .domain + .coeff_to_extended(lookup.permuted_table_poly.clone()); + (product_coset, permuted_input_coset, permuted_table_coset) + }; let start = std::time::Instant::now(); // Lookup constraints @@ -811,7 +834,6 @@ impl Evaluator { * l_active_row[idx]); } }); - log::trace!(" - Lookups constraints: {:?}", start.elapsed()); } log::trace!(" - Lookups constraints: {:?}", start.elapsed()); From 0a0dc341c7f29d211fe2d120f47fa8e113a4adcd Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:03:34 -0400 Subject: [PATCH 06/18] parallelize commitments and prep --- halo2_proofs/src/plonk/evaluation.rs | 53 +++++---- halo2_proofs/src/plonk/mv_lookup/prover.rs | 123 +++++++++++---------- halo2_proofs/src/plonk/prover.rs | 42 +++++-- 3 files changed, 119 insertions(+), 99 deletions(-) diff --git a/halo2_proofs/src/plonk/evaluation.rs 
b/halo2_proofs/src/plonk/evaluation.rs index 2319cdacc5..bf5a3d6024 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -604,7 +604,7 @@ impl Evaluator { #[cfg(feature = "mv-lookup")] log::trace!(" - Lookups inv sum: {:?}", start.elapsed()); - #[cfg(all(feature = "mv-lookup", feature = "precompute-coset"))] + #[cfg(all(feature = "mv-lookup"))] let mut cosets: Vec<_> = { let domain = &pk.vk.domain; lookups @@ -622,31 +622,25 @@ impl Evaluator { let start = std::time::Instant::now(); // Lookups #[cfg(feature = "mv-lookup")] - for (n, lookup) in lookups.iter().enumerate() { - // Polynomials required for this lookup. - // Calculated here so these only have to be kept in memory for the short time - // they are actually needed. - let start = std::time::Instant::now(); - - #[cfg(feature = "precompute-coset")] - let (phi_coset, m_coset) = &cosets.remove(0); - - #[cfg(not(feature = "precompute-coset"))] - let phi_coset = pk.vk.domain.coeff_to_extended(lookup.phi_poly.clone()); - #[cfg(not(feature = "precompute-coset"))] - let m_coset = pk.vk.domain.coeff_to_extended(lookup.m_poly.clone()); + parallelize(&mut values, |values, start| { + for (n, _lookup) in lookups.iter().enumerate() { + // Polynomials required for this lookup. + // Calculated here so these only have to be kept in memory for the short time + // they are actually needed. 
+ + #[cfg(feature = "precompute-coset")] + let (phi_coset, m_coset) = &cosets[n]; + + // Lookup constraints + /* + φ_i(X) = f_i(X) + α + τ(X) = t(X) + α + LHS = τ(X) * Π(φ_i(X)) * (ϕ(gX) - ϕ(X)) + RHS = τ(X) * Π(φ_i(X)) * (∑ 1/(φ_i(X)) - m(X) / τ(X)))) + = (τ(X) * Π(φ_i(X)) * ∑ 1/(φ_i(X))) - Π(φ_i(X)) * m(X) + = Π(φ_i(X)) * (τ(X) * ∑ 1/(φ_i(X)) - m(X)) + */ - // Lookup constraints - /* - φ_i(X) = f_i(X) + α - τ(X) = t(X) + α - LHS = τ(X) * Π(φ_i(X)) * (ϕ(gX) - ϕ(X)) - RHS = τ(X) * Π(φ_i(X)) * (∑ 1/(φ_i(X)) - m(X) / τ(X)))) - = (τ(X) * Π(φ_i(X)) * ∑ 1/(φ_i(X))) - Π(φ_i(X)) * m(X) - = Π(φ_i(X)) * (τ(X) * ∑ 1/(φ_i(X)) - m(X)) - */ - let start = std::time::Instant::now(); - parallelize(&mut values, |values, start| { let (inputs_lookup_evaluator, table_lookup_evaluator) = &self.lookups[n]; let mut inputs_eval_data: Vec<_> = inputs_lookup_evaluator .iter() @@ -732,9 +726,12 @@ impl Evaluator { // q(X) = LHS - RHS mod zH(X) *value = *value * y + (lhs - rhs) * l_active_row[idx]; } - }); - log::trace!(" - Lookups constraints: {:?}", start.elapsed()); - } + } + }); + + // delete the cosets + #[cfg(feature = "mv-lookup")] + cosets.clear(); #[cfg(all(not(feature = "mv-lookup"), feature = "precompute-coset"))] let mut cosets: Vec<_> = { diff --git a/halo2_proofs/src/plonk/mv_lookup/prover.rs b/halo2_proofs/src/plonk/mv_lookup/prover.rs index 62a47d39a1..fa224565f7 100644 --- a/halo2_proofs/src/plonk/mv_lookup/prover.rs +++ b/halo2_proofs/src/plonk/mv_lookup/prover.rs @@ -1,5 +1,5 @@ use super::super::{ - circuit::Expression, ChallengeBeta, ChallengeTheta, ChallengeX, Error, ProvingKey, + circuit::Expression, ChallengeBeta, ChallengeTheta, ChallengeX, Error, ProvingKey, VerifyingKey, }; use super::Argument; use crate::helpers::SerdeCurveAffine; @@ -34,6 +34,7 @@ pub(in crate::plonk) struct Prepared { compressed_inputs_expressions: Vec>, compressed_table_expression: Polynomial, m_values: Polynomial, + pub(in crate::plonk) commitment: C, } #[derive(Debug)] @@ -78,17 +79,9 @@ 
pub(in crate::plonk) struct Evaluated { } impl> Argument { - pub(in crate::plonk) fn prepare< - 'a, - 'params: 'a, - C, - P: Params<'params, C>, - E: EncodedChallenge, - R: RngCore, - T: TranscriptWrite, - >( + pub(in crate::plonk) fn prepare<'a, 'params: 'a, C, P: Params<'params, C>>( &self, - pk: &ProvingKey, + vk: &VerifyingKey, params: &P, domain: &EvaluationDomain, theta: ChallengeTheta, @@ -96,21 +89,20 @@ impl> Argument { fixed_values: &'a [Polynomial], instance_values: &'a [Polynomial], challenges: &'a [C::Scalar], - _rng: R, // in case we want to blind (do we actually need zk?) - transcript: &mut T, ) -> Result, Error> where C: CurveAffine, C::Curve: Mul + MulAssign, { + let n = params.n() as usize; // Closure to get values of expressions and compress them let compress_expressions = |expressions: &[Expression]| { let compressed_expression = expressions .iter() .map(|expression| { - pk.vk.domain.lagrange_from_vec(evaluate( + vk.domain.lagrange_from_vec(evaluate( expression, - params.n() as usize, + n, 1, fixed_values, advice_values, @@ -128,7 +120,7 @@ impl> Argument { // Get values of input expressions involved in the lookup and compress them let compressed_inputs_expressions: Vec<_> = self .inputs_expressions - .iter() + .par_iter() .map(|input_expressions| compress_expressions(input_expressions)) .collect(); log::trace!("compressed_inputs_expressions {:?}", start.elapsed()); @@ -138,13 +130,15 @@ impl> Argument { let compressed_table_expression = compress_expressions(&self.table_expressions); log::trace!("compressed_table_expression {:?}", start.elapsed()); - let blinding_factors = pk.vk.cs.blinding_factors(); + let blinding_factors = vk.cs.blinding_factors(); + + let chunk_size = n - blinding_factors - 1; // compute m(X) let start = std::time::Instant::now(); let table_index_value_mapping: HashMap, usize> = compressed_table_expression - .iter() - .take(params.n() as usize - blinding_factors - 1) + .par_iter() + .take(chunk_size) .enumerate() .map(|(i, 
&x)| (x.to_repr().as_ref().to_owned(), i)) .collect(); @@ -155,26 +149,25 @@ impl> Argument { use std::sync::atomic::{AtomicU64, Ordering}; let m_values: Vec = (0..params.n()).map(|_| AtomicU64::new(0)).collect(); - for compressed_input_expression in compressed_inputs_expressions.iter() { - let res: Result<(), Error> = compressed_input_expression - .par_iter() - .take(params.n() as usize - blinding_factors - 1) - .map(|fi| { - let index = match table_index_value_mapping - .get(&fi.to_repr().as_ref().to_owned()) - { - Some(value) => value, - None => { - log::error!("value is OOR of lookup"); - return Err(Error::Synthesis); - } - }; - m_values[*index].fetch_add(1, Ordering::Relaxed); - Ok(()) - }) - .collect(); - res? - } + compressed_inputs_expressions + .par_iter() + .for_each(|compressed_input_expression| { + compressed_input_expression + .iter() + .take(chunk_size) + .for_each(|fi| { + let index = match table_index_value_mapping + .get(&fi.to_repr().as_ref().to_owned()) + { + Some(value) => value, + None => { + log::error!("value is OOR of lookup"); + return; + } + }; + m_values[*index].fetch_add(1, Ordering::Relaxed); + }); + }); m_values .par_iter() @@ -182,7 +175,7 @@ impl> Argument { .collect() }; log::trace!("m_values {:?}", start.elapsed()); - let m_values = pk.vk.domain.lagrange_from_vec(m_values); + let m_values = vk.domain.lagrange_from_vec(m_values); #[cfg(feature = "sanity-checks")] { @@ -232,30 +225,23 @@ impl> Argument { log::trace!("m_commitment {:?}", start.elapsed()); // write commitment of m(X) to transcript - transcript.write_point(m_commitment)?; + // transcript.write_point(m_commitment)?; Ok(Prepared { compressed_inputs_expressions, compressed_table_expression, m_values, + commitment: m_commitment, }) } } impl Prepared { - pub(in crate::plonk) fn commit_grand_sum< - 'params, - P: Params<'params, C>, - E: EncodedChallenge, - R: RngCore, - T: TranscriptWrite, - >( + pub(in crate::plonk) fn commit_grand_sum<'params, P: Params<'params, C>>( self, 
- pk: &ProvingKey, + vk: &VerifyingKey, params: &P, beta: ChallengeBeta, - mut rng: R, - transcript: &mut T, ) -> Result, Error> { /* φ_i(X) = f_i(X) + α @@ -264,6 +250,7 @@ impl Prepared { RHS = τ(X) * Π(φ_i(X)) * (∑ 1/(φ_i(X)) - m(X) / τ(X)))) */ + let start = std::time::Instant::now(); // ∑ 1/(φ_i(X)) let mut inputs_log_derivatives = vec![C::Scalar::ZERO; params.n() as usize]; for compressed_input_expression in self.compressed_inputs_expressions.iter() { @@ -288,6 +275,9 @@ impl Prepared { } } + log::trace!(" - inputs_log_derivatives {:?}", start.elapsed()); + + let start = std::time::Instant::now(); // 1 / τ(X) let mut table_log_derivatives = vec![C::Scalar::ZERO; params.n() as usize]; parallelize( @@ -302,8 +292,16 @@ impl Prepared { }, ); + log::trace!(" - table_log_derivatives {:?}", start.elapsed()); + + let start = std::time::Instant::now(); table_log_derivatives.iter_mut().batch_invert(); + log::trace!( + " - table_log_derivatives batch_invert {:?}", + start.elapsed() + ); + let start = std::time::Instant::now(); // (Σ 1/(φ_i(X)) - m(X) / τ(X)) let mut log_derivatives_diff = vec![C::Scalar::ZERO; params.n() as usize]; parallelize(&mut log_derivatives_diff, |log_derivatives_diff, start| { @@ -318,9 +316,12 @@ impl Prepared { } }); + log::trace!(" - log_derivatives_diff {:?}", start.elapsed()); + + let start = std::time::Instant::now(); // Compute the evaluations of the lookup grand sum polynomial // over our domain, starting with phi[0] = 0 - let blinding_factors = pk.vk.cs.blinding_factors(); + let blinding_factors = vk.cs.blinding_factors(); let phi = iter::once(C::Scalar::ZERO) .chain(log_derivatives_diff) .scan(C::Scalar::ZERO, |state, cur| { @@ -331,10 +332,12 @@ impl Prepared { // be a 0 .take(params.n() as usize - blinding_factors) // Chain random blinding factors. 
- .chain((0..blinding_factors).map(|_| C::Scalar::random(&mut rng))) + .chain((0..blinding_factors).map(|_| C::Scalar::ZERO)) .collect::>(); assert_eq!(phi.len(), params.n() as usize); - let phi = pk.vk.domain.lagrange_from_vec(phi); + let phi = vk.domain.lagrange_from_vec(phi); + + log::trace!(" - phi {:?}", start.elapsed()); #[cfg(feature = "sanity-checks")] // This test works only with intermediate representations in this method. @@ -396,14 +399,16 @@ impl Prepared { } let grand_sum_blind = Blind(C::Scalar::ZERO); + let start = std::time::Instant::now(); let phi_commitment = params.commit_lagrange(&phi, grand_sum_blind).to_affine(); + log::trace!(" - phi_commitment {:?}", start.elapsed()); // Hash grand sum commitment - transcript.write_point(phi_commitment)?; + // transcript.write_point(phi_commitment)?; Ok(Committed { - m_poly: pk.vk.domain.lagrange_to_coeff(self.m_values), - phi_poly: pk.vk.domain.lagrange_to_coeff(phi), + m_poly: vk.domain.lagrange_to_coeff(self.m_values), + phi_poly: vk.domain.lagrange_to_coeff(phi), commitment: phi_commitment, }) } @@ -412,11 +417,11 @@ impl Prepared { impl Committed { pub(in crate::plonk) fn evaluate, T: TranscriptWrite>( self, - pk: &ProvingKey, + vk: &VerifyingKey, x: ChallengeX, transcript: &mut T, ) -> Result, Error> { - let domain = &pk.vk.domain; + let domain = &vk.domain; let x_next = domain.rotate_omega(*x, Rotation::next()); let phi_eval = eval_polynomial(&self.phi_poly, *x); diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index 53977daf63..f7feefcd20 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -20,6 +20,9 @@ use super::{ permutation, shuffle, vanishing, ChallengeBeta, ChallengeGamma, ChallengeTheta, ChallengeX, ChallengeY, Error, ProvingKey, }; +use maybe_rayon::iter::IntoParallelIterator; +use maybe_rayon::iter::IntoParallelRefIterator; +use maybe_rayon::iter::ParallelIterator; #[cfg(not(feature = "mv-lookup"))] use super::lookup; @@ 
-50,7 +53,7 @@ pub fn create_proof< Scheme: CommitmentScheme, P: Prover<'params, Scheme>, E: EncodedChallenge, - R: RngCore, + R: RngCore + Send + Sync, T: TranscriptWrite, ConcreteCircuit: Circuit, >( @@ -64,6 +67,7 @@ pub fn create_proof< where Scheme::Scalar: WithSmallOrderMulGroup<3> + FromUniformBytes<64> + SerdeObject, Scheme::Curve: SerdeObject, + Scheme::ParamsProver: Send + Sync, { #[cfg(feature = "counter")] { @@ -469,13 +473,14 @@ where .zip(advice.iter()) .map(|(instance, advice)| -> Result, Error> { // Construct and commit to permuted values for each lookup - pk.vk + let res: Result, Error> = pk + .vk .cs .lookups - .iter() + .par_iter() .map(|lookup| { lookup.prepare( - pk, + &pk.vk, params, domain, theta, @@ -483,11 +488,15 @@ where &pk.fixed_values, &instance.instance_values, &challenges, - &mut rng, - transcript, ) }) - .collect() + .collect(); + res.iter().for_each(|lookups| { + lookups.iter().for_each(|lookup| { + transcript.write_point(lookup.commitment); + }); + }); + res }) .collect::, _>>()?; @@ -552,16 +561,25 @@ where .collect::, _>>()?; log::trace!("Permutation commitment: {:?}", start.elapsed()); + // preallocate the lookups + #[cfg(feature = "mv-lookup")] let commit_lookups = || -> Result>>, _> { lookups .into_iter() .map(|lookups| -> Result, _> { // Construct and commit to products for each lookup - lookups - .into_iter() - .map(|lookup| lookup.commit_grand_sum(pk, params, beta, &mut rng, transcript)) - .collect::, _>>() + let res = lookups + .into_par_iter() + .map(|lookup| lookup.commit_grand_sum(&pk.vk, params, beta)) + .collect::, _>>(); + + res.iter().for_each(|lookups| { + lookups.iter().for_each(|lookup| { + transcript.write_point(lookup.commitment); + }); + }); + res }) .collect::, _>>() }; @@ -769,7 +787,7 @@ where .map(|lookups| -> Result, _> { lookups .into_iter() - .map(|p| p.evaluate(pk, x, transcript)) + .map(|p| p.evaluate(&pk.vk, x, transcript)) .collect::, _>>() }) .collect::, _>>()?; From 
7cbcf5ef42717edf5f54863a533bd3e78c273dd1 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Wed, 14 Aug 2024 18:01:43 -0400 Subject: [PATCH 07/18] chore: parallel vanishing args --- halo2_proofs/src/plonk/evaluation.rs | 36 +++++++++------------- halo2_proofs/src/plonk/prover.rs | 35 +++++++++++---------- halo2_proofs/src/plonk/vanishing/prover.rs | 9 ++++-- 3 files changed, 39 insertions(+), 41 deletions(-) diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index bf5a3d6024..3831ca93d6 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -379,6 +379,7 @@ impl Evaluator { shuffles: &[Vec>], permutations: &[permutation::prover::Committed], ) -> Polynomial { + let start = std::time::Instant::now(); let domain = &pk.vk.domain; let size = domain.extended_len(); let rot_scale = 1 << (domain.extended_k() - domain.k()); @@ -390,6 +391,7 @@ impl Evaluator { let l_last = &pk.l_last; let l_active_row = &pk.l_active_row; let p = &pk.vk.cs.permutation; + log::trace!(" - Initialization: {:?}", start.elapsed()); let start = std::time::Instant::now(); // Calculate the advice and instance cosets @@ -420,6 +422,7 @@ impl Evaluator { // Core expression evaluations + let start = std::time::Instant::now(); let num_threads = multicore::current_num_threads(); for ((((advice, instance), lookups), shuffles), permutation) in advice .iter() @@ -429,7 +432,7 @@ impl Evaluator { .zip(permutations.iter()) { // Custom gates - let start = std::time::Instant::now(); + multicore::scope(|scope| { let chunk_size = (size + num_threads - 1) / num_threads; for (thread_idx, values) in values.chunks_mut(chunk_size).enumerate() { @@ -549,10 +552,10 @@ impl Evaluator { // The middle vector has capacity domain.extended_len() // The inner vector has capacity #[cfg(feature = "mv-lookup")] - let inputs_inv_sum: Vec>> = lookups + let mut inputs_inv_sum_cosets: Vec<_> = lookups 
.par_iter() .enumerate() - .map(|(n, _)| { + .map(|(n, lookup)| { let (inputs_lookup_evaluator, _) = &self.lookups[n]; let mut inputs_eval_data: Vec<_> = inputs_lookup_evaluator .iter() @@ -598,26 +601,16 @@ impl Evaluator { .map(|c| c.to_vec()) .collect(); - inputs_inv_sums + ( + inputs_inv_sums, + domain.coeff_to_extended(lookup.phi_poly.clone()), + domain.coeff_to_extended(lookup.m_poly.clone()), + ) }) .collect(); #[cfg(feature = "mv-lookup")] log::trace!(" - Lookups inv sum: {:?}", start.elapsed()); - #[cfg(all(feature = "mv-lookup"))] - let mut cosets: Vec<_> = { - let domain = &pk.vk.domain; - lookups - .par_iter() - .map(|lookup| { - ( - domain.coeff_to_extended(lookup.phi_poly.clone()), - domain.coeff_to_extended(lookup.m_poly.clone()), - ) - }) - .collect() - }; - #[cfg(feature = "mv-lookup")] let start = std::time::Instant::now(); // Lookups @@ -628,8 +621,7 @@ impl Evaluator { // Calculated here so these only have to be kept in memory for the short time // they are actually needed. 
- #[cfg(feature = "precompute-coset")] - let (phi_coset, m_coset) = &cosets[n]; + let (inputs_inv_sum, phi_coset, m_coset) = &inputs_inv_sum_cosets[n]; // Lookup constraints /* @@ -681,7 +673,7 @@ impl Evaluator { .fold(C::Scalar::ONE, |acc, input| acc * input); // f_i(X) + α at ω^idx - let fi_inverses = &inputs_inv_sum[n][idx]; + let fi_inverses = &inputs_inv_sum[idx]; let inputs_inv_sum = fi_inverses .iter() .fold(C::Scalar::ZERO, |acc, input| acc + input); @@ -731,7 +723,7 @@ impl Evaluator { // delete the cosets #[cfg(feature = "mv-lookup")] - cosets.clear(); + drop(inputs_inv_sum_cosets); #[cfg(all(not(feature = "mv-lookup"), feature = "precompute-coset"))] let mut cosets: Vec<_> = { diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index f7feefcd20..895200ff24 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -20,6 +20,7 @@ use super::{ permutation, shuffle, vanishing, ChallengeBeta, ChallengeGamma, ChallengeTheta, ChallengeX, ChallengeY, Error, ProvingKey, }; +use maybe_rayon::iter::IndexedParallelIterator; use maybe_rayon::iter::IntoParallelIterator; use maybe_rayon::iter::IntoParallelRefIterator; use maybe_rayon::iter::ParallelIterator; @@ -469,12 +470,11 @@ where let start = Instant::now(); #[cfg(feature = "mv-lookup")] let lookups: Vec>> = instance - .iter() - .zip(advice.iter()) + .par_iter() + .zip(advice.par_iter()) .map(|(instance, advice)| -> Result, Error> { // Construct and commit to permuted values for each lookup - let res: Result, Error> = pk - .vk + pk.vk .cs .lookups .par_iter() @@ -490,16 +490,17 @@ where &challenges, ) }) - .collect(); - res.iter().for_each(|lookups| { - lookups.iter().for_each(|lookup| { - transcript.write_point(lookup.commitment); - }); - }); - res + .collect() }) .collect::, _>>()?; + #[cfg(feature = "mv-lookup")] + lookups.iter().for_each(|lookups| { + lookups.iter().for_each(|lookup| { + transcript.write_point(lookup.commitment); + }); + }); + 
#[cfg(not(feature = "mv-lookup"))] let lookups: Vec>> = instance .iter() @@ -574,11 +575,6 @@ where .map(|lookup| lookup.commit_grand_sum(&pk.vk, params, beta)) .collect::, _>>(); - res.iter().for_each(|lookups| { - lookups.iter().for_each(|lookup| { - transcript.write_point(lookup.commitment); - }); - }); res }) .collect::, _>>() @@ -603,6 +599,13 @@ where let start = Instant::now(); let lookups = commit_lookups()?; + #[cfg(feature = "mv-lookup")] + lookups.iter().for_each(|lookups| { + lookups.iter().for_each(|lookup| { + transcript.write_point(lookup.commitment); + }); + }); + log::trace!("Lookup commitment: {:?}", start.elapsed()); let start = Instant::now(); diff --git a/halo2_proofs/src/plonk/vanishing/prover.rs b/halo2_proofs/src/plonk/vanishing/prover.rs index 17603e64cd..e6f6417633 100644 --- a/halo2_proofs/src/plonk/vanishing/prover.rs +++ b/halo2_proofs/src/plonk/vanishing/prover.rs @@ -1,5 +1,8 @@ use ff::Field; use group::Curve; +use maybe_rayon::iter::IndexedParallelIterator; +use maybe_rayon::iter::IntoParallelRefIterator; +use maybe_rayon::iter::ParallelIterator; use rand_chacha::ChaCha20Rng; use rand_core::{RngCore, SeedableRng}; use rustc_hash::FxHashMap as HashMap; @@ -96,7 +99,7 @@ impl Argument { impl Committed { pub(in crate::plonk) fn construct< 'params, - P: ParamsProver<'params, C>, + P: ParamsProver<'params, C> + Send + Sync, E: EncodedChallenge, R: RngCore, T: TranscriptWrite, @@ -127,8 +130,8 @@ impl Committed { // Compute commitments to each h(X) piece let h_commitments_projective: Vec<_> = h_pieces - .iter() - .zip(h_blinds.iter()) + .par_iter() + .zip(h_blinds.par_iter()) .map(|(h_piece, blind)| params.commit(h_piece, *blind)) .collect(); let mut h_commitments = vec![C::identity(); h_commitments_projective.len()]; From 38abade26078496aa4d8738b967b0e7fa5d9505c Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Sun, 18 Aug 2024 12:52:51 -0400 Subject: [PATCH 08/18] chore: use cyclone msm --- 
halo2_proofs/Cargo.toml | 2 +- halo2_proofs/src/arithmetic.rs | 191 ++++++--------------------------- halo2_proofs/src/lib.rs | 1 + 3 files changed, 37 insertions(+), 157 deletions(-) diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index d411e94cd4..d1ede87cf6 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -52,7 +52,7 @@ log = { version = "0.4.17", default_features = false } backtrace = { version = "0.3", optional = true } ff = "0.13" group = "0.13" -halo2curves = { git = "https://github.com/privacy-scaling-explorations/halo2curves", rev="9fff22c", default-features = false } +halo2curves = { git = "https://github.com/privacy-scaling-explorations/halo2curves", rev="b753a832e92d5c86c5c997327a9cf9de86a18851", default-features = false } rand_core = { version = "0.6", default-features = false } tracing = "0.1" blake2b_simd = "1" # MSRV 1.66.0 diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs index 02d3330c0a..391eaf515c 100644 --- a/halo2_proofs/src/arithmetic.rs +++ b/halo2_proofs/src/arithmetic.rs @@ -1,19 +1,21 @@ //! This module provides common utilities, traits and structures for group, //! field and polynomial arithmetic. +#[cfg(feature = "icicle_gpu")] +use super::icicle; use super::multicore; pub use ff::Field; use group::{ ff::{BatchInvert, PrimeField}, - Curve, Group, GroupOpsOwned, ScalarMulOwned, + prime::PrimeCurveAffine, + Curve, GroupOpsOwned, ScalarMulOwned, }; -pub use halo2curves::{CurveAffine, CurveExt}; - -#[cfg(feature = "icicle_gpu")] -use super::icicle; #[cfg(feature = "icicle_gpu")] use rustacuda::prelude::DeviceBuffer; +use halo2curves::msm::msm_best; +pub use halo2curves::{CurveAffine, CurveExt}; + /// This represents an element of a group with basic operations that can be /// performed. This allows an FFT implementation (for example) to operate /// generically over either a field or elliptic curve group. 
@@ -29,118 +31,14 @@ where { } -fn multiexp_serial(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) { - let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect(); - - let c = if bases.len() < 4 { - 1 - } else if bases.len() < 32 { - 3 - } else { - (f64::from(bases.len() as u32)).ln().ceil() as usize - }; - - fn get_at(segment: usize, c: usize, bytes: &F::Repr) -> usize { - let skip_bits = segment * c; - let skip_bytes = skip_bits / 8; - - if skip_bytes >= (F::NUM_BITS as usize + 7) / 8 { - return 0; - } - - let mut v = [0; 8]; - for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) { - *v = *o; - } - - let mut tmp = u64::from_le_bytes(v); - tmp >>= skip_bits - (skip_bytes * 8); - tmp %= 1 << c; - - tmp as usize - } - - let segments = (C::Scalar::NUM_BITS as usize / c) + 1; - - for current_segment in (0..segments).rev() { - for _ in 0..c { - *acc = acc.double(); - } - - #[derive(Clone, Copy)] - enum Bucket { - None, - Affine(C), - Projective(C::Curve), - } - - impl Bucket { - fn add_assign(&mut self, other: &C) { - *self = match *self { - Bucket::None => Bucket::Affine(*other), - Bucket::Affine(a) => Bucket::Projective(a + *other), - Bucket::Projective(mut a) => { - a += *other; - Bucket::Projective(a) - } - } - } - - fn add(self, mut other: C::Curve) -> C::Curve { - match self { - Bucket::None => other, - Bucket::Affine(a) => { - other += a; - other - } - Bucket::Projective(a) => other + &a, - } - } - } - - let mut buckets: Vec> = vec![Bucket::None; (1 << c) - 1]; - - for (coeff, base) in coeffs.iter().zip(bases.iter()) { - let coeff = get_at::(current_segment, c, coeff); - if coeff != 0 { - buckets[coeff - 1].add_assign(base); - } - } - - // Summation by parts - // e.g. 3a + 2b + 1c = a + - // (a) + b + - // ((a) + b) + c - let mut running_sum = C::Curve::identity(); - for exp in buckets.into_iter().rev() { - running_sum = exp.add(running_sum); - *acc += &running_sum; - } - } -} - -/// Performs a small multi-exponentiation operation. 
-/// Uses the double-and-add algorithm with doublings shared across points. -pub fn small_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { - let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect(); - let mut acc = C::Curve::identity(); - - // for byte idx - for byte_idx in (0..((C::Scalar::NUM_BITS as usize + 7) / 8)).rev() { - // for bit idx - for bit_idx in (0..8).rev() { - acc = acc.double(); - // for each coeff - for coeff_idx in 0..coeffs.len() { - let byte = coeffs[coeff_idx].as_ref()[byte_idx]; - if ((byte >> bit_idx) & 1) != 0 { - acc += bases[coeff_idx]; - } - } - } - } - - acc +// [JPW] Keep this adapter to halo2curves to minimize code changes. +/// Performs a multi-exponentiation operation. +/// +/// This function will panic if coeffs and bases have a different length. +/// +/// This will use multithreading if beneficial. +pub fn best_multiexp_cpu(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { + msm_best(coeffs, bases) } #[cfg(feature = "icicle_gpu")] @@ -152,40 +50,7 @@ pub fn best_multiexp_gpu(coeffs: &[C::Scalar], is_lagrange: bool return icicle::multiexp_on_device::(scalars_ptr, is_lagrange); } -/// Performs a multi-exponentiation operation. -/// -/// This function will panic if coeffs and bases have a different length. -/// -/// This will use multithreading if beneficial. 
-pub fn best_multiexp_cpu(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { - assert_eq!(coeffs.len(), bases.len()); - - let num_threads = multicore::current_num_threads(); - if coeffs.len() > num_threads { - let chunk = coeffs.len() / num_threads; - let num_chunks = coeffs.chunks(chunk).len(); - let mut results = vec![C::Curve::identity(); num_chunks]; - multicore::scope(|scope| { - let chunk = coeffs.len() / num_threads; - - for ((coeffs, bases), acc) in coeffs - .chunks(chunk) - .zip(bases.chunks(chunk)) - .zip(results.iter_mut()) - { - scope.spawn(move |_| { - multiexp_serial(coeffs, bases, acc); - }); - } - }); - results.iter().fold(C::Curve::identity(), |a, b| a + b) - } else { - let mut acc = C::Curve::identity(); - multiexp_serial(coeffs, bases, &mut acc); - acc - } -} - +/// Dispatcher /// Performs a radix-$2$ Fast-Fourier Transformation (FFT) on a vector of size /// $n = 2^k$, when provided `log_n` = $k$ and an element of multiplicative /// order $n$ called `omega` ($\omega$). The result is that the vector `a`, when @@ -300,16 +165,18 @@ pub fn recursive_butterfly_arithmetic>( }); } } - /// Convert coefficient bases group elements to lagrange basis by inverse FFT. 
-pub fn g_to_lagrange(g_projective: Vec, k: u32) -> Vec { +pub fn g_to_lagrange(g_projective: Vec, k: u32) -> Vec { let n_inv = C::Scalar::TWO_INV.pow_vartime([k as u64, 0, 0, 0]); + let omega = C::Scalar::ROOT_OF_UNITY; let mut omega_inv = C::Scalar::ROOT_OF_UNITY_INV; for _ in k..C::Scalar::S { omega_inv = omega_inv.square(); } let mut g_lagrange_projective = g_projective; + let n = g_lagrange_projective.len(); + best_fft(&mut g_lagrange_projective, omega_inv, k); parallelize(&mut g_lagrange_projective, |g, _| { for g in g.iter_mut() { @@ -446,7 +313,8 @@ pub fn parallelize(v: &mu }); } -fn log2_floor(num: usize) -> u32 { +/// +pub fn log2_floor(num: usize) -> u32 { assert!(num > 0); let mut pow = 0; @@ -493,7 +361,7 @@ pub fn lagrange_interpolate(points: &[F], evals: &[F]) -> Vec { .enumerate() .filter(|&(k, _)| k != j) .map(|a| a.1) - .zip(denoms.into_iter()) + .zip(denoms) { product.resize(tmp.len() + 1, F::ZERO); for ((a, b), product) in tmp @@ -508,7 +376,7 @@ pub fn lagrange_interpolate(points: &[F], evals: &[F]) -> Vec { } assert_eq!(tmp.len(), points.len()); assert_eq!(product.len(), points.len() - 1); - for (final_coeff, interpolation_coeff) in final_poly.iter_mut().zip(tmp.into_iter()) { + for (final_coeff, interpolation_coeff) in final_poly.iter_mut().zip(tmp) { *final_coeff += interpolation_coeff * eval; } } @@ -540,11 +408,22 @@ pub(crate) fn powers(base: F) -> impl Iterator { std::iter::successors(Some(F::ONE), move |power| Some(base * power)) } +/// Reverse `l` LSBs of bitvector `n` +pub fn bitreverse(mut n: usize, l: usize) -> usize { + let mut r = 0; + for _ in 0..l { + r = (r << 1) | (n & 1); + n >>= 1; + } + r +} + #[cfg(test)] use rand_core::OsRng; #[cfg(test)] use crate::halo2curves::pasta::Fp; +// use crate::plonk::{get_duration, get_time, start_measure, stop_measure}; #[test] fn test_lagrange_interpolate() { diff --git a/halo2_proofs/src/lib.rs b/halo2_proofs/src/lib.rs index 3c0e3e566f..a91729d645 100644 --- a/halo2_proofs/src/lib.rs +++ 
b/halo2_proofs/src/lib.rs @@ -7,6 +7,7 @@ #![deny(missing_debug_implementations)] #![deny(missing_docs)] #![deny(unsafe_code)] +#![feature(int_roundings)] #[cfg(feature = "counter")] #[macro_use] From e3c012e408129d479f1ad327bab42b96999d867b Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Sun, 18 Aug 2024 20:48:46 -0400 Subject: [PATCH 09/18] chore: upgrade fft algos --- halo2_proofs/src/arithmetic.rs | 123 +---- halo2_proofs/src/fft.rs | 136 ++++++ halo2_proofs/src/fft/baseline.rs | 128 +++++ halo2_proofs/src/fft/parallel.rs | 282 +++++++++++ halo2_proofs/src/fft/recursive.rs | 461 ++++++++++++++++++ halo2_proofs/src/lib.rs | 1 + halo2_proofs/src/plonk/evaluation.rs | 14 +- halo2_proofs/src/plonk/keygen.rs | 8 +- halo2_proofs/src/plonk/permutation/keygen.rs | 2 +- halo2_proofs/src/plonk/permutation/prover.rs | 2 +- halo2_proofs/src/poly/domain.rs | 467 ++++++++++++++++++- 11 files changed, 1481 insertions(+), 143 deletions(-) create mode 100644 halo2_proofs/src/fft.rs create mode 100644 halo2_proofs/src/fft/baseline.rs create mode 100644 halo2_proofs/src/fft/parallel.rs create mode 100644 halo2_proofs/src/fft/recursive.rs diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs index 391eaf515c..35ad44c607 100644 --- a/halo2_proofs/src/arithmetic.rs +++ b/halo2_proofs/src/arithmetic.rs @@ -51,120 +51,16 @@ pub fn best_multiexp_gpu(coeffs: &[C::Scalar], is_lagrange: bool } /// Dispatcher -/// Performs a radix-$2$ Fast-Fourier Transformation (FFT) on a vector of size -/// $n = 2^k$, when provided `log_n` = $k$ and an element of multiplicative -/// order $n$ called `omega` ($\omega$). The result is that the vector `a`, when -/// interpreted as the coefficients of a polynomial of degree $n - 1$, is -/// transformed into the evaluations of this polynomial at each of the $n$ -/// distinct powers of $\omega$. 
This transformation is invertible by providing -/// $\omega^{-1}$ in place of $\omega$ and dividing each resulting field element -/// by $n$. -/// -/// This will use multithreading if beneficial. -pub fn best_fft>(a: &mut [G], omega: Scalar, log_n: u32) { - fn bitreverse(mut n: usize, l: usize) -> usize { - let mut r = 0; - for _ in 0..l { - r = (r << 1) | (n & 1); - n >>= 1; - } - r - } - - let threads = multicore::current_num_threads(); - let log_threads = log2_floor(threads); - let n = a.len(); - assert_eq!(n, 1 << log_n); - - for k in 0..n { - let rk = bitreverse(k, log_n as usize); - if k < rk { - a.swap(rk, k); - } - } - - // precompute twiddle factors - let twiddles: Vec<_> = (0..(n / 2)) - .scan(Scalar::ONE, |w, _| { - let tw = *w; - *w *= &omega; - Some(tw) - }) - .collect(); - - if log_n <= log_threads { - let mut chunk = 2_usize; - let mut twiddle_chunk = n / 2; - for _ in 0..log_n { - a.chunks_mut(chunk).for_each(|coeffs| { - let (left, right) = coeffs.split_at_mut(chunk / 2); - - // case when twiddle factor is one - let (a, left) = left.split_at_mut(1); - let (b, right) = right.split_at_mut(1); - let t = b[0]; - b[0] = a[0]; - a[0] += &t; - b[0] -= &t; - - left.iter_mut() - .zip(right.iter_mut()) - .enumerate() - .for_each(|(i, (a, b))| { - let mut t = *b; - t *= &twiddles[(i + 1) * twiddle_chunk]; - *b = *a; - *a += &t; - *b -= &t; - }); - }); - chunk *= 2; - twiddle_chunk /= 2; - } - } else { - recursive_butterfly_arithmetic(a, n, 1, &twiddles) - } -} - -/// This perform recursive butterfly arithmetic -pub fn recursive_butterfly_arithmetic>( +pub fn best_fft>( a: &mut [G], - n: usize, - twiddle_chunk: usize, - twiddles: &[Scalar], + omega: Scalar, + log_n: u32, + data: &FFTData, + inverse: bool, ) { - if n == 2 { - let t = a[1]; - a[1] = a[0]; - a[0] += &t; - a[1] -= &t; - } else { - let (left, right) = a.split_at_mut(n / 2); - multicore::join( - || recursive_butterfly_arithmetic(left, n / 2, twiddle_chunk * 2, twiddles), - ||
recursive_butterfly_arithmetic(right, n / 2, twiddle_chunk * 2, twiddles), - ); - - // case when twiddle factor is one - let (a, left) = left.split_at_mut(1); - let (b, right) = right.split_at_mut(1); - let t = b[0]; - b[0] = a[0]; - a[0] += &t; - b[0] -= &t; - - left.iter_mut() - .zip(right.iter_mut()) - .enumerate() - .for_each(|(i, (a, b))| { - let mut t = *b; - t *= &twiddles[(i + 1) * twiddle_chunk]; - *b = *a; - *a += &t; - *b -= &t; - }); - } + fft::fft(a, omega, log_n, data, inverse); } + /// Convert coefficient bases group elements to lagrange basis by inverse FFT. pub fn g_to_lagrange(g_projective: Vec, k: u32) -> Vec { let n_inv = C::Scalar::TWO_INV.pow_vartime([k as u64, 0, 0, 0]); @@ -176,8 +72,9 @@ pub fn g_to_lagrange(g_projective: Vec, k: u32) - let mut g_lagrange_projective = g_projective; let n = g_lagrange_projective.len(); + let fft_data = FFTData::new(n, omega, omega_inv); - best_fft(&mut g_lagrange_projective, omega_inv, k); + best_fft(&mut g_lagrange_projective, omega_inv, k, &fft_data, true); parallelize(&mut g_lagrange_projective, |g, _| { for g in g.iter_mut() { *g *= n_inv; @@ -194,7 +91,6 @@ pub fn g_to_lagrange(g_projective: Vec, k: u32) - g_lagrange } - /// This evaluates a provided polynomial (in coefficient form) at `point`. pub fn eval_polynomial(poly: &[F], point: F) -> F { fn evaluate(poly: &[F], point: F) -> F { @@ -421,6 +317,7 @@ pub fn bitreverse(mut n: usize, l: usize) -> usize { #[cfg(test)] use rand_core::OsRng; +use crate::fft::{self, recursive::FFTData}; #[cfg(test)] use crate::halo2curves::pasta::Fp; // use crate::plonk::{get_duration, get_time, start_measure, stop_measure}; diff --git a/halo2_proofs/src/fft.rs b/halo2_proofs/src/fft.rs new file mode 100644 index 0000000000..9a163f9a41 --- /dev/null +++ b/halo2_proofs/src/fft.rs @@ -0,0 +1,136 @@ +//! This is a module for dispatching between different FFT implementations at runtime based on environment variable `FFT`. 
+ +use ff::Field; + +use self::recursive::FFTData; +use crate::arithmetic::FftGroup; + +pub mod baseline; +pub mod parallel; +pub mod recursive; + +/// Runtime dispatcher to concrete FFT implementation +pub fn fft>( + a: &mut [G], + omega: Scalar, + log_n: u32, + data: &FFTData, + inverse: bool, +) { + // Empirically, the parallel implementation requires less memory bandwidth, which is more performant on x86_64. + #[cfg(target_arch = "x86_64")] + parallel::fft(a, omega, log_n, data, inverse); + #[cfg(not(target_arch = "x86_64"))] + recursive::fft(a, omega, log_n, data, inverse) +} + +#[cfg(test)] +mod tests { + use ark_std::{end_timer, start_timer}; + use ff::Field; + use halo2curves::bn256::Fr as Scalar; + use rand_core::OsRng; + + use crate::{arithmetic::best_fft, fft, multicore, poly::EvaluationDomain}; + + #[test] + fn test_fft_recursive() { + let k = 22; + + let domain = EvaluationDomain::::new(1, k); + let n = domain.get_n() as usize; + + let input = vec![Scalar::random(OsRng); n]; + + let num_threads = multicore::current_num_threads(); + + let mut a = input.clone(); + let l_a = a.len(); + let start = start_timer!(|| format!("best fft {} ({})", a.len(), num_threads)); + fft::baseline::fft( + &mut a, + domain.get_omega(), + k, + domain.get_fft_data(l_a), + false, + ); + end_timer!(start); + + let mut c = input.clone(); + let l_c = c.len(); + let start = start_timer!(|| format!("parallel fft {} ({})", a.len(), num_threads)); + fft::parallel::fft( + &mut c, + domain.get_omega(), + k, + domain.get_fft_data(l_c), + false, + ); + end_timer!(start); + + let mut b = input; + let l_b = b.len(); + let start = start_timer!(|| format!("recursive fft {} ({})", a.len(), num_threads)); + fft::recursive::fft( + &mut b, + domain.get_omega(), + k, + domain.get_fft_data(l_b), + false, + ); + end_timer!(start); + + for i in 0..n { + //log_info(format!("{}: {} {}", i, a[i], b[i])); + assert_eq!(a[i], b[i]); + assert_eq!(a[i], c[i]); + } + } + + #[test] + fn test_ifft_recursive() 
{ + let k = 22; + + let domain = EvaluationDomain::::new(1, k); + let n = domain.get_n() as usize; + + let input = vec![Scalar::random(OsRng); n]; + + let mut a = input.clone(); + let l_a = a.len(); + fft::recursive::fft( + &mut a, + domain.get_omega(), + k, + domain.get_fft_data(l_a), + false, + ); + fft::recursive::fft( + &mut a, + domain.get_omega_inv(), // doesn't actually do anything + k, + domain.get_fft_data(l_a), + true, + ); + let ifft_divisor = Scalar::from(n as u64).invert().unwrap(); + + for i in 0..n { + assert_eq!(input[i], a[i] * ifft_divisor); + } + } + + #[test] + fn test_mem_leak() { + let j = 1; + let k = 3; + let domain = EvaluationDomain::new(j, k); + let omega = domain.get_omega(); + let l = 1 << k; + let data = domain.get_fft_data(l); + let mut a = (0..(1 << k)) + .map(|_| Scalar::random(OsRng)) + .collect::>(); + + best_fft(&mut a, omega, k, data, false); + } +} diff --git a/halo2_proofs/src/fft/baseline.rs b/halo2_proofs/src/fft/baseline.rs new file mode 100644 index 0000000000..8af165dc56 --- /dev/null +++ b/halo2_proofs/src/fft/baseline.rs @@ -0,0 +1,128 @@ +//! This contains the baseline FFT implementation + +use ff::Field; + +use super::recursive::FFTData; +use crate::{ + arithmetic::{self, log2_floor, FftGroup}, + multicore, +}; + +/// Performs a radix-$2$ Fast-Fourier Transformation (FFT) on a vector of size +/// $n = 2^k$, when provided `log_n` = $k$ and an element of multiplicative +/// order $n$ called `omega` ($\omega$). The result is that the vector `a`, when +/// interpreted as the coefficients of a polynomial of degree $n - 1$, is +/// transformed into the evaluations of this polynomial at each of the $n$ +/// distinct powers of $\omega$. This transformation is invertible by providing +/// $\omega^{-1}$ in place of $\omega$ and dividing each resulting field element +/// by $n$. +/// +/// This will use multithreading if beneficial. 
+fn best_fft>(a: &mut [G], omega: Scalar, log_n: u32) { + let threads = multicore::current_num_threads(); + let log_threads = log2_floor(threads); + let n = a.len(); + assert_eq!(n, 1 << log_n); + + for k in 0..n { + let rk = arithmetic::bitreverse(k, log_n as usize); + if k < rk { + a.swap(rk, k); + } + } + + //let start = start_measure(format!("twiddles {} ({})", a.len(), threads), false); + // precompute twiddle factors + let twiddles: Vec<_> = (0..(n / 2)) + .scan(Scalar::ONE, |w, _| { + let tw = *w; + *w *= &omega; + Some(tw) + }) + .collect(); + //stop_measure(start); + + if log_n <= log_threads { + let mut chunk = 2_usize; + let mut twiddle_chunk = n / 2; + for _ in 0..log_n { + a.chunks_mut(chunk).for_each(|coeffs| { + let (left, right) = coeffs.split_at_mut(chunk / 2); + + // case when twiddle factor is one + let (a, left) = left.split_at_mut(1); + let (b, right) = right.split_at_mut(1); + let t = b[0]; + b[0] = a[0]; + a[0] += &t; + b[0] -= &t; + + left.iter_mut() + .zip(right.iter_mut()) + .enumerate() + .for_each(|(i, (a, b))| { + let mut t = *b; + t *= &twiddles[(i + 1) * twiddle_chunk]; + *b = *a; + *a += &t; + *b -= &t; + }); + }); + chunk *= 2; + twiddle_chunk /= 2; + } + } else { + recursive_butterfly_arithmetic(a, n, 1, &twiddles) + } +} + +/// This performs recursive butterfly arithmetic +fn recursive_butterfly_arithmetic>( + a: &mut [G], + n: usize, + twiddle_chunk: usize, + twiddles: &[Scalar], +) { + if n == 2 { + let t = a[1]; + a[1] = a[0]; + a[0] += &t; + a[1] -= &t; + } else { + let (left, right) = a.split_at_mut(n / 2); + multicore::join( + || recursive_butterfly_arithmetic(left, n / 2, twiddle_chunk * 2, twiddles), + || recursive_butterfly_arithmetic(right, n / 2, twiddle_chunk * 2, twiddles), + ); + + // case when twiddle factor is one + let (a, left) = left.split_at_mut(1); + let (b, right) = right.split_at_mut(1); + let t = b[0]; + b[0] = a[0]; + a[0] += &t; + b[0] -= &t; + + left.iter_mut() + .zip(right.iter_mut()) + .enumerate() +
.for_each(|(i, (a, b))| { + let mut t = *b; + t *= &twiddles[(i + 1) * twiddle_chunk]; + *b = *a; + *a += &t; + *b -= &t; + }); + } +} + +/// Generic adaptor +pub fn fft>( + a: &mut [G], + omega: Scalar, + log_n: u32, + _data: &FFTData, + _inverse: bool, +) { + best_fft(a, omega, log_n) +} diff --git a/halo2_proofs/src/fft/parallel.rs b/halo2_proofs/src/fft/parallel.rs new file mode 100644 index 0000000000..014cb538d6 --- /dev/null +++ b/halo2_proofs/src/fft/parallel.rs @@ -0,0 +1,282 @@ +//! This module provides common utilities, traits and structures for group, +//! field and polynomial arithmetic. + +use crate::arithmetic::{self, log2_floor, FftGroup}; + +use crate::multicore; +pub use ff::Field; +pub use halo2curves::{CurveAffine, CurveExt}; + +use super::recursive::FFTData; + +/// A constant +pub const SPARSE_TWIDDLE_DEGREE: u32 = 10; + +/// Dispatcher +fn best_fft_opt>(a: &mut [G], omega: Scalar, log_n: u32) { + let threads = multicore::current_num_threads(); + let log_split = log2_floor(threads) as usize; + let n = a.len(); + let sub_n = n >> log_split; + let split_m = 1 << log_split; + + if sub_n >= split_m { + parallel_fft(a, omega, log_n); + } else { + serial_fft(a, omega, log_n); + } +} + +fn serial_fft>(a: &mut [G], omega: Scalar, log_n: u32) { + let n = a.len() as u32; + assert_eq!(n, 1 << log_n); + + for k in 0..n as usize { + let rk = arithmetic::bitreverse(k, log_n as usize); + if k < rk { + a.swap(rk, k); + } + } + + let mut m = 1; + for _ in 0..log_n { + let w_m: Scalar = omega.pow_vartime([u64::from(n / (2 * m)), 0, 0, 0]); + + let mut k = 0; + while k < n { + let mut w = Scalar::ONE; + for j in 0..m { + let mut t = a[(k + j + m) as usize]; + t *= &w; + a[(k + j + m) as usize] = a[(k + j) as usize]; + a[(k + j + m) as usize] -= &t; + a[(k + j) as usize] += &t; + w *= &w_m; + } + + k += 2 * m; + } + + m *= 2; + } +} + +fn serial_split_fft>( + a: &mut [G], + twiddle_lut: &[Scalar], + twiddle_scale: usize, + log_n: u32, +) { + let n = a.len() as 
u32; + assert_eq!(n, 1 << log_n); + + let mut m = 1; + for _ in 0..log_n { + let omega_idx = twiddle_scale * n as usize / (2 * m as usize); // 1/2, 1/4, 1/8, ... + let low_idx = omega_idx % (1 << SPARSE_TWIDDLE_DEGREE); + let high_idx = omega_idx >> SPARSE_TWIDDLE_DEGREE; + let mut w_m = twiddle_lut[low_idx]; + if high_idx > 0 { + w_m *= twiddle_lut[(1 << SPARSE_TWIDDLE_DEGREE) + high_idx]; + } + + let mut k = 0; + while k < n { + let mut w = Scalar::ONE; + for j in 0..m { + let mut t = a[(k + j + m) as usize]; + t *= &w; + a[(k + j + m) as usize] = a[(k + j) as usize]; + a[(k + j + m) as usize] -= &t; + a[(k + j) as usize] += &t; + w *= &w_m; + } + + k += 2 * m; + } + + m *= 2; + } +} + +fn split_radix_fft>( + tmp: &mut [G], + a: &[G], + twiddle_lut: &[Scalar], + n: usize, + sub_fft_offset: usize, + log_split: usize, +) { + let split_m = 1 << log_split; + let sub_n = n >> log_split; + + // we use out-of-place bitreverse here, split_m <= num_threads, so the buffer space is small + // and it is good for data locality + let tmp_filler_val = tmp[0]; + let mut t1 = vec![tmp_filler_val; split_m]; + for i in 0..split_m { + t1[arithmetic::bitreverse(i, log_split)] = a[i * sub_n + sub_fft_offset]; + } + serial_split_fft(&mut t1, twiddle_lut, sub_n, log_split as u32); + + let sparse_degree = SPARSE_TWIDDLE_DEGREE; + let omega_idx = sub_fft_offset; + let low_idx = omega_idx % (1 << sparse_degree); + let high_idx = omega_idx >> sparse_degree; + let mut omega = twiddle_lut[low_idx]; + if high_idx > 0 { + omega *= twiddle_lut[(1 << sparse_degree) + high_idx]; + } + let mut w_m = Scalar::ONE; + for i in 0..split_m { + t1[i] *= &w_m; + tmp[i] = t1[i]; + w_m *= omega; + } +} + +/// Precalculate twiddle factors +fn generate_twiddle_lookup_table( + omega: F, + log_n: u32, + sparse_degree: u32, + with_last_level: bool, +) -> Vec { + let without_last_level = !with_last_level; + let is_lut_len_large = sparse_degree > log_n; + + // dense + if is_lut_len_large { + let mut twiddle_lut =
vec![F::ZERO; (1 << log_n) as usize]; + parallelize(&mut twiddle_lut, |twiddle_lut, start| { + let mut w_n = omega.pow_vartime([start as u64, 0, 0, 0]); + for twiddle_lut in twiddle_lut.iter_mut() { + *twiddle_lut = w_n; + w_n *= omega; + } + }); + return twiddle_lut; + } + + // sparse + let low_degree_lut_len = 1 << sparse_degree; + let high_degree_lut_len = 1 << (log_n - sparse_degree - without_last_level as u32); + let mut twiddle_lut = vec![F::ZERO; low_degree_lut_len + high_degree_lut_len]; + parallelize( + &mut twiddle_lut[..low_degree_lut_len], + |twiddle_lut, start| { + let mut w_n = omega.pow_vartime([start as u64, 0, 0, 0]); + for twiddle_lut in twiddle_lut.iter_mut() { + *twiddle_lut = w_n; + w_n *= omega; + } + }, + ); + let high_degree_omega = omega.pow_vartime([(1 << sparse_degree) as u64, 0, 0, 0]); + parallelize( + &mut twiddle_lut[low_degree_lut_len..], + |twiddle_lut, start| { + let mut w_n = high_degree_omega.pow_vartime([start as u64, 0, 0, 0]); + for twiddle_lut in twiddle_lut.iter_mut() { + *twiddle_lut = w_n; + w_n *= high_degree_omega; + } + }, + ); + twiddle_lut +} + +/// The parallel implementation +fn parallel_fft>(a: &mut [G], omega: Scalar, log_n: u32) { + let n = a.len(); + assert_eq!(n, 1 << log_n); + + let log_split = log2_floor(multicore::current_num_threads()) as usize; + let split_m = 1 << log_split; + let sub_n = n >> log_split; + let twiddle_lut = generate_twiddle_lookup_table(omega, log_n, SPARSE_TWIDDLE_DEGREE, true); + + // split fft + let tmp_filler_val = a[0]; + let mut tmp = vec![tmp_filler_val; n]; + multicore::scope(|scope| { + let a = &*a; + let twiddle_lut = &*twiddle_lut; + for (chunk_idx, tmp) in tmp.chunks_mut(sub_n).enumerate() { + scope.spawn(move |_| { + let split_fft_offset = (chunk_idx * sub_n) >> log_split; + for (i, tmp) in tmp.chunks_mut(split_m).enumerate() { + let split_fft_offset = split_fft_offset + i; + split_radix_fft(tmp, a, twiddle_lut, n, split_fft_offset, log_split); + } + }); + } + }); + + // 
shuffle + parallelize(a, |a, start| { + for (idx, a) in a.iter_mut().enumerate() { + let idx = start + idx; + let i = idx / sub_n; + let j = idx % sub_n; + *a = tmp[j * split_m + i]; + } + }); + + // sub fft + let new_omega = omega.pow_vartime([split_m as u64, 0, 0, 0]); + multicore::scope(|scope| { + for a in a.chunks_mut(sub_n) { + scope.spawn(move |_| { + serial_fft(a, new_omega, log_n - log_split as u32); + }); + } + }); + + // copy & unshuffle + let mask = (1 << log_split) - 1; + parallelize(&mut tmp, |tmp, start| { + for (idx, tmp) in tmp.iter_mut().enumerate() { + let idx = start + idx; + *tmp = a[idx]; + } + }); + parallelize(a, |a, start| { + for (idx, a) in a.iter_mut().enumerate() { + let idx = start + idx; + *a = tmp[sub_n * (idx & mask) + (idx >> log_split)]; + } + }); +} + +/// This simple utility function will parallelize an operation that is to be +/// performed over a mutable slice. +fn parallelize(v: &mut [T], f: F) { + let n = v.len(); + let num_threads = multicore::current_num_threads(); + let mut chunk = n / num_threads; + if chunk < num_threads { + chunk = n; + } + + multicore::scope(|scope| { + for (chunk_num, v) in v.chunks_mut(chunk).enumerate() { + let f = f.clone(); + scope.spawn(move |_| { + let start = chunk_num * chunk; + f(v, start); + }); + } + }); +} + +/// Generic adaptor +pub fn fft>( + data_in: &mut [G], + omega: Scalar, + log_n: u32, + _data: &FFTData, + _inverse: bool, +) { + best_fft_opt(data_in, omega, log_n) +} diff --git a/halo2_proofs/src/fft/recursive.rs b/halo2_proofs/src/fft/recursive.rs new file mode 100644 index 0000000000..e905904463 --- /dev/null +++ b/halo2_proofs/src/fft/recursive.rs @@ -0,0 +1,461 @@ +//! This contains the recursive FFT. 
+ +use crate::{ + arithmetic::{self, parallelize, FftGroup}, + multicore, +}; + +pub use ff::Field; +pub use halo2curves::{CurveAffine, CurveExt}; + +/// FFTStage +#[derive(Clone, Debug)] +pub struct FFTStage { + radix: usize, + length: usize, +} + +/// FFT stages +fn get_stages(size: usize, radixes: Vec) -> Vec { + let mut stages: Vec = vec![]; + + let mut n = size; + + // Use the specified radices + for &radix in &radixes { + n /= radix; + stages.push(FFTStage { radix, length: n }); + } + + // Fill in the rest of the tree if needed + let mut p = 2; + while n > 1 { + while n % p != 0 { + if p == 4 { + p = 2; + } + } + n /= p; + stages.push(FFTStage { + radix: p, + length: n, + }); + } + + /*for i in 0..stages.len() { + log_info(format!("Stage {}: {}, {}", i, stages[i].radix, stages[i].length)); + }*/ + + stages +} + +/// FFTData +#[derive(Clone, Debug)] +pub struct FFTData { + n: usize, + + stages: Vec, + + f_twiddles: Vec>, + inv_twiddles: Vec>, + //scratch: Vec, +} + +impl Default for FFTData { + fn default() -> Self { + Self { + n: Default::default(), + stages: Default::default(), + f_twiddles: Default::default(), + inv_twiddles: Default::default(), + } + } +} + +impl FFTData { + /// Create FFT data + pub fn new(n: usize, omega: F, omega_inv: F) -> Self { + let stages = get_stages(n, vec![]); + let mut f_twiddles = vec![]; + let mut inv_twiddles = vec![]; + let mut scratch = vec![F::ZERO; n]; + + // Generate stage twiddles + for inv in 0..2 { + let inverse = inv == 0; + let o = if inverse { omega_inv } else { omega }; + let stage_twiddles = if inverse { + &mut inv_twiddles + } else { + &mut f_twiddles + }; + + let twiddles = &mut scratch; + + // Twiddles + parallelize(twiddles, |twiddles, start| { + let w_m = o; + let mut w = o.pow_vartime([start as u64]); + for value in twiddles.iter_mut() { + *value = w; + w *= w_m; + } + }); + + // Re-order twiddles for cache friendliness + let num_stages = stages.len(); + stage_twiddles.resize(num_stages, vec![]); + for l 
in 0..num_stages { + let radix = stages[l].radix; + let stage_length = stages[l].length; + + let num_twiddles = stage_length * (radix - 1); + stage_twiddles[l].resize(num_twiddles + 1, F::ZERO); + + // Set j + stage_twiddles[l][num_twiddles] = twiddles[(twiddles.len() * 3) / 4]; + + let stride = n / (stage_length * radix); + let mut tws = vec![0usize; radix - 1]; + for i in 0..stage_length { + for j in 0..radix - 1 { + stage_twiddles[l][i * (radix - 1) + j] = twiddles[tws[j]]; + tws[j] += (j + 1) * stride; + } + } + } + } + + Self { + n, + stages, + f_twiddles, + inv_twiddles, + //scratch, + } + } + + /// Return private field `n` + pub fn get_n(&self) -> usize { + self.n + } +} + +/// Radix 2 butterfly +fn butterfly_2>( + out: &mut [G], + twiddles: &[Scalar], + stage_length: usize, +) { + let mut out_offset = 0; + let mut out_offset2 = stage_length; + + let t = out[out_offset2]; + out[out_offset2] = out[out_offset] - &t; + out[out_offset] += &t; + out_offset2 += 1; + out_offset += 1; + + for twiddle in twiddles[1..stage_length].iter() { + let t = out[out_offset2] * twiddle; + out[out_offset2] = out[out_offset] - &t; + out[out_offset] += &t; + out_offset2 += 1; + out_offset += 1; + } +} + +/// Radix 2 butterfly +fn butterfly_2_parallel>( + out: &mut [G], + twiddles: &[Scalar], + _stage_length: usize, + num_threads: usize, +) { + let n = out.len(); + let mut chunk = n / num_threads; + if chunk < num_threads { + chunk = n; + } + + multicore::scope(|scope| { + let (part_a, part_b) = out.split_at_mut(n / 2); + for (i, (part0, part1)) in part_a + .chunks_mut(chunk) + .zip(part_b.chunks_mut(chunk)) + .enumerate() + { + scope.spawn(move |_| { + let offset = i * chunk; + for k in 0..part0.len() { + let t = part1[k] * &twiddles[offset + k]; + part1[k] = part0[k] - &t; + part0[k] += &t; + } + }); + } + }); +} + +/// Radix 4 butterfly +fn butterfly_4>( + out: &mut [G], + twiddles: &[Scalar], + stage_length: usize, +) { + let j = twiddles[twiddles.len() - 1]; + let mut tw = 0; 
+ + /* Case twiddle == one */ + { + let i0 = 0; + let i1 = stage_length; + let i2 = stage_length * 2; + let i3 = stage_length * 3; + + let z0 = out[i0]; + let z1 = out[i1]; + let z2 = out[i2]; + let z3 = out[i3]; + + let t1 = z0 + &z2; + let t2 = z1 + &z3; + let t3 = z0 - &z2; + let t4j = (z1 - &z3) * &j; + + out[i0] = t1 + &t2; + out[i1] = t3 - &t4j; + out[i2] = t1 - &t2; + out[i3] = t3 + &t4j; + + tw += 3; + } + + for k in 1..stage_length { + let i0 = k; + let i1 = k + stage_length; + let i2 = k + stage_length * 2; + let i3 = k + stage_length * 3; + + let z0 = out[i0]; + let z1 = out[i1] * &twiddles[tw]; + let z2 = out[i2] * &twiddles[tw + 1]; + let z3 = out[i3] * &twiddles[tw + 2]; + + let t1 = z0 + &z2; + let t2 = z1 + &z3; + let t3 = z0 - &z2; + let t4j = (z1 - &z3) * &j; + + out[i0] = t1 + &t2; + out[i1] = t3 - &t4j; + out[i2] = t1 - &t2; + out[i3] = t3 + &t4j; + + tw += 3; + } +} + +/// Radix 4 butterfly +fn butterfly_4_parallel>( + out: &mut [G], + twiddles: &[Scalar], + _stage_length: usize, + num_threads: usize, +) { + let j = twiddles[twiddles.len() - 1]; + + let n = out.len(); + let mut chunk = n / num_threads; + if chunk < num_threads { + chunk = n; + } + multicore::scope(|scope| { + //let mut parts: Vec<&mut [F]> = out.chunks_mut(4).collect(); + //out.chunks_mut(4).map(|c| c.chunks_mut(chunk)).fold(predicate) + let (part_a, part_b) = out.split_at_mut(n / 2); + let (part_aa, part_ab) = part_a.split_at_mut(n / 4); + let (part_ba, part_bb) = part_b.split_at_mut(n / 4); + for (i, (((part0, part1), part2), part3)) in part_aa + .chunks_mut(chunk) + .zip(part_ab.chunks_mut(chunk)) + .zip(part_ba.chunks_mut(chunk)) + .zip(part_bb.chunks_mut(chunk)) + .enumerate() + { + scope.spawn(move |_| { + let offset = i * chunk; + let mut tw = offset * 3; + for k in 0..part1.len() { + let z0 = part0[k]; + let z1 = part1[k] * &twiddles[tw]; + let z2 = part2[k] * &twiddles[tw + 1]; + let z3 = part3[k] * &twiddles[tw + 2]; + + let t1 = z0 + &z2; + let t2 = z1 + &z3; + let 
t3 = z0 - &z2; + let t4j = (z1 - &z3) * &j; + + part0[k] = t1 + &t2; + part1[k] = t3 - &t4j; + part2[k] = t1 - &t2; + part3[k] = t3 + &t4j; + + tw += 3; + } + }); + } + }); +} + +/// Inner recursion +#[allow(clippy::too_many_arguments)] +fn recursive_fft_inner>( + data_in: &[G], + data_out: &mut [G], + twiddles: &Vec>, + stages: &Vec, + in_offset: usize, + stride: usize, + level: usize, + num_threads: usize, +) { + let radix = stages[level].radix; + let stage_length = stages[level].length; + + if num_threads > 1 { + if stage_length == 1 { + for i in 0..radix { + data_out[i] = data_in[in_offset + i * stride]; + } + } else { + let num_threads_recursive = if num_threads >= radix { + radix + } else { + num_threads + }; + parallelize_count(data_out, num_threads_recursive, |data_out, i| { + let num_threads_in_recursion = if num_threads < radix { + 1 + } else { + (num_threads + i) / radix + }; + recursive_fft_inner( + data_in, + data_out, + twiddles, + stages, + in_offset + i * stride, + stride * radix, + level + 1, + num_threads_in_recursion, + ) + }); + } + match radix { + 2 => butterfly_2_parallel(data_out, &twiddles[level], stage_length, num_threads), + 4 => butterfly_4_parallel(data_out, &twiddles[level], stage_length, num_threads), + _ => unimplemented!("radix unsupported"), + } + } else { + if stage_length == 1 { + for i in 0..radix { + data_out[i] = data_in[in_offset + i * stride]; + } + } else { + for i in 0..radix { + recursive_fft_inner( + data_in, + &mut data_out[i * stage_length..(i + 1) * stage_length], + twiddles, + stages, + in_offset + i * stride, + stride * radix, + level + 1, + num_threads, + ); + } + } + match radix { + 2 => butterfly_2(data_out, &twiddles[level], stage_length), + 4 => butterfly_4(data_out, &twiddles[level], stage_length), + _ => unimplemented!("radix unsupported"), + } + } +} + +/// Todo: Brechts impl starts here +fn recursive_fft>( + data: &FFTData, + data_in: &mut Vec, + inverse: bool, +) { + let num_threads = 
multicore::current_num_threads(); + //let start = start_measure(format!("recursive fft {} ({})", data_in.len(), num_threads), false); + + // TODO: reuse scratch buffer between FFTs + //let start_mem = start_measure(format!("alloc"), false); + let filler = data_in[0]; + let mut scratch = vec![filler; data_in.len()]; + //stop_measure(start_mem); + + recursive_fft_inner( + data_in, + &mut /*data.*/scratch, + if inverse { + &data.inv_twiddles + } else { + &data.f_twiddles + }, + &data.stages, + 0, + 1, + 0, + num_threads, + ); + //let duration = stop_measure(start); + + //let start = start_measure(format!("copy"), false); + // Will simply swap the vector's buffer, no data is actually copied + std::mem::swap(data_in, &mut /*data.*/scratch); + //stop_measure(start); +} + +/// This simple utility function will parallelize an operation that is to be +/// performed over a mutable slice. +fn parallelize_count( + v: &mut [T], + num_threads: usize, + f: F, +) { + let n = v.len(); + let mut chunk = n / num_threads; + if chunk < num_threads { + chunk = n; + } + + multicore::scope(|scope| { + for (chunk_num, v) in v.chunks_mut(chunk).enumerate() { + let f = f.clone(); + scope.spawn(move |_| { + f(v, chunk_num); + }); + } + }); +} + +/// Generic adaptor +pub fn fft>( + data_in: &mut [G], + _omega: Scalar, + _log_n: u32, + data: &FFTData, + inverse: bool, +) { + let orig_len = data_in.len(); + let mut data_in_vec = data_in.to_vec(); + recursive_fft(data, &mut data_in_vec, inverse); + data_in.copy_from_slice(&data_in_vec); + assert_eq!(orig_len, data_in.len()); +} diff --git a/halo2_proofs/src/lib.rs b/halo2_proofs/src/lib.rs index a91729d645..992fb5562b 100644 --- a/halo2_proofs/src/lib.rs +++ b/halo2_proofs/src/lib.rs @@ -30,6 +30,7 @@ lazy_static! 
{ pub mod arithmetic; pub mod circuit; +pub mod fft; pub use halo2curves; mod multicore; pub mod plonk; diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index 3831ca93d6..abe8340440 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -400,7 +400,7 @@ impl Evaluator { .map(|advice_polys| { advice_polys .par_iter() - .map(|poly| domain.coeff_to_extended(poly.clone())) + .map(|poly| domain.coeff_to_extended(&poly)) .collect() }) .collect(); @@ -412,7 +412,7 @@ impl Evaluator { .map(|instance_polys| { instance_polys .par_iter() - .map(|poly| domain.coeff_to_extended(poly.clone())) + .map(|poly| domain.coeff_to_extended(&poly)) .collect() }) .collect(); @@ -551,8 +551,10 @@ impl Evaluator { // The outer vector has capacity self.lookups.len() // The middle vector has capacity domain.extended_len() // The inner vector has capacity + log::trace!("num lookups: {}", lookups.len()); + #[cfg(feature = "mv-lookup")] - let mut inputs_inv_sum_cosets: Vec<_> = lookups + let inputs_inv_sum_cosets: Vec<_> = lookups .par_iter() .enumerate() .map(|(n, lookup)| { @@ -603,8 +605,8 @@ impl Evaluator { ( inputs_inv_sums, - domain.coeff_to_extended(lookup.phi_poly.clone()), - domain.coeff_to_extended(lookup.m_poly.clone()), + domain.coeff_to_extended(&lookup.phi_poly), + domain.coeff_to_extended(&lookup.m_poly), ) }) .collect(); @@ -829,7 +831,7 @@ impl Evaluator { // Shuffle constraints let start = std::time::Instant::now(); for (n, shuffle) in shuffles.iter().enumerate() { - let product_coset = pk.vk.domain.coeff_to_extended(shuffle.product_poly.clone()); + let product_coset = pk.vk.domain.coeff_to_extended(&shuffle.product_poly); // Shuffle constraints parallelize(&mut values, |values, start| { diff --git a/halo2_proofs/src/plonk/keygen.rs b/halo2_proofs/src/plonk/keygen.rs index f684def6fe..8db11b40da 100644 --- a/halo2_proofs/src/plonk/keygen.rs +++ b/halo2_proofs/src/plonk/keygen.rs @@ -355,7 +355,7 
@@ where let fixed_cosets = fixed_polys .iter() - .map(|poly| vk.domain.coeff_to_extended(poly.clone())) + .map(|poly| vk.domain.coeff_to_extended(&poly)) .collect(); let permutation_pk = assembly @@ -367,7 +367,7 @@ where let mut l0 = vk.domain.empty_lagrange(); l0[0] = C::Scalar::ONE; let l0 = vk.domain.lagrange_to_coeff(l0); - let l0 = vk.domain.coeff_to_extended(l0); + let l0 = vk.domain.coeff_to_extended(&l0); // Compute l_blind(X) which evaluates to 1 for each blinding factor row // and 0 otherwise over the domain. @@ -376,14 +376,14 @@ where *evaluation = C::Scalar::ONE; } let l_blind = vk.domain.lagrange_to_coeff(l_blind); - let l_blind = vk.domain.coeff_to_extended(l_blind); + let l_blind = vk.domain.coeff_to_extended(&l_blind); // Compute l_last(X) which evaluates to 1 on the first inactive row (just // before the blinding factors) and 0 otherwise over the domain let mut l_last = vk.domain.empty_lagrange(); l_last[params.n() as usize - cs.blinding_factors() - 1] = C::Scalar::ONE; let l_last = vk.domain.lagrange_to_coeff(l_last); - let l_last = vk.domain.coeff_to_extended(l_last); + let l_last = vk.domain.coeff_to_extended(&l_last); // Compute l_active_row(X) let one = C::Scalar::ONE; diff --git a/halo2_proofs/src/plonk/permutation/keygen.rs b/halo2_proofs/src/plonk/permutation/keygen.rs index 2ac6eb8e76..3e91f4a7b7 100644 --- a/halo2_proofs/src/plonk/permutation/keygen.rs +++ b/halo2_proofs/src/plonk/permutation/keygen.rs @@ -384,7 +384,7 @@ pub(crate) fn build_pk<'params, C: CurveAffine, P: Params<'params, C>>( for (x, coset) in o.iter_mut().enumerate() { let i = start + x; let poly = polys[i].clone(); - *coset = domain.coeff_to_extended(poly); + *coset = domain.coeff_to_extended(&poly); } }); } diff --git a/halo2_proofs/src/plonk/permutation/prover.rs b/halo2_proofs/src/plonk/permutation/prover.rs index d6b108554d..3565723399 100644 --- a/halo2_proofs/src/plonk/permutation/prover.rs +++ b/halo2_proofs/src/plonk/permutation/prover.rs @@ -173,7 +173,7 @@ 
impl Argument { let z = domain.lagrange_to_coeff(z); let permutation_product_poly = z.clone(); - let permutation_product_coset = domain.coeff_to_extended(z.clone()); + let permutation_product_coset = domain.coeff_to_extended(&z); let permutation_product_commitment = permutation_product_commitment_projective.to_affine(); diff --git a/halo2_proofs/src/poly/domain.rs b/halo2_proofs/src/poly/domain.rs index 3bf12643c6..8d2252a6e6 100644 --- a/halo2_proofs/src/poly/domain.rs +++ b/halo2_proofs/src/poly/domain.rs @@ -3,14 +3,15 @@ use crate::{ arithmetic::{best_fft, parallelize}, + fft::recursive::FFTData, plonk::Assigned, }; use super::{Coeff, ExtendedLagrangeCoeff, LagrangeCoeff, Polynomial, Rotation}; -use ff::WithSmallOrderMulGroup; -use group::ff::{BatchInvert, Field}; -use std::marker::PhantomData; +use group::ff::{BatchInvert, Field, WithSmallOrderMulGroup}; + +use std::{collections::HashMap, marker::PhantomData}; /// This structure contains precomputed constants and other details needed for /// performing operations on an evaluation domain of size $2^k$ and an extended @@ -31,6 +32,9 @@ pub struct EvaluationDomain { extended_ifft_divisor: F, t_evaluations: Vec, barycentric_weight: F, + + // Recursive stuff + fft_data: HashMap>, } impl> EvaluationDomain { @@ -71,19 +75,22 @@ impl> EvaluationDomain { extended_omega = extended_omega.square(); } let extended_omega = extended_omega; - let mut extended_omega_inv = extended_omega; // Inversion computed later // Get omega, the 2^{k}'th root of unity (i.e. n'th root of unity) // The loop computes omega = extended_omega ^ {2 ^ (extended_k - k)} // = (omega^{2 ^ (S - extended_k)}) ^ {2 ^ (extended_k - k)} // = omega ^ {2 ^ (S - k)}. // Notice that omega ^ {2^k} = omega ^ {2^S} = 1. 
+ let mut omegas = Vec::with_capacity((extended_k - k + 1) as usize); let mut omega = extended_omega; + omegas.push(omega); for _ in k..extended_k { omega = omega.square(); + omegas.push(omega); } let omega = omega; - let mut omega_inv = omega; // Inversion computed later + omegas.reverse(); + let mut omegas_inv = omegas.clone(); // Inversion computed later // We use zeta here because we know it generates a coset, and it's available // already. @@ -96,8 +103,8 @@ impl> EvaluationDomain { { // Compute the evaluations of t(X) = X^n - 1 in the coset evaluation domain. // We don't have to compute all of them, because it will repeat. - let orig = F::ZETA.pow_vartime([n, 0, 0, 0]); - let step = extended_omega.pow_vartime([n, 0, 0, 0]); + let orig = F::ZETA.pow_vartime([n]); + let step = extended_omega.pow_vartime([n]); let mut cur = orig; loop { t_evaluations.push(cur); @@ -130,10 +137,18 @@ impl> EvaluationDomain { .chain(Some(&mut ifft_divisor)) .chain(Some(&mut extended_ifft_divisor)) .chain(Some(&mut barycentric_weight)) - .chain(Some(&mut extended_omega_inv)) - .chain(Some(&mut omega_inv)) + .chain(&mut omegas_inv) .batch_invert(); + let omega_inv = omegas_inv[0]; + let extended_omega_inv = *omegas_inv.last().unwrap(); + let mut fft_data = HashMap::new(); + for (i, (omega, omega_inv)) in omegas.into_iter().zip(omegas_inv).enumerate() { + let intermediate_k = k as usize + i; + let len = 1usize << intermediate_k; + fft_data.insert(len, FFTData::::new(len, omega, omega_inv)); + } + EvaluationDomain { n, k, @@ -149,6 +164,7 @@ impl> EvaluationDomain { extended_ifft_divisor, t_evaluations, barycentric_weight, + fft_data, } } @@ -164,6 +180,19 @@ impl> EvaluationDomain { } } + /// + pub fn lagrange_assigned_from_vec( + &self, + values: Vec>, + ) -> Polynomial, LagrangeCoeff> { + assert_eq!(values.len(), self.n as usize); + + Polynomial { + values, + _marker: PhantomData, + } + } + /// Obtains a polynomial in coefficient form when given a vector of /// coefficients of size 
`n`; panics if the provided vector is the wrong /// length. @@ -176,6 +205,58 @@ impl> EvaluationDomain { } } + /// Obtains a polynomial in ExtendedLagrange form when given a vector of + /// Lagrange polynomials with total size `extended_n`; panics if the + /// provided vector is the wrong length. + pub fn lagrange_vec_to_extended( + &self, + values: Vec>, + ) -> Polynomial { + assert_eq!(values.len(), self.extended_len() >> self.k); + assert_eq!(values[0].len(), self.n as usize); + + // transpose the values in parallel + let mut transposed = vec![vec![F::ZERO; values.len()]; self.n as usize]; + values.into_iter().enumerate().for_each(|(i, p)| { + parallelize(&mut transposed, |transposed, start| { + for (transposed, p) in transposed.iter_mut().zip(p.values[start..].iter()) { + transposed[i] = *p; + } + }); + }); + + Polynomial { + values: transposed.into_iter().flatten().collect(), + _marker: PhantomData, + } + } + + /// Obtains a polynomial in ExtendedLagrange form when given a vector of + /// Lagrange polynomials with total size `extended_n`; panics if the + /// provided vector is the wrong length. 
+ pub fn extended_from_lagrange_vec( + &self, + values: Vec>, + ) -> Polynomial { + assert_eq!(values.len(), self.extended_len() >> self.k); + assert_eq!(values[0].len(), self.n as usize); + + // transpose the values in parallel + let mut transposed = vec![vec![F::ZERO; values.len()]; self.n as usize]; + values.into_iter().enumerate().for_each(|(i, p)| { + parallelize(&mut transposed, |transposed, start| { + for (transposed, p) in transposed.iter_mut().zip(p.values[start..].iter()) { + transposed[i] = *p; + } + }); + }); + + Polynomial { + values: transposed.into_iter().flatten().collect(), + _marker: PhantomData, + } + } + /// Returns an empty (zero) polynomial in the coefficient basis pub fn empty_coeff(&self) -> Polynomial { Polynomial { @@ -194,7 +275,7 @@ impl> EvaluationDomain { /// Returns an empty (zero) polynomial in the Lagrange coefficient basis, with /// deferred inversions. - pub fn empty_lagrange_assigned(&self) -> Polynomial, LagrangeCoeff> { + pub(crate) fn empty_lagrange_assigned(&self) -> Polynomial, LagrangeCoeff> { Polynomial { values: vec![F::ZERO.into(); self.n as usize], _marker: PhantomData, @@ -235,7 +316,7 @@ impl> EvaluationDomain { assert_eq!(a.values.len(), 1 << self.k); // Perform inverse FFT to obtain the polynomial in coefficient form - Self::ifft(&mut a.values, self.omega_inv, self.k, self.ifft_divisor); + self.ifft(&mut a.values, self.omega_inv, self.k, self.ifft_divisor); Polynomial { values: a.values, @@ -247,13 +328,92 @@ impl> EvaluationDomain { /// evaluation domain, rotating by `rotation` if desired. 
pub fn coeff_to_extended( &self, - mut a: Polynomial, + p: &Polynomial, ) -> Polynomial { + assert_eq!(p.values.len(), 1 << self.k); + + let mut a = Vec::with_capacity(self.extended_len()); + a.extend(&p.values); + + self.distribute_powers_zeta(&mut a, true); + a.resize(self.extended_len(), F::ZERO); + self.fft_inner(&mut a, self.extended_omega, self.extended_k, false); + + Polynomial { + values: a, + _marker: PhantomData, + } + } + + /// This takes us from an n-length coefficient vector into parts of the + /// extended evaluation domain. For example, for a polynomial with size n, + /// and an extended domain of size mn, we can compute all parts + /// independently, which are + /// `FFT(f(zeta * X), n)` + /// `FFT(f(zeta * extended_omega * X), n)` + /// ... + /// `FFT(f(zeta * extended_omega^{m-1} * X), n)` + pub fn coeff_to_extended_parts( + &self, + a: &Polynomial, + ) -> Vec> { + assert_eq!(a.values.len(), 1 << self.k); + + let num_parts = self.extended_len() >> self.k; + let mut extended_omega_factor = F::ONE; + (0..num_parts) + .map(|_| { + let part = self.coeff_to_extended_part(a.clone(), extended_omega_factor); + extended_omega_factor *= self.extended_omega; + part + }) + .collect() + } + + /// This takes us from several n-length coefficient vectors each into parts + /// of the extended evaluation domain. For example, for a polynomial with + /// size n, and an extended domain of size mn, we can compute all parts + /// independently, which are + /// `FFT(f(zeta * X), n)` + /// `FFT(f(zeta * extended_omega * X), n)` + /// ... 
+ /// `FFT(f(zeta * extended_omega^{m-1} * X), n)` + pub fn batched_coeff_to_extended_parts( + &self, + a: &[Polynomial], + ) -> Vec>> { + assert_eq!(a[0].values.len(), 1 << self.k); + + let mut extended_omega_factor = F::ONE; + let num_parts = self.extended_len() >> self.k; + (0..num_parts) + .map(|_| { + let a_lagrange = a + .iter() + .map(|poly| self.coeff_to_extended_part(poly.clone(), extended_omega_factor)) + .collect(); + extended_omega_factor *= self.extended_omega; + a_lagrange + }) + .collect() + } + + /// This takes us from an n-length coefficient vector into a part of the + /// extended evaluation domain. For example, for a polynomial with size n, + /// and an extended domain of size mn, we can compute one of the m parts + /// separately, which is + /// `FFT(f(zeta * extended_omega_factor * X), n)` + /// where `extended_omega_factor` is `extended_omega^i` with `i` in `[0, m)`. + pub fn coeff_to_extended_part( + &self, + mut a: Polynomial, + extended_omega_factor: F, + ) -> Polynomial { assert_eq!(a.values.len(), 1 << self.k); - self.distribute_powers_zeta(&mut a.values, true); - a.values.resize(self.extended_len(), F::ZERO); - best_fft(&mut a.values, self.extended_omega, self.extended_k); + self.distribute_powers(&mut a.values, self.g_coset * extended_omega_factor); + let data = self.get_fft_data(a.len()); + best_fft(&mut a.values, self.omega, self.k, data, false); Polynomial { values: a.values, @@ -290,7 +450,7 @@ impl> EvaluationDomain { assert_eq!(a.values.len(), self.extended_len()); // Inverse FFT - Self::ifft( + self.ifft( &mut a.values, self.extended_omega_inv, self.extended_k, @@ -310,6 +470,72 @@ impl> EvaluationDomain { a.values } + /// This takes us from the a list of lagrange-based polynomials with + /// different degrees and gets their extended lagrange-based summation. 
+ pub fn lagrange_vecs_to_extended( + &self, + mut a: Vec>>, + ) -> Polynomial { + let mut result_poly = if a[a.len() - 1].len() == 1 << (self.extended_k - self.k) { + self.lagrange_vec_to_extended(a.pop().unwrap()) + } else { + self.empty_extended() + }; + + // Transform from each cluster of lagrange representations to coeff representations. + let mut ifft_divisor = self.extended_ifft_divisor; + let mut omega_inv = self.extended_omega_inv; + { + let mut i = a.last().unwrap().len() << self.k; + while i < (1 << self.extended_k) { + ifft_divisor = ifft_divisor + ifft_divisor; + omega_inv = omega_inv * omega_inv; + i <<= 1; + } + } + + let mut result = vec![F::ZERO; 1 << self.extended_k as usize]; + for (i, a_parts) in a.into_iter().enumerate().rev() { + // transpose the values in parallel + assert_eq!(1 << i, a_parts.len()); + let mut a_poly: Vec = { + let mut transposed = vec![vec![F::ZERO; a_parts.len()]; self.n as usize]; + a_parts.into_iter().enumerate().for_each(|(j, p)| { + parallelize(&mut transposed, |transposed, start| { + for (transposed, p) in transposed.iter_mut().zip(p.values[start..].iter()) { + transposed[j] = *p; + } + }); + }); + transposed.into_iter().flatten().collect() + }; + + self.ifft(&mut a_poly, omega_inv, self.k + i as u32, ifft_divisor); + ifft_divisor = ifft_divisor + ifft_divisor; + omega_inv = omega_inv * omega_inv; + + parallelize(&mut result[0..(self.n << i) as usize], |result, start| { + for (other, current) in result.iter_mut().zip(a_poly[start..].iter()) { + *other += current; + } + }); + } + let data = self.get_fft_data(result.len()); + best_fft( + &mut result, + self.extended_omega, + self.extended_k, + data, + false, + ); + parallelize(&mut result_poly.values, |values, start| { + for (value, other) in values.iter_mut().zip(result[start..].iter()) { + *value += other; + } + }); + result_poly + } + /// This divides the polynomial (in the extended domain) by the vanishing /// polynomial of the $2^k$ size domain. 
pub fn divide_by_vanishing_poly( @@ -358,8 +584,23 @@ impl> EvaluationDomain { }); } - fn ifft(a: &mut [F], omega_inv: F, log_n: u32, divisor: F) { - best_fft(a, omega_inv, log_n); + /// Given a slice of group elements `[a_0, a_1, a_2, ...]`, this returns + /// `[a_0, [c]a_1, [c^2]a_2, [c^3]a_3, [c^4]a_4, ...]`, + /// + fn distribute_powers(&self, a: &mut [F], c: F) { + parallelize(a, |a, index| { + let mut c_power = c.pow_vartime([index as u64]); + for a in a { + *a *= c_power; + c_power *= c; + } + }); + } + + fn ifft(&self, a: &mut Vec, omega_inv: F, log_n: u32, divisor: F) { + let fft_data = self.get_fft_data(a.len()); + crate::fft::parallel::fft(a, omega_inv, log_n, fft_data, true); + // self.fft_inner(a, omega_inv, log_n, true); parallelize(a, |a, _| { for a in a { // Finish iFFT @@ -368,6 +609,11 @@ impl> EvaluationDomain { }); } + fn fft_inner(&self, a: &mut Vec, omega: F, log_n: u32, inverse: bool) { + let fft_data = self.get_fft_data(a.len()); + best_fft(a, omega, log_n, fft_data, inverse) + } + /// Get the size of the domain pub fn k(&self) -> u32 { self.k @@ -482,6 +728,18 @@ impl> EvaluationDomain { omega: &self.omega, } } + + /// Get the private field `n` + pub fn get_n(&self) -> u64 { + self.n + } + + /// Get the private `fft_data` + pub fn get_fft_data(&self, l: usize) -> &FFTData { + self.fft_data + .get(&l) + .expect("log_2(l) must be in k..=extended_k") + } } /// Represents the minimal parameters that determine an `EvaluationDomain`. 
@@ -563,3 +821,176 @@ fn test_l_i() { assert_eq!(eval_polynomial(&l[(8 - i) % 8][..], x), evaluations[7 - i]); } } + +#[test] +fn test_coeff_to_extended_part() { + use halo2curves::pasta::pallas::Scalar; + use rand_core::OsRng; + + let domain = EvaluationDomain::::new(1, 3); + let rng = OsRng; + let mut poly = domain.empty_coeff(); + assert_eq!(poly.len(), 8); + for value in poly.iter_mut() { + *value = Scalar::random(rng); + } + + let want = domain.coeff_to_extended(&poly); + let got = { + let parts = domain.coeff_to_extended_parts(&poly); + domain.lagrange_vec_to_extended(parts) + }; + assert_eq!(want.values, got.values); +} + +#[test] +fn bench_coeff_to_extended_parts() { + use halo2curves::pasta::pallas::Scalar; + use rand_core::OsRng; + use std::time::Instant; + + let k = 20; + let domain = EvaluationDomain::::new(3, k); + let rng = OsRng; + let mut poly1 = domain.empty_coeff(); + assert_eq!(poly1.len(), 1 << k); + + for value in poly1.iter_mut() { + *value = Scalar::random(rng); + } + + let poly2 = poly1.clone(); + + let coeff_to_extended_timer = Instant::now(); + let _ = domain.coeff_to_extended(&poly1); + println!( + "domain.coeff_to_extended time: {}s", + coeff_to_extended_timer.elapsed().as_secs_f64() + ); + + let coeff_to_extended_parts_timer = Instant::now(); + let _ = domain.coeff_to_extended_parts(&poly2); + println!( + "domain.coeff_to_extended_parts time: {}s", + coeff_to_extended_parts_timer.elapsed().as_secs_f64() + ); +} + +#[test] +fn test_lagrange_vecs_to_extended() { + use halo2curves::pasta::pallas::Scalar; + use rand_core::OsRng; + + let rng = OsRng; + let domain = EvaluationDomain::::new(8, 10); + let mut poly_vec = vec![]; + let mut poly_lagrange_vecs = vec![]; + let mut want = domain.empty_extended(); + let mut omega = domain.extended_omega; + for i in (0..(domain.extended_k - domain.k + 1)).rev() { + let mut poly = vec![Scalar::zero(); (1 << i) * domain.n as usize]; + for value in poly.iter_mut() { + *value = Scalar::random(rng); + } + 
// poly under coeff representation. + poly_vec.push(poly.clone()); + // poly under lagrange vector representation. + let mut poly2 = poly.clone(); + let data = domain.get_fft_data(poly2.len()); + best_fft(&mut poly2, omega, i + domain.k, data, false); + let transposed_poly: Vec> = (0..(1 << i)) + .map(|j| { + let mut p = domain.empty_lagrange(); + for k in 0..domain.n { + p[k as usize] = poly2[j + (k as usize) * (1 << i)]; + } + p + }) + .collect(); + poly_lagrange_vecs.push(transposed_poly); + // poly under extended representation. + poly.resize(domain.extended_len(), Scalar::zero()); + let data = domain.get_fft_data(poly.len()); + best_fft( + &mut poly, + domain.extended_omega, + domain.extended_k, + data, + false, + ); + let poly = { + let mut p = domain.empty_extended(); + p.values = poly; + p + }; + want = want + &poly; + omega = omega * omega; + } + poly_lagrange_vecs.reverse(); + let got = domain.lagrange_vecs_to_extended(poly_lagrange_vecs); + assert_eq!(want.values, got.values); +} + +#[test] +fn bench_lagrange_vecs_to_extended() { + use halo2curves::pasta::pallas::Scalar; + use rand_core::OsRng; + use std::time::Instant; + + let rng = OsRng; + let domain = EvaluationDomain::::new(8, 10); + let mut poly_vec = vec![]; + let mut poly_lagrange_vecs = vec![]; + let mut poly_extended_vecs = vec![]; + let mut omega = domain.extended_omega; + + for i in (0..(domain.extended_k - domain.k + 1)).rev() { + let mut poly = vec![Scalar::zero(); (1 << i) * domain.n as usize]; + for value in poly.iter_mut() { + *value = Scalar::random(rng); + } + // poly under coeff representation. + poly_vec.push(poly.clone()); + // poly under lagrange vector representation. 
+ let mut poly2 = poly.clone(); + let data = domain.get_fft_data(poly2.len()); + best_fft(&mut poly2, omega, i + domain.k, data, false); + let transposed_poly: Vec> = (0..(1 << i)) + .map(|j| { + let mut p = domain.empty_lagrange(); + for k in 0..domain.n { + p[k as usize] = poly2[j + (k as usize) * (1 << i)]; + } + p + }) + .collect(); + poly_lagrange_vecs.push(transposed_poly); + // poly under extended representation. + poly.resize(domain.extended_len(), Scalar::zero()); + let data = domain.get_fft_data(poly.len()); + best_fft( + &mut poly, + domain.extended_omega, + domain.extended_k, + data, + false, + ); + let poly = { + let mut p = domain.empty_extended(); + p.values = poly; + p + }; + poly_extended_vecs.push(poly); + omega = omega * omega; + } + + let want_timer = Instant::now(); + let _ = poly_extended_vecs + .iter() + .fold(domain.empty_extended(), |acc, p| acc + p); + println!("want time: {}s", want_timer.elapsed().as_secs_f64()); + poly_lagrange_vecs.reverse(); + let got_timer = Instant::now(); + let _ = domain.lagrange_vecs_to_extended(poly_lagrange_vecs); + println!("got time: {}s", got_timer.elapsed().as_secs_f64()); +} From 127938f23e7aece10b0f32d2ffc07a6c9f244d03 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Mon, 19 Aug 2024 17:31:25 -0400 Subject: [PATCH 10/18] Update Cargo.toml --- halo2_proofs/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index d1ede87cf6..a6a245e62e 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -97,6 +97,7 @@ test-dev-graph = [ "plotters/bitmap_encoder", "plotters/ttf", ] +asm = ["halo2curves/asm"] bits = ["halo2curves/bits"] gadget-traces = ["backtrace"] thread-safe-region = [] From 0b6d243f6c97fa024c7dff72b19d8bbce43322b2 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Fri, 23 Aug 2024 18:42:39 -0400 Subject: [PATCH 11/18] patch --- 
.github/workflows/ci.yml | 2 +- halo2_gadgets/src/poseidon/pow5.rs | 7 +------ halo2_proofs/src/plonk/mv_lookup/verifier.rs | 4 +--- rust-toolchain | 2 +- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d0f18df8c1..2e567f9625 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: os: [ubuntu-latest, windows-latest, macOS-latest] include: - feature_set: basic - features: batch,dev-graph,gadget-traces,multicore + features: batch,dev-graph,gadget-traces - feature_set: all features: batch,dev-graph,gadget-traces,test-dev-graph,thread-safe-region,sanity-checks,circuit-params diff --git a/halo2_gadgets/src/poseidon/pow5.rs b/halo2_gadgets/src/poseidon/pow5.rs index e288947179..293e4a934d 100644 --- a/halo2_gadgets/src/poseidon/pow5.rs +++ b/halo2_gadgets/src/poseidon/pow5.rs @@ -369,12 +369,7 @@ impl< // The capacity element is never altered by the input. .unwrap_or_else(|| Value::known(F::ZERO)); region - .assign_advice( - || format!("load output_{i}"), - config.state[i], - 2, - || value, - ) + .assign_advice(|| format!("load output_{i}"), config.state[i], 2, || value) .map(StateWord) }; diff --git a/halo2_proofs/src/plonk/mv_lookup/verifier.rs b/halo2_proofs/src/plonk/mv_lookup/verifier.rs index 426ceeb89d..7966b4bc21 100644 --- a/halo2_proofs/src/plonk/mv_lookup/verifier.rs +++ b/halo2_proofs/src/plonk/mv_lookup/verifier.rs @@ -1,8 +1,6 @@ use std::iter; -use super::super::{ - circuit::Expression, ChallengeBeta, ChallengeTheta, ChallengeX, -}; +use super::super::{circuit::Expression, ChallengeBeta, ChallengeTheta, ChallengeX}; use super::Argument; use crate::{ arithmetic::CurveAffine, diff --git a/rust-toolchain b/rust-toolchain index 65ee095984..5e3a425662 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -1.67.0 +1.73.0 From a13e9859ad5ba490cd5f54afb8a61e0803090c56 Mon Sep 17 00:00:00 2001 From: dante 
<45801863+alexander-camuto@users.noreply.github.com> Date: Fri, 23 Aug 2024 18:48:35 -0400 Subject: [PATCH 12/18] bump --- .../examples/simple-lookup-unblinded.rs | 4 ++-- halo2_proofs/examples/simple-lookup.rs | 4 ++-- halo2_proofs/src/dev.rs | 6 +++--- halo2_proofs/src/helpers.rs | 2 +- halo2_proofs/src/multicore.rs | 4 +--- halo2_proofs/src/plonk.rs | 3 +-- halo2_proofs/src/plonk/evaluation.rs | 19 +++++++------------ halo2_proofs/src/plonk/keygen.rs | 2 +- halo2_proofs/src/plonk/mv_lookup.rs | 2 +- halo2_proofs/src/plonk/mv_lookup/prover.rs | 1 - halo2_proofs/src/plonk/prover.rs | 6 ++---- halo2_proofs/src/poly.rs | 2 +- halo2_proofs/src/poly/query.rs | 2 +- rust-toolchain | 2 +- 14 files changed, 24 insertions(+), 35 deletions(-) diff --git a/halo2_proofs/examples/simple-lookup-unblinded.rs b/halo2_proofs/examples/simple-lookup-unblinded.rs index 71c922b855..16ab1ae1cc 100644 --- a/halo2_proofs/examples/simple-lookup-unblinded.rs +++ b/halo2_proofs/examples/simple-lookup-unblinded.rs @@ -267,7 +267,7 @@ fn main() { println!("reloading proving key 💾"); - let f = std::fs::File::open(path.clone()).unwrap(); + let f = std::fs::File::open(path).unwrap(); let mut reader = std::io::BufReader::new(f); #[cfg(feature = "circuit-params")] let pk = ProvingKey::::read::<_, MyCircuit>( @@ -297,7 +297,7 @@ fn main() { println!("reloading verifier key 💾"); - let f = std::fs::File::open(path.clone()).unwrap(); + let f = std::fs::File::open(path).unwrap(); let mut reader = std::io::BufReader::new(f); #[cfg(feature = "circuit-params")] let vk = VerifyingKey::::read::<_, MyCircuit>( diff --git a/halo2_proofs/examples/simple-lookup.rs b/halo2_proofs/examples/simple-lookup.rs index aa9ddefc8a..42dab14dde 100644 --- a/halo2_proofs/examples/simple-lookup.rs +++ b/halo2_proofs/examples/simple-lookup.rs @@ -267,7 +267,7 @@ fn main() { println!("reloading proving key 💾"); - let f = std::fs::File::open(path.clone()).unwrap(); + let f = std::fs::File::open(path).unwrap(); let mut reader = 
std::io::BufReader::new(f); #[cfg(feature = "circuit-params")] let pk = ProvingKey::::read::<_, MyCircuit>( @@ -297,7 +297,7 @@ fn main() { println!("reloading verifier key 💾"); - let f = std::fs::File::open(path.clone()).unwrap(); + let f = std::fs::File::open(path).unwrap(); let mut reader = std::io::BufReader::new(f); #[cfg(feature = "circuit-params")] let vk = VerifyingKey::::read::<_, MyCircuit>( diff --git a/halo2_proofs/src/dev.rs b/halo2_proofs/src/dev.rs index 066ad43f6c..b2ddb764ae 100644 --- a/halo2_proofs/src/dev.rs +++ b/halo2_proofs/src/dev.rs @@ -908,9 +908,9 @@ impl + Ord> MockProver { cell_values: util::cell_values( gate, poly, - &util::load(n, row, &self.cs.fixed_queries, &self.fixed), - &util::load(n, row, &self.cs.advice_queries, &self.advice), - &util::load_instance( + util::load(n, row, &self.cs.fixed_queries, &self.fixed), + util::load(n, row, &self.cs.advice_queries, &self.advice), + util::load_instance( n, row, &self.cs.instance_queries, diff --git a/halo2_proofs/src/helpers.rs b/halo2_proofs/src/helpers.rs index faf7351a3e..5d80e63abd 100644 --- a/halo2_proofs/src/helpers.rs +++ b/halo2_proofs/src/helpers.rs @@ -150,5 +150,5 @@ pub(crate) fn write_polynomial_slice( /// Gets the total number of bytes of a slice of polynomials, assuming all polynomials are the same length pub(crate) fn polynomial_slice_byte_length(slice: &[Polynomial]) -> usize { let field_len = F::default().to_repr().as_ref().len(); - 4 + slice.len() * (4 + field_len * slice.get(0).map(|poly| poly.len()).unwrap_or(0)) + 4 + slice.len() * (4 + field_len * slice.first().map(|poly| poly.len()).unwrap_or(0)) } diff --git a/halo2_proofs/src/multicore.rs b/halo2_proofs/src/multicore.rs index 8be2bfba9d..b85e823f5b 100644 --- a/halo2_proofs/src/multicore.rs +++ b/halo2_proofs/src/multicore.rs @@ -1,9 +1,7 @@ pub use maybe_rayon::{ current_num_threads, - iter::{IntoParallelIterator, IntoParallelRefMutIterator, ParallelIterator}, + iter::{IntoParallelIterator, ParallelIterator}, 
join, scope, - slice::ParallelSliceMut, - Scope, }; pub trait TryFoldAndReduce { diff --git a/halo2_proofs/src/plonk.rs b/halo2_proofs/src/plonk.rs index 540c895082..ad0e80362e 100644 --- a/halo2_proofs/src/plonk.rs +++ b/halo2_proofs/src/plonk.rs @@ -228,8 +228,7 @@ impl VerifyingKey { + self.permutation.bytes_length(format) + self.selectors.len() * (self - .selectors - .get(0) + .selectors.first() .map(|selector| (selector.len() + 7) / 8) .unwrap_or(0)) } diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index abe8340440..b2fb38b4b6 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -17,7 +17,6 @@ use crate::{ use group::ff::{Field, PrimeField, WithSmallOrderMulGroup}; use maybe_rayon::iter::IndexedParallelIterator; use maybe_rayon::iter::IntoParallelRefIterator; -use maybe_rayon::iter::IntoParallelRefMutIterator; use maybe_rayon::iter::ParallelIterator; use super::{shuffle, ConstraintSystem, Expression}; @@ -400,7 +399,7 @@ impl Evaluator { .map(|advice_polys| { advice_polys .par_iter() - .map(|poly| domain.coeff_to_extended(&poly)) + .map(|poly| domain.coeff_to_extended(poly)) .collect() }) .collect(); @@ -412,7 +411,7 @@ impl Evaluator { .map(|instance_polys| { instance_polys .par_iter() - .map(|poly| domain.coeff_to_extended(&poly)) + .map(|poly| domain.coeff_to_extended(poly)) .collect() }) .collect(); @@ -754,15 +753,11 @@ impl Evaluator { #[cfg(not(feature = "precompute-coset"))] let (product_coset, permuted_input_coset, permuted_table_coset) = { - let product_coset = pk.vk.domain.coeff_to_extended(lookup.product_poly.clone()); - let permuted_input_coset = pk - .vk - .domain - .coeff_to_extended(lookup.permuted_input_poly.clone()); - let permuted_table_coset = pk - .vk - .domain - .coeff_to_extended(lookup.permuted_table_poly.clone()); + let product_coset = pk.vk.domain.coeff_to_extended(&lookup.product_poly); + let permuted_input_coset = + 
pk.vk.domain.coeff_to_extended(&lookup.permuted_input_poly); + let permuted_table_coset = + pk.vk.domain.coeff_to_extended(&lookup.permuted_table_poly); (product_coset, permuted_input_coset, permuted_table_coset) }; diff --git a/halo2_proofs/src/plonk/keygen.rs b/halo2_proofs/src/plonk/keygen.rs index 8db11b40da..0be315ea92 100644 --- a/halo2_proofs/src/plonk/keygen.rs +++ b/halo2_proofs/src/plonk/keygen.rs @@ -355,7 +355,7 @@ where let fixed_cosets = fixed_polys .iter() - .map(|poly| vk.domain.coeff_to_extended(&poly)) + .map(|poly| vk.domain.coeff_to_extended(poly)) .collect(); let permutation_pk = assembly diff --git a/halo2_proofs/src/plonk/mv_lookup.rs b/halo2_proofs/src/plonk/mv_lookup.rs index 783666158c..d66983c4b8 100644 --- a/halo2_proofs/src/plonk/mv_lookup.rs +++ b/halo2_proofs/src/plonk/mv_lookup.rs @@ -58,7 +58,7 @@ impl Argument { let inputs_expressions_degree: usize = self .inputs_expressions .iter() - .map(|input_expressions| expr_degree(input_expressions)) + .map(expr_degree) .sum(); let mut table_degree = 0; diff --git a/halo2_proofs/src/plonk/mv_lookup/prover.rs b/halo2_proofs/src/plonk/mv_lookup/prover.rs index fa224565f7..74e8d00704 100644 --- a/halo2_proofs/src/plonk/mv_lookup/prover.rs +++ b/halo2_proofs/src/plonk/mv_lookup/prover.rs @@ -20,7 +20,6 @@ use group::{ }; use rustc_hash::FxHashMap as HashMap; -use rand_core::RngCore; use std::{ iter, diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index 895200ff24..349da94112 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -21,8 +21,6 @@ use super::{ ChallengeY, Error, ProvingKey, }; use maybe_rayon::iter::IndexedParallelIterator; -use maybe_rayon::iter::IntoParallelIterator; -use maybe_rayon::iter::IntoParallelRefIterator; use maybe_rayon::iter::ParallelIterator; #[cfg(not(feature = "mv-lookup"))] @@ -328,7 +326,7 @@ where }; instances.len() ]; - let s = FxBuildHasher::default(); + let s = FxBuildHasher; let mut challenges 
= HashMap::::with_capacity_and_hasher(meta.num_challenges, s); @@ -790,7 +788,7 @@ where .map(|lookups| -> Result, _> { lookups .into_iter() - .map(|p| p.evaluate(&pk.vk, x, transcript)) + .map(|p| p.evaluate(pk, x, transcript)) .collect::, _>>() }) .collect::, _>>()?; diff --git a/halo2_proofs/src/poly.rs b/halo2_proofs/src/poly.rs index cd022cff12..88299cb333 100644 --- a/halo2_proofs/src/poly.rs +++ b/halo2_proofs/src/poly.rs @@ -250,7 +250,7 @@ pub(crate) fn batch_invert_assigned( impl Polynomial, LagrangeCoeff> { pub(crate) fn invert( &self, - inv_denoms: impl Iterator + ExactSizeIterator, + inv_denoms: impl ExactSizeIterator, ) -> Polynomial { assert_eq!(inv_denoms.len(), self.values.len()); Polynomial { diff --git a/halo2_proofs/src/poly/query.rs b/halo2_proofs/src/poly/query.rs index bc7a20c240..30be4fbec7 100644 --- a/halo2_proofs/src/poly/query.rs +++ b/halo2_proofs/src/poly/query.rs @@ -132,7 +132,7 @@ pub enum CommitmentReference<'r, C: CurveAffine, M: MSM> { impl<'r, C: CurveAffine, M: MSM> Copy for CommitmentReference<'r, C, M> {} impl<'r, C: CurveAffine, M: MSM> PartialEq for CommitmentReference<'r, C, M> { - #![allow(clippy::vtable_address_comparisons)] + #![allow(ambiguous_wide_pointer_comparisons)] fn eq(&self, other: &Self) -> bool { match (self, other) { (&CommitmentReference::Commitment(a), &CommitmentReference::Commitment(b)) => { diff --git a/rust-toolchain b/rust-toolchain index 5e3a425662..aef0d2eefa 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -1.73.0 +nightly-1.73.0 From 164ddfba28364463cb371bfeed16e4ad94c26d7c Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Fri, 23 Aug 2024 18:50:30 -0400 Subject: [PATCH 13/18] Update rust-toolchain --- rust-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-toolchain b/rust-toolchain index aef0d2eefa..7bf6ebb1ee 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-1.73.0 +1.73.0-nightly From 
61c2badb7a269f5797cfdd8717d849884467e980 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Fri, 23 Aug 2024 18:52:17 -0400 Subject: [PATCH 14/18] bump toolchain --- rust-toolchain | 1 - rust-toolchain.toml | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) delete mode 100644 rust-toolchain create mode 100644 rust-toolchain.toml diff --git a/rust-toolchain b/rust-toolchain deleted file mode 100644 index 7bf6ebb1ee..0000000000 --- a/rust-toolchain +++ /dev/null @@ -1 +0,0 @@ -1.73.0-nightly diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000000..6ae3510966 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly-2024-07-18" From c77399d04c40deb2e44895cc940ea6f2b641dd3c Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Fri, 23 Aug 2024 18:53:15 -0400 Subject: [PATCH 15/18] bump --- rust-toolchain | 1 + rust-toolchain.toml | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) create mode 100644 rust-toolchain delete mode 100644 rust-toolchain.toml diff --git a/rust-toolchain b/rust-toolchain new file mode 100644 index 0000000000..a6d32a878c --- /dev/null +++ b/rust-toolchain @@ -0,0 +1 @@ +nightly-2024-07-18 diff --git a/rust-toolchain.toml b/rust-toolchain.toml deleted file mode 100644 index 6ae3510966..0000000000 --- a/rust-toolchain.toml +++ /dev/null @@ -1,2 +0,0 @@ -[toolchain] -channel = "nightly-2024-07-18" From 8db8fa5f55dbf4ea84468e5316a83549bf0ab0b3 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Fri, 23 Aug 2024 18:54:29 -0400 Subject: [PATCH 16/18] fmt --- halo2_proofs/src/plonk.rs | 3 ++- halo2_proofs/src/plonk/mv_lookup.rs | 7 ++----- halo2_proofs/src/plonk/mv_lookup/prover.rs | 1 - 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/halo2_proofs/src/plonk.rs b/halo2_proofs/src/plonk.rs index ad0e80362e..72305731ee 100644 --- 
a/halo2_proofs/src/plonk.rs +++ b/halo2_proofs/src/plonk.rs @@ -228,7 +228,8 @@ impl VerifyingKey { + self.permutation.bytes_length(format) + self.selectors.len() * (self - .selectors.first() + .selectors + .first() .map(|selector| (selector.len() + 7) / 8) .unwrap_or(0)) } diff --git a/halo2_proofs/src/plonk/mv_lookup.rs b/halo2_proofs/src/plonk/mv_lookup.rs index d66983c4b8..c8a947f888 100644 --- a/halo2_proofs/src/plonk/mv_lookup.rs +++ b/halo2_proofs/src/plonk/mv_lookup.rs @@ -55,11 +55,8 @@ impl Argument { input_degree }; - let inputs_expressions_degree: usize = self - .inputs_expressions - .iter() - .map(expr_degree) - .sum(); + let inputs_expressions_degree: usize = + self.inputs_expressions.iter().map(expr_degree).sum(); let mut table_degree = 0; for expr in self.table_expressions.iter() { diff --git a/halo2_proofs/src/plonk/mv_lookup/prover.rs b/halo2_proofs/src/plonk/mv_lookup/prover.rs index 74e8d00704..abf3a63279 100644 --- a/halo2_proofs/src/plonk/mv_lookup/prover.rs +++ b/halo2_proofs/src/plonk/mv_lookup/prover.rs @@ -20,7 +20,6 @@ use group::{ }; use rustc_hash::FxHashMap as HashMap; - use std::{ iter, ops::{Mul, MulAssign}, From db0fbbe6ebb5cc9526e90eabc82a6dc81e1672e0 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Fri, 23 Aug 2024 19:20:27 -0400 Subject: [PATCH 17/18] mvlookup patch --- halo2_proofs/src/plonk/evaluation.rs | 3 +++ halo2_proofs/src/plonk/prover.rs | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index b2fb38b4b6..6509271dd7 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -24,6 +24,9 @@ use super::{shuffle, ConstraintSystem, Expression}; #[cfg(feature = "mv-lookup")] use ff::BatchInvert; +#[cfg(feature = "mv-lookup")] +use maybe_rayon::iter::IntoParallelRefMutIterator; + /// Return the index in the polynomial of size `isize` after 
rotation `rot`. fn get_rotation_idx(idx: usize, rot: i32, rot_scale: i32, isize: i32) -> usize { (((idx as i32) + (rot * rot_scale)).rem_euclid(isize)) as usize diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index 349da94112..1c0dbe6cda 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -28,6 +28,11 @@ use super::lookup; #[cfg(feature = "mv-lookup")] use super::mv_lookup as lookup; +#[cfg(feature = "mv-lookup")] +use maybe_rayon::iter::{ + IntoParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator, +}; + use crate::{ arithmetic::{eval_polynomial, CurveAffine}, circuit::Value, @@ -782,13 +787,21 @@ where log::trace!("Permutation evaluation: {:?}", start.elapsed()); // Evaluate the lookups, if any, at omega^i x. + let start = Instant::now(); + let lookups: Vec>> = lookups .into_iter() .map(|lookups| -> Result, _> { lookups .into_iter() - .map(|p| p.evaluate(pk, x, transcript)) + .map(|p| { + #[cfg(not(feature = "mv-lookup"))] + let res = { p.evaluate(pk, x, transcript) }; + #[cfg(feature = "mv-lookup")] + let res = { p.evaluate(&pk.vk, x, transcript) }; + res + }) .collect::, _>>() }) .collect::, _>>()?; From 8b13a0d2a7a34d8daab010dadb2c47dfa47d37d0 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Sat, 24 Aug 2024 19:07:31 -0400 Subject: [PATCH 18/18] fix: wasm compatible timings --- halo2_proofs/Cargo.toml | 1 + .../examples/simple-lookup-unblinded.rs | 12 +++++----- halo2_proofs/examples/simple-lookup.rs | 12 +++++----- halo2_proofs/examples/vector-mul-unblinded.rs | 4 ++-- halo2_proofs/examples/vector-mul.rs | 4 ++-- halo2_proofs/src/plonk/evaluation.rs | 17 +++++++------- halo2_proofs/src/plonk/lookup/prover.rs | 2 +- halo2_proofs/src/plonk/mv_lookup/prover.rs | 22 +++++++++---------- halo2_proofs/src/plonk/prover.rs | 2 +- halo2_proofs/src/poly/domain.rs | 4 ++-- 10 files changed, 40 insertions(+), 40 deletions(-) diff --git 
a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index a6a245e62e..4e7cb401ee 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -68,6 +68,7 @@ rustacuda = { version = "0.1", optional = true } serde_derive = { version = "1", optional = true} bincode = { version = "1.3.3", default_features = false } serde = { version = "1.0.126", features = ["derive"] } +instant = { version = "0.1" } # Developer tooling dependencies diff --git a/halo2_proofs/examples/simple-lookup-unblinded.rs b/halo2_proofs/examples/simple-lookup-unblinded.rs index 16ab1ae1cc..70425f19a7 100644 --- a/halo2_proofs/examples/simple-lookup-unblinded.rs +++ b/halo2_proofs/examples/simple-lookup-unblinded.rs @@ -250,9 +250,9 @@ fn main() { println!("k = {K}"); // time it println!("keygen"); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let (params, pk) = keygen(K); - let end = std::time::Instant::now(); + let end = instant::Instant::now(); println!("keygen time: {:?}", end.duration_since(start)); println!("saving proving key 💾"); @@ -315,14 +315,14 @@ fn main() { // time it println!("prover"); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let proof = prover(K, ¶ms, &pk); - let end = std::time::Instant::now(); + let end = instant::Instant::now(); println!("prover time: {:?}", end.duration_since(start)); // time it println!("verifier"); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); verifier(¶ms, &vk, &proof); - let end = std::time::Instant::now(); + let end = instant::Instant::now(); println!("verifier time: {:?}", end.duration_since(start)); } diff --git a/halo2_proofs/examples/simple-lookup.rs b/halo2_proofs/examples/simple-lookup.rs index 42dab14dde..824335d354 100644 --- a/halo2_proofs/examples/simple-lookup.rs +++ b/halo2_proofs/examples/simple-lookup.rs @@ -250,9 +250,9 @@ fn main() { println!("k = {K}"); // time it println!("keygen"); - let start = std::time::Instant::now(); + 
let start = instant::Instant::now(); let (params, pk) = keygen(K); - let end = std::time::Instant::now(); + let end = instant::Instant::now(); println!("keygen time: {:?}", end.duration_since(start)); println!("saving proving key 💾"); @@ -315,14 +315,14 @@ fn main() { // time it println!("prover"); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let proof = prover(K, ¶ms, &pk); - let end = std::time::Instant::now(); + let end = instant::Instant::now(); println!("prover time: {:?}", end.duration_since(start)); // time it println!("verifier"); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); verifier(¶ms, &vk, &proof); - let end = std::time::Instant::now(); + let end = instant::Instant::now(); println!("verifier time: {:?}", end.duration_since(start)); } diff --git a/halo2_proofs/examples/vector-mul-unblinded.rs b/halo2_proofs/examples/vector-mul-unblinded.rs index 5b7c08914a..26ef164d57 100644 --- a/halo2_proofs/examples/vector-mul-unblinded.rs +++ b/halo2_proofs/examples/vector-mul-unblinded.rs @@ -334,14 +334,14 @@ fn main() { // of the instance column, so we position it there in our public inputs. let mut public_inputs = c; - let start = std::time::Instant::now(); + let start = instant::Instant::now(); // Given the correct public input, our circuit will verify. let prover = MockProver::run(k, &circuit, vec![public_inputs.clone()]).unwrap(); assert_eq!(prover.verify(), Ok(())); println!("positive test took {:?}", start.elapsed()); // If we try some other public input, the proof will fail! 
- let start = std::time::Instant::now(); + let start = instant::Instant::now(); public_inputs[0] += Fp::one(); let prover = MockProver::run(k, &circuit, vec![public_inputs]).unwrap(); assert!(prover.verify().is_err()); diff --git a/halo2_proofs/examples/vector-mul.rs b/halo2_proofs/examples/vector-mul.rs index 01728fdf36..8b93fa294c 100644 --- a/halo2_proofs/examples/vector-mul.rs +++ b/halo2_proofs/examples/vector-mul.rs @@ -300,14 +300,14 @@ fn main() { // of the instance column, so we position it there in our public inputs. let mut public_inputs = c; - let start = std::time::Instant::now(); + let start = instant::Instant::now(); // Given the correct public input, our circuit will verify. let prover = MockProver::run(k, &circuit, vec![public_inputs.clone()]).unwrap(); assert_eq!(prover.verify(), Ok(())); println!("positive test took {:?}", start.elapsed()); // If we try some other public input, the proof will fail! - let start = std::time::Instant::now(); + let start = instant::Instant::now(); public_inputs[0] += Fp::one(); let prover = MockProver::run(k, &circuit, vec![public_inputs]).unwrap(); assert!(prover.verify().is_err()); diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index 6509271dd7..4ce97a333e 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -381,7 +381,7 @@ impl Evaluator { shuffles: &[Vec>], permutations: &[permutation::prover::Committed], ) -> Polynomial { - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let domain = &pk.vk.domain; let size = domain.extended_len(); let rot_scale = 1 << (domain.extended_k() - domain.k()); @@ -395,7 +395,7 @@ impl Evaluator { let p = &pk.vk.cs.permutation; log::trace!(" - Initialization: {:?}", start.elapsed()); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); // Calculate the advice and instance cosets let advice: Vec>> = advice_polys .iter() @@ -408,7 +408,7 @@ impl 
Evaluator { .collect(); log::trace!(" - Advice cosets: {:?}", start.elapsed()); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let instance: Vec>> = instance_polys .iter() .map(|instance_polys| { @@ -424,7 +424,7 @@ impl Evaluator { // Core expression evaluations - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let num_threads = multicore::current_num_threads(); for ((((advice, instance), lookups), shuffles), permutation) in advice .iter() @@ -465,7 +465,7 @@ impl Evaluator { log::trace!(" - Custom gates: {:?}", start.elapsed()); // Permutations - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let sets = &permutation.sets; if !sets.is_empty() { let blinding_factors = pk.vk.cs.blinding_factors(); @@ -548,7 +548,7 @@ impl Evaluator { } log::trace!(" - Permutations: {:?}", start.elapsed()); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); // For lookups, compute inputs_inv_sum = ∑ 1 / (f_i(X) + α) // The outer vector has capacity self.lookups.len() // The middle vector has capacity domain.extended_len() @@ -616,7 +616,7 @@ impl Evaluator { log::trace!(" - Lookups inv sum: {:?}", start.elapsed()); #[cfg(feature = "mv-lookup")] - let start = std::time::Instant::now(); + let start = instant::Instant::now(); // Lookups #[cfg(feature = "mv-lookup")] parallelize(&mut values, |values, start| { @@ -764,7 +764,6 @@ impl Evaluator { (product_coset, permuted_input_coset, permuted_table_coset) }; - let start = std::time::Instant::now(); // Lookup constraints parallelize(&mut values, |values, start| { let lookup_evaluator = &self.lookups[n]; @@ -827,7 +826,7 @@ impl Evaluator { log::trace!(" - Lookups constraints: {:?}", start.elapsed()); // Shuffle constraints - let start = std::time::Instant::now(); + let start = instant::Instant::now(); for (n, shuffle) in shuffles.iter().enumerate() { let product_coset = 
pk.vk.domain.coeff_to_extended(&shuffle.product_poly); diff --git a/halo2_proofs/src/plonk/lookup/prover.rs b/halo2_proofs/src/plonk/lookup/prover.rs index b27b451fff..60a97a739c 100644 --- a/halo2_proofs/src/plonk/lookup/prover.rs +++ b/halo2_proofs/src/plonk/lookup/prover.rs @@ -463,7 +463,7 @@ fn permute_expression_pair<'params, C: CurveAffine, P: Params<'params, C>, R: Rn ); }*/ #[cfg(not(target_arch = "wasm32"))] - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let res = permute_expression_pair_par(pk, params, domain, rng, input_expression, table_expression); #[cfg(not(target_arch = "wasm32"))] diff --git a/halo2_proofs/src/plonk/mv_lookup/prover.rs b/halo2_proofs/src/plonk/mv_lookup/prover.rs index abf3a63279..4a7c934fdb 100644 --- a/halo2_proofs/src/plonk/mv_lookup/prover.rs +++ b/halo2_proofs/src/plonk/mv_lookup/prover.rs @@ -114,7 +114,7 @@ impl> Argument { compressed_expression }; - let start = std::time::Instant::now(); + let start = instant::Instant::now(); // Get values of input expressions involved in the lookup and compress them let compressed_inputs_expressions: Vec<_> = self .inputs_expressions @@ -124,7 +124,7 @@ impl> Argument { log::trace!("compressed_inputs_expressions {:?}", start.elapsed()); // Get values of table expressions involved in the lookup and compress them - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let compressed_table_expression = compress_expressions(&self.table_expressions); log::trace!("compressed_table_expression {:?}", start.elapsed()); @@ -133,7 +133,7 @@ impl> Argument { let chunk_size = n - blinding_factors - 1; // compute m(X) - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let table_index_value_mapping: HashMap, usize> = compressed_table_expression .par_iter() .take(chunk_size) @@ -142,7 +142,7 @@ impl> Argument { .collect(); log::trace!("table_index_value_mapping {:?}", start.elapsed()); - let start = 
std::time::Instant::now(); + let start = instant::Instant::now(); let m_values: Vec = { use std::sync::atomic::{AtomicU64, Ordering}; let m_values: Vec = (0..params.n()).map(|_| AtomicU64::new(0)).collect(); @@ -217,7 +217,7 @@ impl> Argument { } // commit to m(X) - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let blind = Blind(C::Scalar::ZERO); let m_commitment = params.commit_lagrange(&m_values, blind).to_affine(); log::trace!("m_commitment {:?}", start.elapsed()); @@ -248,7 +248,7 @@ impl Prepared { RHS = τ(X) * Π(φ_i(X)) * (∑ 1/(φ_i(X)) - m(X) / τ(X)))) */ - let start = std::time::Instant::now(); + let start = instant::Instant::now(); // ∑ 1/(φ_i(X)) let mut inputs_log_derivatives = vec![C::Scalar::ZERO; params.n() as usize]; for compressed_input_expression in self.compressed_inputs_expressions.iter() { @@ -275,7 +275,7 @@ impl Prepared { log::trace!(" - inputs_log_derivatives {:?}", start.elapsed()); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); // 1 / τ(X) let mut table_log_derivatives = vec![C::Scalar::ZERO; params.n() as usize]; parallelize( @@ -292,14 +292,14 @@ impl Prepared { log::trace!(" - table_log_derivatives {:?}", start.elapsed()); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); table_log_derivatives.iter_mut().batch_invert(); log::trace!( " - table_log_derivatives batch_invert {:?}", start.elapsed() ); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); // (Σ 1/(φ_i(X)) - m(X) / τ(X)) let mut log_derivatives_diff = vec![C::Scalar::ZERO; params.n() as usize]; parallelize(&mut log_derivatives_diff, |log_derivatives_diff, start| { @@ -316,7 +316,7 @@ impl Prepared { log::trace!(" - log_derivatives_diff {:?}", start.elapsed()); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); // Compute the evaluations of the lookup grand sum polynomial // over our domain, starting with phi[0] = 0 let 
blinding_factors = vk.cs.blinding_factors(); @@ -397,7 +397,7 @@ impl Prepared { } let grand_sum_blind = Blind(C::Scalar::ZERO); - let start = std::time::Instant::now(); + let start = instant::Instant::now(); let phi_commitment = params.commit_lagrange(&phi, grand_sum_blind).to_affine(); log::trace!(" - phi_commitment {:?}", start.elapsed()); diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index 1c0dbe6cda..e74ffe74f0 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -2,6 +2,7 @@ use ff::{Field, FromUniformBytes, WithSmallOrderMulGroup}; use group::Curve; use halo2curves::serde::SerdeObject; +use instant::Instant; use rand_core::RngCore; use rustc_hash::FxBuildHasher; use rustc_hash::FxHashMap as HashMap; @@ -9,7 +10,6 @@ use rustc_hash::FxHashSet as HashSet; use std::collections::BTreeSet; use std::iter; use std::ops::RangeTo; -use std::time::Instant; use super::{ circuit::{ diff --git a/halo2_proofs/src/poly/domain.rs b/halo2_proofs/src/poly/domain.rs index 8d2252a6e6..ee1a8d1bc8 100644 --- a/halo2_proofs/src/poly/domain.rs +++ b/halo2_proofs/src/poly/domain.rs @@ -847,7 +847,7 @@ fn test_coeff_to_extended_part() { fn bench_coeff_to_extended_parts() { use halo2curves::pasta::pallas::Scalar; use rand_core::OsRng; - use std::time::Instant; + use instant::Instant; let k = 20; let domain = EvaluationDomain::::new(3, k); @@ -935,7 +935,7 @@ fn test_lagrange_vecs_to_extended() { fn bench_lagrange_vecs_to_extended() { use halo2curves::pasta::pallas::Scalar; use rand_core::OsRng; - use std::time::Instant; + use instant::Instant; let rng = OsRng; let domain = EvaluationDomain::::new(8, 10);