Sync with upstream PSE (#7)

* Add field conversion to/from `[u64;4]` (privacy-scaling-explorations#80) * feat: add field conversion to/from `[u64;4]` * Added conversion tests * Added `montgomery_reduce_short` for no-asm * For bn256, uses assembly conversion when asm feature is on * fix: remove conflict for asm * chore: bump rust-toolchain to 1.67.0 * Compute Legendre symbol for `hash_to_curve` (privacy-scaling-explorations#77) * Add `Legendre` trait and macro - Add Legendre macro with norm and legendre symbol computation - Add macro for automatic implementation in prime fields * Add legendre macro call for prime fields * Remove unused imports * Remove leftover * Add `is_quadratic_non_residue` for hash_to_curve * Add `legendre` function * Compute modulus separately * Substitute division for shift * Update modulus computation * Add quadratic residue check func * Add quadratic residue tests * Add hash_to_curve bench * Implement Legendre trait for all curves * Move misplaced comment * Add all curves to hash bench * fix: add suggestion for legendre_exp * fix: imports after rebase * Add simplified SWU method (privacy-scaling-explorations#81) * Fix broken link * Add simple SWU algorithm * Add simplified SWU hash_to_curve for secp256r1 * add: sswu z reference * update MAP_ID identifier Co-authored-by: Han <[email protected]> --------- Co-authored-by: Han <[email protected]> * Bring back curve algorithms for `a = 0` (privacy-scaling-explorations#82) * refactor: bring back curve algorithms for `a = 0` * fix: clippy warning * fix: Improve serialization for prime fields (privacy-scaling-explorations#85) * fix: Improve serialization for prime fields Summary: 256-bit field serialization is currently 4x u64, ie. the native format. This implements the standard of byte-serialization (corresponding to the PrimeField::{to,from}_repr), and an hex-encoded variant of that for (de)serializers that are human-readable (concretely, json). 
- Added a new macro `serialize_deserialize_32_byte_primefield!` for custom serialization and deserialization of 32-byte prime field in different struct (Fq, Fp, Fr) across the secp256r, bn256, and derive libraries. - Implemented the new macro for serialization and deserialization in various structs, replacing the previous `serde::{Deserialize, Serialize}` direct use. - Enhanced error checking in the custom serialization methods to ensure valid field elements. - Updated the test function in the tests/field.rs file to include JSON serialization and deserialization tests for object integrity checking. * fixup! fix: Improve serialization for prime fields --------- Co-authored-by: Carlos Pérez <[email protected]> * refactor: (De)Serialization of points using `GroupEncoding` (privacy-scaling-explorations#88) * refactor: implement (De)Serialization of points using the `GroupEncoding` trait - Updated curve point (de)serialization logic from the internal representation to the representation offered by the implementation of the `GroupEncoding` trait. * fix: add explicit json serde tests * Insert MSM and FFT code and their benchmarks. (privacy-scaling-explorations#86) * Insert MSM and FFT code and their benchmarks. Resolves taikoxyz/zkevm-circuits#150. * feedback * Add instructions * feedback * Implement feedback: Actually supply the correct arguments to `best_multiexp`. Split into `singlecore` and `multicore` benchmarks so Criterion's result caching and comparison over multiple runs makes sense. Rewrite point and scalar generation. * Use slicing and parallelism to decrease running time. 
Laptop measurements: k=22: 109 sec k=16: 1 sec * Refactor msm * Refactor fft * Update module comments * Fix formatting * Implement suggestion for fixing CI --------- Co-authored-by: David Nevado <[email protected]> Co-authored-by: Han <[email protected]> Co-authored-by: François Garillot <[email protected]> Co-authored-by: Carlos Pérez <[email protected]> Co-authored-by: einar-taiko <[email protected]>
axiom-crypto · Sep 23, 2023 · e185711 · e185711
1 parent 78c2b26
commit e185711
Show file tree

Hide file tree

Showing 25 changed files with 1,066 additions and 162 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -14,6 +14,7 @@ criterion = { version = "0.3", features = ["html_reports"] }
 rand_xorshift = "0.3"
 ark-std = { version = "0.3" }
 bincode = "1.3.3"
+serde_json = "1.0.105"
 
 [dependencies]
 subtle = "2.4"
@@ -30,14 +31,17 @@ num-traits = "0.2"
 paste = "1.0.11"
 serde = { version = "1.0", default-features = false, optional = true }
 serde_arrays = { version = "0.1.0", optional = true }
+hex = { version = "0.4", optional = true, default-features = false, features = ["alloc", "serde"] }
 blake2b_simd = "1"
+maybe-rayon = { version = "0.1.0", default-features = false }
 
 [features]
-default = ["reexport", "bits", "bn256-table", "derive_serde"]
+default = ["reexport", "bits", "multicore", "bn256-table", "derive_serde"]
+multicore = ["maybe-rayon/threads"]
 asm = []
 bits = ["ff/bits"]
 bn256-table = []
-derive_serde = ["serde/derive", "serde_arrays"]
+derive_serde = ["serde/derive", "serde_arrays", "hex"]
 prefetch = []
 print-trace = ["ark-std/print-trace"]
 reexport = []
@@ -63,3 +67,16 @@ required-features = ["reexport"]
 [[bench]]
 name = "group"
 harness = false
+
+[[bench]]
+name = "hash_to_curve"
+harness = false
+
+[[bench]]
+name = "fft"
+harness = false
+
+[[bench]]
+name = "msm"
+harness = false
+required-features = ["multicore"]
diff --git a/benches/fft.rs b/benches/fft.rs
@@ -0,0 +1,57 @@
+//! This benchmarks Fast-Fourier Transform (FFT).
+//! Since it is over a finite field, it is actually the Number Theoretic
+//! Transform (NTT).  It uses the `Fr` scalar field from the BN256 curve.
+//!
+//! To run this benchmark:
+//!
+//!     cargo bench -- fft
+//!
+//! Caveat:  The multicore benchmark assumes:
+//!     1. a multi-core system
+//!     2. that the `multicore` feature is enabled.  It is by default.
+
+#[macro_use]
+extern crate criterion;
+
+use criterion::{BenchmarkId, Criterion};
+use group::ff::Field;
+use halo2curves::bn256::Fr as Scalar;
+use halo2curves::fft::best_fft;
+use rand_core::OsRng;
+use std::ops::Range;
+use std::time::SystemTime;
+
+const RANGE: Range<u32> = 3..19;
+
+/// Produces `2^k` random scalars for the FFT benchmark.
+///
+/// Progress and the elapsed wall-clock time (in whole seconds) are printed to
+/// stdout so that the one-off setup cost is visible in the benchmark log.
+fn generate_data(k: u32) -> Vec<Scalar> {
+    let n = 1 << k;
+    let started = SystemTime::now();
+    println!("\n\nGenerating 2^{k} = {n} values..",);
+    let values = (0..n)
+        .map(|_| Scalar::random(OsRng))
+        .collect::<Vec<Scalar>>();
+    // `as_secs` truncates, so short runs are reported as 0 sec.
+    let elapsed_secs = started.elapsed().unwrap().as_secs();
+    println!(
+        "Generating 2^{k} = {n} values took: {} sec.\n\n",
+        elapsed_secs
+    );
+    values
+}
+
+/// Criterion entry point: times `best_fft` for every `k` in `RANGE`.
+///
+/// One buffer sized for the largest `k` is generated up front; each benchmark
+/// then transforms the leading `2^k` elements of that buffer in place.  The
+/// `omega` argument is a randomly drawn field element.
+fn fft(c: &mut Criterion) {
+    let largest_k = RANGE.max().unwrap_or(16);
+    let mut values = generate_data(largest_k);
+    let omega = Scalar::random(OsRng);
+    let mut bench_group = c.benchmark_group("fft");
+    for log_n in RANGE {
+        let id = BenchmarkId::new("k", log_n);
+        bench_group.bench_function(id, |bencher| {
+            let len = 1 << log_n;
+            // The shared buffer must be large enough for this size.
+            assert!(values.len() >= len);
+            bencher.iter(|| best_fft(&mut values[..len], omega, log_n));
+        });
+    }
+    bench_group.finish();
+}
+
+criterion_group!(benches, fft);
+criterion_main!(benches);
diff --git a/benches/group.rs b/benches/group.rs
@@ -18,28 +18,28 @@ fn criterion_benchmark<G: CurveExt>(c: &mut Criterion) {
         let v = vec![G::generator(); N];
         let mut q = vec![G::AffineExt::identity(); N];
 
-        c.bench_function(&format!("{} check on curve", name), move |b| {
+        c.bench_function(&format!("{name} check on curve"), move |b| {
             b.iter(|| black_box(p1).is_on_curve())
         });
-        c.bench_function(&format!("{} check equality", name), move |b| {
+        c.bench_function(&format!("{name} check equality"), move |b| {
             b.iter(|| black_box(p1) == black_box(p1))
         });
-        c.bench_function(&format!("{} to affine", name), move |b| {
+        c.bench_function(&format!("{name} to affine"), move |b| {
             b.iter(|| G::AffineExt::from(black_box(p1)))
         });
-        c.bench_function(&format!("{} doubling", name), move |b| {
+        c.bench_function(&format!("{name} doubling"), move |b| {
             b.iter(|| black_box(p1).double())
         });
-        c.bench_function(&format!("{} addition", name), move |b| {
+        c.bench_function(&format!("{name} addition"), move |b| {
             b.iter(|| black_box(p1).add(&p2))
         });
-        c.bench_function(&format!("{} mixed addition", name), move |b| {
+        c.bench_function(&format!("{name} mixed addition"), move |b| {
             b.iter(|| black_box(p2).add(&p1_affine))
         });
-        c.bench_function(&format!("{} scalar multiplication", name), move |b| {
+        c.bench_function(&format!("{name} scalar multiplication"), move |b| {
             b.iter(|| black_box(p1) * black_box(s))
         });
-        c.bench_function(&format!("{} batch to affine n={}", name, N), move |b| {
+        c.bench_function(&format!("{name} batch to affine n={N}"), move |b| {
             b.iter(|| {
                 G::batch_normalize(black_box(&v), black_box(&mut q));
                 black_box(&q)[0]

diff --git a/benches/hash_to_curve.rs b/benches/hash_to_curve.rs
@@ -0,0 +1,59 @@
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use pasta_curves::arithmetic::CurveExt;
+use rand_core::{OsRng, RngCore};
+use std::iter;
+
+/// Benchmarks hashing a random message onto the secp256k1 curve.
+fn hash_to_secp256k1(c: &mut Criterion) {
+    use halo2curves::secp256k1::Secp256k1 as Curve;
+    hash_to_curve::<Curve>(c, "Secp256k1");
+}
+
+/// Benchmarks hashing a random message onto the secq256k1 curve.
+fn hash_to_secq256k1(c: &mut Criterion) {
+    use halo2curves::secq256k1::Secq256k1 as Curve;
+    hash_to_curve::<Curve>(c, "Secq256k1");
+}
+
+/// Benchmarks hashing a random message onto the secp256r1 curve.
+fn hash_to_secp256r1(c: &mut Criterion) {
+    use halo2curves::secp256r1::Secp256r1 as Curve;
+    hash_to_curve::<Curve>(c, "Secp256r1");
+}
+
+/// Benchmarks hashing a random message onto the Pallas curve (`pasta::Ep`).
+fn hash_to_pallas(c: &mut Criterion) {
+    use halo2curves::pasta::Ep as Curve;
+    hash_to_curve::<Curve>(c, "Pallas");
+}
+
+/// Benchmarks hashing a random message onto the Vesta curve (`pasta::Eq`).
+fn hash_to_vesta(c: &mut Criterion) {
+    use halo2curves::pasta::Eq as Curve;
+    hash_to_curve::<Curve>(c, "Vesta");
+}
+
+/// Benchmarks hashing a random message onto the BN256 `G1` group.
+fn hash_to_bn256(c: &mut Criterion) {
+    use halo2curves::bn256::G1 as Curve;
+    hash_to_curve::<Curve>(c, "Bn256");
+}
+
+/// Benchmarks hashing a random message onto the Grumpkin `G1` group.
+fn hash_to_grumpkin(c: &mut Criterion) {
+    use halo2curves::grumpkin::G1 as Curve;
+    hash_to_curve::<Curve>(c, "Grumpkin");
+}
+
+/// Registers a "Hash to {name}" benchmark for the curve `G`.
+///
+/// The hasher is built once with the domain prefix `"test"`, and a single
+/// random message (1024 random `u32` words laid out big-endian, i.e. 4096
+/// bytes) is hashed repeatedly inside the timed loop.
+fn hash_to_curve<G: CurveExt>(c: &mut Criterion, name: &'static str) {
+    let hasher = G::hash_to_curve("test");
+    let mut rng = OsRng;
+    let message: Vec<_> = (0..1024)
+        .flat_map(|_| rng.next_u32().to_be_bytes())
+        .collect();
+
+    let label = format!("Hash to {name}");
+    c.bench_function(&label, move |b| {
+        b.iter(|| hasher(black_box(&message)))
+    });
+}
+
+criterion_group!(
+    benches,
+    hash_to_secp256k1,
+    hash_to_secq256k1,
+    hash_to_secp256r1,
+    hash_to_pallas,
+    hash_to_vesta,
+    hash_to_bn256,
+    hash_to_grumpkin,
+);
+criterion_main!(benches);
diff --git a/benches/msm.rs b/benches/msm.rs
@@ -0,0 +1,116 @@
+//! This benchmarks Multi Scalar Multiplication (MSM).
+//! It measures `G1` from the BN256 curve.
+//!
+//! To run this benchmark:
+//!
+//!     cargo bench -- msm
+//!
+//! Caveat:  The multicore benchmark assumes:
+//!     1. a multi-core system
+//!     2. that the `multicore` feature is enabled.  It is by default.
+
+#[macro_use]
+extern crate criterion;
+
+use criterion::{BenchmarkId, Criterion};
+use ff::Field;
+use group::prime::PrimeCurveAffine;
+use halo2curves::bn256::{Fr as Scalar, G1Affine as Point};
+use halo2curves::msm::{best_multiexp, multiexp_serial};
+use maybe_rayon::current_thread_index;
+use maybe_rayon::prelude::{IntoParallelIterator, ParallelIterator};
+use rand_core::SeedableRng;
+use rand_xorshift::XorShiftRng;
+use std::time::SystemTime;
+
+const SAMPLE_SIZE: usize = 10;
+const SINGLECORE_RANGE: [u8; 6] = [3, 8, 10, 12, 14, 16];
+const MULTICORE_RANGE: [u8; 9] = [3, 8, 10, 12, 14, 16, 18, 20, 22];
+const SEED: [u8; 16] = [
+    0x59, 0x62, 0xbe, 0x5d, 0x76, 0x3d, 0x31, 0x8d, 0x17, 0xdb, 0x37, 0x32, 0x54, 0x06, 0xbc, 0xe5,
+];
+
+/// Builds the deterministic RNG used by the current rayon worker thread.
+///
+/// The base `SEED` is perturbed with the bytes of the worker's thread index:
+/// byte `i` of the index is added to seed bytes `i` and `i + 8`.  The addition
+/// wraps, so it cannot panic when overflow checks are enabled, and the zipped
+/// iteration works for any `usize` width of at most 8 bytes.
+///
+/// # Panics
+///
+/// Panics if called from a thread that has no rayon thread index.
+fn seeded_worker_rng() -> XorShiftRng {
+    let mut thread_seed = SEED;
+    let uniq = current_thread_index()
+        .expect("must be called from a rayon worker thread")
+        .to_ne_bytes();
+    let (lo, hi) = thread_seed.split_at_mut(8);
+    for ((l, h), u) in lo.iter_mut().zip(hi.iter_mut()).zip(uniq.iter()) {
+        *l = l.wrapping_add(*u);
+        *h = h.wrapping_add(*u);
+    }
+    XorShiftRng::from_seed(thread_seed)
+}
+
+/// Generates `2^k` random scalars and `2^k` random curve points in parallel.
+///
+/// Each rayon worker draws from its own `XorShiftRng` (see
+/// `seeded_worker_rng`).  Progress and the elapsed wall-clock time (in whole
+/// seconds) are printed to stdout.
+///
+/// NOTE(review): scalars and points are generated from identically seeded
+/// per-thread RNGs; this mirrors the previous behaviour and is presumably
+/// acceptable for benchmarking only.
+///
+/// # Panics
+///
+/// Panics if `k >= 64`.
+fn generate_coefficients_and_curvepoints(k: u8) -> (Vec<Scalar>, Vec<Point>) {
+    let n: u64 = {
+        assert!(k < 64);
+        1 << k
+    };
+
+    println!("\n\nGenerating 2^{k} = {n} coefficients and curve points..",);
+    let timer = SystemTime::now();
+    let coeffs = (0..n)
+        .into_par_iter()
+        .map_init(seeded_worker_rng, |rng, _| Scalar::random(rng))
+        .collect();
+    let bases = (0..n)
+        .into_par_iter()
+        .map_init(seeded_worker_rng, |rng, _| Point::random(rng))
+        .collect();
+    let end = timer.elapsed().unwrap();
+    println!(
+        "Generating 2^{k} = {n} coefficients and curve points took: {} sec.\n\n",
+        end.as_secs()
+    );
+
+    (coeffs, bases)
+}
+
+/// Criterion entry point: times single-core and multi-core MSM on BN256 `G1`.
+///
+/// Scalars and points are generated once for the largest `k` in either range;
+/// every benchmark then works on the leading `2^k` elements.
+fn msm(c: &mut Criterion) {
+    let mut group = c.benchmark_group("msm");
+    // `sample_size` only affects benchmarks registered *after* the call, so it
+    // has to be configured before the first `bench_function`; chaining it
+    // afterwards would leave the first benchmark on Criterion's default.
+    group.sample_size(SAMPLE_SIZE);
+    let max_k = *SINGLECORE_RANGE
+        .iter()
+        .chain(MULTICORE_RANGE.iter())
+        .max()
+        .unwrap_or(&16);
+    let (coeffs, bases) = generate_coefficients_and_curvepoints(max_k);
+
+    for k in SINGLECORE_RANGE {
+        group.bench_function(BenchmarkId::new("singlecore", k), |b| {
+            assert!(k < 64);
+            let n: usize = 1 << k;
+            // The accumulator is shared across iterations; only the running
+            // time matters here, not the accumulated value.
+            let mut acc = Point::identity().into();
+            b.iter(|| multiexp_serial(&coeffs[..n], &bases[..n], &mut acc));
+        });
+    }
+    for k in MULTICORE_RANGE {
+        group.bench_function(BenchmarkId::new("multicore", k), |b| {
+            assert!(k < 64);
+            let n: usize = 1 << k;
+            // Return the result so Criterion black-boxes it and the
+            // computation cannot be optimised away.
+            b.iter(|| best_multiexp(&coeffs[..n], &bases[..n]))
+        });
+    }
+    group.finish();
+}
+
+criterion_group!(benches, msm);
+criterion_main!(benches);