Skip to content

Commit

Permalink
merged with dev
Browse files Browse the repository at this point in the history
  • Loading branch information
dloghin committed Apr 18, 2024
2 parents 15c427e + 472a8a6 commit 3779c03
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 11 deletions.
2 changes: 1 addition & 1 deletion field/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ rand = { workspace = true, features = ["getrandom"] }
serde = { workspace = true, features = ["alloc"] }
static_assertions = { workspace = true }
unroll = { workspace = true }
cryptography_cuda = { git = "ssh://git@github.com/okx/cryptography_cuda.git", branch="dev-dumi", optional=true }
cryptography_cuda ={git="ssh://git@github.com/okx/cryptography_cuda.git", rev="56cee09dd044de44f05c7d54383c6a8cb4078b29", optional=true}

[dev-dependencies]
rand = { version = "0.8.5", default-features = false, features = ["getrandom"] }
Expand Down
2 changes: 1 addition & 1 deletion plonky2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ once_cell = { version = "1.18.0" }
plonky2_field = { version = "0.2.0", path = "../field", default-features = false }
plonky2_maybe_rayon = { version = "0.2.0", path = "../maybe_rayon", default-features = false }
plonky2_util = { version = "0.2.0", path = "../util", default-features = false }
cryptography_cuda = { git = "ssh://git@github.com/okx/cryptography_cuda.git", branch="dev-dumi", optional=true}
cryptography_cuda ={git="ssh://git@github.com/okx/cryptography_cuda.git", rev="56cee09dd044de44f05c7d54383c6a8cb4078b29", optional=true}

[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies]
getrandom = { version = "0.2", default-features = false, features = ["js"] }
Expand Down
10 changes: 10 additions & 0 deletions plonky2/src/fri/oracle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ use cryptography_cuda::{
device::memory::HostOrDeviceSlice, lde_batch, lde_batch_multi_gpu, transpose_rev_batch,
types::*,
};
#[cfg(feature = "cuda")]
use crate::hash::merkle_tree::GPU_LOCK;

use itertools::Itertools;
use plonky2_field::types::Field;
use plonky2_maybe_rayon::*;
Expand Down Expand Up @@ -242,6 +245,10 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>
log_n: usize,
_degree: usize,
) -> MerkleTree<F, <C as GenericConfig<D>>::Hasher> {

let mut lock = GPU_LOCK.lock().unwrap();
*lock += 1;

// let salt_size = if blinding { SALT_SIZE } else { 0 };
// println!("salt_size: {:?}", salt_size);
let output_domain_size = log_n + rate_bits;
Expand Down Expand Up @@ -367,6 +374,9 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>

#[cfg(all(feature = "cuda", feature = "batch"))]
if log_n > 10 && polynomials.len() > 0 {
let mut lock = GPU_LOCK.lock().unwrap();
*lock += 1;

println!("log_n: {:?}", log_n);
let start_lde = std::time::Instant::now();

Expand Down
22 changes: 13 additions & 9 deletions plonky2/src/hash/merkle_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ use crate::plonk::config::{GenericHashOut, Hasher};
use crate::util::log2_strict;

#[cfg(feature = "cuda")]
static GPU_LOCK: Lazy<Arc<Mutex<i32>>> = Lazy::new(|| Arc::new(Mutex::new(0)));
pub static GPU_LOCK: Lazy<Arc<Mutex<u64>>> = Lazy::new(|| Arc::new(Mutex::new(0)));

#[cfg(feature = "cuda_timing")]
fn print_time(now: Instant, msg: &str) {
Expand Down Expand Up @@ -283,7 +283,8 @@ fn fill_digests_buf_gpu_v1<F: RichField, H: Hasher<F>>(
let cap_height: u64 = cap_height.try_into().unwrap();
let hash_size: u64 = H::HASH_SIZE.try_into().unwrap();

let _lock = GPU_LOCK.lock().unwrap();
let mut lock = GPU_LOCK.lock().unwrap();
*lock += 1;

unsafe {
let now = Instant::now();
Expand Down Expand Up @@ -405,6 +406,7 @@ fn fill_digests_buf_gpu_v1<F: RichField, H: Hasher<F>>(
}
}

/*
#[allow(dead_code)]
#[cfg(feature = "cuda")]
fn fill_digests_buf_gpu_v2<F: RichField, H: Hasher<F>>(
Expand Down Expand Up @@ -436,7 +438,8 @@ fn fill_digests_buf_gpu_v2<F: RichField, H: Hasher<F>>(
cap_buf.len() * NUM_HASH_OUT_ELTS
};
let _lock = GPU_LOCK.lock().unwrap();
let mut lock = GPU_LOCK.lock().unwrap();
*lock += 1;
// println!("{} {} {} {} {:?}", leaves_count, leaf_size, digests_count, caps_count, H::HASHER_TYPE);
let mut gpu_leaves_buf: HostOrDeviceSlice<'_, F> =
Expand Down Expand Up @@ -516,7 +519,7 @@ fn fill_digests_buf_gpu_v2<F: RichField, H: Hasher<F>>(
let mut host_digests_buf: Vec<F> = vec![F::ZERO; digests_size];
let _ = gpu_digests_buf.copy_to_host(host_digests_buf.as_mut_slice(), digests_size);
host_digests_buf
.par_chunks_exact(4)
.chunks_exact(4)
.zip(digests_buf)
.for_each(|(x, y)| {
unsafe {
Expand All @@ -536,7 +539,7 @@ fn fill_digests_buf_gpu_v2<F: RichField, H: Hasher<F>>(
let mut host_caps_buf: Vec<F> = vec![F::ZERO; caps_size];
let _ = gpu_caps_buf.copy_to_host(host_caps_buf.as_mut_slice(), caps_size);
host_caps_buf
.par_chunks_exact(4)
.chunks_exact(4)
.zip(cap_buf)
.for_each(|(x, y)| {
unsafe {
Expand All @@ -553,6 +556,7 @@ fn fill_digests_buf_gpu_v2<F: RichField, H: Hasher<F>>(
}
print_time(now, "copy results");
}
*/

#[cfg(feature = "cuda")]
fn fill_digests_buf_gpu_ptr<F: RichField, H: Hasher<F>>(
Expand All @@ -569,7 +573,7 @@ fn fill_digests_buf_gpu_ptr<F: RichField, H: Hasher<F>>(
let cap_height: u64 = cap_height.try_into().unwrap();
let leaf_size: u64 = leaf_len.try_into().unwrap();

let _lock = GPU_LOCK.lock().unwrap();
GPU_LOCK.try_lock().expect_err("GPU_LOCK should be locked!");

let now = Instant::now();
// if digests_buf is empty (size 0), just allocate a few bytes to avoid errors
Expand Down Expand Up @@ -649,7 +653,7 @@ fn fill_digests_buf_gpu_ptr<F: RichField, H: Hasher<F>>(

if digests_buf.len() > 0 {
host_digests
.par_chunks_exact(4)
.chunks_exact(4)
.zip(digests_buf)
.for_each(|(x, y)| {
unsafe {
Expand All @@ -667,7 +671,7 @@ fn fill_digests_buf_gpu_ptr<F: RichField, H: Hasher<F>>(

if cap_buf.len() > 0 {
host_caps
.par_chunks_exact(4)
.chunks_exact(4)
.zip(cap_buf)
.for_each(|(x, y)| {
unsafe {
Expand Down Expand Up @@ -960,7 +964,7 @@ impl<F: RichField, H: Hasher<F>> MerkleTree<F, H> {
let mut leaves = self.leaves.clone();

leaves[start_index * self.leaf_size..end_index * self.leaf_size]
.par_chunks_exact_mut(self.leaf_size)
.chunks_exact_mut(self.leaf_size)
.zip(new_leaves.clone())
.for_each(|(x, y)| {
for j in 0..self.leaf_size {
Expand Down

0 comments on commit 3779c03

Please sign in to comment.