diff --git a/examples/madness/mra-device/kernels.cu b/examples/madness/mra-device/kernels.cu index 1e6526ab3..99ab0e516 100644 --- a/examples/madness/mra-device/kernels.cu +++ b/examples/madness/mra-device/kernels.cu @@ -428,10 +428,10 @@ GLOBALSCOPE void compress_kernel( SHARED TensorView hgT; if (is_t0) { s = TensorView(&tmp[0], 2*K); + workspace = TensorView(&tmp[TWOK2NDIM], 2*K); d = TensorView(result_ptr, 2*K); p = TensorView(p_ptr, K); - hgT = TensorView(hgT_ptr, K); - workspace = TensorView(&tmp[TWOK2NDIM], K); + hgT = TensorView(hgT_ptr, 2*K); } SYNCTHREADS(); d = 0.0; @@ -512,7 +512,7 @@ GLOBALSCOPE void reconstruct_kernel( node = TensorView(node_ptr, 2*K); s = TensorView(&tmp_ptr[0], 2*K); workspace = TensorView(&tmp_ptr[TWOK2NDIM], 2*K); - hg = TensorView(hg_ptr, K); + hg = TensorView(hg_ptr, 2*K); from_parent = TensorView(from_parent_ptr, K); } SYNCTHREADS(); diff --git a/examples/madness/mra-device/kernels.h b/examples/madness/mra-device/kernels.h index cd9ab9f36..8447a7f1d 100644 --- a/examples/madness/mra-device/kernels.h +++ b/examples/madness/mra-device/kernels.h @@ -40,8 +40,7 @@ template std::size_t compress_tmp_size(std::size_t K) { const size_t TWOK2NDIM = std::pow(2*K,NDIM); const size_t K2NDIM = std::pow(K,NDIM); - return (TWOK2NDIM) // s - + K2NDIM // workspace + return (2*TWOK2NDIM) // s & workspace + mra::Key::num_children() // sumsq for each child and result ; }