diff --git a/CMakeLists.txt b/CMakeLists.txt index 70cf734b..6a55fa38 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ option(APR_BUILD_STATIC_LIB "Builds static library" ON) option(APR_BUILD_EXAMPLES "Build APR examples" OFF) option(APR_USE_LIBTIFF "Use LibTIFF" ON) option(APR_TESTS "Build APR tests" OFF) -option(APR_PREFER_EXTERNAL_GTEST "When found, use the installed GTEST libs instead of included sources" ON) +option(APR_PREFER_EXTERNAL_GTEST "When found, use the installed GTEST libs instead of included sources" OFF) option(APR_PREFER_EXTERNAL_BLOSC "When found, use the installed BLOSC libs instead of included sources" OFF) option(APR_USE_CUDA "should APR use CUDA? (experimental - under development)" OFF) option(APR_USE_OPENMP "should APR use OpenMP?" ON) diff --git a/README.md b/README.md index 43ff7cb9..f1af2825 100644 --- a/README.md +++ b/README.md @@ -58,8 +58,8 @@ cmake -DAPR_USE_OPENMP=OFF .. | APR_TESTS | Build unit tests | OFF | | APR_BENCHMARK | Build executable performance benchmarks | OFF | | APR_USE_LIBTIFF | Enable LibTIFF (Required for tests and examples) | ON | -| APR_PREFER_EXTERNAL_GTEST | Use installed gtest instead of included sources | ON | -| APR_PREFER_EXTERNAL_BLOSC | Use installed blosc instead of included sources | ON | +| APR_PREFER_EXTERNAL_GTEST | Use installed gtest instead of included sources | OFF | +| APR_PREFER_EXTERNAL_BLOSC | Use installed blosc instead of included sources | OFF | | APR_USE_OPENMP | Enable multithreading via OpenMP | ON | | APR_USE_CUDA | Enable CUDA functionality (under development) | OFF | | APR_DENOISE | Enable denoising code (requires Eigen3) | OFF | diff --git a/src/data_structures/APR/APR.hpp b/src/data_structures/APR/APR.hpp index b2b34fd5..7c9a2cf2 100644 --- a/src/data_structures/APR/APR.hpp +++ b/src/data_structures/APR/APR.hpp @@ -84,10 +84,6 @@ class APR { * @param with_tree include the tree access */ void init_cuda(bool with_tree=true) { - gpuAccess.genInfo = &aprInfo; - gpuTreeAccess.genInfo = &treeInfo; - linearAccess.genInfo = &aprInfo; - linearAccessTree.genInfo = &treeInfo; auto apr_helper = gpuAPRHelper(); if(with_tree) { auto tree_helper = gpuTreeHelper(); diff --git a/src/data_structures/APR/access/GPUAccess.hpp b/src/data_structures/APR/access/GPUAccess.hpp index 4d5f676a..1020673c 100644 --- a/src/data_structures/APR/access/GPUAccess.hpp +++ b/src/data_structures/APR/access/GPUAccess.hpp @@ -31,7 +31,7 @@ class GPUAccess { void init_level_xz_vec(VectorData& level_xz_vec); GenInfo* genInfo; - uint64_t total_number_particles() { return genInfo->total_number_particles; } + uint64_t total_number_particles() const { return genInfo->total_number_particles; } int level_max() const { return genInfo->l_max; } int level_min() const { return genInfo->l_min; } @@ -67,6 +67,7 @@ class GPUAccessHelper { gpuAccess->init_y_vec(linearAccess->y_vec); gpuAccess->init_level_xz_vec(linearAccess->level_xz_vec); gpuAccess->init_xz_end_vec(linearAccess->xz_end_vec); + gpuAccess->genInfo = linearAccess->genInfo; gpuAccess->copy2Device(); gpuAccess->initialized = true; } @@ -77,6 +78,7 @@ class GPUAccessHelper { gpuAccess->init_y_vec(linearAccess->y_vec); gpuAccess->init_level_xz_vec(linearAccess->level_xz_vec); gpuAccess->init_xz_end_vec(linearAccess->xz_end_vec); + gpuAccess->genInfo = linearAccess->genInfo; gpuAccess->copy2Device(total_number_particles(tree_access.level_max()), tree_access.gpuAccess); gpuAccess->initialized = true; } @@ -86,9 +88,9 @@ class GPUAccessHelper { gpuAccess->copy2Host(); } - uint64_t total_number_particles() { return gpuAccess->genInfo->total_number_particles; } + uint64_t total_number_particles() const { return gpuAccess->total_number_particles(); } - uint64_t total_number_particles(const int level) { + uint64_t total_number_particles(const int level) const { uint64_t index = linearAccess->level_xz_vec[level] + linearAccess->x_num(level) - 1 + (linearAccess->z_num(level)-1)*linearAccess->x_num(level); return linearAccess->xz_end_vec[index]; } diff --git a/src/numerics/APRDownsampleGPU.cu b/src/numerics/APRDownsampleGPU.cu index 9da3410e..4f934015 100644 --- a/src/numerics/APRDownsampleGPU.cu +++ b/src/numerics/APRDownsampleGPU.cu @@ -899,8 +899,7 @@ __global__ void _fill_ne_rows_tree_cuda(const uint64_t* __restrict__ level_xz_ve template void compute_ne_rows_tree_cuda(GPUAccessHelper& tree_access, VectorData& ne_count, ScopedCudaMemHandler& ne_rows_gpu) { - ne_count.resize(tree_access.level_max() + 3); - ne_count[0] = 0; + ne_count.resize(tree_access.level_max() + 3, 0); int z_blocks_max = (tree_access.z_num(tree_access.level_max()) + blockSize_z - 1) / blockSize_z; int num_levels = tree_access.level_max() - tree_access.level_min() + 1; @@ -979,8 +978,7 @@ void compute_ne_rows_tree_cuda(GPUAccessHelper& tree_access, VectorData& ne void compute_ne_rows_tree(GPUAccessHelper& tree_access, VectorData& ne_counter, VectorData& ne_rows) { - ne_counter.resize(tree_access.level_max() + 3); - ne_counter[0] = 0; + ne_counter.resize(tree_access.level_max() + 3, 0); int z = 0; int x = 0; diff --git a/src/numerics/miscCuda.cu b/src/numerics/miscCuda.cu index 93b5b94e..0076eb45 100644 --- a/src/numerics/miscCuda.cu +++ b/src/numerics/miscCuda.cu @@ -241,7 +241,7 @@ __global__ void fill_ne_rows_cuda(const uint64_t* level_xz_vec, template void compute_ne_rows_cuda(GPUAccessHelper& access, VectorData& ne_count, ScopedCudaMemHandler& ne_rows_gpu, int blockSize) { - ne_count.resize(access.level_max()+2); + ne_count.resize(access.level_max()+2, 0); int stride = blockSize_z * blockSize; @@ -360,7 +360,7 @@ inline void add_nonempty(GPUAccessHelper& access, uint64_t& counter, VectorData< void compute_ne_rows(GPUAccessHelper& access, VectorData& ne_counter, VectorData& ne_rows, int block_size) { - ne_counter.resize(access.level_max()+2); + ne_counter.resize(access.level_max()+2, 0); int z = 0; int x = 0;