Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix #182 #183

Merged
Merged 4 commits on Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ option(APR_BUILD_STATIC_LIB "Builds static library" ON)
option(APR_BUILD_EXAMPLES "Build APR examples" OFF)
option(APR_USE_LIBTIFF "Use LibTIFF" ON)
option(APR_TESTS "Build APR tests" OFF)
option(APR_PREFER_EXTERNAL_GTEST "When found, use the installed GTEST libs instead of included sources" ON)
option(APR_PREFER_EXTERNAL_GTEST "When found, use the installed GTEST libs instead of included sources" OFF)
option(APR_PREFER_EXTERNAL_BLOSC "When found, use the installed BLOSC libs instead of included sources" OFF)
option(APR_USE_CUDA "should APR use CUDA? (experimental - under development)" OFF)
option(APR_USE_OPENMP "should APR use OpenMP?" ON)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ cmake -DAPR_USE_OPENMP=OFF ..
| APR_TESTS | Build unit tests | OFF |
| APR_BENCHMARK | Build executable performance benchmarks | OFF |
| APR_USE_LIBTIFF | Enable LibTIFF (Required for tests and examples) | ON |
| APR_PREFER_EXTERNAL_GTEST | Use installed gtest instead of included sources | ON |
| APR_PREFER_EXTERNAL_BLOSC | Use installed blosc instead of included sources | ON |
| APR_PREFER_EXTERNAL_GTEST | Use installed gtest instead of included sources | OFF |
| APR_PREFER_EXTERNAL_BLOSC | Use installed blosc instead of included sources | OFF |
| APR_USE_OPENMP | Enable multithreading via OpenMP | ON |
| APR_USE_CUDA | Enable CUDA functionality (under development) | OFF |
| APR_DENOISE | Enable denoising code (requires Eigen3) | OFF |
Expand Down
4 changes: 0 additions & 4 deletions src/data_structures/APR/APR.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,6 @@ class APR {
* @param with_tree include the tree access
*/
void init_cuda(bool with_tree=true) {
gpuAccess.genInfo = &aprInfo;
gpuTreeAccess.genInfo = &treeInfo;
linearAccess.genInfo = &aprInfo;
linearAccessTree.genInfo = &treeInfo;
auto apr_helper = gpuAPRHelper();
if(with_tree) {
auto tree_helper = gpuTreeHelper();
Expand Down
8 changes: 5 additions & 3 deletions src/data_structures/APR/access/GPUAccess.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class GPUAccess {
void init_level_xz_vec(VectorData<uint64_t>& level_xz_vec);

GenInfo* genInfo;
uint64_t total_number_particles() { return genInfo->total_number_particles; }
uint64_t total_number_particles() const { return genInfo->total_number_particles; }

int level_max() const { return genInfo->l_max; }
int level_min() const { return genInfo->l_min; }
Expand Down Expand Up @@ -67,6 +67,7 @@ class GPUAccessHelper {
gpuAccess->init_y_vec(linearAccess->y_vec);
gpuAccess->init_level_xz_vec(linearAccess->level_xz_vec);
gpuAccess->init_xz_end_vec(linearAccess->xz_end_vec);
gpuAccess->genInfo = linearAccess->genInfo;
gpuAccess->copy2Device();
gpuAccess->initialized = true;
}
Expand All @@ -77,6 +78,7 @@ class GPUAccessHelper {
gpuAccess->init_y_vec(linearAccess->y_vec);
gpuAccess->init_level_xz_vec(linearAccess->level_xz_vec);
gpuAccess->init_xz_end_vec(linearAccess->xz_end_vec);
gpuAccess->genInfo = linearAccess->genInfo;
gpuAccess->copy2Device(total_number_particles(tree_access.level_max()), tree_access.gpuAccess);
gpuAccess->initialized = true;
}
Expand All @@ -86,9 +88,9 @@ class GPUAccessHelper {
gpuAccess->copy2Host();
}

uint64_t total_number_particles() { return gpuAccess->genInfo->total_number_particles; }
uint64_t total_number_particles() const { return gpuAccess->total_number_particles(); }

uint64_t total_number_particles(const int level) {
uint64_t total_number_particles(const int level) const {
uint64_t index = linearAccess->level_xz_vec[level] + linearAccess->x_num(level) - 1 + (linearAccess->z_num(level)-1)*linearAccess->x_num(level);
return linearAccess->xz_end_vec[index];
}
Expand Down
6 changes: 2 additions & 4 deletions src/numerics/APRDownsampleGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -899,8 +899,7 @@ __global__ void _fill_ne_rows_tree_cuda(const uint64_t* __restrict__ level_xz_ve
template<int blockSize_z, int blockSize_x>
void compute_ne_rows_tree_cuda(GPUAccessHelper& tree_access, VectorData<int>& ne_count, ScopedCudaMemHandler<int*, JUST_ALLOC>& ne_rows_gpu) {

ne_count.resize(tree_access.level_max() + 3);
ne_count[0] = 0;
ne_count.resize(tree_access.level_max() + 3, 0);

int z_blocks_max = (tree_access.z_num(tree_access.level_max()) + blockSize_z - 1) / blockSize_z;
int num_levels = tree_access.level_max() - tree_access.level_min() + 1;
Expand Down Expand Up @@ -979,8 +978,7 @@ void compute_ne_rows_tree_cuda(GPUAccessHelper& tree_access, VectorData<int>& ne


void compute_ne_rows_tree(GPUAccessHelper& tree_access, VectorData<int>& ne_counter, VectorData<int>& ne_rows) {
ne_counter.resize(tree_access.level_max() + 3);
ne_counter[0] = 0;
ne_counter.resize(tree_access.level_max() + 3, 0);

int z = 0;
int x = 0;
Expand Down
4 changes: 2 additions & 2 deletions src/numerics/miscCuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ __global__ void fill_ne_rows_cuda(const uint64_t* level_xz_vec,
template<int blockSize_z, int blockSize_x>
void compute_ne_rows_cuda(GPUAccessHelper& access, VectorData<int>& ne_count, ScopedCudaMemHandler<int*, JUST_ALLOC>& ne_rows_gpu, int blockSize) {

ne_count.resize(access.level_max()+2);
ne_count.resize(access.level_max()+2, 0);

int stride = blockSize_z * blockSize;

Expand Down Expand Up @@ -360,7 +360,7 @@ inline void add_nonempty(GPUAccessHelper& access, uint64_t& counter, VectorData<


void compute_ne_rows(GPUAccessHelper& access, VectorData<int>& ne_counter, VectorData<int>& ne_rows, int block_size) {
ne_counter.resize(access.level_max()+2);
ne_counter.resize(access.level_max()+2, 0);

int z = 0;
int x = 0;
Expand Down