From f0752d90572e64cd57c34035d157fbafa1a75d70 Mon Sep 17 00:00:00 2001 From: Xavier Andrade Date: Fri, 11 Oct 2024 13:40:31 -0700 Subject: [PATCH 1/3] Check that vector3 initialized to 0 with {}. --- external_libs/gpurun/include/gpu/reduce.hpp | 14 +++++--------- src/math/vector3.hpp | 8 ++++++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/external_libs/gpurun/include/gpu/reduce.hpp b/external_libs/gpurun/include/gpu/reduce.hpp index 3534f9a7..a3c1deaa 100644 --- a/external_libs/gpurun/include/gpu/reduce.hpp +++ b/external_libs/gpurun/include/gpu/reduce.hpp @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -80,15 +81,10 @@ auto run(reduce const & red, kernel_type kernel) -> decltype(kernel(0)) { auto const size = red.size; using type = decltype(kernel(0)); - -#ifndef ENABLE_GPU - - type accumulator(0.0); - for(long ii = 0; ii < size; ii++){ - accumulator += kernel(ii); - } - return accumulator; +#ifndef ENABLE_GPU + auto range = boost::multi::extension_t{0l, size}; + return std::transform_reduce(range.begin(), range.end(), type{}, std::plus<>{}, kernel); #else const int blocksize = 1024; @@ -144,7 +140,7 @@ __global__ void reduce_kernel_rr(long sizex, long sizey, kernel_type kernel, arr #endif template -auto run(reduce const & redx, reduce const & redy, kernel_type kernel) -> decltype(kernel(0, 0)) { +auto run(gpu::reduce const & redx, gpu::reduce const & redy, kernel_type kernel) -> decltype(kernel(0, 0)) { auto const sizex = redx.size; auto const sizey = redy.size; diff --git a/src/math/vector3.hpp b/src/math/vector3.hpp index 509b511c..5b1a29d2 100644 --- a/src/math/vector3.hpp +++ b/src/math/vector3.hpp @@ -345,6 +345,14 @@ TEST_CASE(INQ_TEST_FILE, INQ_TEST_TAG) { vector3 vv; (void)vv; } + SECTION("Zero by default constructor"){ + vector3 vv{}; + + CHECK(vv[0] == 0.0); + CHECK(vv[1] == 0.0); + CHECK(vv[2] == 0.0); + } + SECTION("Scalar"){ vector3 vv(-45.677); From 28df0460d44c2616b13cb217f150b13db449c9bd Mon Sep 17 00:00:00 2001 From: Xavier Andrade Date: Fri, 11 Oct 2024 14:09:17 -0700 Subject: [PATCH 2/3] Use transform_reduce for the 1D reduction in gpu::reduce. --- external_libs/gpurun/include/gpu/reduce.hpp | 59 +++------------------ 1 file changed, 8 insertions(+), 51 deletions(-) diff --git a/external_libs/gpurun/include/gpu/reduce.hpp b/external_libs/gpurun/include/gpu/reduce.hpp index a3c1deaa..cdb40a16 100644 --- a/external_libs/gpurun/include/gpu/reduce.hpp +++ b/external_libs/gpurun/include/gpu/reduce.hpp @@ -12,7 +12,13 @@ #include #include + +#ifndef ENABLE_GPU #include +#else +#include +#include +#endif #include #include @@ -27,40 +33,6 @@ struct reduce { long size; }; - -#ifdef ENABLE_GPU -template -__global__ void reduce_kernel_r(long size, kernel_type kernel, array_type odata) { - - extern __shared__ char shared_mem[]; - auto reduction_buffer = (typename array_type::element *) shared_mem; - - // each thread loads one element from global to shared mem - unsigned int tid = threadIdx.x; - unsigned int ii = blockIdx.x*blockDim.x + threadIdx.x; - - if(ii < size){ - reduction_buffer[tid] = kernel(ii); - } else { - reduction_buffer[tid] = (typename array_type::element) 0.0; - } - - __syncthreads(); - - // do reduction in shared mem - for (unsigned int s = blockDim.x/2; s > 0; s >>= 1){ - if (tid < s) { - reduction_buffer[tid] += reduction_buffer[tid + s]; - } - __syncthreads(); - } - - // write result for this block to global mem - if (tid == 0) odata[blockIdx.x] = reduction_buffer[0]; - -} -#endif - template struct array_access { array_type array; @@ -81,27 +53,12 @@ auto run(reduce const & red, kernel_type kernel) -> decltype(kernel(0)) { auto const size = red.size; using type = decltype(kernel(0)); + auto range = boost::multi::extension_t{0l, size}; #ifndef ENABLE_GPU - auto range = boost::multi::extension_t{0l, size}; return std::transform_reduce(range.begin(), range.end(), type{}, std::plus<>{}, kernel); #else - - const int blocksize = 1024; - - unsigned nblock = (size + blocksize - 1)/blocksize; - gpu::array result(nblock); - - reduce_kernel_r<<>>(size, kernel, begin(result)); - check_error(last_error()); - - if(nblock == 1) { - gpu::sync(); - return result[0]; - } else { - return run(gpu::reduce(nblock), array_access{begin(result)}); - } - + return thrust::transform_reduce(thrust::device, range.begin(), range.end(), kernel, type{}, std::plus<>{}); #endif } From 92b1f945e2114b7140c82293f8aacd987d4b4773 Mon Sep 17 00:00:00 2001 From: Xavier Andrade Date: Sun, 13 Oct 2024 02:14:48 -0700 Subject: [PATCH 3/3] Do not restrict the number of scf steps in the XC test calculation. --- src/hamiltonian/xc_term.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hamiltonian/xc_term.hpp b/src/hamiltonian/xc_term.hpp index 03bdbbcf..7eb87b7f 100644 --- a/src/hamiltonian/xc_term.hpp +++ b/src/hamiltonian/xc_term.hpp @@ -496,7 +496,7 @@ TEST_CASE(INQ_TEST_FILE, INQ_TEST_TAG){ ions.insert("H", {0.0_b, 0.0_b, 0.0_b}); auto electrons = systems::electrons(par, ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_unpolarized()); ground_state::initial_guess(ions, electrons); - auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(100)); + auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha)); auto nvxc = result.energy.nvxc(); auto exc = result.energy.xc(); Approx target = Approx(nvxc).epsilon(1.e-10); @@ -517,7 +517,7 @@ TEST_CASE(INQ_TEST_FILE, INQ_TEST_TAG){ ions.insert("H", {0.0_b, 0.0_b, 0.0_b}); auto electrons = systems::electrons(par, ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_polarized()); ground_state::initial_guess(ions, electrons); - auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(100)); + auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha)); auto nvxc = result.energy.nvxc(); auto exc = result.energy.xc(); Approx target = Approx(nvxc).epsilon(1.e-10); @@ -538,7 +538,7 @@ TEST_CASE(INQ_TEST_FILE, INQ_TEST_TAG){ ions.insert("H", {0.0_b, 0.0_b, 0.0_b}); auto electrons = systems::electrons(par, ions, options::electrons{}.cutoff(30.0_Ha).extra_states(2).spin_non_collinear()); ground_state::initial_guess(ions, electrons); - auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha).max_steps(100)); + auto result = ground_state::calculate(ions, electrons, options::theory{}.lda(), options::ground_state{}.steepest_descent().energy_tolerance(1.e-8_Ha)); auto nvxc = result.energy.nvxc(); auto exc = result.energy.xc(); Approx target = Approx(nvxc).epsilon(1.e-10);