Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Daint: change cuda module #1553

Merged
merged 7 commits on Sep 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions include/gridtools/common/hugepage_alloc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ namespace gridtools {
* reduce cache set conflicts.
*/
inline void *hugepage_alloc(std::size_t size) {
#ifdef __cray__
// see issue https://github.com/GridTools/gridtools/issues/1557
#warning "hugepage_alloc on Cray might be suboptimal"
#endif
static std::atomic<std::size_t> s_offset(64);
auto offset = s_offset.load(std::memory_order_relaxed);
auto next_offset = offset;
Expand Down
12 changes: 4 additions & 8 deletions include/gridtools/stencil/cpu_ifirst/loops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,7 @@ namespace gridtools {
namespace loops_impl_ {
template <class Stage, class Ptr, class Strides>
GT_FORCE_INLINE void i_loop(int_t size, Stage stage, Ptr &ptr, Strides const &strides) {
#ifdef NDEBUG
// TODO(anstaf & fthaler):
// We may need to re-run the tests with different combinations of pragmas on different compilers.
// The current set of pragmas is at the border of legality for the present code, so there may be a better option.
#pragma ivdep
#pragma omp simd
#endif
for (int_t i = 0; i < size; ++i) {
using namespace literals;
stage(ptr, strides);
Expand Down Expand Up @@ -109,7 +103,8 @@ namespace gridtools {
int_t i_blocks = info.i_blocks();
int_t j_blocks = info.j_blocks();
int_t k_size = grid.k_size();
thread_pool::parallel_for_loop(ThreadPool(),
thread_pool::parallel_for_loop(
ThreadPool(),
[&](auto i, auto k, auto j) {
tuple_util::for_each([block = info.block(i, j, k)](auto &&loop) { loop(block); }, loops);
},
Expand Down Expand Up @@ -157,7 +152,8 @@ namespace gridtools {
template <class ThreadPool, class Grid, class Loops>
void run_loops(std::false_type, Grid const &grid, Loops loops) {
execinfo info(ThreadPool(), grid);
thread_pool::parallel_for_loop(ThreadPool(),
thread_pool::parallel_for_loop(
ThreadPool(),
[&](auto i, auto j) {
tuple_util::for_each([block = info.block(i, j)](auto &&loop) { loop(block); }, loops);
},
Expand Down
2 changes: 1 addition & 1 deletion jenkins/envs/daint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function module() {
}

module load daint-gpu
module load cudatoolkit/10.1.105_3.27-7.0.1.1_4.1__ga311ce7
module load cudatoolkit
module rm CMake
module load /users/jenkins/easybuild/daint/haswell/modules/all/CMake/3.14.5

Expand Down
2 changes: 0 additions & 2 deletions jenkins/envs/daint_cray.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,3 @@ export GTCMAKE_CMAKE_CUDA_HOST_COMPILER="$CXX"

export CUDAHOSTCXX="$CXX"
export CTEST_PARALLEL_LEVEL=1
export CXXFLAGS='-fno-cray-gpu -fno-cray'
export CFLAGS='-fno-cray-gpu -fno-cray-mallopt -fno-cray'
Loading