diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c7e51d1e..2c6f1658 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,7 +6,7 @@ jobs: strategy: matrix: name: [mac, ubuntu-mpich, ubuntu-standard, ubuntu-debug, - ubuntu-nogather] + ubuntu-nogather, mac-thread-cap] include: - name: ubuntu-standard os: ubuntu-latest @@ -16,6 +16,7 @@ jobs: debug: 0 noalligather: 0 mpich: 0 + threadoff: 0 - name: ubuntu-debug os: ubuntu-latest testos: LINUX @@ -24,6 +25,7 @@ jobs: debug: 1 noalligather: 0 mpich: 0 + threadoff: 0 - name: ubuntu-nogather os: ubuntu-latest testos: LINUX @@ -32,6 +34,7 @@ jobs: debug: 0 noalligather: 1 mpich: 0 + threadoff: 0 - name: ubuntu-mpich os: ubuntu-latest testos: LINUX @@ -40,6 +43,16 @@ jobs: debug: 0 noalligather: 0 mpich: 1 + threadoff: 0 + - name: mac-thread-cap + os: macos-latest + testos: OSX + maketest: "ctest -R Regression111" + testexamples: 0 + debug: 0 + noalligather: 0 + mpich: 0 + threadoff: 1 - name: mac os: macos-latest testos: OSX @@ -49,6 +62,7 @@ jobs: noalligather: 0 mpich: 0 lint: 1 + threadoff: 0 runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v1 @@ -71,6 +85,7 @@ jobs: env: MAKETEST: ${{ matrix.maketest }} TESTOS: ${{ matrix.testos }} + THREADOFF: ${{ matrix.threadoff }} - name: check examples run: | bash -l UnitTests/check_examples.sh diff --git a/Source/Fortran/ProcessGridModule.F90 b/Source/Fortran/ProcessGridModule.F90 index 1d25f2a0..ddada7fd 100644 --- a/Source/Fortran/ProcessGridModule.F90 +++ b/Source/Fortran/ProcessGridModule.F90 @@ -6,7 +6,7 @@ MODULE ProcessGridModule & WriteHeader, WriteListElement USE NTMPIModule #ifdef _OPENMP - USE omp_lib, ONLY : omp_get_num_threads + USE omp_lib, ONLY : omp_get_num_threads, omp_get_max_threads #endif IMPLICIT NONE PRIVATE @@ -51,6 +51,8 @@ MODULE ProcessGridModule INTEGER, DIMENSION(:,:), ALLOCATABLE, PUBLIC :: blocked_within_slice_comm !> blocked communicator between slices. 
INTEGER, DIMENSION(:,:), ALLOCATABLE, PUBLIC :: blocked_between_slice_comm + !> Maximum number of OpenMP threads (from omp_get_max_threads; 1 if NOBLOCK). + INTEGER :: omp_max_threads END TYPE ProcessGrid_t !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !> The default process grid. @@ -246,9 +248,11 @@ SUBROUTINE ConstructNewProcessGrid_full(grid, world_comm_, process_rows_, & !! threads. #if defined NOBLOCK grid%block_multiplier = 1 + grid%omp_max_threads = 1 #elif defined _OPENMP !$omp PARALLEL num_threads = omp_get_num_threads() + grid%omp_max_threads = omp_get_max_threads() !$omp end PARALLEL grid%block_multiplier = num_threads/& & (column_block_multiplier+row_block_multiplier) @@ -357,6 +361,7 @@ SUBROUTINE CopyProcessGrid(old_grid, new_grid) new_grid%block_multiplier = old_grid%block_multiplier new_grid%number_of_blocks_columns = old_grid%number_of_blocks_columns new_grid%number_of_blocks_rows = old_grid%number_of_blocks_rows + new_grid%omp_max_threads = old_grid%omp_max_threads !! Allocate Blocks ALLOCATE(new_grid%blocked_row_comm(old_grid%number_of_blocks_rows)) diff --git a/Source/Fortran/distributed_algebra_includes/MatrixMultiply.f90 b/Source/Fortran/distributed_algebra_includes/MatrixMultiply.f90 index 1197d0fe..3969dbac 100644 --- a/Source/Fortran/distributed_algebra_includes/MatrixMultiply.f90 +++ b/Source/Fortran/distributed_algebra_includes/MatrixMultiply.f90 @@ -232,6 +232,10 @@ END SELECT END DO END DO + !! Prevent deadlock when the number of tasks is capped (only one OpenMP thread available). + IF (matA%process_grid%omp_max_threads .EQ. 
1) THEN + !$OMP taskwait + END IF END DO !$OMP END MASTER !$OMP END PARALLEL diff --git a/UnitTests/run_ci_test.sh b/UnitTests/run_ci_test.sh index f0313729..c3c773d0 100644 --- a/UnitTests/run_ci_test.sh +++ b/UnitTests/run_ci_test.sh @@ -4,6 +4,11 @@ if [[ "$TESTOS" == "LINUX" ]]; then conda activate ntpoly-conda fi +if [[ "$THREADOFF" == "1" ]]; then + export OMP_NUM_THREADS=1 + echo "Setting threads to 1" +fi + cd Build export CTEST_OUTPUT_ON_FAILURE=1 eval "$MAKETEST"