diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f371905..b9f404d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,15 @@ Change Log Notable project changes in various releases. +Unreleased +========== + +Fixed +----- + +For windows, lock and unlock renamed to lock_all() and unlock_all() to +match the MPI calls they are actually using. + 1.5 === diff --git a/lib/mpifx_win.fpp b/lib/mpifx_win.fpp index 1e4b301..1afdcde 100644 --- a/lib/mpifx_win.fpp +++ b/lib/mpifx_win.fpp @@ -6,8 +6,8 @@ module mpifx_win_module use mpi_f08, only : MPI_ADDRESS_KIND, mpi_barrier, mpi_comm, MPI_INFO_NULL, MPI_MODE_NOCHECK,& & mpi_win, mpi_win_allocate_shared, mpi_win_fence, mpi_win_free, mpi_win_lock_all,& - & mpi_win_shared_query, mpi_win_sync, mpi_win_unlock - use mpifx_helper_module, only : handle_errorflag, sp, dp + & mpi_win_shared_query, mpi_win_sync, mpi_win_unlock_all + use mpifx_helper_module, only : dp, getoptarg, handle_errorflag, sp use mpifx_comm_module, only : mpifx_comm use mpifx_constants_module, only : MPIFX_SIZE_T use iso_c_binding, only : c_ptr, c_f_pointer @@ -24,19 +24,26 @@ module mpifx_win_module type(mpi_comm) :: comm !< MPI communicator handle. contains - #:for TYPE in WIN_DATA_TYPES - #:for _, ADDRESS_SUFFIX in ADDRESS_KINDS_SUFFIXES - #:set SUFFIX = TYPE_ABBREVS[TYPE] + '_' + ADDRESS_SUFFIX - procedure, private :: mpifx_win_allocate_shared_${SUFFIX}$ - generic :: allocate_shared => mpifx_win_allocate_shared_${SUFFIX}$ - #:endfor +#:for TYPE in WIN_DATA_TYPES + #:for _, ADDRESS_SUFFIX in ADDRESS_KINDS_SUFFIXES + #:set SUFFIX = TYPE_ABBREVS[TYPE] + '_' + ADDRESS_SUFFIX + procedure, private :: mpifx_win_allocate_shared_${SUFFIX}$ + generic :: allocate_shared => mpifx_win_allocate_shared_${SUFFIX}$ #:endfor +#:endfor !> Locks a shared memory segment for remote access. - procedure :: lock => mpifx_win_lock - - !> Unlocks a shared memory segment. - procedure :: unlock => mpifx_win_unlock + !! + !! 
Notes based on the MPI3.1 documentation: Start RMA access epoch for all processes in win, + !! (lock of type MPI_LOCK_SHARED). During the epoch, any window member processes calling + !! lock_all can access the window memory on all processes (using RMA ops). Routine is not + !! collective — the "all" refers to it being a lock on all members of the win group. + !! Accesses protected by a shared lock are not concurrent in the window. + procedure :: lock_all => mpifx_win_lock_all + + !> Unlocks a shared memory window. + !! Ends the RMA access epoch at all processes with access to the window. + procedure :: unlock_all => mpifx_win_unlock_all !> Synchronizes shared memory across MPI ranks after remote access. procedure :: sync => mpifx_win_sync @@ -112,21 +119,32 @@ contains !> Locks a shared memory segment for remote access. Starts a remote access epoch. !! - !! \param self Handle of the shared memory window. - !! \param error Optional error code on return. + !! \param self Handle of the shared memory window. + !! \param checkLock Optional check if other locks are also applied to the window. + !! \param error Optional error code on return. !! !! \see MPI documentation (\c MPI_WIN_LOCK_ALL) !! - subroutine mpifx_win_lock(self, error) + subroutine mpifx_win_lock_all(self, checkLock, error) class(mpifx_win), intent(inout) :: self + logical, intent(in), optional :: checkLock integer, intent(out), optional :: error integer :: error0 + ! May be MPI implementation dependent, but if true no other process holds (or attempts to + !
acquire) a conflicting lock, while the caller(s) holds the window lock: + logical :: assert - call mpi_win_lock_all(MPI_MODE_NOCHECK, self%win, error0) - call handle_errorflag(error0, "MPI_WIN_LOCK_ALL in mpifx_win_lock", error) + call getoptarg(.false., assert, checkLock) + + if (assert) then + call mpi_win_lock_all(0, self%win, error0) + else + call mpi_win_lock_all(MPI_MODE_NOCHECK, self%win, error0) + end if + call handle_errorflag(error0, "MPI_WIN_LOCK_ALL in mpifx_win_lock_all", error) - end subroutine mpifx_win_lock + end subroutine mpifx_win_lock_all !> Unlocks a shared memory segment. Finishes a remote access epoch. @@ -136,16 +154,16 @@ contains !! !! \see MPI documentation (\c MPI_WIN_UNLOCK_ALL) !! - subroutine mpifx_win_unlock(self, error) + subroutine mpifx_win_unlock_all(self, error) class(mpifx_win), intent(inout) :: self integer, intent(out), optional :: error integer :: error0 call mpi_win_unlock_all(self%win, error0) - call handle_errorflag(error0, "MPI_WIN_UNLOCK_ALL in mpifx_win_unlock", error) + call handle_errorflag(error0, "MPI_WIN_UNLOCK_ALL in mpifx_win_unlock_all", error) - end subroutine mpifx_win_unlock + end subroutine mpifx_win_unlock_all !> Synchronizes shared memory across MPI ranks after remote access. diff --git a/test/test_win_shared_mem.f90 b/test/test_win_shared_mem.f90 index ead4583..852907c 100644 --- a/test/test_win_shared_mem.f90 +++ b/test/test_win_shared_mem.f90 @@ -2,18 +2,27 @@ program test_win_shared_mem use libmpifx_module implicit none + ! global communicator and within each shared memory node type(mpifx_comm) :: globalcomm, nodecomm + ! RMA window, in this case shared memory type(mpifx_win) :: win - integer, parameter :: sample_value = 42, size_rank_0 = 7, size_rank_other = 4 + ! Value to store for testing + integer, parameter :: sample_value = 42 + ! Specific local sub-region sizes for one of the tests, either on the leader or followers in a + ! node + integer, parameter :: size_rank_0 = 7, size_rank_other = 4 + ! 
Global and local sizes of array in window integer(MPIFX_SIZE_T) :: global_length, local_length + integer :: global_length_int32, local_length_int32 integer :: rank, ii + ! Pointer to whole array in window and the local part integer, pointer :: global_pointer(:), local_pointer(:) call mpifx_init() call globalcomm%init() - ! Create a new communicator for all ranks on a node first + ! Create a new communicator for all ranks that are on the same node first call globalcomm%split_type(MPI_COMM_TYPE_SHARED, globalcomm%rank, nodecomm) if (nodecomm%lead) then @@ -23,9 +32,14 @@ program test_win_shared_mem end if global_length = size_rank_0 + size_rank_other * (nodecomm%size - 1) + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !! First example, global array, distributed with only one process on the node writing + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ! Allocate a global window on the node call win%allocate_shared(nodecomm, global_length, global_pointer) - call win%lock() + call win%lock_all() ! Only rank 0 writes data into the array if (nodecomm%lead) then @@ -33,16 +47,22 @@ program test_win_shared_mem end if call win%sync() - call win%unlock() + call win%unlock_all() ! All ranks on the node will read the same value in the global array view if (any(global_pointer(1:global_length) /= sample_value)) then - write(*, "(3(A,1X,I0,1X))") "ERROR! ID:", nodecomm%rank, "VALUE:", global_pointer(1), "EXPECTED:", sample_value + write(*, "(3(A,1X,I0,1X))") "ERROR! ID:", nodecomm%rank, "VALUE:", global_pointer(1),& + & "EXPECTED:", sample_value call mpifx_abort(globalcomm) end if call win%free() + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !! Second example, global array, lead rank writing to all of it, then local parts being written by + !! 
individual ranks on the node + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! Initialize again with specific local length call win%allocate_shared(nodecomm, global_length, global_pointer, local_length, local_pointer) @@ -57,7 +77,8 @@ program test_win_shared_mem ! All ranks on the node will read the same value in their local view if (any(local_pointer(1:local_length) /= sample_value)) then - write(*, "(2(A,1X,I0,1X))") "ERROR! ID:", nodecomm%rank, "VALUE:", local_pointer(1), "EXPECTED:", sample_value + write(*, "(2(A,1X,I0,1X))") "ERROR! ID:", nodecomm%rank, "VALUE:", local_pointer(1),& + & "EXPECTED:", sample_value call mpifx_abort(globalcomm) end if @@ -66,25 +87,32 @@ program test_win_shared_mem call win%fence() - ! All ranks should now read the correct global values + ! All ranks should now be able to read the correct global values if (any(global_pointer(1:size_rank_0) /= 0)) then - write(*, "(2(A,1X,I0,1X))") "ERROR! ID:", nodecomm%rank, "VALUE:", global_pointer(1), "EXPECTED:", 0 + write(*, "(2(A,1X,I0,1X))") "ERROR! ID:", nodecomm%rank, "VALUE:", global_pointer(1),& + & "EXPECTED:", 0 call mpifx_abort(globalcomm) end if do rank = 1, nodecomm%size - 1 ii = size_rank_0 + 1 + size_rank_other * (rank - 1) if (any(global_pointer(ii:ii+size_rank_other-1) /= rank)) then - write(*, "(2(A,1X,I0,1X))") "ERROR! ID:", nodecomm%rank, "VALUE:", global_pointer(ii), "EXPECTED:", rank + write(*, "(2(A,1X,I0,1X))") "ERROR! ID:", nodecomm%rank, "VALUE:", global_pointer(ii),& + & "EXPECTED:", rank call mpifx_abort(globalcomm) end if end do call win%free() + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !! 32 bit sized indexing as a test + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! 
Initialize again with int32 sizes global_length_int32 = global_length local_length_int32 = local_length - call win%allocate_shared(nodecomm, global_length_int32, global_pointer, local_length_int32, local_pointer) + call win%allocate_shared(nodecomm, global_length_int32, global_pointer, local_length_int32,& + & local_pointer) call win%free() call mpifx_finalize()