diff --git a/model/src/w3pro2md.F90 b/model/src/w3pro2md.F90 index a23f893ef..ca79193a9 100644 --- a/model/src/w3pro2md.F90 +++ b/model/src/w3pro2md.F90 @@ -855,6 +855,12 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) WRITE (NDST,9010) #endif ! +#ifdef W3_GPU +!$ACC DATA CREATE(VLCFLY, VLCFLX, VDXX, VDYY, VDXY, CXTOT, CYTOT) & +!$ACC CREATE(VFDIFX_FAC, VFDIFY_FAC, VFDIFC_FAC, VQ_OLD) & +!$ACC CREATE(HQFAC, HPFAC) +!$ACC KERNELS +#endif VLCFLX = 0. VLCFLY = 0. VFDIFX = 0. @@ -879,6 +885,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IXY) +#elif W3_GPU + !$ACC LOOP INDEPENDENT #endif ! DO ISEA=1, NSEA @@ -899,11 +907,17 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS #endif ! IF ( FLCUR ) THEN #ifdef W3_T WRITE (NDST,9022) +#endif +#ifdef W3_GPU +!$ACC KERNELS +!$ACC LOOP INDEPENDENT #endif DO ISEA=1, NSEA IXY = MAPSF(ISEA,3) @@ -914,11 +928,17 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) VQ(IXY), CXTOT(IXY), CYTOT(IXY) #endif END DO +#ifdef W3_GPU +!$ACC END KERNELS +#endif END IF ! #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY, CP, CQ) +#elif W3_GPU +!$ACC KERNELS +!$ACC LOOP INDEPENDENT #endif ! DO ISEA=1, NSEA @@ -933,6 +953,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS #endif ! ! 2.b Diffusion coefficients @@ -942,6 +964,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY, & !$OMP& DCELL, XWIND, TFAC, DSS, DNN) +#elif W3_GPU + !$ACC KERNELS + !$ACC LOOP INDEPENDENT #endif ! DO ISEA=1, NSEA @@ -978,6 +1003,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS #endif ! 
END IF @@ -990,8 +1017,10 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY ) +#elif W3_GPU + !$ACC KERNELS + !$ACC LOOP INDEPENDENT #endif - ! DO ISEA=1, NSEA IX = MAPSF(ISEA,1) IY = MAPSF(ISEA,2) @@ -1001,6 +1030,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS #endif ! IF ( YFIRST ) THEN @@ -1055,6 +1086,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY ) +#elif W3_GPU + !$ACC KERNELS + !$ACC LOOP INDEPENDENT #endif ! DO ISEA=1, NSEA @@ -1088,11 +1122,16 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) / CG(IK,ISEA) * CLATS(ISEA) END DO END IF +#ifdef W3_GPU + !$ACC END KERNELS +#endif ! ! 3.c Diffusion correction ! IF ( DTME .NE. 0. ) THEN - +#ifdef W3_GPU + !$ACC KERNELS +#endif IF ( GLOBAL ) THEN DO IY=1, NY VQ(IY+NX*NY) = VQ(IY) @@ -1129,8 +1168,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY, & !$OMP& QXX, QYY, QXY, DVQ ) +#elif W3_GPU + !$ACC LOOP INDEPENDENT #endif - ! DO IP=1, NACT IXY = MAPAXY(IP) ISEA = MAPFS(IXY) @@ -1200,6 +1240,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS #endif ! END IF @@ -1215,6 +1257,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IXY ) +#elif W3_GPU + !$ACC KERNELS + !$ACC LOOP INDEPENDENT #endif ! DO ISEA=1, NSEA @@ -1231,6 +1276,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS + !$ACC END DATA #endif ! 
RETURN diff --git a/model/src/w3uno2md.F90 b/model/src/w3uno2md.F90 index cb8fce7fc..93ff743d8 100644 --- a/model/src/w3uno2md.F90 +++ b/model/src/w3uno2md.F90 @@ -983,6 +983,10 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & ! ! 1. Initialize aux. array FLA and closure ------------------------- * ! +#ifdef W3_GPU + !$ACC DATA CREATE(FLA) + !$ACC KERNELS +#endif FLA = 0. ! IF ( BCLOSE ) THEN @@ -1026,6 +1030,8 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & !$OMP QBO, IX, IY, IY2, IX2, QN & #endif !$OMP IXYC, IXYD, QB) +#elif W3_GPU + !$ACC LOOP INDEPENDENT #endif ! DO IP=1, NB0 @@ -1073,6 +1079,9 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & WRITE (NDST,9011) NB1-NB0, 'BOUNDARY ABOVE' #endif ! +#ifdef W3_GPU + !$ACC LOOP INDEPENDENT +#endif DO IP=NB0+1, NB1 IXY = MAPBOU(IP) CFL = CFLL(IXY) @@ -1099,6 +1108,9 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & WRITE (NDST,9011) NB2-NB1, 'BOUNDARY BELOW' #endif ! +#ifdef W3_GPU + !$ACC LOOP INDEPENDENT +#endif DO IP=NB1+1, NB2 IXY = MAPBOU(IP) CFL = CFLL(IXY+INC) @@ -1141,6 +1153,8 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & !$OMP PRIVATE(QOLD), & #endif !$OMP PRIVATE (IP, IXY, JN, JP) +#elif W3_GPU + !$ACC LOOP INDEPENDENT #endif ! DO IP=1, NACT @@ -1172,6 +1186,9 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS + !$ACC END DATA #endif ! #ifdef W3_T0 diff --git a/model/src/w3wavemd.F90 b/model/src/w3wavemd.F90 index 6db2f03af..fe88eb732 100644 --- a/model/src/w3wavemd.F90 +++ b/model/src/w3wavemd.F90 @@ -1853,6 +1853,9 @@ SUBROUTINE W3WAVE ( IMOD, ODAT, TEND, STAMP, NO_OUT & ! ! ! Initialize FIELD variable +#ifdef W3_GPU +!$ACC UPDATE DEVICE(VA) +#endif FIELD = 0. ! DO ISPEC=1, NSPEC @@ -1919,6 +1922,9 @@ SUBROUTINE W3WAVE ( IMOD, ODAT, TEND, STAMP, NO_OUT & END IF END DO +#ifdef W3_GPU +!$ACC UPDATE SELF(VA) +#endif ! 
#ifdef W3_MPI IF ( NRQSG1 .GT. 0 ) THEN diff --git a/regtests/bin/matrix.base b/regtests/bin/matrix.base index 824b358f1..c0f37e34d 100755 --- a/regtests/bin/matrix.base +++ b/regtests/bin/matrix.base @@ -263,6 +263,10 @@ echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.2" >> matrix.body echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.3" >> matrix.body echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.4" >> matrix.body + echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.1" >> matrix.body + echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.2" >> matrix.body + echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.3" >> matrix.body + echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.4" >> matrix.body echo "$rtst -s PR2_UNO -w work_PR2_UNO_curv -g curv $ww3 ww3_tp2.4" >> matrix.body echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.5" >> matrix.body echo "$rtst -s PR2_UNO -w work_PR2_UNO_a -g a $ww3 ww3_tp2.9" >> matrix.body @@ -312,6 +316,10 @@ echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.2" >> matrix.body echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.3" >> matrix.body echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.4" >> matrix.body + echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.1" >> matrix.body + echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.2" >> matrix.body + echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.3" >> matrix.body + echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.4" >> matrix.body echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_curv_MPI -g curv -f -p $mpi -n $np $ww3 ww3_tp2.4" >> matrix.body echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.5" >> matrix.body echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_a_MPI -g a -f -p $mpi -n $np $ww3 
ww3_tp2.9" >> matrix.body diff --git a/regtests/ww3_tp2.1/input/switch_PR2_UNO_GPU b/regtests/ww3_tp2.1/input/switch_PR2_UNO_GPU new file mode 100644 index 000000000..733d91d56 --- /dev/null +++ b/regtests/ww3_tp2.1/input/switch_PR2_UNO_GPU @@ -0,0 +1 @@ +NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.1/input/switch_PR2_UNO_MPI_GPU b/regtests/ww3_tp2.1/input/switch_PR2_UNO_MPI_GPU new file mode 100644 index 000000000..21c790cc2 --- /dev/null +++ b/regtests/ww3_tp2.1/input/switch_PR2_UNO_MPI_GPU @@ -0,0 +1 @@ +NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.2/input/switch_PR2_UNO_GPU b/regtests/ww3_tp2.2/input/switch_PR2_UNO_GPU new file mode 100644 index 000000000..733d91d56 --- /dev/null +++ b/regtests/ww3_tp2.2/input/switch_PR2_UNO_GPU @@ -0,0 +1 @@ +NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.2/input/switch_PR2_UNO_MPI_GPU b/regtests/ww3_tp2.2/input/switch_PR2_UNO_MPI_GPU new file mode 100644 index 000000000..21c790cc2 --- /dev/null +++ b/regtests/ww3_tp2.2/input/switch_PR2_UNO_MPI_GPU @@ -0,0 +1 @@ +NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.3/input/switch_PR2_UNO_GPU b/regtests/ww3_tp2.3/input/switch_PR2_UNO_GPU new file mode 100644 index 000000000..733d91d56 --- /dev/null +++ b/regtests/ww3_tp2.3/input/switch_PR2_UNO_GPU @@ -0,0 +1 @@ +NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.3/input/switch_PR2_UNO_MPI_GPU b/regtests/ww3_tp2.3/input/switch_PR2_UNO_MPI_GPU new file mode 100644 index 000000000..21c790cc2 --- /dev/null +++ 
b/regtests/ww3_tp2.3/input/switch_PR2_UNO_MPI_GPU @@ -0,0 +1 @@ +NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.4/input/switch_PR2_UNO_GPU b/regtests/ww3_tp2.4/input/switch_PR2_UNO_GPU new file mode 100644 index 000000000..733d91d56 --- /dev/null +++ b/regtests/ww3_tp2.4/input/switch_PR2_UNO_GPU @@ -0,0 +1 @@ +NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.4/input/switch_PR2_UNO_MPI_GPU b/regtests/ww3_tp2.4/input/switch_PR2_UNO_MPI_GPU new file mode 100644 index 000000000..21c790cc2 --- /dev/null +++ b/regtests/ww3_tp2.4/input/switch_PR2_UNO_MPI_GPU @@ -0,0 +1 @@ +NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11