Skip to content

Commit

Permalink
count flops manually so it does not depend on yflop or perfm stuff
Browse files Browse the repository at this point in the history
Signed-off-by: Jeff Hammond <[email protected]>
  • Loading branch information
jeffhammond committed Aug 26, 2024
1 parent 152bb31 commit 981eeb7
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 38 deletions.
83 changes: 48 additions & 35 deletions src/ccsd/ccsd_trpdrv_omp.F
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ subroutine ccsd_trpdrv_omp(t1,
#include "ccsdps.fh"
#include "util.fh"
#include "msgids.fh"
#include "yflop.fh"
!#include "yflop.fh"
!
double precision, intent(inout) :: emp4,emp5
double precision, intent(in) :: t1(*)
Expand Down Expand Up @@ -56,7 +56,7 @@ subroutine ccsd_trpdrv_omp(t1,
!
double precision :: emp4i,emp5i,emp4k,emp5k
double precision :: eaijk,denom
integer :: inode,next,nodes,iam
integer :: inode,next,nodes,me
integer :: a,b,c,i,j,k,akold,av
! chunking is the loop blocking size in the loop nest
! formerly associated with the tengy routine.
Expand All @@ -68,9 +68,8 @@ subroutine ccsd_trpdrv_omp(t1,
integer :: klo, khi
integer nxtask
external nxtask
double precision perfm_flop,tzero,flopzero,t_flops,agg_flops
external perfm_flop

integer :: dgemm_flops, tengy_flops
double precision agg_flops
!
! Dependencies (global array, local array, handle):
!
Expand Down Expand Up @@ -112,26 +111,33 @@ subroutine ccsd_trpdrv_omp(t1,
logical i_progr(n_progr+1)
logical ldyn_org
logical got_ak
!
! timers
double precision :: tt0, tt1, tc0, tc1
#if defined(USE_OPENMP)
integer omp_get_thread_num
external omp_get_thread_num
integer omp_get_num_threads
external omp_get_num_threads
integer omp_get_max_threads
external omp_get_max_threads
if (ga_nodeid().eq.0) write(6,99) omp_get_max_threads()
99 format(2x,'Using ',i2,' OpenMP threads in CCSD(T)')
#endif
!
nodes = ga_nnodes()
me = ga_nodeid()
!
#if defined(USE_OPENMP)
if (me.eq.0) write(6,99) omp_get_max_threads()
#else
if (ga_nodeid().eq.0) then
if (me.eq.0) then
write(6,99) 1
write(6,999)
endif
99 format(2x,'Using ',i2,' OpenMP thread in CCSD(T)')
999 format(2x,'Recompile w/ USE_OPENMP=1 to use threads in CCSD(T)')
#endif
tzero=util_wallsec()
flopzero=perfm_flop()
99 format(2x,'Using ',i2,' OpenMP threads in CCSD(T)')
agg_flops = 0
!
tt0 = util_wallsec()
#ifdef USE_F90_ALLOCATABLE
allocate( dintc1(1:nvir), dintc2(1:nvir),
& dintx1(1:nvir), dintx2(1:nvir),
Expand All @@ -146,8 +152,6 @@ subroutine ccsd_trpdrv_omp(t1,
call util_setthreads_fromenv()
c call util_blas_set_num_threads(4)
#endif
nodes = ga_nnodes()
iam = ga_nodeid()
!
! call ga_sync() ! ga_sync called just before trpdrv in aoccsd2
!
Expand Down Expand Up @@ -273,6 +277,8 @@ subroutine ccsd_trpdrv_omp(t1,
call ga_nbwait(nbh_objv7)
endif ! k==klo

tc0 = util_wallsec()

#if USE_OMP_SECTIONS
!$omp parallel
!$omp& shared(eorb)
Expand Down Expand Up @@ -365,6 +371,10 @@ subroutine ccsd_trpdrv_omp(t1,
!$omp end sections
!$omp master
#endif
! 8 pairs of DGEMM w/ VVV and VVO cost, 2 for FMA
dgemm_flops = 8*nvir*nvir*(nocc+nvir)*2
agg_flops = agg_flops + dgemm_flops

if (occsdps) then
call pstat_off(ps_doxxx)
call pstat_on(ps_tengy)
Expand All @@ -381,11 +391,11 @@ subroutine ccsd_trpdrv_omp(t1,
& +eorb(ncor+k) )
#ifdef USE_YFLOP
flops_ycount = flops_ycount + nvir*nvir*(
D 3 + 2*(
E 12 +
E 11 +
E 11 ) +
5 2*27 )
& 3 + 2*(
& 12 +
& 11 +
& 11 ) +
& 2*27 )
#endif

#if USE_OMP_SECTIONS
Expand Down Expand Up @@ -447,6 +457,11 @@ subroutine ccsd_trpdrv_omp(t1,
!$omp end do
!$omp end parallel
#endif
tengy_flops = nvir*nvir*( 3 + 2*( 12 + 11 + 11 ) + 2*27 )
agg_flops = agg_flops + tengy_flops

tc1 = util_wallsec()

if (occsdps) then
call pstat_off(ps_tengy)
else
Expand All @@ -462,24 +477,23 @@ subroutine ccsd_trpdrv_omp(t1,
end do ! k
end do ! i
if (iprt.gt.50)then
write(6,1234)iam,a,j,emp4,emp5
write(6,1234) me,a,j,emp4,emp5
1234 format(' iam aijk',3i5,2e15.5)
end if
next=nxtask(nodes, 1)
if(ga_nodeid().eq.0) then
if(me.eq.0) then
pct_progr=(a-(ncor+nocc)+((klo-1)/kchunk)*nvir)*n_progr/
/ ((nocc/kchunk)*nvir)+1
& ((nocc/kchunk)*nvir)+1
if(i_progr(pct_progr)) then
i_progr(pct_progr)=.false.
write(6,4321) ' ccsd(t): done ',
A a-(ncor+nocc)+((klo-1)/kchunk)*nvir,
O ' out of ',(nocc/kchunk)*nvir,
O ' progress: ',
O ((a-(ncor+nocc)+((klo-1)/kchunk)*nvir)*100)/
D ((nocc/kchunk)*nvir),
P '%, Gflops=',(perfm_flop()-flopzero)/
D (util_wallsec()-tzero),
P ' at ',(util_wallsec()-tzero),' secs'
& a-(ncor+nocc)+((klo-1)/kchunk)*nvir,
& ' out of ',(nocc/kchunk)*nvir,
& ' progress: ',
& ((a-(ncor+nocc)+((klo-1)/kchunk)*nvir)*100)/
& ((nocc/kchunk)*nvir),
& '%, Gflops=',1e-9*(dgemm_flops+tengy_flops)/(tc1-tc0),
& ' at ',(util_wallsec()-tt0),' secs'
call util_flush(6)
4321 format(a,i8,a,i8,a,i3,a,1pg11.4,a,0pf10.1,a)
endif
Expand All @@ -490,13 +504,11 @@ subroutine ccsd_trpdrv_omp(t1,
end do
call ga_sync()
next=nxtask(-nodes, 1)
t_flops=util_wallsec()-tzero
agg_flops=perfm_flop()-flopzero
tt1=util_wallsec()
call ga_dgop(msg_cc_diis1,agg_flops,1, '+')
if(ga_nodeid().eq.0) then
if(me.eq.0) then
write(6,4322) ' ccsd(t): 100% done, Aggregate Gflops=',
P agg_flops/t_flops,
P ' in ',t_flops,' secs'
& 1e-9*agg_flops/(tt1-tt0),' in ',(tt1-tt0),' secs'
4322 format(a,1pg11.4,a,0pf10.1,a)
call util_flush(6)
endif
Expand All @@ -513,6 +525,7 @@ subroutine ccsd_trpdrv_omp(t1,
call util_blas_set_num_threads(1)
#endif
!
tt0 = util_wallsec()
#ifdef USE_F90_ALLOCATABLE
deallocate( dintc1, dintx1, t1v1, dintc2, dintx2, t1v2,
& stat=alloc_error)
Expand Down
6 changes: 3 additions & 3 deletions src/ccsd/ccsd_trpdrv_openacc.F
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ subroutine ccsd_trpdrv_openacc(t1,xeorb,
#include "ccsdps.fh"
#include "util.fh"
#include "msgids.fh"
#include "yflop.fh"
!#include "yflop.fh"
!
double precision, intent(inout) :: emp4,emp5
double precision, intent(in) :: t1(*)
Expand Down Expand Up @@ -707,8 +707,8 @@ subroutine ccsd_trpdrv_openacc(t1,xeorb,
emp4 = emp4 + emp4k
emp5 = emp5 + emp5k
if (iprt.gt.50)then
write(6,1234)me,a,j,emp4,emp5
1234 format(' me aijk',3i5,2e15.5)
write(6,1234) me,a,j,emp4,emp5
1234 format(' iam aijk',3i5,2e15.5)
end if
next=nxtask(nodes, 1)
if(me.eq.0) then
Expand Down

0 comments on commit 981eeb7

Please sign in to comment.