Skip to content

Commit

Permalink
template for double buffering
Browse files Browse the repository at this point in the history
Signed-off-by: Jeff Hammond <[email protected]>
  • Loading branch information
jeffhammond committed Jun 10, 2024
1 parent 491fd9a commit 9a1cda5
Showing 1 changed file with 167 additions and 2 deletions.
169 changes: 167 additions & 2 deletions src/tce/ccsd/icsd_t2.F
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ SUBROUTINE icsd_t2(d_f1,d_i0,d_t1,d_t2,d_v2,k_f1_offset,k_i0_offse
integer ctx
parameter(num_count=41)
c --------------------------------
integer icsd_t2_8_alg
c - T1/X1 LOCALIZATION -------------------
integer l_i1_local,k_i1_local
c ---------------------------------------
Expand All @@ -148,6 +149,10 @@ SUBROUTINE icsd_t2(d_f1,d_i0,d_t1,d_t2,d_v2,k_f1_offset,k_i0_offse
c
nodezero=(ga_nodeid().eq.0)
c
if (.not.rtdb_get(rtdb,'tce:icsd_t2_8_alg',mt_int,1,
& icsd_t2_8_alg)) then
icsd_t2_8_alg = 1
endif
c --------------------------------
c ALL OFFSET OPENINGS HERE
CALL OFFSET_icsd_t2_2_1(l_i1_2_offset,k_i1_2_offset,size_i1_2)
Expand Down Expand Up @@ -535,8 +540,15 @@ SUBROUTINE icsd_t2(d_f1,d_i0,d_t1,d_t2,d_v2,k_f1_offset,k_i0_offse
c
c
if(level_x.eq.1) then
CALL icsd_t2_8(d_c2,k_t2_offset,d_v2,k_v2_offset,
&d_i0,k_i0_offset,ctx,41)
if (ccsd_t2_8_alg.eq.1) then
CALL icsd_t2_8(d_c2,k_t2_offset,d_v2,k_v2_offset,
& d_i0,k_i0_offset,ctx,41)
else if (ccsd_t2_8_alg.eq.2) then
CALL icsd_t2_8_x(d_c2,k_t2_offset,d_v2,k_v2_offset,
& d_i0,k_i0_offset,ctx,41)
else
CALL errquit('icsd_t2_8_alg',icsd_t2_8_alg,INPUT_ERR)
end if
end if
c
cc if(level_x.eq.1) then
Expand Down Expand Up @@ -9258,3 +9270,156 @@ SUBROUTINE icsd_t2_8(d_a,k_a_offset,
END


SUBROUTINE icsd_t2_8_x(d_a,k_a_offset,
& d_b,k_b_offset,
& d_c,k_c_offset,
& ctx,icounter)
C $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $
C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
C i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v
IMPLICIT NONE
#include "global.fh"
#include "mafdecls.fh"
#include "sym.fh"
#include "errquit.fh"
#include "tce.fh"
INTEGER d_a,d_b,d_c
INTEGER k_a_offset,k_b_offset,k_c_offset
INTEGER maxh,maxp,dimhhpp,dimpppp,dimtemp
INTEGER next,nprocs,count
INTEGER p5b,p6b,p3b,p4b,h1b,h2b
INTEGER p5b_1,p6b_1,h1b_1,h2b_1
INTEGER p3b_2,p4b_2,p5b_2,p6b_2
INTEGER dima,dimb,dimc,dim_common,dima_sort,dimb_sort
integer :: h21d, p43d, p65d
double precision, allocatable :: f_a(:), f_b(:), f_c(:)
integer :: e_a,e_b,e_c
double precision alpha
integer p5b_in,p6b_in
integer ctx,icounter
external nxt_ctx_create, nxt_ctx_destroy, nxt_ctx_next
integer :: p,h
nprocs = GA_NNODES()
count = 0
!next = NXTASK(nprocs, 1)
call nxt_ctx_next(ctx, icounter, next)

! TODO hoist this like ccsd_t2 path
maxp = 0
do p = noab+1,noab+nvab
maxp = max(maxp,int_mb(k_range+p-1))
enddo
maxh = 0
do h = 1,noab
maxh = max(maxh,int_mb(k_range+h-1))
enddo

dimhhpp = maxh*maxh*maxp*maxp
dimpppp = maxp*maxp*maxp*maxp
dimtemp = max(dimpppp,dimhhpp)

e_a=0
e_b=0
e_c=0
allocate(f_a(1:dimhhpp),stat=e_a)
allocate(f_b(1:dimpppp),stat=e_b)
allocate(f_c(1:dimhhpp),stat=e_c)
if (e_a.ne.0) call errquit("MA a",dimhhpp,MA_ERR)
if (e_b.ne.0) call errquit("MA b",dimpppp,MA_ERR)
if (e_c.ne.0) call errquit("MA c",dimhhpp,MA_ERR)
DO p3b = noab+1,noab+nvab
DO p4b = p3b,noab+nvab
DO h1b = 1,noab
DO h2b = h1b,noab
IF ((.not.restricted).or.
& ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1)
& +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN
IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq.
& int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1),
& ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
& .eq. ieor(irrep_v,irrep_t)) THEN
IF (next.eq.count) THEN
dima_sort = int_mb(k_range+h1b-1)
& * int_mb(k_range+h2b-1)
dimb_sort = int_mb(k_range+p3b-1)
& * int_mb(k_range+p4b-1)
dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1)
& * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1)
CALL DFILL(dimc,0.0d0,f_c,1)
DO p5b_in =ga_nodeid(),ga_nodeid()+nvab-1
p5b=mod(p5b_in,nvab)+noab+1
DO p6b_in=ga_nodeid(),ga_nodeid()+nvab+noab-p5b
p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b
IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq.
& int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),
& ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
& .eq. irrep_t) THEN
CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b,
& p5b_1,p6b_1,h1b_1,h2b_1)
CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b,
& p3b_2,p4b_2,p5b_2,p6b_2)
dim_common = int_mb(k_range+p5b-1)
& * int_mb(k_range+p6b-1)
dima = dim_common * dima_sort
dimb = dim_common * dimb_sort
IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN
CALL GET_HASH_BLOCK(d_a,f_a,dima,
& int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab*
& (p6b_1-noab-1+nvab*(p5b_1-noab-1)))))
if(.not.intorb) then
CALL GET_HASH_BLOCK(d_b,f_b,dimb,
& int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
& (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
& (p3b_2-1)))))
else
CALL GET_HASH_BLOCK_I(d_b,f_b,dimb,
& int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
& (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
& (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2)
end if
if (p5b .eq. p6b) then
alpha = 1.0d0
else
alpha = 2.0d0
end if
h21d = int_mb(k_range+h1b-1)*int_mb(k_range+h2b-1)
p43d = int_mb(k_range+p3b-1)*int_mb(k_range+p4b-1)
p65d = int_mb(k_range+p5b-1)*int_mb(k_range+p6b-1)
call DGEMM('n','n',h21d,p43d,p65d,
& 0.5d0*alpha,f_a,h21d,
& f_b,p65d,
& 1.0d0, f_c,h21d)
END IF
END IF
END IF
END DO
END DO
CALL ADD_HASH_BLOCK(d_c,f_c,dimc,
& int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab*
& (p4b-noab-1+nvab*(p3b-noab-1)))))
!next = NXTASK(nprocs, 1)
call nxt_ctx_next(ctx, icounter, next)
END IF
count = count + 1
END IF
END IF
END IF
END DO
END DO
END DO
END DO
!next = NXTASK(-nprocs, 1)
!call GA_SYNC()
deallocate(f_a,stat=e_a)
if (e_a.ne.0) call errquit("MA pops a",0,MA_ERR)
deallocate(f_b,stat=e_b)
if (e_b.ne.0) call errquit("MA pops a",0,MA_ERR)
deallocate(f_c,stat=e_c)
if (e_c.ne.0) call errquit("MA pops a",0,MA_ERR)
RETURN
END


0 comments on commit 9a1cda5

Please sign in to comment.