diff --git a/src/tce/ccsd/icsd_t2.F b/src/tce/ccsd/icsd_t2.F
index 7e4bb8dd2f..0d25f3d6ac 100644
--- a/src/tce/ccsd/icsd_t2.F
+++ b/src/tce/ccsd/icsd_t2.F
@@ -138,6 +138,7 @@ SUBROUTINE icsd_t2(d_f1,d_i0,d_t1,d_t2,d_v2,k_f1_offset,k_i0_offse
       integer ctx
       parameter(num_count=41)
 c --------------------------------
+      integer icsd_t2_8_alg
 c - T1/X1 LOCALIZATION -------------------
       integer l_i1_local,k_i1_local
 c ---------------------------------------
@@ -148,6 +149,10 @@ SUBROUTINE icsd_t2(d_f1,d_i0,d_t1,d_t2,d_v2,k_f1_offset,k_i0_offse
 c
       nodezero=(ga_nodeid().eq.0)
 c
+      if (.not.rtdb_get(rtdb,'tce:icsd_t2_8_alg',mt_int,1,
+     &   icsd_t2_8_alg)) then
+        icsd_t2_8_alg = 1
+      endif
 c --------------------------------
 c ALL OFFSET OPENINGS HERE
       CALL OFFSET_icsd_t2_2_1(l_i1_2_offset,k_i1_2_offset,size_i1_2)
@@ -535,8 +540,15 @@ SUBROUTINE icsd_t2(d_f1,d_i0,d_t1,d_t2,d_v2,k_f1_offset,k_i0_offse
 c
 c
       if(level_x.eq.1) then
-      CALL icsd_t2_8(d_c2,k_t2_offset,d_v2,k_v2_offset,
-     &d_i0,k_i0_offset,ctx,41)
+      if (icsd_t2_8_alg.eq.1) then
+        CALL icsd_t2_8(d_c2,k_t2_offset,d_v2,k_v2_offset,
+     &    d_i0,k_i0_offset,ctx,41)
+      else if (icsd_t2_8_alg.eq.2) then
+        CALL icsd_t2_8_x(d_c2,k_t2_offset,d_v2,k_v2_offset,
+     &    d_i0,k_i0_offset,ctx,41)
+      else
+        CALL errquit('icsd_t2_8_alg',icsd_t2_8_alg,INPUT_ERR)
+      end if
       end if
 c
 cc      if(level_x.eq.1) then
@@ -9258,3 +9270,156 @@ SUBROUTINE icsd_t2_8(d_a,k_a_offset,
       END
+      SUBROUTINE icsd_t2_8_x(d_a,k_a_offset,
+     & d_b,k_b_offset,
+     & d_c,k_c_offset,
+     & ctx,icounter)
+C $Id: ccsd_t2.F 27404 2015-08-24 14:20:43Z jhammond $
+C This is a Fortran77 program generated by Tensor Contraction Engine v.1.0
+C Copyright (c) Battelle & Pacific Northwest National Laboratory (2002)
+C i0 ( p3 p4 h1 h2 )_vt + = 1/2 * Sum ( p5 p6 ) * t ( p5 p6 h1 h2 )_t * v ( p3 p4 p5 p6 )_v
+      IMPLICIT NONE
+#include "global.fh"
+#include "mafdecls.fh"
+#include "sym.fh"
+#include "errquit.fh"
+#include "tce.fh"
+      INTEGER d_a,d_b,d_c
+      INTEGER k_a_offset,k_b_offset,k_c_offset
+      INTEGER maxh,maxp,dimhhpp,dimpppp,dimtemp
+      INTEGER next,nprocs,count
+      INTEGER p5b,p6b,p3b,p4b,h1b,h2b
+      INTEGER p5b_1,p6b_1,h1b_1,h2b_1
+      INTEGER p3b_2,p4b_2,p5b_2,p6b_2
+      INTEGER dima,dimb,dimc,dim_common,dima_sort,dimb_sort
+      integer :: h21d, p43d, p65d
+      double precision, allocatable :: f_a(:), f_b(:), f_c(:)
+      integer :: e_a,e_b,e_c
+      double precision alpha
+      integer p5b_in,p6b_in
+      integer ctx,icounter
+      external nxt_ctx_create, nxt_ctx_destroy, nxt_ctx_next
+      integer :: p,h
+      nprocs = GA_NNODES()
+      count = 0
+      !next = NXTASK(nprocs, 1)
+      call nxt_ctx_next(ctx, icounter, next)
+
+      ! TODO hoist this like ccsd_t2 path
+      maxp = 0
+      do p = noab+1,noab+nvab
+        maxp = max(maxp,int_mb(k_range+p-1))
+      enddo
+      maxh = 0
+      do h = 1,noab
+        maxh = max(maxh,int_mb(k_range+h-1))
+      enddo
+
+      dimhhpp = maxh*maxh*maxp*maxp
+      dimpppp = maxp*maxp*maxp*maxp
+      dimtemp = max(dimpppp,dimhhpp)
+
+      e_a=0
+      e_b=0
+      e_c=0
+      allocate(f_a(1:dimhhpp),stat=e_a)
+      allocate(f_b(1:dimpppp),stat=e_b)
+      allocate(f_c(1:dimhhpp),stat=e_c)
+      if (e_a.ne.0) call errquit("MA a",dimhhpp,MA_ERR)
+      if (e_b.ne.0) call errquit("MA b",dimpppp,MA_ERR)
+      if (e_c.ne.0) call errquit("MA c",dimhhpp,MA_ERR)
+      DO p3b = noab+1,noab+nvab
+      DO p4b = p3b,noab+nvab
+      DO h1b = 1,noab
+      DO h2b = h1b,noab
+      IF ((.not.restricted).or.
+     & ( int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1)
+     & +int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1).ne.8)) THEN
+      IF (int_mb(k_spin+p3b-1)+int_mb(k_spin+p4b-1) .eq.
+     & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
+      IF (ieor(int_mb(k_sym+p3b-1),ieor(int_mb(k_sym+p4b-1),
+     & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
+     & .eq. ieor(irrep_v,irrep_t)) THEN
+      IF (next.eq.count) THEN
+      dima_sort = int_mb(k_range+h1b-1)
+     & * int_mb(k_range+h2b-1)
+      dimb_sort = int_mb(k_range+p3b-1)
+     & * int_mb(k_range+p4b-1)
+      dimc = int_mb(k_range+p3b-1) * int_mb(k_range+p4b-1)
+     & * int_mb(k_range+h1b-1) * int_mb(k_range+h2b-1)
+      CALL DFILL(dimc,0.0d0,f_c,1)
+      DO p5b_in =ga_nodeid(),ga_nodeid()+nvab-1
+      p5b=mod(p5b_in,nvab)+noab+1
+      DO p6b_in=ga_nodeid(),ga_nodeid()+nvab+noab-p5b
+      p6b=mod(p6b_in,noab+nvab-p5b+1)+p5b
+      IF (int_mb(k_spin+p5b-1)+int_mb(k_spin+p6b-1) .eq.
+     & int_mb(k_spin+h1b-1)+int_mb(k_spin+h2b-1)) THEN
+      IF (ieor(int_mb(k_sym+p5b-1),ieor(int_mb(k_sym+p6b-1),
+     & ieor(int_mb(k_sym+h1b-1),int_mb(k_sym+h2b-1))))
+     & .eq. irrep_t) THEN
+      CALL TCE_RESTRICTED_4(p5b,p6b,h1b,h2b,
+     & p5b_1,p6b_1,h1b_1,h2b_1)
+      CALL TCE_RESTRICTED_4(p3b,p4b,p5b,p6b,
+     & p3b_2,p4b_2,p5b_2,p6b_2)
+      dim_common = int_mb(k_range+p5b-1)
+     & * int_mb(k_range+p6b-1)
+      dima = dim_common * dima_sort
+      dimb = dim_common * dimb_sort
+      IF ((dima .gt. 0) .and. (dimb .gt. 0)) THEN
+      CALL GET_HASH_BLOCK(d_a,f_a,dima,
+     & int_mb(k_a_offset),(h2b_1-1+noab*(h1b_1-1+noab*
+     & (p6b_1-noab-1+nvab*(p5b_1-noab-1)))))
+      if(.not.intorb) then
+      CALL GET_HASH_BLOCK(d_b,f_b,dimb,
+     & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
+     & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
+     & (p3b_2-1)))))
+      else
+      CALL GET_HASH_BLOCK_I(d_b,f_b,dimb,
+     & int_mb(k_b_offset),(p6b_2-1+(noab+nvab)*
+     & (p5b_2-1+(noab+nvab)*(p4b_2-1+(noab+nvab)*
+     & (p3b_2-1)))),p6b_2,p5b_2,p4b_2,p3b_2)
+      end if
+      if (p5b .eq. p6b) then
+        alpha = 1.0d0
+      else
+        alpha = 2.0d0
+      end if
+      h21d = int_mb(k_range+h1b-1)*int_mb(k_range+h2b-1)
+      p43d = int_mb(k_range+p3b-1)*int_mb(k_range+p4b-1)
+      p65d = int_mb(k_range+p5b-1)*int_mb(k_range+p6b-1)
+      call DGEMM('n','n',h21d,p43d,p65d,
+     &           0.5d0*alpha,f_a,h21d,
+     &           f_b,p65d,
+     &           1.0d0, f_c,h21d)
+      END IF
+      END IF
+      END IF
+      END DO
+      END DO
+      CALL ADD_HASH_BLOCK(d_c,f_c,dimc,
+     & int_mb(k_c_offset),(h2b-1+noab*(h1b-1+noab*
+     & (p4b-noab-1+nvab*(p3b-noab-1)))))
+      !next = NXTASK(nprocs, 1)
+      call nxt_ctx_next(ctx, icounter, next)
+      END IF
+      count = count + 1
+      END IF
+      END IF
+      END IF
+      END DO
+      END DO
+      END DO
+      END DO
+      !next = NXTASK(-nprocs, 1)
+      !call GA_SYNC()
+      deallocate(f_a,stat=e_a)
+      if (e_a.ne.0) call errquit("MA pops a",0,MA_ERR)
+      deallocate(f_b,stat=e_b)
+      if (e_b.ne.0) call errquit("MA pops b",0,MA_ERR)
+      deallocate(f_c,stat=e_c)
+      if (e_c.ne.0) call errquit("MA pops c",0,MA_ERR)
+      RETURN
+      END
+
+
 
 