From 5294e1c081eb93d13a0d70e90773a652afb9e2b1 Mon Sep 17 00:00:00 2001 From: QuanSheng Wu Date: Mon, 6 May 2024 22:48:39 +0800 Subject: [PATCH] 1. added -DARPACK in the Makefile to control whether use ARPACK or not; 2. try to support non-orthogonal basis read from openmx; 3. redefined the sparse hr.dat where the degeneracy number of R points are removed; fixed several bugs when gfortran fails --- examples/Bi2Se3/wt.in | 3 +- examples/MoTe2/bulk/wt.in | 4 +- examples/WTe2/wannier90.win | 247 ++- examples/WTe2/wt.in | 4 +- src/Boltz_transport_anomalous.f90 | 4 +- src/Makefile.gfortran | 47 +- src/Makefile.gfortran_travis | 3 +- src/Makefile.intel-mpi | 20 +- src/c_fortran_zgssv.c | 181 ++ src/dos.f90 | 10 - src/ek_bulk.f90 | 11 +- src/ek_slab.f90 | 2 - src/fermisurface.f90 | 14 +- src/ham_bulk.f90 | 82 +- src/inverse.f90 | 318 +++ src/lanczos_sparse.f90 | 14 +- src/landau_level_sparse.f90 | 6 - src/main.f90 | 17 - src/mgmres.f90 | 1700 +++++++++++++++++ src/module.f90 | 11 +- src/readHmnR.f90 | 153 +- src/readinput.f90 | 4 +- src/sigma_OHE.f90 | 79 +- src/sparse.f90 | 330 ++-- src/superlu_config.fh | 22 + src/unfolding.f90 | 8 - src/wanniercenter.f90 | 16 +- src/wt_aux.f90 | 4 +- .../system.in | 3 +- 29 files changed, 3009 insertions(+), 308 deletions(-) create mode 100644 src/c_fortran_zgssv.c create mode 100644 src/mgmres.f90 create mode 100644 src/superlu_config.fh diff --git a/examples/Bi2Se3/wt.in b/examples/Bi2Se3/wt.in index 8784407c..ed4ade9e 100644 --- a/examples/Bi2Se3/wt.in +++ b/examples/Bi2Se3/wt.in @@ -29,8 +29,7 @@ Se pz px py !> bulk band structure calculation flag &CONTROL BulkBand_calc = T -!BulkBand_points_calc = T -Symmetry_Import_calc = T +BulkBand_points_calc = T / &SYSTEM diff --git a/examples/MoTe2/bulk/wt.in b/examples/MoTe2/bulk/wt.in index f716e56d..43a9852c 100644 --- a/examples/MoTe2/bulk/wt.in +++ b/examples/MoTe2/bulk/wt.in @@ -23,10 +23,10 @@ SOC = 1 ! soc &PARAMETERS E_arc = -5 ! specify the Fermi energy -OmegaNum = 32 ! omega number +OmegaNum = 50 ! omega number OmegaMin = -0.6 ! energy interval OmegaMax = 0.2 ! energy interval -Nk1 = 10 ! number k points +Nk1 = 40 ! number k points NP = 1 ! number of principle layers / diff --git a/examples/WTe2/wannier90.win b/examples/WTe2/wannier90.win index ccb057a9..d2d92fda 100644 --- a/examples/WTe2/wannier90.win +++ b/examples/WTe2/wannier90.win @@ -1,16 +1,257 @@ -num_bands = 192 +num_bands = 192 ! set to NBANDS by VASP num_wann = 96 -dis_num_iter = 2000 -num_iter = 100 +dis_num_iter = 1000 +num_iter = 0 +!postproc_setup = .true. +iprint=2 +!wvfn_formatted=.true. +!bands_plot = .true. +!min of outer window dis_win_min = -1.0 dis_win_max = 16 +!inner - dis_froz_min = 6.0000 dis_froz_max = 8.0000 +wvfn_formatted=.true. +bands_plot = .true. hr_plot =.true. begin projections W : s; dxy; dyz; dxz; dx2-y2; dz2 Te : px; py; pz end projections + + +begin kpoint_path +G 0.00000 0.00000 0.0000 Y 0.00000 0.50000 0.0000 +Y 0.00000 0.50000 0.0000 S 0.50000 0.50000 0.0000 +S 0.50000 0.50000 0.0000 X 0.50000 0.00000 0.0000 +X 0.50000 0.00000 0.0000 G 0.00000 0.00000 0.0000 +G 0.00000 0.00000 0.0000 Z 0.00000 0.00000 0.5000 +Z 0.00000 0.00000 0.5000 T 0.00000 0.50000 0.5000 +T 0.0 0.5 0.5 R 0.5 0.5 0.5 +R 0.5 0.5 0.5 U 0.5 0.0 0.5 +U 0.5 0.0 0.5 Z 0.0 0.0 0.5 +end kpoint_path + +spinors = .true. + +begin unit_cell_cart + 3.4770000 0.0000000 0.0000000 + 0.0000000 6.2490000 0.0000000 + 0.0000000 0.0000000 14.0180000 +end unit_cell_cart + +begin atoms_cart +W 0.0000000 3.7532744 10.7171100 +W 0.0000000 0.2487102 3.9214640 +W 1.7385000 2.4957256 3.7081100 +W 1.7385000 6.0002898 10.9304640 +Te 0.0000000 5.3592049 12.8934045 +Te 1.7385000 0.8897951 5.8844045 +Te 0.0000000 4.0387912 5.2657902 +Te 1.7385000 2.2102088 12.2747902 +Te 0.0000000 1.8650141 1.7432069 +Te 1.7385000 4.3839859 8.7522069 +Te 0.0000000 1.2949178 9.3695597 +Te 1.7385000 4.9540822 2.3605597 +end atoms_cart + +mp_grid = 8 6 4 + +begin kpoints + 0.000000000000 0.000000000000 0.000000000000 + 0.125000000000 0.000000000000 0.000000000000 + 0.250000000000 0.000000000000 0.000000000000 + 0.375000000000 0.000000000000 0.000000000000 + 0.500000000000 0.000000000000 0.000000000000 + -0.375000000000 0.000000000000 0.000000000000 + -0.250000000000 0.000000000000 0.000000000000 + -0.125000000000 0.000000000000 0.000000000000 + 0.000000000000 0.166666666667 0.000000000000 + 0.125000000000 0.166666666667 0.000000000000 + 0.250000000000 0.166666666667 0.000000000000 + 0.375000000000 0.166666666667 0.000000000000 + 0.500000000000 0.166666666667 0.000000000000 + -0.375000000000 0.166666666667 0.000000000000 + -0.250000000000 0.166666666667 0.000000000000 + -0.125000000000 0.166666666667 0.000000000000 + 0.000000000000 0.333333333333 0.000000000000 + 0.125000000000 0.333333333333 0.000000000000 + 0.250000000000 0.333333333333 0.000000000000 + 0.375000000000 0.333333333333 0.000000000000 + 0.500000000000 0.333333333333 0.000000000000 + -0.375000000000 0.333333333333 0.000000000000 + -0.250000000000 0.333333333333 0.000000000000 + -0.125000000000 0.333333333333 0.000000000000 + 0.000000000000 0.500000000000 0.000000000000 + 0.125000000000 0.500000000000 0.000000000000 + 0.250000000000 0.500000000000 0.000000000000 + 0.375000000000 0.500000000000 0.000000000000 + 0.500000000000 0.500000000000 0.000000000000 + -0.375000000000 0.500000000000 0.000000000000 + -0.250000000000 0.500000000000 0.000000000000 + -0.125000000000 0.500000000000 0.000000000000 + 0.000000000000 -0.333333333333 0.000000000000 + 0.125000000000 -0.333333333333 0.000000000000 + 0.250000000000 -0.333333333333 0.000000000000 + 0.375000000000 -0.333333333333 0.000000000000 + 0.500000000000 -0.333333333333 0.000000000000 + -0.375000000000 -0.333333333333 0.000000000000 + -0.250000000000 -0.333333333333 0.000000000000 + -0.125000000000 -0.333333333333 0.000000000000 + 0.000000000000 -0.166666666667 0.000000000000 + 0.125000000000 -0.166666666667 0.000000000000 + 0.250000000000 -0.166666666667 0.000000000000 + 0.375000000000 -0.166666666667 0.000000000000 + 0.500000000000 -0.166666666667 0.000000000000 + -0.375000000000 -0.166666666667 0.000000000000 + -0.250000000000 -0.166666666667 0.000000000000 + -0.125000000000 -0.166666666667 0.000000000000 + 0.000000000000 0.000000000000 0.250000000000 + 0.125000000000 0.000000000000 0.250000000000 + 0.250000000000 0.000000000000 0.250000000000 + 0.375000000000 0.000000000000 0.250000000000 + 0.500000000000 0.000000000000 0.250000000000 + -0.375000000000 0.000000000000 0.250000000000 + -0.250000000000 0.000000000000 0.250000000000 + -0.125000000000 0.000000000000 0.250000000000 + 0.000000000000 0.166666666667 0.250000000000 + 0.125000000000 0.166666666667 0.250000000000 + 0.250000000000 0.166666666667 0.250000000000 + 0.375000000000 0.166666666667 0.250000000000 + 0.500000000000 0.166666666667 0.250000000000 + -0.375000000000 0.166666666667 0.250000000000 + -0.250000000000 0.166666666667 0.250000000000 + -0.125000000000 0.166666666667 0.250000000000 + 0.000000000000 0.333333333333 0.250000000000 + 0.125000000000 0.333333333333 0.250000000000 + 0.250000000000 0.333333333333 0.250000000000 + 0.375000000000 0.333333333333 0.250000000000 + 0.500000000000 0.333333333333 0.250000000000 + -0.375000000000 0.333333333333 0.250000000000 + -0.250000000000 0.333333333333 0.250000000000 + -0.125000000000 0.333333333333 0.250000000000 + 0.000000000000 0.500000000000 0.250000000000 + 0.125000000000 0.500000000000 0.250000000000 + 0.250000000000 0.500000000000 0.250000000000 + 0.375000000000 0.500000000000 0.250000000000 + 0.500000000000 0.500000000000 0.250000000000 + -0.375000000000 0.500000000000 0.250000000000 + -0.250000000000 0.500000000000 0.250000000000 + -0.125000000000 0.500000000000 0.250000000000 + 0.000000000000 -0.333333333333 0.250000000000 + 0.125000000000 -0.333333333333 0.250000000000 + 0.250000000000 -0.333333333333 0.250000000000 + 0.375000000000 -0.333333333333 0.250000000000 + 0.500000000000 -0.333333333333 0.250000000000 + -0.375000000000 -0.333333333333 0.250000000000 + -0.250000000000 -0.333333333333 0.250000000000 + -0.125000000000 -0.333333333333 0.250000000000 + 0.000000000000 -0.166666666667 0.250000000000 + 0.125000000000 -0.166666666667 0.250000000000 + 0.250000000000 -0.166666666667 0.250000000000 + 0.375000000000 -0.166666666667 0.250000000000 + 0.500000000000 -0.166666666667 0.250000000000 + -0.375000000000 -0.166666666667 0.250000000000 + -0.250000000000 -0.166666666667 0.250000000000 + -0.125000000000 -0.166666666667 0.250000000000 + 0.000000000000 0.000000000000 0.500000000000 + 0.125000000000 0.000000000000 0.500000000000 + 0.250000000000 0.000000000000 0.500000000000 + 0.375000000000 0.000000000000 0.500000000000 + 0.500000000000 0.000000000000 0.500000000000 + -0.375000000000 0.000000000000 0.500000000000 + -0.250000000000 0.000000000000 0.500000000000 + -0.125000000000 0.000000000000 0.500000000000 + 0.000000000000 0.166666666667 0.500000000000 + 0.125000000000 0.166666666667 0.500000000000 + 0.250000000000 0.166666666667 0.500000000000 + 0.375000000000 0.166666666667 0.500000000000 + 0.500000000000 0.166666666667 0.500000000000 + -0.375000000000 0.166666666667 0.500000000000 + -0.250000000000 0.166666666667 0.500000000000 + -0.125000000000 0.166666666667 0.500000000000 + 0.000000000000 0.333333333333 0.500000000000 + 0.125000000000 0.333333333333 0.500000000000 + 0.250000000000 0.333333333333 0.500000000000 + 0.375000000000 0.333333333333 0.500000000000 + 0.500000000000 0.333333333333 0.500000000000 + -0.375000000000 0.333333333333 0.500000000000 + -0.250000000000 0.333333333333 0.500000000000 + -0.125000000000 0.333333333333 0.500000000000 + 0.000000000000 0.500000000000 0.500000000000 + 0.125000000000 0.500000000000 0.500000000000 + 0.250000000000 0.500000000000 0.500000000000 + 0.375000000000 0.500000000000 0.500000000000 + 0.500000000000 0.500000000000 0.500000000000 + -0.375000000000 0.500000000000 0.500000000000 + -0.250000000000 0.500000000000 0.500000000000 + -0.125000000000 0.500000000000 0.500000000000 + 0.000000000000 -0.333333333333 0.500000000000 + 0.125000000000 -0.333333333333 0.500000000000 + 0.250000000000 -0.333333333333 0.500000000000 + 0.375000000000 -0.333333333333 0.500000000000 + 0.500000000000 -0.333333333333 0.500000000000 + -0.375000000000 -0.333333333333 0.500000000000 + -0.250000000000 -0.333333333333 0.500000000000 + -0.125000000000 -0.333333333333 0.500000000000 + 0.000000000000 -0.166666666667 0.500000000000 + 0.125000000000 -0.166666666667 0.500000000000 + 0.250000000000 -0.166666666667 0.500000000000 + 0.375000000000 -0.166666666667 0.500000000000 + 0.500000000000 -0.166666666667 0.500000000000 + -0.375000000000 -0.166666666667 0.500000000000 + -0.250000000000 -0.166666666667 0.500000000000 + -0.125000000000 -0.166666666667 0.500000000000 + 0.000000000000 0.000000000000 -0.250000000000 + 0.125000000000 0.000000000000 -0.250000000000 + 0.250000000000 0.000000000000 -0.250000000000 + 0.375000000000 0.000000000000 -0.250000000000 + 0.500000000000 0.000000000000 -0.250000000000 + -0.375000000000 0.000000000000 -0.250000000000 + -0.250000000000 0.000000000000 -0.250000000000 + -0.125000000000 0.000000000000 -0.250000000000 + 0.000000000000 0.166666666667 -0.250000000000 + 0.125000000000 0.166666666667 -0.250000000000 + 0.250000000000 0.166666666667 -0.250000000000 + 0.375000000000 0.166666666667 -0.250000000000 + 0.500000000000 0.166666666667 -0.250000000000 + -0.375000000000 0.166666666667 -0.250000000000 + -0.250000000000 0.166666666667 -0.250000000000 + -0.125000000000 0.166666666667 -0.250000000000 + 0.000000000000 0.333333333333 -0.250000000000 + 0.125000000000 0.333333333333 -0.250000000000 + 0.250000000000 0.333333333333 -0.250000000000 + 0.375000000000 0.333333333333 -0.250000000000 + 0.500000000000 0.333333333333 -0.250000000000 + -0.375000000000 0.333333333333 -0.250000000000 + -0.250000000000 0.333333333333 -0.250000000000 + -0.125000000000 0.333333333333 -0.250000000000 + 0.000000000000 0.500000000000 -0.250000000000 + 0.125000000000 0.500000000000 -0.250000000000 + 0.250000000000 0.500000000000 -0.250000000000 + 0.375000000000 0.500000000000 -0.250000000000 + 0.500000000000 0.500000000000 -0.250000000000 + -0.375000000000 0.500000000000 -0.250000000000 + -0.250000000000 0.500000000000 -0.250000000000 + -0.125000000000 0.500000000000 -0.250000000000 + 0.000000000000 -0.333333333333 -0.250000000000 + 0.125000000000 -0.333333333333 -0.250000000000 + 0.250000000000 -0.333333333333 -0.250000000000 + 0.375000000000 -0.333333333333 -0.250000000000 + 0.500000000000 -0.333333333333 -0.250000000000 + -0.375000000000 -0.333333333333 -0.250000000000 + -0.250000000000 -0.333333333333 -0.250000000000 + -0.125000000000 -0.333333333333 -0.250000000000 + 0.000000000000 -0.166666666667 -0.250000000000 + 0.125000000000 -0.166666666667 -0.250000000000 + 0.250000000000 -0.166666666667 -0.250000000000 + 0.375000000000 -0.166666666667 -0.250000000000 + 0.500000000000 -0.166666666667 -0.250000000000 + -0.375000000000 -0.166666666667 -0.250000000000 + -0.250000000000 -0.166666666667 -0.250000000000 + -0.125000000000 -0.166666666667 -0.250000000000 +end kpoints diff --git a/examples/WTe2/wt.in b/examples/WTe2/wt.in index 3e2c6d03..f8aa087b 100644 --- a/examples/WTe2/wt.in +++ b/examples/WTe2/wt.in @@ -68,8 +68,8 @@ E_arc = 0.0 ! energy for calculate Fermi Arc OmegaNum = 400 ! omega number OmegaMin = -0.6 ! energy interval OmegaMax = 0.5 ! energy interval -Nk1 = 41 ! number k points -Nk2 = 41 ! number k points +Nk1 = 6 ! number k points +Nk2 = 6 ! number k points Nk3 = 6 ! number k points NP = 1 ! number of principle layers Gap_threshold = 0.001 ! threshold for GapCube output diff --git a/src/Boltz_transport_anomalous.f90 b/src/Boltz_transport_anomalous.f90 index 114476fc..37f8355a 100644 --- a/src/Boltz_transport_anomalous.f90 +++ b/src/Boltz_transport_anomalous.f90 @@ -111,8 +111,8 @@ subroutine sigma_AHC write(outfileindex, '(a)')'set xlabel "Energy (eV)"' write(outfileindex, '(a)')'set ylabel "AHC (S/cm)"' write(outfileindex, '(5a)')"plot '", trim(adjustl(ahcfilename)), "' u 1:2 w l title '\sigma_{xy}' lc rgb 'red' lw 4, \" - write(outfileindex, '(5a)')"'", trim(adjustl(ahcfilename)), "' u 1:3 w l title '\sigma_{zx}' lc rgb 'blue' lw 4, \" - write(outfileindex, '(5a)')"'", trim(adjustl(ahcfilename)), "' u 1:4 w l title '\sigma_{xz}' lc rgb 'orange' lw 4 " + write(outfileindex, '(5a)')"'", trim(adjustl(ahcfilename)), "' u 1:3 w l title '\sigma_{yz}' lc rgb 'blue' lw 4, \" + write(outfileindex, '(5a)')"'", trim(adjustl(ahcfilename)), "' u 1:4 w l title '\sigma_{zx}' lc rgb 'orange' lw 4 " close(outfileindex) endif diff --git a/src/Makefile.gfortran b/src/Makefile.gfortran index 5a278f70..0f80a7ad 100644 --- a/src/Makefile.gfortran +++ b/src/Makefile.gfortran @@ -1,4 +1,5 @@ -obj = module.o sparse.o wt_aux.o math_lib.o symmetry.o readHmnR.o inverse.o proteus.o \ +OBJ = module.o sparse.o wt_aux.o math_lib.o mgmres.o symmetry.o \ + readHmnR.o inverse.o proteus.o \ eigen.o ham_qlayer2qlayer.o psi.o unfolding.o rand.o \ ham_slab.o ham_bulk.o ek_slab.o ek_bulk_polar.o ek_bulk.o \ readinput.o fermisurface.o surfgreen.o surfstat.o \ @@ -12,28 +13,48 @@ obj = module.o sparse.o wt_aux.o math_lib.o symmetry.o readHmnR.o inverse.o pro main.o # compiler -f90 = gfortran -cpp -DINTELMKL +#F90 = mpif90 -fpp -DMPI -fpe3 -O3 # -DINTELMKL -DSUPERLU # -check all -traceback -g +#F90 = gfortran -fpp #-DMPI -fpe3 -O3 # -DINTELMKL -DSUPERLU # -check all -traceback -g +F90 = gfortran -cpp +#F90 = ifort -fpp -DINTELMKL -fpe3 -check all -traceback -g +#CC = mpicc -fpp -DMPI -fpe3 -O3 # -DINTELMKL -DSUPERLU +CC = gcc -E #-DMPI -fpe3 -O3 # -DINTELMKL -DSUPERLU -flag = -O3 -ffree-line-length-512 #-g -Wall -Wextra -Warray-temporaries -Wconversion -fimplicit-none -fbacktrace -ffree-line-length-0 -fcheck=all -ffpe-trap=zero,overflow,underflow -finit-real=nan +INCLUDE = #-I${MKLROOT}/include -I/Users/user/quan/work/workplace/github-repositories/superlu/SRC +WFLAG = #-nogen-interface +#OFLAG = -O3 -static-intel +OFLAG = -O3 -ffree-line-length-512 #-g -Wall -Wextra -Warray-temporaries -Wconversion -fimplicit-none -fbacktrace -ffree-line-length-0 -fcheck=all -ffpe-trap=zero,overflow,underflow -finit-real=nan +FFLAG = $(OFLAG) $(WFLAG) +LFLAG = $(OFLAG) -ARPACK=/Users/quan/work/workplace/ARPACK/libarpack_gfortran.a +# ARPACK LIBRARY +ARPACK= #-L/usr/local/lib -larpack #/Users/user/quan/work/workplace/ARPACK/libarpack_gfortran.a + +# need to specify if -DSUPERLU privided in the F90 +SUPERLULIB=#/usr/local/lib/libsuperlu.a # blas and lapack libraries -#libs = -L/usr/lib/ -llapack -lblas - -# Intel MKL also supports with gfortran comipler +# static linking +#LIBS = -Wl,--start-group ${MKLROOT}/lib/intel64/libmkl_intel_lp64.a \ + ${MKLROOT}/lib/intel64/libmkl_sequential.a \ + ${MKLROOT}/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread -lm -ldl \ + ${ARPACK} + # dynamic linking -libs = ${ARPACK} -L/${MKLROOT}/lib -lmkl_core -lmkl_sequential -lmkl_intel_lp64 -lpthread +#LIBS = ${ARPACK} -L/${MKLROOT}/lib/intel64 -lmkl_core -lmkl_sequential -lmkl_intel_lp64 -lpthread ${SUPERLULIB} +LIBS = ${ARPACK} -llapack -lblas - -main : $(obj) - $(f90) $(obj) -o wt.x $(libs) +main : $(OBJ) + $(F90) $(LFLAG) $(OBJ) -o wt.x $(LIBS) cp -f wt.x ../bin -.SUFFIXES: .o .f90 +.SUFFIXES: .o .f90 .c .f90.o : - $(f90) -c $(flag) $(includes) $*.f90 + $(F90) $(FFLAG) $(INCLUDE) -c $*.f90 +.c.o: + $(CC) $(CFLAGS) -I$(INCLUDE) -c $< $(VERBOSE) clean : rm -f *.o *.mod *~ wt.x + diff --git a/src/Makefile.gfortran_travis b/src/Makefile.gfortran_travis index 448c72de..43d87b41 100644 --- a/src/Makefile.gfortran_travis +++ b/src/Makefile.gfortran_travis @@ -1,4 +1,5 @@ -OBJ = module.o sparse.o wt_aux.o math_lib.o symmetry.o readHmnR.o inverse.o proteus.o \ +OBJ = module.o sparse.o wt_aux.o math_lib.o mgmres.o symmetry.o \ + readHmnR.o inverse.o proteus.o \ eigen.o ham_qlayer2qlayer.o psi.o unfolding.o rand.o \ ham_slab.o ham_bulk.o ek_slab.o ek_bulk_polar.o ek_bulk.o \ readinput.o fermisurface.o surfgreen.o surfstat.o \ diff --git a/src/Makefile.intel-mpi b/src/Makefile.intel-mpi index 53c32dcf..de8ba175 100644 --- a/src/Makefile.intel-mpi +++ b/src/Makefile.intel-mpi @@ -1,4 +1,5 @@ -OBJ = module.o sparse.o wt_aux.o math_lib.o symmetry.o readHmnR.o inverse.o proteus.o \ +OBJ = module.o sparse.o wt_aux.o math_lib.o mgmres.o symmetry.o \ + c_fortran_zgssv.o readHmnR.o inverse.o proteus.o \ eigen.o ham_qlayer2qlayer.o psi.o unfolding.o rand.o \ ham_slab.o ham_bulk.o ek_slab.o ek_bulk_polar.o ek_bulk.o \ readinput.o fermisurface.o surfgreen.o surfstat.o \ @@ -12,16 +13,21 @@ OBJ = module.o sparse.o wt_aux.o math_lib.o symmetry.o readHmnR.o inverse.o pro main.o # compiler -F90 = mpiifort -fpp -DMPI -DINTELMKL -fpe3 +F90 = mpif90 -fpp -DMPI -fpe3 -O3 # -DINTELMKL -DSUPERLU # -check all -traceback -g +#F90 = ifort -fpp -DINTELMKL -fpe3 -check all -traceback -g +CC = mpicc -fpp -DMPI -fpe3 -O3 # -DINTELMKL -DSUPERLU -INCLUDE = -I${MKLROOT}/include +INCLUDE = #-I${MKLROOT}/include -I/Users/user/quan/work/workplace/github-repositories/superlu/SRC WFLAG = -nogen-interface OFLAG = -O3 -static-intel FFLAG = $(OFLAG) $(WFLAG) LFLAG = $(OFLAG) # ARPACK LIBRARY -ARPACK=/Users/quan/work/workplace/ARPACK/libarpack_MAC.a +ARPACK=/Users/user/quan/work/workplace/ARPACK/libarpack_MAC.a + +# need to specify if -DSUPERLU privided in the F90 +SUPERLULIB=#/usr/local/lib/libsuperlu.a # blas and lapack libraries # static linking @@ -31,16 +37,18 @@ ARPACK=/Users/quan/work/workplace/ARPACK/libarpack_MAC.a ${ARPACK} # dynamic linking -LIBS = ${ARPACK} -L/${MKLROOT}/lib/intel64 -lmkl_core -lmkl_sequential -lmkl_intel_lp64 -lpthread +LIBS = ${ARPACK} -L/${MKLROOT}/lib/intel64 -lmkl_core -lmkl_sequential -lmkl_intel_lp64 -lpthread ${SUPERLULIB} main : $(OBJ) $(F90) $(LFLAG) $(OBJ) -o wt.x $(LIBS) cp -f wt.x ../bin -.SUFFIXES: .o .f90 +.SUFFIXES: .o .f90 .c .f90.o : $(F90) $(FFLAG) $(INCLUDE) -c $*.f90 +.c.o: + $(CC) $(CFLAGS) -I$(INCLUDE) -c $< $(VERBOSE) clean : rm -f *.o *.mod *~ wt.x diff --git a/src/c_fortran_zgssv.c b/src/c_fortran_zgssv.c new file mode 100644 index 00000000..7be69561 --- /dev/null +++ b/src/c_fortran_zgssv.c @@ -0,0 +1,181 @@ +#ifdef SUPERLU + +/* + * -- SuperLU routine (version 6.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * March 26, 2023 Add 64-bit indexing and METIS ordering + */ + +#include "slu_zdefs.h" + +#define HANDLE_SIZE 8 + +/* kind of integer to hold a pointer. Use 64-bit. */ +typedef long long int fptr; + +typedef struct { + SuperMatrix *L; + SuperMatrix *U; + int *perm_c; + int *perm_r; +} factors_t; + +/*! + * This routine can be called from Fortran. + * + * iopt (input) int + * Specifies the operation: + * = 1, performs LU decomposition for the first time + * = 2, performs triangular solve + * = 3, free all the storage in the end + * + * f_factors (input/output) fptr* + * If iopt == 1, it is an output and contains the pointer pointing to + * the structure of the factored matrices. + * Otherwise, it it an input. + */ +void +c_fortran_zgssv_(int *iopt, int *n, int_t *nnz, int *nrhs, + doublecomplex *values, int_t *rowind, int_t *colptr, + doublecomplex *b, int *ldb, + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ + int_t *info) +{ + SuperMatrix A, AC, B; + SuperMatrix *L, *U; + int *perm_r; /* row permutations from partial pivoting */ + int *perm_c; /* column permutation vector */ + int *etree; /* column elimination tree */ + SCformat *Lstore; + NCformat *Ustore; + int i, panel_size, permc_spec, relax; + trans_t trans; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + GlobalLU_t Glu; /* Not needed on return. */ + int_t *rowind0; /* counter 1-based indexing from Frotran arrays. */ + int_t *colptr0; + + trans = NOTRANS; + + if ( *iopt == 1 ) { /* LU decomposition */ + + /* Set the default input options. */ + set_default_options(&options); + + /* Initialize the statistics variables. */ + StatInit(&stat); + + + /* Adjust to 0-based indexing */ + if ( !(rowind0 = intMalloc(*nnz)) ) ABORT("Malloc fails for rowind0[]."); + if ( !(colptr0 = intMalloc(*n+1)) ) ABORT("Malloc fails for colptr0[]."); + for (i = 0; i < *nnz; ++i) rowind0[i] = rowind[i] - 1; + for (i = 0; i <= *n; ++i) colptr0[i] = colptr[i] - 1; + + zCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind0, colptr0, + SLU_NC, SLU_Z, SLU_GE); + L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + if ( !(perm_r = int32Malloc(*n)) ) ABORT("Malloc fails for perm_r[]."); + if ( !(perm_c = int32Malloc(*n)) ) ABORT("Malloc fails for perm_c[]."); + if ( !(etree = int32Malloc(*n)) ) ABORT("Malloc fails for etree[]."); + + /* + * Get column permutation vector perm_c[], according to permc_spec: + * permc_spec = 0: natural ordering + * permc_spec = 1: minimum degree on structure of A'*A + * permc_spec = 2: minimum degree on structure of A'+A + * permc_spec = 3: approximate minimum degree for unsymmetric matrices + * permc_spec = 6: METIS ordering on structure of A'*A + */ + permc_spec = options.ColPerm; + get_perm_c(permc_spec, &A, perm_c); + + sp_preorder(&options, &A, perm_c, etree, &AC); + + panel_size = sp_ienv(1); + relax = sp_ienv(2); + + zgstrf(&options, &AC, relax, panel_size, etree, + NULL, 0, perm_c, perm_r, L, U, &Glu, &stat, info); + + if ( *info == 0 ) { + Lstore = (SCformat *) L->Store; + Ustore = (NCformat *) U->Store; + //printf("No of nonzeros in factor L = %lld\n", (long long) Lstore->nnz); + //printf("No of nonzeros in factor U = %lld\n", (long long) Ustore->nnz); + //printf("No of nonzeros in L+U = %lld\n", (long long) Lstore->nnz + Ustore->nnz); + zQuerySpace(L, U, &mem_usage); + //printf("L\\U MB %.3f\ttotal MB needed %.3f\n", + // mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); + } else { + printf("zgstrf() error returns INFO= %lld\n", (long long) *info); + if ( *info <= *n ) { /* factorization completes */ + zQuerySpace(L, U, &mem_usage); + printf("L\\U MB %.3f\ttotal MB needed %.3f\n", + mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); + } + } + + /* Save the LU factors in the factors handle */ + LUfactors = (factors_t*) SUPERLU_MALLOC(sizeof(factors_t)); + LUfactors->L = L; + LUfactors->U = U; + LUfactors->perm_c = perm_c; + LUfactors->perm_r = perm_r; + *f_factors = (fptr) LUfactors; + + /* Free un-wanted storage */ + SUPERLU_FREE(etree); + Destroy_SuperMatrix_Store(&A); + Destroy_CompCol_Permuted(&AC); + SUPERLU_FREE(rowind0); + SUPERLU_FREE(colptr0); + StatFree(&stat); + + } else if ( *iopt == 2 ) { /* Triangular solve */ + int iinfo; + + /* Initialize the statistics variables. */ + StatInit(&stat); + + /* Extract the LU factors in the factors handle */ + LUfactors = (factors_t*) *f_factors; + L = LUfactors->L; + U = LUfactors->U; + perm_c = LUfactors->perm_c; + perm_r = LUfactors->perm_r; + + zCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_Z, SLU_GE); + + /* Solve the system A*X=B, overwriting B with X. */ + zgstrs (trans, L, U, perm_c, perm_r, &B, &stat, &iinfo); + *info = iinfo; + + Destroy_SuperMatrix_Store(&B); + StatFree(&stat); + + } else if ( *iopt == 3 ) { /* Free storage */ + /* Free the LU factors in the factors handle */ + LUfactors = (factors_t*) *f_factors; + SUPERLU_FREE (LUfactors->perm_r); + SUPERLU_FREE (LUfactors->perm_c); + Destroy_SuperNode_Matrix(LUfactors->L); + Destroy_CompCol_Matrix(LUfactors->U); + SUPERLU_FREE (LUfactors->L); + SUPERLU_FREE (LUfactors->U); + SUPERLU_FREE (LUfactors); + } else { + fprintf(stderr,"Invalid iopt=%d passed to c_fortran_zgssv()\n",*iopt); + exit(-1); + } +} + +#endif diff --git a/src/dos.f90 b/src/dos.f90 index eedd9943..f973cfd2 100644 --- a/src/dos.f90 +++ b/src/dos.f90 @@ -1,5 +1,4 @@ -#if defined (INTELMKL) subroutine dos_sparse !> calculate density of state for 3D bulk system ! @@ -189,10 +188,6 @@ subroutine dos_sparse return end subroutine dos_sparse -#endif - - -#if defined (INTELMKL) subroutine charge_density_sparse !> calculate charge density ! @@ -356,11 +351,6 @@ subroutine charge_density_sparse return end subroutine charge_density_sparse -#endif - - - - subroutine dos_sub diff --git a/src/ek_bulk.f90 b/src/ek_bulk.f90 index a54b5040..e4aadd7f 100644 --- a/src/ek_bulk.f90 +++ b/src/ek_bulk.f90 @@ -1108,7 +1108,6 @@ end subroutine ek_bulk_cube -#if defined (INTELMKL) subroutine sparse_ekbulk use sparse use para @@ -1184,7 +1183,11 @@ subroutine sparse_ekbulk if (neval>Num_wann-2) neval= Num_wann- 2 !> ncv - nvecs=int(2*neval) + if (NumLCZVecs.ne.0) then + nvecs= NumLCZVecs + else + nvecs=int(2*neval) + endif if (nvecs<20) nvecs= 20 if (nvecs>Num_wann) nvecs= Num_wann @@ -1556,7 +1559,8 @@ subroutine sparse_ekbulk_valley do ie2= 1, ND psi2= zeigv(:, IE+ie2-1) vpsi=0d0 - call mkl_zcoogemv('N', Num_wann, acoo_valley, icoo_valley, jcoo_valley, nnzmax_valley, psi2, vpsi) + !call mkl_zcoogemv('N', Num_wann, acoo_valley, icoo_valley, jcoo_valley, nnzmax_valley, psi2, vpsi) + call coomv_z(Num_wann, nnzmax_valley, acoo_valley, icoo_valley, jcoo_valley, psi2, vpsi) valley_k_nd(ie1, ie2)= zdotc(Num_wann, psi1, 1, vpsi, 1) enddo !if (abs(W(IE+ie1-1)/eV2Hartree)<2d0) & @@ -1918,7 +1922,6 @@ subroutine sparse_ekbulk_plane return end subroutine sparse_ekbulk_plane -#endif #if defined (ELPA) subroutine ekbulk_elpa diff --git a/src/ek_slab.f90 b/src/ek_slab.f90 index de983a3c..7dec956b 100644 --- a/src/ek_slab.f90 +++ b/src/ek_slab.f90 @@ -377,9 +377,7 @@ subroutine ek_slab_sparseHR !> diagonalization by call zheev in lapack W= 0d0 -#if defined (INTELMKL) call arpack_sparse_coo_eigs(Ndimq,nnzmax,nnz,acoo,jcoo,icoo,neval,nvecs,W,sigma, zeigv, ritzvec) -#endif call now(time3) eigv(1:neval, ik)= W(1:neval) diff --git a/src/fermisurface.f90 b/src/fermisurface.f90 index e93768a5..9250f332 100644 --- a/src/fermisurface.f90 +++ b/src/fermisurface.f90 @@ -439,7 +439,7 @@ subroutine fermisurface_kplane integer :: ia, ik, i, j, i1, i2, ig, io integer :: knv3, nkx, nky, ierr, nwann - real(dp) :: kz, k(3), s0(3) + real(dp) :: kz, k(3), s0(3), s0t(3), s0_len ! Hamiltonian of bulk system complex(Dp), allocatable :: Hamk_bulk(:, :) @@ -463,6 +463,7 @@ subroutine fermisurface_kplane complex(dp), allocatable :: ones(:, :), ctemp(:, :) logical, external :: if_in_the_WS + real(dp), external :: norm nkx= Nk @@ -665,9 +666,14 @@ subroutine fermisurface_kplane write(outfileindex, "('#column', i5, 3000i16)")(i, i=1, 7+4*NumberofSelectedOrbitals_groups) do ik=1, knv3 do ig=1, NumberofSelectedOrbitals_groups - s0(1)= sx_selected_mpi(ik, ig) /dos_selected_mpi(ik, ig) - s0(2)= sy_selected_mpi(ik, ig) /dos_selected_mpi(ik, ig) - s0(3)= sz_selected_mpi(ik, ig) /dos_selected_mpi(ik, ig) + s0(1)= sx_selected_mpi(ik, ig) /dos_selected_mpi(ik, ig) + s0(2)= sy_selected_mpi(ik, ig) /dos_selected_mpi(ik, ig) + s0(3)= sz_selected_mpi(ik, ig) /dos_selected_mpi(ik, ig) + + !s0_len= norm(s0t) + !if (s0_len>eps6) then + ! s0= s0t/s0_len + !endif call rotate_k3_to_kplane(s0, s1(:, ig)) enddo write(outfileindex, '(3000E16.8)')kxy_shape(:, ik)*Angstrom2atomic, kxy_plane(:, ik)*Angstrom2atomic, & diff --git a/src/ham_bulk.f90 b/src/ham_bulk.f90 index 9705e98a..03fd510f 100644 --- a/src/ham_bulk.f90 +++ b/src/ham_bulk.f90 @@ -325,17 +325,17 @@ subroutine ham_bulk_latticegauge(k,Hamk_bulk) !Hamk_bulk= (Hamk_bulk+ mat2)/2d0 ! check hermitcity - do i1=1, Num_wann - do i2=1, Num_wann - if(abs(Hamk_bulk(i1,i2)-conjg(Hamk_bulk(i2,i1))).ge.1e-6)then - write(stdout,*)'there is something wrong with Hamk_bulk' - write(stdout,*)'i1, i2', i1, i2 - write(stdout,*)'value at (i1, i2)', Hamk_bulk(i1, i2) - write(stdout,*)'value at (i2, i1)', Hamk_bulk(i2, i1) - !stop - endif - enddo - enddo + !do i1=1, Num_wann + ! do i2=1, Num_wann + ! if(abs(Hamk_bulk(i1,i2)-conjg(Hamk_bulk(i2,i1))).ge.1e-6)then + ! write(stdout,*)'there is something wrong with Hamk_bulk' + ! write(stdout,*)'i1, i2', i1, i2 + ! write(stdout,*)'value at (i1, i2)', Hamk_bulk(i1, i2) + ! write(stdout,*)'value at (i2, i1)', Hamk_bulk(i2, i1) + ! !stop + ! endif + ! enddo + !enddo return end subroutine ham_bulk_latticegauge @@ -834,6 +834,66 @@ subroutine overlap_bulk_coo_sparse(k, acoo, icoo, jcoo) return end subroutine overlap_bulk_coo_sparse +subroutine ham_bulk_tpaw(mdim, k, Hamk_moire) + ! This subroutine caculates Hamiltonian for + ! a twist system + ! + ! Lattice gauge Hl + ! Atomic gauge Ha= U* Hl U + ! where U = e^ik.wc(i) on diagonal + ! contact wuquansheng@gmail.com + ! 2024 Jan 19 + + use para + implicit none + + !> leading dimension of Ham_moire + integer, intent(in) :: mdim + real(dp), intent(in) :: k(3) + complex(dp), intent(inout) :: hamk_moire(mdim, mdim) + + ! wave vector in 3d + real(Dp) :: kdotr, pos0(3), dis + + complex(dp) :: factor + + real(dp) :: pos(3), pos1(3), pos2(3), pos_cart(3), pos_direct(3) + + integer :: Num_gvectors + + !> dimension 3*Num_gvectors + real(dp), allocatable :: gvectors(:, :) + + integer :: i1, i2 + + !> for a test, we assume a twist homo-bilayer system + ! mdim= Num_gvectors* Folded_cell%NumberOfspinorbitals*2 + + hamk_moire=0d0 + + !> first, set G vectors + + !> construct T matrix + !> the dimension of the T matrix is num_wann + + + ! check hermitcity + do i1=1, mdim + do i2=1, mdim + if(abs(hamk_moire(i1,i2)-conjg(hamk_moire(i2,i1))).ge.1e-6)then + write(stdout,*)'there is something wrong with hamk_moire' + write(stdout,*)'i1, i2', i1, i2 + write(stdout,*)'value at (i1, i2)', hamk_moire(i1, i2) + write(stdout,*)'value at (i2, i1)', hamk_moire(i2, i1) + !stop + endif + enddo + enddo + + return +end subroutine ham_bulk_tpaw + + subroutine valley_k_coo_sparsehr(nnz, k,acoo,icoo,jcoo) !> This subroutine use sparse hr format !> Here we use atomic gauge which means the atomic position is taken into account diff --git a/src/inverse.f90 b/src/inverse.f90 index a846cbf5..efaa40e0 100644 --- a/src/inverse.f90 +++ b/src/inverse.f90 @@ -111,3 +111,321 @@ subroutine inv_r(ndim,Amat) return end subroutine inv_r + + + subroutine sparse_solver(ndims, nnz, acsr, icsr, jcsr, b, x) + !> this subroutine solves A*x= b, where A is a sparse matrix stored in compress row format + !> A is a Hermitian complex square matrix + + use para, only : dp + implicit none + + !> the dimension of a square matrix + integer, intent(in) :: ndims + + !> number of non-zeros elements + integer, intent(in) :: nnz + + !> sparse matrix + ! acsr(K) = value of entry, + ! icsr(K) = row of entry, + ! jcsr(K) = column of entry. + complex(dp), intent(in) :: acsr(nnz) + integer, intent(in) :: icsr(ndims+1) + integer, intent(in) :: jcsr(nnz) + + complex(dp), intent(out) :: x(ndims) + complex(dp), intent(in) :: b(ndims) + real(dp), allocatable :: acsr_real(:) + real(dp), allocatable :: acsr_imag(:) + real(dp), allocatable :: x_real(:) + real(dp), allocatable :: x_imag(:) + real(dp), allocatable :: b_real(:) + real(dp), allocatable :: b_imag(:) + + complex(dp), allocatable :: acsc(:) + integer, allocatable :: icsc(:) + integer, allocatable :: jcsc(:) + + ! ITR_MAX, the maximum number of (outer) iterations to take. + integer :: itr_max + + !MR, the maximum number of (inner) iterations to take. 0 < MR <= N. + integer :: mr + + !TOL_ABS, an absolute tolerance applied to the current residual. + !TOL_REL, a relative tolerance comparing the current residual to the initial residual. + real(dp) :: tol_abs, tol_rel + + integer :: iopt, info, nrhs, i, ipos, ljob + integer*8 :: factors + + + !>-------------------------------------------------- + !>> call mgmres_st + !> mgmres_st() applies restarted GMRES to a sparse triplet matrix. + + !allocate(acsr_real(nnz), acsr_imag(nnz)) + !allocate(x_real(ndims), x_imag(ndims)) + !allocate(b_real(ndims), b_imag(ndims)) + + !acsr_real= real(acsr) + !acsr_imag= aimag(acsr) + !b_real= real(b) + !b_imag= aimag(b) + + + !itr_max=1000 + !tol_abs=1E-7 + !tol_rel=1E-7 + !mr= min(100, ndims) + !call mgmres_csr ( ndims, nnz, icsr, jcsr, acsr_real, x_real, b_real, itr_max, mr, tol_abs, tol_rel ) + !call mgmres_csr ( ndims, nnz, icsr, jcsr, acsr_imag, x_imag, b_imag, itr_max, mr, tol_abs, tol_rel ) + + !do i=1, ndims + ! x(i) = cmplx(x_real(i), x_imag(i)) + !enddo + !>-------------------------------------------------- + + !>-------------------------------------------------- + !>> call superLU + +#ifdef SUPERLU + allocate( acsc(nnz), icsc(ndims+1), jcsc(nnz)) + !> convert csr to csc + ljob = 1 ! fill c + ipos = 1 + call csrcsc ( ndims, ljob, ipos, acsr, jcsr, icsr, acsc, jcsc, icsc ) + !> + x= b +! First, factorize the matrix. The factors are stored in *factors* handle. + iopt = 1 + nrhs = 1 + call c_fortran_zgssv( iopt, ndims, nnz, nrhs, acsc, jcsc, icsc, & + x, ndims, factors, info ) + + if (info .eq. 0) then + ! write (*,*) 'Factorization succeeded' + else + ! write(*,*) 'INFO from factorization = ', info + endif + +! Second, solve the system using the existing factors. + iopt = 2 + call c_fortran_zgssv( iopt, ndims, nnz, nrhs, acsc, jcsc, icsc, & + x, ndims, factors, info ) + + if (info .eq. 0) then + ! write (*,*) 'Solve succeeded' + else + ! write(*,*) 'INFO from triangular solve = ', info + endif + +! Last, free the storage allocated inside SuperLU + iopt = 3 + call c_fortran_zgssv( iopt, ndims, nnz, nrhs, acsc, jcsc, icsc, & + x, ndims, factors, info ) + !>-------------------------------------------------- +#elif defined (INTELMKL) +! !>-------------------------------------------------- + !>> call mkldss zgesv + call zmat_mkldss_zgesv(ndims, nnz, acsr, jcsr, icsr, b, x) + !>-------------------------------------------------- +#else + STOP "ERROR: Please add -DINTELMKL or -DSUPERLU in the Makefile to run the sparse_solver" +#endif + + + return + end subroutine sparse_solver + +#if defined (INTELMKL) + subroutine zmat_mkldss_zgesv(ndims, nnzmax, acsr, jcsr, icsr, xvec, yvec) + use mkl_dss + use para, only : dp, stdout, time_cost_t1, time_cost_t2, time_cost_t3 + + implicit none + + ! external arguments + ! dimension of matrix A + integer, intent(in) :: ndims + + ! non-zero elements of matrix A + integer, intent(in) :: nnzmax + + ! compressed sparse row storage of matrix A + complex(dp), intent(in) :: acsr(nnzmax) + integer, intent(in) :: jcsr(nnzmax) + integer, intent(in) :: icsr(ndims+1) + + ! xvec vector with dimension "ndims" + complex(dp), intent(in) :: xvec(ndims) + + ! yvec vector with dimension "ndims" + complex(dp), intent(out) :: yvec(ndims) + + ! local variables + type(mkl_dss_handle) :: handle + integer :: nrhs + integer :: ierr + integer :: perm(1) + real(dp) :: time_tic, time_toc + + !>>> step 0: setup the problem to be solved + nrhs = 1 + perm(1) = 0 + + !>>> step 1: initialize the solver. + ierr = dss_create( handle, mkl_dss_defaults ) + if (ierr .ne. mkl_dss_success) then + write(*,*) "mkl_dss_create returned error code", ierr; stop + endif + + !>>> step 2: define the non-zero structure of the matrix. + ierr = dss_define_structure( handle, mkl_dss_symmetric_structure_complex, icsr, ndims, ndims, jcsr, nnzmax ) + !ierr = dss_define_structure( handle, MKL_DSS_NON_SYMMETRIC_COMPLEX, icsr, ndims, ndims, jcsr, nnzmax ) + if (ierr .ne. mkl_dss_success) then + write(*,*) "mkl_dss_define_structure returned error code", ierr; stop + endif ! back if (ierr .ne. mkl_dss_success} block + + call now(time_tic) + !>>> step 3: reorder the matrix. + ierr = dss_reorder( handle, mkl_dss_defaults, perm ) + if (ierr .ne. mkl_dss_success) then + write(*,*) "mkl_dss_reorder returned error code", ierr; stop + endif ! back if (ierr .ne. mkl_dss_success} block + call now(time_toc) + time_cost_t1= time_cost_t1+ time_toc- time_tic + + call now(time_tic) + !>>> step 4: factor the matrix. + ierr = dss_factor_complex( handle, mkl_dss_defaults, acsr ) + if (ierr .ne. mkl_dss_success) then + write(*,*) "mkl_dss_factor_complex returned error code", ierr; stop + endif ! back if (ierr .ne. mkl_dss_success} block + call now(time_toc) + time_cost_t2= time_cost_t2+ time_toc- time_tic + + call now(time_tic) + ! allocate the solution vector and solve the problem. + ierr = dss_solve_complex( handle, mkl_dss_defaults, xvec, nrhs, yvec ) + call now(time_toc) + time_cost_t3= time_cost_t3+ time_toc- time_tic + + if (ierr .ne. mkl_dss_success) then + write(*,*) "mkl_dss_solve_complex returned error code", ierr; stop + endif ! back if (ierr .ne. mkl_dss_success} block + + ! deallocate solver storage and various local arrays. + ierr = dss_delete( handle, mkl_dss_defaults ) + + if (ierr .ne. mkl_dss_success) then + write(*,*) "mkl_dss_solver returned error code", ierr; stop + endif ! back if (ierr .ne. mkl_dss_success} block + + return + end subroutine zmat_mkldss_zgesv +#endif + +subroutine csrcsc ( n, job, ipos, a, ja, ia, ao, jao, iao ) + +!*****************************************************************************80 +! +!! CSRCSC converts Compressed Sparse Row to Compressed Sparse Column. +! +! Discussion: +! +! This is essentially a transposition operation. +! +! It is NOT an in-place algorithm. +! +! This routine transposes a matrix stored in a, ja, ia format. +! +! Modified: +! +! 07 January 2004 +! +! Author: +! +! Youcef Saad +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, the order of the matrix. +! +! Input, integer ( kind = 4 ) JOB, indicates whether or not to fill the values of the +! matrix AO or only the pattern (IA, and JA). Enter 1 for yes. +! +! ipos = starting position in ao, jao of the transposed matrix. +! the iao array takes this into account (thus iao(1) is set to ipos.) +! Note: this may be useful if one needs to append the data structure +! of the transpose to that of A. In this case use +! call csrcsc (n,1,n+2,a,ja,ia,a,ja,ia(n+2)) +! for any other normal usage, enter ipos=1. +! +! Input, real A(*), integer ( kind = 4 ) JA(*), IA(N+1), the matrix in CSR +! Compressed Sparse Row format. +! +! Output, real AO(*), JAO(*), IAO(N+1), the matrix in CSC +! Compressed Sparse Column format. +! + implicit none + + integer ( kind = 4 ) n + + complex ( kind = 8 ) a(*) + complex ( kind = 8 ) ao(*) + integer ( kind = 4 ) i + integer ( kind = 4 ) ia(n+1) + integer ( kind = 4 ) iao(n+1) + integer ( kind = 4 ) ipos + integer ( kind = 4 ) j + integer ( kind = 4 ) ja(*) + integer ( kind = 4 ) jao(*) + integer ( kind = 4 ) job + integer ( kind = 4 ) k + integer ( kind = 4 ) next +! +! Compute lengths of rows of A'. +! + iao(1:n+1) = 0 + + do i = 1, n + do k = ia(i), ia(i+1)-1 + j = ja(k) + 1 + iao(j) = iao(j) + 1 + end do + end do +! +! Compute pointers from lengths. +! + iao(1) = ipos + do i = 1, n + iao(i+1) = iao(i) + iao(i+1) + end do +! +! Do the actual copying. +! + do i = 1, n + do k = ia(i), ia(i+1)-1 + j = ja(k) + next = iao(j) + if ( job == 1 ) then + ao(next) = a(k) + end if + jao(next) = i + iao(j) = next + 1 + end do + end do +! +! Reshift IAO and leave. +! + do i = n, 1, -1 + iao(i+1) = iao(i) + end do + iao(1) = ipos + + return +end + + diff --git a/src/lanczos_sparse.f90 b/src/lanczos_sparse.f90 index 89526e73..3b4173d1 100644 --- a/src/lanczos_sparse.f90 +++ b/src/lanczos_sparse.f90 @@ -83,9 +83,10 @@ subroutine lanczos_seqsparse_cpu_z(NumLczVectors, NumLczVectors_out, Mdim, nnz, vec_0= 1d0/betan(1)* vec_0 !* perform |vec(1,:)>=Ham |vec(0,:)> -#if defined (INTELMKL) - call mkl_zcsrgemv('N', Mdim, acsr, icsr, jcsr, vec_0, vec_1) -#endif +!#if defined (INTELMKL) +! call mkl_zcsrgemv('N', Mdim, acsr, icsr, jcsr, vec_0, vec_1) +!#endif + call csrmv_z(Mdim, nnz, acsr, icsr, jcsr, vec_0, vec_1) !* a_0= alpha(1) = zdotc(Mdim, vec_0, 1, vec_1, 1) @@ -114,9 +115,10 @@ subroutine lanczos_seqsparse_cpu_z(NumLczVectors, NumLczVectors_out, Mdim, nnz, call now(time_start) -#if defined (INTELMKL) - call mkl_zcsrgemv('N', Mdim, acsr, icsr, jcsr, vec_1, vec_2) -#endif +!#if defined (INTELMKL) +! call mkl_zcsrgemv('N', Mdim, acsr, icsr, jcsr, vec_1, vec_2) +!#endif + call csrmv_z(Mdim, nnz, acsr, icsr, jcsr, vec_1, vec_2) !* calculate alpha, betan t_z1= zdotc(Mdim, vec_1, 1, vec_2, 1) diff --git a/src/landau_level_sparse.f90 b/src/landau_level_sparse.f90 index a7263fcc..16ab28c6 100644 --- a/src/landau_level_sparse.f90 +++ b/src/landau_level_sparse.f90 @@ -750,9 +750,7 @@ subroutine sparse_landau_level_B !> diagonalization by call zheev in lapack W= 0d0 -#if defined (INTELMKL) call arpack_sparse_coo_eigs(Ndimq,nnzmax,nnz,acoo,jcoo,icoo,neval,nvecs,W,sigma, zeigv, LandauLevel_wavefunction_calc) -#endif call now(time3) eigv(:, ib)= W if (cpuid==0)write(stdout, '(a, f20.2, a)')' >> Time cost for constructing H: ', time2-time1, ' s' @@ -1070,9 +1068,7 @@ subroutine sparse_landau_level_k !> diagonalization by call zheev in lapack W= 0d0 -#if defined (INTELMKL) call arpack_sparse_coo_eigs(Ndimq,nnzmax,nnz,acoo,jcoo,icoo,neval,nvecs,W,sigma, zeigv, LandauLevel_wavefunction_calc) -#endif call now(time3) eigv(1:neval, ik)= W(1:neval) @@ -1401,9 +1397,7 @@ subroutine sparse_landau_dos !> diagonalization by call zheev in lapack W= 0d0 ! call eigensystem_c( 'N', 'U', Ndimq ,ham_landau, W) -#if defined (INTELMKL) call arpack_sparse_coo_eigs(Ndimq,nnzmax,nnz,acoo,jcoo,icoo,neval,nvecs,W,sigma, zeigv, LandauLevel_wavefunction_calc) -#endif do ie= 1, NE do iv= 1, neval x= omega(ie)- W(iv) diff --git a/src/main.f90 b/src/main.f90 index 39793df4..2a567c1d 100644 --- a/src/main.f90 +++ b/src/main.f90 @@ -117,7 +117,6 @@ program main !> normal hmnr file if(.not. Is_Sparse_Hr) then !> for the dense hr file, we allocate HmnR - HmnR= 0d0 call readNormalHmnR() if (valley_projection_calc) call read_valley_operator !> sparse hmnr input @@ -167,13 +166,11 @@ program main if(cpuid.eq.0)write(stdout, *)'>> Start of calculating bulk band' call now(time_start) if (Is_Sparse_Hr) then -#if defined (INTELMKL) if (valley_projection_calc) then call sparse_ekbulk_valley else call sparse_ekbulk endif -#endif else if (valley_projection_calc) then call ek_bulk_line_valley @@ -207,9 +204,7 @@ program main call now(time_start) if (Is_Sparse_Hr) then -#if defined (INTELMKL) call sparse_ekbulk_plane -#endif else call ek_bulk_plane endif @@ -225,9 +220,7 @@ program main if(cpuid.eq.0)write(stdout, *)' ' if(cpuid.eq.0)write(stdout, *)'>> Start of calculating Landau level spectrum' call now(time_start) -#if defined (INTELMKL) call LandauLevel_B_dos_Lanczos -#endif call now(time_end) call print_time_cost(time_start, time_end, 'LandauLevel_B_dos_calc') if(cpuid.eq.0)write(stdout, *)'<< End of calculating Landau level spectrum' @@ -238,9 +231,7 @@ program main if(cpuid.eq.0)write(stdout, *)' ' if(cpuid.eq.0)write(stdout, *)'>> Start of calculating Landau level spectrum' call now(time_start) -#if defined (INTELMKL) call LandauLevel_k_dos_Lanczos -#endif call now(time_end) call print_time_cost(time_start, time_end, 'LandauLevel_k_dos_calc') if(cpuid.eq.0)write(stdout, *)'<< End of calculating Landau level spectrum' @@ -252,9 +243,7 @@ program main call now(time_start) if (Is_HrFile) then if(Is_Sparse_Hr) then -#if defined (INTELMKL) call sparse_landau_level_B -#endif else call landau_level_B end if @@ -286,9 +275,7 @@ program main call now(time_start) if (Is_HrFile) then if(Is_Sparse_Hr.or.Num_wann*Magq>2000) then -#if defined (INTELMKL) call sparse_landau_level_k -#endif else call landau_level_k endif @@ -384,9 +371,7 @@ program main if(.not. Is_Sparse_Hr) then call dos_sub else -#if defined (INTELMKL) call dos_sparse -#endif end if call now(time_end) call print_time_cost(time_start, time_end, 'Dos_calc') @@ -469,9 +454,7 @@ program main if(cpuid.eq.0)write(stdout, *)'>> Start of calculating the slab band structure' call now(time_start) if (Is_Sparse_Hr) then -#if defined (INTELMKL) call ek_slab_sparseHR -#endif else call ek_slab endif diff --git a/src/mgmres.f90 b/src/mgmres.f90 new file mode 100644 index 00000000..a5ba68e6 --- /dev/null +++ b/src/mgmres.f90 @@ -0,0 +1,1700 @@ +subroutine atx_cr ( n, nz_num, ia, ja, a, x, w ) + +!*****************************************************************************80 +! +!! atx_cr() computes A'*x for a matrix stored in sparse compressed row form. +! +! Discussion: +! +! The Sparse Compressed Row storage format is used. +! +! The matrix A is assumed to be sparse. To save on storage, only +! the nonzero entries of A are stored. The vector JA stores the +! column index of the nonzero value. The nonzero values are sorted +! by row, and the compressed row vector IA then has the property that +! the entries in A and JA that correspond to row I occur in indices +! IA[I] through IA[I+1]-1. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 17 July 2007 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Richard Barrett, Michael Berry, Tony Chan, James Demmel, +! June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo, +! Charles Romine, Henk van der Vorst, +! Templates for the Solution of Linear Systems: +! Building Blocks for Iterative Methods, +! SIAM, 1994. +! ISBN: 0898714710, +! LC: QA297.8.T45. +! +! Tim Kelley, +! Iterative Methods for Linear and Nonlinear Equations, +! SIAM, 2004, +! ISBN: 0898713528, +! LC: QA297.8.K45. +! +! Yousef Saad, +! Iterative Methods for Sparse Linear Systems, +! Second Edition, +! SIAM, 2003, +! ISBN: 0898715342, +! LC: QA188.S17. +! +! Parameters: +! +! Input, integer N, the order of the system. +! +! Input, integer NZ_NUM, the number of nonzeros. +! +! Input, integer IA(N+1), JA(NZ_NUM), the row and column +! indices of the matrix values. The row vector has been compressed. +! +! Input, real ( kind = rk ) A(NZ_NUM), the matrix values. +! +! Input, real ( kind = rk ) X(N), the vector to be multiplied by A'. +! +! Output, real ( kind = rk ) W(N), the value of A'*X. +! + implicit none + + integer, parameter :: rk = kind ( 1.0D+00 ) + + integer n + integer nz_num + + real ( kind = rk ) a(nz_num) + integer i + integer ia(n+1) + integer ja(nz_num) + integer k1 + integer k2 + real ( kind = rk ) w(n) + real ( kind = rk ) x(n) + + w(1:n) = 0.0D+00 + + do i = 1, n + k1 = ia(i) + k2 = ia(i+1) - 1 + w(ja(k1:k2)) = w(ja(k1:k2)) + a(k1:k2) * x(i) + end do + + return +end +subroutine atx_st ( n, nz_num, ia, ja, a, x, w ) + +!*****************************************************************************80 +! +!! atx_st() computes A'*x for a matrix stored in sparset triplet form. +! +! Discussion: +! +! The matrix A is assumed to be sparse. To save on storage, only +! the nonzero entries of A are stored. For instance, the K-th nonzero +! entry in the matrix is stored by: +! +! A(K) = value of entry, +! IA(K) = row of entry, +! JA(K) = column of entry. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 08 August 2006 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Richard Barrett, Michael Berry, Tony Chan, James Demmel, +! June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo, +! Charles Romine, Henk van der Vorst, +! Templates for the Solution of Linear Systems: +! Building Blocks for Iterative Methods, +! SIAM, 1994. +! ISBN: 0898714710, +! LC: QA297.8.T45. +! +! Tim Kelley, +! Iterative Methods for Linear and Nonlinear Equations, +! SIAM, 2004, +! ISBN: 0898713528, +! LC: QA297.8.K45. +! +! Yousef Saad, +! Iterative Methods for Sparse Linear Systems, +! Second Edition, +! SIAM, 2003, +! ISBN: 0898715342, +! LC: QA188.S17. +! +! Parameters: +! +! Input, integer N, the order of the system. +! +! Input, integer NZ_NUM, the number of nonzeros. +! +! Input, integer IA(NZ_NUM), JA(NZ_NUM), the row and column +! indices of the matrix values. +! +! Input, real ( kind = rk ) A(NZ_NUM), the matrix values. +! +! Input, real ( kind = rk ) X(N), the vector to be multiplied by A'. +! +! Output, real ( kind = rk ) W(N), the value of A'*X. +! + implicit none + + integer, parameter :: rk = kind ( 1.0D+00 ) + + integer n + integer nz_num + + real ( kind = rk ) a(nz_num) + integer i + integer ia(nz_num) + integer j + integer ja(nz_num) + integer k + real ( kind = rk ) w(n) + real ( kind = rk ) x(n) + + w(1:n) = 0.0D+00 + + do k = 1, nz_num + i = ia(k) + j = ja(k) + w(j) = w(j) + a(k) * x(i) + end do + + return +end +subroutine ax_cr ( n, nz_num, ia, ja, a, x, w ) + +!*****************************************************************************80 +! +!! ax_cr() computes A*x for a matrix stored in sparse compressed row form. +! +! Discussion: +! +! The Sparse Compressed Row storage format is used. +! +! The matrix A is assumed to be sparse. To save on storage, only +! the nonzero entries of A are stored. The vector JA stores the +! column index of the nonzero value. The nonzero values are sorted +! by row, and the compressed row vector IA then has the property that +! the entries in A and JA that correspond to row I occur in indices +! IA[I] through IA[I+1]-1. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 17 July 2007 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Richard Barrett, Michael Berry, Tony Chan, James Demmel, +! June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo, +! Charles Romine, Henk van der Vorst, +! Templates for the Solution of Linear Systems: +! Building Blocks for Iterative Methods, +! SIAM, 1994. +! ISBN: 0898714710, +! LC: QA297.8.T45. +! +! Tim Kelley, +! Iterative Methods for Linear and Nonlinear Equations, +! SIAM, 2004, +! ISBN: 0898713528, +! LC: QA297.8.K45. +! +! Yousef Saad, +! Iterative Methods for Sparse Linear Systems, +! Second Edition, +! SIAM, 2003, +! ISBN: 0898715342, +! LC: QA188.S17. +! +! Parameters: +! +! Input, integer N, the order of the system. +! +! Input, integer NZ_NUM, the number of nonzeros. +! +! Input, integer IA(N+1), JA(NZ_NUM), the row and column +! indices of the matrix values. The row vector has been compressed. +! +! Input, real ( kind = rk ) A(NZ_NUM), the matrix values. +! +! Input, real ( kind = rk ) X(N), the vector to be multiplied by A. +! +! Output, real ( kind = rk ) W(N), the value of A*X. +! + implicit none + + integer, parameter :: rk = kind ( 1.0D+00 ) + + integer n + integer nz_num + + real ( kind = rk ) a(nz_num) + integer i + integer ia(n+1) + integer ja(nz_num) + integer k1 + integer k2 + real ( kind = rk ) w(n) + real ( kind = rk ) x(n) + + w(1:n) = 0.0D+00 + + do i = 1, n + k1 = ia(i) + k2 = ia(i+1) - 1 + w(i) = w(i) + dot_product ( a(k1:k2), x(ja(k1:k2)) ) + end do + + return +end +subroutine ax_st ( n, nz_num, ia, ja, a, x, w ) + +!*****************************************************************************80 +! +!! ax_st() computes A*x for a matrix stored in sparset triplet form. +! +! Discussion: +! +! The matrix A is assumed to be sparse. To save on storage, only +! the nonzero entries of A are stored. For instance, the K-th nonzero +! entry in the matrix is stored by: +! +! A(K) = value of entry, +! IA(K) = row of entry, +! JA(K) = column of entry. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 08 August 2006 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Richard Barrett, Michael Berry, Tony Chan, James Demmel, +! June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo, +! Charles Romine, Henk van der Vorst, +! Templates for the Solution of Linear Systems: +! Building Blocks for Iterative Methods, +! SIAM, 1994. +! ISBN: 0898714710, +! LC: QA297.8.T45. +! +! Tim Kelley, +! Iterative Methods for Linear and Nonlinear Equations, +! SIAM, 2004, +! ISBN: 0898713528, +! LC: QA297.8.K45. +! +! Yousef Saad, +! Iterative Methods for Sparse Linear Systems, +! Second Edition, +! SIAM, 2003, +! ISBN: 0898715342, +! LC: QA188.S17. +! +! Parameters: +! +! Input, integer N, the order of the system. +! +! Input, integer NZ_NUM, the number of nonzeros. +! +! Input, integer IA(NZ_NUM), JA(NZ_NUM), the row and column +! indices of the matrix values. +! +! Input, real ( kind = rk ) A(NZ_NUM), the matrix values. +! +! Input, real ( kind = rk ) X(N), the vector to be multiplied by A. +! +! Output, real ( kind = rk ) W(N), the value of A*X. +! + implicit none + + integer, parameter :: rk = kind ( 1.0D+00 ) + + integer n + integer nz_num + + real ( kind = rk ) a(nz_num) + integer i + integer ia(nz_num) + integer j + integer ja(nz_num) + integer k + real ( kind = rk ) w(n) + real ( kind = rk ) x(n) + + w(1:n) = 0.0D+00 + + do k = 1, nz_num + i = ia(k) + j = ja(k) + w(i) = w(i) + a(k) * x(j) + end do + + return +end +subroutine diagonal_pointer_cr ( n, nz_num, ia, ja, ua ) + +!*****************************************************************************80 +! +!! diagonal_pointer_cr() finds diagonal entries in a sparse compressed row matrix. +! +! Discussion: +! +! The matrix A is assumed to be stored in compressed row format. Only +! the nonzero entries of A are stored. The vector JA stores the +! column index of the nonzero value. The nonzero values are sorted +! by row, and the compressed row vector IA then has the property that +! the entries in A and JA that correspond to row I occur in indices +! IA[I] through IA[I+1]-1. +! +! The array UA can be used to locate the diagonal elements of the matrix. +! +! It is assumed that every row of the matrix includes a diagonal element, +! and that the elements of each row have been ascending sorted. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 18 July 2007 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Parameters: +! +! Input, integer N, the order of the system. +! +! Input, integer NZ_NUM, the number of nonzeros. +! +! Input, integer IA(N+1), JA(NZ_NUM), the row and column +! indices of the matrix values. The row vector has been compressed. +! On output, the order of the entries of JA may have changed because of +! the sorting. +! +! Output, integer UA(N), the index of the diagonal element +! of each row. +! + implicit none + + integer n + integer nz_num + + integer i + integer ia(n+1) + integer k + integer ja(nz_num) + integer ua(n) + + ua(1:n) = -1 + + do i = 1, n + do k = ia(i), ia(i+1) - 1 + if ( ja(k) == i ) then + ua(i) = k + end if + end do + end do + + return +end +subroutine ilu_cr ( n, nz_num, ia, ja, a, ua, l ) + +!*****************************************************************************80 +! +!! ilu_cr() computes the incomplete LU factorization of a matrix. +! +! Discussion: +! +! The matrix A is assumed to be stored in compressed row format. Only +! the nonzero entries of A are stored. The vector JA stores the +! column index of the nonzero value. The nonzero values are sorted +! by row, and the compressed row vector IA then has the property that +! the entries in A and JA that correspond to row I occur in indices +! IA(I) through IA(I+1)-1. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 27 July 2007 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Parameters: +! +! Input, integer N, the order of the system. +! +! Input, integer NZ_NUM, the number of nonzeros. +! +! Input, integer IA(N+1), JA(NZ_NUM), the row and column +! indices of the matrix values. The row vector has been compressed. +! +! Input, real ( kind = rk ) A(NZ_NUM), the matrix values. +! +! Input, integer UA(N), the index of the diagonal element +! of each row. +! +! Output, real ( kind = rk ) L(NZ_NUM), the ILU factorization of A. +! + implicit none + + integer, parameter :: rk = kind ( 1.0D+00 ) + + integer n + integer nz_num + + real ( kind = rk ) a(nz_num) + integer i + integer ia(n+1) + integer iw(n) + integer j + integer ja(nz_num) + integer jj + integer jrow + integer jw + integer k + real ( kind = rk ) l(nz_num) + real ( kind = rk ) tl + integer ua(n) +! +! Copy A. +! + l(1:nz_num) = a(1:nz_num) + + do i = 1, n +! +! IW points to the nonzero entries in row I. +! + iw(1:n) = -1 + + do k = ia(i), ia(i+1) - 1 + iw(ja(k)) = k + end do + + do j = ia(i), ia(i+1) - 1 + jrow = ja(j) + if ( i <= jrow ) then + exit + end if + tl = l(j) * l(ua(jrow)) + l(j) = tl + do jj = ua(jrow) + 1, ia(jrow+1) - 1 + jw = iw(ja(jj)) + if ( jw /= -1 ) then + l(jw) = l(jw) - tl * l(jj) + end if + end do + end do + + ua(i) = j + + if ( jrow /= i ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'ILU_CR - Fatal error!' + write ( *, '(a)' ) ' JROW ~= I' + write ( *, '(a,i8)' ) ' JROW = ', jrow + write ( *, '(a,i8)' ) ' I = ', i + stop + end if + + if ( l(j) == 0.0D+00 ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'ILU_CR - Fatal error!' + write ( *, '(a,i8)' ) ' Zero pivot on step I = ', i + write ( *, '(a,i8,a)' ) ' L(', j, ') = 0.0' + stop + end if + + l(j) = 1.0D+00 / l(j) + + end do + + l(ua(1:n)) = 1.0D+00 / l(ua(1:n)) + + return +end +subroutine lus_cr ( n, nz_num, ia, ja, l, ua, r, z ) + +!*****************************************************************************80 +! +!! lus_cr() applies the incomplete LU preconditioner. +! +! Discussion: +! +! The linear system M * Z = R is solved for Z. M is the incomplete +! LU preconditioner matrix, and R is a vector supplied by the user. +! So essentially, we're solving L * U * Z = R. +! +! The matrix A is assumed to be stored in compressed row format. Only +! the nonzero entries of A are stored. The vector JA stores the +! column index of the nonzero value. The nonzero values are sorted +! by row, and the compressed row vector IA then has the property that +! the entries in A and JA that correspond to row I occur in indices +! IA(I) through IA(I+1)-1. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 18 July 2007 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Parameters: +! +! Input, integer N, the order of the system. +! +! Input, integer NZ_NUM, the number of nonzeros. +! +! Input, integer IA(N+1), JA(NZ_NUM), the row and column +! indices of the matrix values. The row vector has been compressed. +! +! Input, real ( kind = rk ) L(NZ_NUM), the matrix values. +! +! Input, integer UA(N), the index of the diagonal element +! of each row. +! +! Input, real ( kind = rk ) R(N), the right hand side. +! +! Output, real ( kind = rk ) Z(N), the solution of the system M * Z = R. +! + implicit none + + integer, parameter :: rk = kind ( 1.0D+00 ) + + integer n + integer nz_num + + integer i + integer ia(n+1) + integer j + integer ja(nz_num) + real ( kind = rk ) l(nz_num) + real ( kind = rk ) r(n) + integer ua(n) + real ( kind = rk ) w(n) + real ( kind = rk ) z(n) +! +! Copy R in. +! + w(1:n) = r(1:n) +! +! Solve L * w = w where L is unit lower triangular. +! + do i = 2, n + do j = ia(i), ua(i) - 1 + w(i) = w(i) - l(j) * w(ja(j)) + end do + end do +! +! Solve U * w = w, where U is upper triangular. +! + do i = n, 1, -1 + do j = ua(i) + 1, ia(i+1) - 1 + w(i) = w(i) - l(j) * w(ja(j)) + end do + w(i) = w(i) / l(ua(i)) + end do +! +! Copy Z out. +! + z(1:n) = w(1:n) + + return +end +subroutine mgmres_csr ( n, nz_num, ia, ja, a, x, rhs, itr_max, mr, tol_abs, & + tol_rel ) + +!*****************************************************************************80 +! +!! mgmres_csr() applies restarted GMRES to a sparse triplet matrix. +! +! Discussion: +! +! The linear system A*X=B is solved iteratively. +! +! The matrix A is assumed to be stored in sparse triplet form. Only +! the nonzero entries of A are stored. For instance, the K-th nonzero +! entry in the matrix is stored by: +! +! A(K) = value of entry, +! IA(K) = row of entry, +! JA(K) = column of entry. +! +! Thanks to Jesus Pueblas Sanchez-Guerra for supplying two +! corrections to the code on 31 May 2007. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 13 July 2007 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Richard Barrett, Michael Berry, Tony Chan, James Demmel, +! June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo, +! Charles Romine, Henk van der Vorst, +! Templates for the Solution of Linear Systems: +! Building Blocks for Iterative Methods, +! SIAM, 1994. +! ISBN: 0898714710, +! LC: QA297.8.T45. +! +! Tim Kelley, +! Iterative Methods for Linear and Nonlinear Equations, +! SIAM, 2004, +! ISBN: 0898713528, +! LC: QA297.8.K45. +! +! Yousef Saad, +! Iterative Methods for Sparse Linear Systems, +! Second Edition, +! SIAM, 2003, +! ISBN: 0898715342, +! LC: QA188.S17. +! +! Parameters: +! +! Input, integer N, the order of the linear system. +! +! Input, integer NZ_NUM, the number of nonzero matrix values. +! +! Input, integer IA(NZ_NUM), JA(NZ_NUM), the row and column +! indices of the matrix values. +! +! Input, real ( kind = rk ) A(NZ_NUM), the matrix values. +! +! Input/output, real ( kind = rk ) X(N); on input, an approximation to +! the solution. On output, an improved approximation. +! +! Input, real ( kind = rk ) RHS(N), the right hand side of the linear system. +! +! Input, integer ITR_MAX, the maximum number of (outer) +! iterations to take. +! +! Input, integer MR, the maximum number of (inner) iterations +! to take. 0 < MR <= N. +! +! Input, real ( kind = rk ) TOL_ABS, an absolute tolerance applied to the +! current residual. +! +! Input, real ( kind = rk ) TOL_REL, a relative tolerance comparing the +! current residual to the initial residual. +! + implicit none + + integer, parameter :: rk = kind ( 1.0D+00 ) + + integer mr + integer n + integer nz_num + + real ( kind = rk ) a(nz_num) + real ( kind = rk ) av + real ( kind = rk ) c(1:mr) + real ( kind = rk ), parameter :: delta = 1.0D-03 + real ( kind = rk ) g(1:mr+1) + real ( kind = rk ) h(1:mr+1,1:mr) + real ( kind = rk ) htmp + integer i + integer ia(n+1) + integer itr + integer itr_max + integer itr_used + integer j + integer ja(nz_num) + integer k + integer k_copy + real ( kind = rk ) mu + real ( kind = rk ) r(1:n) + real ( kind = rk ) rho + real ( kind = rk ) rho_tol + real ( kind = rk ) rhs(1:n) + real ( kind = rk ) s(1:mr) + real ( kind = rk ) tol_abs + real ( kind = rk ) tol_rel + real ( kind = rk ) v(1:n,1:mr+1) + logical, parameter :: verbose = .false. + real ( kind = rk ) x(1:n) + real ( kind = rk ) y(1:mr+1) + + itr_used = 0 + + if ( n < mr ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MGMRES_csr - Fatal error!' + write ( *, '(a)' ) ' N < MR.' + write ( *, '(a,i8)' ) ' N = ', n + write ( *, '(a,i8)' ) ' MR = ', mr + stop + end if + + do itr = 1, itr_max + + call ax_cr ( n, nz_num, ia, ja, a, x, r ) + + r(1:n) = rhs(1:n) - r(1:n) + + rho = sqrt ( dot_product ( r(1:n), r(1:n) ) ) + + if ( verbose ) then + write ( *, '(a,i8,a,g14.6)' ) ' ITR = ', itr, ' Residual = ', rho + end if + + if ( itr == 1 ) then + rho_tol = rho * tol_rel + end if + + v(1:n,1) = r(1:n) / rho + + g(1) = rho + g(2:mr+1) = 0.0D+00 + + h(1:mr+1,1:mr) = 0.0D+00 + + do k = 1, mr + + k_copy = k + + call ax_cr ( n, nz_num, ia, ja, a, v(1:n,k), v(1:n,k+1) ) + + av = sqrt ( dot_product ( v(1:n,k+1), v(1:n,k+1) ) ) + + do j = 1, k + h(j,k) = dot_product ( v(1:n,k+1), v(1:n,j) ) + v(1:n,k+1) = v(1:n,k+1) - h(j,k) * v(1:n,j) + end do + + h(k+1,k) = sqrt ( dot_product ( v(1:n,k+1), v(1:n,k+1) ) ) + + if ( av + delta * h(k+1,k) == av ) then + + do j = 1, k + htmp = dot_product ( v(1:n,k+1), v(1:n,j) ) + h(j,k) = h(j,k) + htmp + v(1:n,k+1) = v(1:n,k+1) - htmp * v(1:n,j) + end do + + h(k+1,k) = sqrt ( dot_product ( v(1:n,k+1), v(1:n,k+1) ) ) + + end if + + if ( h(k+1,k) /= 0.0D+00 ) then + v(1:n,k+1) = v(1:n,k+1) / h(k+1,k) + end if + + if ( 1 < k ) then + + y(1:k+1) = h(1:k+1,k) + + do j = 1, k - 1 + call mult_givens ( c(j), s(j), j, y(1:k+1) ) + end do + + h(1:k+1,k) = y(1:k+1) + + end if + + mu = sqrt ( h(k,k)**2 + h(k+1,k)**2 ) + c(k) = h(k,k) / mu + s(k) = -h(k+1,k) / mu + h(k,k) = c(k) * h(k,k) - s(k) * h(k+1,k) + h(k+1,k) = 0.0D+00 + call mult_givens ( c(k), s(k), k, g(1:k+1) ) + rho = abs ( g(k+1) ) + + itr_used = itr_used + 1 + + if ( verbose ) then + write ( *, '(a,i8,a,g14.6)' ) ' K = ', k, ' Residual = ', rho + end if + + if ( rho <= rho_tol .and. rho <= tol_abs ) then + exit + end if + + end do + + k = k_copy - 1 + + y(k+1) = g(k+1) / h(k+1,k+1) + + do i = k, 1, -1 + y(i) = ( g(i) - dot_product ( h(i,i+1:k+1), y(i+1:k+1) ) ) / h(i,i) + end do + + do i = 1, n + x(i) = x(i) + dot_product ( v(i,1:k+1), y(1:k+1) ) + end do + + if ( rho <= rho_tol .and. rho <= tol_abs ) then + exit + end if + + end do + + if ( verbose ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MGMRES_ST:' + write ( *, '(a,i8)' ) ' Iterations = ', itr_used + write ( *, '(a,g14.6)' ) ' Final residual = ', rho + end if + + return +end + +subroutine mgmres_st ( n, nz_num, ia, ja, a, x, rhs, itr_max, mr, tol_abs, & + tol_rel ) + +!*****************************************************************************80 +! +!! mgmres_st() applies restarted GMRES to a sparse triplet matrix. +! +! Discussion: +! +! The linear system A*X=B is solved iteratively. +! +! The matrix A is assumed to be stored in sparse triplet form. Only +! the nonzero entries of A are stored. For instance, the K-th nonzero +! entry in the matrix is stored by: +! +! A(K) = value of entry, +! IA(K) = row of entry, +! JA(K) = column of entry. +! +! Thanks to Jesus Pueblas Sanchez-Guerra for supplying two +! corrections to the code on 31 May 2007. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 13 July 2007 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Richard Barrett, Michael Berry, Tony Chan, James Demmel, +! June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo, +! Charles Romine, Henk van der Vorst, +! Templates for the Solution of Linear Systems: +! Building Blocks for Iterative Methods, +! SIAM, 1994. +! ISBN: 0898714710, +! LC: QA297.8.T45. +! +! Tim Kelley, +! Iterative Methods for Linear and Nonlinear Equations, +! SIAM, 2004, +! ISBN: 0898713528, +! LC: QA297.8.K45. +! +! Yousef Saad, +! Iterative Methods for Sparse Linear Systems, +! Second Edition, +! SIAM, 2003, +! ISBN: 0898715342, +! LC: QA188.S17. +! +! Parameters: +! +! Input, integer N, the order of the linear system. +! +! Input, integer NZ_NUM, the number of nonzero matrix values. +! +! Input, integer IA(NZ_NUM), JA(NZ_NUM), the row and column +! indices of the matrix values. +! +! Input, real ( kind = rk ) A(NZ_NUM), the matrix values. +! +! Input/output, real ( kind = rk ) X(N); on input, an approximation to +! the solution. On output, an improved approximation. +! +! Input, real ( kind = rk ) RHS(N), the right hand side of the linear system. +! +! Input, integer ITR_MAX, the maximum number of (outer) +! iterations to take. +! +! Input, integer MR, the maximum number of (inner) iterations +! to take. 0 < MR <= N. +! +! Input, real ( kind = rk ) TOL_ABS, an absolute tolerance applied to the +! current residual. +! +! Input, real ( kind = rk ) TOL_REL, a relative tolerance comparing the +! current residual to the initial residual. +! + implicit none + + integer, parameter :: rk = kind ( 1.0D+00 ) + + integer mr + integer n + integer nz_num + + real ( kind = rk ) a(nz_num) + real ( kind = rk ) av + real ( kind = rk ) c(1:mr) + real ( kind = rk ), parameter :: delta = 1.0D-03 + real ( kind = rk ) g(1:mr+1) + real ( kind = rk ) h(1:mr+1,1:mr) + real ( kind = rk ) htmp + integer i + integer ia(nz_num) + integer itr + integer itr_max + integer itr_used + integer j + integer ja(nz_num) + integer k + integer k_copy + real ( kind = rk ) mu + real ( kind = rk ) r(1:n) + real ( kind = rk ) rho + real ( kind = rk ) rho_tol + real ( kind = rk ) rhs(1:n) + real ( kind = rk ) s(1:mr) + real ( kind = rk ) tol_abs + real ( kind = rk ) tol_rel + real ( kind = rk ) v(1:n,1:mr+1) + logical, parameter :: verbose = .true. + real ( kind = rk ) x(1:n) + real ( kind = rk ) y(1:mr+1) + + itr_used = 0 + + if ( n < mr ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MGMRES_ST - Fatal error!' + write ( *, '(a)' ) ' N < MR.' + write ( *, '(a,i8)' ) ' N = ', n + write ( *, '(a,i8)' ) ' MR = ', mr + stop + end if + + do itr = 1, itr_max + + call ax_st ( n, nz_num, ia, ja, a, x, r ) + + r(1:n) = rhs(1:n) - r(1:n) + + rho = sqrt ( dot_product ( r(1:n), r(1:n) ) ) + + if ( verbose ) then + write ( *, '(a,i8,a,g14.6)' ) ' ITR = ', itr, ' Residual = ', rho + end if + + if ( itr == 1 ) then + rho_tol = rho * tol_rel + end if + + v(1:n,1) = r(1:n) / rho + + g(1) = rho + g(2:mr+1) = 0.0D+00 + + h(1:mr+1,1:mr) = 0.0D+00 + + do k = 1, mr + + k_copy = k + + call ax_st ( n, nz_num, ia, ja, a, v(1:n,k), v(1:n,k+1) ) + + av = sqrt ( dot_product ( v(1:n,k+1), v(1:n,k+1) ) ) + + do j = 1, k + h(j,k) = dot_product ( v(1:n,k+1), v(1:n,j) ) + v(1:n,k+1) = v(1:n,k+1) - h(j,k) * v(1:n,j) + end do + + h(k+1,k) = sqrt ( dot_product ( v(1:n,k+1), v(1:n,k+1) ) ) + + if ( av + delta * h(k+1,k) == av ) then + + do j = 1, k + htmp = dot_product ( v(1:n,k+1), v(1:n,j) ) + h(j,k) = h(j,k) + htmp + v(1:n,k+1) = v(1:n,k+1) - htmp * v(1:n,j) + end do + + h(k+1,k) = sqrt ( dot_product ( v(1:n,k+1), v(1:n,k+1) ) ) + + end if + + if ( h(k+1,k) /= 0.0D+00 ) then + v(1:n,k+1) = v(1:n,k+1) / h(k+1,k) + end if + + if ( 1 < k ) then + + y(1:k+1) = h(1:k+1,k) + + do j = 1, k - 1 + call mult_givens ( c(j), s(j), j, y(1:k+1) ) + end do + + h(1:k+1,k) = y(1:k+1) + + end if + + mu = sqrt ( h(k,k)**2 + h(k+1,k)**2 ) + c(k) = h(k,k) / mu + s(k) = -h(k+1,k) / mu + h(k,k) = c(k) * h(k,k) - s(k) * h(k+1,k) + h(k+1,k) = 0.0D+00 + call mult_givens ( c(k), s(k), k, g(1:k+1) ) + rho = abs ( g(k+1) ) + + itr_used = itr_used + 1 + + if ( verbose ) then + write ( *, '(a,i8,a,g14.6)' ) ' K = ', k, ' Residual = ', rho + end if + + if ( rho <= rho_tol .and. rho <= tol_abs ) then + exit + end if + + end do + + k = k_copy - 1 + + y(k+1) = g(k+1) / h(k+1,k+1) + + do i = k, 1, -1 + y(i) = ( g(i) - dot_product ( h(i,i+1:k+1), y(i+1:k+1) ) ) / h(i,i) + end do + + do i = 1, n + x(i) = x(i) + dot_product ( v(i,1:k+1), y(1:k+1) ) + end do + + if ( rho <= rho_tol .and. rho <= tol_abs ) then + exit + end if + + end do + + if ( verbose ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'MGMRES_ST:' + write ( *, '(a,i8)' ) ' Iterations = ', itr_used + write ( *, '(a,g14.6)' ) ' Final residual = ', rho + end if + + return +end +subroutine mult_givens ( c, s, k, g ) + +!*****************************************************************************80 +! +!! mult_givens() applies a Givens rotation to two successive entries of a vector. +! +! Discussion: +! +! In order to make it easier to compare this code with the Original C, +! the vector indexing is 0-based. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 08 August 2006 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Richard Barrett, Michael Berry, Tony Chan, James Demmel, +! June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo, +! Charles Romine, Henk van der Vorst, +! Templates for the Solution of Linear Systems: +! Building Blocks for Iterative Methods, +! SIAM, 1994. +! ISBN: 0898714710, +! LC: QA297.8.T45. +! +! Tim Kelley, +! Iterative Methods for Linear and Nonlinear Equations, +! SIAM, 2004, +! ISBN: 0898713528, +! LC: QA297.8.K45. +! +! Yousef Saad, +! Iterative Methods for Sparse Linear Systems, +! Second Edition, +! SIAM, 2003, +! ISBN: 0898715342, +! LC: QA188.S17. +! +! Input: +! +! real ( kind = rk ) C, S, the cosine and sine of a Givens rotation. +! +! integer K, indicates the location of the first vector entry. +! +! real ( kind = rk ) G(1:K+1), the vector to be modified. +! +! Output: +! +! real ( kind = rk ) G(1:K+1): the Givens rotation has been +! applied to entries G(K) and G(K+1). +! + implicit none + + integer, parameter :: rk = kind ( 1.0D+00 ) + + integer k + + real ( kind = rk ) c + real ( kind = rk ) g(1:k+1) + real ( kind = rk ) g1 + real ( kind = rk ) g2 + real ( kind = rk ) s + + g1 = c * g(k) - s * g(k+1) + g2 = s * g(k) + c * g(k+1) + + g(k) = g1 + g(k+1) = g2 + + return +end +subroutine pmgmres_ilu_cr ( n, nz_num, ia, ja, a, x, rhs, itr_max, mr, & + tol_abs, tol_rel ) + +!*****************************************************************************80 +! +!! pmgmres_ilu_cr() applies the preconditioned restarted GMRES algorithm. +! +! Discussion: +! +! The matrix A is assumed to be stored in compressed row format. Only +! the nonzero entries of A are stored. The vector JA stores the +! column index of the nonzero value. The nonzero values are sorted +! by row, and the compressed row vector IA then has the property that +! the entries in A and JA that correspond to row I occur in indices +! IA(I) through IA(I+1)-1. +! +! This routine uses the incomplete LU decomposition for the +! preconditioning. This preconditioner requires that the sparse +! matrix data structure supplies a storage position for each diagonal +! element of the matrix A, and that each diagonal element of the +! matrix A is not zero. +! +! Thanks to Jesus Pueblas Sanchez-Guerra for supplying two +! corrections to the code on 31 May 2007. +! +! Thanks to Zhitao Li for pointing out that the L array should be dimensioned +! NZ_NUM, rather than IA[N]+1, 26 October 2022. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 26 October 2022 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Richard Barrett, Michael Berry, Tony Chan, James Demmel, +! June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo, +! Charles Romine, Henk van der Vorst, +! Templates for the Solution of Linear Systems: +! Building Blocks for Iterative Methods, +! SIAM, 1994. +! ISBN: 0898714710, +! LC: QA297.8.T45. +! +! Tim Kelley, +! Iterative Methods for Linear and Nonlinear Equations, +! SIAM, 2004, +! ISBN: 0898713528, +! LC: QA297.8.K45. +! +! Yousef Saad, +! Iterative Methods for Sparse Linear Systems, +! Second Edition, +! SIAM, 2003, +! ISBN: 0898715342, +! LC: QA188.S17. +! +! Input: +! +! integer N, the order of the linear system. +! +! integer NZ_NUM, the number of nonzero matrix values. +! +! integer IA(N+1), JA(NZ_NUM), the row and column indices +! of the matrix values. The row vector has been compressed. +! +! real ( kind = rk ) A(NZ_NUM), the matrix values. +! +! real ( kind = rk ) X(N); an approximation to the solution. +! +! real ( kind = rk ) RHS(N), the right hand side of the linear system. +! +! integer ITR_MAX, the maximum number of (outer) iterations to take. +! +! integer MR, the maximum number of (inner) iterations +! to take. MR must be less than N. +! +! real ( kind = rk ) TOL_ABS, an absolute tolerance applied to the +! current residual. +! +! real ( kind = rk ) TOL_REL, a relative tolerance comparing the +! current residual to the initial residual. +! +! Output: +! +! real ( kind = rk ) X(N); an improved approximate solution. +! + implicit none + + integer, parameter :: rk = kind ( 1.0D+00 ) + + integer mr + integer n + integer nz_num + + real ( kind = rk ) a(nz_num) + real ( kind = rk ) av + real ( kind = rk ) c(mr+1) + real ( kind = rk ), parameter :: delta = 1.0D-03 + real ( kind = rk ) g(mr+1) + real ( kind = rk ) h(mr+1,mr) + real ( kind = rk ) htmp + integer i + integer ia(n+1) + integer itr + integer itr_max + integer itr_used + integer j + integer ja(nz_num) + integer k + integer k_copy +! +! Dimension of L changed from ia(n+1)+1 to nz_num, 26 October 2022. +! + real ( kind = rk ) l(nz_num) +! real ( kind = rk ) l(ia(n+1)+1) + real ( kind = rk ) mu + real ( kind = rk ) r(n) + real ( kind = rk ) rho + real ( kind = rk ) rho_tol + real ( kind = rk ) rhs(n) + real ( kind = rk ) s(mr+1) + real ( kind = rk ) tol_abs + real ( kind = rk ) tol_rel + integer ua(n) + real ( kind = rk ) v(n,mr+1); + logical, parameter :: verbose = .true. + real ( kind = rk ) x(n) + real ( kind = rk ) y(mr+1) + + itr_used = 0 +! +! Sort the entries in the matrix. +! + call rearrange_cr ( n, nz_num, ia, ja, a ) +! +! Index the diagonal entries. +! + call diagonal_pointer_cr ( n, nz_num, ia, ja, ua ) +! +! Compute the incomplete LU factorization of the matrix. +! + call ilu_cr ( n, nz_num, ia, ja, a, ua, l ) + + if ( verbose ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'pmgmres_ilu_cr():' + write ( *, '(a,i4)' ) ' Number of unknowns = ', n + end if + + do itr = 1, itr_max + + call ax_cr ( n, nz_num, ia, ja, a, x, r ) + + r(1:n) = rhs(1:n) - r(1:n) +! +! Apply the ILU preconditioner. +! + call lus_cr ( n, nz_num, ia, ja, l, ua, r, r ) + + rho = sqrt ( dot_product ( r, r ) ) + + if ( verbose ) then + write ( *, '(a,i4,a,g14.6)' ) ' ITR = ', itr, ' Residual = ', rho + end if + + if ( itr == 1 ) then + rho_tol = rho * tol_rel + end if + + v(1:n,1) = r(1:n) / rho + + g(1) = rho + g(2:mr+1) = 0.0D+00 + + h(1:mr+1,1:mr) = 0.0D+00 + + do k = 1, mr + + k_copy = k + + call ax_cr ( n, nz_num, ia, ja, a, v(1:n,k), v(1:n,k+1) ) +! +! Apply the ILU preconditioner. +! + call lus_cr ( n, nz_num, ia, ja, l, ua, v(1:n,k+1), v(1:n,k+1) ) + + av = sqrt ( dot_product ( v(1:n,k+1), v(1:n,k+1) ) ) + + do j = 1, k + h(j,k) = dot_product ( v(1:n,k+1), v(1:n,j) ) + v(1:n,k+1) = v(1:n,k+1) - v(1:n,j) * h(j,k) + end do + + h(k+1,k) = sqrt ( dot_product ( v(1:n,k+1), v(1:n,k+1) ) ) + + if ( ( av + delta * h(k+1,k)) == av ) then + do j = 1, k + htmp = dot_product ( v(1:n,k+1), v(1:n,j) ) + h(j,k) = h(j,k) + htmp + v(1:n,k+1) = v(1:n,k+1) - htmp * v(1:n,j) + end do + h(k+1,k) = sqrt ( dot_product ( v(1:n,k+1), v(1:n,k+1) ) ) + end if + + if ( h(k+1,k) /= 0.0D+00 ) then + v(1:n,k+1) = v(1:n,k+1) / h(k+1,k) + end if + + if ( 1 < k ) then + y(1:k+1) = h(1:k+1,k) + do j = 1, k - 1 + call mult_givens ( c(j), s(j), j, y ) + end do + h(1:k+1,k) = y(1:k+1) + end if + + mu = sqrt ( h(k,k)**2 + h(k+1,k)**2 ) + + c(k) = h(k,k) / mu + s(k) = -h(k+1,k) / mu + h(k,k) = c(k) * h(k,k) - s(k) * h(k+1,k) + h(k+1,k) = 0.0D+00 + call mult_givens ( c(k), s(k), k, g ) + + rho = abs ( g(k+1) ) + + itr_used = itr_used + 1 + + if ( verbose ) then + write ( *, '(a,i4,a,g14.6)' ) ' K = ', k, ' Residual = ', rho + end if + + if ( rho <= rho_tol .and. rho <= tol_abs ) then + exit + end if + + end do + + k = k_copy - 1 + + y(k+1) = g(k+1) / h(k+1,k+1) + + do i = k, 1, -1 + y(i) = ( g(i) - dot_product ( h(i,i+1:k+1), y(i+1:k+1) ) ) / h(i,i) + end do + + do i = 1, n + x(i) = x(i) + dot_product ( v(i,1:k+1), y(1:k+1) ) + end do + + if ( rho <= rho_tol .and. rho <= tol_abs ) then + exit + end if + + end do + + if ( verbose ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'pmgmres_ilu_cr():' + write ( *, '(a,i6)' ) ' Iterations = ', itr_used + write ( *, '(a,g14.6)' ) ' Final residual = ', rho + end if + + return +end +subroutine rearrange_cr ( n, nz_num, ia, ja, a ) + +!*****************************************************************************80 +! +!! rearrange_cr() sorts a sparse compressed row matrix. +! +! Discussion: +! +! This routine guarantees that the entries in the CR matrix +! are properly sorted. +! +! After the sorting, the entries of the matrix are rearranged in such +! a way that the entries of each column are listed in ascending order +! of their column values. +! +! The matrix A is assumed to be stored in compressed row format. Only +! the nonzero entries of A are stored. The vector JA stores the +! column index of the nonzero value. The nonzero values are sorted +! by row, and the compressed row vector IA then has the property that +! the entries in A and JA that correspond to row I occur in indices +! IA(I) through IA(I+1)-1. +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 17 July 2007 +! +! Author: +! +! Original C version by Lili Ju. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Richard Barrett, Michael Berry, Tony Chan, James Demmel, +! June Donato, Jack Dongarra, Victor Eijkhout, Roidan Pozo, +! Charles Romine, Henk van der Vorst, +! Templates for the Solution of Linear Systems: +! Building Blocks for Iterative Methods, +! SIAM, 1994. +! ISBN: 0898714710, +! LC: QA297.8.T45. +! +! Tim Kelley, +! Iterative Methods for Linear and Nonlinear Equations, +! SIAM, 2004, +! ISBN: 0898713528, +! LC: QA297.8.K45. +! +! Yousef Saad, +! Iterative Methods for Sparse Linear Systems, +! Second Edition, +! SIAM, 2003, +! ISBN: 0898715342, +! LC: QA188.S17. +! +! Parameters: +! +! Input, integer N, the order of the system. +! +! Input, integer NZ_NUM, the number of nonzeros. +! +! Input, integer IA(N+1), the compressed row indices. +! +! Input/output, integer JA(NZ_NUM), the column indices. +! On output, these may have been rearranged by the sorting. +! +! Input/output, real ( kind = rk ) A(NZ_NUM), the matrix values. On output, +! the matrix values may have been moved somewhat because of the sorting. +! + implicit none + + integer, parameter :: rk = kind ( 1.0D+00 ) + + integer n + integer nz_num + + real ( kind = rk ) a(nz_num) + integer i + integer ia(n+1) + integer i4temp + integer ja(nz_num) + integer k + integer l + real ( kind = rk ) r8temp + + do i = 1, n + + do k = ia(i), ia(i+1) - 2 + do l = k + 1, ia(i+1) - 1 + + if ( ja(l) < ja(k) ) then + i4temp = ja(l) + ja(l) = ja(k) + ja(k) = i4temp + + r8temp = a(l) + a(l) = a(k) + a(k) = r8temp + end if + + end do + end do + + end do + + return +end +subroutine timestamp ( ) + +!*****************************************************************************80 +! +!! timestamp() prints the current YMDHMS date as a time stamp. +! +! Example: +! +! 31 May 2001 9:45:54.872 AM +! +! Licensing: +! +! This code is distributed under the MIT license. +! +! Modified: +! +! 18 May 2013 +! +! Author: +! +! John Burkardt +! + implicit none + + character ( len = 8 ) ampm + integer d + integer h + integer m + integer mm + character ( len = 9 ), parameter, dimension(12) :: month = (/ & + 'January ', 'February ', 'March ', 'April ', & + 'May ', 'June ', 'July ', 'August ', & + 'September', 'October ', 'November ', 'December ' /) + integer n + integer s + integer values(8) + integer y + + call date_and_time ( values = values ) + + y = values(1) + m = values(2) + d = values(3) + h = values(5) + n = values(6) + s = values(7) + mm = values(8) + + if ( h < 12 ) then + ampm = 'AM' + else if ( h == 12 ) then + if ( n == 0 .and. s == 0 ) then + ampm = 'Noon' + else + ampm = 'PM' + end if + else + h = h - 12 + if ( h < 12 ) then + ampm = 'PM' + else if ( h == 12 ) then + if ( n == 0 .and. s == 0 ) then + ampm = 'Midnight' + else + ampm = 'AM' + end if + end if + end if + + write ( *, '(i2.2,1x,a,1x,i4,2x,i2,a1,i2.2,a1,i2.2,a1,i3.3,1x,a)' ) & + d, trim ( month(m) ), y, h, ':', n, ':', s, '.', mm, trim ( ampm ) + + return +end diff --git a/src/module.f90 b/src/module.f90 index 1a2b8efc..68bc9843 100644 --- a/src/module.f90 +++ b/src/module.f90 @@ -450,7 +450,11 @@ module para logical :: valley_projection_calc ! for valley projection, only for sparse hamiltonina, you need to provide the valley operator logical :: w3d_nested_calc = .false. - + + !> an integer to print the messages + !> iprint_level=3 : print all the debug messages + integer :: iprint_level = 1 + namelist / Control / BulkBand_calc, BulkFS_calc, BulkFS_Plane_calc, & BulkFS_plane_stack_calc, BulkGap_plane_calc, & QPI_unfold_plane_calc, & @@ -482,7 +486,7 @@ module para LandauLevel_B_dos_calc,LanczosBand_calc,LanczosDos_calc, & LandauLevel_B_calc, LandauLevel_kplane_calc,landau_chern_calc, & FermiLevel_calc,ANE_calc, export_newhr,export_maghr,w3d_nested_calc, & - valley_projection_calc + valley_projection_calc, iprint_level integer :: Nslab ! Number of slabs for 2d Slab system integer :: Nslab1 ! Number of slabs for 1D wire system @@ -1062,6 +1066,9 @@ module para !> dimension number_group_operators, Num_atoms integer, allocatable :: imap_sym(:, :) + real(dp) :: time_cost_t1=0d0 + real(dp) :: time_cost_t2=0d0 + real(dp) :: time_cost_t3=0d0 end module para diff --git a/src/readHmnR.f90 b/src/readHmnR.f90 index 1fbaa9f1..d2d32174 100644 --- a/src/readHmnR.f90 +++ b/src/readHmnR.f90 @@ -181,6 +181,11 @@ subroutine readNormalHmnR() endif endif + !> get the ir index that the R(ir)=(0, 0, 0) + ir0=0 + do ir=1, nrpts + if (irvec(1, ir)==0.and.irvec(2, ir)==0.and.irvec(3, ir)==0) ir0=ir + enddo !> Adding zeeman field !> Bx=Bdirection(1) @@ -190,22 +195,19 @@ subroutine readNormalHmnR() !> sx, sy, sz are Pauli matrices. if (Add_Zeeman_Field) then if (abs(Zeeman_energy_in_eV) After considering the Zeeman field, we already extended the spin space to spin-full. SOC = 1 - endif ! Add_Zeeman_Field + endif ! Add_Zeeman_Field call get_stacking_direction_and_pos(add_electric_field, pos) - ir0=0 - do ir=1, nrpts - if (irvec(1, ir)==0.and.irvec(2, ir)==0.and.irvec(3, ir)==0) ir0=ir - enddo if (add_electric_field>0) then io=0 @@ -797,7 +799,7 @@ subroutine readsparse_overlap 1003 continue close(13) - if (cpuid.eq.0) write(stdout, '(a, i)')' >> Number of non-zeros splen_overlap_input', splen_overlap_input + if (cpuid.eq.0) write(stdout, '(a, i10)')' >> Number of non-zeros splen_overlap_input', splen_overlap_input if (cpuid.eq.0) write(stdout, '(a)')' >> Sparse overlap matrix reading finished ' return @@ -1129,71 +1131,70 @@ end subroutine reorder_wannierbasis -!subroutine add_zeeman_normal_hr(Bx_in_Tesla, By_in_Tesla, Bz_in_Tesla) -! !> add Zeeman energy on the sparse hmnr based on the magnetic field strength -! !> magnetic_field_in_tesla is in Tesla -! !> Here the coordinate system is the same as user-specified in the LATTICE card. -! use para -! implicit none - -! real(dp), intent(in) :: Bx_in_Tesla, By_in_Tesla, Bz_in_Tesla - -! integer :: nwann_nsoc, i, j, ir0, ir -! real(dp) :: Zeeman_energy_in_eV_factor - -! nwann_nsoc= Num_wann/2 - -! if (.not.Add_Zeeman_Field) return - -! ir0=0 -! do ir=1, Nrpts -! if (irvec(1,ir)==0.and.irvec(2,ir)==0.and.irvec(3,ir)==0) then -! ir0= ir -! endif -! enddo -! if (ir0==0) stop 'something wrong with irvec in subroutine add_zeeman_sparse_hr' - -! Zeeman_energy_in_eV_factor= Effective_gfactor*Bohr_magneton - -! if (SOC==0) then -! do ir=1, Nrpts -! do n=1, nwann_nsoc -! do m=1, nwann_nsoc -! if (irvec(1, ir)==0.and.irvec(2, ir)==0.and.irvec(3, ir)==0.and.n==m) then -! HmnR(n+nwann_nsoc, m+nwann_nsoc, ir)= HmnR(n, m, ir)- Zeeman_energy_in_eV_factor/2d0*Bz_in_Tesla -! HmnR(n, m, ir) = HmnR(n, m, ir)+ Zeeman_energy_in_eV_factor/2d0*Bz_in_Tesla -! HmnR(n , m+nwann_nsoc, ir)= Zeeman_energy_in_eV_factor/2d0*Bx_in_Tesla & -! - zi*Zeeman_energy_in_eV_factor/2d0*By_in_Tesla -! HmnR(n+nwann_nsoc, m , ir)= Zeeman_energy_in_eV_factor/2d0*Bx_in_Tesla & -! + zi*Zeeman_energy_in_eV_factor/2d0*By_in_Tesla -! else -! HmnR(n+nwann_nsoc, m+nwann_nsoc, ir)= HmnR(n, m, ir) -! endif ! R=0 -! enddo ! m -! enddo ! n -! enddo ! ir -! else -! do ir=1, Nrpts -! if (irvec(1, ir)/=0.or.irvec(2, ir)/=0.or.irvec(3, ir)/=0) cycle -! do n=1, nwann_nsoc -! do m=1, nwann_nsoc -! if (n==m) then -! HmnR(n+nwann_nsoc, m+nwann_nsoc, ir)= HmnR(n+nwann_nsoc, & -! m+nwann_nsoc, ir)-Zeeman_energy_in_eV_factor/2d0*Bz_in_Tesla -! HmnR(n , m+nwann_nsoc, ir)= HmnR(n, m+nwann_nsoc, ir)+ & -! Zeeman_energy_in_eV_factor/2d0*Bx_in_Tesla & -! -zi*Zeeman_energy_in_eV_factor/2d0*By_in_Tesla -! HmnR(n+nwann_nsoc, m , ir)= HmnR(n+nwann_nsoc, m, ir)+ & -! Zeeman_energy_in_eV_factor/2d0*Bx_in_Tesla & -! +zi*Zeeman_energy_in_eV_factor/2d0*By_in_Tesla -! HmnR(n, m, ir)= HmnR(n, m, ir)+Zeeman_energy_in_eV_factor/2d0*Bz_in_Tesla -! endif ! R=0 -! enddo ! m -! enddo ! n -! enddo ! ir -! endif ! SOC - - -! return - -!end subroutine add_zeeman_normal_hr + subroutine add_zeeman_normal_hr() + !> add Zeeman energy on the sparse hmnr based on Zeeman_energy_in_eV and the magnetic field direction + !> Bx=Bdirection(1) + !> By=Bdirection(2) + !> Bz=Bdirection(3) + !> magnetic_field_in_tesla is in Tesla + !> Here the coordinate system is the same as user-specified in the LATTICE card. + !> H_zeeman + use para + implicit none + + real(dp) :: Bdirection_x, Bdirection_y, Bdirection_z + + integer :: nwann_nsoc, n, m, ir0, ir + real(dp) :: Zeeman_energy_in_au + + if (.not.Add_Zeeman_Field) return + + nwann_nsoc= Num_wann/2 + + ir0=0 + do ir=1, Nrpts + if (irvec(1,ir)==0.and.irvec(2,ir)==0.and.irvec(3,ir)==0) then + ir0= ir + endif + enddo + if (ir0==0) stop 'something wrong with irvec in subroutine add_zeeman_sparse_hr' + + if (abs(Zeeman_energy_in_eV) in atomic unit + Zeeman_energy_in_au= Effective_gfactor*Bohr_magneton*Bmagnitude + else + Zeeman_energy_in_au= Zeeman_energy_in_eV*eV2Hartree + endif + + if (SOC==0) then + !> We need to extend the spin up to the spin down part + do n=1, nwann_nsoc + HmnR(n, n, ir0) = HmnR(n, n, ir0)+ Zeeman_energy_in_au/2d0*Bdirection(3) + HmnR(n+nwann_nsoc, n+nwann_nsoc, ir0)= HmnR(n, n, ir0)- Zeeman_energy_in_au/2d0*Bdirection(3) + HmnR(n , n+nwann_nsoc, ir0)= Zeeman_energy_in_au/2d0*Bdirection(1) & + - zi*Zeeman_energy_in_au/2d0*Bdirection(2) + HmnR(n+nwann_nsoc, n , ir0)= Zeeman_energy_in_au/2d0*Bdirection(1) & + + zi*Zeeman_energy_in_au/2d0*Bdirection(2) + enddo + + do ir=1, Nrpts + do m=1, nwann_nsoc + do n=1, nwann_nsoc + HmnR(n+nwann_nsoc, m+nwann_nsoc, ir)= HmnR(n, m, ir) + enddo ! m + enddo ! n + enddo ! ir + else + do n=1, nwann_nsoc + HmnR(n, n, ir0) = HmnR(n, n, ir0)+ Zeeman_energy_in_au/2d0*Bdirection(3) + HmnR(n+nwann_nsoc, n+nwann_nsoc, ir0)= HmnR(n, n, ir0)- Zeeman_energy_in_au/2d0*Bdirection(3) + HmnR(n , n+nwann_nsoc, ir0)= Zeeman_energy_in_au/2d0*Bdirection(1) & + - zi*Zeeman_energy_in_au/2d0*Bdirection(2) + HmnR(n+nwann_nsoc, n , ir0)= Zeeman_energy_in_au/2d0*Bdirection(1) & + + zi*Zeeman_energy_in_au/2d0*Bdirection(2) + enddo + endif ! SOC + + return + + end subroutine add_zeeman_normal_hr diff --git a/src/readinput.f90 b/src/readinput.f90 index 2bb1f423..0f925549 100644 --- a/src/readinput.f90 +++ b/src/readinput.f90 @@ -562,7 +562,7 @@ subroutine readinput Magp_max=0 wcc_calc_tol= 0.08 wcc_neighbour_tol= 0.3 - NumLCZVecs=400 + NumLCZVecs=100 NumRandomConfs=1 NumSelectedEigenVals=0 Beta= 100 @@ -3523,7 +3523,7 @@ subroutine readinput ! build the map between supercell (Origin_cell) and primitive cell (Folded_cell) if (BulkBand_unfold_line_calc.or.BulkBand_unfold_plane_calc.or.QPI_unfold_plane_calc.or.Landaulevel_unfold_line_calc)then - call build_map_supercell_primitivecell(Origin_cell, Folded_cell) + call build_map_supercell_primitivecell endif !> close wt.in diff --git a/src/sigma_OHE.f90 b/src/sigma_OHE.f90 index 2e7886b9..93b8c0cb 100644 --- a/src/sigma_OHE.f90 +++ b/src/sigma_OHE.f90 @@ -78,6 +78,8 @@ subroutine sigma_ohe_calc_symm(mu_array, KBT_array, BTau_array, Nband_Fermi_Leve real(dp) :: time_start, time_end real(dp) :: time_start0, time_end0 + real(dp) :: time_start1, time_end1 + real(dp) :: time_rkf45, time_velocity, time_integral integer :: NSlice_Btau_inuse !> Btau slices for Runge-Kutta integration @@ -117,7 +119,8 @@ subroutine sigma_ohe_calc_symm(mu_array, KBT_array, BTau_array, Nband_Fermi_Leve !> number of steps used in the Runge-Kutta integration integer :: NSlice_Btau integer :: NSlice_Btau_local - real(dp), allocatable :: kout(:, :) + real(dp), allocatable :: kout(:, :), kout_all(:, :) + real(dp), allocatable :: weights(:) !> define some arrays for different bands. Since there are different number !> of k points left for different bands. @@ -148,7 +151,7 @@ subroutine sigma_ohe_calc_symm(mu_array, KBT_array, BTau_array, Nband_Fermi_Leve !> file index !integer, allocatable :: myfileindex(:) - character(80) :: sigmafilename, bandname, tname, muname + character(80) :: sigmafilename, bandname, tname, muname, ibname, ikname, filename !> inverse of group operator @@ -234,6 +237,7 @@ subroutine sigma_ohe_calc_symm(mu_array, KBT_array, BTau_array, Nband_Fermi_Leve Nk_start= KCube3D_total%Nk_start Nk_end= KCube3D_total%Nk_end + allocate( weights(Nslice_BTau_Max)) allocate( Ek(Nband_Fermi_Level)) allocate( Enk(Nk_start:Nk_end, Nband_Fermi_Level)) allocate( velocity(3, Nk_start:Nk_end, Nband_Fermi_Level)) @@ -246,6 +250,7 @@ subroutine sigma_ohe_calc_symm(mu_array, KBT_array, BTau_array, Nband_Fermi_Leve velocity_k= 0d0 velocity_bar= 0d0 velocity_bar_k= 0d0 + weights = 0d0 time_start= 0d0 time_end= 0d0 @@ -525,7 +530,9 @@ subroutine sigma_ohe_calc_symm(mu_array, KBT_array, BTau_array, Nband_Fermi_Leve !> a temp array used in RKFS allocate(kout(3, NSlice_Btau)) + allocate(kout_all(3, NSlice_Btau)) kout= 0d0 + kout_all= 0d0 !> allocate array for sigmafileindex and rhofileindex allocate(sigmafileindex(OmegaNum)) @@ -871,6 +878,7 @@ subroutine cal_sigma_iband_k !> Runge-Kutta only applied with BTauMax>0 !> if the magnetic field is zero. + call now(time_start1) if (BTauMax>eps3) then NSlice_Btau_inuse= NSlice_Btau call RKF45_pack(magnetic_field, bands_fermi_level(iband), & @@ -882,6 +890,8 @@ subroutine cal_sigma_iband_k enddo NSlice_Btau_inuse = NSlice_Btau endif + call now(time_end1) + time_rkf45= time_end1-time_start1 if (NSlice_Btau_inuse==1) then write(stdout, '(a, i6, a, i4, a, i6, a, 3f12.6)')& @@ -896,10 +906,12 @@ subroutine cal_sigma_iband_k cycle endif + call now(time_start1) if (NSlice_Btau_inuse==1)then ! vxB=0 call velocity_calc_iband(bands_fermi_level(iband), k_start, v_t) do ibtau=1, NSlice_Btau kout(:, ibtau)= k_start(:) + kout_all(:, ibtau)= k_start(:) klist_iband(iband)%velocity_k(:, ibtau)= v_t enddo NSlice_Btau_inuse = NSlice_Btau @@ -908,20 +920,39 @@ subroutine cal_sigma_iband_k k= kout(:, it) call velocity_calc_iband(bands_fermi_level(iband), k, v_t) klist_iband(iband)%velocity_k(:, it)= v_t + kout_all(:, it) = k enddo ! integrate over time step !> periodic kpath in the BZ can be reused do i=2, NSlice_Btau_inuse/icycle do j=1, icycle klist_iband(iband)%velocity_k(:, j+(i-1)*icycle)= klist_iband(iband)%velocity_k(:, j) + kout_all(:, j+(i-1)*icycle) = kout(:, j) enddo enddo do i=(NSlice_Btau_inuse/icycle)*icycle+1, NSlice_Btau klist_iband(iband)%velocity_k(:, i)= klist_iband(iband)%velocity_k(:, i-(NSlice_Btau_inuse/icycle)*icycle) + kout_all(:, i) = kout(:, i-(NSlice_Btau_inuse/icycle)*icycle) enddo endif + call now(time_end1) + time_velocity= time_end1- time_start1 + + if (cpuid==0.and.iprint_level==3) then + write(ibname, '(i6)')iband + write(ikname, '(i6)')ik + write(filename, '(5a)')'klist_ib_', trim(Adjustl(ibname)), '_ik', trim(Adjustl(ikname)), '.txt' + open(unit=324232, file=filename) + + write(324232, *)'#icycle= ', icycle, ', NSlice_Btau= ', NSlice_Btau + do i=1, NSlice_Btau + write(324232, "(i9, 6f16.6)") i, kout_all(:, i), klist_iband(iband)%velocity_k(:, i) + enddo + close(324232) - + endif + + call now(time_start1) !> calculate the conductivity/tau do ikt=1, NumT KBT= KBT_array(ikt) @@ -950,6 +981,11 @@ subroutine cal_sigma_iband_k !> in the relaxation time approximation v_k= klist_iband(iband)%velocity_k(:, 1+ishift) if (BTau>eps3.and.NSlice_Btau_local>2) then + ! set weight + do it=1, NSlice_Btau_local + weights(it) = exp(-DeltaBtau*(it-1d0)) + enddo + !> trapezoidal integral velocity_bar_k= 0d0 do it=1, NSlice_Btau_local velocity_bar_k= velocity_bar_k+ & @@ -959,6 +995,23 @@ subroutine cal_sigma_iband_k - 0.5d0*DeltaBtau*(exp(-(NSlice_Btau_local-1d0)*DeltaBtau)& * klist_iband(iband)%velocity_k(:, NSlice_Btau_local+ishift) & + klist_iband(iband)%velocity_k(:, 1+ishift)) + + !> Simpson's integral + !> add the first and the last term + ! velocity_bar_k= weights(1)*klist_iband(iband)%velocity_k(:, 1) + ! velocity_bar_k= velocity_bar_k+ weights(NSlice_Btau_local)*klist_iband(iband)%velocity_k(:, NSlice_Btau_local) + ! + ! !> add the middle part + ! do it = 2, NSlice_Btau_local-1, 2 + ! velocity_bar_k = velocity_bar_k + 4d0 * weights(it) * klist_iband(iband)%velocity_k(:, it) + ! end do + ! + ! do it = 3, NSlice_Btau_local-2, 2 + ! velocity_bar_k = velocity_bar_k + 2d0 * weights(it) * klist_iband(iband)%velocity_k(:, it) + ! end do + ! + ! velocity_bar_k = velocity_bar_k * (DeltaBtau / 3d0) + else velocity_bar_k= v_k endif @@ -1019,9 +1072,15 @@ subroutine cal_sigma_iband_k enddo ! ibtau Btau enddo ! ie mu enddo ! ikt KBT + call now(time_end1) + time_integral= time_end1- time_start1 + if (cpuid.eq.0) write(stdout, '(a, i9, a, i10)')'>> icycle = ', icycle, ' , NSlice_Btau', NSlice_Btau + if (cpuid.eq.0) write(stdout, '(a, f16.2, a)')'>> time cost for RKF45_pack is ', time_rkf45, ' s' + if (cpuid.eq.0) write(stdout, '(a, f16.2, a)')'>> time cost for velocity_calc is ', time_velocity, ' s' + if (cpuid.eq.0) write(stdout, '(a, f16.2, a)')'>> time cost for integral is ', time_integral, ' s' call now(time_end) sigma_iband_k(iband)%time_cost_mpi(ik)= time_end- time_start - if (cpuid.eq.0) write(stdout, '(a, f16.2, a)')'>> time cost for this loop is ', time_end- time_start, ' s' + if (cpuid.eq.0) write(stdout, '(a, f16.2, a)')'>> time cost for this loop is ', time_end- time_start, ' s' enddo ! ik kpoints end subroutine cal_sigma_iband_k @@ -1420,6 +1479,16 @@ subroutine velocity_calc_iband(iband, k, velocity_k) ! calculation bulk hamiltonian call ham_bulk_latticegauge(k, Hamk_bulk) +! call eigensystem_c( 'V', 'U', Num_wann, Hamk_bulk, W) + +! !> Only the energy levels close to the Fermi level contribute to the conductivity +! if (abs(W(iband))/eV2Hartree>EF_broadening) then +! velocity_k= 0d0 +! return +! endif + +! UU= Hamk_bulk(:, iband) + call zheevx_pack('V', 'U', Num_wann, iband, iband, Hamk_bulk, W, UU) !> Only the energy levels close to the Fermi level contribute to the conductivity @@ -1558,7 +1627,7 @@ subroutine RKF45_pack(magnetic_field, iband, NSlice_Btau, k_start, Btau_start, B call periodic_diff(kout(:,2), kout(:,1), kdiff) - if (it>2)dis_smallest= norm(kdiff) + if (it>2)dis_smallest= norm(kdiff)/1 !> check if kout(:, it)==kout(:, 1) !> if it's a close orbit, we don't have to calculate all of them. diff --git a/src/sparse.f90 b/src/sparse.f90 index e4ee7017..e0288880 100644 --- a/src/sparse.f90 +++ b/src/sparse.f90 @@ -196,10 +196,10 @@ module sparse public :: WTParCSRMatrixCreate public :: csr_sort_indices public :: csr_sum_duplicates -#if defined (INTELMKL) public :: arpack_sparse_coo_eigs public :: arpack_sparse_coo_eigs_nonorth -#endif + public :: csrmv_z + public :: coomv_z contains @@ -2163,7 +2163,102 @@ subroutine csr_sum_duplicates(n, nnz, ia, ja, A_val) return end subroutine csr_sum_duplicates -# if defined (INTELMKL) + !> csrmv_z multiplies a CSR matrix A times a vector x; y=A*x + !> inputs: + !> ndim, integer, the row dimension of the matrix + !> nnz, integer, number of non-zero entries, the dimension of A_csr, + !ja_csr + !> complex A_csr(nnz), integer ia_csr(ndim+1), ja_csr(nnz), the matrix in + !CSR Compresed Sparse Row format + !> complex x(ndim) + !> outputs: + !> complex y(ndim) + subroutine csrmv_z(ndim, nnz, A_csr, ia_csr, ja_csr, x, y) + + use para, only : dp + implicit none + + integer, intent(in) :: ndim + integer, intent(in) :: nnz + complex(dp), intent(in) :: A_csr(nnz) + integer, intent(in) :: ia_csr(ndim+1) + integer, intent(in) :: ja_csr(nnz) + complex(dp), intent(in) :: x(ndim) + complex(dp), intent(out) :: y(ndim) + + integer :: i, k + complex(dp) :: t_z + +! real(dp) :: time1, time2 + +! call now(time1) +#if defined (INTELMKL) + call mkl_zcsrgemv('N', ndim, a_csr, ia_csr, ja_csr, x, y) +#else + !> A naive implementation + do i=1, ndim + t_z= 0d0 + do k=ia_csr(i), ia_csr(i+1)-1 + t_z= t_z+ A_csr(k) * x(ja_csr(k)) + + enddo + y(i) = t_z + enddo +#endif + +! call now(time2) +! time_total_debug= time_total_debug+ time2- time1 + return + + end subroutine csrmv_z + + !> coomv_z multiplies a COO matrix A times a vector x; y=A*x + !> inputs: + !> ndim, integer, the row dimension of the matrix + !> nnz, integer, number of non-zero entries, the dimension of A_COO, + !ja_COO + !> complex A_COO(nnz), integer ia_COO(ndim+1), ja_COO(nnz), the matrix in + !COO Compresed Sparse Row format + !> complex x(ndim) + !> outputs: + !> complex y(ndim) + subroutine coomv_z(ndim, nnz, A_coo, ia_coo, ja_coo, x, y) + + use para, only : dp + implicit none + + integer, intent(in) :: ndim + integer, intent(in) :: nnz + complex(dp), intent(in) :: A_coo(nnz) + integer, intent(in) :: ia_coo(ndim+1) + integer, intent(in) :: ja_coo(nnz) + complex(dp), intent(in) :: x(ndim) + complex(dp), intent(out) :: y(ndim) + + integer :: i, k + complex(dp) :: t_z + +! real(dp) :: time1, time2 + +! call now(time1) +#if defined (INTELMKL) + call mkl_zcoogemv('N', ndim, a_coo, ia_coo, ja_coo, nnz, x, y) +#else + !> A naive implementation + do i=1, nnz + y(ia_coo(i)) = y(ia_coo(i))+ a_coo(i)* x(ja_coo(i)) + enddo +#endif + +! call now(time2) +! time_total_debug= time_total_debug+ time2- time1 + return + + end subroutine coomv_z + + + + subroutine arpack_sparse_coo_eigs(ndims,nnzmax,nnz,acoo,jcoo,icoo,neval,nvecs,deval,sigma,zeigv, ritzvec) use para, only : dp implicit none @@ -2205,12 +2300,16 @@ subroutine arpack_sparse_coo_eigs(ndims,nnzmax,nnz,acoo,jcoo,icoo,neval,nvecs,de complex(dp),intent(out) :: zeigv(ndims, nvecs) zeigv= 0d0 - !> get eigenvalues of a sparse matrix by calling arpack subroutine - !> here acoo, icoo, jcoo are stored in COO format - !call zmat_arpack_zndrv1(ndims, nnzmax, nnz, acoo, jcoo, icoo, sigma, neval, nvecs, deval, zeigv) +#if defined (INTELMKL) + !> get eigenvalues of a sparse matrix by calling arpack subroutine !> acoo, jcoo, icoo would be converted in to A-sigma*I, then converted into CSR format call zmat_arpack_zndrv2(ndims, nnzmax, nnz, acoo, jcoo, icoo, sigma, neval, nvecs, deval, zeigv, ritzvec) +#else + !> here acoo, icoo, jcoo are stored in COO format + !> use matrix vector multiplication + call zmat_arpack_zndrv1(ndims, nnzmax, nnz, acoo, jcoo, icoo, sigma, neval, nvecs, deval, zeigv, ritzvec) +#endif return @@ -2339,6 +2438,9 @@ subroutine zmat_arpack_zndrv4(ndims, nnzmax, nnz, acsr, jcsr, icsr, & ! axuiliary integer variables integer :: itmp, iter + real(dp) :: time_cost_mv, time_cost_zgesv, time_cost_convert, time_cost_znaupd + real(dp) :: time_start, time_end + ! auxiliary real(dp) variables real(dp) :: dtmp @@ -2425,6 +2527,10 @@ subroutine zmat_arpack_zndrv4(ndims, nnzmax, nnz, acsr, jcsr, icsr, & bmat = 'G' which = 'LM' + + time_cost_mv= 0d0; time_cost_zgesv= 0d0; time_cost_convert= 0d0 + time_cost_znaupd= 0d0 + ! ! %----------------------------------------------------% ! | Construct C = A - SIGMA*S | @@ -2440,6 +2546,7 @@ subroutine zmat_arpack_zndrv4(ndims, nnzmax, nnz, acsr, jcsr, icsr, & if (nnz>nnzmax) stop "ERROR: in zndrv4, nnz is larger than nnzmax" + call now(time_start) !> prepare hamiltonian !> transform coo format to csr format call ConvertCooToCsr(ndims, nnz, acsr, icsr, jcsr, iwk) @@ -2454,6 +2561,8 @@ subroutine zmat_arpack_zndrv4(ndims, nnzmax, nnz, acsr, jcsr, icsr, & !> eleminate the same entries in the sparse matrix call csr_sum_duplicates(ndims, snnz, sicsr, sjcsr, sacsr) call csr_sort_indices(ndims, snnz, sicsr, sjcsr, sacsr) + call now(time_end) + time_cost_convert= time_end- time_start ! ! %---------------------------------------------------% @@ -2505,8 +2614,15 @@ subroutine zmat_arpack_zndrv4(ndims, nnzmax, nnz, acsr, jcsr, icsr, & ! | has been exceeded. | ! %---------------------------------------------% ! + call now(time_start) +#if defined (ARPACK) call znaupd ( ido, bmat, n, which, nev, tol, resid, ncv, & zeigv, ldv, iparam, ipntr, workd, workl, lworkl, rwork, info ) +#else + STOP "ERROR : Please install WannierTools with ARPACK" +#endif + call now(time_end) + time_cost_znaupd= time_cost_znaupd+ time_end- time_start iter = iter + 1 if (mod(iter,100) .eq. 0 .and. cpuid==0) then @@ -2531,10 +2647,18 @@ subroutine zmat_arpack_zndrv4(ndims, nnzmax, nnz, acsr, jcsr, icsr, & ! !> first perform S*x - call mkl_zcsrgemv('N', ndims, sacsr, sicsr, sjcsr, workd(ipntr(1)), workd(ipntr(2))) + call now(time_start) + !call mkl_zcsrgemv('N', ndims, sacsr, sicsr, sjcsr, workd(ipntr(1)), workd(ipntr(2))) + call csrmv_z(ndims, snnz, sacsr, sicsr, sjcsr, workd(ipntr(1)), workd(ipntr(2))) + call now(time_end) + time_cost_mv= time_cost_mv+ time_end- time_start call zcopy (ndims, workd(ipntr(2)), 1, workd(ipntr(1)), 1) !> then perform inv[A-SIGMA*S]*S*x - call zmat_mkldss_zgesv(ndims, nnz, acsr, jcsr, icsr, workd(ipntr(1)), workd(ipntr(2))) + call now(time_start) + !call zmat_mkldss_zgesv(ndims, nnz, acsr, jcsr, icsr, workd(ipntr(1)), workd(ipntr(2))) + call sparse_solver(ndims, nnz, acsr, icsr, jcsr, workd(ipntr(1)), workd(ipntr(2))) + call now(time_end) + time_cost_zgesv= time_cost_zgesv+ time_cost_mv+ time_end- time_start ! %-----------------------------------------% ! | L O O P B A C K to call ZNAUPD again. | @@ -2554,7 +2678,11 @@ subroutine zmat_arpack_zndrv4(ndims, nnzmax, nnz, acsr, jcsr, icsr, & ! | y =workd(ipntr(2)) ! %-----------------------------------------% ! - call zmat_mkldss_zgesv(ndims, nnz, acsr, jcsr, icsr, workd(ipntr(3)), workd(ipntr(2))) + call now(time_start) + !call zmat_mkldss_zgesv(ndims, nnz, acsr, jcsr, icsr, workd(ipntr(3)), workd(ipntr(2))) + call sparse_solver(ndims, nnz, acsr, icsr, jcsr, workd(ipntr(3)), workd(ipntr(2))) + call now(time_end) + time_cost_zgesv= time_cost_zgesv+ time_cost_mv+ time_end- time_start go to 10 ! @@ -2570,7 +2698,11 @@ subroutine zmat_arpack_zndrv4(ndims, nnzmax, nnz, acsr, jcsr, icsr, & ! | y = workd(ipntr(2)) ! %-------------------------------------% ! - call mkl_zcsrgemv('N', ndims, sacsr, sicsr, sjcsr, workd(ipntr(1)), workd(ipntr(2))) + call now(time_start) + !call mkl_zcsrgemv('N', ndims, sacsr, sicsr, sjcsr, workd(ipntr(1)), workd(ipntr(2))) + call csrmv_z(ndims, snnz, sacsr, sicsr, sjcsr, workd(ipntr(1)), workd(ipntr(2))) + call now(time_end) + time_cost_mv= time_cost_mv+ time_end- time_start ! ! %-----------------------------------------% ! | L O O P B A C K to call ZNAUPD again. | @@ -2613,10 +2745,14 @@ subroutine zmat_arpack_zndrv4(ndims, nnzmax, nnz, acsr, jcsr, icsr, & rvec = .false. if (LandauLevel_wavefunction_calc.or.SlabBand_calc) rvec = .true. +#if defined (ARPACK) call zneupd (rvec, 'A', select, d, zeigv, ldv, sigma, & workev, bmat, n, which, nev, tol, resid, ncv,& zeigv, ldv, iparam, ipntr, workd, workl, lworkl, & rwork, ierr) +#else + STOP "ERROR : Please install WannierTools with ARPACK" +#endif ! ! %----------------------------------------------% ! | Eigenvalues are returned in the one | @@ -2660,8 +2796,10 @@ subroutine zmat_arpack_zndrv4(ndims, nnzmax, nnz, acsr, jcsr, icsr, & ! %---------------------------% ! - call mkl_zcsrgemv('N', ndims, sacsr, sicsr, sjcsr, zeigV(1,j), mx) - call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, zeigV(1,j), ax) + !call mkl_zcsrgemv('N', ndims, sacsr, sicsr, sjcsr, zeigV(1,j), mx) + call csrmv_z(ndims, snnz, sacsr, sicsr, sjcsr, zeigV(1,j), mx) + !call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, zeigV(1,j), ax) + call csrmv_z(ndims, nnz, acsr, icsr, jcsr, zeigV(1,j), ax) call zaxpy(n, -d(j), mx, 1, ax, 1) rd(j,1) = dble(d(j)) rd(j,2) = dimag(d(j)) @@ -2709,6 +2847,13 @@ subroutine zmat_arpack_zndrv4(ndims, nnzmax, nnz, acsr, jcsr, icsr, & if (cpuid==0) write(stdout, *) ' The number of OP*x is ', iparam(9) if (cpuid==0) write(stdout, *) ' The convergence criterion is ', tol if (cpuid==0) write(stdout, *) ' ' + if (cpuid==0) write(stdout, '(a,f8.4, " s")') ' time_cost_zgesv: ', time_cost_zgesv + if (cpuid==0) write(stdout, '(a,f8.4, " s")') ' time_cost_mv: ', time_cost_mv + if (cpuid==0) write(stdout, '(a,f8.4, " s")') ' time_cost_t1: ', time_cost_t1 + if (cpuid==0) write(stdout, '(a,f8.4, " s")') ' time_cost_t2: ', time_cost_t2 + if (cpuid==0) write(stdout, '(a,f8.4, " s")') ' time_cost_t3: ', time_cost_t3 + if (cpuid==0) write(stdout, '(a,f8.4, " s")') ' time_cost_convert', time_cost_convert + if (cpuid==0) write(stdout, '(a,f8.4, " s")') ' time_cost_znaupd:', time_cost_znaupd end if ! @@ -2954,8 +3099,12 @@ subroutine zmat_arpack_zndrv3(ndims, nnzmax, nnz, acsr, jcsr, icsr, snnzmax, snn ! | has been exceeded. | ! %---------------------------------------------% ! +#if defined (ARPACK) call znaupd ( ido, bmat, n, which, nev, tol, resid, ncv, & zeigv, ldv, iparam, ipntr, workd, workl, lworkl, rwork, info ) +#else + STOP "ERROR : Please install WannierTools with ARPACK" +#endif iter = iter + 1 if (mod(iter,100) .eq. 0 .and. cpuid==0) then @@ -2976,10 +3125,12 @@ subroutine zmat_arpack_zndrv3(ndims, nnzmax, nnz, acsr, jcsr, icsr, snnzmax, snn ! !> first perform A*x - call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, workd(ipntr(1)), workd(ipntr(2))) + !call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, workd(ipntr(1)), workd(ipntr(2))) + call csrmv_z(ndims, nnz, acsr, icsr, jcsr, workd(ipntr(1)), workd(ipntr(2))) call zcopy (ndims, workd(ipntr(2)), 1, workd(ipntr(1)), 1) !> first perform inv[S]*A*x - call zmat_mkldss_zgesv(ndims, snnz, sacsr, sjcsr, sicsr, workd(ipntr(1)), workd(ipntr(2))) + !call zmat_mkldss_zgesv(ndims, snnz, sacsr, sjcsr, sicsr, workd(ipntr(1)), workd(ipntr(2))) + call sparse_solver(ndims, snnz, sacsr, sicsr, sjcsr, workd(ipntr(1)), workd(ipntr(2))) ! ! %-----------------------------------------% ! | L O O P B A C K to call ZNAUPD again. | @@ -2996,7 +3147,8 @@ subroutine zmat_arpack_zndrv3(ndims, nnzmax, nnz, acsr, jcsr, icsr, snnzmax, snn ! | workd(ipntr(2)). | ! %-------------------------------------% ! - call mkl_zcsrgemv('N', ndims, sacsr, sicsr, sjcsr, workd(ipntr(1)), workd(ipntr(2))) + !call mkl_zcsrgemv('N', ndims, sacsr, sicsr, sjcsr, workd(ipntr(1)), workd(ipntr(2))) + call csrmv_z(ndims, snnz, sacsr, sicsr, sjcsr, workd(ipntr(1)), workd(ipntr(2))) ! ! %-----------------------------------------% ! | L O O P B A C K to call ZNAUPD again. | @@ -3039,10 +3191,14 @@ subroutine zmat_arpack_zndrv3(ndims, nnzmax, nnz, acsr, jcsr, icsr, snnzmax, snn rvec = .false. if (LandauLevel_wavefunction_calc.or.SlabBand_calc) rvec = .true. +#if defined (ARPACK) call zneupd (rvec, 'A', select, d, zeigv, ldv, sigma, & workev, bmat, n, which, nev, tol, resid, ncv,& zeigv, ldv, iparam, ipntr, workd, workl, lworkl, & rwork, ierr) +#else + STOP "ERROR : Please install WannierTools with ARPACK" +#endif ! ! %----------------------------------------------% ! | Eigenvalues are returned in the one | @@ -3086,8 +3242,10 @@ subroutine zmat_arpack_zndrv3(ndims, nnzmax, nnz, acsr, jcsr, icsr, snnzmax, snn ! %---------------------------% ! - call mkl_zcsrgemv('N', ndims, sacsr, sicsr, sjcsr, zeigV(1,j), mx) - call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, zeigV(1,j), ax) + !call mkl_zcsrgemv('N', ndims, sacsr, sicsr, sjcsr, zeigV(1,j), mx) + call csrmv_z(ndims, snnz, sacsr, sicsr, sjcsr, zeigV(1,j), mx) + !call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, zeigV(1,j), ax) + call csrmv_z(ndims, nnz, acsr, icsr, jcsr, zeigV(1,j), ax) call zaxpy(n, -d(j), mx, 1, ax, 1) rd(j,1) = dble(d(j)) rd(j,2) = dimag(d(j)) @@ -3168,7 +3326,7 @@ subroutine zmat_arpack_zndrv3(ndims, nnzmax, nnz, acsr, jcsr, icsr, snnzmax, snn return end subroutine zmat_arpack_zndrv3 - subroutine zmat_arpack_zndrv1(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma, neval, nvecs, deval, zeigv) + subroutine zmat_arpack_zndrv1(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma, neval, nvecs, deval, zeigv, ritzvec) use para, only : dp, stdout, cpuid implicit none @@ -3202,6 +3360,9 @@ subroutine zmat_arpack_zndrv1(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma, neval ! eigenvector for selected "which" complex(dp), intent(out) :: zeigv(ndims, nvecs) +!> calculate eigenvector or not + logical, intent(in) :: ritzvec + ! loop index over neval integer :: ival, jval @@ -3237,12 +3398,11 @@ subroutine zmat_arpack_zndrv1(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma, neval ! %---------------% ! character :: bmat*1, which*2 - integer :: ido, n, nev, ncv, lworkl, info, j, & + integer :: ido, n, nev, ncv, lworkl, info, i, j, & ierr, nconv, maxitr, ishfts, mode, ldv - integer :: istat + integer :: istat integer :: lwrkl, lwrkv, lwrkd real(dp) :: tol - logical :: rvec ! ! %-----------------------------% ! | BLAS & LAPACK routines used | @@ -3297,6 +3457,15 @@ subroutine zmat_arpack_zndrv1(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma, neval bmat = 'I' which = 'SM' + !> added in the diagonal part, shift the spectrum by sigma + do i=1, ndims + j=i+nnz + icsr(j)= i + jcsr(j)= i + acsr(j)= -sigma + enddo + nnz= nnz+ ndims + !> prepare hamiltonian !> transform coo format to csr format call ConvertCooToCsr(ndims, nnz, acsr, icsr, jcsr, iwk) @@ -3355,8 +3524,12 @@ subroutine zmat_arpack_zndrv1(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma, neval ! | has been exceeded. | ! %---------------------------------------------% ! +#if defined (ARPACK) call znaupd ( ido, bmat, n, which, nev, tol, resid, ncv, & zeigv, ldv, iparam, ipntr, workd, workl, lworkl, rwork, info ) +#else + STOP "ERROR : Please install WannierTools with ARPACK" +#endif iter = iter + 1 if (mod(iter,100) .eq. 0 .and. cpuid==0) then @@ -3375,7 +3548,8 @@ subroutine zmat_arpack_zndrv1(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma, neval ! | product to workd(ipntr(2)). | ! %-------------------------------------------% ! - call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, workd(ipntr(1)), workd(ipntr(2))) + !call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, workd(ipntr(1)), workd(ipntr(2))) + call csrmv_z(ndims, nnz, acsr, icsr, jcsr, workd(ipntr(1)), workd(ipntr(2))) ! ! %-----------------------------------------% ! | L O O P B A C K to call ZNAUPD again. | @@ -3415,13 +3589,14 @@ subroutine zmat_arpack_zndrv1(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma, neval ! | desired. (indicated by rvec = .true.) | ! %-------------------------------------------% ! - rvec = .false. - if (LandauLevel_wavefunction_calc.or.SlabBand_calc) rvec = .true. - - call zneupd (rvec, 'A', select, d, zeigv, ldv, sigma, & +#if defined (ARPACK) + call zneupd (ritzvec, 'A', select, d, zeigv, ldv, sigma, & workev, bmat, n, which, nev, tol, resid, ncv,& zeigv, ldv, iparam, ipntr, workd, workl, lworkl, & rwork, ierr) +#else + STOP "ERROR : Please install WannierTools with ARPACK" +#endif ! ! %----------------------------------------------% ! | Eigenvalues are returned in the one | @@ -3464,10 +3639,8 @@ subroutine zmat_arpack_zndrv1(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma, neval ! | tolerance) | ! %---------------------------% ! - !$ call av(nx, zeigv(1,j), ax) - - !$ call zmat_sparse_csrmv0(ndims, nnzmax, acsr, jcsr, icsr, zeigv(1,j), ax) - call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, zeigV(1,j), ax) + !call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, zeigV(1,j), ax) + call csrmv_z(ndims, nnz, acsr, icsr, jcsr, zeigV(1,j), ax) call zaxpy(n, -d(j), zeigv(1,j), 1, ax, 1) rd(j,1) = dble(d(j)) rd(j,2) = dimag(d(j)) @@ -3521,9 +3694,9 @@ subroutine zmat_arpack_zndrv1(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma, neval ! | Done with program zndrv1. | ! %---------------------------% ! - + !shift back the spectrum by sigma do ival=1,neval - deval(ival) = real(d(ival)) + deval(ival) = real(d(ival)+sigma) !zeigv(1:ndims, ival) = V(1:ndims, ival) enddo ! over ival={1,neval} loop @@ -3774,8 +3947,12 @@ subroutine zmat_arpack_zndrv2(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma,neval, ! write(*,*) 'ncv,nev',ncv,nev,n +#if defined (ARPACK) call znaupd ( ido, bmat, n, which, nev, tol, resid, ncv, & zeigv, ldv, iparam, ipntr, workd, workl, lworkl, rwork,info ) +#else + STOP "ERROR : Please install WannierTools with ARPACK" +#endif iter = iter + 1 if (mod(iter,10) .eq. 0 .and. cpuid==0 ) then @@ -3795,8 +3972,8 @@ subroutine zmat_arpack_zndrv2(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma,neval, ! call zcopy( n, workd(ipntr(1)),1, workd(ipntr(2)), 1) ! ! call zgttrs('N', n, 1, dl, dd, du, du2, ipiv, workd(ipntr(2)), n, ierr) -! call sparse_zgesv_csr(ndims,nnz,acsr,jcsr,icsr,workd(ipntr(1)), workd(ipntr(2))) - call zmat_mkldss_zgesv(ndims, nnz, acsr, jcsr, icsr, workd(ipntr(1)), workd(ipntr(2))) + !call zmat_mkldss_zgesv(ndims, nnz, acsr, jcsr, icsr, workd(ipntr(1)), workd(ipntr(2))) + call sparse_solver(ndims, nnz, acsr, icsr, jcsr, workd(ipntr(1)), workd(ipntr(2))) ! ! %-----------------------------------------% ! | L O O P B A C K to call ZNAUPD again. | @@ -3836,10 +4013,14 @@ subroutine zmat_arpack_zndrv2(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma,neval, ! %-------------------------------------------% ! +#if defined (ARPACK) call zneupd (ritzvec, 'A', select, d, zeigv, ldv, sigma, & workev, bmat, n, which, nev, tol, resid, ncv,& zeigv, ldv, iparam, ipntr, workd, workl, lworkl, & rwork, ierr) +#else + STOP "ERROR : Please install WannierTools with ARPACK" +#endif ! ! %----------------------------------------------% ! | Eigenvalues are returned in the one | @@ -3882,7 +4063,8 @@ subroutine zmat_arpack_zndrv2(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma,neval, ! %---------------------------% ! !$ call av(n, zeigv(1,j), ax) - call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, zeigv(1,j), ax) + !call mkl_zcsrgemv('N', ndims, acsr, icsr, jcsr, zeigv(1,j), ax) + call csrmv_z(ndims, nnz, acsr, icsr, jcsr, zeigV(1,j), ax) call zaxpy(n, -d(j), zeigv(1,j), 1, ax, 1) rd(j,1) = dble(d(j)) rd(j,2) = dimag(d(j)) @@ -3977,84 +4159,6 @@ subroutine zmat_arpack_zndrv2(ndims, nnzmax, nnz, acsr, jcsr, icsr, sigma,neval, return end subroutine zmat_arpack_zndrv2 - subroutine zmat_mkldss_zgesv(ndims, nnzmax, acsr, jcsr, icsr, xvec, yvec) - use mkl_dss - use para, only : dp, stdout - - implicit none - - ! external arguments - ! dimension of matrix A - integer, intent(in) :: ndims - - ! non-zero elements of matrix A - integer, intent(in) :: nnzmax - - ! compressed sparse row storage of matrix A - complex(dp), intent(in) :: acsr(nnzmax) - integer, intent(in) :: jcsr(nnzmax) - integer, intent(in) :: icsr(ndims+1) - - ! xvec vector with dimension "ndims" - complex(dp), intent(in) :: xvec(ndims) - - ! yvec vector with dimension "ndims" - complex(dp), intent(out) :: yvec(ndims) - - ! local variables - type(mkl_dss_handle) :: handle - integer :: nrhs - integer :: ierr - integer :: perm(1) - - !>>> step 0: setup the problem to be solved - nrhs = 1 - perm(1) = 0 - - !>>> step 1: initialize the solver. - ierr = dss_create( handle, mkl_dss_defaults ) - if (ierr .ne. mkl_dss_success) then - write(*,*) "mkl_dss_create returned error code", ierr; stop - endif - - !>>> step 2: define the non-zero structure of the matrix. - !ierr = dss_define_structure( handle, mkl_dss_symmetric_structure_complex, icsr, ndims, ndims, jcsr, nnzmax ) - ierr = dss_define_structure( handle, MKL_DSS_NON_SYMMETRIC_COMPLEX, icsr, ndims, ndims, jcsr, nnzmax ) - if (ierr .ne. mkl_dss_success) then - write(*,*) "mkl_dss_define_structure returned error code", ierr; stop - endif ! back if (ierr .ne. mkl_dss_success} block - - !>>> step 3: reorder the matrix. - ierr = dss_reorder( handle, mkl_dss_defaults, perm ) - if (ierr .ne. mkl_dss_success) then - write(*,*) "mkl_dss_reorder returned error code", ierr; stop - endif ! back if (ierr .ne. mkl_dss_success} block - - !>>> step 4: factor the matrix. - ierr = dss_factor_complex( handle, mkl_dss_defaults, acsr ) - if (ierr .ne. mkl_dss_success) then - write(*,*) "mkl_dss_factor_complex returned error code", ierr; stop - endif ! back if (ierr .ne. mkl_dss_success} block - - ! allocate the solution vector and solve the problem. - ierr = dss_solve_complex( handle, mkl_dss_defaults, xvec, nrhs, yvec ) - - if (ierr .ne. mkl_dss_success) then - write(*,*) "mkl_dss_solve_complex returned error code", ierr; stop - endif ! back if (ierr .ne. mkl_dss_success} block - - ! deallocate solver storage and various local arrays. - ierr = dss_delete( handle, mkl_dss_defaults ) - - if (ierr .ne. mkl_dss_success) then - write(*,*) "mkl_dss_solver returned error code", ierr; stop - endif ! back if (ierr .ne. mkl_dss_success} block - - return - end subroutine zmat_mkldss_zgesv -#endif - - end module sparse diff --git a/src/superlu_config.fh b/src/superlu_config.fh new file mode 100644 index 00000000..bacb6183 --- /dev/null +++ b/src/superlu_config.fh @@ -0,0 +1,22 @@ + +#ifndef SUPERLU_CONFIG_H +#define SUPERLU_CONFIG_H + + + + + + + + + + + +#if (XSDK_INDEX_SIZE == 64) +#include +#define _LONGINT 1 +#else +#endif + +#endif + diff --git a/src/unfolding.f90 b/src/unfolding.f90 index 99aeca8f..f9caf013 100644 --- a/src/unfolding.f90 +++ b/src/unfolding.f90 @@ -176,9 +176,7 @@ subroutine unfolding_kpath W= 0d0 !> after arpack_sparse_coo_eigs, nnz will be updated. ritzvec= .true. -#if defined (INTELMKL) call arpack_sparse_coo_eigs(Ndimq,nnzmax,nnz,acoo,jcoo,icoo,neval,nvecs,W,sigma, zeigv, ritzvec) -#endif else call ham_3Dlandau(Ndimq, Magq, k_SBZ_direct, hamk_bulk) zeigv=hamk_bulk @@ -197,9 +195,7 @@ subroutine unfolding_kpath W= 0d0 !> after arpack_sparse_coo_eigs, nnz will be updated. ritzvec= .true. -#if defined (INTELMKL) call arpack_sparse_coo_eigs(Num_wann,nnzmax,nnz,acoo,jcoo,icoo,neval,nvecs,W,sigma, zeigv, ritzvec) -#endif call now(time3) else ! dense hr @@ -490,9 +486,7 @@ subroutine unfolding_kplane W= 0d0 !> after arpack_sparse_coo_eigs, nnz will be updated. ritzvec= .true. -#if defined (INTELMKL) call arpack_sparse_coo_eigs(Num_wann,nnzmax,nnz,acoo,jcoo,icoo,neval,nvecs,W,sigma, zeigv, ritzvec) -#endif call now(time3) else @@ -505,9 +499,7 @@ subroutine unfolding_kplane W= 0d0 !> after arpack_sparse_coo_eigs, nnz will be updated. ritzvec= .true. -#if defined (INTELMKL) call arpack_sparse_coo_eigs(Num_wann,nnzmax,nnz,acoo,jcoo,icoo,neval,nvecs,W,sigma, zeigv, ritzvec) -#endif call now(time3) else ! dense hr diff --git a/src/wanniercenter.f90 b/src/wanniercenter.f90 index 70529e25..2a1664e6 100644 --- a/src/wanniercenter.f90 +++ b/src/wanniercenter.f90 @@ -1657,7 +1657,7 @@ subroutine wannier_center3D_plane_mirror_minus real(dp) :: maxgap0 !> Z2 calculation for time reversal invariant system - integer :: Z2 + real(dp) :: Z2 integer :: Delta real(dp) :: g @@ -2042,7 +2042,7 @@ subroutine wannier_center3D_plane_mirror_plus real(dp) :: maxgap0 !> Z2 calculation for time reversal invariant system - integer :: Z2 + real(dp) :: Z2 integer :: Delta real(dp) :: g @@ -2344,7 +2344,7 @@ subroutine wannier_center3D_plane0 real(dp), allocatable :: largestgap(:) !> Z2 calculation for time reversal invariant system - integer :: Z2 + real(dp) :: Z2 allocate(wcc(NumberofSelectedOccupiedBands, Nk2)) allocate(largestgap(Nk2)) @@ -2471,7 +2471,7 @@ subroutine wannier_center3D_plane_func(kstart, kvec1, kvec2, largest_gap, wcc, real(dp) :: maxgap0 !> Z2 calculation for time reversal invariant system - integer :: Z2 + real(dp) :: Z2 integer :: Delta real(dp) :: g @@ -2719,7 +2719,7 @@ subroutine wannier_center3D_k real(dp), allocatable :: largestgap(:) !> Z2 calculation for time reversal invariant system - integer :: Z2 + real(dp) :: Z2 real(dp),allocatable :: kpoints(:,:,:) integer :: nkp1,nkp2 @@ -2879,7 +2879,7 @@ subroutine wannier_center3D_kpath(kpoints, nkp1, nkp2, largest_gap, wcc, Z2) real(dp) :: maxgap0 !> Z2 calculation for time reversal invariant system - integer :: Z2 + real(dp) :: Z2 integer :: Delta real(dp) :: g @@ -3863,8 +3863,8 @@ subroutine Z2_3D real(dp) :: kvec1(3) real(dp) :: kvec2(3) - integer :: Z2 - integer :: Z2_all(6) + real(dp) :: Z2 + real(dp) :: Z2_all(6) !> wannier centers for each ky, bands real(dp), allocatable :: wcc(:, :) diff --git a/src/wt_aux.f90 b/src/wt_aux.f90 index c1fae869..e4a981e4 100644 --- a/src/wt_aux.f90 +++ b/src/wt_aux.f90 @@ -13,8 +13,8 @@ subroutine now(time_now) real(dp), intent(inout) :: time_now call Date_and_time(values=time_new) - time_now= time_new(3)*24*3600+time_new(5)*3600+& - time_new(6)*60+time_new(7)+time_new(8)/1000d0 + time_now= time_new(3)*24d0*3600d0+time_new(5)*3600d0+& + time_new(6)*60d0+time_new(7)+time_new(8)/1000d0 return end subroutine now diff --git a/utility/twisted_graphene_system_kp_model/system.in b/utility/twisted_graphene_system_kp_model/system.in index 26043a06..b30bc032 100644 --- a/utility/twisted_graphene_system_kp_model/system.in +++ b/utility/twisted_graphene_system_kp_model/system.in @@ -1,5 +1,5 @@ &PARAMETERS - number_layers = 4 + number_layers = 2 !> magic angle twisted_angle_degree= 1.0661 @@ -25,6 +25,7 @@ u_AA=0.0797 ! eV parameters from Phys. Rev. X 8, 031087 u_AB=0.0975 ! eV vf=5.253084 ! eV/Angstrom 2.1354*2.46 + gamma_4 = 0.03 !u_AA=0.000 ! eV parameters from PhysRevLett.122.106405 !u_AB=0.110 ! eV