From d776425668abae727e852ee2bd7a8cddc71907f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Polack?= Date: Thu, 8 Feb 2024 15:07:06 +0100 Subject: [PATCH 1/3] big squash --- docs/src/developer/data_structures.md | 5 +- docs/src/guide/discretisation.jl | 3 +- examples/custom_potential.jl | 5 +- examples/error_estimates_forces.jl | 68 +++++----- examples/gross_pitaevskii.jl | 9 +- .../DFTKGenericLinearAlgebraExt.jl | 24 +++- ext/DFTKIntervalArithmeticExt.jl | 4 +- ext/DFTKJLD2Ext.jl | 6 + ext/DFTKWriteVTKExt.jl | 14 ++- src/DFTK.jl | 3 + src/Model.jl | 16 ++- src/PlaneWaveBasis.jl | 11 +- src/Psi.jl | 49 ++++++++ src/common/ortho.jl | 12 +- src/common/types.jl | 1 + src/densities.jl | 53 +++++--- src/eigen/diag.jl | 42 ++++--- src/eigen/lobpcg_hyper_impl.jl | 2 +- src/eigen/preconditioners.jl | 26 +++- src/external/wannier_shared.jl | 10 +- src/fft.jl | 89 ++++++++++--- src/input_output.jl | 3 + src/interpolation.jl | 16 +-- src/orbitals.jl | 22 ++-- src/postprocess/current.jl | 20 +-- src/response/chi0.jl | 43 ++++--- src/response/hessian.jl | 54 ++++---- src/scf/direct_minimization.jl | 21 ++-- src/scf/nbands_algorithm.jl | 4 +- src/scf/newton.jl | 39 +++--- src/scf/self_consistent_field.jl | 9 +- src/supercell.jl | 13 +- src/symmetry.jl | 14 ++- src/terms/Hamiltonian.jl | 65 ++++++---- src/terms/anyonic.jl | 8 +- src/terms/entropy.jl | 4 +- src/terms/kinetic.jl | 6 +- src/terms/magnetic.jl | 6 +- src/terms/nonlocal.jl | 52 ++++---- src/terms/operators.jl | 110 +++++++++++----- src/transfer.jl | 20 +-- src/workarounds/forwarddiff_rules.jl | 12 +- test/anyons.jl | 10 +- test/chi0.jl | 2 +- test/compute_density.jl | 31 ++--- test/compute_jacobian_eigen.jl | 25 ++-- test/fourier_transforms.jl | 16 +-- test/hamiltonian_consistency.jl | 26 ++-- test/hessian.jl | 16 ++- test/multicomponents.jl | 119 ++++++++++++++++++ test/scf_compare.jl | 4 +- test/serialisation.jl | 1 + test/todict.jl | 14 ++- test/transfer.jl | 13 +- 54 files changed, 860 insertions(+), 410 deletions(-) create mode 100644 src/Psi.jl create mode 100644 test/multicomponents.jl diff --git a/docs/src/developer/data_structures.md b/docs/src/developer/data_structures.md index 74a18bdb43..bf35cf1238 100644 --- a/docs/src/developer/data_structures.md +++ b/docs/src/developer/data_structures.md @@ -140,10 +140,13 @@ For example let us check the normalization of the first eigenfunction at the first ``k``-point in reciprocal space: ```@example data_structures -ψtest = scfres.ψ[1][:, 1] +ψtest = scfres.ψ[1][:, :, 1] sum(abs2.(ψtest)) ``` +The first index of `ψtest` is the component (e.g., the spin) and the second is the +``G``-component. + We now perform an IFFT to get ψ in real space. The ``k``-point has to be passed because ψ is expressed on the ``k``-dependent basis. Again the function is normalised: diff --git a/docs/src/guide/discretisation.jl b/docs/src/guide/discretisation.jl index b3737fb896..f5c0d67546 100644 --- a/docs/src/guide/discretisation.jl +++ b/docs/src/guide/discretisation.jl @@ -109,7 +109,8 @@ scfres.energies # We can also get the first eigenvector (in the plane wave basis) and plot it using Plots -ψ_fourier = scfres.ψ[1][:, 1]; # first k-point, all G components, first eigenvector +ψ_fourier = scfres.ψ[1][1, :, 1]; # first k-point, first (and only) component, + # all G components, first eigenvector ## Transform the wave function to real space ψ = ifft(basis, basis.kpoints[1], ψ_fourier)[:, 1, 1] ## Eigenvectors are only defined up to a phase. We fix it by imposing that psi(0) is real diff --git a/examples/custom_potential.jl b/examples/custom_potential.jl index 43f166df6f..86235ce750 100644 --- a/examples/custom_potential.jl +++ b/examples/custom_potential.jl @@ -72,8 +72,9 @@ compute_forces(scfres) tot_local_pot = DFTK.total_local_potential(scfres.ham)[:, 1, 1]; # use only dimension 1 # Extract other quantities before plotting them -ρ = scfres.ρ[:, 1, 1, 1] # converged density, first spin component -ψ_fourier = scfres.ψ[1][:, 1] # first k-point, all G components, first eigenvector +ρ = scfres.ρ[:, 1, 1, 1] # converged density, first spin component +ψ_fourier = scfres.ψ[1][1, :, 1] # first k-point, first (and only) component, + # all G components, first eigenvector # Transform the wave function to real space and fix the phase: ψ = ifft(basis, basis.kpoints[1], ψ_fourier)[:, 1, 1] diff --git a/examples/error_estimates_forces.jl b/examples/error_estimates_forces.jl index 7e7000ca2b..db79d1ad27 100644 --- a/examples/error_estimates_forces.jl +++ b/examples/error_estimates_forces.jl @@ -85,50 +85,60 @@ res, occ = DFTK.select_occupied_orbitals(basis_ref, res, scfres.occupation) # size ``N×N`` (``N`` being the number of electrons) whose solution is # ``U = S(S^*S)^{-1/2}`` where ``S`` is the overlap matrix ``ψ^*ϕ``. function compute_error(basis, ϕ, ψ) - map(zip(ϕ, ψ)) do (ϕk, ψk) + ϕ = to_composite_σG(basis, ϕ) + ψ = to_composite_σG(basis, ψ) + map(zip(basis.kpoints, ϕ, ψ)) do (kpt, ϕk, ψk) S = ψk'ϕk U = S*(S'S)^(-1/2) - ϕk - ψk*U + from_composite_σG(basis, kpt, ϕk - ψk*U) end end err = compute_error(basis_ref, ψr, ψ_ref); # - Compute ``{\boldsymbol M}^{-1}R(P)`` with ``{\boldsymbol M}^{-1}`` defined in [^CDKL2021]: P = [PreconditionerTPA(basis_ref, kpt) for kpt in basis_ref.kpoints] -map(zip(P, ψr)) do (Pk, ψk) - DFTK.precondprep!(Pk, ψk) +map(zip(P, ψr, basis_ref.kpoints)) do (Pk, ψk, kpt) + DFTK.precondprep!(Pk, to_composite_σG(basis_ref, kpt, ψk)) end -function apply_M(φk, Pk, δφnk, n) - DFTK.proj_tangent_kpt!(δφnk, φk) - δφnk = sqrt.(Pk.mean_kin[n] .+ Pk.kin) .* δφnk - DFTK.proj_tangent_kpt!(δφnk, φk) - δφnk = sqrt.(Pk.mean_kin[n] .+ Pk.kin) .* δφnk - DFTK.proj_tangent_kpt!(δφnk, φk) +function apply_M(basis, kpt, φk, Pk, δφnk, n) + fact = reshape(sqrt.(Pk.mean_kin[n] .+ Pk.kin), size(δφnk)...) + DFTK.proj_tangent_kpt!(δφnk, basis, kpt, φk) + δφnk = fact .* δφnk + DFTK.proj_tangent_kpt!(δφnk, basis, kpt, φk) + δφnk = fact .* δφnk + DFTK.proj_tangent_kpt!(δφnk, basis, kpt, φk) end -function apply_inv_M(φk, Pk, δφnk, n) - DFTK.proj_tangent_kpt!(δφnk, φk) - op(x) = apply_M(φk, Pk, x, n) +function apply_inv_M(basis, kpt, φk, Pk, δφnk, n) + DFTK.proj_tangent_kpt!(δφnk, basis, kpt, φk) + op(x) = let + x = from_composite_σG(basis, kpt, x) + to_composite_σG(basis, kpt, apply_M(basis, kpt, φk, Pk, x, n)) + end function f_ldiv!(x, y) - x .= DFTK.proj_tangent_kpt(y, φk) + x_σG = from_composite_σG(basis, kpt, x) + y_σG = from_composite_σG(basis, kpt, y) + x_σG .= DFTK.proj_tangent_kpt(basis, kpt, y_σG, φk) x ./= (Pk.mean_kin[n] .+ Pk.kin) - DFTK.proj_tangent_kpt!(x, φk) + DFTK.proj_tangent_kpt!(x_σG, basis, kpt, φk) + x end - J = LinearMap{eltype(φk)}(op, size(δφnk, 1)) - δφnk = cg(J, δφnk; Pl=DFTK.FunctionPreconditioner(f_ldiv!), - verbose=false, reltol=0, abstol=1e-15) - DFTK.proj_tangent_kpt!(δφnk, φk) + J = LinearMap{eltype(φk)}(op, prod(size(δφnk)[1:2])) + δφnk_vec = to_composite_σG(basis, kpt, δφnk) + δφnk_vec = cg(J, δφnk_vec; Pl=DFTK.FunctionPreconditioner(f_ldiv!), + verbose=false, reltol=0, abstol=1e-15) + DFTK.proj_tangent_kpt!(δφnk, basis, kpt, φk) end -function apply_metric(φ, P, δφ, A::Function) +function apply_metric(basis, φ, P, δφ, A::Function) map(enumerate(δφ)) do (ik, δφk) Aδφk = similar(δφk) φk = φ[ik] - for n = 1:size(δφk,2) - Aδφk[:,n] = A(φk, P[ik], δφk[:,n], n) + for n = 1:size(δφk, 3) + Aδφk[:, :, n] = A(basis, basis.kpoints[ik], φk, P[ik], δφk[:, :, n], n) end Aδφk end end -Mres = apply_metric(ψr, P, res, apply_inv_M); +Mres = apply_metric(basis, ψr, P, res, apply_inv_M); # We can now compute the modified residual ``R_{\rm Schur}(P)`` using a Schur # complement to approximate the error on low-frequencies[^CDKL2021]: @@ -151,14 +161,14 @@ resLF = DFTK.transfer_blochwave(res, basis_ref, basis) resHF = res - DFTK.transfer_blochwave(resLF, basis, basis_ref); # - Compute ``{\boldsymbol M}^{-1}_{22}R_2(P)``: -e2 = apply_metric(ψr, P, resHF, apply_inv_M); +e2 = apply_metric(basis, ψr, P, resHF, apply_inv_M); # - Compute the right hand side of the Schur system: ## Rayleigh coefficients needed for `apply_Ω` -Λ = map(enumerate(ψr)) do (ik, ψk) - Hk = hamr.blocks[ik] - Hψk = Hk * ψk - ψk'Hψk +Λ = map(enumerate(zip(ψr, hamr * ψr))) do (ik, (ψk, Hψk)) + ψk = to_composite_σG(basis, basis.kpoints[ik], ψk) + Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk) + from_composite_σG(basis, basis.kpoints[ik], ψk'Hψk) end ΩpKe2 = DFTK.apply_Ω(e2, ψr, hamr, Λ) .+ DFTK.apply_K(basis_ref, e2, ψr, ρr, occ) ΩpKe2 = DFTK.transfer_blochwave(ΩpKe2, basis_ref, basis) @@ -204,7 +214,7 @@ end; # the actual error ``P-P_*``. Usually this is of course not the case, but this # is the "best" improvement we can hope for with a linearisation, so we are # aiming for this precision. -df_err = df(basis_ref, occ, ψr, DFTK.proj_tangent(err, ψr), ρr) +df_err = df(basis_ref, occ, ψr, DFTK.proj_tangent(basis, err, ψr), ρr) forces["F(P) - df(P)⋅(P-P_*)"] = f - df_err relerror["F(P) - df(P)⋅(P-P_*)"] = compute_relerror(f - df_err); diff --git a/examples/gross_pitaevskii.jl b/examples/gross_pitaevskii.jl index 63f386a33d..1c13a87d34 100644 --- a/examples/gross_pitaevskii.jl +++ b/examples/gross_pitaevskii.jl @@ -57,8 +57,9 @@ scfres.energies # We use the opportunity to explore some of DFTK internals. # # Extract the converged density and the obtained wave function: -ρ = real(scfres.ρ)[:, 1, 1, 1] # converged density, first spin component -ψ_fourier = scfres.ψ[1][:, 1]; # first k-point, all G components, first eigenvector +ρ = real(scfres.ρ)[:, 1, 1, 1] # converged density, first spin component +ψ_fourier = scfres.ψ[1][1, :, 1] # first k-point, first (and only) component, + # all G components, first eigenvector # Transform the wave function to real space and fix the phase: ψ = ifft(basis, basis.kpoints[1], ψ_fourier)[:, 1, 1] @@ -93,7 +94,7 @@ E, ham = energy_hamiltonian(basis, scfres.ψ, scfres.occupation; ρ=scfres.ρ) H = ham.blocks[1]; # `H` can be used as a linear operator (efficiently using FFTs), or converted to a dense matrix: -ψ11 = scfres.ψ[1][:, 1] # first k-point, first eigenvector +ψ11 = scfres.ψ[1][1, :, 1] # first k-point, first component, first eigenvector Hmat = Array(H) # This is now just a plain Julia matrix, ## which we can compute and store in this simple 1D example @assert norm(Hmat * ψ11 - H * ψ11) < 1e-10 @@ -107,4 +108,4 @@ A[1, end] = A[end, 1] = -1 K = A / dx^2 / 2 V = Diagonal(pot.(x) + C .* α .* (ρ.^(α-1))) H_findiff = K + V; -maximum(abs.(H_findiff*ψ - (dot(ψ, H_findiff*ψ) / dot(ψ, ψ)) * ψ)) +maximum(abs, H_findiff*ψ - (dot(ψ, H_findiff*ψ) / dot(ψ, ψ)) * ψ) diff --git a/ext/DFTKGenericLinearAlgebraExt/DFTKGenericLinearAlgebraExt.jl b/ext/DFTKGenericLinearAlgebraExt/DFTKGenericLinearAlgebraExt.jl index 1c16df7438..451afe3168 100644 --- a/ext/DFTKGenericLinearAlgebraExt/DFTKGenericLinearAlgebraExt.jl +++ b/ext/DFTKGenericLinearAlgebraExt/DFTKGenericLinearAlgebraExt.jl @@ -29,21 +29,21 @@ end DFTK.default_primes(::Any) = (2, ) # Generic fallback function, Float32 and Float64 specialization in fft.jl -function DFTK.build_fft_plans!(tmp::AbstractArray{<:Complex}) +function DFTK.build_fft_plans!(tmp::AbstractArray{<:Complex}; region=1:ndims(tmp)) # Note: FourierTransforms has no support for in-place FFTs at the moment # ... also it's extension to multi-dimensional arrays is broken and # the algo only works for some cases @assert all(ispow2, size(tmp)) - # opFFT = AbstractFFTs.plan_fft(tmp) # TODO When multidim works + # opFFT = AbstractFFTs.plan_fft(tmp) # TODO When multidim works # opBFFT = inv(opFFT).p - opFFT = generic_plan_fft(tmp) # Fallback for now + opFFT = generic_plan_fft(tmp) # Fallback for now opBFFT = generic_plan_bfft(tmp) # TODO Can be cut once FourierTransforms supports AbstractFFTs properly ipFFT = DummyInplace{typeof(opFFT)}(opFFT) ipBFFT = DummyInplace{typeof(opBFFT)}(opBFFT) - ipFFT, opFFT, ipBFFT, opBFFT + (; ipFFT, opFFT, ipBFFT, opBFFT) end struct GenericPlan{T} @@ -51,7 +51,7 @@ struct GenericPlan{T} factor::T end -function Base.:*(p::GenericPlan, X::AbstractArray) +function Base.:*(p::GenericPlan, X::AbstractArray{T, 3}) where {T} pl1, pl2, pl3 = p.subplans ret = similar(X) for i = 1:size(X, 1), j = 1:size(X, 2) @@ -86,4 +86,18 @@ function generic_plan_bfft(data::AbstractArray{T, 3}) where {T} plan_bfft(data[1, 1, :])], T(1)) end +# TODO: Let's not bother with multicomponents yet. +function generic_plan_fft(data::AbstractArray{T, 4}) where {T} + @assert size(data, 1) == 1 + generic_plan_fft(data[1, :, :, :]) +end +function generic_plan_bfft(data::AbstractArray{T, 4}) where {T} + @assert size(data, 1) == 1 + generic_plan_bfft(data[1, :, :, :]) +end +function Base.:*(p::GenericPlan, X::AbstractArray{T, 4}) where {T} + @assert size(X, 1) == 1 + reshape(p * X[1, :, :, :], size(X)...) +end + end diff --git a/ext/DFTKIntervalArithmeticExt.jl b/ext/DFTKIntervalArithmeticExt.jl index 8de5309115..53dcdd8ce3 100644 --- a/ext/DFTKIntervalArithmeticExt.jl +++ b/ext/DFTKIntervalArithmeticExt.jl @@ -42,8 +42,8 @@ function _is_well_conditioned(A::AbstractArray{<:Interval}; kwargs...) end function symmetry_operations(lattice::AbstractMatrix{<:Interval}, atoms, positions, - magnetic_moments=[]; - tol_symmetry=max(SYMMETRY_TOLERANCE, maximum(radius, lattice))) + magnetic_moments=[]; + tol_symmetry=max(SYMMETRY_TOLERANCE, maximum(radius, lattice))) @assert tol_symmetry < 1e-2 symmetry_operations(IntervalArithmetic.mid.(lattice), atoms, positions, magnetic_moments; tol_symmetry) diff --git a/ext/DFTKJLD2Ext.jl b/ext/DFTKJLD2Ext.jl index 57ab1904d3..ac0d93900a 100644 --- a/ext/DFTKJLD2Ext.jl +++ b/ext/DFTKJLD2Ext.jl @@ -89,6 +89,12 @@ function load_scfres_jld2(jld, basis; skip_hamiltonian, strict) "($(jld["n_spin_components"])) and supplied basis " * "($(basis.model.n_spin_components))") end + if jld["n_components"] != basis.model.n_components + consistent_kpts = false + errormsg = ("Mismatch in number of spin components between file " * + "($(jld["n_components"])) and supplied basis " * + "($(basis.model.n_components))") + end end errormsg = MPI.bcast(errormsg, 0, MPI.COMM_WORLD) isempty(errormsg) || error(errormsg) diff --git a/ext/DFTKWriteVTKExt.jl b/ext/DFTKWriteVTKExt.jl index 71ee529d1a..7c046d5508 100644 --- a/ext/DFTKWriteVTKExt.jl +++ b/ext/DFTKWriteVTKExt.jl @@ -24,12 +24,14 @@ function DFTK.save_scfres(::Val{:vts}, filename::AbstractString, scfres::NamedTu ψblock = gather_kpts_block(scfres.basis, ψblock_dist) if mpi_master() - for ik = 1:length(basis.kpoints), n = 1:size(ψblock, 2) - kpt_n_G = length(G_vectors(basis, basis.kpoints[ik])) - ψnk_real = ifft(basis, basis.kpoints[ik], ψblock[1:kpt_n_G, n, ik]) - prefix = @sprintf "ψ_k%03i_n%03i" ik n - vtkfile["$(prefix)_real"] = real.(ψnk_real) - vtkfile["$(prefix)_imag"] = imag.(ψnk_real) + for ik = 1:length(basis.kpoints) + for σ = 1:basis.model.n_components, n = 1:size(ψblock, 3) + kpt_n_G = length(G_vectors(basis, basis.kpoints[ik])) + ψσnk_real = ifft(basis, basis.kpoints[ik], ψblock[:, 1:kpt_n_G, n, ik]) + prefix = @sprintf "ψ_k%03i_n%03i_σ%03i" ik n σ + vtkfile["$(prefix)_real"] = real.(ψσnk_real) + vtkfile["$(prefix)_imag"] = imag.(ψσnk_real) + end end end end diff --git a/src/DFTK.jl b/src/DFTK.jl index b95f3bf2ee..202fd4f678 100644 --- a/src/DFTK.jl +++ b/src/DFTK.jl @@ -75,6 +75,8 @@ export compute_fft_size export G_vectors, G_vectors_cart, r_vectors, r_vectors_cart export Gplusk_vectors, Gplusk_vectors_cart export Kpoint +export to_composite_σG +export from_composite_σG export ifft export irfft export ifft! @@ -86,6 +88,7 @@ include("Model.jl") include("structure.jl") include("bzmesh.jl") include("PlaneWaveBasis.jl") +include("Psi.jl") include("fft.jl") include("orbitals.jl") include("input_output.jl") diff --git a/src/Model.jl b/src/Model.jl index 04e0128e9f..9f0979de5c 100644 --- a/src/Model.jl +++ b/src/Model.jl @@ -26,6 +26,10 @@ struct Model{T <: Real, VT <: Real} n_electrons::Union{Int, Nothing} εF::Union{T, Nothing} + # Option for multicomponents computations. + # TODO: Planed to replace current handling of spins, as it will allow full spin computations. + n_components::Int + # spin_polarization values: # :none No spin polarization, αα and ββ density identical, # αβ and βα blocks zero, 1 spin component treated explicitly @@ -106,7 +110,8 @@ function Model(lattice::AbstractMatrix{T}, terms=[Kinetic()], temperature=zero(T), smearing=temperature > 0 ? Smearing.FermiDirac() : Smearing.None(), - spin_polarization=determine_spin_polarization(magnetic_moments), + n_components=1, + spin_polarization=default_spin_polarization(magnetic_moments, n_components), symmetries=default_symmetries(lattice, atoms, positions, magnetic_moments, spin_polarization, terms), ) where {T <: Real} @@ -178,7 +183,8 @@ function Model(lattice::AbstractMatrix{T}, Model{T,value_type(T)}(model_name, lattice, recip_lattice, n_dim, inv_lattice, inv_recip_lattice, unit_cell_volume, recip_cell_volume, - n_electrons, εF, spin_polarization, n_spin, temperature, smearing, + n_electrons, εF, n_components, spin_polarization, n_spin, + temperature, smearing, atoms, positions, atom_groups, terms, symmetries) end function Model(lattice::AbstractMatrix{<:Integer}, atoms::Vector{<:Element}, @@ -224,6 +230,7 @@ function Model{T}(model::Model; model.temperature, model.smearing, model.εF, + model.n_components, model.spin_polarization, model.symmetries, # Can be safely disabled: this has been checked for model @@ -249,6 +256,11 @@ normalize_magnetic_moment(mm::AbstractVector)::Vec3{Float64} = mm """Number of valence electrons.""" n_electrons_from_atoms(atoms) = sum(n_elec_valence, atoms; init=0) +function default_spin_polarization(magnetic_moments, n_components) + n_components > 1 && return :spinless + determine_spin_polarization(magnetic_moments) +end + """ Default logic to determine the symmetry operations to be used in the model. """ diff --git a/src/PlaneWaveBasis.jl b/src/PlaneWaveBasis.jl index 52e0945b69..3d1f9f658e 100644 --- a/src/PlaneWaveBasis.jl +++ b/src/PlaneWaveBasis.jl @@ -73,6 +73,11 @@ struct PlaneWaveBasis{T, ipBFFT fft_normalization::T # fft = fft_normalization * FFT ifft_normalization::T # ifft = ifft_normalization * BFFT + # Multicomponents plans + opFFT_mc + ipFFT_mc + opBFFT_mc + ipBFFT_mc # "cubic" basis in reciprocal and real space, on which potentials and densities are stored G_vectors::T_G_vectors @@ -250,7 +255,10 @@ function PlaneWaveBasis(model::Model{T}, Ecut::Real, fft_size::Tuple{Int, Int, I # Setup FFT plans Gs = to_device(architecture, G_vectors(fft_size)) - (ipFFT, opFFT, ipBFFT, opBFFT) = build_fft_plans!(similar(Gs, Complex{T}, fft_size)) + (; ipFFT, opFFT, ipBFFT, opBFFT) = build_fft_plans!(similar(Gs, Complex{T}, fft_size)) + # strided FFT plans for multicomponents + ipFFT_mc, opFFT_mc, ipBFFT_mc, opBFFT_mc = + build_fft_plans!(similar(Gs, Complex{T}, model.n_components, fft_size...); region=2:4) # Normalization constants # fft = fft_normalization * FFT @@ -338,6 +346,7 @@ function PlaneWaveBasis(model::Model{T}, Ecut::Real, fft_size::Tuple{Int, Int, I Ecut, variational, opFFT, ipFFT, opBFFT, ipBFFT, fft_normalization, ifft_normalization, + opFFT_mc, ipFFT_mc, opBFFT_mc, ipBFFT_mc, Gs, r_vectors, kpoints, kweights, kgrid, kcoords_global, kweights_global, diff --git a/src/Psi.jl b/src/Psi.jl new file mode 100644 index 0000000000..fcb58b2acf --- /dev/null +++ b/src/Psi.jl @@ -0,0 +1,49 @@ +# Helpers functions converting to and fro implicit composite definition in [σG] and explicit +# definition in [σ,G]. +# The `basis` and `kpoint` arguments are not-necessary, but we keep them to match the +# signatures of most DFTK functions. + +# ψk[σ, G, n] → ψk[σG, n] +# `prod()` because of Sternheimer solver where some quantities might have 0-ending sizes. +to_composite_σG(::PlaneWaveBasis, ::Kpoint, ψk::AbstractArray3) = + reshape(ψk, prod(size(ψk)[1:2]), :) +# ψk[σ, G] → ψk[σG] +to_composite_σG(::PlaneWaveBasis, ::Kpoint, ψk::AbstractMatrix) = reshape(ψk, :) +""" +Converts each wave-function from a tensor `ψ[ik][σ, G, n]` to a matrix `ψ[ik][σG, n]`. +Useful for inner-working of DFTK that relies on simple matrix-vector operations. +""" +to_composite_σG(basis, ψ::AbstractVector) = to_composite_σG.(basis, basis.kpoints, ψ) + +# ψk[σG, n] → ψk[σ, G, n] +from_composite_σG(basis::PlaneWaveBasis, ::Kpoint, ψk::AbstractMatrix) = + reshape(ψk, basis.model.n_components, :, size(ψk, 2)) +# ψk[σG] → ψk[σ, G] +from_composite_σG(basis::PlaneWaveBasis, ::Kpoint, ψk::AbstractVector) = + reshape(ψk, basis.model.n_components, :) + +# For LOBPCG compability. +# ψk[σG] → ψk[σ, G, 1:1] +unfold_from_composite_σG(basis::PlaneWaveBasis, ::Kpoint, ψk::AbstractVector) = + reshape(ψk, basis.model.n_components, :, 1) +# ψk[1:1G, n] → ψk[1:1, G, n] +unfold_from_composite_σG(basis::PlaneWaveBasis, ::Kpoint, ψk::AbstractMatrix) = + reshape(ψk, basis.model.n_components, :, size(ψk, 2)) +""" +Reciprocal to [`to_composite_σG`](@ref). +""" +from_composite_σG(basis::PlaneWaveBasis, ψ::AbstractVector) = + from_composite_σG.(basis, basis.kpoints, ψ) + +@timing function enforce_phase!(basis::PlaneWaveBasis, kpt::Kpoint, ψk) + map(eachcol(to_composite_σG(basis, kpt, ψk))) do ψkn + ϕ = angle(ψkn[end]) + if abs(ϕ) > sqrt(eps(ϕ)) + ε = sign(real(ψkn[end] / cis(ϕ))) + ψkn ./= ε*cis(ϕ) + end + end + ψk +end +"""Ensure that the wave function is real.""" +enforce_phase!(basis, ψ::AbstractVector) = enforce_phase!.(basis, basis.kpoints, ψ) diff --git a/src/common/ortho.jl b/src/common/ortho.jl index fc5473ca3b..ef6aca4263 100644 --- a/src/common/ortho.jl +++ b/src/common/ortho.jl @@ -1,9 +1,5 @@ -@timing function ortho_qr(φk::ArrayType) where {ArrayType <: AbstractArray} - x = convert(ArrayType, qr(φk).Q) - if size(x) == size(φk) - return x - else - # Sometimes QR (but funnily not always) CUDA messes up the size here - return x[:, 1:size(φk, 2)] - end +@views @timing function ortho_qr(basis, kpt, φk::AbstractArray{T}) where {T} + x = convert(Matrix{T}, qr(to_composite_σG(basis, kpt, φk)).Q) + # Sometimes QR (but funnily not always) CUDA messes up the size here. + from_composite_σG(basis, kpt, x)[:, :, 1:size(φk, 3)] end diff --git a/src/common/types.jl b/src/common/types.jl index c70b8f75fc..ddd20533fb 100644 --- a/src/common/types.jl +++ b/src/common/types.jl @@ -9,3 +9,4 @@ value_type(T) = T const Mat3{T} = SMatrix{3, 3, T, 9} where {T} const Vec3{T} = SVector{3, T} where {T} const AbstractArray3{T} = AbstractArray{T, 3} +const AbstractArray4{T} = AbstractArray{T, 4} diff --git a/src/densities.jl b/src/densities.jl index d07697607f..12e6750599 100644 --- a/src/densities.jl +++ b/src/densities.jl @@ -1,6 +1,8 @@ # Densities (and potentials) are represented by arrays # ρ[ix,iy,iz,iσ] in real space, where iσ ∈ [1:n_spin_components] +# TODO: We reduce all components for the density. Will need to be though again when we merge +# the components and the spins. """ compute_density(basis::PlaneWaveBasis, ψ::AbstractVector, occupation::AbstractVector) @@ -17,6 +19,8 @@ using an optional `occupation_threshold`. By default all occupation numbers are # Note, that we special-case Tψ, since when T is Dual and eltype(ψ[1]) is not # (e.g. stress calculation), then only the normalisation factor introduces # dual numbers, but not yet the FFT + Tρ = promote_type(T, real(eltype(ψ[1]))) + n_components = basis.model.n_components # Occupation should be on the CPU as we are going to be doing scalar indexing. occupation = [to_cpu(oc) for oc in occupation] @@ -25,7 +29,7 @@ using an optional `occupation_threshold`. By default all occupation numbers are function allocate_local_storage() (; ρ=zeros_like(basis.G_vectors, Tρ, basis.fft_size..., basis.model.n_spin_components), - ψnk_real=zeros_like(basis.G_vectors, complex(Tψ), basis.fft_size...)) + ψnk_real=zeros_like(basis.G_vectors, complex(Tρ), n_components, basis.fft_size...)) end # We split the total iteration range (ik, n) in chunks, and parallelize over them. range = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]] @@ -33,10 +37,12 @@ using an optional `occupation_threshold`. By default all occupation numbers are storages = parallel_loop_over_range(range; allocate_local_storage) do kn, storage (ik, n) = kn kpt = basis.kpoints[ik] - ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, n]; normalize=false) - storage.ρ[:, :, :, kpt.spin] .+= (occupation[ik][n] .* basis.kweights[ik] - .* (basis.ifft_normalization)^2 - .* abs2.(storage.ψnk_real)) + ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, :, n]; normalize=false) + for σ = 1:n_components + storage.ρ[:, :, :, kpt.spin] .+= (occupation[ik][n] .* basis.kweights[ik] + .* (basis.ifft_normalization)^2 + .* abs2.(storage.ψnk_real[σ, :, :, :])) + end synchronize_device(basis.architecture) end @@ -61,6 +67,7 @@ end # δρ is expected to be real when computations are not phonon-related. Tδρ = iszero(q) ? real(Tψ) : Tψ real_qzero = iszero(q) ? real : identity + n_components = basis.model.n_components # occupation should be on the CPU as we are going to be doing scalar indexing. occupation = [to_cpu(oc) for oc in occupation] @@ -69,8 +76,8 @@ end function allocate_local_storage() (; δρ=zeros_like(basis.G_vectors, Tδρ, basis.fft_size..., basis.model.n_spin_components), - ψnk_real=zeros_like(basis.G_vectors, Tψ, basis.fft_size...), - δψnk_real=zeros_like(basis.G_vectors, Tψ, basis.fft_size...)) + ψnk_real=zeros_like(basis.G_vectors, Tψ, n_components, basis.fft_size...), + δψnk_real=zeros_like(basis.G_vectors, Tψ, n_components, basis.fft_size...)) end range = [(ik, n) for ik = 1:length(basis.kpoints) for n = mask_occ[ik]] @@ -84,14 +91,18 @@ end (ik, n) = kn kpt = basis.kpoints[ik] - ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, n]) + ifft!(storage.ψnk_real, basis, kpt, ψ[ik][:, :, n]) # … and then we compute the real Fourier transform in the adequate basis. - ifft!(storage.δψnk_real, basis, δψ_plus_k[ik].kpt, δψ_plus_k[ik].ψk[:, n]) - - storage.δρ[:, :, :, kpt.spin] .+= real_qzero( - 2 .* occupation[ik][n] .* basis.kweights[ik] .* conj(storage.ψnk_real) - .* storage.δψnk_real - .+ δoccupation[ik][n] .* basis.kweights[ik] .* abs2.(storage.ψnk_real)) + ifft!(storage.δψnk_real, basis, δψ_plus_k[ik].kpt, δψ_plus_k[ik].ψk[:, :, n]) + + for σ = 1:basis.model.n_components + storage.δρ[:, :, :, kpt.spin] .+= real_qzero( + 2 .* occupation[ik][n] .* basis.kweights[ik] + .* conj(storage.ψnk_real[σ, :, :, :]) + .* storage.δψnk_real[σ, :, :, :] + .+ δoccupation[ik][n] .* basis.kweights[ik] + .* abs2.(storage.ψnk_real[σ, :, :, :])) + end synchronize_device(basis.architecture) end @@ -101,16 +112,18 @@ end symmetrize_ρ(basis, δρ; do_lowpass=false) end -@views @timing function compute_kinetic_energy_density(basis::PlaneWaveBasis, ψ, occupation) - T = promote_type(eltype(basis), real(eltype(ψ[1]))) +@views @timing function compute_kinetic_energy_density(basis::PlaneWaveBasis{TT}, ψ, + occupation) where {TT} + @assert basis.model.n_components == 1 + T = promote_type(TT, real(eltype(ψ[1]))) τ = similar(ψ[1], T, (basis.fft_size..., basis.model.n_spin_components)) τ .= 0 - dαψnk_real = zeros(complex(eltype(basis)), basis.fft_size) + dαψσnk_real = zeros(complex(T), basis.fft_size) for (ik, kpt) in enumerate(basis.kpoints) G_plus_k = [[p[α] for p in Gplusk_vectors_cart(basis, kpt)] for α = 1:3] - for n = 1:size(ψ[ik], 2), α = 1:3 - ifft!(dαψnk_real, basis, kpt, im .* G_plus_k[α] .* ψ[ik][:, n]) - @. τ[:, :, :, kpt.spin] += occupation[ik][n] * basis.kweights[ik] / 2 * abs2(dαψnk_real) + for n = 1:size(ψ[ik], 3), σ = 1:basis.model.n_components, α = 1:3 + ifft!(dαψσnk_real, basis, kpt, im .* G_plus_k[α] .* ψ[ik][σ, :, n]) + @. τ[:, :, :, kpt.spin] += occupation[ik][n] * basis.kweights[ik] / 2 * abs2(dαψσnk_real) end end mpi_sum!(τ, basis.comm_kpts) diff --git a/src/eigen/diag.jl b/src/eigen/diag.jl index e39c9fbbb9..b2500af0b3 100644 --- a/src/eigen/diag.jl +++ b/src/eigen/diag.jl @@ -7,48 +7,52 @@ that really does the work, operating on a single ``k``-Block. `prec_type` should be a function that returns a preconditioner when called as `prec(ham, kpt)` """ function diagonalize_all_kblocks(eigensolver, ham::Hamiltonian, nev_per_kpoint::Int; - ψguess=nothing, - prec_type=PreconditionerTPA, interpolate_kpoints=true, - tol=1e-6, miniter=1, maxiter=100, n_conv_check=nothing) - kpoints = ham.basis.kpoints + ψguess=nothing, prec_type=PreconditionerTPA, + interpolate_kpoints=true, tol=1e-6, miniter=1, maxiter=100, + n_conv_check=nothing) + basis = ham.basis + kpoints = basis.kpoints + n_components = basis.model.n_components results = Vector{Any}(undef, length(kpoints)) for (ik, kpt) in enumerate(kpoints) - n_Gk = length(G_vectors(ham.basis, kpt)) + n_Gk = length(G_vectors(basis, kpt)) if n_Gk < nev_per_kpoint error("The size of the plane wave basis is $n_Gk, and you are asking for " * "$nev_per_kpoint eigenvalues. Increase Ecut.") end # Get ψguessk if !isnothing(ψguess) - if n_Gk != size(ψguess[ik], 1) - error("Mismatch in dimension between guess ($(size(ψguess[ik], 1)) and " * + if n_Gk != size(ψguess[ik], 2) + error("Mismatch in dimension between guess ($(size(ψguess[ik], 2))) and " * "Hamiltonian ($n_Gk)") end - nev_guess = size(ψguess[ik], 2) + nev_guess = size(ψguess[ik], 3) if nev_guess > nev_per_kpoint ψguessk = ψguess[ik][:, 1:nev_per_kpoint] elseif nev_guess == nev_per_kpoint ψguessk = ψguess[ik] else - X0 = similar(ψguess[ik], n_Gk, nev_per_kpoint) - X0[:, 1:nev_guess] = ψguess[ik] - X0[:, nev_guess+1:end] = randn(eltype(X0), n_Gk, nev_per_kpoint - nev_guess) - ψguessk = ortho_qr(X0) + X0 = similar(ψguess[ik], n_components, n_Gk, nev_per_kpoint) + X0[:, :, 1:nev_guess] = ψguess[ik] + X0[:, :, nev_guess+1:end] = randn(eltype(X0), n_components, n_Gk, + nev_per_kpoint - nev_guess) + ψguessk = ortho_qr(basis, kpt, X0) end elseif interpolate_kpoints && ik > 1 # use information from previous k-point - ψguessk = interpolate_kpoint(results[ik - 1].X, ham.basis, kpoints[ik - 1], - ham.basis, kpoints[ik]) + ψguessk = interpolate_kpoint(results[ik - 1].X, basis, kpoints[ik - 1], + basis, kpoints[ik]) else - ψguessk = random_orbitals(ham.basis, kpt, nev_per_kpoint) + ψguessk = random_orbitals(basis, kpt, nev_per_kpoint) end - @assert size(ψguessk) == (n_Gk, nev_per_kpoint) + @assert size(ψguessk) == (n_components, n_Gk, nev_per_kpoint) prec = nothing !isnothing(prec_type) && (prec = prec_type(ham[ik])) - results[ik] = eigensolver(ham[ik], ψguessk; - prec, tol, miniter, maxiter, n_conv_check) + res = eigensolver(ham[ik], to_composite_σG(basis, kpt, ψguessk); + prec, tol, miniter, maxiter, n_conv_check) + results[ik] = merge(res, (; X=from_composite_σG(basis, kpt, res.X))) end # Transform results into a nicer datastructure @@ -68,7 +72,7 @@ Tuple returned by `diagonalize_all_kblocks`. """ function select_eigenpairs_all_kblocks(eigres, range) merge(eigres, (; λ=[λk[range] for λk in eigres.λ], - X=[Xk[:, range] for Xk in eigres.X], + X=[Xk[:, :, range] for Xk in eigres.X], residual_norms=[resk[range] for resk in eigres.residual_norms])) end diff --git a/src/eigen/lobpcg_hyper_impl.jl b/src/eigen/lobpcg_hyper_impl.jl index 88a792ac12..26326269b9 100644 --- a/src/eigen/lobpcg_hyper_impl.jl +++ b/src/eigen/lobpcg_hyper_impl.jl @@ -156,7 +156,7 @@ function B_ortho!(X, BX) rdiv!(BX, U) end -normest(M) = maximum(abs.(diag(M))) + norm(M - Diagonal(diag(M))) +normest(M) = maximum(abs, diag(M)) + norm(M - Diagonal(diag(M))) # Orthogonalizes X to tol # Returns the new X, the number of Cholesky factorizations algorithm, and the # growth factor by which small perturbations of X can have been diff --git a/src/eigen/preconditioners.jl b/src/eigen/preconditioners.jl index a9c213d9a8..182babf992 100644 --- a/src/eigen/preconditioners.jl +++ b/src/eigen/preconditioners.jl @@ -26,7 +26,7 @@ PreconditionerNone(::HamiltonianBlock) = I """ mutable struct PreconditionerTPA{T <: Real} basis::PlaneWaveBasis - kpt::Kpoint + kpoint::Kpoint kin::AbstractVector{T} # kinetic energy of every G mean_kin::Union{Nothing, Vector{T}} # mean kinetic energy of every band default_shift::T # if mean_kin is not set by `precondprep!`, this will be used for the shift @@ -46,12 +46,23 @@ function PreconditionerTPA(ham::HamiltonianBlock; kwargs...) PreconditionerTPA(ham.basis, ham.kpoint; kwargs...) end -@views function ldiv!(Y, P::PreconditionerTPA, R) +@views function ldiv!(Y, P::PreconditionerTPA, R::AbstractArray) if P.mean_kin === nothing ldiv!(Y, Diagonal(P.kin .+ P.default_shift), R) + elseif ndims(R) == 3 + Threads.@threads for n = 1:size(Y, 3) + for σ = 1:size(Y, 1) + Y[σ, :, n] .= P.mean_kin[n] ./ (P.mean_kin[n] .+ P.kin) .* R[σ, :, n] + end + end else - parallel_loop_over_range(1:size(Y, 2)) do n - Y[:, n] .= P.mean_kin[n] ./ (P.mean_kin[n] .+ P.kin) .* R[:, n] + # Fallback for LOBPCG as it works on matrices. + R_σG = from_composite_σG(P.basis, P.kpoint, R) + Y_σG = from_composite_σG(P.basis, P.kpoint, Y) + parallel_loop_over_range(1:size(Y, 2)) do n + for σ = 1:P.basis.model.n_components + Y_σG[σ, :, n] .= P.mean_kin[n] ./ (P.mean_kin[n] .+ P.kin) .* R_σG[σ, :, n] + end end end Y @@ -73,7 +84,10 @@ end (Base.:*)(P::PreconditionerTPA, R) = mul!(copy(R), P, R) function precondprep!(P::PreconditionerTPA, X::AbstractArray) - P.mean_kin = [real(dot(x, Diagonal(P.kin), x)) for x in eachcol(X)] + P.mean_kin = map(eachcol(X)) do x + x = from_composite_σG(P.basis, P.kpoint, x) + sum(real(dot(x[σ, :, :], Diagonal(P.kin), x[σ, :, :])) + for σ = 1:P.basis.model.n_components) + end end precondprep!(P::PreconditionerTPA, ::Nothing) = 1 # fallback for edge cases - diff --git a/src/external/wannier_shared.jl b/src/external/wannier_shared.jl index 37e9a9ebfa..f8ce686071 100644 --- a/src/external/wannier_shared.jl +++ b/src/external/wannier_shared.jl @@ -190,13 +190,14 @@ end @timing function write_w90_unk(fileprefix::String, basis, ψ; n_bands, spin=1) + @assert basis.model.n_components == 1 fft_size = basis.fft_size for ik in krange_spin(basis, spin) open(dirname(fileprefix) * (@sprintf "/UNK%05i.%i" ik spin), "w") do fp println(fp, "$(fft_size[1]) $(fft_size[2]) $(fft_size[3]) $ik $n_bands") for iband = 1:n_bands - ψnk_real = ifft(basis, basis.kpoints[ik], @view ψ[ik][:, iband]) + ψnk_real = ifft(basis, basis.kpoints[ik], @view ψ[ik][1, :, iband]) for iz = 1:fft_size[3], iy = 1:fft_size[2], ix = 1:fft_size[1] @printf fp "%25.18f %25.18f\n" real(ψnk_real[ix, iy, iz]) imag(ψnk_real[ix, iy, iz]) end @@ -218,6 +219,7 @@ We use here that: ``u_{n(k + G_{\rm shift})}(r) = e^{-i*\langle G_{\rm shift},r \rangle} u_{nk}``. """ @views function overlap_Mmn_k_kpb(basis::PlaneWaveBasis, ψ, ik, ik_plus_b, G_shift, n_bands) + @assert basis.model.n_components == 1 # Search for common Fourier modes and their resp. indices in Bloch states k and k+b # TODO Check if this can be improved using the G vector mapping in the kpoints k = basis.kpoints[ik] @@ -233,7 +235,7 @@ We use here that: for n = 1:n_bands for m = 1:n_bands # Select the coefficient in right order - Mkb[m, n] = dot(ψ[ik][ip, m], ψ[ik_plus_b][ip_plus_b, n]) + Mkb[m, n] = dot(ψ[ik][1, ip, m], ψ[ik_plus_b][1, ip_plus_b, n]) end end iszero(Mkb) && return Matrix(I, n_bands, n_bands) @@ -297,19 +299,19 @@ function compute_amn_kpoint(basis::PlaneWaveBasis, kpt, ψk, projections, n_band Ak end - @timing function write_w90_amn( fileprefix::String, basis::PlaneWaveBasis, projections, ψ; n_bands) + @assert basis.model.n_components == 1 open(fileprefix * ".amn", "w") do fp println(fp, "Generated by DFTK at $(now())") println(fp, "$n_bands $(length(basis.kpoints)) $(length(projections))") for (ik, (ψk, kpt)) in enumerate(zip(ψ, basis.kpoints)) - Ak = compute_amn_kpoint(basis, kpt, ψk, projections, n_bands) + Ak = compute_amn_kpoint(basis, kpt, ψk[1, :, :], projections, n_bands) for n = 1:size(Ak, 2) for m = 1:size(Ak, 1) @printf(fp, "%3i %3i %3i %22.18f %22.18f \n", diff --git a/src/fft.jl b/src/fft.jl index 163f4c5848..7bb9b795b6 100644 --- a/src/fft.jl +++ b/src/fft.jl @@ -24,8 +24,8 @@ function ifft!(f_real::AbstractArray3, basis::PlaneWaveBasis, f_fourier::Abstrac mul!(f_real, basis.opBFFT, f_fourier) f_real .*= basis.ifft_normalization end -function ifft!(f_real::AbstractArray3, basis::PlaneWaveBasis, - kpt::Kpoint, f_fourier::AbstractVector; normalize=true) +function ifft!(f_real::AbstractArray3, basis::PlaneWaveBasis, kpt::Kpoint, + f_fourier::AbstractVector; normalize=true) @assert length(f_fourier) == length(kpt.mapping) @assert size(f_real) == basis.fft_size @@ -37,6 +37,25 @@ function ifft!(f_real::AbstractArray3, basis::PlaneWaveBasis, normalize && (f_real .*= basis.ifft_normalization) f_real end +function ifft!(f_real::AbstractArray4, basis::PlaneWaveBasis, f_fourier::AbstractMatrix) + mul!(f_real, basis.opBFFT_mc, f_fourier) + f_real .*= basis.ifft_normalization +end +function ifft!(f_real::AbstractArray4, basis::PlaneWaveBasis, kpt::Kpoint, + f_fourier::AbstractMatrix; normalize=true) + n_components = basis.model.n_components + @assert length(f_fourier) == n_components*length(kpt.mapping) + @assert size(f_real)[2:end] == basis.fft_size + + # Pad the input data + fill!(f_real, 0) + reshape(f_real, n_components, :)[:, kpt.mapping] = f_fourier + + # Perform an IFFT + mul!(f_real, basis.ipBFFT_mc, f_real) + normalize && (f_real .*= basis.ifft_normalization) + f_real +end """ ifft(basis::PlaneWaveBasis, [kpt::Kpoint, ] f_fourier) @@ -57,6 +76,11 @@ end function ifft(basis::PlaneWaveBasis, kpt::Kpoint, f_fourier::AbstractVector; kwargs...) ifft!(similar(f_fourier, basis.fft_size...), basis, kpt, f_fourier; kwargs...) end +function ifft(basis::PlaneWaveBasis, kpt::Kpoint, f_fourier::AbstractMatrix; kwargs...) + ifft!(similar(f_fourier, basis.model.n_components, basis.fft_size...), + basis, kpt, f_fourier; kwargs...) +end + """ Perform a real valued iFFT; see [`ifft`](@ref). Note that this function silently drops the imaginary part. @@ -77,8 +101,8 @@ function fft!(f_fourier::AbstractArray3, basis::PlaneWaveBasis, f_real::Abstract mul!(f_fourier, basis.opFFT, f_real) f_fourier .*= basis.fft_normalization end -function fft!(f_fourier::AbstractVector, basis::PlaneWaveBasis, - kpt::Kpoint, f_real::AbstractArray3; normalize=true) +function fft!(f_fourier::AbstractVector, basis::PlaneWaveBasis, kpt::Kpoint, + f_real::AbstractArray3; normalize=true) @assert size(f_real) == basis.fft_size @assert length(f_fourier) == length(kpt.mapping) @@ -90,6 +114,28 @@ function fft!(f_fourier::AbstractVector, basis::PlaneWaveBasis, normalize && (f_fourier .*= basis.fft_normalization) f_fourier end +function fft!(f_fourier::AbstractArray4, basis::PlaneWaveBasis, f_real::AbstractArray4) + if eltype(f_real) <: Real + f_real = complex.(f_real) + end + mul!(f_fourier, basis.opFFT_mc, f_real) + f_fourier .*= basis.fft_normalization +end +function fft!(f_fourier::AbstractMatrix, basis::PlaneWaveBasis, kpt::Kpoint, + f_real::AbstractArray4; normalize=true) + @assert size(f_real)[2:end] == basis.fft_size + @assert length(f_fourier) == basis.model.n_components * length(kpt.mapping) + + # FFT + mul!(f_real, basis.ipFFT_mc, f_real) + + # Truncate + for σ = 1:basis.model.n_components + f_fourier[σ, :] .= @view(f_real[σ, :, :, :][kpt.mapping]) + end + normalize && (f_fourier .*= basis.fft_normalization) + f_fourier +end """ fft(basis::PlaneWaveBasis, [kpt::Kpoint, ] f_real) @@ -112,6 +158,10 @@ end function fft(basis::PlaneWaveBasis, kpt::Kpoint, f_real::AbstractArray3; kwargs...) fft!(similar(f_real, length(kpt.mapping)), basis, kpt, copy(f_real); kwargs...) end +function fft(basis::PlaneWaveBasis, kpt::Kpoint, f_real::AbstractArray4; kwargs...) + fft!(similar(f_real, basis.model.n_components, length(kpt.mapping)), basis, kpt, + copy(f_real); kwargs...) +end # returns matrix representations of the ifft and fft matrices. For debug purposes. function ifft_matrix(basis::PlaneWaveBasis{T}) where {T} @@ -261,25 +311,32 @@ end Plan a FFT of type `T` and size `fft_size`, spending some time on finding an optimal algorithm. (Inplace, out-of-place) x (forward, backward) FFT plans are returned. """ -function build_fft_plans!(tmp::Array{Complex{Float64}}) - ipFFT = FFTW.plan_fft!(tmp; flags=FFTW.MEASURE) - opFFT = FFTW.plan_fft(tmp; flags=FFTW.MEASURE) +function build_fft_plans!(tmp::Array{Complex{Float64}}; region=1:ndims(tmp)) + ipFFT = FFTW.plan_fft!(tmp, region; flags=FFTW.MEASURE) + opFFT = FFTW.plan_fft(tmp, region; flags=FFTW.MEASURE) # backwards-FFT by inverting and stripping off normalizations - ipFFT, opFFT, inv(ipFFT).p, inv(opFFT).p + ipBFFT = inv(ipFFT).p + opBFFT = inv(opFFT).p + (; ipFFT, opFFT, ipBFFT, opBFFT) end -function build_fft_plans!(tmp::Array{Complex{Float32}}) +function build_fft_plans!(tmp::Array{Complex{Float32}}; region=1:ndims(tmp)) # For Float32 there are issues with aligned FFTW plans, so we # fall back to unaligned FFTW plans (which are generally discouraged). - ipFFT = FFTW.plan_fft!(tmp; flags=FFTW.MEASURE | FFTW.UNALIGNED) - opFFT = FFTW.plan_fft(tmp; flags=FFTW.MEASURE | FFTW.UNALIGNED) + ipFFT = FFTW.plan_fft!(tmp, region; flags=FFTW.MEASURE | FFTW.UNALIGNED) + opFFT = FFTW.plan_fft(tmp, region; flags=FFTW.MEASURE | FFTW.UNALIGNED) # backwards-FFT by inverting and stripping off normalizations - ipFFT, opFFT, inv(ipFFT).p, inv(opFFT).p + ipBFFT = inv(ipFFT).p + opBFFT = inv(opFFT).p + (; ipFFT, opFFT, ipBFFT, opBFFT) end -function build_fft_plans!(tmp::AbstractArray{Complex{T}}) where {T<:Union{Float32,Float64}} - ipFFT = AbstractFFTs.plan_fft!(tmp) - opFFT = AbstractFFTs.plan_fft(tmp) +function build_fft_plans!(tmp::AbstractArray{Complex{T}}; + region=1:ndims(tmp)) where {T<:Union{Float32,Float64}} + ipFFT = AbstractFFTs.plan_fft!(tmp, region) + opFFT = AbstractFFTs.plan_fft(tmp, region) # backwards-FFT by inverting and stripping off normalizations - ipFFT, opFFT, inv(ipFFT).p, inv(opFFT).p + ipBFFT = inv(ipFFT).p + opBFFT = inv(opFFT).p + (; ipFFT, opFFT, ipBFFT, opBFFT) end # TODO Some grid sizes are broken in the generic FFT implementation diff --git a/src/input_output.jl b/src/input_output.jl index f7932836f6..62775973bc 100644 --- a/src/input_output.jl +++ b/src/input_output.jl @@ -8,6 +8,7 @@ function Base.show(io::IO, model::Model) nD = model.n_dim == 3 ? "" : "$(model.n_dim)D, " print(io, "Model(", model.model_name, ", ", nD, + "n_components = ", model.n_components, ", ", "spin_polarization = :", model.spin_polarization, ")") end @@ -33,6 +34,7 @@ function Base.show(io::IO, ::MIME"text/plain", model::Model) if !isnothing(model.n_electrons) showfieldln(io, "num. electrons", model.n_electrons) end + showfieldln(io, "num. components", model.n_components) showfieldln(io, "spin polarization", model.spin_polarization) showfieldln(io, "temperature", @sprintf "%.5g Ha" model.temperature) if model.temperature > 0 @@ -74,6 +76,7 @@ function todict!(dict, model::Model) dict["recip_lattice"] = model.recip_lattice dict["n_dim"] = model.n_dim dict["spin_polarization"] = model.spin_polarization + dict["n_components"] = model.n_components dict["n_spin_components"] = model.n_spin_components dict["temperature"] = model.temperature dict["smearing"] = string(model.smearing) diff --git a/src/interpolation.jl b/src/interpolation.jl index d18bf4959f..71850e54c5 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -93,24 +93,26 @@ end Interpolate some data from one ``k``-point to another. The interpolation is fast, but not necessarily exact. Intended only to construct guesses for iterative solvers. """ -function interpolate_kpoint(data_in::AbstractVecOrMat, +function interpolate_kpoint(data_in::AbstractArray3, basis_in::PlaneWaveBasis, kpoint_in::Kpoint, basis_out::PlaneWaveBasis, kpoint_out::Kpoint) # TODO merge with transfer_blochwave_kpt if kpoint_in == kpoint_out return copy(data_in) end - @assert length(G_vectors(basis_in, kpoint_in)) == size(data_in, 1) + n_components = basis_in.model.n_components + @assert n_components == basis_out.model.n_components == size(data_in, 1) + @assert length(G_vectors(basis_in, kpoint_in)) == size(data_in, 2) - n_bands = size(data_in, 2) + n_bands = size(data_in, 3) n_Gk_out = length(G_vectors(basis_out, kpoint_out)) - data_out = similar(data_in, n_Gk_out, n_bands) .= 0 + data_out = similar(data_in, n_components, n_Gk_out, n_bands) .= 0 # TODO: use a map, or this will not be GPU compatible (scalar indexing) - for iin = 1:size(data_in, 1) + for iin = 1:size(data_in, 2) idx_fft = kpoint_in.mapping[iin] idx_fft in keys(kpoint_out.mapping_inv) || continue iout = kpoint_out.mapping_inv[idx_fft] - data_out[iout, :] = data_in[iin, :] + data_out[:, iout, :] = data_in[:, iin, :] end - ortho_qr(data_out) # Re-orthogonalize and renormalize + ortho_qr(basis_out, kpoint_out, data_out) # Re-orthogonalize and renormalize end diff --git a/src/orbitals.jl b/src/orbitals.jl index ace1adb8fb..905feab178 100644 --- a/src/orbitals.jl +++ b/src/orbitals.jl @@ -6,16 +6,15 @@ using Random # Used to have a generic API for CPU and GPU computations alike: s # others when using temperature. It is set to 0.0 by default, to treat with insulators. function select_occupied_orbitals(basis, ψ, occupation; threshold=0.0) N = [something(findlast(x -> x > threshold, occk), 0) for occk in occupation] - selected_ψ = [@view ψk[:, 1:N[ik]] for (ik, ψk) in enumerate(ψ)] - selected_occ = [ occk[1:N[ik]] for (ik, occk) in enumerate(occupation)] + selected_ψ = [@view ψk[:, :, 1:N[ik]] for (ik, ψk) in enumerate(ψ)] + selected_occ = [ occk[1:N[ik]] for (ik, occk) in enumerate(occupation)] - # if we have an insulator, sanity check that the orbitals we kept are the - # occupied ones + # If we have an insulator, sanity check that the orbitals we kept are the occupied ones. if iszero(threshold) model = basis.model n_spin = model.n_spin_components n_bands = div(model.n_electrons, n_spin * filled_occupation(model), RoundUp) - @assert n_bands == size(selected_ψ[1], 2) + @assert all([n_bands == size(ψk, 3) for ψk in selected_ψ]) end (; ψ=selected_ψ, occupation=selected_occ) end @@ -29,9 +28,9 @@ function blockify_ψ(basis::PlaneWaveBasis, ψ::AbstractVector) # Zero-pad each ψk to the full stored rectangle ψ = map(ψ) do ψk - n_G, n_bands = size(ψk) - ψk_full = similar(ψk, max_n_G, n_bands) - ψk_full[1:n_G, :] = ψk + n_components, n_G, n_bands = size(ψk) + ψk_full = similar(ψk, n_components, max_n_G, n_bands) + ψk_full[:, 1:n_G, :] = ψk ψk_full end (; ψ, n_G_vectors) @@ -41,7 +40,7 @@ end # given in the second argument. function unblockify_ψ(ψ_padded::AbstractVector, n_G_vectors::AbstractVector) map(n_G_vectors, ψ_padded) do n_Gk, ψk_padded - ψk_padded[1:n_Gk, :] + ψk_padded[:, 1:n_Gk, :] end end @@ -80,7 +79,8 @@ end unpack_ψ(x, sizes_ψ) = deepcopy(unsafe_unpack_ψ(x, sizes_ψ)) function random_orbitals(basis::PlaneWaveBasis{T}, kpt::Kpoint, howmany::Integer) where {T} - orbitals = similar(basis.G_vectors, Complex{T}, length(G_vectors(basis, kpt)), howmany) + orbitals = similar(basis.G_vectors, Complex{T}, + basis.model.n_components, length(G_vectors(basis, kpt)), howmany) randn!(TaskLocalRNG(), orbitals) # use the RNG on the device if we're using a GPU - ortho_qr(orbitals) + ortho_qr(basis, kpt, orbitals) end diff --git a/src/postprocess/current.jl b/src/postprocess/current.jl index 5f9ccaf46b..4f4613d236 100644 --- a/src/postprocess/current.jl +++ b/src/postprocess/current.jl @@ -4,16 +4,20 @@ Computes the *probability* (not charge) current, ``∑_n f_n \Im(ψ_n · ∇ψ_n """ function compute_current(basis::PlaneWaveBasis, ψ, occupation) @assert length(basis.symmetries) == 1 # TODO lift this - current = [zeros(eltype(basis), basis.fft_size) for α = 1:3] + current = [zeros(eltype(basis), basis.fft_size) for _ = 1:3] for (ik, kpt) in enumerate(basis.kpoints) - for (n, ψnk) in enumerate(eachcol(ψ[ik])) + for (n, ψnk) in enumerate(eachslice(ψ[ik]; dims=3)) ψnk_real = ifft(basis, kpt, ψnk) - for α = 1:3 - dαψnk = [im * q[α] for q in Gplusk_vectors_cart(basis, kpt)] .* ψnk - dαψnk_real = ifft(basis, kpt, dαψnk) - current[α] .+= @. basis.kweights[ik] * - occupation[ik][n] * - imag(conj(ψnk_real) * dαψnk_real) + for σ = 1:basis.model.n_components + ψσnk = ψnk[σ, :] + ψσnk_real = ψnk_real[σ, :, :, :] + for α = 1:3 + dαψσnk = [im * q[α] for q in Gplusk_vectors_cart(basis, kpt)] .* ψσnk + dαψσnk_real = ifft(basis, kpt, dαψσnk) + current[α] .+= @. basis.kweights[ik] * + occupation[ik][n] * + imag(conj(ψσnk_real) * dαψσnk_real) + end end end end diff --git a/src/response/chi0.jl b/src/response/chi0.jl index 3c7ce90f81..f88db6fbad 100644 --- a/src/response/chi0.jl +++ b/src/response/chi0.jl @@ -175,7 +175,7 @@ function sternheimer_solver(Hk, ψk, ε, rhs; function R_ldiv!(x, y) x .= R(precon \ R(y)) end - J = LinearMap{eltype(ψk)}(RAR, size(Hk, 1)) + J = LinearMap{eltype(ψk)}(RAR, size(ψk, 1)) res = cg(J, bb; precon=FunctionPreconditioner(R_ldiv!), tol, proj=R, callback=cg_callback) !res.converged && @warn("Sternheimer CG not converged", res.n_iter, @@ -188,7 +188,7 @@ function sternheimer_solver(Hk, ψk, ε, rhs; # Note that αkn is an empty array if there is no extra bands. αkn = ψk_exHψk_ex \ ψk_extra' * (b - H(δψknᴿ)) - δψkn = ψk_extra * αkn + δψknᴿ + ψk_extra * αkn + δψknᴿ # δψkn end # Apply the four-point polarizability operator χ0_4P = -Ω^-1 @@ -252,7 +252,7 @@ The derivatives of the occupations are in-place stored in δocc. The tuple (; δocc, δεF) is returned. It is assumed the passed `δocc` are initialised to zero. """ -function compute_δocc!(δocc, basis::PlaneWaveBasis{T}, ψ, εF, ε, δHψ) where {T} +@views function compute_δocc!(δocc, basis::PlaneWaveBasis{T}, ψ, εF, ε, δHψ) where {T} model = basis.model temperature = model.temperature smearing = model.smearing @@ -266,7 +266,7 @@ function compute_δocc!(δocc, basis::PlaneWaveBasis{T}, ψ, εF, ε, δHψ) whe D = zero(T) for ik = 1:Nk, (n, εnk) in enumerate(ε[ik]) enred = (εnk - εF) / temperature - δεnk = real(dot(ψ[ik][:, n], δHψ[ik][:, n])) + δεnk = real(dot(ψ[ik][:, :, n], δHψ[ik][:, :, n])) fpnk = filled_occ * Smearing.occupation_derivative(smearing, enred) / temperature δocc[ik][n] = δεnk * fpnk D += fpnk * basis.kweights[ik] @@ -292,9 +292,9 @@ a Sternheimer equation for each `k`-points. It is assumed the passed `δψ` are to zero. For phonon, `δHψ[ik]` is ``δH·ψ_{k-q}``, expressed in `basis.kpoints[ik]`. """ -function compute_δψ!(δψ, basis::PlaneWaveBasis{T}, H, ψ, εF, ε, δHψ, ε_minus_q=ε; - ψ_extra=[zeros_like(ψk, size(ψk,1), 0) for ψk in ψ], - q=zero(Vec3{T}), kwargs_sternheimer...) where {T} +@views function compute_δψ!(δψ, basis::PlaneWaveBasis{T}, H, ψ, εF, ε, δHψ, ε_minus_q=ε; + ψ_extra=[zeros_like(ψk, size(ψk)[1:2]..., 0) for ψk in ψ], + q=zero(Vec3{T}), kwargs_sternheimer...) where {T} # We solve the Sternheimer equation # (H_k - ε_{n,k-q}) δψ_{n,k} = - (1 - P_{k}) δHψ_{n, k-q}, # where P_{k} is the projector on ψ_{k} and with the conventions: @@ -315,9 +315,6 @@ function compute_δψ!(δψ, basis::PlaneWaveBasis{T}, H, ψ, εF, ε, δHψ, ε δψk = δψ[ik] εk_minus_q = ε_minus_q[ik] - ψk_extra = ψ_extra[ik] - Hψk_extra = Hk * ψk_extra - εk_extra = diag(real.(ψk_extra' * Hψk_extra)) for n = 1:length(εk_minus_q) fnk_minus_q = filled_occ * Smearing.occupation(smearing, (εk_minus_q[n]-εF) / temperature) @@ -330,12 +327,22 @@ function compute_δψ!(δψ, basis::PlaneWaveBasis{T}, H, ψ, εF, ε, δHψ, ε ddiff = Smearing.occupation_divided_difference ratio = filled_occ * ddiff(smearing, εk[m], εk_minus_q[n], εF, temperature) αmn = compute_αmn(fmk, fnk_minus_q, ratio) # fnk_minus_q * αmn + fmk * αnm = ratio - δψk[:, n] .+= ψk[:, m] .* αmn .* dot(ψk[:, m], δHψ[ik][:, n]) + δψk[:, :, n] .+= ψk[:, :, m] .* αmn .* dot(ψk[:, :, m], δHψ[ik][:, :, n]) end - # Sternheimer contribution - δψk[:, n] .+= sternheimer_solver(Hk, ψk, εk_minus_q[n], δHψ[ik][:, n]; ψk_extra, - εk_extra, Hψk_extra, kwargs_sternheimer...) + # Sternheimer contribution: + # First, reinterpret the quantities to solve the equation as a generic + # matrix/vector problem… + kpt = basis.kpoints[ik] + ψk_σG = to_composite_σG(basis, kpt, ψk) + δHψkn = to_composite_σG(basis, kpt, δHψ[ik][:, :, n]) + ψk_extra = to_composite_σG(basis, kpt, ψ_extra[ik]) + Hψk_extra = to_composite_σG(basis, kpt, Hk * ψ_extra[ik]) + εk_extra = diag(real.(ψk_extra'Hψk_extra)) + δψkn = to_composite_σG(basis, kpt, δψk[:, :, n]) + # … then, solve the problem. + δψkn .+= sternheimer_solver(Hk, ψk_σG, εk_minus_q[n], δHψkn; ψk_extra, εk_extra, + Hψk_extra, kwargs_sternheimer...) end end end @@ -356,10 +363,10 @@ end mask_occ = map(occk -> findall(occnk -> abs(occnk) ≥ occ_thresh, occk), occupation) mask_extra = map(occk -> findall(occnk -> abs(occnk) < occ_thresh, occk), occupation) - ψ_occ = [ψ[ik][:, maskk] for (ik, maskk) in enumerate(mask_occ)] - ψ_extra = [ψ[ik][:, maskk] for (ik, maskk) in enumerate(mask_extra)] + ψ_occ = [ψ[ik][:, :, maskk] for (ik, maskk) in enumerate(mask_occ)] + ψ_extra = [ψ[ik][:, :, maskk] for (ik, maskk) in enumerate(mask_extra)] ε_occ = [eigenvalues[ik][maskk] for (ik, maskk) in enumerate(mask_occ)] - δHψ_minus_q_occ = [δHψ[ik][:, mask_occ[k_to_k_minus_q[ik]]] for ik = 1:length(basis.kpoints)] + δHψ_minus_q_occ = [δHψ[ik][:, :, mask_occ[k_to_k_minus_q[ik]]] for ik = 1:length(basis.kpoints)] # Only needed for phonon calculations. ε_minus_q_occ = [eigenvalues[k_to_k_minus_q[ik]][mask_occ[k_to_k_minus_q[ik]]] for ik = 1:length(basis.kpoints)] @@ -383,7 +390,7 @@ end # Then we compute δψ (again in-place into a zero-padded array) with elements of # `basis.kpoints` that are equivalent to `k+q`. δψ = zero.(δHψ) - δψ_occ = [δψ[ik][:, maskk] for (ik, maskk) in enumerate(mask_occ[k_to_k_minus_q])] + δψ_occ = [δψ[ik][:, :, maskk] for (ik, maskk) in enumerate(mask_occ[k_to_k_minus_q])] compute_δψ!(δψ_occ, basis, ham.blocks, ψ_occ, εF, ε_occ, δHψ_minus_q_occ, ε_minus_q_occ; ψ_extra, q, kwargs_sternheimer...) diff --git a/src/response/hessian.jl b/src/response/hessian.jl index 3350f114ed..dca1e577f2 100644 --- a/src/response/hessian.jl +++ b/src/response/hessian.jl @@ -26,9 +26,14 @@ Compute the application of Ω defined at ψ to δψ. H is the Hamiltonian comput from ψ and Λ is the set of Rayleigh coefficients ψk' * Hk * ψk at each k-point. """ @timing function apply_Ω(δψ, ψ, H::Hamiltonian, Λ) - δψ = proj_tangent(δψ, ψ) - Ωδψ = [H.blocks[ik] * δψk - δψk * Λ[ik] for (ik, δψk) in enumerate(δψ)] - proj_tangent!(Ωδψ, ψ) + δψ = proj_tangent(H.basis, δψ, ψ) + Ωδψ = map(enumerate(zip(δψ, H * δψ))) do (ik, (δψk, Hδψk)) + δψk = to_composite_σG(H.basis, H.basis.kpoints[ik], δψ[ik]) + Λk = to_composite_σG(H.basis, H.basis.kpoints[ik], Λ[ik]) + δψΛk = from_composite_σG(H.basis, H.basis.kpoints[ik], δψk * Λk) + Hδψk - δψΛk + end + proj_tangent!(Ωδψ, H.basis, ψ) end """ @@ -38,7 +43,7 @@ Compute the application of K defined at ψ to δψ. ρ is the density issued fro δψ also generates a δρ, computed with `compute_δρ`. """ @views @timing function apply_K(basis::PlaneWaveBasis, δψ, ψ, ρ, occupation) - δψ = proj_tangent(δψ, ψ) + δψ = proj_tangent(basis, δψ, ψ) δρ = compute_δρ(basis, ψ, δψ, occupation) δV = apply_kernel(basis, δρ; ρ) @@ -46,15 +51,18 @@ Compute the application of K defined at ψ to δψ. ρ is the density issued fro kpt = basis.kpoints[ik] δVψk = similar(ψk) - for n = 1:size(ψk, 2) - ψnk_real = ifft(basis, kpt, ψk[:, n]) - δVψnk_real = δV[:, :, :, kpt.spin] .* ψnk_real - δVψk[:, n] = fft(basis, kpt, δVψnk_real) + for n = 1:size(ψk, 3) + ψnk_real = ifft(basis, kpt, ψk[:, :, n]) + δVψnk_real = similar(ψnk_real) + for σ = 1:basis.model.n_components + δVψnk_real[σ, :, :, :] = δV[:, :, :, kpt.spin] .* ψnk_real[σ, :, :, :] + end + δVψk[:, :, n] = fft(basis, kpt, δVψnk_real) end δVψk end # ensure projection onto the tangent space - proj_tangent!(Kδψ, ψ) + proj_tangent!(Kδψ, basis, ψ) end """ @@ -83,24 +91,28 @@ Return δψ where (Ω+K) δψ = rhs unsafe_unpack(x) = unsafe_unpack_ψ(reinterpret_complex(x), size.(ψ)) # project rhs on the tangent space before starting - proj_tangent!(rhs, ψ) + proj_tangent!(rhs, basis, ψ) rhs_pack = pack(rhs) # preconditioner Pks = [PreconditionerTPA(basis, kpt) for kpt in basis.kpoints] for ik = 1:length(Pks) - precondprep!(Pks[ik], ψ[ik]) + precondprep!(Pks[ik], to_composite_σG(basis, basis.kpoints[ik], ψ[ik])) end function f_ldiv!(x, y) δψ = unpack(y) - proj_tangent!(δψ, ψ) - Pδψ = [ Pks[ik] \ δψk for (ik, δψk) in enumerate(δψ)] - proj_tangent!(Pδψ, ψ) + proj_tangent!(δψ, basis, ψ) + Pδψ = [Pks[ik] \ δψk for (ik, δψk) in enumerate(δψ)] + proj_tangent!(Pδψ, basis, ψ) x .= pack(Pδψ) end # Rayleigh-coefficients - Λ = [ψk'Hψk for (ψk, Hψk) in zip(ψ, H * ψ)] + Λ = map(enumerate(zip(ψ, H * ψ))) do (ik, (ψk, Hψk)) + ψk = to_composite_σG(basis, basis.kpoints[ik], ψk) + Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk) + from_composite_σG(basis, basis.kpoints[ik], ψk'Hψk) + end # mapping of the linear system on the tangent space function ΩpK(x) @@ -114,7 +126,7 @@ Return δψ where (Ω+K) δψ = rhs # solve (Ω+K) δψ = rhs on the tangent space with CG function proj(x) δψ = unpack(x) - proj_tangent!(δψ, ψ) + proj_tangent!(δψ, basis, ψ) pack(δψ) end res = cg(J, rhs_pack; precon=FunctionPreconditioner(f_ldiv!), proj, tol, @@ -134,10 +146,10 @@ Solve the problem `(Ω+K) δψ = rhs` using a split algorithm, where `rhs` is ty - `δVind`: Change in potential induced by `δρ` (the term needed on top of `δHextψ` to get `δHψ`). """ -@timing function solve_ΩplusK_split(ham::Hamiltonian, ρ::AbstractArray{T}, ψ, occupation, εF, - eigenvalues, rhs; tol=1e-8, tol_sternheimer=tol/10, - verbose=false, occupation_threshold, q=zero(Vec3{real(T)}), - kwargs...) where {T} +@timing function solve_ΩplusK_split(ham::Hamiltonian, ρ::AbstractArray{T}, ψ, occupation, εF, + eigenvalues, rhs; tol=1e-8, tol_sternheimer=tol/10, + verbose=false, occupation_threshold, q=zero(Vec3{real(T)}), + kwargs...) where {T} # Using χ04P = -Ω^-1, E extension operator (2P->4P) and R restriction operator: # (Ω+K)^-1 = Ω^-1 ( 1 - K (1 + Ω^-1 K )^-1 Ω^-1 ) # = -χ04P ( 1 - K (1 - χ04P K )^-1 (-χ04P)) @@ -177,7 +189,7 @@ Solve the problem `(Ω+K) δψ = rhs` using a split algorithm, where `rhs` is ty # Compute total change in eigenvalues δeigenvalues = map(ψ, δHψ) do ψk, δHψk - map(eachcol(ψk), eachcol(δHψk)) do ψnk, δHψnk + map(eachslice(ψk; dims=3), eachslice(δHψk; dims=3)) do ψnk, δHψnk real(dot(ψnk, δHψnk)) # δε_{nk} = <ψnk | δH | ψnk> end end diff --git a/src/scf/direct_minimization.jl b/src/scf/direct_minimization.jl index ef0e142987..232ff0874d 100644 --- a/src/scf/direct_minimization.jl +++ b/src/scf/direct_minimization.jl @@ -95,6 +95,8 @@ function direct_minimization(basis::PlaneWaveBasis{T}; pack(ψ) = copy(reinterpret_real(pack_ψ(ψ))) unpack(x) = unpack_ψ(reinterpret_complex(x), size.(ψ)) unsafe_unpack(x) = unsafe_unpack_ψ(reinterpret_complex(x), size.(ψ)) + ψ_σG = to_composite_σG(basis, ψ) + unsafe_unpack_σG(x) = unsafe_unpack_ψ(reinterpret_complex(x), size.(ψ_σG)) # This will get updated along the iterations ρ = nothing @@ -115,7 +117,8 @@ function direct_minimization(basis::PlaneWaveBasis{T}; # the next step would be ρout - ρ. We thus record convergence, but let Optim do # one more step. δψ = unsafe_unpack(optim_state.s) - ψ_next = [ortho_qr(ψ[ik] - δψ[ik]) for ik in 1:Nk] + ψ_next = [ortho_qr(basis, kpt, ψ[ik] - δψ[ik]) + for (ik, kpt) in enumerate(basis.kpoints)] compute_density(basis, ψ_next, occupation) end @@ -154,9 +157,9 @@ function direct_minimization(basis::PlaneWaveBasis{T}; energies.total end - manifold = DMManifold(Nk, unsafe_unpack) + manifold = DMManifold(Nk, unsafe_unpack_σG) Pks = [prec_type(basis, kpt) for kpt in basis.kpoints] - P = DMPreconditioner(Nk, Pks, unsafe_unpack) + P = DMPreconditioner(Nk, Pks, unsafe_unpack_σG) optim_options = Optim.Options(; allow_f_increases=true, callback=optim_callback, @@ -171,12 +174,12 @@ function direct_minimization(basis::PlaneWaveBasis{T}; ψ = unpack(Optim.minimizer(res)) # Final Rayleigh-Ritz (not strictly necessary, but sometimes useful) - eigenvalues = Vector{T}[] - for ik = 1:Nk - Hψk = ham[ik] * ψ[ik] - F = eigen(Hermitian(ψ[ik]'Hψk)) - push!(eigenvalues, F.values) - ψ[ik] .= ψ[ik] * F.vectors + eigenvalues::Vector{Vector{T}} = map(enumerate(zip(ψ, ham * ψ))) do (ik, (ψk, Hψk)) + ψk = to_composite_σG(basis, basis.kpoints[ik], ψk) + Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk) + F = eigen(Hermitian(ψk'Hψk)) + ψk .= ψk * F.vectors + F.values end εF = nothing # does not necessarily make sense here, as the diff --git a/src/scf/nbands_algorithm.jl b/src/scf/nbands_algorithm.jl index 77139c07da..79a3a13968 100644 --- a/src/scf/nbands_algorithm.jl +++ b/src/scf/nbands_algorithm.jl @@ -59,7 +59,7 @@ function determine_n_bands(bands::AdaptiveBands, occupation::Nothing, eigenvalue if isnothing(ψ) n_bands_compute = bands.n_bands_compute else - n_bands_compute = max(bands.n_bands_compute, maximum(ψk -> size(ψk, 2), ψ)) + n_bands_compute = max(bands.n_bands_compute, maximum(ψk -> size(ψk, 3), ψ)) end # Boost number of bands to converge to have more information around in the next step # and to thus make a better decision on the number of bands we actually care about. @@ -87,7 +87,7 @@ function determine_n_bands(bands::AdaptiveBands, occupation::AbstractVector, end n_bands_compute = max(bands.n_bands_compute, n_bands_compute_ε, n_bands_converge + 3) if !isnothing(ψ) - n_bands_compute = max(n_bands_compute, maximum(ψk -> size(ψk, 2), ψ)) + n_bands_compute = max(n_bands_compute, maximum(ψk -> size(ψk, 3), ψ)) end (; n_bands_converge, n_bands_compute) end diff --git a/src/scf/newton.jl b/src/scf/newton.jl index 144599999d..ccbba0ce52 100644 --- a/src/scf/newton.jl +++ b/src/scf/newton.jl @@ -53,21 +53,25 @@ function compute_projected_gradient(basis::PlaneWaveBasis, ψ, occupation) ρ = compute_density(basis, ψ, occupation) H = energy_hamiltonian(basis, ψ, occupation; ρ).ham - [proj_tangent_kpt(H.blocks[ik] * ψk, ψk) for (ik, ψk) in enumerate(ψ)] + [proj_tangent_kpt(H.basis, H.basis.kpoints[ik], H.blocks[ik] * ψk, ψk) + for (ik, ψk) in enumerate(ψ)] end # Projections on the space tangent to ψ -function proj_tangent_kpt!(δψk, ψk) +function proj_tangent_kpt!(δψk, basis::PlaneWaveBasis, kpt::Kpoint, ψk) # δψk = δψk - ψk * (ψk'δψk) - mul!(δψk, ψk, ψk'δψk, -1, 1) + ψk_σG = to_composite_σG(basis, kpt, ψk) + δψk_σG = to_composite_σG(basis, kpt, δψk) + mul!(δψk_σG, ψk_σG, ψk_σG'δψk_σG, -1, 1) + δψk end -proj_tangent_kpt(δψk, ψk) = proj_tangent_kpt!(deepcopy(δψk), ψk) +proj_tangent_kpt(basis, kpt, δψk, ψk) = proj_tangent_kpt!(deepcopy(δψk), basis, kpt, ψk) -function proj_tangent(δψ, ψ) - [proj_tangent_kpt(δψ[ik], ψk) for (ik, ψk) in enumerate(ψ)] +function proj_tangent(basis::PlaneWaveBasis, δψ, ψ) + [proj_tangent_kpt(basis, basis.kpoints[ik], δψ[ik], ψk) for (ik, ψk) in enumerate(ψ)] end -function proj_tangent!(δψ, ψ) - [proj_tangent_kpt!(δψ[ik], ψk) for (ik, ψk) in enumerate(ψ)] +function proj_tangent!(δψ, basis::PlaneWaveBasis, ψ) + [proj_tangent_kpt!(δψ[ik], basis, basis.kpoints[ik], ψk) for (ik, ψk) in enumerate(ψ)] end @@ -79,8 +83,7 @@ end Newton algorithm. Be careful that the starting point needs to be not too far from the solution. """ -function newton(basis::PlaneWaveBasis{T}, ψ0; - tol=1e-6, tol_cg=tol / 100, maxiter=20, +function newton(basis::PlaneWaveBasis{T}, ψ0; tol=1e-6, tol_cg=tol / 100, maxiter=20, callback=ScfDefaultCallback(), is_converged=ScfConvergenceDensity(tol)) where {T} @@ -93,7 +96,7 @@ function newton(basis::PlaneWaveBasis{T}, ψ0; filled_occ = filled_occupation(model) n_spin = model.n_spin_components n_bands = div(model.n_electrons, n_spin * filled_occ, RoundUp) - @assert n_bands == size(ψ0[1], 2) + @assert all([n_bands == size(ψk, 3) for ψk in ψ0]) # number of kpoints and occupation Nk = length(basis.kpoints) @@ -119,7 +122,7 @@ function newton(basis::PlaneWaveBasis{T}, ψ0; res = compute_projected_gradient(basis, ψ, occupation) # solve (Ω+K) δψ = -res so that the Newton step is ψ <- ψ + δψ δψ = solve_ΩplusK(basis, ψ, -res, occupation; tol=tol_cg).δψ - ψ = [ortho_qr(ψ[ik] + δψ[ik]) for ik = 1:Nk] + ψ = [ortho_qr(basis, basis.kpoints[ik], ψ[ik] + δψ[ik]) for ik = 1:Nk] ρout = compute_density(basis, ψ, occupation) energies, H = energy_hamiltonian(basis, ψ, occupation; ρ=ρout) @@ -136,12 +139,12 @@ function newton(basis::PlaneWaveBasis{T}, ψ0; end # Rayleigh-Ritz - eigenvalues = [] - for ik = 1:Nk - Hψk = H.blocks[ik] * ψ[ik] - F = eigen(Hermitian(ψ[ik]'Hψk)) - push!(eigenvalues, F.values) - ψ[ik] .= ψ[ik] * F.vectors + eigenvalues = map(enumerate(zip(ψ, H * ψ))) do (ik, (ψk, Hψk)) + ψk = to_composite_σG(basis, basis.kpoints[ik], ψk) + Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk) + F = eigen(Hermitian(ψk'Hψk)) + ψk .= ψk * F.vectors + F.values end εF = nothing # does not necessarily make sense here, as the diff --git a/src/scf/self_consistent_field.jl b/src/scf/self_consistent_field.jl index 37117e0839..c598e88ab6 100644 --- a/src/scf/self_consistent_field.jl +++ b/src/scf/self_consistent_field.jl @@ -63,8 +63,8 @@ function next_density(ham::Hamiltonian, increased_n_bands = true else @assert length(ψ) == length(ham.basis.kpoints) - n_bands_compute = max(n_bands_compute, maximum(ψk -> size(ψk, 2), ψ)) - increased_n_bands = n_bands_compute > size(ψ[1], 2) + n_bands_compute = max(n_bands_compute, maximum(ψk -> size(ψk, 3), ψ)) + increased_n_bands = any([n_bands_compute > size(ψk, 3) for ψk in ψ]) end # TODO Synchronize since right now it is assumed that the same number of bands are @@ -90,8 +90,9 @@ function next_density(ham::Hamiltonian, "`nbandsalg=AdaptiveBands(model; n_bands_converge=$(n_bands_converge + 3)`)") end - ρout = compute_density(ham.basis, eigres.X, occupation; nbandsalg.occupation_threshold) - (; ψ=eigres.X, eigenvalues=eigres.λ, occupation, εF, ρout, diagonalization=eigres, + ψ = eigres.X + ρout = compute_density(ham.basis, ψ, occupation; nbandsalg.occupation_threshold) + (; ψ, eigenvalues=eigres.λ, occupation, εF, ρout, diagonalization=eigres, n_bands_converge, nbandsalg.occupation_threshold) end diff --git a/src/supercell.jl b/src/supercell.jl index fbc3d82d30..4cbb3e3e47 100644 --- a/src/supercell.jl +++ b/src/supercell.jl @@ -72,8 +72,7 @@ function cell_to_supercell(ψ, basis::PlaneWaveBasis{T}, # Ensure that the basis is unfolded. length(basis.kgrid) != length(basis.kpoints) && error("basis must be unfolded") - num_kpG = sum(size.(ψ, 1)) - num_bands = size(ψ[1], 2) + num_kpG = sum([size(ψk, 2) for ψk in ψ]) # Maps k+G vector of initial basis to a G vector of basis_supercell function cell_supercell_mapping(kpt) @@ -84,12 +83,12 @@ function cell_to_supercell(ψ, basis::PlaneWaveBasis{T}, # Transfer all ψ[k] independently and return the hcat of all blocs ψ_out_blocs = [] for (ik, kpt) in enumerate(basis.kpoints) - ψk_supercell = zeros(complex(T), num_kpG, num_bands) - ψk_supercell[cell_supercell_mapping(kpt), :] .= ψ[ik] + ψk_supercell = zeros(complex(T), basis.model.n_components, num_kpG, size(ψ[ik], 3)) + ψk_supercell[:, cell_supercell_mapping(kpt), :] .= ψ[ik] push!(ψ_out_blocs, ψk_supercell) end - # Note that each column is normalize since each ψ[ik][:,n] is. - reduce(hcat, ψ_out_blocs) + # Note that each band is normalized since each ψ[ik][:,:,n] is. + [reshape(stack(ψ_out_blocs), basis.model.n_components, num_kpG, :)] end @doc raw""" @@ -110,7 +109,7 @@ function cell_to_supercell(scfres::NamedTuple) # Compute supercell basis, ψ, occupations and ρ basis_supercell = cell_to_supercell(basis) - ψ_supercell = [cell_to_supercell(ψ, basis, basis_supercell)] + ψ_supercell = cell_to_supercell(ψ, basis, basis_supercell) eigs_supercell = [reduce(vcat, scfres_unfold.eigenvalues)] occ_supercell = compute_occupation(basis_supercell, eigs_supercell, scfres.εF).occupation ρ_supercell = compute_density(basis_supercell, ψ_supercell, occ_supercell; diff --git a/src/symmetry.jl b/src/symmetry.jl index a25f0556d3..275736b8f5 100644 --- a/src/symmetry.jl +++ b/src/symmetry.jl @@ -216,7 +216,7 @@ Apply a symmetry operation to eigenvectors `ψk` at a given `kpoint` to obtain a equivalent point in [-0.5, 0.5)^3 and associated eigenvectors (expressed in the basis of the new ``k``-point). """ -function apply_symop(symop::SymOp, basis, kpoint, ψk::AbstractVecOrMat) +function apply_symop(symop::SymOp, basis, kpoint, ψk::AbstractArray) S, τ = symop.S, symop.τ isone(symop) && return kpoint, ψk @@ -248,11 +248,13 @@ function apply_symop(symop::SymOp, basis, kpoint, ψk::AbstractVecOrMat) invS = Mat3{Int}(inv(S)) Gs_full = [G + kshift for G in G_vectors(basis, Skpoint)] ψSk = zero(ψk) - for iband = 1:size(ψk, 2) + for iband = 1:size(ψk, 3) for (ig, G_full) in enumerate(Gs_full) igired = index_G_vectors(basis, kpoint, invS * G_full) @assert igired !== nothing - ψSk[ig, iband] = cis2pi(-dot(G_full, τ)) * ψk[igired, iband] + for σ = 1:basis.model.n_components + ψSk[σ, ig, iband] = cis2pi(-dot(G_full, τ)) * ψk[σ, igired, iband] + end end end @@ -415,6 +417,8 @@ function unfold_array(basis_irred, basis_unfolded, data, is_ψ) error("Brillouin zone symmetry unfolding not supported with MPI yet") end data_unfolded = similar(data, length(basis_unfolded.kpoints)) + n_components = basis_unfolded.model.n_components + @assert n_components == basis_irred.model.n_components for ik_unfolded = 1:length(basis_unfolded.kpoints) kpt_unfolded = basis_unfolded.kpoints[ik_unfolded] ik_irred, symop = unfold_mapping(basis_irred, kpt_unfolded) @@ -422,8 +426,8 @@ function unfold_array(basis_irred, basis_unfolded, data, is_ψ) # transform ψ_k from data into ψ_Sk in data_unfolded kunfold_coord = kpt_unfolded.coordinate @assert normalize_kpoint_coordinate(kunfold_coord) ≈ kunfold_coord - _, ψSk = apply_symop(symop, basis_irred, - basis_irred.kpoints[ik_irred], data[ik_irred]) + _, ψSk = apply_symop(symop, basis_irred, basis_irred.kpoints[ik_irred], + data[ik_irred]) data_unfolded[ik_unfolded] = ψSk else # simple copy diff --git a/src/terms/Hamiltonian.jl b/src/terms/Hamiltonian.jl index 94e4adc9bc..9208f69d6a 100644 --- a/src/terms/Hamiltonian.jl +++ b/src/terms/Hamiltonian.jl @@ -56,7 +56,8 @@ function HamiltonianBlock(basis, kpoint, operators; scratch=nothing) end end function _ham_allocate_scratch(basis::PlaneWaveBasis{T}) where {T} - [(; ψ_reals=zeros_like(basis.G_vectors, complex(T), basis.fft_size...)) + [(; ψ_reals=zeros_like(basis.G_vectors, complex(T), + basis.model.n_components, basis.fft_size...)) for _ = 1:Threads.nthreads()] end @@ -90,37 +91,48 @@ function LinearAlgebra.mul!(Hψ, H::Hamiltonian, ψ) Hψ end # need `deepcopy` here to copy the elements of the array of arrays ψ (not just pointers) -Base.:*(H::Hamiltonian, ψ) = mul!(deepcopy(ψ), H, ψ) +Base.:*(H::Hamiltonian, ψ::AbstractArray) = mul!(deepcopy(ψ), H, ψ) + +# Serves as a wrapper around the specialized multiplication functions for LOBPCG as it +# expects to work on vectors or matrices. Explicitely create a 3-tensor to prevent ambiguity. +@timing function LinearAlgebra.mul!(Hψ_σG::AbstractVecOrMat, H::HamiltonianBlock, + ψ_σG::AbstractVecOrMat) + Hψ = unfold_from_composite_σG(H.basis, H.kpoint, Hψ_σG) + ψ = unfold_from_composite_σG(H.basis, H.kpoint, ψ_σG) + mul!(Hψ, H, ψ) + Hψ_σG +end # Loop through bands, IFFT to get ψ in real space, loop through terms, FFT and accumulate into Hψ # For the common DftHamiltonianBlock there is an optimized version below -@views @timing "Hamiltonian multiplication" function LinearAlgebra.mul!(Hψ::AbstractArray, +@views @timing "Hamiltonian multiplication" function LinearAlgebra.mul!(Hψ::AbstractArray3, H::GenericHamiltonianBlock, - ψ::AbstractArray) + ψ::AbstractArray3) + basis = H.basis function allocate_local_storage() - T = eltype(H.basis) - (; Hψ_fourier = similar(Hψ[:, 1]), - ψ_real = similar(ψ, complex(T), H.basis.fft_size...), - Hψ_real = similar(Hψ, complex(T), H.basis.fft_size...)) + T = eltype(basis) + (; Hψ_fourier = similar(Hψ[:, :, 1]), + ψ_real = similar(ψ, complex(T), basis.model.n_components, basis.fft_size...), + Hψ_real = similar(Hψ, complex(T), basis.model.n_components, basis.fft_size...)) end - parallel_loop_over_range(1:size(ψ, 2); allocate_local_storage) do iband, storage + parallel_loop_over_range(1:size(ψ, 3); allocate_local_storage) do iband, storage to = TimerOutput() # Thread-local timer output # Take ψi, IFFT it to ψ_real, apply each term to Hψ_fourier and Hψ_real, and add it # to Hψ. storage.Hψ_real .= 0 storage.Hψ_fourier .= 0 - ifft!(storage.ψ_real, H.basis, H.kpoint, ψ[:, iband]) + ifft!(storage.ψ_real, basis, H.kpoint, ψ[:, :, iband]) for op in H.optimized_operators @timeit to "$(nameof(typeof(op)))" begin apply!((; fourier=storage.Hψ_fourier, real=storage.Hψ_real), op, - (; fourier=ψ[:, iband], real=storage.ψ_real)) + (; fourier=ψ[:, :, iband], real=storage.ψ_real)) end end - Hψ[:, iband] .= storage.Hψ_fourier - fft!(storage.Hψ_fourier, H.basis, H.kpoint, storage.Hψ_real) - Hψ[:, iband] .+= storage.Hψ_fourier + Hψ[:, :, iband] .= storage.Hψ_fourier + fft!(storage.Hψ_fourier, basis, H.kpoint, storage.Hψ_real) + Hψ[:, :, iband] .+= storage.Hψ_fourier if Threads.threadid() == 1 merge!(DFTK.timer, to; tree_point=[t.name for t in DFTK.timer.timer_stack]) @@ -131,10 +143,10 @@ Base.:*(H::Hamiltonian, ψ) = mul!(deepcopy(ψ), H, ψ) end # Fast version, specialized on DFT models. Minimizes the number of FFTs and allocations -@views @timing "DftHamiltonian multiplication" function LinearAlgebra.mul!(Hψ::AbstractArray, +@views @timing "DftHamiltonian multiplication" function LinearAlgebra.mul!(Hψ::AbstractArray3, H::DftHamiltonianBlock, - ψ::AbstractArray) - n_bands = size(ψ, 2) + ψ::AbstractArray3) + n_bands = size(ψ, 3) iszero(n_bands) && return Hψ # Nothing to do if ψ empty have_divAgrad = !isnothing(H.divAgrad_op) @@ -146,17 +158,21 @@ end ψ_real = storage.ψ_reals @timeit to "local+kinetic" begin - ifft!(ψ_real, H.basis, H.kpoint, ψ[:, iband]; normalize=false) - ψ_real .*= potential - fft!(Hψ[:, iband], H.basis, H.kpoint, ψ_real; normalize=false) # overwrites ψ_real - Hψ[:, iband] .+= H.fourier_op.multiplier .* ψ[:, iband] + ifft!(ψ_real, H.basis, H.kpoint, ψ[:, :, iband]; normalize=false) + for σ = 1:H.basis.model.n_components + ψ_real[σ, :, :, :] .*= potential + end + fft!(Hψ[:, :, iband], H.basis, H.kpoint, ψ_real; normalize=false) # overwrites ψ_real + for σ = 1:H.basis.model.n_components + Hψ[σ, :, iband] .+= H.fourier_op.multiplier .* ψ[σ, :, iband] + end end if have_divAgrad @timeit to "divAgrad" begin - apply!((; fourier=Hψ[:, iband], real=nothing), + apply!((; fourier=Hψ[:, :, iband], real=nothing), H.divAgrad_op, - (; fourier=ψ[:, iband], real=nothing); + (; fourier=ψ[:, :, iband], real=nothing); ψ_scratch=ψ_real) end end @@ -180,11 +196,10 @@ end Hψ end - """ Get energies and Hamiltonian kwargs is additional info that might be useful for the energy terms to precompute -(eg the density ρ) +(e.g., the density ρ) """ @timing function energy_hamiltonian(basis::PlaneWaveBasis, ψ, occupation; kwargs...) # it: index into terms, ik: index into kpoints diff --git a/src/terms/anyonic.jl b/src/terms/anyonic.jl index 8a4000936a..c0cfa20ae8 100644 --- a/src/terms/anyonic.jl +++ b/src/terms/anyonic.jl @@ -72,6 +72,7 @@ function (A::Anyonic)(basis) @assert basis.model.lattice[2, 1] == basis.model.lattice[1, 2] == 0 @assert basis.model.lattice[1, 1] == basis.model.lattice[2, 2] @assert basis.model.n_spin_components == 1 + @assert basis.model.n_components == 1 TermAnyonic(basis, eltype(basis)(A.hbar), eltype(basis)(A.β)) end @@ -103,7 +104,7 @@ function ene_ops(term::TermAnyonic, basis::PlaneWaveBasis{T}, ψ, occupation; ρ, kwargs...) where {T} hbar = term.hbar β = term.β - @assert ψ !== nothing # the hamiltonian depends explicitly on ψ + @assert !isnothing(ψ) # the hamiltonian depends explicitly on ψ # Compute A in Fourier domain # curl A = 2π ρ, ∇⋅A = 0 @@ -153,8 +154,9 @@ function ene_ops(term::TermAnyonic, basis::PlaneWaveBasis{T}, ψ, occupation; ops_ham = [ops_energy..., RealSpaceMultiplication(basis, basis.kpoints[1], eff_pot_real)] E = zero(T) - for iband = 1:size(ψ[1], 2) - ψnk = @views ψ[1][:, iband] + @assert length(ψ) == 1 + for iband = 1:size(ψ[1], 3) + ψnk = @views ψ[1][:, :, iband] # TODO optimize this for op in ops_energy E += occupation[1][iband] * real(dot(ψnk, op * ψnk)) diff --git a/src/terms/entropy.jl b/src/terms/entropy.jl index 006ed95f4f..e1155a594d 100644 --- a/src/terms/entropy.jl +++ b/src/terms/entropy.jl @@ -25,8 +25,8 @@ function ene_ops(term::TermEntropy, basis::PlaneWaveBasis{T}, ψ, occupation; eigenvalues = kwargs[:eigenvalues] E = zero(T) - for (ik, k) in enumerate(basis.kpoints) - for iband = 1:size(ψ[ik], 2) + for (ik, ψk) in enumerate(ψ) + for iband = 1:size(ψk, 3) E -= (temperature * basis.kweights[ik] * filled_occupation(basis.model) diff --git a/src/terms/kinetic.jl b/src/terms/kinetic.jl index 311f83d4a5..93d8d739d6 100644 --- a/src/terms/kinetic.jl +++ b/src/terms/kinetic.jl @@ -48,10 +48,10 @@ end E = zero(T) for (ik, ψk) in enumerate(ψ) - for iband = 1:size(ψk, 2) - ψnk = @views ψk[:, iband] + for iband = 1:size(ψk, 3), σ = 1:size(ψk, 1) + ψσkn = @views ψk[σ, :, iband] E += (basis.kweights[ik] * occupation[ik][iband] - * real(dot(ψnk, Diagonal(term.kinetic_energies[ik]), ψnk))) + * real(dot(ψσkn, Diagonal(term.kinetic_energies[ik]), ψσkn))) end end E = mpi_sum(E, basis.comm_kpts) diff --git a/src/terms/magnetic.jl b/src/terms/magnetic.jl index 4fc26384b1..b21d9568bf 100644 --- a/src/terms/magnetic.jl +++ b/src/terms/magnetic.jl @@ -39,9 +39,9 @@ function ene_ops(term::TermMagnetic, basis::PlaneWaveBasis{T}, ψ, occupation; end E = zero(T) - for (ik, k) in enumerate(basis.kpoints) - for iband = 1:size(ψ[1], 2) - ψnk = @views ψ[ik][:, iband] + for (ik, ψk) in enumerate(ψ) + for iband = 1:size(ψk, 3) + ψnk = @views ψ[ik][:, :, iband] # TODO optimize this E += basis.kweights[ik] * occupation[ik][iband] * real(dot(ψnk, ops[ik] * ψnk)) end diff --git a/src/terms/nonlocal.jl b/src/terms/nonlocal.jl index a1fed882b7..a00476ea20 100644 --- a/src/terms/nonlocal.jl +++ b/src/terms/nonlocal.jl @@ -37,9 +37,11 @@ end E = zero(T) for (ik, ψk) in enumerate(ψ) - Pψk = term.ops[ik].P' * ψk # nproj x nband - band_enes = dropdims(sum(real.(conj.(Pψk) .* (term.ops[ik].D * Pψk)), dims=1), dims=1) - E += basis.kweights[ik] * sum(band_enes .* occupation[ik]) + for σ = 1:basis.model.n_components + Pψk = term.ops[ik].P' * ψk[σ, :, :] # nproj x nband + band_enes = dropdims(sum(real.(conj.(Pψk) .* (term.ops[ik].D * Pψk)), dims=1), dims=1) + E += basis.kweights[ik] * sum(band_enes .* occupation[ik]) + end end E = mpi_sum(E, basis.comm_kpts) @@ -76,13 +78,15 @@ end structure_factors = [cis2pi(-dot(p, r)) for p in G_plus_k] P = structure_factors .* form_factors ./ sqrt(unit_cell_volume) - forces[idx] += map(1:3) do α + @views forces[idx] += map(1:3) do α dPdR = [-2T(π)*im*p[α] for p in G_plus_k] .* P - ψk = ψ[ik] - δHψk = P * (C * (dPdR' * ψk)) - -sum(occupation[ik][iband] * basis.kweights[ik] * - 2real(dot(ψk[:, iband], δHψk[:, iband])) - for iband=1:size(ψk, 2)) + mapreduce(+, 1:model.n_components) do σ + ψkσ = ψ[ik][σ, :, :] + δHψkσ = P * (C * (dPdR' * ψkσ)) + -sum(occupation[ik][iband] * basis.kweights[ik] * + 2real(dot(ψkσ[:, iband], δHψkσ[:, iband])) + for iband=1:size(ψkσ, 2)) + end end # α end # r end # kpt @@ -254,11 +258,11 @@ function PDPψk(basis, positions, psp_groups, kpt, kpt_minus_q, ψk) D = build_projection_coefficients(basis, psp_groups) P = build_projection_vectors(basis, kpt, psp_groups, positions) P_minus_q = build_projection_vectors(basis, kpt_minus_q, psp_groups, positions) - P * (D * P_minus_q' * ψk) + stack([P * (D * P_minus_q' * ψkσ) for ψkσ in eachslice(ψk; dims=1)]; dims=1) end -function compute_dynmat_δH(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, occupation, - δψ, δoccupation, q) where {T} +@views function compute_dynmat_δH(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, + occupation, δψ, δoccupation, q) where {T} S = complex(T) model = basis.model psp_groups = [group for group in model.atom_groups @@ -278,16 +282,18 @@ function compute_dynmat_δH(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, for idx in group δforces[idx] += map(1:3) do α δHψk = derivative_wrt_αs(model.positions, α, idx) do positions_αs - PDPψk(basis, positions_αs, psp_groups, kpt_plus_q, kpt, ψ[ik]) + PDPψk(basis, positions_αs, psp_groups, kpt_plus_q, kpt, ψk) end δHψk_plus_q = derivative_wrt_αs(model.positions, α, idx) do positions_αs - PDPψk(basis, positions_αs, psp_groups, kpt, kpt, ψ[ik]) + PDPψk(basis, positions_αs, psp_groups, kpt, kpt, ψk) + end + mapreduce(+, 1:basis.model.n_components) do σ + -sum( 2occupation[ik][iband] * basis.kweights[ik] + * dot(δψk_plus_q[σ, :, iband], δHψk[σ, :, iband]) + + δoccupation[ik][iband] * basis.kweights[ik] + * 2real(dot(ψk[σ, :, iband], δHψk_plus_q[σ, :, iband])) + for iband=1:size(ψk, 3)) end - -sum( 2occupation[ik][iband] * basis.kweights[ik] - * dot(δψk_plus_q[:, iband], δHψk[:, iband]) - + δoccupation[ik][iband] * basis.kweights[ik] - * 2real(dot(ψk[:, iband], δHψk_plus_q[:, iband])) - for iband=1:size(ψk, 2)) end end end @@ -330,9 +336,10 @@ end PDPψk(basis, positions_βsαs, psp_groups, kpt, kpt, ψ[ik]) end end - dynmat_δ²H[β, s, α, s] += sum(occupation[ik][n] * basis.kweights[ik] * - dot(ψ[ik][:, n], δ²Hψ[ik][:, n]) - for n=1:size(ψ[ik], 2)) + dynmat_δ²H[β, s, α, s] += mapreduce(+, 1:basis.model.n_components) do σ + sum(occupation[ik][n] * basis.kweights[ik] * dot(ψ[ik][σ, :, n], δ²Hψ[ik][σ, :, n]) + for n=1:size(ψ[ik], 3)) + end end end @@ -345,6 +352,7 @@ function compute_δHψ_αs(::TermAtomicNonlocal, basis::PlaneWaveBasis{T}, ψ, model = basis.model psp_groups = [group for group in model.atom_groups if model.atoms[first(group)] isa ElementPsp] + isempty(psp_groups) && return nothing ψ_minus_q = transfer_blochwave_equivalent_to_actual(basis, ψ, -q) map(enumerate(basis.kpoints)) do (ik, kpt) diff --git a/src/terms/operators.jl b/src/terms/operators.jl index 9d483e18f3..10ea390eb6 100644 --- a/src/terms/operators.jl +++ b/src/terms/operators.jl @@ -12,7 +12,7 @@ abstract type RealFourierOperator end # Unoptimized fallback, intended for exploratory use only. # For performance, call through Hamiltonian which saves FFTs. -function LinearAlgebra.mul!(Hψ::AbstractVector, op::RealFourierOperator, ψ::AbstractVector) +function LinearAlgebra.mul!(Hψ::AbstractMatrix, op::RealFourierOperator, ψ::AbstractMatrix) ψ_real = ifft(op.basis, op.kpoint, ψ) Hψ_fourier = similar(ψ) Hψ_real = similar(ψ_real) @@ -24,13 +24,20 @@ function LinearAlgebra.mul!(Hψ::AbstractVector, op::RealFourierOperator, ψ::Ab Hψ .= Hψ_fourier .+ fft(op.basis, op.kpoint, Hψ_real) Hψ end -function LinearAlgebra.mul!(Hψ::AbstractMatrix, op::RealFourierOperator, ψ::AbstractMatrix) - @views for i = 1:size(ψ, 2) - mul!(Hψ[:, i], op, ψ[:, i]) +function LinearAlgebra.mul!(Hψ::AbstractArray3, op::RealFourierOperator, ψ::AbstractArray3) + @views for n = 1:size(ψ, 3) + mul!(Hψ[:, :, n], op, ψ[:, :, n]) end Hψ end Base.:*(op::RealFourierOperator, ψ) = mul!(similar(ψ), op, ψ) +# Default transformation: from two components tensors to diagonal matrices for each +# reciprocal vector. +function Matrix(op::RealFourierOperator) + n_Gk = length(G_vectors(op.basis, op.kpoint)) + n_components = op.basis.model.n_components + reshape(Array(op), n_components*n_Gk, n_components*n_Gk) +end """ Noop operation: don't do anything. @@ -41,9 +48,10 @@ struct NoopOperator{T <: Real} <: RealFourierOperator kpoint::Kpoint{T} end apply!(Hψ, op::NoopOperator, ψ) = nothing -function Matrix(op::NoopOperator) +function Array(op::NoopOperator) n_Gk = length(G_vectors(op.basis, op.kpoint)) - zeros_like(op.basis.G_vectors, eltype(op.basis), n_Gk, n_Gk) + n_components = op.basis.model.n_components + zeros_like(op.basis.G_vectors, eltype(op.basis), n_components, n_Gk, n_components, n_Gk) end """ @@ -58,22 +66,28 @@ struct RealSpaceMultiplication{T <: Real, AT <: AbstractArray} <: RealFourierOpe potential::AT end function apply!(Hψ, op::RealSpaceMultiplication, ψ) - Hψ.real .+= op.potential .* ψ.real + @views for σ = 1:op.basis.model.n_components + Hψ.real[σ, :, :, :] .+= op.potential .* ψ.real[σ, :, :, :] + end + Hψ.real end -function Matrix(op::RealSpaceMultiplication) +function Array(op::RealSpaceMultiplication) # V(G, G') = = 1/sqrt(Ω) pot_fourier = fft(op.basis, op.potential) n_G = length(G_vectors(op.basis, op.kpoint)) - H = zeros(complex(eltype(op.basis)), n_G, n_G) - for (j, G′) in enumerate(G_vectors(op.basis, op.kpoint)) - for (i, G) in enumerate(G_vectors(op.basis, op.kpoint)) - # G_vectors(basis)[ind_ΔG] = G - G' - ind_ΔG = index_G_vectors(op.basis, G - G′) - if isnothing(ind_ΔG) - error("For full matrix construction, the FFT size must be " * - "large enough so that Hamiltonian applications are exact") + n_components = op.basis.model.n_components + H = zeros(complex(eltype(op.basis)), n_components, n_G, n_components, n_G) + for σ = 1:n_components + for (j, G′) in enumerate(G_vectors(op.basis, op.kpoint)) + for (i, G) in enumerate(G_vectors(op.basis, op.kpoint)) + # G_vectors(basis)[ind_ΔG] = G - G' + ind_ΔG = index_G_vectors(op.basis, G - G′) + if isnothing(ind_ΔG) + error("For full matrix construction, the FFT size must be " * + "large enough so that Hamiltonian applications are exact") + end + H[σ, i, σ, j] = pot_fourier[ind_ΔG] / sqrt(op.basis.model.unit_cell_volume) end - H[i, j] = pot_fourier[ind_ΔG] / sqrt(op.basis.model.unit_cell_volume) end end H @@ -91,9 +105,17 @@ struct FourierMultiplication{T <: Real, AT <: AbstractArray} <: RealFourierOpera multiplier::AT end function apply!(Hψ, op::FourierMultiplication, ψ) - Hψ.fourier .+= op.multiplier .* ψ.fourier + @views for σ = 1:op.basis.model.n_components + Hψ.fourier[σ, :] .+= op.multiplier .* ψ.fourier[σ, :] + end + Hψ.fourier +end +function Array(op::FourierMultiplication) + n_Gk = length(G_vectors(op.basis, op.kpoint)) + n_components = op.basis.model.n_components + D = Diagonal(op.multiplier) + reshape(kron(D, I(n_components)), n_components, n_Gk, n_components, n_Gk) end -Matrix(op::FourierMultiplication) = Array(Diagonal(op.multiplier)) """ Nonlocal operator in Fourier space in Kleinman-Bylander format, @@ -108,9 +130,20 @@ struct NonlocalOperator{T <: Real, PT, DT} <: RealFourierOperator D::DT end function apply!(Hψ, op::NonlocalOperator, ψ) - Hψ.fourier .+= op.P * (op.D * (op.P' * ψ.fourier)) + @views for σ = 1:op.basis.model.n_components + Hψ.fourier[σ, :, :] .+= op.P * (op.D * (op.P' * ψ.fourier[σ, :, :])) + end + Hψ.fourier +end +function Array(op::NonlocalOperator) + n_Gk = length(G_vectors(op.basis, op.kpoint)) + n_components = op.basis.model.n_components + H = zeros(complex(eltype(op.basis)), n_components, n_Gk, n_components, n_Gk) + for σ = 1:op.basis.model.n_components + H[σ, :, σ, :] = op.P * op.D * op.P' + end + H end -Matrix(op::NonlocalOperator) = op.P * op.D * op.P' """ Magnetic field operator A⋅(-i∇). @@ -122,15 +155,20 @@ struct MagneticFieldOperator{T <: Real, AT} <: RealFourierOperator end function apply!(Hψ, op::MagneticFieldOperator, ψ) # TODO this could probably be better optimized - for α = 1:3 + @views for α = 1:3 iszero(op.Apot[α]) && continue pα = [p[α] for p in Gplusk_vectors_cart(op.basis, op.kpoint)] - ∂αψ_fourier = pα .* ψ.fourier + ∂αψ_fourier = similar(ψ.fourier) + for σ = 1:op.basis.model.n_components + ∂αψ_fourier[σ, :] = pα .* ψ.fourier[σ, :] + end ∂αψ_real = ifft(op.basis, op.kpoint, ∂αψ_fourier) - Hψ.real .+= op.Apot[α] .* ∂αψ_real + for σ = 1:op.basis.model.n_components + Hψ.real[σ, :, :, :] .+= op.Apot[α] .* ∂αψ_real[σ, :, :, :] + end end end -# TODO Implement Matrix(op::MagneticFieldOperator) +# TODO Implement Array(op::MagneticFieldOperator) @doc raw""" Nonlocal "divAgrad" operator ``-½ ∇ ⋅ (A ∇)`` where ``A`` is a scalar field on the @@ -143,17 +181,27 @@ struct DivAgradOperator{T <: Real, AT} <: RealFourierOperator A::AT end function apply!(Hψ, op::DivAgradOperator, ψ; - ψ_scratch=zeros(complex(eltype(op.basis)), op.basis.fft_size...)) + ψ_scratch=zeros(complex(eltype(op.basis)), + op.basis.model.n_components, op.basis.fft_size...)) # TODO: Performance improvements: Unscaled plans, avoid remaining allocations # (which are only on the small k-point-specific Fourier grid G_plus_k = [[p[α] for p in Gplusk_vectors_cart(op.basis, op.kpoint)] for α = 1:3] - for α = 1:3 - ∂αψ_real = ifft!(ψ_scratch, op.basis, op.kpoint, im .* G_plus_k[α] .* ψ.fourier) - A∇ψ = fft(op.basis, op.kpoint, ∂αψ_real .* op.A) - Hψ.fourier .-= im .* G_plus_k[α] .* A∇ψ ./ 2 + @views for α = 1:3 + ∂αψ_fourier = similar(ψ.fourier) + for σ = 1:op.basis.model.n_components + ∂αψ_fourier[σ, :] = im .* G_plus_k[α] .* ψ.fourier[σ, :] + end + ∂αψ_real = ifft!(ψ_scratch, op.basis, op.kpoint, ∂αψ_fourier) + for σ = 1:op.basis.model.n_components + ∂αψ_real[σ, :, :, :] .*= op.A + end + A∇ψ = fft(op.basis, op.kpoint, ∂αψ_real) + for σ = 1:op.basis.model.n_components + Hψ.fourier[σ, :] .-= im .* G_plus_k[α] .* A∇ψ[σ, :] ./ 2 + end end end -# TODO Implement Matrix(op::DivAgrad) +# TODO Implement Array(op::DivAgrad) # Optimize RFOs by combining terms that can be combined diff --git a/src/transfer.jl b/src/transfer.jl index 88956c22fe..941341b534 100644 --- a/src/transfer.jl +++ b/src/transfer.jl @@ -108,13 +108,14 @@ Transfer an array `ψk` defined on basis_in ``k``-point kpt_in to basis_out ``k` function transfer_blochwave_kpt(ψk_in, basis_in::PlaneWaveBasis, kpt_in::Kpoint, basis_out::PlaneWaveBasis, kpt_out::Kpoint) kpt_in == kpt_out && return copy(ψk_in) - @assert length(G_vectors(basis_in, kpt_in)) == size(ψk_in, 1) + @assert length(G_vectors(basis_in, kpt_in)) == size(ψk_in, 2) idcsk_in, idcsk_out = transfer_mapping(basis_in, kpt_in, basis_out, kpt_out) - n_bands = size(ψk_in, 2) - ψk_out = similar(ψk_in, length(G_vectors(basis_out, kpt_out)), n_bands) + n_bands = size(ψk_in, 3) + n_components = basis_in.model.n_components + ψk_out = similar(ψk_in, n_components, length(G_vectors(basis_out, kpt_out)), n_bands) ψk_out .= 0 - ψk_out[idcsk_out, :] .= ψk_in[idcsk_in, :] + ψk_out[:, idcsk_out, :] .= ψk_in[:, idcsk_in, :] ψk_out end @@ -124,6 +125,7 @@ Transfer Bloch wave between two basis sets. Limited feature set. """ function transfer_blochwave(ψ_in, basis_in::PlaneWaveBasis, basis_out::PlaneWaveBasis) @assert basis_in.model.lattice == basis_out.model.lattice + @assert basis_in.model.n_components == basis_out.model.n_components @assert length(basis_in.kpoints) == length(basis_out.kpoints) @assert all(basis_in.kpoints[ik].coordinate == basis_out.kpoints[ik].coordinate for ik = 1:length(basis_in.kpoints)) @@ -215,10 +217,12 @@ element of `basis.kpoints` equivalent to ``k-q``. for (ik, kpt) in enumerate(basis.kpoints) # … then perform the multiplication with f in real space and get the Fourier # coefficients. - for n = 1:size(ψ[ik], 2) - fψ[ik][:, n] = fft(basis, kpt, - ifft(basis, ψ_minus_q[ik].kpt, ψ_minus_q[ik].ψk[:, n]) - .* f_real[:, :, :, kpt.spin]) + for n = 1:size(ψ[ik], 3) + ψkn_minus_q_real = ifft(basis, ψ_minus_q[ik].kpt, ψ_minus_q[ik].ψk[:, :, n]) + for σ = 1:basis.model.n_components + fψ[ik][σ, :, n] = fft(basis, kpt, ψkn_minus_q_real[σ, :, :, :] + .* f_real[:, :, :, kpt.spin]) + end end end fψ diff --git a/src/workarounds/forwarddiff_rules.jl b/src/workarounds/forwarddiff_rules.jl index 998d4ca8b9..720085a367 100644 --- a/src/workarounds/forwarddiff_rules.jl +++ b/src/workarounds/forwarddiff_rules.jl @@ -47,12 +47,13 @@ function Base.:*(p::AbstractFFTs.Plan, x::AbstractArray{<:Complex{<:Dual{Tg}}}) end end -function build_fft_plans!(tmp::AbstractArray{Complex{T}}) where {T<:Dual} - opFFT = AbstractFFTs.plan_fft(tmp) - opBFFT = AbstractFFTs.plan_bfft(tmp) +function build_fft_plans!(tmp::AbstractArray{Complex{T}}; + region=1:ndims(tmp)) where {T<:ForwardDiff.Dual} + opFFT = AbstractFFTs.plan_fft(tmp, region) + opBFFT = AbstractFFTs.plan_bfft(tmp, region) ipFFT = DummyInplace{typeof(opFFT)}(opFFT) ipBFFT = DummyInplace{typeof(opBFFT)}(opBFFT) - ipFFT, opFFT, ipBFFT, opBFFT + (; ipFFT, opFFT, ipBFFT, opBFFT) end # Convert and strip off duals if that's the only way @@ -107,6 +108,7 @@ function construct_value(model::Model{T}) where {T <: Dual} temperature=ForwardDiff.value(model.temperature), model.smearing, εF=ForwardDiff.value(model.εF), + model.n_components, model.spin_polarization, model.symmetries, # Can be safely disabled: this has been checked for basis.model @@ -193,7 +195,7 @@ function self_consistent_field(basis_dual::PlaneWaveBasis{T}; # TODO Could add δresults[α].δVind the dual part of the total local potential in ham_dual # and in this way return a ham that represents also the total change in Hamiltonian - energies, ham = energy_hamiltonian(basis_dual, ψ, occupation; ρ, eigenvalues, εF) + (; energies, ham) = energy_hamiltonian(basis_dual, ψ, occupation; ρ, eigenvalues, εF) # This has to be changed whenever the scfres structure changes (; ham, basis=basis_dual, energies, ρ, eigenvalues, occupation, εF, ψ, diff --git a/test/anyons.jl b/test/anyons.jl index 771cecf635..545b71d966 100644 --- a/test/anyons.jl +++ b/test/anyons.jl @@ -26,19 +26,19 @@ end # See https://arxiv.org/pdf/1901.10739.pdf # We test E11, which is a quantity defined in the above paper - ## Unit cell. Having one of the lattice vectors as zero means a 2D system + # Unit cell. Having one of the lattice vectors as zero means a 2D system a = 14 lattice = a .* [[1 0 0.]; [0 1 0]; [0 0 0]]; - ## Confining scalar potential - pot(x, y, z) = ((x - a/2)^2 + (y - a/2)^2); + # Confining scalar potential + pot(x, y, _) = ((x - a/2)^2 + (y - a/2)^2); - ## Parameters + # Parameters Ecut = 30 n_electrons = 1 β = 5; - ## Collect all the terms, build and run the model + # Collect all the terms, build and run the model terms = [Kinetic(; scaling_factor=2), ExternalFromReal(X -> pot(X...)), Anyonic(1, β) diff --git a/test/chi0.jl b/test/chi0.jl index b558696757..1bd4687b24 100644 --- a/test/chi0.jl +++ b/test/chi0.jl @@ -73,7 +73,7 @@ function test_chi0(testcase; symmetries=false, temperature=0, spin_polarization= # Test apply_χ0 without extra bands ψ_occ, occ_occ = DFTK.select_occupied_orbitals(basis, scfres.ψ, scfres.occupation; threshold=scfres.occupation_threshold) - ε_occ = [scfres.eigenvalues[ik][1:size(ψk, 2)] for (ik, ψk) in enumerate(ψ_occ)] + ε_occ = [scfres.eigenvalues[ik][1:size(ψk, 3)] for (ik, ψk) in enumerate(ψ_occ)] diff_applied_χ0_noextra = apply_χ0(scfres.ham, ψ_occ, occ_occ, scfres.εF, ε_occ, δV; scfres.occupation_threshold) diff --git a/test/compute_density.jl b/test/compute_density.jl index 0b01dcb26c..65b3dddcd3 100644 --- a/test/compute_density.jl +++ b/test/compute_density.jl @@ -39,21 +39,21 @@ end function test_orthonormality(basis, ψ; tol=1e-8) - n_k = length(ψ) - n_states = size(ψ[1], 2) + n_states = size(ψ[1], 3) n_fft = prod(basis.fft_size) for (ik, kpt) in enumerate(basis.kpoints) - # Fourier-transform the wave functions to real space - ψk = ψ[ik] - ψk_real = cat((DFTK.ifft(basis, kpt, ψik) for ψik in eachcol(ψk))..., dims=4) + for σ = 1:basis.model.n_components + # Fourier-transform the wave functions to real space + ψk = ψ[ik][σ, :, :] + ψk_real = cat((DFTK.ifft(basis, kpt, ψik) for ψik in eachcol(ψk))...; dims=4) - T = real(eltype(ψk_real)) - ψk_real_mat = reshape(ψk_real, n_fft, n_states) - ψk_real_overlap = adjoint(ψk_real_mat) * ψk_real_mat - nondiag = ψk_real_overlap - I * (n_fft / basis.model.unit_cell_volume) + ψk_real_mat = reshape(ψk_real, n_fft, n_states) + ψk_real_overlap = adjoint(ψk_real_mat) * ψk_real_mat + nondiag = ψk_real_overlap - I * (n_fft / basis.model.unit_cell_volume) - @test maximum(abs.(nondiag)) < tol + @test maximum(abs, nondiag) < tol + end end end @@ -72,7 +72,7 @@ @test ham_full.basis.fft_size == ham_ir.basis.fft_size # Test density is the same in both schemes, and symmetric wrt the basis symmetries - @test maximum(abs.(ρ_ir - ρ_full)) < 10tol + @test maximum(abs, ρ_ir - ρ_full) < 10tol @test maximum(abs, DFTK.symmetrize_ρ(ham_ir.basis, ρ_ir) - ρ_ir) < tol # Test local potential is the same in both schemes @@ -100,7 +100,8 @@ # yields an eigenfunction of the Hamiltonian # Also check that the accumulated partial densities are equal # to the returned density. - for (ik, kpt) in enumerate(ham_ir.basis.kpoints) + @assert ham_ir.basis.model.n_components == 1 + for ik in eachindex(ham_ir.basis.kpoints) Hk_ir = ham_ir.blocks[ik] for symop in ham_ir.basis.symmetries Skpoint, ψSk = DFTK.apply_symop(symop, ham_ir.basis, Hk_ir.kpoint, ψ_ir[ik]) @@ -113,14 +114,14 @@ range = 1:length(eigenvalues_ir[ik]) - n_ignore for iband in range - ψnSk = ψSk[:, iband] + ψnSk = ψSk[1, :, iband] residual = norm(Hk_full * ψnSk - eigenvalues_ir[ik][iband] * ψnSk) @test residual < 10tol end # iband end # symop end # k - end # eigenvectors - end # testset + end # eigenvectors + end # testset end test_full_vs_irreducible("silicon (3,2,3)", silicon, [3, 2, 3], Ecut=5, tol=1e-10) diff --git a/test/compute_jacobian_eigen.jl b/test/compute_jacobian_eigen.jl index 8982ca2df6..6c75403ee7 100644 --- a/test/compute_jacobian_eigen.jl +++ b/test/compute_jacobian_eigen.jl @@ -8,6 +8,8 @@ using DFTK: precondprep!, FunctionPreconditioner using LinearMaps function eigen_ΩplusK(basis::PlaneWaveBasis{T}, ψ, occupation, numval) where {T} + n_components = basis.model.n_components + @assert n_components == 1 pack(ψ) = reinterpret_real(pack_ψ(ψ)) unpack(x) = unpack_ψ(reinterpret_complex(x), size.(ψ)) @@ -18,33 +20,36 @@ function eigen_ΩplusK(basis::PlaneWaveBasis{T}, ψ, occupation, numval) where { # preconditioner Pks = [PreconditionerTPA(basis, kpt) for kpt in basis.kpoints] - for (ik, ψk) in enumerate(ψ) - precondprep!(Pks[ik], ψk) + for ik = 1:length(Pks) + precondprep!(Pks[ik], to_composite_σG(basis, basis.kpoints[ik], ψ[ik])) end function f_ldiv!(x, y) for n = 1:size(y, 2) δψ = unpack(y[:, n]) - proj_tangent!(δψ, ψ) - Pδψ = [ Pks[ik] \ δψk for (ik, δψk) in enumerate(δψ)] - proj_tangent!(Pδψ, ψ) + proj_tangent!(δψ, basis, ψ) + Pδψ = [Pks[ik] \ δψk for (ik, δψk) in enumerate(δψ)] + proj_tangent!(Pδψ, basis, ψ) x[:, n] .= pack(Pδψ) end x end # random starting point on the tangent space to avoid eigenvalue 0 - n_bands = size(ψ[1], 2) x0 = map(1:numval) do _ - initial = map(basis.kpoints) do kpt + initial = map(enumerate(basis.kpoints)) do (ik, kpt) n_Gk = length(G_vectors(basis, kpt)) - randn(Complex{eltype(basis)}, n_Gk, n_bands) + randn(Complex{T}, n_components, n_Gk, size(ψ[ik], 3)) end - pack(proj_tangent(initial, ψ)) + pack(proj_tangent(basis, initial, ψ)) end x0 = stack(x0) # Rayleigh-coefficients - Λ = [ψk'Hψk for (ψk, Hψk) in zip(ψ, H * ψ)] + Λ = map(enumerate(zip(ψ, H * ψ))) do (ik, (ψk, Hψk)) + ψk = to_composite_σG(basis, basis.kpoints[ik], ψk) + Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk) + from_composite_σG(basis, basis.kpoints[ik], ψk'Hψk) + end # mapping of the linear system on the tangent space function ΩpK(x) diff --git a/test/fourier_transforms.jl b/test/fourier_transforms.jl index 7bbed3c091..af9ab6ce45 100644 --- a/test/fourier_transforms.jl +++ b/test/fourier_transforms.jl @@ -17,15 +17,15 @@ f2_R = ifft(pw, f2_G) f3_G = fft!(similar(f_R), pw, f_R) - @test maximum(abs.(f2_G - f_G)) < 1e-12 - @test maximum(abs.(f2_R - f_R)) < 1e-12 - @test maximum(abs.(f3_G - f_G)) < 1e-12 + @test maximum(abs, f2_G - f_G) < 1e-12 + @test maximum(abs, f2_R - f_R) < 1e-12 + @test maximum(abs, f3_G - f_G) < 1e-12 ifft_mat = DFTK.ifft_matrix(pw) fft_mat = DFTK.fft_matrix(pw) - @test maximum(abs.(ifft_mat * fft_mat - I)) < 1e-12 - @test maximum(abs.(ifft_mat * vec(f_G) - vec(f_R))) < 1e-12 - @test maximum(abs.(fft_mat * vec(f_R) - vec(f_G))) < 1e-12 + @test maximum(abs, ifft_mat * fft_mat - I) < 1e-12 + @test maximum(abs, ifft_mat * vec(f_G) - vec(f_R)) < 1e-12 + @test maximum(abs, fft_mat * vec(f_R) - vec(f_G)) < 1e-12 end @testset "Transformation on the spherical basis set" begin @@ -41,7 +41,7 @@ f2_R = similar(f_R) ifft!(f2_R, pw, kpt, f2_G) - @test maximum(abs.(f2_G - f_G)) < 1e-12 - @test maximum(abs.(f2_R - f_R)) < 1e-12 + @test maximum(abs, f2_G - f_G) < 1e-12 + @test maximum(abs, f2_R - f_R) < 1e-12 end end diff --git a/test/hamiltonian_consistency.jl b/test/hamiltonian_consistency.jl index 11128503d8..d86102c3a8 100644 --- a/test/hamiltonian_consistency.jl +++ b/test/hamiltonian_consistency.jl @@ -1,9 +1,9 @@ @testsetup module HamConsistency using Test using DFTK -using Logging -using DFTK: mpi_sum +using DFTK: mpi_sum, filled_occupation, DivAgradOperator, MagneticFieldOperator using LinearAlgebra +using Logging using ..TestCases: silicon testcase = silicon @@ -11,10 +11,9 @@ function test_matrix_repr_operator(hamk, ψk; atol=1e-8) for operator in hamk.operators try operator_matrix = Matrix(operator) - @test norm(operator_matrix*ψk - operator*ψk) < atol - catch e - allowed_missing_operators = Union{DFTK.DivAgradOperator, - DFTK.MagneticFieldOperator} + @test norm(operator_matrix*ψk[1, :, :] - (operator*ψk)[1, :, :]) < atol + catch + allowed_missing_operators = Union{DivAgradOperator, MagneticFieldOperator} @assert operator isa allowed_missing_operators @info "Matrix of operator $(nameof(typeof(operator))) not yet supported" maxlog=1 end @@ -36,10 +35,12 @@ function test_consistency_term(term; rtol=1e-4, atol=1e-8, ε=1e-6, kgrid=[1, 2, n_electrons = testcase.n_electrons n_bands = div(n_electrons, 2, RoundUp) - filled_occ = DFTK.filled_occupation(model) + filled_occ = filled_occupation(model) - ψ = [Matrix(qr(randn(ComplexF64, length(G_vectors(basis, kpt)), n_bands)).Q) - for kpt in basis.kpoints] + ψ = map(basis.kpoints) do kpt + Q = Matrix(qr(randn(ComplexF64, length(G_vectors(basis, kpt)), n_bands)).Q) + from_composite_σG(basis, kpt, Q) + end occupation = [filled_occ * rand(n_bands) for _ = 1:length(basis.kpoints)] occ_scaling = n_electrons / sum(sum(occupation)) occupation = [occ * occ_scaling for occ in occupation] @@ -63,10 +64,11 @@ function test_consistency_term(term; rtol=1e-4, atol=1e-8, ε=1e-6, kgrid=[1, 2, diff = (compute_E(ε) - compute_E(-ε)) / (2ε) diff_predicted = 0.0 - for ik = 1:length(basis.kpoints) - Hψk = ham.blocks[ik]*ψ[ik] + for (ik, kpt) in enumerate(basis.kpoints) test_matrix_repr_operator(ham.blocks[ik], ψ[ik]; atol) - δψkHψk = sum(occupation[ik][iband] * real(dot(δψ[ik][:, iband], Hψk[:, iband])) + δψk = to_composite_σG(basis, kpt, δψ[ik]) + Hψk = to_composite_σG(basis, kpt, ham.blocks[ik]*ψ[ik]) + δψkHψk = sum(occupation[ik][iband] * real(dot(δψk[:, iband], Hψk[:, iband])) for iband=1:n_bands) diff_predicted += 2 * basis.kweights[ik] * δψkHψk end diff --git a/test/hessian.jl b/test/hessian.jl index 08577c6547..35e4638630 100644 --- a/test/hessian.jl +++ b/test/hessian.jl @@ -26,7 +26,7 @@ end @test isapprox( real(dot(ϕ, solve_ΩplusK(basis, ψ, rhs, occupation).δψ)), - real(dot(solve_ΩplusK(basis, ψ, ϕ, occupation).δψ, rhs)), + real(dot(solve_ΩplusK(basis, ψ, ϕ, occupation).δψ, rhs)); atol=1e-7 ) end @@ -40,12 +40,16 @@ end H = energy_hamiltonian(basis, ψ, occupation; ρ).ham # Rayleigh-coefficients - Λ = [ψk'Hψk for (ψk, Hψk) in zip(ψ, H * ψ)] + Λ = map(enumerate(zip(ψ, H * ψ))) do (ik, (ψk, Hψk)) + ψk = to_composite_σG(basis, basis.kpoints[ik], ψk) + Hψk = to_composite_σG(basis, basis.kpoints[ik], Hψk) + from_composite_σG(basis, basis.kpoints[ik], ψk'Hψk) + end # Ω is complex-linear and so self-adjoint as a complex operator. @test isapprox( dot(ϕ, apply_Ω(rhs, ψ, H, Λ)), - dot(apply_Ω(ϕ, ψ, H, Λ), rhs), + dot(apply_Ω(ϕ, ψ, H, Λ), rhs); atol=1e-7 ) end @@ -60,7 +64,7 @@ end # hence we test using the real dot product. @test isapprox( real(dot(ϕ, apply_K(basis, rhs, ψ, ρ, occupation))), - real(dot(apply_K(basis, ϕ, ψ, ρ, occupation), rhs)), + real(dot(apply_K(basis, ϕ, ψ, ρ, occupation), rhs)); atol=1e-7 ) end @@ -81,7 +85,7 @@ end @testset "self-adjointness of solve_ΩplusK_split" begin @test isapprox(real(dot(ϕ, solve_ΩplusK_split(scfres, rhs).δψ)), - real(dot(solve_ΩplusK_split(scfres, ϕ).δψ, rhs)), + real(dot(solve_ΩplusK_split(scfres, ϕ).δψ, rhs)); atol=1e-7) end @@ -114,7 +118,7 @@ end @testset "self-adjointness of solve_ΩplusK_split" begin @test isapprox(real(dot(ϕ, solve_ΩplusK_split(scfres, rhs).δψ)), - real(dot(solve_ΩplusK_split(scfres, ϕ).δψ, rhs)), + real(dot(solve_ΩplusK_split(scfres, ϕ).δψ, rhs)); atol=1e-7) end end diff --git a/test/multicomponents.jl b/test/multicomponents.jl new file mode 100644 index 0000000000..dd4293d9ea --- /dev/null +++ b/test/multicomponents.jl @@ -0,0 +1,119 @@ +# Note: The consistency tests at temperature may have an error of the order of magnitude +# of `default_occupation_threshold`; use `nbandsalg` if small tolerance. +@testsetup module Multicomponents +using Test +using DFTK +using LinearAlgebra + +function test_consistency(setup; Ecut=5, kgrid=(4,4,4), tol=1e-5) + res1 = setup(Ecut, kgrid, tol, 1) + res2 = setup(Ecut, kgrid, tol, 2) + @test norm(res1 - res2) < tol +end +end + +@testitem "Energy & Density: LOBPCG" setup=[TestCases, Multicomponents] begin + using DFTK + test_consistency = Multicomponents.test_consistency + testcase = TestCases.silicon + testcase = merge(testcase, (; temperature=0.03)) + eigensolver = lobpcg_hyper + + test_consistency() do Ecut, kgrid, tol, n_components + model = model_PBE(testcase.lattice, testcase.atoms, testcase.positions; + n_components, testcase.temperature) + basis = PlaneWaveBasis(model; Ecut, kgrid) + nbandsalg = AdaptiveBands(model; occupation_threshold=tol/10) + scfres = self_consistent_field(basis; eigensolver, tol, nbandsalg) + [scfres.energies.total, scfres.ρ] + end +end + +@testitem "Energy & Density: diag_full" setup=[TestCases, Multicomponents] begin + using DFTK + test_consistency = Multicomponents.test_consistency + testcase = TestCases.silicon + testcase = merge(testcase, (; temperature=0.03)) + eigensolver = diag_full + + test_consistency() do Ecut, kgrid, tol, n_components + model = model_PBE(testcase.lattice, testcase.atoms, testcase.positions; + n_components, testcase.temperature) + basis = PlaneWaveBasis(model; Ecut, kgrid) + nbandsalg = AdaptiveBands(model; occupation_threshold=tol/10) + scfres = self_consistent_field(basis; eigensolver, tol, nbandsalg) + [scfres.energies.total, scfres.ρ] + end +end + +@testitem "Forces" setup=[TestCases, Multicomponents] begin + using DFTK + test_consistency = Multicomponents.test_consistency + testcase = TestCases.silicon + testcase = merge(testcase, (; temperature=0.03)) + + test_consistency() do Ecut, kgrid, tol, n_components + positions = [(ones(3)) / 4, -ones(3) / 8] + model = model_PBE(testcase.lattice, testcase.atoms, positions; n_components, + testcase.temperature) + basis = PlaneWaveBasis(model; Ecut, kgrid) + nbandsalg = AdaptiveBands(model; occupation_threshold=tol/100) + scfres = self_consistent_field(basis; tol=tol/10, nbandsalg) + compute_forces(scfres) + end +end + +@testitem "δρ, temperature" setup=[TestCases, Multicomponents] begin + using DFTK + using LinearAlgebra + test_consistency = Multicomponents.test_consistency + testcase = TestCases.aluminium_primitive + testcase = merge(testcase, (; temperature=0.03)) + + Ecut = 10 + kgrid = (1, 1, 1) + + test_consistency(; Ecut, kgrid) do Ecut, kgrid, tol, n_components + model = model_PBE(testcase.lattice, testcase.atoms, testcase.positions; + n_components, testcase.temperature) + basis = PlaneWaveBasis(model; Ecut, kgrid) + nbandsalg = AdaptiveBands(model; occupation_threshold=tol/10) + scfres = self_consistent_field(basis; tol, nbandsalg) + δV = guess_density(basis) + apply_χ0(scfres, δV) + end +end + +@testitem "Energy & Density: Newton" #= + =# tags=[:dont_test_mpi] setup=[TestCases, Multicomponents] begin + using DFTK + test_consistency = Multicomponents.test_consistency + testcase = TestCases.silicon + + test_consistency() do Ecut, kgrid, tol, n_components + model = model_PBE(testcase.lattice, testcase.atoms, testcase.positions; + n_components, testcase.temperature) + basis = PlaneWaveBasis(model; Ecut, kgrid) + scfres_start = self_consistent_field(basis; maxiter=1) + # remove virtual orbitals + (; ψ) = DFTK.select_occupied_orbitals(basis, scfres_start.ψ, scfres_start.occupation) + newton(basis, ψ; tol=tol/10).ρ + end +end + +@testitem "Energy & Density: Direct minimization" #= + =# tags=[:dont_test_mpi] setup=[TestCases, Multicomponents] begin + using DFTK + test_consistency = Multicomponents.test_consistency + testcase = TestCases.silicon + + test_consistency() do Ecut, kgrid, tol, n_components + model = model_PBE(testcase.lattice, testcase.atoms, testcase.positions; + n_components, testcase.temperature) + basis = PlaneWaveBasis(model; Ecut, kgrid) + scfres_start = self_consistent_field(basis; maxiter=1) + # remove virtual orbitals + (; ψ) = DFTK.select_occupied_orbitals(basis, scfres_start.ψ, scfres_start.occupation) + direct_minimization(basis; ψ, tol=tol/10).ρ + end +end diff --git a/test/scf_compare.jl b/test/scf_compare.jl index 689630c765..7bf48a7a2e 100644 --- a/test/scf_compare.jl +++ b/test/scf_compare.jl @@ -80,7 +80,7 @@ end if mpi_nprocs() == 1 # Distributed implementation not yet available @testset "Direct minimization" begin ρ_dm = direct_minimization(basis; ψ, tol).ρ - @test maximum(abs.(ρ_dm - ρ_def)) < 10tol + @test maximum(abs, ρ_dm - ρ_def) < 10tol end end @@ -88,7 +88,7 @@ end if mpi_nprocs() == 1 # Distributed implementation not yet available @testset "Newton" begin ρ_newton = newton(basis, ψ; tol).ρ - @test maximum(abs.(ρ_newton - ρ_def)) < 10tol + @test maximum(abs, ρ_newton - ρ_def) < 10tol end end end diff --git a/test/serialisation.jl b/test/serialisation.jl index 2e282c410a..ce37f7d0b0 100644 --- a/test/serialisation.jl +++ b/test/serialisation.jl @@ -9,6 +9,7 @@ function test_scfres_agreement(tested, ref; test_ψ=true) @test tested.basis.model.εF == ref.basis.model.εF @test tested.basis.model.symmetries == ref.basis.model.symmetries @test tested.basis.model.spin_polarization == ref.basis.model.spin_polarization + @test tested.basis.model.n_components == ref.basis.model.n_components @test tested.basis.model.positions == ref.basis.model.positions @test atomic_symbol.(tested.basis.model.atoms) == atomic_symbol.(ref.basis.model.atoms) diff --git a/test/todict.jl b/test/todict.jl index 8ca5808b13..cb7ecb85c7 100644 --- a/test/todict.jl +++ b/test/todict.jl @@ -8,10 +8,11 @@ function test_agreement_bands(band_data, dict; explicit_reshape=false, test_ψ=t basis = band_data.basis model = basis.model - n_kpoints = length(basis.kcoords_global) - n_spin = model.n_spin_components - n_bands = length(band_data.eigenvalues[1]) - max_n_G = DFTK.mpi_max(maximum(kpt -> length(G_vectors(basis, kpt)), basis.kpoints), + n_kpoints = length(basis.kcoords_global) + n_spin = model.n_spin_components + n_bands = length(band_data.eigenvalues[1]) + n_components = model.n_components + max_n_G = DFTK.mpi_max(maximum(kpt -> length(G_vectors(basis, kpt)), basis.kpoints), basis.comm_kpts) rotations = [symop.W for symop in basis.symmetries] translations = [symop.w for symop in basis.symmetries] @@ -38,6 +39,7 @@ function test_agreement_bands(band_data, dict; explicit_reshape=false, test_ψ=t @test dict["n_bands"] == n_bands @test dict["n_kpoints"] == n_kpoints @test dict["n_atoms"] == length(model.atoms) + @test dict["n_components"] == n_components @test dict["n_spin_components"] == n_spin @test dict["model_name"] == model.model_name @test dict["temperature"] ≈ model.temperature atol=1e-12 @@ -68,7 +70,7 @@ function test_agreement_bands(band_data, dict; explicit_reshape=false, test_ψ=t if test_ψ n_G_resh = condreshape(dict["kpt_n_G_vectors"], (n_kpoints, n_spin)) G_vecs_resh = condreshape(dict["kpt_G_vectors"], (3, max_n_G, n_kpoints, n_spin)) - ψ_resh = condreshape(dict["ψ"], (max_n_G, n_bands, n_kpoints, n_spin)) + ψ_resh = condreshape(dict["ψ"], (n_components, max_n_G, n_bands, n_kpoints, n_spin)) end end n_iter_resh = MPI.bcast(n_iter_resh, 0, MPI.COMM_WORLD) @@ -92,7 +94,7 @@ function test_agreement_bands(band_data, dict; explicit_reshape=false, test_ψ=t @test n_G_resh[ikgl, σ] == length(basis.kpoints[ik].G_vectors) @test all(G_vecs_resh[:, iG, ikgl, σ] == G for (iG, G) in enumerate(basis.kpoints[ik].G_vectors)) - @test ψ_resh[1:n_G_resh[ikgl, σ], :, ikgl, σ] ≈ band_data.ψ[ik] atol=1e-12 + @test ψ_resh[:, 1:n_G_resh[ikgl, σ], :, ikgl, σ] ≈ band_data.ψ[ik] atol=1e-12 end end end # function diff --git a/test/transfer.jl b/test/transfer.jl index e21ea073b3..4acf53c14f 100644 --- a/test/transfer.jl +++ b/test/transfer.jl @@ -23,11 +23,12 @@ ψ_bb = transfer_blochwave(ψ_b, bigger_basis, basis) @test norm(ψ - ψ_bb) < eps(eltype(basis)) - T = compute_transfer_matrix(basis, bigger_basis) # transfer - Tᵇ = compute_transfer_matrix(bigger_basis, basis) # back-transfer - Tψ = [Tk * ψk for (Tk, ψk) in zip(T, ψ)] - TᵇTψ = [Tᵇk * Tψk for (Tᵇk, Tψk) in zip(Tᵇ, Tψ)] - @test norm(ψ - TᵇTψ) < eps(eltype(basis)) + T = compute_transfer_matrix(basis, bigger_basis) # transfer + Tᵇ = compute_transfer_matrix(bigger_basis, basis) # back-transfer + ψ_σG = to_composite_σG(basis, ψ) + Tψ = [Tk * ψk for (Tk, ψk) in zip(T, ψ_σG)] + TᵇTψ = [Tᵇk * Tψk for (Tᵇk, Tψk) in zip(Tᵇ, Tψ)] + @test norm(ψ_σG - TᵇTψ) < eps(eltype(basis)) # TᵇT should be the identity and TTᵇ should be a projection TᵇT = [Tᵇk * Tk for (Tk, Tᵇk) in zip(T, Tᵇ)] @@ -40,7 +41,7 @@ bigger_basis = PlaneWaveBasis(model; Ecut, kgrid, kshift) ψ_b = transfer_blochwave(ψ, basis, bigger_basis) ψ_bb = transfer_blochwave(ψ_b, bigger_basis, basis) - @test norm(ψ-ψ_bb) < eps(eltype(basis)) + @test norm(ψ - ψ_bb) < eps(eltype(basis)) end @testitem "Transfer of density" begin From 097fb2b4da7a29b7f2284cf4d157e380f7d271ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Polack?= Date: Mon, 11 Dec 2023 23:20:43 +0100 Subject: [PATCH 2/3] regression tests & benchmark --- .github/workflows/ci.yaml | 10 +- .github/workflows/regression.yaml | 98 ++++++++++++++++++ .gitignore | 2 + Project.toml | 3 +- benchmark/Project.toml | 10 ++ benchmark/benchmarks.jl | 6 ++ benchmark/humongous/Project.toml | 10 ++ benchmark/humongous/benchmarks.jl | 21 ++++ benchmark/humongous/run.jl | 43 ++++++++ benchmark/load.jl | 14 +++ benchmark/regression/testcases.jl | 158 ++++++++++++++++++++++++++++++ benchmark/run.jl | 14 +++ test/runtests_runner.jl | 30 ++++++ 13 files changed, 413 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/regression.yaml create mode 100644 benchmark/Project.toml create mode 100644 benchmark/benchmarks.jl create mode 100644 benchmark/humongous/Project.toml create mode 100644 benchmark/humongous/benchmarks.jl create mode 100644 benchmark/humongous/run.jl create mode 100644 benchmark/load.jl create mode 100644 benchmark/regression/testcases.jl create mode 100644 benchmark/run.jl diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f56c5cd9b4..896dec11b3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -21,11 +21,11 @@ jobs: fail-fast: false matrix: include: - - {mode: stable, os: ubuntu-latest, payload: noslow-example } - - {mode: stable, os: macOS-latest, payload: noslow } - - {mode: stable, os: windows-latest, payload: noslow } - - {mode: stable, os: ubuntu-latest, payload: noslow-mpi } - - {mode: nightly, os: ubuntu-latest, payload: noslow } + - {mode: stable, os: ubuntu-latest, payload: example-noslow-noregression } + - {mode: stable, os: macOS-latest, payload: noslow-noregression } + - {mode: stable, os: windows-latest, payload: noslow-noregression } + - {mode: stable, os: ubuntu-latest, payload: mpi-noslow-noregression } + - {mode: nightly, os: ubuntu-latest, payload: noslow-noregression } env: GKS_ENCODING: utf8 GKSwstype: 100 # Needed for Plots-related tests diff --git a/.github/workflows/regression.yaml b/.github/workflows/regression.yaml new file mode 100644 index 0000000000..ae2e25eda2 --- /dev/null +++ b/.github/workflows/regression.yaml @@ -0,0 +1,98 @@ +name: Regression +on: + push: + branches: + - master + tags: ['*'] + pull_request: + schedule: + - cron: '0 4 * * 6' # Run every Saturday +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + +jobs: + test: + name: Benchmarking ${{ matrix.description }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - {description: run, payload: benchmarks.jl } + - {description: load, payload: load.jl } + steps: + # Remove older benchmark comment + - name: pr-deleter + uses: maheshrayas/action-pr-comment-delete@v3.0 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + org: + repo: + user: github-actions[bot] + issue: ${{github.event.number}} + + - uses: actions/checkout@v4 + + - name: Setup Julia stable + uses: julia-actions/setup-julia@v1 + with: + version: '1.9' + arch: x64 + + - uses: julia-actions/cache@v1 + with: + include-matrix: false + - uses: julia-actions/julia-buildpkg@v1 + + - name: Install dependencies + run: | + julia --project=benchmark -e ' + using Pkg + Pkg.develop(PackageSpec(; path=pwd())) + Pkg.instantiate()' + + - name: Run benchmarks against master + # Remove baseline once merged. Regression tests will only work after this is merged + # in master. + run: | + julia --project=benchmark -e " + using BenchmarkCI + baseline = \"HEAD\" + script = \"\$(pwd())/benchmark/${{ matrix.payload }}\" + BenchmarkCI.judge(; baseline, script, retune=true)" + if: ${{ github.event_name == 'pull_request' }} + + - name: Run benchmarks against last release + run: | + julia --project=benchmark -e " + import Pkg + baseline = \"v\" * Pkg.TOML.parsefile(\"Project.toml\")[\"version\"] + script = \"\$(pwd())/benchmark/${{ matrix.payload }}\" + using BenchmarkCI + BenchmarkCI.judge(; baseline, script, retune=true)" + if: ${{ github.event_name == 'schedule' || + github.event.push.ref == 'refs/heads/master' }} + + - name: Print judgement + run: | + julia --project=benchmark -e ' + using BenchmarkCI + BenchmarkCI.displayjudgement()' + + - name: Post results + run: | + julia --project=benchmark -e ' + using BenchmarkCI + BenchmarkCI.postjudge()' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Is report successful + run: | + res=$(julia --project=benchmark -e ' + using BenchmarkCI + BenchmarkCI.displayjudgement()' | grep --count ':x:') + if [[ $res -gt 1 ]]; then exit 1; fi diff --git a/.gitignore b/.gitignore index f3d553e487..d82b137880 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ Manifest.toml /LocalPreferences.toml .vscode .CondaPkg +/.benchmarkci +/benchmark/**/*.json diff --git a/Project.toml b/Project.toml index 415a2a168b..2957010e46 100644 --- a/Project.toml +++ b/Project.toml @@ -126,6 +126,7 @@ ASEconvert = "3da9722f-58c2-4165-81be-b4d7253e8fd2" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" AtomsIO = "1692102d-eeb4-4df9-807b-c9517f998d44" AtomsIOPython = "9e4c859b-2281-48ef-8059-f50fe53c37b0" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2" ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" @@ -148,4 +149,4 @@ WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192" wannier90_jll = "c5400fa0-8d08-52c2-913f-1e3f656c1ce9" [targets] -test = ["Test", "TestItemRunner", "ASEconvert", "Aqua", "AtomsIO", "AtomsIOPython", "CUDA", "CUDA_Runtime_jll", "ComponentArrays", "DoubleFloats", "FiniteDiff", "FiniteDifferences", "GenericLinearAlgebra", "IntervalArithmetic", "JLD2", "JSON3", "Logging", "Plots", "QuadGK", "Random", "KrylovKit", "Wannier", "WriteVTK", "wannier90_jll"] +test = ["Test", "TestItemRunner", "ASEconvert", "Aqua", "AtomsIO", "AtomsIOPython", "BenchmarkTools", "CUDA", "CUDA_Runtime_jll", "ComponentArrays", "DoubleFloats", "FiniteDiff", "FiniteDifferences", "GenericLinearAlgebra", "IntervalArithmetic", "JLD2", "JSON3", "Logging", "Plots", "QuadGK", "Random", "KrylovKit", "Wannier", "WriteVTK", "wannier90_jll"] diff --git a/benchmark/Project.toml b/benchmark/Project.toml new file mode 100644 index 0000000000..6b2665e976 --- /dev/null +++ b/benchmark/Project.toml @@ -0,0 +1,10 @@ +[deps] +AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a" +BenchmarkCI = "20533458-34a3-403d-a444-e18f38190b5b" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +DFTK = "acf6eb54-70d9-11e9-0013-234b7a5f5337" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d" +TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" +Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" +UnitfulAtomic = "a7773ee8-282e-5fa2-be4e-bd808c38a91a" diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl new file mode 100644 index 0000000000..fcda531cab --- /dev/null +++ b/benchmark/benchmarks.jl @@ -0,0 +1,6 @@ +using BenchmarkTools +using TestItemRunner + +const SUITE = BenchmarkGroup() + +@run_package_tests filter=ti->(:regression ∈ ti.tags) diff --git a/benchmark/humongous/Project.toml b/benchmark/humongous/Project.toml new file mode 100644 index 0000000000..3546e8e693 --- /dev/null +++ b/benchmark/humongous/Project.toml @@ -0,0 +1,10 @@ +[deps] +AtomsIO = "1692102d-eeb4-4df9-807b-c9517f998d44" +BenchmarkCI = "20533458-34a3-403d-a444-e18f38190b5b" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +DFTK = "acf6eb54-70d9-11e9-0013-234b7a5f5337" +LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2" +PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d" +TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" diff --git a/benchmark/humongous/benchmarks.jl b/benchmark/humongous/benchmarks.jl new file mode 100644 index 0000000000..6d04955361 --- /dev/null +++ b/benchmark/humongous/benchmarks.jl @@ -0,0 +1,21 @@ +using BenchmarkTools +using TestItemRunner + +function run_scenario(scenario, complexity) + scenario_filter(i) = occursin(string(scenario), i.filename) && complexity ∈ i.tags + @run_package_tests filter=scenario_filter +end + +all_scenarios() = [:AlSiO2H, :Cr19, :Fe2MnAl, :Mn2RuGa, :WFe] +function make_suite(; scenarios=all_scenarios(), complexity=:debug) + @assert complexity ∈ [:debug, :small, :full] + @assert all(scenarios .∈ Ref(all_scenarios())) + + suite = BenchmarkGroup() + for scenario in scenarios + suite[scenario] = @benchmarkable run_scenario($scenario, $complexity) + end + suite +end + +const SUITE = make_suite(; scenarios=[:AlSiO2H]) diff --git a/benchmark/humongous/run.jl b/benchmark/humongous/run.jl new file mode 100644 index 0000000000..537755d0b6 --- /dev/null +++ b/benchmark/humongous/run.jl @@ -0,0 +1,43 @@ +ROOTPATH = abspath(joinpath(@__DIR__, "../..")) +import Pkg +Pkg.activate(@__DIR__) +if !isfile(joinpath(@__DIR__, "Manifest.toml")) + Pkg.develop(Pkg.PackageSpec(; path=ROOTPATH)) + Pkg.instantiate() +end + +import BenchmarkCI +import LibGit2 + +""" +Launch with +```julia +julia --project=benchmark/humongous -e ' + include("benchmark/humongous/run.jl") + run_benchmark()' +``` +""" +function run_benchmark(; retune=false, baseline="origin/master", target="HEAD", + script=nothing) + mktempdir(mktempdir()) do repo_dir # TestItemRunner needs access to parent directory as well. + project = joinpath(ROOTPATH, "benchmark", "humongous") + # Workaround to be able to benchmark releases before the use of PkgBenchmark. + # WARN: In this case, we need PkgBenchmark to be installed globally. + if isnothing(script) + # We run the default benchmark. + script = joinpath(project, "benchmarks.jl") + else + occursin(ROOTPATH, abspath(script)) && + error("Script should be outside the repository.") + end + script_copy = joinpath(repo_dir, "benchmarks.jl") + cp(script, script_copy) + + LibGit2.clone("https://github.com/epolack/DFTK-testproblems", + joinpath(repo_dir, "test")) + + BenchmarkCI.judge(; baseline, target, retune, script=script_copy, project) + + BenchmarkCI.displayjudgement() + end +end diff --git a/benchmark/load.jl b/benchmark/load.jl new file mode 100644 index 0000000000..3d0e20fc21 --- /dev/null +++ b/benchmark/load.jl @@ -0,0 +1,14 @@ +using BenchmarkTools + +const SUITE = BenchmarkGroup() + +julia_cmd = unsafe_string(Base.JLOptions().julia_bin) +SUITE["load"] = @benchmarkable run(`$julia_cmd \ + --startup-file=no \ + --project=$(Base.active_project()) \ + -e 'using DFTK'`) +SUITE["pecompilation"] = + @benchmarkable run(`$julia_cmd \ + --startup-file=no \ + --project=$(Base.active_project()) \ + -e 'Base.compilecache(Base.identify_package("DFTK"))'`) diff --git a/benchmark/regression/testcases.jl b/benchmark/regression/testcases.jl new file mode 100644 index 0000000000..62f07642b4 --- /dev/null +++ b/benchmark/regression/testcases.jl @@ -0,0 +1,158 @@ +@testsetup module Regression +using DFTK +using Unitful +using UnitfulAtomic +using AtomsBase +using ..TestCases: magnesium + +high_symmetry = let + a = 4.474 + lattice = [[0, a, a], [a, 0, a], [a, a, 0]]u"bohr" + x = 6.711 + y = 2.237 + atoms = [ + Atom(:Cu, [0, 0, 0]u"bohr", magnetic_moment=0), + Atom(:O, [x, y, x]u"bohr", magnetic_moment=0), + Atom(:O, [x, y, y]u"bohr", magnetic_moment=0), + ] + system = periodic_system(atoms, lattice) + merge(DFTK.parse_system(system), (; temperature=0.03, Ecut=20, kgrid=[4,4,4], + n_electrons=45, description="high_sym")) +end +high_kpoints = merge(magnesium, (; kgrid=[13,13,13], Ecut=20, description="high_kpoint")) +high_Ecut = merge(magnesium, (; kgrid=[4,4,4], Ecut=60, description="high_Ecut")) + +testcases = (; high_symmetry, high_kpoints, high_Ecut) +end + + +@testitem "Hamiltonian application" tags=[:regression] setup=[TestCases, Regression] begin + using DFTK + using LinearAlgebra + using BenchmarkTools + using .Main: SUITE + + for testcase in Regression.testcases + model = Model(testcase.lattice, testcase.atoms, testcase.positions; + testcase.temperature, terms=[Kinetic()]) + basis = PlaneWaveBasis(model; testcase.Ecut, testcase.kgrid) + + n_electrons = testcase.n_electrons + n_bands = div(n_electrons, 2, RoundUp) + ψ = [Matrix(qr(randn(ComplexF64, length(G_vectors(basis, kpt)), n_bands)).Q) + for kpt in basis.kpoints] + filled_occ = DFTK.filled_occupation(model) + occupation = [filled_occ * rand(n_bands) for _ = 1:length(basis.kpoints)] + occ_scaling = n_electrons / sum(sum(occupation)) + occupation = [occ * occ_scaling for occ in occupation] + + (; ham) = energy_hamiltonian(basis, ψ, occupation) + + SUITE["ham"][testcase.description] = + @benchmarkable for ik = 1:length($(basis.kpoints)) + $(ham.blocks)[ik]*$ψ[ik] + end + end +end + +@testitem "Single SCF step" tags=[:regression] setup=[TestCases, Regression] begin + using DFTK + using BenchmarkTools + using .Main: SUITE + + for testcase in Regression.testcases + model = model_LDA(testcase.lattice, testcase.atoms, testcase.positions; + testcase.temperature) + basis = PlaneWaveBasis(model; testcase.Ecut, testcase.kgrid) + SUITE["scf"][testcase.description] = + @benchmarkable self_consistent_field($basis; tol=1e5) + end +end + +@testitem "Density + symmetrization" tags=[:regression] setup=[TestCases, Regression] begin + using DFTK + using BenchmarkTools + using .Main: SUITE + + for testcase in Regression.testcases + model = model_LDA(testcase.lattice, testcase.atoms, testcase.positions; + testcase.temperature) + basis = PlaneWaveBasis(model; testcase.Ecut, testcase.kgrid) + scfres = self_consistent_field(basis; tol=10) + + ψ, occupation = DFTK.select_occupied_orbitals(basis, scfres.ψ, scfres.occupation; + threshold=1e-6) + + SUITE["density"]["ρ"][testcase.description] = + @benchmarkable compute_density($basis, $ψ, $occupation) + SUITE["density"]["sym"][testcase.description] = + @benchmarkable DFTK.symmetrize_ρ($basis, $(scfres.ρ)) + end +end + +@testitem "Basis construction" tags=[:regression] setup=[TestCases, Regression] begin + using DFTK + using BenchmarkTools + using .Main: SUITE + + for testcase in Regression.testcases + model = model_LDA(testcase.lattice, testcase.atoms, testcase.positions; + testcase.temperature) + SUITE["basis"][testcase.description] = + @benchmarkable PlaneWaveBasis($model; + Ecut=$(testcase.Ecut), kgrid=$(testcase.kgrid)) + end +end + +@testitem "Sternheimer" tags=[:regression] setup=[TestCases, Regression] begin + using DFTK + using BenchmarkTools + using .Main: SUITE + + for testcase in Regression.testcases + model = model_LDA(testcase.lattice, testcase.atoms, testcase.positions; + testcase.temperature) + basis = PlaneWaveBasis(model; testcase.Ecut, testcase.kgrid) + scfres = self_consistent_field(basis; tol=10) + + rhs = DFTK.compute_projected_gradient(basis, scfres.ψ, scfres.occupation) + SUITE["response"]["sternheimer"][testcase.description] = + @benchmarkable DFTK.solve_ΩplusK_split($scfres, $rhs; tol=1e-1) + end +end + +@testitem "Response with AD" tags=[:regression] setup=[TestCases, Regression] begin + using DFTK + using BenchmarkTools + using LinearAlgebra + using ForwardDiff + using .Main: SUITE + + function make_basis(ε::T; a=10., Ecut=30) where {T} + lattice=T(a) * I(3) # lattice is a cube of ``a`` Bohrs + # Helium at the center of the box + atoms = [ElementPsp(:He; psp=load_psp("hgh/lda/He-q2"))] + positions = [[1/2, 1/2, 1/2]] + + model = model_DFT(lattice, atoms, positions, [:lda_x, :lda_c_vwn]; + extra_terms=[ExternalFromReal(r -> -ε * (r[1] - a/2))], + symmetries=false) + PlaneWaveBasis(model; Ecut, kgrid=[1, 1, 1]) # No k-point sampling on isolated system + end + + # dipole moment of a given density (assuming the current geometry) + function dipole(basis, ρ) + @assert isdiag(basis.model.lattice) + a = basis.model.lattice[1, 1] + rr = [a * (r[1] - 1/2) for r in r_vectors(basis)] + sum(rr .* ρ) * basis.dvol + end + + # Function to compute the dipole for a given field strength + function compute_dipole(ε; tol=1e-2, kwargs...) + scfres = self_consistent_field(make_basis(ε; kwargs...); tol) + dipole(scfres.basis, scfres.ρ) + end + + SUITE["response"]["ad"] = @benchmarkable ForwardDiff.derivative($compute_dipole, 0.0) +end diff --git a/benchmark/run.jl b/benchmark/run.jl new file mode 100644 index 0000000000..8ab416ed9b --- /dev/null +++ b/benchmark/run.jl @@ -0,0 +1,14 @@ +ROOTPATH = joinpath(@__DIR__, "..") +import Pkg +Pkg.activate(@__DIR__) +if !isfile(joinpath(@__DIR__, "Manifest.toml")) + Pkg.develop(Pkg.PackageSpec(; path=ROOTPATH)) + Pkg.instantiate() +end + +using BenchmarkCI + +# Remove target once merged. Regression tests will only work after this is merged in master. +BenchmarkCI.judge(; baseline="HEAD") + +BenchmarkCI.displayjudgement() diff --git a/test/runtests_runner.jl b/test/runtests_runner.jl index 7616bd8741..9e9e4b1976 100644 --- a/test/runtests_runner.jl +++ b/test/runtests_runner.jl @@ -29,4 +29,34 @@ function dftk_testfilter(ti) return false end end + +using Test +function TestItemRunner.run_testitem(filepath, use_default_usings, setups, package_name, + original_code, line, column, test_setup_module_set) + mod = Core.eval(Main, :(module $(gensym()) end)) + + if use_default_usings + Core.eval(mod, :(using Test)) + + if package_name!="" + Core.eval(mod, :(using $(Symbol(package_name)))) + end + end + + for m in setups + Core.eval(mod, Expr(:using, Expr(:., :., :., nameof(test_setup_module_set.setupmodule), m))) + end + + code = string('\n'^line, ' '^column, original_code) + + TestItemRunner.withpath(filepath) do + # Replace the test by the current testset. + description = Test.pop_testset().description + @testset "$(description)" begin + Base.invokelatest(include_string, mod, code, filepath) + end + Test.push_testset(Test.FallbackTestSet()) # so the parent pops nothing + end +end + @run_package_tests filter=dftk_testfilter verbose=true From 35f72a4ee223e004c5d2328892c9a4c1ca908102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Polack?= Date: Tue, 20 Feb 2024 14:57:57 +0100 Subject: [PATCH 3/3] fix for components --- benchmark/regression/testcases.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/benchmark/regression/testcases.jl b/benchmark/regression/testcases.jl index 62f07642b4..cc7c7ea8f9 100644 --- a/benchmark/regression/testcases.jl +++ b/benchmark/regression/testcases.jl @@ -39,8 +39,7 @@ end n_electrons = testcase.n_electrons n_bands = div(n_electrons, 2, RoundUp) - ψ = [Matrix(qr(randn(ComplexF64, length(G_vectors(basis, kpt)), n_bands)).Q) - for kpt in basis.kpoints] + ψ = [DFTK.random_orbitals(basis, kpt, n_bands) for kpt in basis.kpoints] filled_occ = DFTK.filled_occupation(model) occupation = [filled_occ * rand(n_bands) for _ = 1:length(basis.kpoints)] occ_scaling = n_electrons / sum(sum(occupation))