diff --git a/src/PlaneWaveBasis.jl b/src/PlaneWaveBasis.jl index 2636acb6c7..626c90ba97 100644 --- a/src/PlaneWaveBasis.jl +++ b/src/PlaneWaveBasis.jl @@ -117,7 +117,6 @@ end # prevent broadcast -import Base.Broadcast.broadcastable Base.Broadcast.broadcastable(basis::PlaneWaveBasis) = Ref(basis) Base.eltype(::PlaneWaveBasis{T}) where {T} = T diff --git a/src/eigen/lobpcg_hyper_impl.jl b/src/eigen/lobpcg_hyper_impl.jl index 2877819c4c..1d3b1ee4f2 100644 --- a/src/eigen/lobpcg_hyper_impl.jl +++ b/src/eigen/lobpcg_hyper_impl.jl @@ -204,7 +204,7 @@ normest(M) = maximum(abs.(diag(M))) + norm(M - Diagonal(diag(M))) success = false end invR = inv(R) - @assert all(!isnan, invR) + @assert !any(isnan, invR) rmul!(X, invR) # we do not use X/R because we use invR next # We would like growth_factor *= opnorm(inv(R)) but it's too diff --git a/src/scf/direct_minimization.jl b/src/scf/direct_minimization.jl index a7e9e52276..76b55e2fb2 100644 --- a/src/scf/direct_minimization.jl +++ b/src/scf/direct_minimization.jl @@ -67,7 +67,7 @@ necessarily eigenvectors of the Hamiltonian. """ direct_minimization(basis::PlaneWaveBasis; kwargs...) = direct_minimization(basis, nothing; kwargs...) function direct_minimization(basis::PlaneWaveBasis{T}, ψ0; - prec_type=PreconditionerTPA, + prec_type=PreconditionerTPA, maxiter=1_000, optim_solver=Optim.LBFGS, tol=1e-6, kwargs...) where {T} if mpi_nprocs() > 1 # need synchronization in Optim @@ -123,7 +123,8 @@ function direct_minimization(basis::PlaneWaveBasis{T}, ψ0; optim_options = Optim.Options(; allow_f_increases=true, show_trace=true, x_tol=pop!(kwdict, :x_tol, tol), f_tol=pop!(kwdict, :f_tol, -1), - g_tol=pop!(kwdict, :g_tol, -1), kwdict...) + g_tol=pop!(kwdict, :g_tol, -1), + iterations=maxiter, kwdict...) res = Optim.optimize(Optim.only_fg!(fg!), pack(ψ0), optim_solver(; P, precondprep=precondprep!, manifold, linesearch=LineSearches.BackTracking()), diff --git a/src/terms/xc.jl b/src/terms/xc.jl index 06a9b2b92c..3efb3af9ef 100644 --- a/src/terms/xc.jl +++ b/src/terms/xc.jl @@ -79,9 +79,8 @@ function xc_potential_real(term::TermXc, basis::PlaneWaveBasis{T}, ψ, occupatio max_ρ_derivs = maximum(max_required_derivative, term.functionals) density = LibxcDensities(basis, max_ρ_derivs, ρ, τ) - # Evaluate terms and energy contribution (zk == energy per unit particle) - # It may happen that a functional does only provide a potenital and not an energy term - # Therefore skip_unsupported_derivatives=true to avoid an error. + # Evaluate terms and energy contribution + # If the XC functional is not supported for an architecture, terms is on the CPU terms = potential_terms(term.functionals, density) @assert haskey(terms, :Vρ) && haskey(terms, :e) E = term.scaling_factor * sum(terms.e) * basis.dvol @@ -94,14 +93,14 @@ function xc_potential_real(term::TermXc, basis::PlaneWaveBasis{T}, ψ, occupatio # Potential contributions Vρ -2 ∇⋅(Vσ ∇ρ) + ΔVl potential = zero(ρ) @views for s in 1:n_spin - Vρ = reshape(terms.Vρ, n_spin, basis.fft_size...) + Vρ = to_device(basis.architecture, reshape(terms.Vρ, n_spin, basis.fft_size...)) potential[:, :, :, s] .+= Vρ[s, :, :, :] if haskey(terms, :Vσ) && any(x -> abs(x) > potential_threshold, terms.Vσ) # Need gradient correction # TODO Drop do-block syntax here? potential[:, :, :, s] .+= -2divergence_real(basis) do α - Vσ = reshape(terms.Vσ, :, basis.fft_size...) + Vσ = to_device(basis.architecture, reshape(terms.Vσ, :, basis.fft_size...)) # Extra factor (1/2) for s != t is needed because libxc only keeps σ_{αβ} # in the energy expression. See comment block below on spin-polarised XC. @@ -113,7 +112,7 @@ function xc_potential_real(term::TermXc, basis::PlaneWaveBasis{T}, ψ, occupatio if haskey(terms, :Vl) && any(x -> abs(x) > potential_threshold, terms.Vl) @warn "Meta-GGAs with a Δρ term have not yet been thoroughly tested." maxlog=1 mG² = .-norm2.(G_vectors_cart(basis)) - Vl = reshape(terms.Vl, n_spin, basis.fft_size...) + Vl = to_device(basis.architecture, reshape(terms.Vl, n_spin, basis.fft_size...)) Vl_fourier = fft(basis, Vl[s, :, :, :]) potential[:, :, :, s] .+= irfft(basis, mG² .* Vl_fourier) # ΔVl end @@ -123,7 +122,7 @@ function xc_potential_real(term::TermXc, basis::PlaneWaveBasis{T}, ψ, occupatio Vτ = nothing if haskey(terms, :Vτ) && any(x -> abs(x) > potential_threshold, terms.Vτ) # Need meta-GGA non-local operator (Note: -½ part of the definition of DivAgrid) - Vτ = reshape(terms.Vτ, n_spin, basis.fft_size...) + Vτ = to_device(basis.architecture, reshape(terms.Vτ, n_spin, basis.fft_size...)) Vτ = term.scaling_factor * permutedims(Vτ, (2, 3, 4, 1)) end @@ -379,10 +378,11 @@ function apply_kernel(term::TermXc, basis::PlaneWaveBasis{T}, δρ; ρ, kwargs.. ] end + # If the XC functional is not supported for an architecture, terms is on the CPU terms = kernel_terms(term.functionals, density) δV = zero(ρ) # [ix, iy, iz, iσ] - Vρρ = reshape(terms.Vρρ, n_spin, n_spin, basis.fft_size...) + Vρρ = to_device.(basis.architecture, reshape(terms.Vρρ, n_spin, n_spin, basis.fft_size...)) @views for s in 1:n_spin, t in 1:n_spin # LDA term δV[:, :, :, s] .+= Vρρ[s, t, :, :, :] .* δρ[t, :, :, :] end @@ -422,9 +422,9 @@ function add_kernel_gradient_correction!(δV, terms, density, perturbation, cros δρ = perturbation.ρ_real ∇δρ = perturbation.∇ρ_real δσ = cross_derivatives[:δσ] - Vρσ = reshape(terms.Vρσ, n_spin, spin_σ, basis.fft_size...) - Vσσ = reshape(terms.Vσσ, spin_σ, spin_σ, basis.fft_size...) - Vσ = reshape(terms.Vσ, spin_σ, basis.fft_size...) + Vρσ = to_device.(basis.architecture, reshape(terms.Vρσ, n_spin, spin_σ, basis.fft_size...)) + Vσσ = to_device.(basis.architecture, reshape(terms.Vσσ, spin_σ, spin_σ, basis.fft_size...)) + Vσ = to_device.(basis.architecture, reshape(terms.Vσ, spin_σ, basis.fft_size...)) T = eltype(ρ) tσ = DftFunctionals.spinindex_σ diff --git a/src/workarounds/gpu_arrays.jl b/src/workarounds/gpu_arrays.jl index 5c071db17c..ba32c8e2fa 100644 --- a/src/workarounds/gpu_arrays.jl +++ b/src/workarounds/gpu_arrays.jl @@ -12,11 +12,12 @@ function lowpass_for_symmetry!(ρ::AbstractGPUArray, basis; symmetries=basis.sym ρ .= to_device(basis.architecture, ρ_CPU) end -# Fallback implementation for the GPU: Transfer to the CPU and run computation for fun in (:potential_terms, :kernel_terms) @eval function DftFunctionals.$fun(fun::DispatchFunctional, ρ::AT, args...) where {AT <: AbstractGPUArray{Float64}} - term = $fun(fun, Array(ρ), Array.(args)...) - AT.(term) + # Fallback implementation for the GPU: Transfer to the CPU and run computation there + cpuify(::Nothing) = nothing + cpuify(x::AbstractArray) = Array(x) + $fun(fun, Array(ρ), cpuify.(args)...) end end diff --git a/test/anyons.jl b/test/anyons.jl index 644e2c676f..855e421c0c 100644 --- a/test/anyons.jl +++ b/test/anyons.jl @@ -46,7 +46,7 @@ if mpi_nprocs() == 1 # Direct minimisation not supported on mpi ] model = Model(lattice; n_electrons, terms, spin_polarization=:spinless) # "spinless electrons" basis = PlaneWaveBasis(model; Ecut, kgrid=(1, 1, 1)) - scfres = direct_minimization(basis, tol=1e-6) # Does not really converge beyond 1e-6 + scfres = direct_minimization(basis, tol=1e-6, maxiter=300) # Limit maxiter as guess can be bad E = scfres.energies.total s = 2 E11 = π/2 * (2(s+1)/s)^((s+2)/s) * (s/(s+2))^(2(s+1)/s) * E^((s+2)/s) / β