Fix GPU CI by forcing it to run on CUDA 11 (#859)
mfherbst authored May 18, 2023
1 parent 562a888 commit 904a6e2
Showing 11 changed files with 30 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -7,6 +7,6 @@
 *.jl.mem
 deps/deps.jl
 Manifest.toml
-LocalPreferences.toml
+/LocalPreferences.toml
 .vscode
 .CondaPkg
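(The leading slash anchors the ignore pattern to the repository root, so the LocalPreferences.toml committed below under test/configuration_cuda11/ is no longer ignored, while one at the top level still is.)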
21 changes: 16 additions & 5 deletions .gitlab-ci.yml
@@ -12,17 +12,28 @@ julia/1.8-n2:
     # SCHEDULER_PARAMETERS: "-N 1 -n 1 -c 16 --gres=gpu:a100:1 --qos=devel -p dgx -t 00:15:00 -A hpc-prf-dftkjl"
     SCHEDULER_PARAMETERS: "-N 1 -n 1 -c 16 --gres=gpu:a100:1 -p gpu -t 00:30:00 -A hpc-prf-dftkjl"
     JULIA_NUM_THREADS: "1" # GPU and multi-threading not yet compatible
+    JULIA_LOAD_PATH: "test/configuration_cuda11:" # Needed to pick up LocalPreferences.toml
   coverage: '/\(\d+.\d+\%\) covered/'
   rules:
     - if: $CI_COMMIT_BRANCH == "master"
     - if: $CI_PIPELINE_SOURCE == "external_pull_request_event" && $CI_EXTERNAL_PULL_REQUEST_SOURCE_BRANCH_NAME =~ /gpu$/
     - if: $CI_PIPELINE_SOURCE == "external_pull_request_event"
       when: manual
   script:
-    - module load lang/JuliaHPC/1.8.5-foss-2022a-CUDA-11.7.0
-    - julia --color=yes --project=. -e '
-        using Pkg;
-        Pkg.test(; coverage=true, test_args=["gpu"])
-      '
+    - module load lang/JuliaHPC/1.9.0-foss-2022a-CUDA-11.7.0
+    # testing
+    # - julia --color=yes --project=. -e '
+    #     using Pkg;
+    #     Pkg.test(; coverage=true, test_args=["gpu"])
+    #   '
+    # The above does not work because Pkg.test seems to ignore the JULIA_LOAD_PATH
+    # and thus does not pick up the custom preferences, so we need to do it manually.
+    - julia --color=yes -e 'using Pkg; Pkg.add("TestEnv")'
+    - julia --color=yes --code-coverage --project=. -e '
+        using TestEnv;
+        TestEnv.activate("DFTK");
+        include("test/runtests.jl")
+      ' gpu
+    # end testing
     - julia --color=yes --project=test/coverage -e 'import Pkg; Pkg.instantiate()'
     - julia --color=yes --project=test/coverage test/coverage/coverage.jl
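For context, the JULIA_LOAD_PATH entry stacks the test/configuration_cuda11 environment (added below) onto the load path, so that Preferences.jl resolves the pinned CUDA runtime from its LocalPreferences.toml. A minimal sketch to check that the pin is actually visible from such a session; the UUID is CUDA_Runtime_jll's, taken from the Project.toml added below:

using Preferences, UUIDs

cuda_runtime = UUID("76a88914-d11a-5bdc-97e0-2f5a05c973a2")  # CUDA_Runtime_jll
load_preference(cuda_runtime, "version")  # expect "11.8" once the load path is in effect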
4 changes: 1 addition & 3 deletions examples/gpu.jl
@@ -14,6 +14,4 @@ model = model_PBE(lattice, atoms, positions)
 architecture = has_cuda() ? DFTK.GPU(CuArray) : DFTK.CPU()
 
 basis = PlaneWaveBasis(model; Ecut=30, kgrid=(5, 5, 5), architecture)
-# FIXME right now guess generation on the GPU is broken
-ρ = DFTK.to_device(architecture, guess_density(PlaneWaveBasis(model; basis.Ecut, basis.kgrid)))
-scfres = self_consistent_field(basis; tol=1e-2, solver=scf_damping_solver(), ρ)
+scfres = self_consistent_field(basis; tol=1e-2, solver=scf_damping_solver())
1 change: 0 additions & 1 deletion src/PlaneWaveBasis.jl
@@ -117,7 +117,6 @@ end
 
 
 # prevent broadcast
-import Base.Broadcast.broadcastable
 Base.Broadcast.broadcastable(basis::PlaneWaveBasis) = Ref(basis)
 
 Base.eltype(::PlaneWaveBasis{T}) where {T} = T
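For context, the removed import is unnecessary when the method is defined via its fully qualified name, and returning a Ref from broadcastable makes broadcasting treat a PlaneWaveBasis as a scalar rather than trying to iterate it. A minimal self-contained sketch of the same technique with a hypothetical struct:

struct Point
    x::Float64
end
Base.Broadcast.broadcastable(p::Point) = Ref(p)  # broadcast treats p as a scalar

shift(p::Point, dx) = Point(p.x + dx)
p = Point(1.0)
shift.(p, [0.1, 0.2, 0.3])  # three shifted Points; without the broadcastable
                            # method, broadcast would try to iterate p and fail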
2 changes: 1 addition & 1 deletion src/eigen/lobpcg_hyper_impl.jl
@@ -204,7 +204,7 @@ normest(M) = maximum(abs.(diag(M))) + norm(M - Diagonal(diag(M)))
        success = false
    end
    invR = inv(R)
-   @assert all(!isnan, invR)
+   @assert !any(isnan, invR)
    rmul!(X, invR) # we do not use X/R because we use invR next
 
    # We would like growth_factor *= opnorm(inv(R)) but it's too
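The old and new assertions are logically equivalent by De Morgan's law; presumably !any(isnan, invR) plays better with GPU arrays than the composed function produced by !isnan, though that reasoning is an assumption, not something stated in the commit. A quick CPU check of the equivalence:

A = [1.0 2.0; 3.0 NaN]
all(!isnan, A) == !any(isnan, A)  # true: both sides evaluate to false for this A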
5 changes: 3 additions & 2 deletions src/scf/direct_minimization.jl
@@ -67,7 +67,7 @@ necessarily eigenvectors of the Hamiltonian.
 """
 direct_minimization(basis::PlaneWaveBasis; kwargs...) = direct_minimization(basis, nothing; kwargs...)
 function direct_minimization(basis::PlaneWaveBasis{T}, ψ0;
-                             prec_type=PreconditionerTPA,
+                             prec_type=PreconditionerTPA, maxiter=1_000,
                              optim_solver=Optim.LBFGS, tol=1e-6, kwargs...) where {T}
     if mpi_nprocs() > 1
         # need synchronization in Optim
@@ -123,7 +123,8 @@ function direct_minimization(basis::PlaneWaveBasis{T}, ψ0;
     optim_options = Optim.Options(; allow_f_increases=true, show_trace=true,
                                   x_tol=pop!(kwdict, :x_tol, tol),
                                   f_tol=pop!(kwdict, :f_tol, -1),
-                                  g_tol=pop!(kwdict, :g_tol, -1), kwdict...)
+                                  g_tol=pop!(kwdict, :g_tol, -1),
+                                  iterations=maxiter, kwdict...)
     res = Optim.optimize(Optim.only_fg!(fg!), pack(ψ0),
                          optim_solver(; P, precondprep=precondprep!, manifold,
                                       linesearch=LineSearches.BackTracking()),
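The new maxiter keyword simply caps Optim.jl's iteration count via its iterations option. A self-contained sketch of that mechanism, independent of DFTK (the Rosenbrock objective is a stand-in for the DFT energy):

using Optim

rosenbrock(x) = (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
opts = Optim.Options(; iterations=300, allow_f_increases=true)
res  = Optim.optimize(rosenbrock, zeros(2), Optim.LBFGS(), opts)
Optim.iterations(res)  # iterations actually taken, at most 300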
2 changes: 0 additions & 2 deletions src/workarounds/cuda_arrays.jl
@@ -1,5 +1,3 @@
-using LinearAlgebra
-
 synchronize_device(::GPU{<:CUDA.CuArray}) = CUDA.synchronize()
 
 for fun in (:potential_terms, :kernel_terms)
2 changes: 1 addition & 1 deletion test/anyons.jl
@@ -46,7 +46,7 @@ if mpi_nprocs() == 1  # Direct minimisation not supported on mpi
     ]
     model = Model(lattice; n_electrons, terms, spin_polarization=:spinless)  # "spinless electrons"
     basis = PlaneWaveBasis(model; Ecut, kgrid=(1, 1, 1))
-    scfres = direct_minimization(basis, tol=1e-6)  # Does not really converge beyond 1e-6
+    scfres = direct_minimization(basis, tol=1e-6, maxiter=300)  # Limit maxiter as guess can be bad
     E = scfres.energies.total
     s = 2
     E11 = π/2 * (2(s+1)/s)^((s+2)/s) * (s/(s+2))^(2(s+1)/s) * E^((s+2)/s) / β
2 changes: 2 additions & 0 deletions test/configuration_cuda11/LocalPreferences.toml
@@ -0,0 +1,2 @@
+[CUDA_Runtime_jll]
+version = "11.8"
2 changes: 2 additions & 0 deletions test/configuration_cuda11/Project.toml
@@ -0,0 +1,2 @@
+[extras]
+CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
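Together these two files pin CUDA_Runtime_jll to the 11.8 runtime whenever test/configuration_cuda11 is on the load path; the [extras] entry gives Preferences.jl the UUID it needs to associate the preference with the package. As a hedged aside (assuming the CUDA.jl 4.x API), the same pin can also be written programmatically:

using CUDA
CUDA.set_runtime_version!(v"11.8")  # writes the version preference into the
                                    # active project's LocalPreferences.toml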
6 changes: 3 additions & 3 deletions test/gpu.jl
@@ -10,13 +10,13 @@ include("testcases.jl")
     function run_problem(; architecture)
         model = model_PBE(silicon.lattice, silicon.atoms, silicon.positions)
         basis = PlaneWaveBasis(model; Ecut=10, kgrid=(3, 3, 3), architecture)
-        self_consistent_field(basis; tol=1e-10, solver=scf_damping_solver(1.0))
+        self_consistent_field(basis; tol=1e-9, solver=scf_damping_solver(1.0))
     end
 
     scfres_cpu = run_problem(; architecture=DFTK.CPU())
     scfres_gpu = run_problem(; architecture=DFTK.GPU(CuArray))
-    @test abs(scfres_cpu.energies.total - scfres_gpu.energies.total) < 1e-10
-    @test norm(scfres_cpu.ρ - Array(scfres_gpu.ρ)) < 1e-10
+    @test abs(scfres_cpu.energies.total - scfres_gpu.energies.total) < 1e-9
+    @test norm(scfres_cpu.ρ - Array(scfres_gpu.ρ)) < 1e-9
 end
 
 @testset "CUDA iron functionality test" begin
