From 80d680d04f500598304f0dd42099f006422ea528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Pacaud?= Date: Wed, 6 Dec 2023 10:04:07 +0100 Subject: [PATCH] Fix error in BieglerKKTSystem's solve for nontrivial scaling (#71) * replace ArgosCUDA using package extensions * CI: set explicitly CUDA 11.8 for runtime * move CI to Julia 1.9 * add specific workflow for CI * Making codecov strictly informative --------- Co-authored-by: Michel Schanen --- .ci/Project.toml | 26 +++++++++++ .ci/setup.jl | 10 +++++ .github/codecov.yml | 8 ++++ .github/workflows/action.yml | 11 ++--- Project.toml | 24 ++++------ .../src/ArgosCUDA.jl => ext/ArgosCUDAExt.jl | 3 +- {lib/ArgosCUDA.jl/src => ext}/api.jl | 16 +++---- {lib/ArgosCUDA.jl/src => ext}/kernels.jl | 17 ------- {lib/ArgosCUDA.jl/src => ext}/reduction.jl | 0 {lib/ArgosCUDA.jl/src => ext}/sparse.jl | 0 lib/ArgosCUDA.jl/Project.toml | 15 ------- lib/ArgosCUDA.jl/README.md | 4 -- src/KKT/reduced_newton.jl | 14 ++---- src/api.jl | 7 +++ test/Algorithms/MadNLP_wrapper.jl | 44 ++++++++++++++++--- test/Artifacts.toml | 6 +++ test/Project.toml | 22 ++++++++++ test/runtests.jl | 4 +- 18 files changed, 145 insertions(+), 86 deletions(-) create mode 100644 .ci/Project.toml create mode 100644 .ci/setup.jl create mode 100644 .github/codecov.yml rename lib/ArgosCUDA.jl/src/ArgosCUDA.jl => ext/ArgosCUDAExt.jl (94%) rename {lib/ArgosCUDA.jl/src => ext}/api.jl (70%) rename {lib/ArgosCUDA.jl/src => ext}/kernels.jl (94%) rename {lib/ArgosCUDA.jl/src => ext}/reduction.jl (100%) rename {lib/ArgosCUDA.jl/src => ext}/sparse.jl (100%) delete mode 100644 lib/ArgosCUDA.jl/Project.toml delete mode 100644 lib/ArgosCUDA.jl/README.md create mode 100644 test/Artifacts.toml create mode 100644 test/Project.toml diff --git a/.ci/Project.toml b/.ci/Project.toml new file mode 100644 index 00000000..2c6f8218 --- /dev/null +++ b/.ci/Project.toml @@ -0,0 +1,26 @@ +[compat] +CUDA = "4.1, 5" +FiniteDiff = "2.7" +Ipopt = "1" +MadNLP = "0.7" + +[deps] +Argos = 
"ef244971-cf80-42b0-9762-2c2c832df5d5" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c" +DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" +ExaPF = "0cf0e50c-a82e-488f-ac7e-41ffdff1b8aa" +FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" +Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" +MadNLPGPU = "d72a61cc-809d-412f-99be-fd81f4b8a598" +MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[extras] +CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2" diff --git a/.ci/setup.jl b/.ci/setup.jl new file mode 100644 index 00000000..9a9506ff --- /dev/null +++ b/.ci/setup.jl @@ -0,0 +1,10 @@ + +using Pkg +Pkg.instantiate() + +using CUDA + +argos_path = joinpath(@__DIR__, "..") +Pkg.develop(path=argos_path) + +CUDA.set_runtime_version!(v"11.8") diff --git a/.github/codecov.yml b/.github/codecov.yml new file mode 100644 index 00000000..bfdc9877 --- /dev/null +++ b/.github/codecov.yml @@ -0,0 +1,8 @@ +coverage: + status: + project: + default: + informational: true + patch: + default: + informational: true diff --git a/.github/workflows/action.yml b/.github/workflows/action.yml index ac6f2fef..83f7f2da 100644 --- a/.github/workflows/action.yml +++ b/.github/workflows/action.yml @@ -19,7 +19,7 @@ jobs: strategy: matrix: os: [ubuntu-22.04] - julia-version: ['1.8'] + julia-version: ['1.9'] julia-arch: [x64] steps: @@ -43,7 +43,7 @@ jobs: strategy: matrix: os: [ubuntu-22.04] - julia-version: ['1.8'] + julia-version: ['1.9'] julia-arch: [x64] steps: @@ -52,9 +52,6 @@ jobs: with: version: ${{ matrix.julia-version }} arch: ${{ matrix.julia-arch 
}} - - run: julia --project -e 'using Pkg; Pkg.Registry.update()' - - run: julia --project -e 'using Pkg; Pkg.add("CUSOLVERRF")' - - run: julia --project -e 'using Pkg; Pkg.develop(path="lib/ArgosCUDA.jl")' - - uses: julia-actions/julia-buildpkg@latest - - uses: julia-actions/julia-runtest@latest + - run: julia --project=.ci .ci/setup.jl + - run: julia --project=.ci test/runtests.jl diff --git a/Project.toml b/Project.toml index 4e9a29ce..c0ae28d4 100644 --- a/Project.toml +++ b/Project.toml @@ -13,25 +13,17 @@ NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +[weakdeps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c" + +[extensions] +ArgosCUDAExt = ["CUDA", "CUSOLVERRF"] + [compat] -CUDA = "4.1, 5" ExaPF = "~0.9.3" -FiniteDiff = "2.7" -Ipopt = "1" KernelAbstractions = "0.9" MadNLP = "0.7" MathOptInterface = "1" NLPModels = "0.19, 0.20" -julia = "1.6" - -[extras] -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" -FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" -Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9" -LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["Test", "CUDA", "DelimitedFiles", "FiniteDiff", "Ipopt", "LazyArtifacts", "Random"] +julia = "1.9" diff --git a/lib/ArgosCUDA.jl/src/ArgosCUDA.jl b/ext/ArgosCUDAExt.jl similarity index 94% rename from lib/ArgosCUDA.jl/src/ArgosCUDA.jl rename to ext/ArgosCUDAExt.jl index 430b27b6..09659dd0 100644 --- a/lib/ArgosCUDA.jl/src/ArgosCUDA.jl +++ b/ext/ArgosCUDAExt.jl @@ -1,4 +1,4 @@ -module ArgosCUDA +module ArgosCUDAExt using LinearAlgebra using SparseArrays @@ -22,3 +22,4 @@ include("reduction.jl") include("api.jl") end # module + diff --git a/lib/ArgosCUDA.jl/src/api.jl b/ext/api.jl 
similarity index 70% rename from lib/ArgosCUDA.jl/src/api.jl rename to ext/api.jl index f8058f50..0611d62e 100644 --- a/lib/ArgosCUDA.jl/src/api.jl +++ b/ext/api.jl @@ -4,9 +4,8 @@ function MadNLP._madnlp_unsafe_wrap(vec::CuVector, n, shift=1) return vec end - -function run_opf_gpu(datafile::String, ::Argos.FullSpace; options...) - flp = Argos.FullSpaceEvaluator(datafile; device=CUDADevice()) +function Argos.run_opf_gpu(datafile::String, ::Argos.FullSpace; options...) + flp = Argos.FullSpaceEvaluator(datafile; device=CUDABackend()) model = Argos.OPFModel(Argos.bridge(flp)) ips = MadNLP.MadNLPSolver( model; @@ -16,8 +15,8 @@ function run_opf_gpu(datafile::String, ::Argos.FullSpace; options...) return ips end -function run_opf_gpu(datafile::String, ::Argos.BieglerReduction; options...) - flp = Argos.FullSpaceEvaluator(datafile; device=CUDADevice()) +function Argos.run_opf_gpu(datafile::String, ::Argos.BieglerReduction; options...) + flp = Argos.FullSpaceEvaluator(datafile; device=CUDABackend()) model = Argos.OPFModel(Argos.bridge(flp)) madnlp_options = Dict{Symbol, Any}(options...) @@ -30,8 +29,8 @@ function run_opf_gpu(datafile::String, ::Argos.BieglerReduction; options...) return ips end -function run_opf_gpu(datafile::String, ::Argos.DommelTinney; options...) - flp = Argos.ReducedSpaceEvaluator(datafile; device=CUDADevice(), nbatch_hessian=256) +function Argos.run_opf_gpu(datafile::String, ::Argos.DommelTinney; options...) + flp = Argos.ReducedSpaceEvaluator(datafile; device=CUDABackend(), nbatch_hessian=256) model = Argos.OPFModel(Argos.bridge(flp)) madnlp_options = Dict{Symbol, Any}(options...) @@ -42,7 +41,8 @@ function run_opf_gpu(datafile::String, ::Argos.DommelTinney; options...) opt_ipm, opt_linear, logger = MadNLP.load_options(; madnlp_options...) 
- KKT = MadNLP.DenseCondensedKKTSystem{Float64, CuVector{Float64}, CuMatrix{Float64}} + QN = MadNLP.ExactHessian{Float64, CuVector{Float64}} + KKT = MadNLP.DenseCondensedKKTSystem{Float64, CuVector{Float64}, CuMatrix{Float64}, QN} ips = MadNLP.MadNLPSolver{Float64, KKT}(model, opt_ipm, opt_linear; logger=logger) MadNLP.solve!(ips) diff --git a/lib/ArgosCUDA.jl/src/kernels.jl b/ext/kernels.jl similarity index 94% rename from lib/ArgosCUDA.jl/src/kernels.jl rename to ext/kernels.jl index 207807b9..3cfe5978 100644 --- a/lib/ArgosCUDA.jl/src/kernels.jl +++ b/ext/kernels.jl @@ -9,7 +9,6 @@ function Argos.transfer2tril!(hessvals::AbstractVector, H::CuSparseMatrixCSR, cs KA.synchronize(CUDABackend()) end - @kernel function _fixed_kernel!(dest, fixed, val) i = @index(Global, Linear) dest[fixed[i]] = val @@ -21,7 +20,6 @@ function Argos.fixed!(dest::CuVector, ind_fixed, val::Number) KA.synchronize(CUDABackend()) end - @kernel function _copy_index_kernel!(dest, src, idx) i = @index(Global, Linear) @inbounds dest[i] = src[idx[i]] @@ -34,7 +32,6 @@ function Argos.copy_index!(dest::CuVector{T}, src::CuVector{T}, idx) where T KA.synchronize(CUDABackend()) end - @kernel function _fixed_diag_kernel!(dest, fixed, val) i = @index(Global, Linear) k = fixed[i] @@ -75,7 +72,6 @@ function Argos.transfer_auglag_hessian!( return end - @kernel function _batch_tangents_kernel!(seeds, offset, n_batches) i = @index(Global, Linear) @inbounds seeds[i + offset, i] = 1.0 @@ -92,7 +88,6 @@ function Argos.set_batch_tangents!(seeds::CuMatrix, offset, n, n_batches) return end - @kernel function _tgtmul_1_kernel!(y, A_rowPtr, A_colVal, A_nzVal, z, w, alpha, nx, nu) i, k = @index(Global, NTuple) @inbounds for c in A_rowPtr[i]:A_rowPtr[i+1]-1 @@ -122,7 +117,6 @@ function Argos.tgtmul!( KA.synchronize(CUDABackend()) end - @kernel function _tgtmul_2_kernel!(yx, yu, A_rowPtr, A_colVal, A_nzVal, z, w, alpha, nx, nu) i, k = @index(Global, NTuple) @inbounds for c in A_rowPtr[i]:A_rowPtr[i+1]-1 @@ -157,7 
+151,6 @@ function Argos.tgtmul!( KA.synchronize(CUDABackend()) end - @kernel function _scale_transpose_kernel!( Jtz, Jp, Jj, Jz, D, tperm, ) @@ -169,13 +162,3 @@ end end end -function Argos.update!(K::Argos.HJDJ, A, D, Σ) - m = size(A, 1) - ev = _scale_transpose_kernel!(CUDABackend())( - K.Jt.nzVal, A.rowPtr, A.colVal, A.nzVal, D, K.transperm, - ndrange=(m, 1), - ) - KA.synchronize(ev) - spgemm!('N', 'N', 1.0, K.Jt, A, 0.0, K.JtJ, 'O') - K.Σ .= Σ -end diff --git a/lib/ArgosCUDA.jl/src/reduction.jl b/ext/reduction.jl similarity index 100% rename from lib/ArgosCUDA.jl/src/reduction.jl rename to ext/reduction.jl diff --git a/lib/ArgosCUDA.jl/src/sparse.jl b/ext/sparse.jl similarity index 100% rename from lib/ArgosCUDA.jl/src/sparse.jl rename to ext/sparse.jl diff --git a/lib/ArgosCUDA.jl/Project.toml b/lib/ArgosCUDA.jl/Project.toml deleted file mode 100644 index f9647636..00000000 --- a/lib/ArgosCUDA.jl/Project.toml +++ /dev/null @@ -1,15 +0,0 @@ -name = "ArgosCUDA" -uuid = "8946db8d-321b-4174-84ad-48e2f9b69c56" -authors = ["François Pacaud "] -version = "0.1.0" - -[deps] -Argos = "ef244971-cf80-42b0-9762-2c2c832df5d5" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c" -ExaPF = "0cf0e50c-a82e-488f-ac7e-41ffdff1b8aa" -KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" -MadNLPGPU = "d72a61cc-809d-412f-99be-fd81f4b8a598" -SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" diff --git a/lib/ArgosCUDA.jl/README.md b/lib/ArgosCUDA.jl/README.md deleted file mode 100644 index 3ef0f00d..00000000 --- a/lib/ArgosCUDA.jl/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# ArgosCUDA.jl - -CUDA wrapper for Argos.jl. 
- diff --git a/src/KKT/reduced_newton.jl b/src/KKT/reduced_newton.jl index bd17095b..60ade890 100644 --- a/src/KKT/reduced_newton.jl +++ b/src/KKT/reduced_newton.jl @@ -406,17 +406,13 @@ function MadNLP.solve_refine_wrapper!( Λ = Σₛ ./ (Σd .* Σₛ .- α.^2) # Reduction (1) --- Condensed - # vj .= Λ .* (r₅ .+ α .* r₃ ./ Σₛ) # v = (α Σₛ⁻¹ α)⁻¹ * (r₅ + α Σₛ⁻¹ r₃) - vj .= (Σₛ .* r₅ .+ r₃) # v = (Σₛ r₅ + α r₃) - mul!(jv, kkt.A', vj, 1.0, 0.0) # jᵥ = Aᵀ v + vj .= Λ .* (r₅ .+ α .* r₃ ./ Σₛ) # v = (Σd - α Σₛ⁻¹ α)⁻¹ * (r₅ + α Σₛ⁻¹ r₃) + mul!(jv, kkt.A', vj, -1.0, 0.0) # jᵥ = -Aᵀ v jv .+= r₁₂ # r₁₂ - Aᵀv # Reduction (2) --- Biegler sx1 .= r₄ # r₄ ldiv!(Gxi, sx1) # Gₓ⁻¹ r₄ - tt = similar(sx1) - mul!(tt, kkt.Gx, sx1) - sx2 .= tx # tx = jv[1:nx] kvx .= sx1 ; kvu .= 0.0 mul!(kh, K, kv) # [Kₓₓ Gₓ⁻¹ r₄ ; Kᵤₓ Gₓ⁻¹ r₄ ] @@ -444,10 +440,8 @@ function MadNLP.solve_refine_wrapper!( # (2) Extract Condensed mul!(vj, kkt.A, dxu) # Aₓ dₓ + Aᵤ dᵤ - # dy .= Λ .* (r₅ .- vj .+ α .* r₃ ./ Σₛ) - # ds .= (r₃ .+ α .* dy) ./ Σₛ - ds .= (vj .- r₅) - dy .= Σₛ .* ds .- r₃ + dy .= Λ .* (r₅ .- vj .+ α .* r₃ ./ Σₛ) + ds .= (r₃ .+ α .* dy) ./ Σₛ x[ips.ind_fixed] .= 0.0 copyto!(x_h, x) diff --git a/src/api.jl b/src/api.jl index d5817771..9148333e 100644 --- a/src/api.jl +++ b/src/api.jl @@ -34,6 +34,13 @@ By default, Argos implements three different formulations for the OPF: """ function run_opf end +""" + run_opf_gpu(datafile::String, ::AbstractOPFFormulation; options...) + +Solve the OPF problem associated with `datafile` using MadNLP on the GPU. 
+""" +function run_opf_gpu end + """ FullSpace <: AbstractOPFFormulation diff --git a/test/Algorithms/MadNLP_wrapper.jl b/test/Algorithms/MadNLP_wrapper.jl index ab4738b5..ac32c900 100644 --- a/test/Algorithms/MadNLP_wrapper.jl +++ b/test/Algorithms/MadNLP_wrapper.jl @@ -1,5 +1,6 @@ using MadNLP +using MadNLPGPU function _test_results_match(ips1, ips2; atol=1e-10) @test ips1.status == ips2.status @@ -96,16 +97,45 @@ end end end -@testset "Solve OPF with $form" for form in [ - Argos.FullSpace(), - Argos.BieglerReduction(), - Argos.DommelTinney(), +@testset "[CPU] Solve OPF with $form" for (form, linear_solver_algo) in [ + (Argos.FullSpace(), MadNLP.BUNCHKAUFMAN), + (Argos.BieglerReduction(), MadNLP.CHOLESKY), + (Argos.DommelTinney(), MadNLP.CHOLESKY), ] case = "case9.m" datafile = joinpath(INSTANCES_DIR, case) - ips = Argos.run_opf(datafile, form; tol=1e-5, print_level=MadNLP.ERROR) - @test isa(ips, MadNLP.MadNLPSolver) - @test ips.status == MadNLP.SOLVE_SUCCEEDED + solver = Argos.run_opf( + datafile, + form; + tol=1e-5, + print_level=MadNLP.ERROR, + linear_solver=LapackCPUSolver, + lapack_algorithm=linear_solver_algo, + ) + @test isa(solver, MadNLP.MadNLPSolver) + @test solver.status == MadNLP.SOLVE_SUCCEEDED +end + +if has_cuda_gpu() + @testset "[CUDA] Solve OPF with $form" for (form, linear_solver_algo) in [ + (Argos.FullSpace(), MadNLP.BUNCHKAUFMAN), + (Argos.BieglerReduction(), MadNLP.CHOLESKY), + (Argos.DommelTinney(), MadNLP.CHOLESKY), + ] + case = "case9.m" + datafile = joinpath(INSTANCES_DIR, case) + + solver = Argos.run_opf_gpu( + datafile, + form; + tol=1e-5, + linear_solver=LapackGPUSolver, + lapack_algorithm=linear_solver_algo, + print_level=MadNLP.ERROR, + ) + @test isa(solver, MadNLP.MadNLPSolver) + @test solver.status == MadNLP.SOLVE_SUCCEEDED + end end diff --git a/test/Artifacts.toml b/test/Artifacts.toml new file mode 100644 index 00000000..0aa188ca --- /dev/null +++ b/test/Artifacts.toml @@ -0,0 +1,6 @@ +[ExaData] +git-tree-sha1 = 
"fc4d4283fe5bd2327fb1504317feb645a8421f7c" +lazy = true + [[ExaData.download]] + url = "https://web.cels.anl.gov/~mschanen/ExaData-d235e24.tar.gz" + sha256 = "86771527a71bba9add90fb14b3bb65da58c021cf55325159aa04f7149f4874e5" diff --git a/test/Project.toml b/test/Project.toml new file mode 100644 index 00000000..e9fab75e --- /dev/null +++ b/test/Project.toml @@ -0,0 +1,22 @@ +[deps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c" +DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" +ExaPF = "0cf0e50c-a82e-488f-ac7e-41ffdff1b8aa" +FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" +Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" +MadNLPGPU = "d72a61cc-809d-412f-99be-fd81f4b8a598" +MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[compat] +CUDA = "4.1, 5" +FiniteDiff = "2.7" +Ipopt = "1" +MadNLP = "0.7" diff --git a/test/runtests.jl b/test/runtests.jl index 2e87bd3a..53090a21 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,6 +5,7 @@ using LinearAlgebra using SparseArrays using CUDA +using CUSOLVERRF using KernelAbstractions using ExaPF @@ -19,11 +20,12 @@ const CASES = ["case9.m", "case30.m"] ARCHS = Any[(CPU(), Array, SparseMatrixCSC)] if has_cuda_gpu() - using ArgosCUDA CUDA_ARCH = (CUDABackend(), CuArray, nothing) push!(ARCHS, CUDA_ARCH) + CUDA.versioninfo() end + # Load test modules @isdefined(TestEvaluators) || include("Evaluators/TestEvaluators.jl")