diff --git a/HybridKKT.jl/Manifest.toml b/HybridKKT.jl/Manifest.toml
index ce8b19a..4e50226 100644
--- a/HybridKKT.jl/Manifest.toml
+++ b/HybridKKT.jl/Manifest.toml
@@ -2,7 +2,7 @@
 julia_version = "1.10.4"
 manifest_format = "2.0"
-project_hash = "9b51f6a5d9720d979fe8f9e8ae156436f5d4eb87"
+project_hash = "049dbd24fbfa4394b14509dff247ec1a4ee8602f"
 
 [[deps.AMD]]
 deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse_jll"]
@@ -649,8 +649,6 @@ version = "0.5.13"
 [[deps.MadNLP]]
 deps = ["LDLFactorizations", "LinearAlgebra", "Logging", "NLPModels", "Pkg", "Printf", "SolverCore", "SparseArrays", "SuiteSparse"]
 git-tree-sha1 = "27b224771f66a0576d4f6be35e2055c8eb6c2b48"
-repo-rev = "master"
-repo-url = "https://github.com/MadNLP/MadNLP.jl.git"
 uuid = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
 version = "0.8.4"
 weakdeps = ["MathOptInterface"]
@@ -661,18 +659,12 @@ weakdeps = ["MathOptInterface"]
 [[deps.MadNLPGPU]]
 deps = ["AMD", "CUDA", "CUDSS", "CUSOLVERRF", "KernelAbstractions", "LinearAlgebra", "MadNLP", "MadNLPTests", "Metis", "SparseArrays"]
 git-tree-sha1 = "542846a6ecbaa6cbd7f3dbc6f86301b82b934a78"
-repo-rev = "master"
-repo-subdir = "lib/MadNLPGPU"
-repo-url = "https://github.com/MadNLP/MadNLP.jl.git"
 uuid = "d72a61cc-809d-412f-99be-fd81f4b8a598"
 version = "0.7.3"
 
 [[deps.MadNLPHSL]]
 deps = ["HSL", "LinearAlgebra", "MadNLP"]
 git-tree-sha1 = "0122611889ec2429f742145f8cffb6d57e80103d"
-repo-rev = "master"
-repo-subdir = "lib/MadNLPHSL"
-repo-url = "https://github.com/MadNLP/MadNLP.jl.git"
 uuid = "7fb6135f-58fe-4112-84ca-653cf5be0c77"
 version = "0.5.2"
diff --git a/HybridKKT.jl/Project.toml b/HybridKKT.jl/Project.toml
index 0b26bb6..562e964 100644
--- a/HybridKKT.jl/Project.toml
+++ b/HybridKKT.jl/Project.toml
@@ -12,6 +12,7 @@ CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
 CUTEst = "1b53aba6-35b6-5f92-a507-53c67d53f819"
 Comonicon = "863f3e99-da2a-4334-8734-de3dacbe5542"
 ExaModels = "1037b233-b668-4ce9-9b63-f9f681f55dd2"
+HSL_jll = "017b0a0e-03f4-516a-9b91-836bbd1904dd"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
diff --git a/HybridKKT.jl/README.md b/HybridKKT.jl/README.md
new file mode 100644
index 0000000..871e244
--- /dev/null
+++ b/HybridKKT.jl/README.md
@@ -0,0 +1,48 @@
+# HybridKKT.jl
+
+Artifact to reproduce the benchmarks presented in
+the manuscript ["Condensed-space methods for nonlinear programming on GPUs"](https://arxiv.org/abs/2405.14236).
+
+A `Manifest.toml` file is provided to pin the exact package versions we used for the
+benchmarks presented in the paper. A `Makefile` serves as the main entry point.
+
+*Important notice:*
+This repository is provided for reproduction purposes only. Please use
+the following implementations if you want to use the condensed KKT systems in
+your own work:
+
+- LiftedKKT has been implemented in [MadNLP](https://github.com/MadNLP/MadNLP.jl/blob/master/src/KKT/Sparse/condensed.jl)
+- HyKKT has been implemented in a separate extension: [HybridKKT.jl](https://github.com/MadNLP/HybridKKT.jl)
+
+## Installation
+
+To install all the dependencies, please use:
+```shell
+make install
+```
+This command installs MadNLP and all the required dependencies
+(including CUDA and cuDSS).
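+
+As an optional sanity check (a suggestion, not part of the original
+instructions), you can confirm that the CUDA stack is visible from the
+project environment:
+```shell
+# Optional check: prints the CUDA toolkit and driver detected by CUDA.jl.
+julia --project -e "using CUDA; CUDA.versioninfo()"
+```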
+
+Note that HSL has to be installed independently using
+[libHSL](https://licences.stfc.ac.uk/product/libhsl). Once it is downloaded, run:
+
+```shell
+export LIBHSL="/your/path/to/HSL_jll.jl"
+julia --project -e "using Pkg; Pkg.develop(path=ENV[\"LIBHSL\"])"
+```
+
+## Test the installation
+
+You can check that the installation succeeded by running:
+```shell
+make tests
+```
+
+## Reproduce the results
+
+You can reproduce the PGLIB and COPS benchmarks using:
+```shell
+make benchmarks
+```
diff --git a/HybridKKT.jl/benchmarks/COPS/benchmark.jl b/HybridKKT.jl/benchmarks/COPS/benchmark.jl
index 0a4d90d..ef1e6ef 100644
--- a/HybridKKT.jl/benchmarks/COPS/benchmark.jl
+++ b/HybridKKT.jl/benchmarks/COPS/benchmark.jl
@@ -194,7 +194,7 @@
         use_gpu=true,
         tol=tol,
         linear_solver=MadNLPGPU.CUDSSSolver,
-        cudss_algorithm=MadNLP.BUNCHKAUFMAN,
+        cudss_algorithm=MadNLP.LDL,
         print_level=print_level,
     )
     output_file = joinpath(RESULTS_DIR, "cops-$(flag)-madnlp-sckkt-cudss-ldl.csv")
@@ -211,7 +211,7 @@
         use_gpu=true,
         tol=tol,
         linear_solver=MadNLPGPU.CUDSSSolver,
-        cudss_algorithm=MadNLP.BUNCHKAUFMAN,
+        cudss_algorithm=MadNLP.LDL,
         print_level=print_level,
     )
     output_file = joinpath(RESULTS_DIR, "cops-$(flag)-madnlp-hckkt-cudss-ldl.csv")
diff --git a/HybridKKT.jl/benchmarks/OPF/benchmark.jl b/HybridKKT.jl/benchmarks/OPF/benchmark.jl
index a8057b1..3e65ad4 100644
--- a/HybridKKT.jl/benchmarks/OPF/benchmark.jl
+++ b/HybridKKT.jl/benchmarks/OPF/benchmark.jl
@@ -28,7 +28,6 @@ FULL_BENCHMARK = [
     "pglib_opf_case2000_goc.m",
     "pglib_opf_case20758_epigrids.m",
     "pglib_opf_case2312_goc.m",
-    # "pglib_opf_case24464_goc.m",
     "pglib_opf_case2742_goc.m",
     "pglib_opf_case2869_pegase.m",
     "pglib_opf_case30000_goc.m",
@@ -285,7 +284,7 @@
         use_gpu=true,
         tol=tol,
         linear_solver=MadNLPGPU.CUDSSSolver,
-        cudss_algorithm=MadNLP.BUNCHKAUFMAN,
+        cudss_algorithm=MadNLP.LDL,
         print_level=print_level,
     )
     output_file = joinpath(RESULTS_DIR, "pglib-$(flag)-madnlp-sckkt-cudss-ldl.csv")
@@ -302,7 +301,7 @@
         use_gpu=true,
         tol=tol,
         linear_solver=MadNLPGPU.CUDSSSolver,
-        cudss_algorithm=MadNLP.BUNCHKAUFMAN,
+        cudss_algorithm=MadNLP.LDL,
         print_level=print_level,
     )
     output_file = joinpath(RESULTS_DIR, "pglib-$(flag)-madnlp-hckkt-cudss-ldl-$(gamma_).csv")
diff --git a/HybridKKT.jl/benchmarks/OPF/model.jl b/HybridKKT.jl/benchmarks/OPF/model.jl
index df349a0..c3f5495 100644
--- a/HybridKKT.jl/benchmarks/OPF/model.jl
+++ b/HybridKKT.jl/benchmarks/OPF/model.jl
@@ -110,7 +110,7 @@ function ac_power_model(
 
     data = parse_ac_power_data(filename, backend)
 
-    w = ExaModels.ExaCore(T, backend)
+    w = ExaModels.ExaCore(T; backend=backend)
 
     va = ExaModels.variable(w, length(data.bus);)
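For context, the hunks above touch two API usages: the benchmark scripts switch `cudss_algorithm` from `MadNLP.BUNCHKAUFMAN` to `MadNLP.LDL` (making the option match the `*-cudss-ldl.csv` output filenames they already wrote), and `model.jl` moves to the keyword form of the `ExaModels.ExaCore` constructor. The sketch below is not from the repository; it is a minimal illustration of how the two pieces fit together, assuming a CUDA-capable GPU and that these keyword options are forwarded to the linear solver as in `benchmarks/*/benchmark.jl`. The toy model and its data are invented for illustration.

```julia
# Hypothetical toy example (not part of this repository).
using CUDA, ExaModels, MadNLP, MadNLPGPU

# Build a small model on the GPU, using the keyword constructor
# adopted by the model.jl change above.
w = ExaModels.ExaCore(Float64; backend=CUDABackend())
x = ExaModels.variable(w, 10; start=fill(0.5, 10))
ExaModels.objective(w, (x[i] - 1.0)^2 for i in 1:10)
ExaModels.constraint(w, x[i] + x[i+1] for i in 1:9; lcon=0.0, ucon=1.5)
model = ExaModels.ExaModel(w)

# Solve with MadNLP, selecting cuDSS with an LDL factorization,
# mirroring the options passed in the benchmark scripts (assumed
# to be forwarded to the linear solver by madnlp).
results = madnlp(
    model;
    linear_solver=MadNLPGPU.CUDSSSolver,
    cudss_algorithm=MadNLP.LDL,
    tol=1e-8,
)
```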
diff --git a/README.md b/README.md
index 460d169..1e55f53 100644
--- a/README.md
+++ b/README.md
@@ -1,57 +1,20 @@
 # nlp-on-gpu-paper
 
-Make it a review paper comparing different methods for implementing a nonlinear sparse, large-scale optimization solver on the GPU. We will focus on methods, not the implementation. For numerical comparison, we will use MadNLP. We will compare:
- - HyKKT method
- - condensed space inequality relaxation
- - condensed then reduce
+This repository is an artifact storing the tex sources and the Julia scripts
+used to generate the results in the manuscript "Condensed-space methods for nonlinear programming on GPUs",
+available on [arXiv](https://arxiv.org/abs/2405.14236).
+
+- The directory `HybridKKT.jl/` stores the Julia implementation of the methods presented in the paper.
+- The directory `tex/` stores the tex sources.
+- The directory `scripts/` stores the scripts used to generate the plots presented in the paper with `matplotlib`.
+
+To cite the paper, please use the following BibTeX reference:
+```tex
+@article{pacaud2024condensed,
+  title={Condensed-space methods for nonlinear programming on {GPU}s},
+  author={Pacaud, Fran{\c{c}}ois and Shin, Sungho and Montoison, Alexis and Schanen, Michel and Anitescu, Mihai},
+  journal={arXiv preprint arXiv:2405.14236},
+  year={2024}
+}
+```
 
- - For sparse solver, we compare two options:
-   - CUDSS
-   - CUSOLVERRF
-
- - Portability (not our primary focus, but if we want to say something)
-   - https://github.com/ORNL/ReSolve/blob/v0.99.1/resolve/LinSolverDirectRocSolverRf.cpp
-
-# Friday, January 19th
-
-* Goal of the optimization paper
-
-Assess the capabilities of three linear solvers to solve nonlinear optimization problem on the GPU:
-- Null-space method (aka reduced Hessian, Argos)
-- Hybrid-condensed KKT solver (HyKKT)
-- Sparse-condensed KKT with equality relaxation strategy
-
-The two last methods require efficient sparse Cholesky available on the GPU.
-
-
-* Latests developments (https://github.com/exanauts/nlp-on-gpu-paper/tree/main/scripts)
-
-- Implementation of HyKKT in MadNLP, now gives correct result.
-  * works on the GPU
-  * no iterative refinement (yet): limited precision
-  * solve OPF problems with tol=1e-3
-  * it looks like CG is the bottleneck in the algorithm
-- Full integration of cuDSS into MadNLP for sparse-Cholesky
-- Integration of CHOLMOD on the CPU for comparison
-
-
-* To discuss
-
-- Improve the HyKKT implementation
-  * Implement iterative refinement on the GPU @FP
-  * double check the accuracy of the linear solve (and its interplay with CG convergence) @FP
-  * identify the computation bottleneck and address them @FP
-  * Implement AMD ordering for sparse Cholesky @AM
-  * scaling of the problem (use KrylovPreconditioner?) @AM
-    ^ Implement symmetric scaling on the GPU
-- Decide what we want to showcase exactly
-  * go deeper into the LA aspect
-  * Benchmark on OPF and SCOPF instances?
-  * Include additional benchmarks?
-    ^ COPS benchmark in ExaModels? Have a look at the Goddard problem
-    ^ PDE-constrained optimization?
-- NCL ? Can we finish the implementation in a reasonable time?
-
-
-
-*