# HybridKKT.jl

Artifact to reproduce the benchmarks presented in
the manuscript ["Condensed-space methods for nonlinear programming on GPUs"](https://arxiv.org/abs/2405.14236).

A `Manifest.toml` file is provided to duplicate the exact package versions we used for the
benchmarks presented in the paper. A `Makefile` is used as the main entry point.

*Important notice:*
This repository is provided only for reproduction purposes. Please use
the following implementations if you want to use the condensed KKT systems in
your own work:

- LiftedKKT has been implemented in [MadNLP](https://github.com/MadNLP/MadNLP.jl/blob/master/src/KKT/Sparse/condensed.jl)
- HyKKT has been implemented in a separate extension: [HybridKKT.jl](https://github.com/MadNLP/HybridKKT.jl)
## Installation
To install all the dependencies, please use:
```shell
make install
```
This command installs MadNLP and all the required dependencies
(including CUDA and cuDSS).
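If you prefer not to go through the Makefile, the pinned environment can also be instantiated directly with Julia's package manager; this is a sketch of what `make install` presumably automates (the Makefile may perform additional steps, e.g. CUDA setup), to be run from the repository root with `julia --project`:

```julia
# Resolve the environment to the exact package versions
# pinned in the provided Manifest.toml.
using Pkg
Pkg.instantiate()
```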
Note that HSL has to be installed independently using
[libHSL](https://licences.stfc.ac.uk/product/libhsl). Once you have downloaded `HSL_jll.jl`, run:

```shell
export LIBHSL="/your/path/to/HSL_jll.jl"
julia --project -e "using Pkg; Pkg.develop(path=ENV[\"LIBHSL\"])"
```

## Test the installation

You can check that the installation has succeeded by running:
```shell
make tests
```

## Reproduce the results
You can reproduce the PGLIB and COPS benchmarks using:
```shell
make benchmarks
```
# nlp-on-gpu-paper

The paper is a review comparing different methods for implementing a sparse, large-scale nonlinear optimization solver on the GPU. We focus on the methods, not the implementation. For the numerical comparison, we use MadNLP. We compare:
- the HyKKT method
- condensed-space inequality relaxation
- condense-then-reduce

This repository is an artifact storing the tex sources and the Julia scripts
used to generate the results in the manuscript "Condensed-space methods for nonlinear programming on GPUs",
available on [arxiv](https://arxiv.org/abs/2405.14236).

- The directory `HybridKKT.jl/` stores the Julia implementation of the method presented in the paper.
- The directory `tex/` stores the tex sources.
- The directory `scripts/` stores the script files used to generate the plots presented in the paper with `matplotlib`.

To cite the paper, please use the following bibtex reference:
```tex
@article{pacaud2024condensed,
  title={Condensed-space methods for nonlinear programming on {GPU}s},
  author={Pacaud, Fran{\c{c}}ois and Shin, Sungho and Montoison, Alexis and Schanen, Michel and Anitescu, Mihai},
  journal={arXiv preprint arXiv:2405.14236},
  year={2024}
}
```

For the sparse solver, we compare two options:
- cuDSS
- cusolverRF

Portability (not our primary focus, but worth a mention if we want to say something):
- https://github.com/ORNL/ReSolve/blob/v0.99.1/resolve/LinSolverDirectRocSolverRf.cpp
||
# Friday, January 19th | ||
|
||
* Goal of the optimization paper | ||
|
||
Assess the capabilities of three linear solvers to solve nonlinear optimization problem on the GPU: | ||
- Null-space method (aka reduced Hessian, Argos) | ||
- Hybrid-condensed KKT solver (HyKKT) | ||
- Sparse-condensed KKT with equality relaxation strategy | ||
|
||
The two last methods require efficient sparse Cholesky available on the GPU. | ||
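That sparse Cholesky building block can be exercised on its own with CUDSS.jl. A minimal sketch, following CUDSS.jl's documented analysis/factorization/solve interface; the random SPD matrix is a stand-in for illustration, not one of the paper's condensed KKT systems (which come from MadNLP):

```julia
using CUDA, CUDA.CUSPARSE, CUDSS
using SparseArrays, LinearAlgebra

# Illustrative SPD system (placeholder for a condensed KKT matrix).
n = 100
A = sprand(Float64, n, n, 0.05)
A = A * A' + I  # symmetrize and shift to make it positive definite
b = rand(Float64, n)

A_gpu = CuSparseMatrixCSR(A)
b_gpu = CuVector(b)
x_gpu = CuVector(zeros(Float64, n))

# Sparse Cholesky on the GPU: symbolic analysis, numerical
# factorization, then the triangular solves.
solver = CudssSolver(A_gpu, "SPD", 'F')
cudss("analysis", solver, x_gpu, b_gpu)
cudss("factorization", solver, x_gpu, b_gpu)
cudss("solve", solver, x_gpu, b_gpu)
```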

* Latest developments (https://github.com/exanauts/nlp-on-gpu-paper/tree/main/scripts)

- Implementation of HyKKT in MadNLP, now gives correct results.
  * works on the GPU
  * no iterative refinement (yet): limited precision
  * solves OPF problems with tol=1e-3
  * it looks like CG is the bottleneck in the algorithm
- Full integration of cuDSS into MadNLP for sparse Cholesky
- Integration of CHOLMOD on the CPU for comparison

* To discuss

- Improve the HyKKT implementation
  * Implement iterative refinement on the GPU @FP
  * Double-check the accuracy of the linear solve (and its interplay with CG convergence) @FP
  * Identify the computational bottlenecks and address them @FP
  * Implement AMD ordering for sparse Cholesky @AM
  * Scaling of the problem (use KrylovPreconditioner?) @AM
    ^ Implement symmetric scaling on the GPU
- Decide what we want to showcase exactly
  * Go deeper into the linear-algebra aspect
  * Benchmark on OPF and SCOPF instances?
  * Include additional benchmarks?
    ^ COPS benchmark in ExaModels? Have a look at the Goddard problem
    ^ PDE-constrained optimization?
- NCL? Can we finish the implementation in a reasonable time?