From fc91d8858550d2dcaa593be6848c1b17414a65ed Mon Sep 17 00:00:00 2001 From: fritzgoebel Date: Mon, 20 Feb 2023 10:40:32 -0500 Subject: [PATCH] Add choice for triangular solver implementation for Ginkgo (#585) * Add choice for triangular solver implementation for Ginkgo, remove iterative refinement inside Ginkgo integration * Add options to configure Ginkgo GMRES * Update ginkgo install on Newell and Deception. * Remove verbose output from build script and fix newell. * Turn of outer iterative refinement with Ginkgo * Increase verbosity for Ginkgo CUDA test and turning off non-sparse tests temporarily. * Remove nvblas.conf from tracked files * Update GINKGO on Ascent * Use Ginkgo triangular solver as default instead of vendor implementation * Revert temporary changes made for debugging. * Used gcc/9.1.0-based GINKGO module on Ascent for consistency * Address Review Comments --------- Co-authored-by: ni7660@kit.edu Co-authored-by: rcrutherford Co-authored-by: pelesh Co-authored-by: Nicholson Koukpaizan --- .nfs0000000216213277000026b7 | Bin 0 -> 16384 bytes BUILD.sh | 2 +- scripts/ascentVariables.sh | 4 +- scripts/marianasVariables.sh | 4 +- scripts/newellVariables.sh | 68 ++++++++++----------- src/Drivers/Sparse/NlpSparseEx1Driver.cpp | 7 ++- src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp | 6 +- src/LinAlg/hiopLinSolverSparseGinkgo.cpp | 54 ++++++++++------ src/LinAlg/hiopLinSolverSparseGinkgo.hpp | 2 + src/Utils/hiopOptions.cpp | 28 ++++++--- 10 files changed, 106 insertions(+), 69 deletions(-) create mode 100644 .nfs0000000216213277000026b7 diff --git a/.nfs0000000216213277000026b7 b/.nfs0000000216213277000026b7 new file mode 100644 index 0000000000000000000000000000000000000000..5bbc78355b93c572f827ddf7e1b829bd03155a17 GIT binary patch literal 16384 zcmeHOTWlmp73~0<2PBXeLJ=aWjCZwTDITvko0pRS?VcWcvh&J3yf!3sdV0z;o$cxF zbobb6Z5$AfFF=Yy5FtVlKS;n2LVP6rASEA25dtAVLIeaNBnTnpgMfk%BH`SsZqLJG z$8o+8J@WB*x~p#8I(6&Ty|>5hQ>(=?yFGVC!Ru;8`QV&sg}1(b{R2%!2?kN`*3%1g 
z=fGEoTilOq&pmZI=etg>>upYL|7a|5-VeNoxD|!-J=^oeueq)r&1+@zJTKW{6y~A} z(Nq!X#gu`Rfg>|;m2%7Jg;UghCUXbA(Zz%&Q11SS311SS311SS311SS311SS311SS314m%M>?q12`aMtiHlF_{ z`TvU_P?X;QKLVZt0-ykV1o-Fs73Fo{CE$7BhrqMI*MTPi8|VS|0LOtD;9plM%I|?E zfv*8=U;%jPN=5lRa2xRR_bJLFzyk1(_bSSBz?XnE;6~sNS18JtfjfaeUalyQ0XlFg za140rGDSH9{OVFg`5f?BpbC5l_{lNc2Ob4BfdOzc@VECU$_v2vfG+?K04<;ZoCJ;m zmjHi4f#Mn92f#Og&j6nSJ^`!)8gK(}4RAH^8VV{e0$&Bb0$c>{0B!?r1bzp;Uj&Hv z=ZO0wzpOsKHDPqP&)p7pBf}G4#mFkN3S%q9YRzahbfc^;msOT&g`AbmZr@~%?Q%0< zrqku^z|62}g=KbIFV|YQQYa{glD$%_vCPuIb~yl&G7eR;{-T+4Ixz0B2kMk_@*os~C7Z>Sw4qcl)J7oBY zdOYcjmSR!aR?oJ2Bki#n9EbUy?M4ukW6fZj1P($zX=Eb=$%zWHVyw@@(CqT;IkjFX zR+d>tcg;2w>yFJ|P?HlZ?&(k-gb{0tt!YNg;U+$ME@$2blXl9mN8>T&JZv7`BzfM4 zXr@;3Y|lNR#4NF^yWt?>D9nYwC@rQEfGAt)`JL z)@RuaR;$CYm{4hxoZpw*y?fcq%}FM$sqCvQOF>lh2|#jU>0Q1BZ`;QRz+KZ*D052 z1@@;VQI|R9z=cIxUcYa;oscXK9=b>K7`QI2fEXM@De;*xVu*Wos&v5xIQGDFLA}}c z9UjubF*~JuzN)p#@RHcJbh60Kw2N5Bvj%;fQzm_3kZsx~%X=5LyWHI?>Rhq8VyvpA zWh{Q)pyh)})CcH=V+#(4wkorD5Sg~iu+4ZkkWwRXN~#qKeu6I*&6zoL-@C1~@VIoR!n$*3^2w*r<+4O|BhP<}$Xz^0>%l_@%DLjw(^B7Aq@_ z(wLU}&2nO48<+Wdr{30Inott#W~^d3FjNd0nr;dunH~ZFJ!)#hj!naG=;ZIL%yfnhB|b z7ZWi{x5p4pnohJ$Tg>MHVmUYwi%%JaHdS4{S7mIB015%kw(Z!EVYU$ zbjs*8won|4QB(%TP*3g0n(DcCxj#+Vi77f5&g*+rmmNC6{O_VCX_z0 zK6j!&cY;noCH{^$6QL_x)$0v$lAOxjmOD-NMV!t=pf0l%-*aqhJKNBkOe@z;ox1%D ztLWz_WMtV|wN%vBlk4a`jP3Qc8iG$WD!O~hYM7fWGl3mha0W& zuwk)+*_BF0osN5vdWm>O9R4_Y^wG8lrxin)+S(e9Eu8u|V&ER`YZc)mCKViM!xqk9 zijtMS?b`i8--!K1hh-BgZ^KeWPT&sURn!4q z0KN}A4m<+bKmnKmZUC+Y{*4;JUw|J2PXKLT9k>R#40sduf;WJl0bO7QP=J@OK<*9v z7WgLcIB*X581P}>a^P9i621l0fJNXH)D(UJTmag@mB4qw*LQ#|;2c2w-3_F#l!26i zl!5;X24t;FWQ}8?H&t}ZPR9rb?M{;KX2Zam&&ybw?xUDOnQsz($6+V}h(5)4$W@V# z`#z3zkHi?b_6U_D%bA(1GDVy@Kg+8}z(@Dx{R0Vc9rwkvZN2H>%jEJlSMW{YwAeK8J7iJ_m-sL~p&oO1P>WPc8 z=()2;g`pceY>$l0j;qY5C= zPHZnxKqUFn+(s-?ULBc8B4xZ#K)EQTICAsG6^(#Ot+*U|gTUe;74VB2yTwnOI3^QC z22_+i%M|C#&2;jxhiE-_D+^S(5hWy1*xo{IJ3tA4&-q4C`zG3Dm@LP9 zf4o{Lt>Yd0=r9UMQ5nS&>Y%7^Y}gLZPIhioTX-rZD&6zaQ4Z%IMdWOkRG3D^2vuk? 
ztY46b#YJ4A35bU(O*80n{Aair0)-7ZKbgIYr)+bPY=KqQFNg<_C^gYDB_xn>YZ(JH zOuDJ7_XwVSFXG0z2YH w+YD{X@KJ0=Ma7^3B-UCIyj!&q=Ri$1kv6fy9Telp%HF}bFvH?`SetStringValue("linsol_mode", "speculative"); nlp.options->SetStringValue("linear_solver_sparse", "cusolver-lu"); nlp.options->SetStringValue("cusolver_lu_refactorization", "rf"); - nlp.options->SetIntegerValue("ir_inner_cusolver_maxit", 100); - nlp.options->SetNumericValue("ir_inner_cusolver_tol", 1e-16); - nlp.options->SetIntegerValue("ir_inner_cusolver_restart", 20); + nlp.options->SetIntegerValue("ir_inner_maxit", 100); + nlp.options->SetNumericValue("ir_inner_tol", 1e-16); + nlp.options->SetIntegerValue("ir_inner_restart", 20); nlp.options->SetStringValue("ir_inner_cusolver_gs_scheme", "mgs_pm"); nlp.options->SetStringValue("compute_mode", "hybrid"); // LU solver needs to use inertia free approach @@ -236,6 +236,7 @@ int main(int argc, char **argv) nlp.options->SetStringValue("linsol_mode", "speculative"); nlp.options->SetStringValue("linear_solver_sparse", "ginkgo"); nlp.options->SetStringValue("fact_acceptor", "inertia_free"); + nlp.options->SetIntegerValue("ir_outer_maxit", 0); if (use_ginkgo_cuda) { nlp.options->SetStringValue("ginkgo_exec", "cuda"); } else if (use_ginkgo_hip) { diff --git a/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp b/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp index cfcd10e80..957b31f33 100644 --- a/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp +++ b/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp @@ -121,7 +121,7 @@ namespace hiop refact_ = "glu"; } // by default, dont use iterative refinement - int maxit_test = nlp_->options->GetInteger("ir_inner_cusolver_maxit"); + int maxit_test = nlp_->options->GetInteger("ir_inner_maxit"); if ((maxit_test < 0) || (maxit_test > 1000)){ nlp_->log->printf(hovWarning, @@ -138,7 +138,7 @@ namespace hiop if(use_ir_ == "yes") { if(refact_ == "rf") { - ir_->restart_ = nlp_->options->GetInteger("ir_inner_cusolver_restart"); + ir_->restart_ = 
nlp_->options->GetInteger("ir_inner_restart"); if ((ir_->restart_ <0) || (ir_->restart_ >100)){ nlp_->log->printf(hovWarning, @@ -148,7 +148,7 @@ namespace hiop } - ir_->tol_ = nlp_->options->GetNumeric("ir_inner_cusolver_tol"); + ir_->tol_ = nlp_->options->GetNumeric("ir_inner_tol"); if ((ir_->tol_ <0) || (ir_->tol_ >1)){ nlp_->log->printf(hovWarning, "Wrong tol value: %e. Use double tol value between 0 and 1. Setting default (1e-12) ...\n", diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp index 79e7aa654..4af6348f2 100644 --- a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp +++ b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp @@ -233,7 +233,9 @@ std::shared_ptr create_exec(std::string executor_string) std::shared_ptr setup_solver_factory(std::shared_ptr exec, - std::shared_ptr> mtx) + std::shared_ptr> mtx, + gko::solver::trisolve_algorithm alg, + const unsigned gmres_iter, const double gmres_tol, const unsigned gmres_restart) { auto preprocessing_fact = gko::share(gko::reorder::Mc64::build().on(exec)); auto preprocessing = gko::share(preprocessing_fact->generate(mtx)); @@ -241,19 +243,24 @@ std::shared_ptr setup_solver_factory(std::shared_ptr::build() .with_factorization(lu_fact) + .with_algorithm(alg) .on(exec)); - auto solver_fact = gko::share(gko::solver::Gmres<>::build() - .with_criteria( - gko::stop::Iteration::build() - .with_max_iters(200u) - .on(exec), - gko::stop::ResidualNorm<>::build() - .with_baseline(gko::stop::mode::absolute) - .with_reduction_factor(1e-8) - .on(exec)) - .with_krylov_dim(10u) - .with_preconditioner(inner_solver_fact) - .on(exec)); + + std::shared_ptr solver_fact = inner_solver_fact; + if (gmres_iter > 0) { + solver_fact = gko::share(gko::solver::Gmres::build() + .with_criteria( + gko::stop::Iteration::build() + .with_max_iters(gmres_iter) + .on(exec), + gko::stop::ResidualNorm<>::build() + .with_baseline(gko::stop::mode::absolute) + .with_reduction_factor(gmres_tol) + .on(exec)) + 
.with_krylov_dim(gmres_restart) + .with_preconditioner(inner_solver_fact) + .on(exec)); + } auto reusable_factory = gko::share(gko::solver::ScaledReordered<>::build() .with_solver(solver_fact) @@ -265,6 +272,9 @@ std::shared_ptr setup_solver_factory(std::shared_ptr + hiopLinSolverSymSparseGinkgo::alg_map_ = {{"syncfree", gko::solver::trisolve_algorithm::syncfree}, + {"sparselib", gko::solver::trisolve_algorithm::sparselib}}; hiopLinSolverSymSparseGinkgo::hiopLinSolverSymSparseGinkgo(const int& n, const int& nnz, @@ -289,12 +299,17 @@ std::shared_ptr setup_solver_factory(std::shared_ptr0); exec_ = create_exec(nlp_->options->GetString("ginkgo_exec")); + auto alg = alg_map_.at(nlp_->options->GetString("ginkgo_trisolve")); + auto gmres_iter = nlp_->options->GetInteger("ir_inner_maxit"); + auto gmres_tol = nlp_->options->GetNumeric("ir_inner_tol"); + auto gmres_restart = nlp_->options->GetInteger("ir_inner_restart"); + iterative_refinement_ = gmres_iter > 0; host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_); mtx_ = exec_ == (exec_->get_master()) ? host_mtx_ : gko::clone(exec_, host_mtx_); nnz_ = mtx_->get_num_stored_elements(); - reusable_factory_ = setup_solver_factory(exec_, mtx_); + reusable_factory_ = setup_solver_factory(exec_, mtx_, alg, gmres_iter, gmres_tol, gmres_restart); } int hiopLinSolverSymSparseGinkgo::matrixChanged() @@ -313,9 +328,14 @@ std::shared_ptr setup_solver_factory(std::shared_ptrgenerate(mtx_)); // Temporary solution for the ginkgo GLU integration. - auto sol = gko::as>(gko::as>(gko_solver_)->get_solver()); - auto precond = gko::as>(sol->get_preconditioner()); - auto status = precond->get_factorization_status(); + auto direct = iterative_refinement_ ? 
+ gko::as>( + gko::as>( + gko::as>( + gko_solver_)->get_solver())->get_preconditioner()) : + gko::as>( + gko::as>(gko_solver_)->get_solver()); + auto status = direct->get_factorization_status(); return status == gko::experimental::factorization::status::success ? 0 : -1; } diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp index e87bff3b9..058c606a5 100644 --- a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp +++ b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp @@ -92,7 +92,9 @@ class hiopLinSolverSymSparseGinkgo: public hiopLinSolverSymSparse std::shared_ptr> host_mtx_; std::shared_ptr reusable_factory_; std::shared_ptr gko_solver_; + bool iterative_refinement_; + static const std::map alg_map_; public: diff --git a/src/Utils/hiopOptions.cpp b/src/Utils/hiopOptions.cpp index fc513eb36..0237b85f6 100644 --- a/src/Utils/hiopOptions.cpp +++ b/src/Utils/hiopOptions.cpp @@ -890,6 +890,20 @@ void hiopOptionsNLP::register_options() "Selects the hardware architecture to run the Ginkgo linear solver on."); } + + // choose triangular solver implementation in Ginkgo. + // - Default is 'syncfree' which uses the busy waiting loop based Ginkgo implementation + // - 'sparselib' uses vendor triangular solvers + { + vector range {"syncfree", "sparselib"}; + + register_str_option("ginkgo_trisolve", + "syncfree", + range, + "Selects the triangular solver for Ginkgo."); + } + + // choose sparsity permutation (to reduce nz in the factors). This option is available only when using // Cholesky linear solvers // - metis: use CUDA function csrmetisnd, which is a wrapper of METIS_NodeND; requires linking with @@ -936,24 +950,24 @@ void hiopOptionsNLP::register_options() "'glu' is experimental and 'rf' is NVIDIA's stable refactorization. "); } - - register_int_option("ir_inner_cusolver_restart", + register_int_option("ir_inner_restart", 20, 1, 100, - "FGMRES restart value (default is 20). "); + "(F)GMRES restart value (default is 20). 
"); - register_num_option("ir_inner_cusolver_tol", + register_num_option("ir_inner_tol", 1e-12, 1e-16, 1e-1, - "FGMRES tolerance (default is 1e-12). "); + "(F)GMRES tolerance (default is 1e-12). "); - register_int_option("ir_inner_cusolver_maxit", + register_int_option("ir_inner_maxit", 50, 0, 1000, - "FGMRES maximum number of iterations (default is 50). "); + "(F)GMRES maximum number of iterations (default is 50). "); + { vector range = {"mgs", "cgs2", "mgs_two_synch", "mgs_pm"}; auto default_value = range[0];