Add choice for triangular solver implementation for Ginkgo #585

Merged: 12 commits, Feb 20, 2023
Binary file added .nfs0000000216213277000026b7
2 changes: 1 addition & 1 deletion BUILD.sh
@@ -86,7 +86,7 @@ EOD
esac
done

set -xv
# set -xv

# If MY_CLUSTER is not set by user, try to discover it from environment
if [[ ! -v MY_CLUSTER ]]
4 changes: 2 additions & 2 deletions scripts/ascentVariables.sh
@@ -21,8 +21,8 @@ module load exasgd-coinhsl/2015.06.23/gcc-9.1.0-qe3m7kw
module load exasgd-cub/1.16.0/gcc-9.1.0-o5zdbep
# cuda@11.4.2%gcc@9.1.0~allow-unsupported-compilers~dev arch=linux-rhel8-power9le
module load exasgd-cuda/11.4.2/gcc-9.1.0-4676kh5
# ginkgo@glu%gcc@9.1.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=70 dev_path=/gpfs/wolf/proj-shared/csc359/src/ginkgo arch=linux-rhel8-power9le
module load exasgd-ginkgo/glu/cuda-11.4.2/gcc-9.1.0-fpuykyc
# ginkgo@1.5.0.glu_experimental%gcc@10.2.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_system=cmake build_type=Debug cuda_arch=70 dev_path=/gpfs/wolf/proj-shared/csc359/src/ginkgo arch=linux-rhel8-power9le
module load exasgd-ginkgo/1.5.0.glu_experimental/cuda-11.4.2/gcc-10.2.0-ndoi6vk
# gmp@6.2.1%gcc@9.1.0 libs=shared,static arch=linux-rhel8-power9le
module load exasgd-gmp/6.2.1/gcc-9.1.0-umqilrg
# gnuconfig@2021-08-14%gcc@9.1.0 arch=linux-rhel8-power9le
4 changes: 2 additions & 2 deletions scripts/marianasVariables.sh
@@ -41,8 +41,8 @@ module load camp-0.2.3-gcc-10.2.0-36lcy72
module load openblas-0.3.20-gcc-10.2.0-x6v3mwm
# coinhsl@2019.05.21%gcc@10.2.0+blas arch=linux-centos7-zen2
module load coinhsl-2019.05.21-gcc-10.2.0-gkzkws6
# ginkgo@1.5.0.glu_experimental%gcc@10.2.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=60,70,75,80 arch=linux-centos7-zen2
module load ginkgo-1.5.0.glu_experimental-gcc-10.2.0-x73b7k3
# ginkgo@1.5.0.glu_experimental%gcc@10.2.0+cuda~develtools~full_optimizations~hwloc~ipo~mpi~oneapi+openmp~rocm+shared build_system=cmake build_type=Debug cuda_arch=60,70,75,80 arch=linux-centos7-zen2
module load ginkgo-1.5.0.glu_experimental-gcc-10.2.0-3o5dw4r
# magma@2.6.2%gcc@10.2.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=60,70,75,80 arch=linux-centos7-zen2
module load magma-2.6.2-gcc-10.2.0-caockkq
# metis@5.1.0%gcc@10.2.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-centos7-zen2
68 changes: 34 additions & 34 deletions scripts/newellVariables.sh
@@ -9,40 +9,40 @@ module use -a /share/apps/modules/tools
module use -a /share/apps/modules/compilers
module use -a /share/apps/modules/mpi
module use -a /etc/modulefiles
module use -a /qfs/projects/exasgd/src/jaelyn-spack/spack/share/spack/modules/linux-centos8-power9le
module use -a /qfs/projects/exasgd/src/ci-newll/ci-modules/linux-centos8-power9le

# Load spack-built modules

# autoconf@2.69%gcc@8.5.0 patches=35c4492,7793209,a49dd5b arch=linux-centos8-power9le
module load autoconf-2.69-gcc-8.5.0-2mzbyqj
# autoconf-archive@2022.02.11%gcc@8.5.0 patches=130cd48 arch=linux-centos8-power9le
module load autoconf-archive-2022.02.11-gcc-8.5.0-nolgalj
module load autoconf-2.69-gcc-8.5.0-khf4rhm
# autoconf-archive@2022.02.11%gcc@8.5.0 patches=139214f arch=linux-centos8-power9le
module load autoconf-archive-2022.02.11-gcc-8.5.0-hbtsmvt
# automake@1.16.5%gcc@8.5.0 arch=linux-centos8-power9le
module load automake-1.16.5-gcc-8.5.0-pnnvoal
module load automake-1.16.5-gcc-8.5.0-4vya5zv
# berkeley-db@18.1.40%gcc@8.5.0+cxx~docs+stl patches=b231fcc arch=linux-centos8-power9le
module load berkeley-db-18.1.40-gcc-8.5.0-cuzn6qn
# blt@0.4.1%gcc@8.5.0 arch=linux-centos8-power9le
module load blt-0.4.1-gcc-8.5.0-likpa4a
module load blt-0.4.1-gcc-8.5.0-dp7ssua
# bzip2@1.0.8%gcc@8.5.0~debug~pic+shared arch=linux-centos8-power9le
module load bzip2-1.0.8-gcc-8.5.0-tsweuon
# ca-certificates-mozilla@2022-03-29%gcc@8.5.0 arch=linux-centos8-power9le
module load ca-certificates-mozilla-2022-03-29-gcc-8.5.0-zyzfhdf
# camp@0.2.2%gcc@8.5.0+cuda~ipo~rocm~tests build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le
module load camp-0.2.2-gcc-8.5.0-5po5zoy
# cmake@3.23.2%gcc@8.5.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-centos8-power9le
module load cmake-3.23.2-gcc-8.5.0-pr3l2mn
# coinhsl@2015.06.23%gcc@8.5.0+blas arch=linux-centos8-power9le
module load coinhsl-2015.06.23-gcc-8.5.0-f6ka4rc
# ca-certificates-mozilla@2022-07-19%gcc@8.5.0 arch=linux-centos8-power9le
module load ca-certificates-mozilla-2022-07-19-gcc-8.5.0-db3wqwx
# camp@0.2.3%gcc@8.5.0+cuda~ipo+openmp~rocm~tests build_system=cmake build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le
module load camp-0.2.3-gcc-8.5.0-x4hzwm5
# cmake@3.23.3%gcc@8.5.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-centos8-power9le
module load cmake-3.23.3-gcc-8.5.0-h76vmev
# coinhsl@2019.05.21%gcc@8.5.0+blas arch=linux-centos8-power9le
module load coinhsl-2019.05.21-gcc-8.5.0-hoy7u3p
# cub@1.16.0%gcc@8.5.0 arch=linux-centos8-power9le
module load cub-1.16.0-gcc-8.5.0-p3cnthb
# diffutils@3.8%gcc@8.5.0 arch=linux-centos8-power9le
module load diffutils-3.8-gcc-8.5.0-ppyuisg
# gdbm@1.19%gcc@8.5.0 arch=linux-centos8-power9le
module load gdbm-1.19-gcc-8.5.0-unfo3x4
# ginkgo@glu_experimental%gcc@8.5.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=70 arch=linux-centos8-power9le
module load ginkgo-glu_experimental-gcc-8.5.0-tq3ravg
module load gdbm-1.19-gcc-8.5.0-uowynqh
# ginkgo@1.5.0.glu_experimental%gcc@8.5.0+cuda~develtools~full_optimizations~hwloc~ipo~mpi~oneapi+openmp~rocm+shared build_system=cmake build_type=Debug cuda_arch=70 arch=linux-centos8-power9le
module load ginkgo-1.5.0.glu_experimental-gcc-8.5.0-p3aodax
# gmp@6.2.1%gcc@8.5.0 libs=shared,static arch=linux-centos8-power9le
module load gmp-6.2.1-gcc-8.5.0-xlcuuht
module load gmp-6.2.1-gcc-8.5.0-bq7amxg
# gnuconfig@2021-08-14%gcc@8.5.0 arch=linux-centos8-power9le
module load gnuconfig-2021-08-14-gcc-8.5.0-qjyg7ls
# [email protected]%[email protected] libs=shared,static arch=linux-centos8-power9le
@@ -54,31 +54,31 @@ module load libtool-2.4.7-gcc-8.5.0-kxdso3c
# m4@1.4.19%gcc@8.5.0+sigsegv patches=9dc5fbd,bfdffa7 arch=linux-centos8-power9le
module load m4-1.4.19-gcc-8.5.0-untfsqf
# magma@2.6.2%gcc@8.5.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le
module load magma-2.6.2-gcc-8.5.0-ee3572c
module load magma-2.6.2-gcc-8.5.0-kfhqe36
# metis@5.1.0%gcc@8.5.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-centos8-power9le
module load metis-5.1.0-gcc-8.5.0-ldsei63
module load metis-5.1.0-gcc-8.5.0-ib64hvb
# mpfr@4.1.0%gcc@8.5.0 libs=shared,static arch=linux-centos8-power9le
module load mpfr-4.1.0-gcc-8.5.0-esdxmf2
# ncurses@6.2%gcc@8.5.0~symlinks+termlib abi=none arch=linux-centos8-power9le
module load ncurses-6.2-gcc-8.5.0-v24hmxo
# openblas@0.3.20%gcc@8.5.0~bignuma~consistent_fpcsr~ilp64+locking+pic+shared symbol_suffix=none threads=none arch=linux-centos8-power9le
module load openblas-0.3.20-gcc-8.5.0-rwstn2s
module load mpfr-4.1.0-gcc-8.5.0-ko56wbz
# ncurses@6.3%gcc@8.5.0~symlinks+termlib abi=none arch=linux-centos8-power9le
module load ncurses-6.3-gcc-8.5.0-glmmmuu
# openblas@0.3.20%gcc@8.5.0~bignuma~consistent_fpcsr~ilp64+locking+pic+shared patches=9f12903 symbol_suffix=none threads=none arch=linux-centos8-power9le
module load openblas-0.3.20-gcc-8.5.0-dmvuekp
# openssl@1.1.1q%gcc@8.5.0~docs~shared certs=mozilla patches=3fdcf2d arch=linux-centos8-power9le
module load openssl-1.1.1q-gcc-8.5.0-xlfn3bw
## module load openssl-1.1.1q-gcc-8.5.0-lv52izx
# perl@5.34.1%gcc@8.5.0+cpanm+shared+threads arch=linux-centos8-power9le
module load perl-5.34.1-gcc-8.5.0-fn534xj
module load perl-5.34.1-gcc-8.5.0-qt5uuuh
# pkgconf@1.8.0%gcc@8.5.0 arch=linux-centos8-power9le
module load pkgconf-1.8.0-gcc-8.5.0-imrnro2
# raja@0.14.0%gcc@8.5.0
module load raja-0.14.0-gcc-8.5.0-qsgckji
# raja@0.14.0%gcc@8.5.0+cuda~examples~exercises~ipo+openmp~rocm+shared~tests build_system=cmake build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le
module load raja-0.14.0-gcc-8.5.0-2pndg26
# readline@8.1.2%gcc@8.5.0 arch=linux-centos8-power9le
module load readline-8.1.2-gcc-8.5.0-l4hzlyf
module load readline-8.1.2-gcc-8.5.0-6rwgkxr
# suite-sparse@5.10.1%gcc@8.5.0~cuda~graphblas~openmp+pic~tbb arch=linux-centos8-power9le
module load suite-sparse-5.10.1-gcc-8.5.0-ykffgpl
module load suite-sparse-5.10.1-gcc-8.5.0-yc2nlwi
# texinfo@6.5%gcc@8.5.0 patches=12f6edb,1732115 arch=linux-centos8-power9le
module load texinfo-6.5-gcc-8.5.0-fvxyl2q
# umpire@6.0.0%gcc@8.5.0+c+cuda~device_alloc~deviceconst+examples~fortran~ipo~numa~openmp~rocm~shared build_type=RelWithDebInfo cuda_arch=70 tests=none arch=linux-centos8-power9le
module load umpire-6.0.0-gcc-8.5.0-xar7dg5
module load texinfo-6.5-gcc-8.5.0-v2eju2d
# umpire@6.0.0%gcc@8.5.0+c+cuda~device_alloc~deviceconst~examples~fortran~ipo~numa~openmp~rocm~shared build_system=cmake build_type=RelWithDebInfo cuda_arch=70 tests=none arch=linux-centos8-power9le
module load umpire-6.0.0-gcc-8.5.0-mftt44d
# zlib@1.2.12%gcc@8.5.0+optimize+pic+shared patches=0d38234 arch=linux-centos8-power9le
module load zlib-1.2.12-gcc-8.5.0-spb5k73

1 change: 1 addition & 0 deletions src/Drivers/Sparse/NlpSparseEx1Driver.cpp
@@ -236,6 +236,7 @@ int main(int argc, char **argv)
nlp.options->SetStringValue("linsol_mode", "speculative");
nlp.options->SetStringValue("linear_solver_sparse", "ginkgo");
nlp.options->SetStringValue("fact_acceptor", "inertia_free");
nlp.options->SetIntegerValue("ir_outer_maxit", 0);
if (use_ginkgo_cuda) {
nlp.options->SetStringValue("ginkgo_exec", "cuda");
} else if (use_ginkgo_hip) {
44 changes: 34 additions & 10 deletions src/LinAlg/hiopLinSolverSparseGinkgo.cpp
@@ -232,28 +232,46 @@ std::shared_ptr<gko::Executor> create_exec(std::string executor_string)
}


gko::solver::trisolve_algorithm create_alg(std::string algorithm_string)
{
std::map<std::string, gko::solver::trisolve_algorithm>
alg_map{
{"syncfree", gko::solver::trisolve_algorithm::syncfree},
{"sparselib", gko::solver::trisolve_algorithm::sparselib}};

return alg_map.at(algorithm_string);
}
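
One behavioral note on the helper above: std::map::at throws std::out_of_range for a key that is not present, so an algorithm string other than "syncfree" or "sparselib" surfaces as an exception rather than a silent fallback. The option registration in hiopOptions.cpp further down limits "ginkgo_trisolve" to these two names, so the lookup is expected to succeed in practice. A minimal sketch, not part of the diff, with a hypothetical unsupported value:

    // Sketch only: map the HiOp option string to Ginkgo's enum.
    auto alg = create_alg("sparselib");   // -> gko::solver::trisolve_algorithm::sparselib
    try {
      create_alg("levelscheduling");      // hypothetical, unsupported name
    } catch (const std::out_of_range&) {
      // std::map::at reports the unknown algorithm string
    }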


std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gko::Executor> exec,
std::shared_ptr<gko::matrix::Csr<double, int>> mtx)
std::shared_ptr<gko::matrix::Csr<double, int>> mtx,
gko::solver::trisolve_algorithm alg,
const unsigned gmres_iter, const double gmres_tol, const unsigned gmres_restart)
{
auto preprocessing_fact = gko::share(gko::reorder::Mc64<double, int>::build().on(exec));
auto preprocessing = gko::share(preprocessing_fact->generate(mtx));
auto lu_fact = gko::share(gko::experimental::factorization::Glu<double, int>::build_reusable()
.on(exec, mtx.get(), preprocessing.get()));
auto inner_solver_fact = gko::share(gko::experimental::solver::Direct<double, int>::build()
.with_factorization(lu_fact)
.with_algorithm(alg)
.on(exec));
auto solver_fact = gko::share(gko::solver::Gmres<>::build()
.with_criteria(

std::shared_ptr<gko::LinOpFactory> solver_fact = inner_solver_fact;
if (gmres_iter > 0) {
solver_fact = gko::share(gko::solver::Gmres<double>::build()
.with_criteria(
gko::stop::Iteration::build()
.with_max_iters(200u)
.with_max_iters(gmres_iter)
.on(exec),
gko::stop::ResidualNorm<>::build()
.with_baseline(gko::stop::mode::absolute)
.with_reduction_factor(1e-8)
.with_reduction_factor(gmres_tol)
.on(exec))
.with_krylov_dim(10u)
.with_krylov_dim(gmres_restart)
.with_preconditioner(inner_solver_fact)
.on(exec));
}

auto reusable_factory = gko::share(gko::solver::ScaledReordered<>::build()
.with_solver(solver_fact)
@@ -289,12 +307,17 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
assert(n_>0);

exec_ = create_exec(nlp_->options->GetString("ginkgo_exec"));
auto alg = create_alg(nlp_->options->GetString("ginkgo_trisolve"));
auto gmres_iter = nlp_->options->GetInteger("ir_inner_ginkgo_maxit");
auto gmres_tol = nlp_->options->GetNumeric("ir_inner_ginkgo_tol");
auto gmres_restart = nlp_->options->GetInteger("ir_inner_ginkgo_restart");
iterative_refinement_ = gmres_iter > 0;

host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_);
mtx_ = exec_ == (exec_->get_master()) ? host_mtx_ : gko::clone(exec_, host_mtx_);
nnz_ = mtx_->get_num_stored_elements();

reusable_factory_ = setup_solver_factory(exec_, mtx_);
reusable_factory_ = setup_solver_factory(exec_, mtx_, alg, gmres_iter, gmres_tol, gmres_restart);
}

int hiopLinSolverSymSparseGinkgo::matrixChanged()
@@ -313,9 +336,10 @@ std::shared_ptr<gko::LinOpFactory> setup_solver_factory(std::shared_ptr<const gk
gko_solver_ = gko::share(reusable_factory_->generate(mtx_));

// Temporary solution for the ginkgo GLU integration.
auto sol = gko::as<gko::solver::Gmres<>>(gko::as<gko::solver::ScaledReordered<>>(gko_solver_)->get_solver());
auto precond = gko::as<gko::experimental::solver::Direct<double, int>>(sol->get_preconditioner());
auto status = precond->get_factorization_status();
auto direct = iterative_refinement_ ?
gko::as<gko::experimental::solver::Direct<double, int>>(gko::as<gko::solver::Gmres<>>(gko::as<gko::solver::ScaledReordered<>>(gko_solver_)->get_solver())->get_preconditioner()) :
gko::as<gko::experimental::solver::Direct<double, int>>(gko::as<gko::solver::ScaledReordered<>>(gko_solver_)->get_solver());
auto status = direct->get_factorization_status();

return status == gko::experimental::factorization::status::success ? 0 : -1;
}
1 change: 1 addition & 0 deletions src/LinAlg/hiopLinSolverSparseGinkgo.hpp
@@ -92,6 +92,7 @@ class hiopLinSolverSymSparseGinkgo: public hiopLinSolverSymSparse
std::shared_ptr<gko::matrix::Csr<double, int>> host_mtx_;
std::shared_ptr<gko::LinOpFactory> reusable_factory_;
std::shared_ptr<gko::LinOp> gko_solver_;
bool iterative_refinement_;


public:
38 changes: 38 additions & 0 deletions src/Utils/hiopOptions.cpp
@@ -890,6 +890,44 @@ void hiopOptionsNLP::register_options()
"Selects the hardware architecture to run the Ginkgo linear solver on.");
}


// choose the triangular solver implementation in Ginkgo (registered default: 'syncfree')
// - 'syncfree' uses Ginkgo's busy-waiting-loop-based implementation
// - 'sparselib' uses the vendor library triangular solvers
{
vector<string> range {"syncfree", "sparselib"};

register_str_option("ginkgo_trisolve",
"syncfree",
range,
"Selects the triangular solver for Ginkgo.");
}
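
For illustration, a sketch of how a driver selects this option at run time; the nlp object and the SetStringValue calls mirror the NlpSparseEx1Driver.cpp hunk earlier in this PR, and the specific values are chosen for the example rather than taken from any driver:

    // Sketch only: request the Ginkgo sparse solver with the vendor (sparselib)
    // triangular solver; "syncfree" is the registered default.
    nlp.options->SetStringValue("linear_solver_sparse", "ginkgo");
    nlp.options->SetStringValue("ginkgo_exec", "cuda");          // or "hip", as in the driver
    nlp.options->SetStringValue("ginkgo_trisolve", "sparselib");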


// Ginkgo iterative refinement options
{
register_int_option("ir_inner_ginkgo_restart",
20,
1,
100,
"GMRES restart value (default is 20). ");

register_num_option("ir_inner_ginkgo_tol",
1e-12,
1e-16,
1e-1,
"GMRES tolerance (default is 1e-12). ");

// 0 iterations means no iterative refinement, making the restart and tolerance options irrelevant.
register_int_option("ir_inner_ginkgo_maxit",
0,
0,
1000,
"Maximum number of GMRES iterations (default is 0, which disables iterative refinement).");

}
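
For illustration, a sketch of how the three options interact, assuming an nlp object set up as in the driver above and assuming HiOp's SetNumericValue setter for the floating-point option:

    // Sketch only: enable the inner GMRES iterative refinement around the Ginkgo
    // direct solve. With ir_inner_ginkgo_maxit left at its default of 0, the
    // Direct solver runs alone and the tolerance/restart values are ignored.
    nlp.options->SetIntegerValue("ir_inner_ginkgo_maxit", 50);   // > 0 enables the GMRES wrapper
    nlp.options->SetNumericValue("ir_inner_ginkgo_tol", 1e-10);  // absolute residual-norm stop
    nlp.options->SetIntegerValue("ir_inner_ginkgo_restart", 30); // GMRES restart (Krylov dimension)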


// choose sparsity permutation (to reduce nz in the factors). This option is available only when using
// Cholesky linear solvers
// - metis: use CUDA function csrmetisnd, which is a wrapper of METIS_NodeND; requires linking with