From 0c96a9d92172aec30f4eabf8142a77f51fbe618d Mon Sep 17 00:00:00 2001 From: xDarkLemon Date: Wed, 14 Feb 2024 21:21:57 -0500 Subject: [PATCH 1/2] Add AMGCL_cuda --- CMakeLists.txt | 23 ++-- cmake/recipes/cusolverdn.cmake | 26 +++-- linear-solver-spec.json | 14 ++- src/polysolve/linear/AMGCL_cuda.cu | 173 ++++++++++++++++++++++++++++ src/polysolve/linear/AMGCL_cuda.hpp | 113 ++++++++++++++++++ src/polysolve/linear/CMakeLists.txt | 2 + src/polysolve/linear/CuSolverDN.cu | 4 +- src/polysolve/linear/Solver.cpp | 8 ++ 8 files changed, 337 insertions(+), 26 deletions(-) create mode 100644 src/polysolve/linear/AMGCL_cuda.cu create mode 100644 src/polysolve/linear/AMGCL_cuda.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 587f37e1..96c484b1 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,8 +75,8 @@ option(POLYSOLVE_WITH_CHOLMOD "Enable Cholmod library" option(POLYSOLVE_WITH_UMFPACK "Enable UmfPack library" ON) option(POLYSOLVE_WITH_SUPERLU "Enable SuperLU library" ON) option(POLYSOLVE_WITH_MKL "Enable MKL library" ${POLYSOLVE_NOT_ON_APPLE_SILICON}) -option(POLYSOLVE_WITH_CUSOLVER "Enable cuSOLVER library" OFF) -option(POLYSOLVE_WITH_PARDISO "Enable Pardiso library" OFF) +option(POLYSOLVE_WITH_CUSOLVER "Enable cuSOLVER library" ON) +option(POLYSOLVE_WITH_PARDISO "Enable Pardiso library" ON) option(POLYSOLVE_WITH_HYPRE "Enable hypre" ON) option(POLYSOLVE_WITH_AMGCL "Use AMGCL" ON) option(POLYSOLVE_WITH_SPECTRA "Enable Spectra library" ON) @@ -146,6 +146,10 @@ endif() # Add an empty library and fill in the list of sources in `src/CMakeLists.txt`. add_library(polysolve_linear) +if(POLYSOLVE_WITH_CUSOLVER) + include(cusolverdn) +endif() + add_library(polysolve::linear ALIAS polysolve_linear) add_subdirectory(src/polysolve) add_subdirectory(src/polysolve/linear) @@ -272,7 +276,7 @@ endif() # AMGCL solver if(POLYSOLVE_WITH_AMGCL) include(amgcl) - target_link_libraries(polysolve_linear PUBLIC amgcl::amgcl) + target_link_libraries(polysolve_linear PUBLIC amgcl::amgcl cuda_target) target_compile_definitions(polysolve_linear PRIVATE -DPOLYSOLVE_WITH_AMGCL) endif() @@ -284,14 +288,11 @@ if(POLYSOLVE_WITH_SPECTRA) endif() # cuSolver solvers -if(POLYSOLVE_WITH_CUSOLVER) - include(cusolverdn) - if(TARGET CUDA::cusolver) - target_link_libraries(polysolve_linear PUBLIC CUDA::cusolver) - target_compile_definitions(polysolve_linear PRIVATE -DPOLYSOLVE_WITH_CUSOLVER) - else() - message(WARNING "cuSOLVER not found, solver will not be available.") - endif() +if(TARGET CUDA::cusolver) + target_link_libraries(polysolve_linear PUBLIC CUDA::cusolver) + target_compile_definitions(polysolve_linear PRIVATE -DPOLYSOLVE_WITH_CUSOLVER) +else() + message(WARNING "cuSOLVER not found, solver will not be available.") endif() # Sanitizers diff --git a/cmake/recipes/cusolverdn.cmake b/cmake/recipes/cusolverdn.cmake index 43657ac0..7c1760ad 100644 --- a/cmake/recipes/cusolverdn.cmake +++ b/cmake/recipes/cusolverdn.cmake @@ -6,6 +6,14 @@ endif() message(STATUS "Third-party: creating targets 'CUDA::cusolver'") +# Nvidia RTX8000 -> compute_75 +# Nvidia V100 -> compute_70 +# Nvidia 1080/1080Ti -> compute_61 +# Nvidia 3080Ti -> compute_86 +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 70 75 86) +endif() + include(CheckLanguage) check_language(CUDA) if(CMAKE_CUDA_COMPILER) @@ -18,7 +26,7 @@ if(CMAKE_CUDA_COMPILER) set(CUDA_PROPAGATE_HOST_FLAGS OFF) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -use_fast_math --expt-relaxed-constexpr -gencode arch=compute_86,code=sm_86") - target_compile_options(polysolve PUBLIC $<$: + target_compile_options(polysolve_linear PUBLIC $<$: --generate-line-info --use_fast_math @@ -33,21 +41,15 @@ if(CMAKE_CUDA_COMPILER) ) - target_link_libraries(polysolve PUBLIC CUDA::toolkit) - set_target_properties(polysolve PROPERTIES CUDA_SEPARABLE_COMPILATION ON) - # Nvidia RTX8000 -> compute_75 - # Nvidia V100 -> compute_70 - # Nvidia 1080/1080Ti -> compute_61 - # Nvidia 3080Ti -> compute_86 - if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - set(CMAKE_CUDA_ARCHITECTURES 70 75 86) - endif() - set_target_properties(polysolve PROPERTIES CUDA_ARCHITECTURES "70;75;86") + set_target_properties(polysolve_linear PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + set_target_properties(polysolve_linear PROPERTIES CUDA_ARCHITECTURES native) + # set_target_properties(polysolve PROPERTIES CUDA_ARCHITECTURES "70;75;86") + target_link_libraries(polysolve_linear PUBLIC CUDA::toolkit CUDA::cudart) if(APPLE) # We need to add the path to the driver (libcuda.dylib) as an rpath, # so that the static cuda runtime can find it at runtime. - set_property(TARGET polysolve + set_property(TARGET polysolve_linear PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) endif() diff --git a/linear-solver-spec.json b/linear-solver-spec.json index 1e59528b..f0cf2caf 100644 --- a/linear-solver-spec.json +++ b/linear-solver-spec.json @@ -15,7 +15,8 @@ "Eigen::MINRES", "Pardiso", "Hypre", - "AMGCL" + "AMGCL", + "AMGCL_cuda" ], "doc": "Settings for the linear solver." }, @@ -42,6 +43,7 @@ "Pardiso", "Hypre", "AMGCL", + "AMGCL_cuda", "Eigen::LeastSquaresConjugateGradient", "Eigen::DGMRES", "Eigen::ConjugateGradient", @@ -153,6 +155,16 @@ ], "doc": "Settings for the AMGCL solver." }, + { + "pointer": "/AMGCL_cuda", + "default": null, + "type": "object", + "optional": [ + "solver", + "precond" + ], + "doc": "Settings for the AMGCL_cuda solver." + }, { "pointer": "/Eigen::LeastSquaresConjugateGradient/max_iter", "default": 1000, diff --git a/src/polysolve/linear/AMGCL_cuda.cu b/src/polysolve/linear/AMGCL_cuda.cu new file mode 100644 index 00000000..12cf21ed --- /dev/null +++ b/src/polysolve/linear/AMGCL_cuda.cu @@ -0,0 +1,173 @@ +#ifdef POLYSOLVE_WITH_AMGCL +// #ifdef POLYSOLVE_WITH_CUDA + +//////////////////////////////////////////////////////////////////////////////// +#include "AMGCL_cuda.hpp" + +#include +#include +//////////////////////////////////////////////////////////////////////////////// + +namespace polysolve::linear +{ + + namespace + { + /* https://stackoverflow.com/questions/15904896/range-based-for-loop-on-a-dynamic-array */ + template + struct WrappedArray + { + WrappedArray(const T *first, const T *last) + : begin_{first}, end_{last} {} + WrappedArray(const T *first, std::ptrdiff_t size) + : WrappedArray{first, first + size} {} + + const T *begin() const noexcept { return begin_; } + const T *end() const noexcept { return end_; } + const T &operator[](const size_t i) const { return begin_[i]; } + + const T *begin_; + const T *end_; + }; + + json default_params() + { + json params = R"({ + "precond": { + "relax": { + "type": "spai0" + }, + "class": "amg", + "max_levels": 6, + "direct_coarse": false, + "ncycle": 2, + "coarsening": { + "type": "smoothed_aggregation", + "estimate_spectral_radius": true, + "relax": 1, + "aggr": { + "eps_strong": 0 + } + } + }, + "solver": { + "tol": 1e-10, + "maxiter": 1000, + "type": "cg" + } + })"_json; + + return params; + } + + void set_params(const json ¶ms, json &out) + { + if (params.contains("AMGCL_cuda")) + { + // Patch the stored params with input ones + if (params["AMGCL_cuda"].contains("precond")) + out["precond"].merge_patch(params["AMGCL_cuda"]["precond"]); + if (params["AMGCL_cuda"].contains("solver")) + out["solver"].merge_patch(params["AMGCL_cuda"]["solver"]); + + if (out["precond"]["class"] == "schur_pressure_correction") + { + // Initialize the u and p solvers with a tolerance that is comparable to the main solver's + if (!out["precond"].contains("usolver")) + { + out["precond"]["usolver"] = R"({"solver": {"maxiter": 100}})"_json; + out["precond"]["usolver"]["solver"]["tol"] = 10 * out["solver"]["tol"].get(); + } + if (!out["precond"].contains("usolver")) + { + out["precond"]["psolver"] = R"({"solver": {"maxiter": 100}})"_json; + out["precond"]["psolver"]["solver"]["tol"] = 10 * out["solver"]["tol"].get(); + } + } + } + } + } // namespace + + //////////////////////////////////////////////////////////////////////////////// + + AMGCL_cuda::AMGCL_cuda() + { + params_ = default_params(); + // NOTE: usolver and psolver parameters are only used if the + // preconditioner class is "schur_pressure_correction" + precond_num_ = 0; + cusparseCreate(&backend_params_.cusparse_handle); + } + + // Set solver parameters + void AMGCL_cuda::set_parameters(const json ¶ms) + { + if (params.contains("AMGCL_cuda")) + { + set_params(params, params_); + } + } + + void AMGCL_cuda::get_info(json ¶ms) const + { + params["num_iterations"] = iterations_; + params["final_res_norm"] = residual_error_; + } + + //////////////////////////////////////////////////////////////////////////////// + + void AMGCL_cuda::factorize(const StiffnessMatrix &Ain) + { + assert(precond_num_ > 0); + + int numRows = Ain.rows(); + + WrappedArray ia(Ain.outerIndexPtr(), numRows + 1); + WrappedArray ja(Ain.innerIndexPtr(), Ain.nonZeros()); + WrappedArray a(Ain.valuePtr(), Ain.nonZeros()); + if (params_["precond"]["class"] == "schur_pressure_correction") + { + std::vector pmask(numRows, 0); + for (size_t i = precond_num_; i < numRows; ++i) + pmask[i] = 1; + params_["precond"]["pmask"] = pmask; + } + + // AMGCL takes the parameters as a Boost property_tree (i.e., another JSON data structure) + std::stringstream ss_params; + ss_params << params_; + boost::property_tree::ptree pt_params; + boost::property_tree::read_json(ss_params, pt_params); + auto A = std::tie(numRows, ia, ja, a); + solver_ = std::make_unique(A, pt_params, backend_params_); + // std::cout << *solver_.get() << std::endl; + iterations_ = 0; + residual_error_ = 0; + } + + //////////////////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////////////////// + + void AMGCL_cuda::solve(const Eigen::Ref rhs, Eigen::Ref result) + { + assert(result.size() == rhs.size()); + std::vector _rhs(rhs.data(), rhs.data() + rhs.size()); + std::vector x(result.data(), result.data() + result.size()); + auto rhs_b = Backend::copy_vector(_rhs, backend_params_); + auto x_b = Backend::copy_vector(x, backend_params_); + + std::tie(iterations_, residual_error_) = (*solver_)(*rhs_b, *x_b); + thrust::copy(x_b->begin(), x_b->end(), result.data()); + } + + //////////////////////////////////////////////////////////////////////////////// + + AMGCL_cuda::~AMGCL_cuda() + { + } + +} // namespace polysolve + +// #endif +#endif diff --git a/src/polysolve/linear/AMGCL_cuda.hpp b/src/polysolve/linear/AMGCL_cuda.hpp new file mode 100644 index 00000000..40324680 --- /dev/null +++ b/src/polysolve/linear/AMGCL_cuda.hpp @@ -0,0 +1,113 @@ +#pragma once + +#ifdef POLYSOLVE_WITH_AMGCL +// #ifdef POLYSOLVE_WITH_CUDA + +//////////////////////////////////////////////////////////////////////////////// +#include "Solver.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// SET THIS AS AN OPTIONAL HEADER +#ifdef POLYSOLVE_WITH_CUSPARSEILU0 +#include +#endif + +#include +#include +#include +#include +#include +// #include +#include +#include +#include +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// +// WARNING: +// The matrix is assumed to be in row-major format, since AMGCL assumes that the +// outer index is for row. If the matrix is symmetric, you are fine, because CSR +// and CSC are the same. If the matrix is not symmetric and you pass in a +// column-major matrix, the solver will actually solve A^T x = b. +// + +namespace polysolve::linear +{ + class AMGCL_cuda : public Solver + { + + public: + AMGCL_cuda(); + ~AMGCL_cuda(); + + private: + POLYSOLVE_DELETE_MOVE_COPY(AMGCL_cuda) + + public: + ////////////////////// + // Public interface // + ////////////////////// + + // Set solver parameters + virtual void set_parameters(const json ¶ms) override; + + // Retrieve information + virtual void get_info(json ¶ms) const override; + + // Analyze sparsity pattern + virtual void analyze_pattern(const StiffnessMatrix &A, const int precond_num) override + { + precond_num_ = precond_num; + } + + // Factorize system matrix + virtual void factorize(const StiffnessMatrix &A) override; + + // Solve the linear system Ax = b + virtual void solve(const Ref b, Ref x) override; + + // Name of the solver type (for debugging purposes) + virtual std::string name() const override { return "AMGCL_cuda"; } + + private: + using Backend = amgcl::backend::cuda; + using Solver = amgcl::make_solver< + amgcl::runtime::preconditioner, + amgcl::runtime::solver::wrapper>; + std::unique_ptr solver_; + json params_; + typename Backend::params backend_params_; + + int precond_num_; + int block_size_ = 1; + + // Output info + size_t iterations_; + double residual_error_; + }; + +} // namespace polysolve + +// #endif +#endif diff --git a/src/polysolve/linear/CMakeLists.txt b/src/polysolve/linear/CMakeLists.txt index a742a357..2fe93c2e 100644 --- a/src/polysolve/linear/CMakeLists.txt +++ b/src/polysolve/linear/CMakeLists.txt @@ -15,6 +15,8 @@ set(SOURCES Pardiso.hpp SaddlePointSolver.cpp SaddlePointSolver.hpp + AMGCL_cuda.cu + AMGCL_cuda.hpp ) source_group(TREE "${CMAKE_CURRENT_SOURCE_DIR}" PREFIX "Source Files" FILES ${SOURCES}) diff --git a/src/polysolve/linear/CuSolverDN.cu b/src/polysolve/linear/CuSolverDN.cu index 29cf7570..8b3544fa 100644 --- a/src/polysolve/linear/CuSolverDN.cu +++ b/src/polysolve/linear/CuSolverDN.cu @@ -206,7 +206,7 @@ namespace polysolve::linear } // namespace polysolve::linear -template polysolve::CuSolverDN::CuSolverDN(); -template polysolve::CuSolverDN::CuSolverDN(); +template polysolve::linear::CuSolverDN::CuSolverDN(); +template polysolve::linear::CuSolverDN::CuSolverDN(); #endif \ No newline at end of file diff --git a/src/polysolve/linear/Solver.cpp b/src/polysolve/linear/Solver.cpp index c1e8a4e4..d9ed75ea 100644 --- a/src/polysolve/linear/Solver.cpp +++ b/src/polysolve/linear/Solver.cpp @@ -35,6 +35,7 @@ #endif #ifdef POLYSOLVE_WITH_AMGCL #include "AMGCL.hpp" +#include "AMGCL_cuda.hpp" #endif #ifdef POLYSOLVE_WITH_CUSOLVER #include "CuSolverDN.cuh" @@ -377,6 +378,12 @@ namespace polysolve::linear { return std::make_unique(); #endif +#ifdef POLYSOLVE_WITH_AMGCL + } + else if (solver == "AMGCL_cuda") + { + return std::make_unique(); +#endif #if EIGEN_VERSION_AT_LEAST(3, 3, 0) // Available only with Eigen 3.3.0 and newer #ifndef POLYSOLVE_LARGE_INDEX @@ -498,6 +505,7 @@ namespace polysolve::linear #endif #ifdef POLYSOLVE_WITH_AMGCL "AMGCL", + "AMGCL_cuda", #endif #if EIGEN_VERSION_AT_LEAST(3, 3, 0) #ifndef POLYSOLVE_LARGE_INDEX From be5aa2577c446f9ce82717d0c5856f47d4f75b41 Mon Sep 17 00:00:00 2001 From: xDarkLemon Date: Wed, 14 Feb 2024 23:59:48 -0500 Subject: [PATCH 2/2] Move AMGCL_cuda spec --- CMakeLists.txt | 2 +- linear-solver-spec.json | 132 +++++++++++++++++++++++++++++ src/polysolve/linear/AMGCL_cuda.cu | 33 -------- 3 files changed, 133 insertions(+), 34 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 384bc80f..d65af43e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,7 +76,7 @@ option(POLYSOLVE_WITH_UMFPACK "Enable UmfPack library" option(POLYSOLVE_WITH_SUPERLU "Enable SuperLU library" ON) option(POLYSOLVE_WITH_MKL "Enable MKL library" ${POLYSOLVE_NOT_ON_APPLE_SILICON}) option(POLYSOLVE_WITH_CUSOLVER "Enable cuSOLVER library" ON) -option(POLYSOLVE_WITH_PARDISO "Enable Pardiso library" ON) +option(POLYSOLVE_WITH_PARDISO "Enable Pardiso library" OFF) option(POLYSOLVE_WITH_HYPRE "Enable hypre" ON) option(POLYSOLVE_WITH_AMGCL "Use AMGCL" ON) option(POLYSOLVE_WITH_SPECTRA "Enable Spectra library" ON) diff --git a/linear-solver-spec.json b/linear-solver-spec.json index f0cf2caf..5fba7bc6 100644 --- a/linear-solver-spec.json +++ b/linear-solver-spec.json @@ -433,5 +433,137 @@ "default": 0, "type": "float", "doc": "Aggregation epsilon strong." + }, + { + "pointer": "/AMGCL_cuda/solver", + "default": null, + "type": "object", + "optional": [ + "tol", + "maxiter", + "type" + ], + "doc": "Solver settings for the AMGCL." + }, + { + "pointer": "/AMGCL_cuda/precond", + "default": null, + "type": "object", + "optional": [ + "relax", + "class", + "max_levels", + "direct_coarse", + "ncycle", + "coarsening" + ], + "doc": "Preconditioner settings for the AMGCL." + }, + { + "pointer": "/AMGCL_cuda/solver/maxiter", + "default": 1000, + "type": "int", + "doc": "Maximum number of iterations." + }, + { + "pointer": "/AMGCL_cuda/solver/tol", + "default": 1e-10, + "type": "float", + "doc": "Convergence tolerance." + }, + { + "pointer": "/AMGCL_cuda/solver/type", + "default": "cg", + "type": "string", + "doc": "Type of solver to use." + }, + { + "pointer": "/AMGCL_cuda/precond/relax", + "default": null, + "type": "object", + "optional": [ + "degree", + "type", + "power_iters", + "higher", + "lower", + "scale" + ], + "doc": "Preconditioner settings for the AMGCL." + }, + { + "pointer": "/AMGCL_cuda/precond/class", + "default": "amg", + "type": "string", + "doc": "Type of preconditioner to use." + }, + { + "pointer": "/AMGCL_cuda/precond/max_levels", + "default": 6, + "type": "int", + "doc": "Maximum number of levels." + }, + { + "pointer": "/AMGCL_cuda/precond/direct_coarse", + "default": false, + "type": "bool", + "doc": "Use direct solver for the coarsest level." + }, + { + "pointer": "/AMGCL_cuda/precond/ncycle", + "default": 2, + "type": "int", + "doc": "Number of cycles." + }, + { + "pointer": "/AMGCL_cuda/precond/coarsening", + "default": null, + "type": "object", + "optional": [ + "type", + "estimate_spectral_radius", + "relax", + "aggr" + ], + "doc": "Coarsening parameters." + }, + { + "pointer": "/AMGCL_cuda/precond/relax/type", + "default": "spai0", + "type": "string", + "doc": "Type of relaxation to use." + }, + { + "pointer": "/AMGCL_cuda/precond/coarsening/type", + "default": "smoothed_aggregation", + "type": "string", + "doc": "Coarsening type." + }, + { + "pointer": "/AMGCL_cuda/precond/coarsening/estimate_spectral_radius", + "default": true, + "type": "bool", + "doc": "Should the spectral radius be estimated." + }, + { + "pointer": "/AMGCL_cuda/precond/coarsening/relax", + "default": 1, + "type": "float", + "doc": "Coarsening relaxation." + }, + { + "pointer": "/AMGCL_cuda/precond/coarsening/aggr", + "default": null, + "type": "object", + "optional": [ + "eps_strong" + ], + "doc": "Aggregation settings." + }, + { + "pointer": "/AMGCL_cuda/precond/coarsening/aggr/eps_strong", + "default": 0, + "type": "float", + "doc": "Aggregation epsilon strong." } ] \ No newline at end of file diff --git a/src/polysolve/linear/AMGCL_cuda.cu b/src/polysolve/linear/AMGCL_cuda.cu index 12cf21ed..eed7dbf4 100644 --- a/src/polysolve/linear/AMGCL_cuda.cu +++ b/src/polysolve/linear/AMGCL_cuda.cu @@ -1,5 +1,4 @@ #ifdef POLYSOLVE_WITH_AMGCL -// #ifdef POLYSOLVE_WITH_CUDA //////////////////////////////////////////////////////////////////////////////// #include "AMGCL_cuda.hpp" @@ -30,36 +29,6 @@ namespace polysolve::linear const T *end_; }; - json default_params() - { - json params = R"({ - "precond": { - "relax": { - "type": "spai0" - }, - "class": "amg", - "max_levels": 6, - "direct_coarse": false, - "ncycle": 2, - "coarsening": { - "type": "smoothed_aggregation", - "estimate_spectral_radius": true, - "relax": 1, - "aggr": { - "eps_strong": 0 - } - } - }, - "solver": { - "tol": 1e-10, - "maxiter": 1000, - "type": "cg" - } - })"_json; - - return params; - } - void set_params(const json ¶ms, json &out) { if (params.contains("AMGCL_cuda")) @@ -92,7 +61,6 @@ namespace polysolve::linear AMGCL_cuda::AMGCL_cuda() { - params_ = default_params(); // NOTE: usolver and psolver parameters are only used if the // preconditioner class is "schur_pressure_correction" precond_num_ = 0; @@ -169,5 +137,4 @@ namespace polysolve::linear } // namespace polysolve -// #endif #endif