polyfem · xDarkLemon · Feb 15, 2024 · Feb 15, 2024 · Feb 15, 2024 · teseoch
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -75,8 +75,8 @@ option(POLYSOLVE_WITH_CHOLMOD       "Enable Cholmod library"
 option(POLYSOLVE_WITH_UMFPACK       "Enable UmfPack library"                             ON)
 option(POLYSOLVE_WITH_SUPERLU       "Enable SuperLU library"                             ON)
 option(POLYSOLVE_WITH_MKL           "Enable MKL library"  ${POLYSOLVE_NOT_ON_APPLE_SILICON})
-option(POLYSOLVE_WITH_CUSOLVER      "Enable cuSOLVER library"                           OFF)
-option(POLYSOLVE_WITH_PARDISO       "Enable Pardiso library"                            OFF)
+option(POLYSOLVE_WITH_CUSOLVER      "Enable cuSOLVER library"                            ON)
+option(POLYSOLVE_WITH_PARDISO       "Enable Pardiso library"                             ON)
 option(POLYSOLVE_WITH_HYPRE         "Enable hypre"                                       ON)
 option(POLYSOLVE_WITH_AMGCL         "Use AMGCL"                                          ON)
 option(POLYSOLVE_WITH_SPECTRA       "Enable Spectra library"                             ON)
@@ -146,6 +146,10 @@ endif()
 
 # Add an empty library and fill in the list of sources in `src/CMakeLists.txt`.
 add_library(polysolve_linear)
+if(POLYSOLVE_WITH_CUSOLVER)
+    include(cusolverdn)
+endif()
+
 add_library(polysolve::linear ALIAS polysolve_linear)
 add_subdirectory(src/polysolve)
 add_subdirectory(src/polysolve/linear)
@@ -272,8 +276,8 @@ endif()
 # AMGCL solver
 if(POLYSOLVE_WITH_AMGCL)
     include(amgcl)
-    target_link_libraries(polysolve_linear PUBLIC amgcl::amgcl)
-    target_compile_definitions(polysolve_linear PUBLIC POLYSOLVE_WITH_AMGCL)
+    target_link_libraries(polysolve_linear PUBLIC amgcl::amgcl cuda_target)
+    target_compile_definitions(polysolve_linear PRIVATE -DPOLYSOLVE_WITH_AMGCL)
 endif()
 
 # Spectra (MPL 2.0)
@@ -284,14 +288,11 @@ if(POLYSOLVE_WITH_SPECTRA)
 endif()
 
 # cuSolver solvers
-if(POLYSOLVE_WITH_CUSOLVER)
-    include(cusolverdn)
-    if(TARGET CUDA::cusolver)
-        target_link_libraries(polysolve_linear PUBLIC CUDA::cusolver)
-        target_compile_definitions(polysolve_linear PUBLIC POLYSOLVE_WITH_CUSOLVER)
-    else()
-        message(WARNING "cuSOLVER not found, solver will not be available.")
-    endif()
+if(TARGET CUDA::cusolver)
+    target_link_libraries(polysolve_linear PUBLIC CUDA::cusolver)
+    target_compile_definitions(polysolve_linear PRIVATE -DPOLYSOLVE_WITH_CUSOLVER)
+else()
+    message(WARNING "cuSOLVER not found, solver will not be available.")
 endif()
 
 # Sanitizers

diff --git a/cmake/recipes/cusolverdn.cmake b/cmake/recipes/cusolverdn.cmake
@@ -6,6 +6,14 @@ endif()
 
 message(STATUS "Third-party: creating targets 'CUDA::cusolver'")
 
+# Nvidia RTX8000 -> compute_75
+# Nvidia V100 -> compute_70
+# Nvidia 1080/1080Ti -> compute_61
+# Nvidia 3080Ti -> compute_86
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+    set(CMAKE_CUDA_ARCHITECTURES 70 75 86)
+endif()
+
 include(CheckLanguage)
 check_language(CUDA)
 if(CMAKE_CUDA_COMPILER)
@@ -18,7 +26,7 @@ if(CMAKE_CUDA_COMPILER)
         set(CUDA_PROPAGATE_HOST_FLAGS OFF)
         set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -use_fast_math --expt-relaxed-constexpr  -gencode arch=compute_86,code=sm_86")
 
-        target_compile_options(polysolve PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:
+        target_compile_options(polysolve_linear PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:
 
         --generate-line-info
         --use_fast_math
@@ -33,21 +41,15 @@ if(CMAKE_CUDA_COMPILER)
 
         )
 
-        target_link_libraries(polysolve PUBLIC CUDA::toolkit)
-        set_target_properties(polysolve PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-        # Nvidia RTX8000 -> compute_75
-        # Nvidia V100 -> compute_70
-        # Nvidia 1080/1080Ti -> compute_61
-        # Nvidia 3080Ti -> compute_86
-        if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
-            set(CMAKE_CUDA_ARCHITECTURES 70 75 86)
-        endif()
-        set_target_properties(polysolve PROPERTIES CUDA_ARCHITECTURES "70;75;86")
+        set_target_properties(polysolve_linear PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+        set_target_properties(polysolve_linear PROPERTIES CUDA_ARCHITECTURES native)
+        # set_target_properties(polysolve PROPERTIES CUDA_ARCHITECTURES "70;75;86")
+        target_link_libraries(polysolve_linear PUBLIC CUDA::toolkit CUDA::cudart)
 
         if(APPLE)
             # We need to add the path to the driver (libcuda.dylib) as an rpath,
             # so that the static cuda runtime can find it at runtime.
-            set_property(TARGET polysolve
+            set_property(TARGET polysolve_linear
                         PROPERTY
                         BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
         endif()

diff --git a/linear-solver-spec.json b/linear-solver-spec.json
@@ -15,7 +15,8 @@
             "Eigen::MINRES",
             "Pardiso",
             "Hypre",
-            "AMGCL"
+            "AMGCL",
+            "AMGCL_cuda"
         ],
         "doc": "Settings for the linear solver."
     },
@@ -42,6 +43,7 @@
             "Pardiso",
             "Hypre",
             "AMGCL",
+            "AMGCL_cuda",
             "Eigen::LeastSquaresConjugateGradient",
             "Eigen::DGMRES",
             "Eigen::ConjugateGradient",
@@ -153,6 +155,16 @@
         ],
         "doc": "Settings for the AMGCL solver."
     },
+    {
+        "pointer": "/AMGCL_cuda",
+        "default": null,
+        "type": "object",
+        "optional": [
+            "solver",
+            "precond"
+        ],
+        "doc": "Settings for the AMGCL_cuda solver."
+    },
     {
         "pointer": "/Eigen::LeastSquaresConjugateGradient/max_iter",
         "default": 1000,

diff --git a/src/polysolve/linear/AMGCL_cuda.cu b/src/polysolve/linear/AMGCL_cuda.cu
@@ -0,0 +1,173 @@
+#ifdef POLYSOLVE_WITH_AMGCL
+// #ifdef POLYSOLVE_WITH_CUDA
+
+////////////////////////////////////////////////////////////////////////////////
+#include "AMGCL_cuda.hpp"
+
+#include <boost/property_tree/ptree.hpp>
+#include <boost/property_tree/json_parser.hpp>
+////////////////////////////////////////////////////////////////////////////////
+
+namespace polysolve::linear
+{
+
+    namespace
+    {
+        /* https://stackoverflow.com/questions/15904896/range-based-for-loop-on-a-dynamic-array */
+        template <typename T>
+        struct WrappedArray
+        {
+            WrappedArray(const T *first, const T *last)
+                : begin_{first}, end_{last} {}
+            WrappedArray(const T *first, std::ptrdiff_t size)
+                : WrappedArray{first, first + size} {}
+
+            const T *begin() const noexcept { return begin_; }
+            const T *end() const noexcept { return end_; }
+            const T &operator[](const size_t i) const { return begin_[i]; }
+
+            const T *begin_;
+            const T *end_;
+        };
+
+        json default_params()
+        {
+            json params = R"({
+                "precond": {
+                    "relax": {
+                        "type": "spai0"
+                    },
+                    "class": "amg",
+                    "max_levels": 6,
+                    "direct_coarse": false,
+                    "ncycle": 2,
+                    "coarsening": {
+                        "type": "smoothed_aggregation",
+                        "estimate_spectral_radius": true,
+                        "relax": 1,
+                        "aggr": {
+                            "eps_strong": 0
+                        }
+                    }
+                },
+                "solver": {
+                    "tol": 1e-10,
+                    "maxiter": 1000,
+                    "type": "cg"
+                }
+        })"_json;
+
+            return params;
+        }
+
+        void set_params(const json &params, json &out)
+        {
+            if (params.contains("AMGCL_cuda"))
+            {
+                // Patch the stored params with input ones
+                if (params["AMGCL_cuda"].contains("precond"))
+                    out["precond"].merge_patch(params["AMGCL_cuda"]["precond"]);
+                if (params["AMGCL_cuda"].contains("solver"))
+                    out["solver"].merge_patch(params["AMGCL_cuda"]["solver"]);
+
+                if (out["precond"]["class"] == "schur_pressure_correction")
+                {
+                    // Initialize the u and p solvers with a tolerance that is comparable to the main solver's
+                    if (!out["precond"].contains("usolver"))
+                    {
+                        out["precond"]["usolver"] = R"({"solver": {"maxiter": 100}})"_json;
+                        out["precond"]["usolver"]["solver"]["tol"] = 10 * out["solver"]["tol"].get<double>();
+                    }
+                    if (!out["precond"].contains("usolver"))
+                    {
+                        out["precond"]["psolver"] = R"({"solver": {"maxiter": 100}})"_json;
+                        out["precond"]["psolver"]["solver"]["tol"] = 10 * out["solver"]["tol"].get<double>();
+                    }
+                }
+            }
+        }
+    } // namespace
+
+    ////////////////////////////////////////////////////////////////////////////////
+
+    AMGCL_cuda::AMGCL_cuda()
+    {
+        params_ = default_params();
+        // NOTE: usolver and psolver parameters are only used if the
+        // preconditioner class is "schur_pressure_correction"
+        precond_num_ = 0;
+        cusparseCreate(&backend_params_.cusparse_handle);
+    }
+
+    // Set solver parameters
+    void AMGCL_cuda::set_parameters(const json &params)
+    {
+        if (params.contains("AMGCL_cuda"))
+        {
+            set_params(params, params_);
+        }
+    }
+
+    void AMGCL_cuda::get_info(json &params) const
+    {
+        params["num_iterations"] = iterations_;
+        params["final_res_norm"] = residual_error_;
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+
+    void AMGCL_cuda::factorize(const StiffnessMatrix &Ain)
+    {
+        assert(precond_num_ > 0);
+
+        int numRows = Ain.rows();
+
+        WrappedArray<StiffnessMatrix::StorageIndex> ia(Ain.outerIndexPtr(), numRows + 1);
+        WrappedArray<StiffnessMatrix::StorageIndex> ja(Ain.innerIndexPtr(), Ain.nonZeros());
+        WrappedArray<StiffnessMatrix::Scalar> a(Ain.valuePtr(), Ain.nonZeros());
+        if (params_["precond"]["class"] == "schur_pressure_correction")
+        {
+            std::vector<char> pmask(numRows, 0);
+            for (size_t i = precond_num_; i < numRows; ++i)
+                pmask[i] = 1;
+            params_["precond"]["pmask"] = pmask;
+        }
+
+        // AMGCL takes the parameters as a Boost property_tree (i.e., another JSON data structure)
+        std::stringstream ss_params;
+        ss_params << params_;
+        boost::property_tree::ptree pt_params;
+        boost::property_tree::read_json(ss_params, pt_params);
+        auto A = std::tie(numRows, ia, ja, a);
+        solver_ = std::make_unique<Solver>(A, pt_params, backend_params_);
+        // std::cout << *solver_.get() << std::endl;
+        iterations_ = 0;
+        residual_error_ = 0;
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////////////////////
+
+    void AMGCL_cuda::solve(const Eigen::Ref<const VectorXd> rhs, Eigen::Ref<VectorXd> result)
+    {
+        assert(result.size() == rhs.size());
+        std::vector<double> _rhs(rhs.data(), rhs.data() + rhs.size());
+        std::vector<double> x(result.data(), result.data() + result.size());
+        auto rhs_b = Backend::copy_vector(_rhs, backend_params_);
+        auto x_b = Backend::copy_vector(x, backend_params_);
+
+        std::tie(iterations_, residual_error_) = (*solver_)(*rhs_b, *x_b);
+        thrust::copy(x_b->begin(), x_b->end(), result.data());
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+
+    AMGCL_cuda::~AMGCL_cuda()
+    {
+    }
+
+} // namespace polysolve
+
+// #endif
+#endif