Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AMGCL_cuda #70

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ option(POLYSOLVE_WITH_CHOLMOD "Enable Cholmod library"
option(POLYSOLVE_WITH_UMFPACK "Enable UmfPack library" ON)
option(POLYSOLVE_WITH_SUPERLU "Enable SuperLU library" ON)
option(POLYSOLVE_WITH_MKL "Enable MKL library" ${POLYSOLVE_NOT_ON_APPLE_SILICON})
option(POLYSOLVE_WITH_CUSOLVER "Enable cuSOLVER library" OFF)
option(POLYSOLVE_WITH_PARDISO "Enable Pardiso library" OFF)
option(POLYSOLVE_WITH_CUSOLVER "Enable cuSOLVER library" ON)
option(POLYSOLVE_WITH_PARDISO "Enable Pardiso library" ON)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

leave pardiso off

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

option(POLYSOLVE_WITH_HYPRE "Enable hypre" ON)
option(POLYSOLVE_WITH_AMGCL "Use AMGCL" ON)
option(POLYSOLVE_WITH_SPECTRA "Enable Spectra library" ON)
Expand Down Expand Up @@ -146,6 +146,10 @@ endif()

# Add an empty library and fill in the list of sources in `src/CMakeLists.txt`.
add_library(polysolve_linear)
if(POLYSOLVE_WITH_CUSOLVER)
include(cusolverdn)
endif()

add_library(polysolve::linear ALIAS polysolve_linear)
add_subdirectory(src/polysolve)
add_subdirectory(src/polysolve/linear)
Expand Down Expand Up @@ -272,8 +276,8 @@ endif()
# AMGCL solver
if(POLYSOLVE_WITH_AMGCL)
include(amgcl)
target_link_libraries(polysolve_linear PUBLIC amgcl::amgcl)
target_compile_definitions(polysolve_linear PUBLIC POLYSOLVE_WITH_AMGCL)
target_link_libraries(polysolve_linear PUBLIC amgcl::amgcl cuda_target)
target_compile_definitions(polysolve_linear PRIVATE -DPOLYSOLVE_WITH_AMGCL)
endif()

# Spectra (MPL 2.0)
Expand All @@ -284,14 +288,11 @@ if(POLYSOLVE_WITH_SPECTRA)
endif()

# cuSolver solvers
if(POLYSOLVE_WITH_CUSOLVER)
include(cusolverdn)
if(TARGET CUDA::cusolver)
target_link_libraries(polysolve_linear PUBLIC CUDA::cusolver)
target_compile_definitions(polysolve_linear PUBLIC POLYSOLVE_WITH_CUSOLVER)
else()
message(WARNING "cuSOLVER not found, solver will not be available.")
endif()
if(TARGET CUDA::cusolver)
target_link_libraries(polysolve_linear PUBLIC CUDA::cusolver)
target_compile_definitions(polysolve_linear PRIVATE -DPOLYSOLVE_WITH_CUSOLVER)
else()
message(WARNING "cuSOLVER not found, solver will not be available.")
endif()

# Sanitizers
Expand Down
26 changes: 14 additions & 12 deletions cmake/recipes/cusolverdn.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ endif()

message(STATUS "Third-party: creating targets 'CUDA::cusolver'")

# Nvidia RTX8000 -> compute_75
# Nvidia V100 -> compute_70
# Nvidia 1080/1080Ti -> compute_61
# Nvidia 3080Ti -> compute_86
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 70 75 86)
endif()

include(CheckLanguage)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
Expand All @@ -18,7 +26,7 @@ if(CMAKE_CUDA_COMPILER)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -use_fast_math --expt-relaxed-constexpr -gencode arch=compute_86,code=sm_86")

target_compile_options(polysolve PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:
target_compile_options(polysolve_linear PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:

--generate-line-info
--use_fast_math
Expand All @@ -33,21 +41,15 @@ if(CMAKE_CUDA_COMPILER)

)

target_link_libraries(polysolve PUBLIC CUDA::toolkit)
set_target_properties(polysolve PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
# Nvidia RTX8000 -> compute_75
# Nvidia V100 -> compute_70
# Nvidia 1080/1080Ti -> compute_61
# Nvidia 3080Ti -> compute_86
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 70 75 86)
endif()
set_target_properties(polysolve PROPERTIES CUDA_ARCHITECTURES "70;75;86")
set_target_properties(polysolve_linear PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(polysolve_linear PROPERTIES CUDA_ARCHITECTURES native)
# set_target_properties(polysolve PROPERTIES CUDA_ARCHITECTURES "70;75;86")
target_link_libraries(polysolve_linear PUBLIC CUDA::toolkit CUDA::cudart)

if(APPLE)
# We need to add the path to the driver (libcuda.dylib) as an rpath,
# so that the static cuda runtime can find it at runtime.
set_property(TARGET polysolve
set_property(TARGET polysolve_linear
PROPERTY
BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
endif()
Expand Down
14 changes: 13 additions & 1 deletion linear-solver-spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
"Eigen::MINRES",
"Pardiso",
"Hypre",
"AMGCL"
"AMGCL",
"AMGCL_cuda"
],
"doc": "Settings for the linear solver."
},
Expand All @@ -42,6 +43,7 @@
"Pardiso",
"Hypre",
"AMGCL",
"AMGCL_cuda",
"Eigen::LeastSquaresConjugateGradient",
"Eigen::DGMRES",
"Eigen::ConjugateGradient",
Expand Down Expand Up @@ -153,6 +155,16 @@
],
"doc": "Settings for the AMGCL solver."
},
{
"pointer": "/AMGCL_cuda",
"default": null,
"type": "object",
"optional": [
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it have opts?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

"solver",
"precond"
],
"doc": "Settings for the AMGCL_cuda solver."
},
{
"pointer": "/Eigen::LeastSquaresConjugateGradient/max_iter",
"default": 1000,
Expand Down
173 changes: 173 additions & 0 deletions src/polysolve/linear/AMGCL_cuda.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#ifdef POLYSOLVE_WITH_AMGCL
// #ifdef POLYSOLVE_WITH_CUDA
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think we need this

Copy link
Contributor Author

@xDarkLemon xDarkLemon Feb 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I commented it out because polysolve CMakeLists.txt does not have POLYSOLVE_WITH_CUDA flag, but polyfem has. So I deleted the flag here.


////////////////////////////////////////////////////////////////////////////////
#include "AMGCL_cuda.hpp"

#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
////////////////////////////////////////////////////////////////////////////////

namespace polysolve::linear
{

namespace
{
/* https://stackoverflow.com/questions/15904896/range-based-for-loop-on-a-dynamic-array */
template <typename T>
struct WrappedArray
{
WrappedArray(const T *first, const T *last)
: begin_{first}, end_{last} {}
WrappedArray(const T *first, std::ptrdiff_t size)
: WrappedArray{first, first + size} {}

const T *begin() const noexcept { return begin_; }
const T *end() const noexcept { return end_; }
const T &operator[](const size_t i) const { return begin_[i]; }

const T *begin_;
const T *end_;
};

json default_params()
{
json params = R"({
"precond": {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thse should go in the spec

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved.

"relax": {
"type": "spai0"
},
"class": "amg",
"max_levels": 6,
"direct_coarse": false,
"ncycle": 2,
"coarsening": {
"type": "smoothed_aggregation",
"estimate_spectral_radius": true,
"relax": 1,
"aggr": {
"eps_strong": 0
}
}
},
"solver": {
"tol": 1e-10,
"maxiter": 1000,
"type": "cg"
}
})"_json;

return params;
}

void set_params(const json &params, json &out)
{
if (params.contains("AMGCL_cuda"))
{
// Patch the stored params with input ones
if (params["AMGCL_cuda"].contains("precond"))
out["precond"].merge_patch(params["AMGCL_cuda"]["precond"]);
if (params["AMGCL_cuda"].contains("solver"))
out["solver"].merge_patch(params["AMGCL_cuda"]["solver"]);

if (out["precond"]["class"] == "schur_pressure_correction")
{
// Initialize the u and p solvers with a tolerance that is comparable to the main solver's
if (!out["precond"].contains("usolver"))
{
out["precond"]["usolver"] = R"({"solver": {"maxiter": 100}})"_json;
out["precond"]["usolver"]["solver"]["tol"] = 10 * out["solver"]["tol"].get<double>();
}
if (!out["precond"].contains("usolver"))
{
out["precond"]["psolver"] = R"({"solver": {"maxiter": 100}})"_json;
out["precond"]["psolver"]["solver"]["tol"] = 10 * out["solver"]["tol"].get<double>();
}
}
}
}
} // namespace

////////////////////////////////////////////////////////////////////////////////

AMGCL_cuda::AMGCL_cuda()
{
params_ = default_params();
// NOTE: usolver and psolver parameters are only used if the
// preconditioner class is "schur_pressure_correction"
precond_num_ = 0;
cusparseCreate(&backend_params_.cusparse_handle);
}

// Set solver parameters
void AMGCL_cuda::set_parameters(const json &params)
{
if (params.contains("AMGCL_cuda"))
{
set_params(params, params_);
}
}

void AMGCL_cuda::get_info(json &params) const
{
params["num_iterations"] = iterations_;
params["final_res_norm"] = residual_error_;
}

////////////////////////////////////////////////////////////////////////////////

void AMGCL_cuda::factorize(const StiffnessMatrix &Ain)
{
assert(precond_num_ > 0);

int numRows = Ain.rows();

WrappedArray<StiffnessMatrix::StorageIndex> ia(Ain.outerIndexPtr(), numRows + 1);
WrappedArray<StiffnessMatrix::StorageIndex> ja(Ain.innerIndexPtr(), Ain.nonZeros());
WrappedArray<StiffnessMatrix::Scalar> a(Ain.valuePtr(), Ain.nonZeros());
if (params_["precond"]["class"] == "schur_pressure_correction")
{
std::vector<char> pmask(numRows, 0);
for (size_t i = precond_num_; i < numRows; ++i)
pmask[i] = 1;
params_["precond"]["pmask"] = pmask;
}

// AMGCL takes the parameters as a Boost property_tree (i.e., another JSON data structure)
std::stringstream ss_params;
ss_params << params_;
boost::property_tree::ptree pt_params;
boost::property_tree::read_json(ss_params, pt_params);
auto A = std::tie(numRows, ia, ja, a);
solver_ = std::make_unique<Solver>(A, pt_params, backend_params_);
// std::cout << *solver_.get() << std::endl;
iterations_ = 0;
residual_error_ = 0;
}

////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////

void AMGCL_cuda::solve(const Eigen::Ref<const VectorXd> rhs, Eigen::Ref<VectorXd> result)
{
assert(result.size() == rhs.size());
std::vector<double> _rhs(rhs.data(), rhs.data() + rhs.size());
std::vector<double> x(result.data(), result.data() + result.size());
auto rhs_b = Backend::copy_vector(_rhs, backend_params_);
auto x_b = Backend::copy_vector(x, backend_params_);

std::tie(iterations_, residual_error_) = (*solver_)(*rhs_b, *x_b);
thrust::copy(x_b->begin(), x_b->end(), result.data());
}

////////////////////////////////////////////////////////////////////////////////

AMGCL_cuda::~AMGCL_cuda()
{
}

} // namespace polysolve

// #endif
#endif
Loading