Skip to content

Commit

Permalink
GMRESMLMG: Support Multi-level composite solve
Browse files Browse the repository at this point in the history
  • Loading branch information
WeiqunZhang committed Jan 17, 2025
1 parent 0233b8e commit 9f7e9eb
Show file tree
Hide file tree
Showing 23 changed files with 990 additions and 274 deletions.
203 changes: 203 additions & 0 deletions Src/Base/AMReX_FabArrayUtility.H
Original file line number Diff line number Diff line change
Expand Up @@ -1602,6 +1602,209 @@ Dot (FabArray<FAB> const& x, int xcomp, FabArray<FAB> const& y, int ycomp, int n
return sm;
}

/**
* \brief Compute dot product of FabArray with itself
*
* \param x first FabArray
* \param xcomp starting component of x
* \param y second FabArray
* \param ycomp starting component of y
* \param ncomp number of components
* \param nghost number of ghost cells
* \param local If true, MPI communication is skipped.
*/
template <typename FAB, std::enable_if_t<IsBaseFab<FAB>::value,int> FOO = 0>
typename FAB::value_type
Dot (FabArray<FAB> const& x, int xcomp, int ncomp, IntVect const& nghost, bool local = false)
{
BL_ASSERT(x.nGrowVect().allGE(nghost));

BL_PROFILE("amrex::Dot()");

using T = typename FAB::value_type;
auto sm = T(0.0);
#ifdef AMREX_USE_GPU
if (Gpu::inLaunchRegion()) {
auto const& xma = x.const_arrays();
sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
{
auto t = T(0.0);
auto const& xfab = xma[box_no];
for (int n = 0; n < ncomp; ++n) {
auto v = xfab(i,j,k,xcomp+n);
t += v*v;
}
return t;
});
} else
#endif
{
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
{
Box const& bx = mfi.growntilebox(nghost);
auto const& xfab = x.const_array(mfi);
AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
{
auto v = xfab(i,j,k,xcomp+n);
sm += v*v;
});
}
}

if (!local) {
ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
}

return sm;
}

/**
* \brief Compute dot product of two FabArrays in region that mask is true
*
* \param mask mask
* \param x first FabArray
* \param xcomp starting component of x
* \param y second FabArray
* \param ycomp starting component of y
* \param ncomp number of components
* \param nghost number of ghost cells
* \param local If true, MPI communication is skipped.
*/
template <typename IFAB, typename FAB,
std::enable_if_t<IsBaseFab<FAB>::value && IsBaseFab<IFAB>::value,int> FOO = 0>
typename FAB::value_type
Dot (FabArray<IFAB> const& mask, FabArray<FAB> const& x, int xcomp,
FabArray<FAB> const& y, int ycomp, int ncomp, IntVect const& nghost,
bool local = false)
{
BL_ASSERT(x.boxArray() == y.boxArray() && x.boxArray() == mask.boxArray());
BL_ASSERT(x.DistributionMap() == y.DistributionMap() && x.DistributionMap() == mask.DistributionMap());
BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost) &&
mask.nGrowVect().allGE(nghost));

BL_PROFILE("amrex::Dot()");

using T = typename FAB::value_type;
auto sm = T(0.0);
#ifdef AMREX_USE_GPU
if (Gpu::inLaunchRegion()) {
auto const& mma = mask.const_arrays();
auto const& xma = x.const_arrays();
auto const& yma = y.const_arrays();
sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
{
auto t = T(0.0);
auto m = T(mma[box_no](i,j,k));
if (m != 0) {
auto const& xfab = xma[box_no];
auto const& yfab = yma[box_no];
for (int n = 0; n < ncomp; ++n) {
t += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
}
}
return t*m;
});
} else
#endif
{
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
{
Box const& bx = mfi.growntilebox(nghost);
auto const& mfab = mask.const_array(mfi);
auto const& xfab = x.const_array(mfi);
auto const& yfab = y.const_array(mfi);
AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
{
auto m = T(mfab(i,j,k));
sm += m * xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n);
});
}
}

if (!local) {
ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
}

return sm;
}

/**
* \brief Compute dot product of FabArray with itself in region that mask is true
*
* \param mask mask
* \param x FabArray
* \param xcomp starting component of x
* \param ncomp number of components
* \param nghost number of ghost cells
* \param local If true, MPI communication is skipped.
*/
template <typename IFAB, typename FAB,
std::enable_if_t<IsBaseFab<FAB>::value && IsBaseFab<IFAB>::value,int> FOO = 0>
typename FAB::value_type
Dot (FabArray<IFAB> const& mask, FabArray<FAB> const& x, int xcomp, int ncomp,
IntVect const& nghost, bool local = false)
{
BL_ASSERT(x.boxArray() == mask.boxArray());
BL_ASSERT(x.DistributionMap() == mask.DistributionMap());
BL_ASSERT(x.nGrowVect().allGE(nghost) && mask.nGrowVect().allGE(nghost));

BL_PROFILE("amrex::Dot()");

using T = typename FAB::value_type;
auto sm = T(0.0);
#ifdef AMREX_USE_GPU
if (Gpu::inLaunchRegion()) {
auto const& mma = mask.const_arrays();
auto const& xma = x.const_arrays();
sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, x, nghost,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<T>
{
auto t = T(0.0);
auto m = T(mma[box_no](i,j,k));
if (m != 0) {
auto const& xfab = xma[box_no];
for (int n = 0; n < ncomp; ++n) {
auto v = xfab(i,j,k,xcomp+n);
t += v*v;
}
}
return t*m;
});
} else
#endif
{
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
{
Box const& bx = mfi.growntilebox(nghost);
auto const& mfab = mask.const_array(mfi);
auto const& xfab = x.const_array(mfi);
AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
{
auto m = T(mfab(i,j,k));
auto v = xfab(i,j,k,xcomp+n);
sm += m*v*v;
});
}
}

if (!local) {
ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
}

return sm;
}

//! dst = val
template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
void setVal (MF& dst, typename MF::value_type val)
Expand Down
18 changes: 18 additions & 0 deletions Src/Base/AMReX_Vector.H
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,15 @@ namespace amrex
return r;
}

template <class T, std::size_t N, typename = typename T::FABType>
[[nodiscard]] Vector<Array<T,N>*> GetVecOfPtrs (Vector<Array<T,N>>& a)
{
Vector<Array<T,N>*> r;
r.reserve(a.size());
for (auto& x : a) { r.push_back(&x); }
return r;
}

template <class T>
[[nodiscard]] Vector<T*> GetVecOfPtrs (const Vector<std::unique_ptr<T> >& a)
{
Expand All @@ -86,6 +95,15 @@ namespace amrex
return r;
}

template <class T, std::size_t N, typename = typename T::FABType>
[[nodiscard]] Vector<Array<T,N> const*> GetVecOfConstPtrs (Vector<Array<T,N>> const& a)
{
Vector<Array<T,N> const*> r;
r.reserve(a.size());
for (auto& x : a) { r.push_back(&x); }
return r;
}

template <class T>
[[nodiscard]] Vector<const T*> GetVecOfConstPtrs (const Vector<std::unique_ptr<T> >& a)
{
Expand Down
Loading

0 comments on commit 9f7e9eb

Please sign in to comment.