From 9f7e9ebd8330af9feefdc214e316186695bc5bd2 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Tue, 17 Dec 2024 10:57:34 -0800 Subject: [PATCH] GMRESMLMG: Support Multi-level composite solve --- Src/Base/AMReX_FabArrayUtility.H | 203 +++++++++++++++++ Src/Base/AMReX_Vector.H | 18 ++ Src/LinearSolvers/AMReX_GMRES_MLMG.H | 214 ++++++++++-------- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 22 +- Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H | 76 +++++++ .../MLMG/AMReX_MLEBABecLap_F.cpp | 2 +- .../MLMG/AMReX_MLEBNodeFDLaplacian.H | 4 +- .../MLMG/AMReX_MLEBNodeFDLaplacian.cpp | 50 ++-- Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp | 2 +- Src/LinearSolvers/MLMG/AMReX_MLLinOp.H | 34 ++- Src/LinearSolvers/MLMG/AMReX_MLMG.H | 186 ++++++++------- .../MLMG/AMReX_MLNodeLaplacian_sync.cpp | 4 +- Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H | 8 +- Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp | 95 ++++++-- .../MLMG/AMReX_MLNodeLinOp_1D_K.H | 35 +++ .../MLMG/AMReX_MLNodeLinOp_2D_K.H | 57 +++++ .../MLMG/AMReX_MLNodeLinOp_3D_K.H | 82 +++++++ .../LinearSolvers/ABecLaplacian_C/MyTest.cpp | 110 ++++++--- Tests/LinearSolvers/CellEB/GNUmakefile | 2 +- Tests/LinearSolvers/CellEB/MyTest.H | 3 + Tests/LinearSolvers/CellEB/MyTest.cpp | 41 +++- Tests/LinearSolvers/CurlCurl/MyTest.cpp | 5 - Tests/LinearSolvers/NodalPoisson/MyTest.cpp | 11 +- 23 files changed, 990 insertions(+), 274 deletions(-) diff --git a/Src/Base/AMReX_FabArrayUtility.H b/Src/Base/AMReX_FabArrayUtility.H index bfac76c471e..6df3edd827a 100644 --- a/Src/Base/AMReX_FabArrayUtility.H +++ b/Src/Base/AMReX_FabArrayUtility.H @@ -1602,6 +1602,209 @@ Dot (FabArray const& x, int xcomp, FabArray const& y, int ycomp, int n return sm; } +/** + * \brief Compute dot product of FabArray with itself + * + * \param x first FabArray + * \param xcomp starting component of x + * \param y second FabArray + * \param ycomp starting component of y + * \param ncomp number of components + * \param nghost number of ghost cells + * \param local If true, MPI communication is skipped. + */ +template ::value,int> FOO = 0> +typename FAB::value_type +Dot (FabArray const& x, int xcomp, int ncomp, IntVect const& nghost, bool local = false) +{ + BL_ASSERT(x.nGrowVect().allGE(nghost)); + + BL_PROFILE("amrex::Dot()"); + + using T = typename FAB::value_type; + auto sm = T(0.0); +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& xma = x.const_arrays(); + sm = ParReduce(TypeList{}, TypeList{}, x, nghost, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple + { + auto t = T(0.0); + auto const& xfab = xma[box_no]; + for (int n = 0; n < ncomp; ++n) { + auto v = xfab(i,j,k,xcomp+n); + t += v*v; + } + return t; + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm) +#endif + for (MFIter mfi(x,true); mfi.isValid(); ++mfi) + { + Box const& bx = mfi.growntilebox(nghost); + auto const& xfab = x.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + auto v = xfab(i,j,k,xcomp+n); + sm += v*v; + }); + } + } + + if (!local) { + ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub()); + } + + return sm; +} + +/** + * \brief Compute dot product of two FabArrays in region that mask is true + * + * \param mask mask + * \param x first FabArray + * \param xcomp starting component of x + * \param y second FabArray + * \param ycomp starting component of y + * \param ncomp number of components + * \param nghost number of ghost cells + * \param local If true, MPI communication is skipped. 
+ */ +template ::value && IsBaseFab::value,int> FOO = 0> +typename FAB::value_type +Dot (FabArray const& mask, FabArray const& x, int xcomp, + FabArray const& y, int ycomp, int ncomp, IntVect const& nghost, + bool local = false) +{ + BL_ASSERT(x.boxArray() == y.boxArray() && x.boxArray() == mask.boxArray()); + BL_ASSERT(x.DistributionMap() == y.DistributionMap() && x.DistributionMap() == mask.DistributionMap()); + BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost) && + mask.nGrowVect().allGE(nghost)); + + BL_PROFILE("amrex::Dot()"); + + using T = typename FAB::value_type; + auto sm = T(0.0); +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& mma = mask.const_arrays(); + auto const& xma = x.const_arrays(); + auto const& yma = y.const_arrays(); + sm = ParReduce(TypeList{}, TypeList{}, x, nghost, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple + { + auto t = T(0.0); + auto m = T(mma[box_no](i,j,k)); + if (m != 0) { + auto const& xfab = xma[box_no]; + auto const& yfab = yma[box_no]; + for (int n = 0; n < ncomp; ++n) { + t += xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n); + } + } + return t*m; + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm) +#endif + for (MFIter mfi(x,true); mfi.isValid(); ++mfi) + { + Box const& bx = mfi.growntilebox(nghost); + auto const& mfab = mask.const_array(mfi); + auto const& xfab = x.const_array(mfi); + auto const& yfab = y.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + auto m = T(mfab(i,j,k)); + sm += m * xfab(i,j,k,xcomp+n) * yfab(i,j,k,ycomp+n); + }); + } + } + + if (!local) { + ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub()); + } + + return sm; +} + +/** + * \brief Compute dot product of FabArray with itself in region that mask is true + * + * \param mask mask + * \param x FabArray + * \param xcomp starting component of x + * \param ncomp number of components + * \param nghost number of ghost cells + * \param local If true, MPI communication is skipped. 
+ */ +template ::value && IsBaseFab::value,int> FOO = 0> +typename FAB::value_type +Dot (FabArray const& mask, FabArray const& x, int xcomp, int ncomp, + IntVect const& nghost, bool local = false) +{ + BL_ASSERT(x.boxArray() == mask.boxArray()); + BL_ASSERT(x.DistributionMap() == mask.DistributionMap()); + BL_ASSERT(x.nGrowVect().allGE(nghost) && mask.nGrowVect().allGE(nghost)); + + BL_PROFILE("amrex::Dot()"); + + using T = typename FAB::value_type; + auto sm = T(0.0); +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& mma = mask.const_arrays(); + auto const& xma = x.const_arrays(); + sm = ParReduce(TypeList{}, TypeList{}, x, nghost, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple + { + auto t = T(0.0); + auto m = T(mma[box_no](i,j,k)); + if (m != 0) { + auto const& xfab = xma[box_no]; + for (int n = 0; n < ncomp; ++n) { + auto v = xfab(i,j,k,xcomp+n); + t += v*v; + } + } + return t*m; + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm) +#endif + for (MFIter mfi(x,true); mfi.isValid(); ++mfi) + { + Box const& bx = mfi.growntilebox(nghost); + auto const& mfab = mask.const_array(mfi); + auto const& xfab = x.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + auto m = T(mfab(i,j,k)); + auto v = xfab(i,j,k,xcomp+n); + sm += m*v*v; + }); + } + } + + if (!local) { + ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub()); + } + + return sm; +} + //! dst = val template ,int> = 0> void setVal (MF& dst, typename MF::value_type val) diff --git a/Src/Base/AMReX_Vector.H b/Src/Base/AMReX_Vector.H index c468befa94b..4196e8de8ff 100644 --- a/Src/Base/AMReX_Vector.H +++ b/Src/Base/AMReX_Vector.H @@ -66,6 +66,15 @@ namespace amrex return r; } + template + [[nodiscard]] Vector*> GetVecOfPtrs (Vector>& a) + { + Vector*> r; + r.reserve(a.size()); + for (auto& x : a) { r.push_back(&x); } + return r; + } + template [[nodiscard]] Vector GetVecOfPtrs (const Vector >& a) { @@ -86,6 +95,15 @@ namespace amrex return r; } + template + [[nodiscard]] Vector const*> GetVecOfConstPtrs (Vector> const& a) + { + Vector const*> r; + r.reserve(a.size()); + for (auto& x : a) { r.push_back(&x); } + return r; + } + template [[nodiscard]] Vector GetVecOfConstPtrs (const Vector >& a) { diff --git a/Src/LinearSolvers/AMReX_GMRES_MLMG.H b/Src/LinearSolvers/AMReX_GMRES_MLMG.H index 56ee477250e..ebc8d4be3b5 100644 --- a/Src/LinearSolvers/AMReX_GMRES_MLMG.H +++ b/Src/LinearSolvers/AMReX_GMRES_MLMG.H @@ -4,6 +4,7 @@ #include #include +#include #include namespace amrex { @@ -19,9 +20,10 @@ template class GMRESMLMGT { public: + using VEC = Vector; using MG = MLMGT; using RT = typename MG::RT; // double or float - using GM = GMRES>; + using GM = GMRES,GMRESMLMGT>; explicit GMRESMLMGT (MG& mlmg); @@ -35,8 +37,13 @@ public: */ void solve (MF& a_sol, MF const& a_rhs, RT a_tol_rel, RT a_tol_abs); + void solve (Vector const& a_sol, Vector const& a_rhs, RT a_tol_rel, RT a_tol_abs); + //! Sets verbosity. - void setVerbose (int v) { m_gmres.setVerbose(v); } + void setVerbose (int v) { + m_verbose = v; + m_gmres.setVerbose(v); + } //! Sets the max number of iterations void setMaxIters (int niters) { m_gmres.setMaxIters(niters); } @@ -50,46 +57,34 @@ public: //! Get the GMRES object. GM& getGMRES () { return m_gmres; } - /** - * \brief Set MLMG's multiplicative property of zero - * - * This should NOT be called unless MLMG has the multiplicative property - * of zero. 
MLMG is not a matrix, and usually does not have the - * properties of a matrix such as the multiplicative property of zero - * (i.e., M*0=0) because how domain boundary conditions are - * handled. However, if MLMG has the property of zero, calling this - * function with true can have a small performance benefit. - */ - void setPropertyOfZero (bool b) { m_prop_zero = b; } - //! Make MultiFab without ghost cells - MF makeVecRHS () const; + VEC makeVecRHS () const; //! Make MultiFab with ghost cells and set ghost cells to zero - MF makeVecLHS () const; + VEC makeVecLHS () const; - RT norm2 (MF const& mf) const; + RT norm2 (VEC const& mf) const; - static void scale (MF& mf, RT scale_factor); + static void scale (VEC& mf, RT scale_factor); - RT dotProduct (MF const& mf1, MF const& mf2) const; + RT dotProduct (VEC const& mf1, VEC const& mf2) const; //! lhs = 0 - static void setToZero (MF& lhs); + static void setToZero (VEC& lhs); //! lhs = rhs - static void assign (MF& lhs, MF const& rhs); + static void assign (VEC& lhs, VEC const& rhs); //! lhs += a*rhs - static void increment (MF& lhs, MF const& rhs, RT a); + static void increment (VEC& lhs, VEC const& rhs, RT a); //! lhs = a*rhs_a + b*rhs_b - static void linComb (MF& lhs, RT a, MF const& rhs_a, RT b, MF const& rhs_b); + static void linComb (VEC& lhs, RT a, VEC const& rhs_a, RT b, VEC const& rhs_b); //! lhs = L(rhs) - void apply (MF& lhs, MF const& rhs) const; + void apply (VEC& lhs, VEC const& rhs) const; - void precond (MF& lhs, MF const& rhs) const; + void precond (VEC& lhs, VEC const& rhs) const; //! Control whether or not to use MLMG as preconditioner. bool usePrecond (bool new_flag) { return std::exchange(m_use_precond, new_flag); } @@ -101,132 +96,171 @@ private: GM m_gmres; MG* m_mlmg; MLLinOpT* m_linop; + RT m_precon_reltol = 1.e-4; + int m_verbose = 0; + int m_nlevels = 0; bool m_use_precond = true; - bool m_prop_zero = false; int m_precond_niters = 1; }; template GMRESMLMGT::GMRESMLMGT (MG& mlmg) - : m_mlmg(&mlmg), m_linop(&mlmg.getLinOp()) + : m_mlmg(&mlmg), m_linop(&mlmg.getLinOp()), m_nlevels(m_linop->NAMRLevels()) { - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_linop->NAMRLevels() == 1, - "Only support single level solve"); - m_mlmg->setVerbose(0); - m_mlmg->setBottomVerbose(0); - m_mlmg->prepareForGMRES(); + m_mlmg->preparePrecond(); m_gmres.define(*this); } template -auto GMRESMLMGT::makeVecRHS () const -> MF +auto GMRESMLMGT::makeVecRHS () const -> VEC { - return m_linop->make(0, 0, IntVect(0)); + VEC vmf(m_nlevels); + for (int ilev = 0; ilev < m_nlevels; ++ilev) { + vmf[ilev] = m_linop->make(ilev, 0, IntVect(0)); + } + return vmf; } template -auto GMRESMLMGT::makeVecLHS () const -> MF +auto GMRESMLMGT::makeVecLHS () const -> VEC { - auto mf = m_linop->make(0, 0, IntVect(1)); - setBndry(mf, RT(0), 0, nComp(mf)); - return mf; + VEC vmf(m_nlevels); + for (int ilev = 0; ilev < m_nlevels; ++ilev) { + vmf[ilev] = m_linop->make(ilev, 0, IntVect(1)); + setBndry(vmf[ilev], RT(0), 0, nComp(vmf[ilev])); + } + return vmf; } template -auto GMRESMLMGT::norm2 (MF const& mf) const -> RT +auto GMRESMLMGT::norm2 (VEC const& mf) const -> RT { - auto r = m_linop->xdoty(0, 0, mf, mf, false); - return std::sqrt(r); + return m_linop->norm2Precond(GetVecOfConstPtrs(mf)); } template -void GMRESMLMGT::scale (MF& mf, RT scale_factor) +void GMRESMLMGT::scale (VEC& mf, RT scale_factor) { - Scale(mf, scale_factor, 0, nComp(mf), 0); + for (auto& xmf : mf) { + Scale(xmf, scale_factor, 0, nComp(xmf), 0); + } } template -auto GMRESMLMGT::dotProduct (MF const& mf1, MF 
const& mf2) const -> RT +auto GMRESMLMGT::dotProduct (VEC const& mf1, VEC const& mf2) const -> RT { - return m_linop->xdoty(0, 0, mf1, mf2, false); + return m_linop->dotProductPrecond(GetVecOfConstPtrs(mf1), GetVecOfConstPtrs(mf2)); } template -void GMRESMLMGT::setToZero (MF& lhs) +void GMRESMLMGT::setToZero (VEC& lhs) { - setVal(lhs, RT(0.0)); + for (auto& xmf : lhs) { + setVal(xmf, RT(0.0)); + } } template -void GMRESMLMGT::assign (MF& lhs, MF const& rhs) +void GMRESMLMGT::assign (VEC& lhs, VEC const& rhs) { - LocalCopy(lhs, rhs, 0, 0, nComp(lhs), IntVect(0)); + auto nlevels = int(lhs.size()); + for (int ilev = 0; ilev < nlevels; ++ilev) { + LocalCopy(lhs[ilev], rhs[ilev], 0, 0, nComp(lhs[ilev]), IntVect(0)); + } } template -void GMRESMLMGT::increment (MF& lhs, MF const& rhs, RT a) +void GMRESMLMGT::increment (VEC& lhs, VEC const& rhs, RT a) { - Saxpy(lhs, a, rhs, 0, 0, nComp(lhs), IntVect(0)); + auto nlevels = int(lhs.size()); + for (int ilev = 0; ilev < nlevels; ++ilev) { + Saxpy(lhs[ilev], a, rhs[ilev], 0, 0, nComp(lhs[ilev]), IntVect(0)); + } } template -void GMRESMLMGT::linComb (MF& lhs, RT a, MF const& rhs_a, RT b, MF const& rhs_b) +void GMRESMLMGT::linComb (VEC& lhs, RT a, VEC const& rhs_a, RT b, VEC const& rhs_b) { - LinComb(lhs, a, rhs_a, 0, b, rhs_b, 0, 0, nComp(lhs), IntVect(0)); + auto nlevels = int(lhs.size()); + for (int ilev = 0; ilev < nlevels; ++ilev) { + LinComb(lhs[ilev], a, rhs_a[ilev], 0, b, rhs_b[ilev], 0, 0, nComp(lhs[ilev]), IntVect(0)); + } } template -void GMRESMLMGT::apply (MF& lhs, MF const& rhs) const +void GMRESMLMGT::apply (VEC& lhs, VEC const& rhs) const { - m_linop->apply(0, 0, lhs, const_cast(rhs), - MLLinOpT::BCMode::Homogeneous, - MLLinOpT::StateMode::Correction); + m_mlmg->applyPrecond(GetVecOfPtrs(lhs), GetVecOfPtrs(const_cast(rhs))); } template -void GMRESMLMGT::precond (MF& lhs, MF const& rhs) const +void GMRESMLMGT::precond (VEC& lhs, VEC const& rhs) const { if (m_use_precond) { - m_mlmg->prepareMGcycle(); - - for (int icycle = 0; icycle < m_precond_niters; ++icycle) { - if (icycle == 0) { - LocalCopy(m_mlmg->res[0][0], rhs, 0, 0, nComp(rhs), IntVect(0)); - } else { - m_mlmg->computeResOfCorrection(0,0); - LocalCopy(m_mlmg->res[0][0], m_mlmg->rescor[0][0], 0, 0, nComp(rhs), IntVect(0)); - } - - m_mlmg->mgVcycle(0,0); - - if (icycle == 0) { - LocalCopy(lhs, m_mlmg->cor[0][0], 0, 0, nComp(rhs), IntVect(0)); - } else { - increment(lhs, m_mlmg->cor[0][0], RT(1)); - } - } + m_mlmg->setPrecondIter(m_precond_niters); + setToZero(lhs); + m_mlmg->precond(GetVecOfPtrs(lhs), GetVecOfConstPtrs(rhs), 0, 0); } else { - LocalCopy(lhs, rhs, 0, 0, nComp(lhs), IntVect(0)); + for (int ilev = 0; ilev < m_nlevels; ++ilev) { + LocalCopy(lhs[ilev], rhs[ilev], 0, 0, nComp(lhs[ilev]), IntVect(0)); + } } } template void GMRESMLMGT::solve (MF& a_sol, MF const& a_rhs, RT a_tol_rel, RT a_tol_abs) { - if (m_prop_zero) { - auto rhs = makeVecRHS(); - assign(rhs, a_rhs); - m_linop->setDirichletNodesToZero(0,0,rhs); - m_gmres.solve(a_sol, rhs, a_tol_rel, a_tol_abs); - } else { - auto res = makeVecRHS(); - m_mlmg->apply({&res}, {&a_sol}); // res = L(sol) - increment(res, a_rhs, RT(-1)); // res = L(sol) - rhs - auto cor = makeVecLHS(); - m_linop->setDirichletNodesToZero(0,0,res); - m_gmres.solve(cor, res, a_tol_rel, a_tol_abs); // L(cor) = res - increment(a_sol, cor, RT(-1)); // sol = sol - cor + AMREX_ALWAYS_ASSERT(m_nlevels == 1); + this->solve({&a_sol}, {&a_rhs}, a_tol_rel, a_tol_abs); +} + +template +void GMRESMLMGT::solve (Vector const& a_sol, Vector const& a_rhs, RT a_tol_rel, 
RT a_tol_abs) +{ + m_mlmg->incPrintIdentation(); + auto mlmg_verbose = m_mlmg->getVerbose(); + auto mlmg_bottom_verbose = m_mlmg->getBottomVerbose(); + m_mlmg->setVerbose(m_verbose); + auto mlmg_bottom_solver = m_mlmg->getBottomSolver(); + + if (mlmg_bottom_solver != BottomSolver::smoother && + mlmg_bottom_solver != BottomSolver::hypre && + mlmg_bottom_solver != BottomSolver::petsc) + { + m_mlmg->setBottomSolver(BottomSolver::smoother); } + + auto res = makeVecLHS(); + auto cor = makeVecLHS(); + + m_mlmg->apply(GetVecOfPtrs(res), a_sol); // res = L(sol) + // res = L(sol) - rhs + bool need_to_scale_rhs = m_linop->scaleRHS(0,nullptr); + for (int ilev = 0; ilev < m_nlevels; ++ilev) { + MF const* prhs; + if (need_to_scale_rhs) { + LocalCopy(cor[ilev], *a_rhs[ilev], 0, 0, nComp(cor[ilev]), IntVect(0)); + auto r = m_linop->scaleRHS(ilev, &(cor[ilev])); + amrex::ignore_unused(r); + prhs = &(cor[ilev]); + } else { + prhs = a_rhs[ilev]; + } + Saxpy(res[ilev], RT(-1), *prhs, 0, 0, nComp(res[ilev]), IntVect(0)); + } + for (int ilev = 0; ilev < m_nlevels; ++ilev) { + m_linop->setDirichletNodesToZero(ilev,0,res[ilev]); + } + m_gmres.solve(cor, res, a_tol_rel, a_tol_abs); // L(cor) = res + // sol = sol - cor + for (int ilev = 0; ilev < m_nlevels; ++ilev) { + Saxpy(*a_sol[ilev], RT(-1), cor[ilev], 0, 0, nComp(*a_sol[ilev]), IntVect(0)); + } + + m_mlmg->setBottomSolver(mlmg_bottom_solver); + m_mlmg->setVerbose(mlmg_verbose); + m_mlmg->setBottomVerbose(mlmg_bottom_verbose); + m_mlmg->decPrintIdentation(); } using GMRESMLMG = GMRESMLMGT; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index 30d0b96a6fd..11f4fc42c99 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -42,6 +42,7 @@ public: void setMaxIter (int _maxiter) { maxiter = _maxiter; } [[nodiscard]] int getMaxIter () const { return maxiter; } + void setPrintIdentation (std::string s) { print_ident = std::move(s); } /** * Is the initial guess provided to the solver zero ? @@ -73,6 +74,7 @@ private: IntVect nghost = IntVect(0); int iter = -1; bool initial_vec_zeroed = false; + std::string print_ident; }; template @@ -133,7 +135,7 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( verbose > 0 ) { - amrex::Print() << "MLCGSolver_BiCGStab: Initial error (error0) = " << rnorm0 << '\n'; + amrex::Print() << print_ident << "MLCGSolver_BiCGStab: Initial error (error0) = " << rnorm0 << '\n'; } int ret = 0; iter = 1; @@ -143,7 +145,7 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) { if ( verbose > 0 ) { - amrex::Print() << "MLCGSolver_BiCGStab: niter = 0," + amrex::Print() << print_ident << "MLCGSolver_BiCGStab: niter = 0," << ", rnorm = " << rnorm << ", eps_abs = " << eps_abs << '\n'; } @@ -186,7 +188,7 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( verbose > 2 && ParallelDescriptor::IOProcessor() ) { - amrex::Print() << "MLCGSolver_BiCGStab: Half Iter " + amrex::Print() << print_ident << "MLCGSolver_BiCGStab: Half Iter " << std::setw(11) << iter << " rel. err. " << rnorm/(rnorm0) << '\n'; @@ -222,7 +224,7 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( verbose > 2 ) { - amrex::Print() << "MLCGSolver_BiCGStab: Iteration " + amrex::Print() << print_ident << "MLCGSolver_BiCGStab: Iteration " << std::setw(11) << iter << " rel. err. 
" << rnorm/(rnorm0) << '\n'; @@ -239,7 +241,7 @@ MLCGSolverT::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( verbose > 0 ) { - amrex::Print() << "MLCGSolver_BiCGStab: Final: Iteration " + amrex::Print() << print_ident << "MLCGSolver_BiCGStab: Final: Iteration " << std::setw(4) << iter << " rel. err. " << rnorm/(rnorm0) << '\n'; @@ -303,7 +305,7 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( verbose > 0 ) { - amrex::Print() << "MLCGSolver_CG: Initial error (error0) : " << rnorm0 << '\n'; + amrex::Print() << print_ident << "MLCGSolver_CG: Initial error (error0) : " << rnorm0 << '\n'; } RT rho_1 = 0; @@ -313,7 +315,7 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( rnorm0 == 0 || rnorm0 < eps_abs ) { if ( verbose > 0 ) { - amrex::Print() << "MLCGSolver_CG: niter = 0," + amrex::Print() << print_ident << "MLCGSolver_CG: niter = 0," << ", rnorm = " << rnorm << ", eps_abs = " << eps_abs << '\n'; } @@ -352,7 +354,7 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( verbose > 2 ) { - amrex::Print() << "MLCGSolver_cg:" + amrex::Print() << print_ident << "MLCGSolver_cg:" << " iter " << iter << " rho " << rho << " alpha " << alpha << '\n'; @@ -363,7 +365,7 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( verbose > 2 ) { - amrex::Print() << "MLCGSolver_cg: Iteration" + amrex::Print() << print_ident << "MLCGSolver_cg: Iteration" << std::setw(4) << iter << " rel. err. " << rnorm/(rnorm0) << '\n'; @@ -376,7 +378,7 @@ MLCGSolverT::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs) if ( verbose > 0 ) { - amrex::Print() << "MLCGSolver_cg: Final Iteration" + amrex::Print() << print_ident << "MLCGSolver_cg: Final Iteration" << std::setw(4) << iter << " rel. err. " << rnorm/(rnorm0) << '\n'; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H index b318b318eb2..3138c89450b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H @@ -113,6 +113,10 @@ public: RT xdoty (int amrlev, int mglev, const MF& x, const MF& y, bool local) const final; + RT dotProductPrecond (Vector const& x, Vector const& y) const final; + + RT norm2Precond (Vector const& x) const final; + virtual void Fapply (int amrlev, int mglev, MF& out, const MF& in) const = 0; virtual void Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int redblack) const = 0; virtual void FFlux (int amrlev, const MFIter& mfi, @@ -130,6 +134,9 @@ public: void avgDownResAmr (int clev, MF& cres, MF const& fres) const override; + void beginPrecondBC () override; + void endPrecondBC () override; + struct BCTL { BoundCond type; RT location; @@ -151,6 +158,8 @@ protected: Vector> > m_bndry_cor; Vector> > m_crse_cor_br; + Vector> > m_bndry_sol_zero; + // In case of agglomeration, coarse MG grids on amr level 0 are // not simply coarsened from fine MG grids. So we need to build // bcond and bcloc for each MG level. 
@@ -1939,6 +1948,37 @@ MLCellLinOpT::xdoty (int /*amrlev*/, int /*mglev*/, const MF& x, const MF& y return result; } +template +auto +MLCellLinOpT::dotProductPrecond (Vector const& x, Vector const& y) const -> RT +{ + const int ncomp = this->getNComp(); + const IntVect nghost(0); + RT result = 0; + for (int ilev = 0; ilev < this->NAMRLevels()-1; ++ilev) { + result += amrex::Dot(*m_norm_fine_mask[ilev], *x[ilev], 0, *y[ilev], 0, ncomp, nghost, true); + } + result += amrex::Dot(*x[this->NAMRLevels()-1], 0, + *y[this->NAMRLevels()-1], 0, ncomp, nghost, true); + ParallelAllReduce::Sum(result, ParallelContext::CommunicatorSub()); + return result; +} + +template +auto +MLCellLinOpT::norm2Precond (Vector const& x) const -> RT +{ + const int ncomp = this->getNComp(); + const IntVect nghost(0); + RT result = 0; + for (int ilev = 0; ilev < this->NAMRLevels()-1; ++ilev) { + result += amrex::Dot(*m_norm_fine_mask[ilev], *x[ilev], 0, ncomp, nghost, true); + } + result += amrex::Dot(*x[this->NAMRLevels()-1], 0, ncomp, nghost, true); + ParallelAllReduce::Sum(result, ParallelContext::CommunicatorSub()); + return std::sqrt(result); +} + template void MLCellLinOpT::computeVolInv () const @@ -2141,6 +2181,42 @@ MLCellLinOpT::avgDownResAmr (int clev, MF& cres, MF const& fres) const } } +template +void +MLCellLinOpT::beginPrecondBC () +{ + this->m_precond_mode = true; + + if (m_bndry_sol_zero.empty()) { + m_bndry_sol_zero.resize(m_bndry_sol.size()); + const int ncomp = this->getNComp(); + for (int amrlev = 0; amrlev < this->m_num_amr_levels; ++amrlev) { + m_bndry_sol_zero[amrlev] = std::make_unique> + (this->m_grids[amrlev][0], + this->m_dmap[amrlev][0], + ncomp, + this->m_geom[amrlev][0]); + } + std::swap(m_bndry_sol, m_bndry_sol_zero); + MF const* coarse_data_for_bc_save = this->m_coarse_data_for_bc; + this->m_coarse_data_for_bc = nullptr; + for (int amrlev = 0; amrlev < this->m_num_amr_levels; ++amrlev) { + this->setLevelBC(amrlev, nullptr); + } + this->m_coarse_data_for_bc = coarse_data_for_bc_save; + } else { + std::swap(m_bndry_sol, m_bndry_sol_zero); + } +} + +template +void +MLCellLinOpT::endPrecondBC () +{ + this->m_precond_mode = false; + std::swap(m_bndry_sol, m_bndry_sol_zero); +} + extern template class MLCellLinOpT; using MLCellLinOp = MLCellLinOpT; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp index f77cd8de900..940bfa045a2 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap_F.cpp @@ -33,7 +33,7 @@ MLEBABecLap::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) c const auto *const ccent = (factory) ? 
&(factory->getCentroid()) : nullptr; const bool is_eb_dirichlet = isEBDirichlet(); - const bool is_eb_inhomog = m_is_eb_inhomog; + const bool is_eb_inhomog = m_is_eb_inhomog && (!this->m_precond_mode); const int ncomp = getNComp(); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H index 6ebbd2c65e7..89bc763cea5 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H @@ -76,7 +76,7 @@ public: [[nodiscard]] std::unique_ptr > makeFactory (int amrlev, int mglev) const final; - void scaleRHS (int amrlev, MultiFab& rhs) const final; + [[nodiscard]] bool scaleRHS (int amrlev, MultiFab* rhs) const final; #endif @@ -117,7 +117,7 @@ public: Array4 const& bfab) const override; #endif - void postSolve (Vector& sol) const override; + void postSolve (Vector const& sol) const override; private: GpuArray m_sigma{{AMREX_D_DECL(1_rt,1_rt,1_rt)}}; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp index b5a69e455a1..61580e4d368 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp @@ -363,34 +363,39 @@ MLEBNodeFDLaplacian::prepareForSolve () } #ifdef AMREX_USE_EB -void -MLEBNodeFDLaplacian::scaleRHS (int amrlev, MultiFab& rhs) const +bool +MLEBNodeFDLaplacian::scaleRHS (int amrlev, MultiFab* rhs) const { const auto *factory = dynamic_cast(m_factory[amrlev][0].get()); - if (!factory) { return; } - auto const& dmask = *m_dirichlet_mask[amrlev][0]; - auto const& edgecent = factory->getEdgeCent(); + if (!factory) {return false; } + + if (rhs) { + auto const& dmask = *m_dirichlet_mask[amrlev][0]; + auto const& edgecent = factory->getEdgeCent(); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for (MFIter mfi(rhs,TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - const Box& box = mfi.tilebox(); - Array4 const& rhsarr = rhs.array(mfi); - Array4 const& dmarr = dmask.const_array(mfi); - bool cutfab = edgecent[0]->ok(mfi); - if (cutfab) { - AMREX_D_TERM(Array4 const& ecx = edgecent[0]->const_array(mfi);, - Array4 const& ecy = edgecent[1]->const_array(mfi);, - Array4 const& ecz = edgecent[2]->const_array(mfi)); - AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, - { - mlebndfdlap_scale_rhs(i,j,k,rhsarr,dmarr,AMREX_D_DECL(ecx,ecy,ecz)); - }); + for (MFIter mfi(*rhs,TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& box = mfi.tilebox(); + Array4 const& rhsarr = rhs->array(mfi); + Array4 const& dmarr = dmask.const_array(mfi); + bool cutfab = edgecent[0]->ok(mfi); + if (cutfab) { + AMREX_D_TERM(Array4 const& ecx = edgecent[0]->const_array(mfi);, + Array4 const& ecy = edgecent[1]->const_array(mfi);, + Array4 const& ecz = edgecent[2]->const_array(mfi)); + AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, + { + mlebndfdlap_scale_rhs(i,j,k,rhsarr,dmarr,AMREX_D_DECL(ecx,ecy,ecz)); + }); + } } } + + return true; } #endif @@ -414,7 +419,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa auto const& dmask = *m_dirichlet_mask[amrlev][mglev]; #ifdef AMREX_USE_EB - const auto phieb = (m_in_solution_mode) ? m_s_phi_eb : Real(0.0); + const auto phieb = (m_in_solution_mode && !this->m_precond_mode) ? 
m_s_phi_eb : Real(0.0); const auto *factory = dynamic_cast(m_factory[amrlev][mglev].get()); Array edgecent {AMREX_D_DECL(nullptr,nullptr,nullptr)}; if (factory) { @@ -751,16 +756,17 @@ MLEBNodeFDLaplacian::fillRHS (MFIter const& /*mfi*/, Array4 const& /* #endif void -MLEBNodeFDLaplacian::postSolve (Vector& sol) const +MLEBNodeFDLaplacian::postSolve (Vector const& sol) const { #ifdef AMREX_USE_EB + if (this->m_precond_mode) { return; } for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { const auto phieb = m_s_phi_eb; const auto *factory = dynamic_cast(m_factory[amrlev][0].get()); if (!factory) { return; } auto const& levset_mf = factory->getLevelSet(); auto const& levset_ar = levset_mf.const_arrays(); - MultiFab& mf = sol[amrlev]; + MultiFab& mf = *sol[amrlev]; auto const& sol_ar = mf.arrays(); if (phieb == std::numeric_limits::lowest()) { auto const& phieb_ar = m_phi_eb[amrlev].const_arrays(); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp index 3fd1b26cae4..894a9657f24 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp @@ -269,7 +269,7 @@ MLEBTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode Array4 foo; const bool is_eb_dirichlet = isEBDirichlet(); - const bool is_eb_inhomog = m_is_eb_inhomog; + const bool is_eb_inhomog = m_is_eb_inhomog && !this->m_precond_mode; Array4 const& velbfab = (is_eb_dirichlet && is_eb_inhomog) ? m_eb_phi[amrlev]->const_array(mfi) : foo; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index ce6a8b53335..08fa588b4a0 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -455,7 +455,9 @@ public: virtual void applyOverset (int /*amlev*/, MF& /*rhs*/) const {} //! scale RHS to fix solvability - virtual void scaleRHS (int /*amrlev*/, MF& /*rhs*/) const {} + [[nodiscard]] virtual bool scaleRHS (int /*amrlev*/, MF* /*rhs*/) const { + return false; + } //! get offset for fixing solvability virtual Vector getSolvabilityOffset (int /*amrlev*/, int /*mglev*/, @@ -467,7 +469,7 @@ public: virtual void prepareForSolve () = 0; - virtual void prepareForGMRES () {} + virtual void preparePrecond () {} //! For GMRES to work, this might need to be implemented to mask out //! Dirichlet nodes or cells (e.g., EB covered cells or overset cells) @@ -485,6 +487,10 @@ public: //! x dot y, used by the bottom solver virtual RT xdoty (int amrlev, int mglev, const MF& x, const MF& y, bool local) const = 0; + virtual RT dotProductPrecond (Vector const& x, Vector const& y) const; + + virtual RT norm2Precond (Vector const& x) const; + virtual std::unique_ptr> makeNLinOp (int /*grid_size*/) const { amrex::Abort("MLLinOp::makeNLinOp: N-Solve not supported"); @@ -533,7 +539,7 @@ public: virtual void copyNSolveSolution (MF&, MF const&) const {} - virtual void postSolve (Vector& /*sol*/) const {} + virtual void postSolve (Vector const& /*sol*/) const {} [[nodiscard]] virtual RT normInf (int amrlev, MF const& mf, bool local) const = 0; @@ -548,6 +554,9 @@ public: // This function is needed for FMG cycle, but not V-cycle. virtual void avgDownResMG (int clev, MF& cres, MF const& fres) const; + virtual void beginPrecondBC () { m_precond_mode = true; } + virtual void endPrecondBC () { m_precond_mode = false; } + [[nodiscard]] bool isMFIterSafe (int amrlev, int mglev1, int mglev2) const; //! 
Return the number of AMR levels @@ -628,6 +637,8 @@ protected: const MF* m_coarse_data_for_bc = nullptr; MF m_coarse_data_for_bc_raii; + bool m_precond_mode = false; + //! Return AMR refinement ratios [[nodiscard]] const Vector& AMRRefRatio () const noexcept { return m_amr_ref_ratio; } @@ -1643,6 +1654,23 @@ MLLinOpT::setLevelBC (int amrlev, const AMF* levelbcdata, robin_b_raii[amrlev].get(), robin_f_raii[amrlev].get()); } +template +auto +MLLinOpT::dotProductPrecond (Vector const& x, Vector const& y) const -> RT +{ + AMREX_ALWAYS_ASSERT(NAMRLevels() == 1); + return xdoty(0,0,*x[0],*y[0],false); +} + +template +auto +MLLinOpT::norm2Precond (Vector const& x) const -> RT +{ + AMREX_ALWAYS_ASSERT(NAMRLevels() == 1); + auto r = xdoty(0,0,*x[0],*x[0],false); + return std::sqrt(r); +} + extern template class MLLinOpT; using MLLinOp = MLLinOpT; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index 78b2ffdd3df..4b666f2c03d 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -50,6 +50,9 @@ public: std::initializer_list a_rhs, RT a_tol_rel, RT a_tol_abs, const char* checkpoint_file = nullptr); + RT precond (Vector const& a_sol, Vector const& a_rhs, + RT a_tol_rel, RT a_tol_abs); + template void getGradSolution (const Vector >& a_grad_sol, Location a_loc = Location::FaceCenter); @@ -113,11 +116,21 @@ public: */ void apply (const Vector& out, const Vector& in); + //! out = L(in) as a preconditioner + void applyPrecond (const Vector& out, const Vector& in); + + [[nodiscard]] int getVerbose () const { return verbose; } + [[nodiscard]] int getBottomVerbose () const { return bottom_verbose; } + + void incPrintIdentation (); + void decPrintIdentation (); + void setThrowException (bool t) noexcept { throw_exception = t; } void setVerbose (int v) noexcept { verbose = v; } void setMaxIter (int n) noexcept { max_iters = n; } void setMaxFmgIter (int n) noexcept { max_fmg_iters = n; } void setFixedIter (int nit) noexcept { do_fixed_number_of_iters = nit; } + void setPrecondIter (int nit) noexcept { max_precond_iters = nit; } void setPreSmooth (int n) noexcept { nu1 = n; } void setPostSmooth (int n) noexcept { nu2 = n; } @@ -125,12 +138,13 @@ public: void setBottomSmooth (int n) noexcept { nub = n; } void setBottomSolver (BottomSolver s) noexcept { bottom_solver = s; } + [[nodiscard]] BottomSolver getBottomSolver () const noexcept { return bottom_solver; } void setCFStrategy (CFStrategy a_cf_strategy) noexcept {cf_strategy = a_cf_strategy;} void setBottomVerbose (int v) noexcept { bottom_verbose = v; } void setBottomMaxIter (int n) noexcept { bottom_maxiter = n; } void setBottomTolerance (RT t) noexcept { bottom_reltol = t; } void setBottomToleranceAbs (RT t) noexcept { bottom_abstol = t;} - RT getBottomToleranceAbs () noexcept{ return bottom_abstol; } + [[nodiscard]] RT getBottomToleranceAbs () const noexcept{ return bottom_abstol; } void setAlwaysUseBNorm (int flag) noexcept { always_use_bnorm = flag; } @@ -173,9 +187,7 @@ public: void prepareLinOp (); - void prepareMGcycle (); - - void prepareForGMRES (); + void preparePrecond (); void oneIter (int iter); @@ -231,11 +243,13 @@ public: private: + bool precond_mode = false; bool throw_exception = false; int verbose = 1; int max_iters = 200; int do_fixed_number_of_iters = 0; + int max_precond_iters = 1; int nu1 = 2; //!< pre int nu2 = 2; //!< post @@ -271,6 +285,8 @@ private: std::unique_ptr ns_sol; std::unique_ptr ns_rhs; + std::string print_ident; + //! 
Hypre #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) // Hypre::Interface hypre_interface = Hypre::Interface::structed; @@ -400,8 +416,8 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, if (verbose >= 1) { - amrex::Print() << "MLMG: Initial rhs = " << rhsnorm0 << "\n" - << "MLMG: Initial residual (resid0) = " << resnorm0 << "\n"; + amrex::Print() << print_ident << "MLMG: Initial rhs = " << rhsnorm0 << "\n" + << print_ident << "MLMG: Initial residual (resid0) = " << resnorm0 << "\n"; } } @@ -422,7 +438,7 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, if (!is_nsolve && resnorm0 <= res_target) { composite_norminf = resnorm0; if (verbose >= 1) { - amrex::Print() << "MLMG: No iterations needed\n"; + amrex::Print() << print_ident << "MLMG: No iterations needed\n"; } } else { auto iter_start_time = amrex::second(); @@ -444,7 +460,7 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, m_iter_fine_resnorm0.push_back(fine_norminf); composite_norminf = fine_norminf; if (verbose >= 2) { - amrex::Print() << "MLMG: Iteration " << std::setw(3) << iter+1 << " Fine resid/" + amrex::Print() << print_ident << "MLMG: Iteration " << std::setw(3) << iter+1 << " Fine resid/" << norm_name << " = " << fine_norminf/max_norm << "\n"; } bool fine_converged = (fine_norminf <= res_target); @@ -456,7 +472,7 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, computeMLResidual(finest_amr_lev-1); RT crse_norminf = MLResNormInf(finest_amr_lev-1); if (verbose >= 2) { - amrex::Print() << "MLMG: Iteration " << std::setw(3) << iter+1 + amrex::Print() << print_ident << "MLMG: Iteration " << std::setw(3) << iter+1 << " Crse resid/" << norm_name << " = " << crse_norminf/max_norm << "\n"; } @@ -468,7 +484,7 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, if (converged) { if (verbose >= 1) { - amrex::Print() << "MLMG: Final Iter. " << iter+1 + amrex::Print() << print_ident << "MLMG: Final Iter. " << iter+1 << " resid, resid/" << norm_name << " = " << composite_norminf << ", " << composite_norminf/max_norm << "\n"; @@ -478,7 +494,7 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, if (composite_norminf > RT(1.e20)*max_norm) { if (verbose > 0) { - amrex::Print() << "MLMG: Failing to converge after " << iter+1 << " iterations." + amrex::Print() << print_ident << "MLMG: Failing to converge after " << iter+1 << " iterations." << " resid, resid/" << norm_name << " = " << composite_norminf << ", " << composite_norminf/max_norm << "\n"; @@ -495,7 +511,7 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, if (!converged && do_fixed_number_of_iters == 0) { if (verbose > 0) { - amrex::Print() << "MLMG: Failed to converge after " << max_iters << " iterations." + amrex::Print() << print_ident << "MLMG: Failed to converge after " << max_iters << " iterations." << " resid, resid/" << norm_name << " = " << composite_norminf << ", " << composite_norminf/max_norm << "\n"; @@ -510,7 +526,7 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, timer[iter_time] = amrex::second() - iter_start_time; } - linop.postSolve(sol); + linop.postSolve(GetVecOfPtrs(sol)); IntVect ng_back = final_fill_bc ? 
IntVect(1) : IntVect(0); if (linop.hasHiddenDimension()) { @@ -529,7 +545,7 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, ParallelContext::CommunicatorSub()); if (ParallelContext::MyProcSub() == 0) { - amrex::AllPrint() << "MLMG: Timers: Solve = " << timer[solve_time] + amrex::AllPrint() << print_ident << "MLMG: Timers: Solve = " << timer[solve_time] << " Iter = " << timer[iter_time] << " Bottom = " << timer[bottom_time] << "\n"; } @@ -540,6 +556,24 @@ MLMGT::solve (const Vector& a_sol, const Vector& a_rhs, return composite_norminf; } +template +auto +MLMGT::precond (const Vector& a_sol, const Vector& a_rhs, + RT a_tol_rel, RT a_tol_abs) -> RT +{ + precond_mode = true; + std::swap(max_precond_iters, do_fixed_number_of_iters); + linop.beginPrecondBC(); + + auto r = solve(a_sol, a_rhs, a_tol_rel, a_tol_abs); + + linop.endPrecondBC(); + std::swap(max_precond_iters, do_fixed_number_of_iters); + precond_mode = false; + + return r; +} + template void MLMGT::prepareForFluxes (Vector const& a_sol) @@ -931,6 +965,17 @@ MLMGT::apply (const Vector& out, const Vector& a_in) } } +template +void +MLMGT::applyPrecond (const Vector& out, const Vector& in) +{ + precond_mode = true; + linop.beginPrecondBC(); + apply(out, in); + linop.endPrecondBC(); + precond_mode = false; +} + template template void @@ -1008,7 +1053,10 @@ MLMGT::prepareForSolve (Vector const& a_sol, Vector const& linop.unimposeNeumannBC(alev, rhs[alev]); linop.applyInhomogNeumannTerm(alev, rhs[alev]); linop.applyOverset(alev, rhs[alev]); - linop.scaleRHS(alev, rhs[alev]); + if ( ! precond_mode) { + bool r = linop.scaleRHS(alev, &(rhs[alev])); + amrex::ignore_unused(r); + } #ifdef AMREX_USE_EB const auto *factory = dynamic_cast(linop.Factory(alev)); @@ -1105,12 +1153,12 @@ MLMGT::prepareForSolve (Vector const& a_sol, Vector const& } if (verbose >= 2) { - amrex::Print() << "MLMG: # of AMR levels: " << namrlevs << "\n" - << " # of MG levels on the coarsest AMR level: " << linop.NMGLevels(0) + amrex::Print() << print_ident << "MLMG: # of AMR levels: " << namrlevs << "\n" + << print_ident << " # of MG levels on the coarsest AMR level: " << linop.NMGLevels(0) << "\n"; if (ns_linop) { - amrex::Print() << " # of MG levels in N-Solve: " << ns_linop->NMGLevels(0) << "\n" - << " # of grids in N-Solve: " << ns_linop->m_grids[0][0].size() << "\n"; + amrex::Print() << print_ident << " # of MG levels in N-Solve: " << ns_linop->NMGLevels(0) << "\n" + << print_ident << " # of grids in N-Solve: " << ns_linop->m_grids[0][0].size() << "\n"; } } } @@ -1129,65 +1177,10 @@ MLMGT::prepareLinOp () template void -MLMGT::prepareForGMRES () +MLMGT::preparePrecond () { prepareLinOp(); - linop.prepareForGMRES(); -} - -template -void -MLMGT::prepareMGcycle () -{ - if (res.empty()) { - IntVect ng = linop.getNGrowVectRestriction(); - linop.make(res, ng); - linop.make(rescor, ng); - - for (int alev = 0; alev <= finest_amr_lev; ++alev) - { - const int nmglevs = linop.NMGLevels(alev); - for (int mglev = 0; mglev < nmglevs; ++mglev) - { - setVal(res [alev][mglev], RT(0.0)); - setVal(rescor[alev][mglev], RT(0.0)); - } - } - - IntVect ng_sol(1); - if (linop.hasHiddenDimension()) { ng_sol[linop.hiddenDirection()] = 0; } - ng = ng_sol; - - cor.resize(namrlevs); - for (int alev = 0; alev <= finest_amr_lev; ++alev) - { - const int nmglevs = linop.NMGLevels(alev); - cor[alev].resize(nmglevs); - for (int mglev = 0; mglev < nmglevs; ++mglev) - { - cor[alev][mglev] = linop.make(alev, mglev, ng); - setVal(cor[alev][mglev], RT(0.0)); - } - } - - 
cor_hold.resize(std::max(namrlevs-1,1)); - { - const int alev = 0; - const int nmglevs = linop.NMGLevels(alev); - cor_hold[alev].resize(nmglevs); - for (int mglev = 0; mglev < nmglevs-1; ++mglev) - { - cor_hold[alev][mglev] = linop.make(alev, mglev, ng); - setVal(cor_hold[alev][mglev], RT(0.0)); - } - } - for (int alev = 1; alev < finest_amr_lev; ++alev) - { - cor_hold[alev].resize(1); - cor_hold[alev][0] = linop.make(alev, 0, ng); - setVal(cor_hold[alev][0], RT(0.0)); - } - } + linop.preparePrecond(); } template @@ -1318,7 +1311,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { RT norm = norminf(res[amrlev][mglev],0,ncomp,IntVect(0)); - amrex::Print() << "AT LEVEL " << amrlev << " " << mglev + amrex::Print() << print_ident << "AT LEVEL " << amrlev << " " << mglev << " DN: Norm before smooth " << norm << "\n"; } @@ -1335,7 +1328,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0)); - amrex::Print() << "AT LEVEL " << amrlev << " " << mglev + amrex::Print() << print_ident << "AT LEVEL " << amrlev << " " << mglev << " DN: Norm after smooth " << norm << "\n"; } @@ -1349,7 +1342,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { RT norm = norminf(res[amrlev][mglev_bottom],0,ncomp,IntVect(0)); - amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom + amrex::Print() << print_ident << "AT LEVEL " << amrlev << " " << mglev_bottom << " DN: Norm before bottom " << norm << "\n"; } bottomSolve(); @@ -1357,7 +1350,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) { computeResOfCorrection(amrlev, mglev_bottom); RT norm = norminf(rescor[amrlev][mglev_bottom],0,ncomp,IntVect(0)); - amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom + amrex::Print() << print_ident << "AT LEVEL " << amrlev << " " << mglev_bottom << " UP: Norm after bottom " << norm << "\n"; } } @@ -1366,7 +1359,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { RT norm = norminf(res[amrlev][mglev_bottom],0,ncomp,IntVect(0)); - amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom + amrex::Print() << print_ident << "AT LEVEL " << amrlev << " " << mglev_bottom << " Norm before smooth " << norm << "\n"; } setVal(cor[amrlev][mglev_bottom], RT(0.0)); @@ -1380,7 +1373,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) { computeResOfCorrection(amrlev, mglev_bottom); RT norm = norminf(rescor[amrlev][mglev_bottom],0,ncomp,IntVect(0)); - amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom + amrex::Print() << print_ident << "AT LEVEL " << amrlev << " " << mglev_bottom << " Norm after smooth " << norm << "\n"; } } @@ -1395,7 +1388,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) { computeResOfCorrection(amrlev, mglev); RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0)); - amrex::Print() << "AT LEVEL " << amrlev << " " << mglev + amrex::Print() << print_ident << "AT LEVEL " << amrlev << " " << mglev << " UP: Norm before smooth " << norm << "\n"; } for (int i = 0; i < nu2; ++i) { @@ -1408,7 +1401,7 @@ MLMGT::mgVcycle (int amrlev, int mglev_top) { computeResOfCorrection(amrlev, mglev); RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0)); - amrex::Print() << "AT LEVEL " << amrlev << " " << mglev + amrex::Print() << print_ident << "AT LEVEL " << amrlev << " " << mglev << " UP: Norm after smooth " << norm << "\n"; } } @@ -1616,13 +1609,14 @@ MLMGT::bottomSolveWithCG (MF& x, const MF& b, typename MLCGSolverT::Type MLCGSolverT cg_solver(linop); cg_solver.setSolver(type); 
cg_solver.setVerbose(bottom_verbose); + cg_solver.setPrintIdentation(print_ident); cg_solver.setMaxIter(bottom_maxiter); cg_solver.setInitSolnZeroed(true); if (cf_strategy == CFStrategy::ghostnodes) { cg_solver.setNGhost(linop.getNGrow()); } int ret = cg_solver.solve(x, b, bottom_reltol, bottom_abstol); if (ret != 0 && verbose > 1) { - amrex::Print() << "MLMG: Bottom solve failed.\n"; + amrex::Print() << print_ident << "MLMG: Bottom solve failed.\n"; } m_niters_cg.push_back(cg_solver.getNumIters()); return ret; @@ -1848,7 +1842,7 @@ MLMGT::makeSolvable () auto const& offset = linop.getSolvabilityOffset(0, 0, rhs[0]); if (verbose >= 4) { for (int c = 0; c < ncomp; ++c) { - amrex::Print() << "MLMG: Subtracting " << offset[c] << " from rhs component " + amrex::Print() << print_ident << "MLMG: Subtracting " << offset[c] << " from rhs component " << c << "\n"; } } @@ -1864,7 +1858,7 @@ MLMGT::makeSolvable (int amrlev, int mglev, MF& mf) auto const& offset = linop.getSolvabilityOffset(amrlev, mglev, mf); if (verbose >= 4) { for (int c = 0; c < ncomp; ++c) { - amrex::Print() << "MLMG: Subtracting " << offset[c] + amrex::Print() << print_ident << "MLMG: Subtracting " << offset[c] << " from mf component c = " << c << " on level (" << amrlev << ", " << mglev << ")\n"; } @@ -2031,6 +2025,24 @@ MLMGT::checkPoint (const Vector& a_sol, linop.checkPoint(file_name+"/linop"); } +template +void +MLMGT::incPrintIdentation () +{ + print_ident.resize(print_ident.size()+4, ' '); +} + +template +void +MLMGT::decPrintIdentation () +{ + if (print_ident.size() > 4) { + print_ident.resize(print_ident.size()-4, ' '); + } else { + print_ident.clear(); + } +} + extern template class MLMGT; using MLMG = MLMGT; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sync.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sync.cpp index fdf1f08ae59..1ae0205f986 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sync.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sync.cpp @@ -674,9 +674,9 @@ MLNodeLaplacian::reflux (int crse_amrlev, MultiFab fine_res_for_coarse(amrex::coarsen(fba, amrrr), fdm, 1, 0); std::unique_ptr tmp_fine_res; - if (amrrr == 4 && !a_fine_res.nGrowVect().allGE(3)) { + if (!a_fine_res.nGrowVect().allGE(amrrr-1)) { tmp_fine_res = std::make_unique(a_fine_res.boxArray(), - a_fine_res.DistributionMap(), 1, 3); + a_fine_res.DistributionMap(), 1, amrrr-1); MultiFab::Copy(*tmp_fine_res, a_fine_res, 0, 0, 1, 0); } MultiFab& fine_res = (tmp_fine_res) ? *tmp_fine_res : a_fine_res; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H index 9adb2f12408..a9c1a65b525 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H @@ -59,7 +59,7 @@ public: void prepareForSolve () override; - void prepareForGMRES () override; + void preparePrecond () override; void setDirichletNodesToZero (int amrlev, int mglev, MultiFab& mf) const override; @@ -69,6 +69,11 @@ public: Real xdoty (int amrlev, int mglev, const MultiFab& x, const MultiFab& y, bool local) const final; + Real dotProductPrecond (Vector const& x, + Vector const& y) const final; + + Real norm2Precond (Vector const& x) const final; + virtual void applyBC (int amrlev, int mglev, MultiFab& phi, BCMode bc_mode, StateMode state_mode, bool skip_fillboundary=false) const; @@ -135,6 +140,7 @@ protected: Vector > > m_has_fine_bndry; // does this fab contain c/f boundary? 
MultiFab m_bottom_dot_mask; MultiFab m_coarse_dot_mask; + Vector m_precond_weight_mask; Vector > m_norm_fine_mask; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp index 929d05dc5a1..9719ac3d3e8 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp @@ -183,19 +183,41 @@ MLNodeLinOp::xdoty (int amrlev, int mglev, const MultiFab& x, const MultiFab& y, AMREX_ASSERT(mglev+1==m_num_mg_levels[0] || mglev==0); const auto& mask = (mglev+1 == m_num_mg_levels[0]) ? m_bottom_dot_mask : m_coarse_dot_mask; const int ncomp = y.nComp(); - const int nghost = 0; - MultiFab tmp(x.boxArray(), x.DistributionMap(), ncomp, 0); - MultiFab::Copy(tmp, x, 0, 0, ncomp, nghost); - for (int i = 0; i < ncomp; i++) { - MultiFab::Multiply(tmp, mask, 0, i, 1, nghost); - } - Real result = MultiFab::Dot(tmp,0,y,0,ncomp,nghost,true); + const IntVect nghost(0); + Real result = amrex::Dot(mask, x, 0, y, 0, ncomp, nghost, true); if (!local) { ParallelAllReduce::Sum(result, ParallelContext::CommunicatorSub()); } return result; } +Real +MLNodeLinOp::dotProductPrecond (Vector const& x, + Vector const& y) const +{ + Real result = 0; + const int ncomp = x[0]->nComp(); + for (int ilev = 0; ilev < NAMRLevels(); ++ilev) { + result += amrex::Dot(m_precond_weight_mask[ilev], + *x[ilev],0,*y[ilev],0,ncomp,IntVect(0),true); + } + ParallelAllReduce::Sum(result, ParallelContext::CommunicatorSub()); + return result; +} + +Real +MLNodeLinOp::norm2Precond (Vector const& x) const +{ + Real result = 0; + const int ncomp = x[0]->nComp(); + for (int ilev = 0; ilev < NAMRLevels(); ++ilev) { + result += amrex::Dot(m_precond_weight_mask[ilev], + *x[ilev],0,ncomp,IntVect(0),true); + } + ParallelAllReduce::Sum(result, ParallelContext::CommunicatorSub()); + return std::sqrt(result); +} + Vector MLNodeLinOp::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const { @@ -384,17 +406,56 @@ MLNodeLinOp::buildMasks () } void -MLNodeLinOp::prepareForGMRES () +MLNodeLinOp::preparePrecond () { - if (m_coarse_dot_mask.empty()) { - int amrlev = 0; - int mglev = 0; - const Geometry& geom = m_geom[amrlev][mglev]; - const iMultiFab& omask = *m_owner_mask_top; - m_coarse_dot_mask.define(omask.boxArray(), omask.DistributionMap(), 1, 0); - const auto lobc = LoBC(); - const auto hibc = HiBC(); - MLNodeLinOp_set_dot_mask(m_coarse_dot_mask, omask, geom, lobc, hibc, m_coarsening_strategy); + if (m_precond_weight_mask.empty()) { + m_precond_weight_mask.resize(m_num_amr_levels); + for (int ilev = 0; ilev < m_num_amr_levels; ++ilev) { + m_precond_weight_mask[ilev].define(amrex::convert(m_grids[ilev][0],IntVect(1)), + m_dmap[ilev][0], 1, 0); + auto omask = makeOwnerMask(m_grids[ilev][0], + m_dmap[ilev][0], + m_geom[ilev][0]); + const auto lobc = LoBC(); + const auto hibc = HiBC(); + Box nddomain = amrex::surroundingNodes(m_geom[ilev][0].Domain()); + if (m_coarsening_strategy != MLNodeLinOp::CoarseningStrategy::Sigma) { + nddomain.grow(1000); // hack to avoid masks being modified at Neumann boundary + } + if (ilev < m_num_amr_levels-1) { + auto const& fmask = *m_nd_fine_mask[ilev]; +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(m_precond_weight_mask[ilev],TilingIfNotGPU()); + mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + Array4 const& dfab = m_precond_weight_mask[ilev].array(mfi); + Array4 const& sfab = omask->const_array(mfi); + Array4 const& ffab = fmask.const_array(mfi); + 
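+ // Build the dot-product weight used by dotProductPrecond/norm2Precond:
+ // copy the owner mask where this level is not covered by a finer level
+ // (fine mask == 0), zero it where it is covered, and (for the Sigma
+ // coarsening strategy) halve it on Neumann/inflow domain faces; see the
+ // mlndlap_set_dot_mask kernels added below.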
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx, + { + mlndlap_set_dot_mask(tbx, dfab, sfab, ffab, nddomain, lobc, hibc); + }); + } + } else { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(m_precond_weight_mask[ilev],TilingIfNotGPU()); + mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + Array4 const& dfab = m_precond_weight_mask[ilev].array(mfi); + Array4 const& sfab = omask->const_array(mfi); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx, + { + mlndlap_set_dot_mask(tbx, dfab, sfab, nddomain, lobc, hibc); + }); + } + } + } } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_1D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_1D_K.H index b842dd81b88..c6222dd4a8a 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_1D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_1D_K.H @@ -162,6 +162,41 @@ void mlndlap_set_dot_mask (Box const& bx, Array4 const& dmsk, } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlndlap_set_dot_mask (Box const& bx, Array4 const& dmsk, + Array4 const& omsk, + Array4 const& fmsk, Box const& dom, + GpuArray const& bclo, + GpuArray const& bchi) noexcept +{ + const auto lo = bx.smallEnd(0); + const auto hi = bx.bigEnd(0); + + AMREX_PRAGMA_SIMD + for (int i = lo; i <= hi; ++i) { + if (fmsk(i,0,0) == 0) { + dmsk(i,0,0) = static_cast(omsk(i,0,0)); + } else { + dmsk(i,0,0) = Real(0); + } + } + + const auto domlo = dom.smallEnd(0); + const auto domhi = dom.bigEnd(0); + + if ((bclo[0] == LinOpBCType::Neumann || bclo[0] == LinOpBCType::inflow) + && lo == domlo) + { + dmsk(lo,0,0) *= Real(0.5); + } + + if ((bchi[0] == LinOpBCType::Neumann || bchi[0] == LinOpBCType::inflow) + && hi == domhi) + { + dmsk(hi,0,0) *= Real(0.5); + } +} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_2D_K.H index 3d8746cf058..2646f48057c 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_2D_K.H @@ -312,6 +312,63 @@ void mlndlap_set_dot_mask (Box const& bx, Array4 const& dmsk, } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlndlap_set_dot_mask (Box const& bx, Array4 const& dmsk, + Array4 const& omsk, + Array4 const& fmsk, Box const& dom, + GpuArray const& bclo, + GpuArray const& bchi) noexcept +{ + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (fmsk(i,j,0) == 0) { + dmsk(i,j,0) = static_cast(omsk(i,j,0)); + } else { + dmsk(i,j,0) = Real(0); + } + }} + + const auto domlo = amrex::lbound(dom); + const auto domhi = amrex::ubound(dom); + + if ((bclo[0] == LinOpBCType::Neumann || bclo[0] == LinOpBCType::inflow) + && lo.x == domlo.x) + { + for (int j = lo.y; j <= hi.y; ++j) { + dmsk(lo.x,j,0) *= Real(0.5); + } + } + + if ((bchi[0] == LinOpBCType::Neumann || bchi[0] == LinOpBCType::inflow) + && hi.x == domhi.x) + { + for (int j = lo.y; j <= hi.y; ++j) { + dmsk(hi.x,j,0) *= Real(0.5); + } + } + + if ((bclo[1] == LinOpBCType::Neumann || bclo[1] == LinOpBCType::inflow) + && lo.y == domlo.y) + { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + dmsk(i,lo.y,0) *= Real(0.5); + } + } + + if ((bchi[1] == LinOpBCType::Neumann || bchi[1] == LinOpBCType::inflow) + && hi.y == domhi.y) + { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + dmsk(i,hi.y,0) *= Real(0.5); + } + } +} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_3D_K.H 
b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_3D_K.H index 976a16c7aa5..bf9c14cb491 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp_3D_K.H @@ -805,6 +805,88 @@ void mlndlap_set_dot_mask (Box const& bx, Array4 const& dmsk, } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlndlap_set_dot_mask (Box const& bx, Array4 const& dmsk, + Array4 const& omsk, + Array4 const& fmsk, Box const& dom, + GpuArray const& bclo, + GpuArray const& bchi) noexcept +{ + const auto lo = amrex::lbound(bx); + const auto hi = amrex::ubound(bx); + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (fmsk(i,j,k) == 0) { + dmsk(i,j,k) = static_cast(omsk(i,j,k)); + } else { + dmsk(i,j,k) = Real(0); + } + }}} + + const auto domlo = amrex::lbound(dom); + const auto domhi = amrex::ubound(dom); + + if ((bclo[0] == LinOpBCType::Neumann || bclo[0] == LinOpBCType::inflow) + && lo.x == domlo.x) + { + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + dmsk(lo.x,j,k) *= Real(0.5); + }} + } + + if ((bchi[0] == LinOpBCType::Neumann || bchi[0] == LinOpBCType::inflow) + && hi.x == domhi.x) + { + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + dmsk(hi.x,j,k) *= Real(0.5); + }} + } + + if ((bclo[1] == LinOpBCType::Neumann || bclo[1] == LinOpBCType::inflow) + && lo.y == domlo.y) + { + for (int k = lo.z; k <= hi.z; ++k) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + dmsk(i,lo.y,k) *= Real(0.5); + }} + } + + if ((bchi[1] == LinOpBCType::Neumann || bchi[1] == LinOpBCType::inflow) + && hi.y == domhi.y) + { + for (int k = lo.z; k <= hi.z; ++k) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + dmsk(i,hi.y,k) *= Real(0.5); + }} + } + + if ((bclo[2] == LinOpBCType::Neumann || bclo[2] == LinOpBCType::inflow) + && lo.z == domlo.z) + { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + dmsk(i,j,lo.z) *= Real(0.5); + }} + } + + if ((bchi[2] == LinOpBCType::Neumann || bchi[2] == LinOpBCType::inflow) + && hi.z == domhi.z) + { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + dmsk(i,j,hi.z) *= Real(0.5); + }} + } +} + } #endif diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp index 7a7647ce93a..27f6d439d90 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp @@ -477,12 +477,9 @@ MyTest::solveABecLaplacianGMRES () const auto nlevels = static_cast(geom.size()); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(composite_solve == false || nlevels == 1, - "solveABecLaplacianGMRES does not support composite solve"); - - for (int ilev = 0; ilev < nlevels; ++ilev) + if (composite_solve) { - MLABecLaplacian mlabec({geom[ilev]}, {grids[ilev]}, {dmap[ilev]}, info); + MLABecLaplacian mlabec(geom, grids, dmap, info); mlabec.setMaxOrder(linop_maxorder); @@ -493,40 +490,99 @@ MyTest::solveABecLaplacianGMRES () LinOpBCType::Dirichlet, LinOpBCType::Neumann)}); - if (ilev > 0) { - mlabec.setCoarseFineBC(&solution[ilev-1], ref_ratio); + for (int ilev = 0; ilev < nlevels; ++ilev) + { + mlabec.setLevelBC(ilev, &solution[ilev]); } - // for problem with pure homogeneous Neumann BC, we could pass a nullptr - mlabec.setLevelBC(0, &solution[ilev]); - mlabec.setScalars(ascalar, bscalar); - mlabec.setACoeffs(0, acoef[ilev]); - - Array face_bcoef; - 
diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp
index 7a7647ce93a..27f6d439d90 100644
--- a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp
+++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp
@@ -477,12 +477,9 @@ MyTest::solveABecLaplacianGMRES ()
 
     const auto nlevels = static_cast<int>(geom.size());
 
-    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(composite_solve == false || nlevels == 1,
-                                     "solveABecLaplacianGMRES does not support composite solve");
-
-    for (int ilev = 0; ilev < nlevels; ++ilev)
+    if (composite_solve)
     {
-        MLABecLaplacian mlabec({geom[ilev]}, {grids[ilev]}, {dmap[ilev]}, info);
+        MLABecLaplacian mlabec(geom, grids, dmap, info);
 
         mlabec.setMaxOrder(linop_maxorder);
 
@@ -493,40 +490,99 @@ MyTest::solveABecLaplacianGMRES ()
                                          LinOpBCType::Dirichlet,
                                          LinOpBCType::Neumann)});
 
-        if (ilev > 0) {
-            mlabec.setCoarseFineBC(&solution[ilev-1], ref_ratio);
+        for (int ilev = 0; ilev < nlevels; ++ilev)
+        {
+            mlabec.setLevelBC(ilev, &solution[ilev]);
         }
 
-        // for problem with pure homogeneous Neumann BC, we could pass a nullptr
-        mlabec.setLevelBC(0, &solution[ilev]);
-
         mlabec.setScalars(ascalar, bscalar);
 
-        mlabec.setACoeffs(0, acoef[ilev]);
-
-        Array<MultiFab,AMREX_SPACEDIM> face_bcoef;
-        for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
+        for (int ilev = 0; ilev < nlevels; ++ilev)
         {
-            const BoxArray& ba = amrex::convert(bcoef[ilev].boxArray(),
-                                                IntVect::TheDimensionVector(idim));
-            face_bcoef[idim].define(ba, bcoef[ilev].DistributionMap(), 1, 0);
+            mlabec.setACoeffs(ilev, acoef[ilev]);
+
+            Array<MultiFab,AMREX_SPACEDIM> face_bcoef;
+            for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
+            {
+                const BoxArray& ba = amrex::convert(bcoef[ilev].boxArray(),
+                                                    IntVect::TheDimensionVector(idim));
+                face_bcoef[idim].define(ba, bcoef[ilev].DistributionMap(), 1, 0);
+            }
+            amrex::average_cellcenter_to_face(GetArrOfPtrs(face_bcoef),
+                                              bcoef[ilev], geom[ilev]);
+            mlabec.setBCoeffs(ilev, amrex::GetArrOfConstPtrs(face_bcoef));
         }
-        amrex::average_cellcenter_to_face(GetArrOfPtrs(face_bcoef),
-                                          bcoef[ilev], geom[ilev]);
-        mlabec.setBCoeffs(0, amrex::GetArrOfConstPtrs(face_bcoef));
 
         MLMG mlmg(mlabec);
-        GMRESMLMG gmsolver(mlmg);
+        GMRESMLMGT gmsolver(mlmg);
         gmsolver.usePrecond(true);
         gmsolver.setVerbose(verbose);
-        gmsolver.solve(solution[ilev], rhs[ilev], tol_rel, tol_abs);
+        gmsolver.solve(GetVecOfPtrs(solution), GetVecOfConstPtrs(rhs), tol_rel, tol_abs);
 
         if (verbose) {
-            MultiFab res(rhs[ilev].boxArray(), rhs[ilev].DistributionMap(), 1, 0);
-            mlmg.apply({&res}, {&solution[ilev]}); // res = L(sol)
-            MultiFab::Subtract(res, rhs[ilev], 0, 0, 1, 0); // now res = L(sol) - rhs
-            amrex::Print() << "Final residual = " << res.norminf(0)
-                           << " " << res.norm1(0) << " " << res.norm2(0) << '\n';
+            Vector<MultiFab> res(nlevels);
+            for (int ilev = 0; ilev < nlevels; ++ilev) {
+                res[ilev].define(rhs[ilev].boxArray(), rhs[ilev].DistributionMap(), 1, 0);
+            }
+            mlmg.apply(GetVecOfPtrs(res), GetVecOfPtrs(solution)); // res = L(sol)
+            for (int ilev = 0; ilev < nlevels; ++ilev) {
+                MultiFab::Subtract(res[ilev], rhs[ilev], 0, 0, 1, 0);
+                amrex::Print() << "Final residual at level " << ilev << " = "
+                               << res[ilev].norminf(0) << " " << res[ilev].norm1(0) << " "
+                               << res[ilev].norm2(0) << '\n';
+            }
+        }
+    }
+    else
+    {
+        for (int ilev = 0; ilev < nlevels; ++ilev)
+        {
+            MLABecLaplacian mlabec({geom[ilev]}, {grids[ilev]}, {dmap[ilev]}, info);
+
+            mlabec.setMaxOrder(linop_maxorder);
+
+            mlabec.setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet,
+                                             LinOpBCType::Neumann,
+                                             LinOpBCType::Neumann)},
+                               {AMREX_D_DECL(LinOpBCType::Neumann,
+                                             LinOpBCType::Dirichlet,
+                                             LinOpBCType::Neumann)});
+
+            if (ilev > 0) {
+                mlabec.setCoarseFineBC(&solution[ilev-1], ref_ratio);
+            }
+
+            // for problem with pure homogeneous Neumann BC, we could pass a nullptr
+            mlabec.setLevelBC(0, &solution[ilev]);
+
+            mlabec.setScalars(ascalar, bscalar);
+
+            mlabec.setACoeffs(0, acoef[ilev]);
+
+            Array<MultiFab,AMREX_SPACEDIM> face_bcoef;
+            for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
+            {
+                const BoxArray& ba = amrex::convert(bcoef[ilev].boxArray(),
+                                                    IntVect::TheDimensionVector(idim));
+                face_bcoef[idim].define(ba, bcoef[ilev].DistributionMap(), 1, 0);
+            }
+            amrex::average_cellcenter_to_face(GetArrOfPtrs(face_bcoef),
+                                              bcoef[ilev], geom[ilev]);
+            mlabec.setBCoeffs(0, amrex::GetArrOfConstPtrs(face_bcoef));
+
+            MLMG mlmg(mlabec);
+            GMRESMLMGT gmsolver(mlmg);
+            gmsolver.usePrecond(true);
+            gmsolver.setVerbose(verbose);
+            gmsolver.solve(solution[ilev], rhs[ilev], tol_rel, tol_abs);
+
+            if (verbose) {
+                MultiFab res(rhs[ilev].boxArray(), rhs[ilev].DistributionMap(), 1, 0);
+                mlmg.apply({&res}, {&solution[ilev]}); // res = L(sol)
+                MultiFab::Subtract(res, rhs[ilev], 0, 0, 1, 0); // now res = L(sol) - rhs
+                amrex::Print() << "Final residual = " << res.norminf(0)
+                               << " " << res.norm1(0) << " " << res.norm2(0) << '\n';
+            }
         }
     }
 }
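Note (illustration, not part of the patch): the composite branch above reduces to building one MLABecLaplacian over all AMR levels and handing the per-level solution and rhs vectors to GMRES in a single call. A hedged sketch of that calling pattern, assuming the operator and data are set up exactly as in the test (the helper name and signature are hypothetical):

    #include <AMReX_MLABecLaplacian.H>
    #include <AMReX_MLMG.H>
    #include <AMReX_GMRES_MLMG.H>

    void solveCompositeGMRES (amrex::MLABecLaplacian& mlabec,
                              amrex::Vector<amrex::MultiFab>& solution,
                              amrex::Vector<amrex::MultiFab> const& rhs,
                              amrex::Real tol_rel, amrex::Real tol_abs, int verbose)
    {
        amrex::MLMG mlmg(mlabec);          // multigrid, used here as the preconditioner
        amrex::GMRESMLMGT gmsolver(mlmg);  // GMRES wrapper around MLMG
        gmsolver.usePrecond(true);
        gmsolver.setVerbose(verbose);
        // One solve call spans all AMR levels (multi-level composite solve).
        gmsolver.solve(amrex::GetVecOfPtrs(solution),
                       amrex::GetVecOfConstPtrs(rhs), tol_rel, tol_abs);
    }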
diff --git a/Tests/LinearSolvers/CellEB/GNUmakefile b/Tests/LinearSolvers/CellEB/GNUmakefile
index 45c97ff1584..83dd8799e0b 100644
--- a/Tests/LinearSolvers/CellEB/GNUmakefile
+++ b/Tests/LinearSolvers/CellEB/GNUmakefile
@@ -22,7 +22,7 @@ include ./Make.package
 
 Pdirs := Base Boundary AmrCore
 Pdirs += EB
-Pdirs += LinearSolvers/MLMG
+Pdirs += LinearSolvers
 
 Ppack += $(foreach dir, $(Pdirs), $(AMREX_HOME)/Src/$(dir)/Make.package)
 
diff --git a/Tests/LinearSolvers/CellEB/MyTest.H b/Tests/LinearSolvers/CellEB/MyTest.H
index 4af902ddec3..de06b39db79 100644
--- a/Tests/LinearSolvers/CellEB/MyTest.H
+++ b/Tests/LinearSolvers/CellEB/MyTest.H
@@ -44,6 +44,9 @@ private:
     int max_coarsening_level = 30;
     bool use_hypre = false;
     bool use_petsc = false;
+
+    bool use_gmres = false;
+
     amrex::Vector<amrex::Geometry> geom;
     amrex::Vector<amrex::BoxArray> grids;
     amrex::Vector<amrex::DistributionMapping> dmap;
diff --git a/Tests/LinearSolvers/CellEB/MyTest.cpp b/Tests/LinearSolvers/CellEB/MyTest.cpp
index 8f8c000997e..b895c09c986 100644
--- a/Tests/LinearSolvers/CellEB/MyTest.cpp
+++ b/Tests/LinearSolvers/CellEB/MyTest.cpp
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include <AMReX_GMRES_MLMG.H>
 
 #include
 
@@ -85,7 +86,43 @@ MyTest::solve ()
     }
     const Real tol_rel = reltol;
     const Real tol_abs = 0.0;
-    mlmg.solve(amrex::GetVecOfPtrs(phi), amrex::GetVecOfConstPtrs(rhs), tol_rel, tol_abs);
+
+    if (verbose) {
+        Vector<MultiFab> res(max_level+1);
+        for (int ilev = 0; ilev <= max_level; ++ilev) {
+            res[ilev].define(rhs[ilev].boxArray(), rhs[ilev].DistributionMap(), 1, 0);
+        }
+        mlmg.apply(GetVecOfPtrs(res), GetVecOfPtrs(phi)); // res = L(sol)
+        for (int ilev = 0; ilev <= max_level; ++ilev) {
+            MultiFab::Subtract(res[ilev], rhs[ilev], 0, 0, 1, 0);
+            amrex::Print() << "Initial max, 1 and 2-norm residuals at level " << ilev << " = "
+                           << res[ilev].norminf(0) << " " << res[ilev].norm1(0) << " "
+                           << res[ilev].norm2(0) << '\n';
+        }
+    }
+
+    if (use_gmres) {
+        GMRESMLMGT gmsolver(mlmg);
+        gmsolver.usePrecond(true);
+        gmsolver.setVerbose(verbose);
+        gmsolver.solve(GetVecOfPtrs(phi), GetVecOfConstPtrs(rhs), tol_rel, tol_abs);
+    } else {
+        mlmg.solve(amrex::GetVecOfPtrs(phi), amrex::GetVecOfConstPtrs(rhs), tol_rel, tol_abs);
+    }
+
+    if (verbose) {
+        Vector<MultiFab> res(max_level+1);
+        for (int ilev = 0; ilev <= max_level; ++ilev) {
+            res[ilev].define(rhs[ilev].boxArray(), rhs[ilev].DistributionMap(), 1, 0);
+        }
+        mlmg.apply(GetVecOfPtrs(res), GetVecOfPtrs(phi)); // res = L(sol)
+        for (int ilev = 0; ilev <= max_level; ++ilev) {
+            MultiFab::Subtract(res[ilev], rhs[ilev], 0, 0, 1, 0);
+            amrex::Print() << "Final max, 1 and 2-norm residuals at level " << ilev << " = "
+                           << res[ilev].norminf(0) << " " << res[ilev].norm1(0) << " "
+                           << res[ilev].norm2(0) << '\n';
+        }
+    }
 }
 
 void
@@ -143,6 +180,8 @@ MyTest::readParameters ()
 #ifdef AMREX_USE_PETSC
     pp.query("use_petsc",use_petsc);
 #endif
+
+    pp.query("use_gmres", use_gmres);
 }
 
 void
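Note (illustration, not part of the patch): with the use_gmres flag added above, the CellEB test can exercise the GMRES path on a multi-level hierarchy directly from its inputs file; a minimal hypothetical snippet (all other parameters unchanged):

    use_gmres = 1     # route the composite solve through GMRESMLMGT instead of plain MLMG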
diff --git a/Tests/LinearSolvers/CurlCurl/MyTest.cpp b/Tests/LinearSolvers/CurlCurl/MyTest.cpp
index f8a84ca5051..ea7e23bd68e 100644
--- a/Tests/LinearSolvers/CurlCurl/MyTest.cpp
+++ b/Tests/LinearSolvers/CurlCurl/MyTest.cpp
@@ -65,11 +65,6 @@ MyTest::solve ()
     GMRESMLMGT gmsolver(mlmg);
     gmsolver.usePrecond(gmres_use_precond);
     gmsolver.setPrecondNumIters(gmres_precond_niters);
-
-    // This system has homogeneous BC unlike
-    // Tests/LinearSolvers/ABecLaplacian_C, so the setup is simpler.
-    gmsolver.setPropertyOfZero(true);
-
     gmsolver.setVerbose(verbose);
     gmsolver.solve(solution, rhs, tol_rel, tol_abs);
 }
diff --git a/Tests/LinearSolvers/NodalPoisson/MyTest.cpp b/Tests/LinearSolvers/NodalPoisson/MyTest.cpp
index 39c781d8795..af0248e7e05 100644
--- a/Tests/LinearSolvers/NodalPoisson/MyTest.cpp
+++ b/Tests/LinearSolvers/NodalPoisson/MyTest.cpp
@@ -59,7 +59,13 @@ MyTest::solve ()
             solution[ilev].setVal(0.0, interior, 0, 1, 0);
         }
 
-        mlmg.solve(GetVecOfPtrs(solution), GetVecOfConstPtrs(rhs), reltol, 0.0);
+        if (use_gmres) {
+            GMRESMLMG gmsolver(mlmg);
+            gmsolver.setVerbose(verbose);
+            gmsolver.solve(GetVecOfPtrs(solution), GetVecOfConstPtrs(rhs), reltol, 0.0);
+        } else {
+            mlmg.solve(GetVecOfPtrs(solution), GetVecOfConstPtrs(rhs), reltol, 0.0);
+        }
     }
     else // solve level by level
     {
@@ -154,9 +160,6 @@ MyTest::readParameters ()
 
     pp.query("composite_solve", composite_solve);
     pp.query("use_gmres", use_gmres);
-    if (use_gmres) {
-        composite_solve = false;
-    }
 
     pp.query("verbose", verbose);
     pp.query("bottom_verbose", bottom_verbose);
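Note (illustration, not part of the patch): because readParameters() above no longer forces composite_solve off when GMRES is requested, the NodalPoisson test can now combine the two options in its inputs file, e.g. (parameter names as queried above, values hypothetical):

    composite_solve = 1
    use_gmres = 1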