From 80e9e87721db7eba10854796f87ae97aa7063f2f Mon Sep 17 00:00:00 2001 From: Ann Almgren Date: Mon, 18 Nov 2024 13:40:55 -0800 Subject: [PATCH 1/3] first round of GMRES --- Source/Utils/ERF_MLTerrainPoisson.H | 320 +++++++++++++++++++++ Source/Utils/ERF_TerrainPoisson_3D_K.H | 372 +++++++++++++++++++++++++ Source/Utils/ERF_solve_with_gmres.cpp | 11 +- Source/Utils/Make.package | 3 + 4 files changed, 699 insertions(+), 7 deletions(-) create mode 100644 Source/Utils/ERF_MLTerrainPoisson.H create mode 100644 Source/Utils/ERF_TerrainPoisson_3D_K.H diff --git a/Source/Utils/ERF_MLTerrainPoisson.H b/Source/Utils/ERF_MLTerrainPoisson.H new file mode 100644 index 000000000..8c6a03ccc --- /dev/null +++ b/Source/Utils/ERF_MLTerrainPoisson.H @@ -0,0 +1,320 @@ +#ifndef ERF_TERRAINPOISSON_H_ +#define ERF_TERRAINPOISSON_H_ + +#include +#include + +#include "ERF_TerrainPoisson_3D_K.H" + +namespace amrex { + +// del dot grad phi + +template +class MLTerrainPoissonT + : public MLCellABecLapT +{ +public: + + using FAB = typename MF::fab_type; + using RT = typename MF::value_type; + + using BCType = LinOpBCType; + using Location = typename MLLinOpT::Location; + + MLTerrainPoissonT () = default; + MLTerrainPoissonT (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo(), + const Vector const*>& a_factory = {}); + ~MLTerrainPoissonT () override; + + MLTerrainPoissonT (const MLTerrainPoissonT&) = delete; + MLTerrainPoissonT (MLTerrainPoissonT&&) = delete; + MLTerrainPoissonT& operator= (const MLTerrainPoissonT&) = delete; + MLTerrainPoissonT& operator= (MLTerrainPoissonT&&) = delete; + + void define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo(), + const Vector const*>& a_factory = {}); + + /** + * Define the height coordinate + * + * \param [in] amrlev The level of the multifab for the solver, with + * \p amrlev = 0 always being the lowest level in the + * AMR hierarchy represented in the solve. + * \param [in] zphys Multifab of z_phys_nd + */ + template ::value && + std::is_convertible_v, + int> = 0> + void setZPhysNd (int amrlev, const AMF& zphys); + + void prepareForSolve () final; + [[nodiscard]] bool isSingular (int amrlev) const final { return m_is_singular[amrlev]; } + [[nodiscard]] bool isBottomSingular () const final { return m_is_singular[0]; } + + void Fapply (int amrlev, int mglev, MF& out, const MF& in) const final; + void Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int redblack) const final; + void FFlux (int amrlev, const MFIter& mfi, + const Array& flux, + const FAB& sol, Location loc, int face_only=0) const final; + + void getFluxes (const Vector >& a_flux, + const Vector& a_sol); // const final; + + Vector m_a_zphys_nd; + + void normalize (int amrlev, int mglev, MF& mf) const final; + + [[nodiscard]] RT getAScalar () const final { return RT(0.0); } + [[nodiscard]] RT getBScalar () const final { return RT(-1.0); } + [[nodiscard]] MF const* getACoeffs (int /*amrlev*/, int /*mglev*/) const final { return nullptr; } + [[nodiscard]] Array getBCoeffs (int /*amrlev*/, int /*mglev*/) const final + { return {{ AMREX_D_DECL(nullptr,nullptr,nullptr)}}; } + +private: + + Vector m_is_singular; +}; + +template +MLTerrainPoissonT::MLTerrainPoissonT (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info, + const Vector const*>& a_factory) +{ + define(a_geom, a_grids, a_dmap, a_info, a_factory); +} + +template +void +MLTerrainPoissonT::define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info, + const Vector const*>& a_factory) +{ + BL_PROFILE("MLTerrainPoisson::define()"); + MLCellABecLapT::define(a_geom, a_grids, a_dmap, a_info, a_factory); +} + +template +MLTerrainPoissonT::~MLTerrainPoissonT () = default; + +template +void +MLTerrainPoissonT::prepareForSolve () +{ + BL_PROFILE("MLTerrainPoisson::prepareForSolve()"); + + MLCellABecLapT::prepareForSolve(); + + m_is_singular.clear(); + m_is_singular.resize(this->m_num_amr_levels, false); + auto itlo = std::find(this->m_lobc[0].begin(), this->m_lobc[0].end(), BCType::Dirichlet); + auto ithi = std::find(this->m_hibc[0].begin(), this->m_hibc[0].end(), BCType::Dirichlet); + if (itlo == this->m_lobc[0].end() && ithi == this->m_hibc[0].end()) + if (!m_is_singular[0] && this->m_needs_coarse_data_for_bc && + this->m_coarse_fine_bc_type == LinOpBCType::Neumann) + { + auto bbox = this->m_grids[0][0].minimalBox(); + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if (this->m_lobc[0][idim] == LinOpBCType::Dirichlet) { + bbox.growLo(idim,1); + } + if (this->m_hibc[0][idim] == LinOpBCType::Dirichlet) { + bbox.growHi(idim,1); + } + } + if (this->m_geom[0][0].Domain().contains(bbox)) { + m_is_singular[0] = true; + } + } +} + +template +template ::value && + std::is_convertible_v, int>> + +void +MLTerrainPoissonT::setZPhysNd (int amrlev, const AMF& zphys) +{ + AMREX_ASSERT_WITH_MESSAGE(zphys.nComp() == 1, + "MLTerrainPoisson::setZphysNd: zphys_nd is supposed to be single component."); + m_a_zphys_nd.resize(this->m_num_amr_levels); + m_a_zphys_nd[amrlev].define + (convert(this->m_grids[amrlev][0],IntVect(0,0,1)), + this->m_dmap[amrlev][0], 1, + zphys.nGrowVect(), + MFInfo(), *(this->m_factory[amrlev][0])); + m_a_zphys_nd[amrlev].LocalCopy(zphys, 0, 0, 1, zphys.nGrowVect()); +} + +template +void +MLTerrainPoissonT::Fapply (int amrlev, int mglev, MF& out, const MF& in) const +{ + BL_PROFILE("MLTerrainPoisson::Fapply()"); + + const Real* dxinv = this->m_geom[amrlev][mglev].InvCellSize(); + + AMREX_D_TERM(const RT dx = RT(dxinv[0]);, + const RT dy = RT(dxinv[1]);, + const RT dz = RT(dxinv[2]);); + + AMREX_D_TERM(const RT dhx = RT(dxinv[0]*dxinv[0]);, + const RT dhy = RT(dxinv[1]*dxinv[1]);, + const RT dhz = RT(dxinv[2]*dxinv[2]);); + +#if (AMREX_SPACEDIM == 3) + RT dh0 = this->get_d0(dhx, dhy, dhz); + RT dh1 = this->get_d1(dhx, dhy, dhz); +#endif + +#if (AMREX_SPACEDIM < 3) + const RT dx = RT(this->m_geom[amrlev][mglev].CellSize(0)); + const RT probxlo = RT(this->m_geom[amrlev][mglev].ProbLo(0)); +#endif + +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && out.isFusingCandidate() && !this->hasHiddenDimension()) { + auto const& xma = in.const_arrays(); + auto const& yma = out.arrays(); + auto const& zpa = m_a_zphys_nd[amrlev].arrays(); + { + ParallelFor(out, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + amrex::ignore_unused(j,k); + terrpoisson_adotx(AMREX_D_DECL(i,j,k), yma[box_no], xma[box_no], zpa[box_no], + AMREX_D_DECL(dx,dy,dz)); + }); + } + Gpu::streamSynchronize(); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(out, TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Box const& bx = mfi.tilebox(); + auto const& xfab = in.array(mfi); + auto const& yfab = out.array(mfi); + auto const& zpa = m_a_zphys_nd[amrlev].array(mfi); + { + if (this->hasHiddenDimension()) { + Box const& bx2d = this->compactify(bx); + const auto& xfab2d = this->compactify(xfab); + const auto& yfab2d = this->compactify(yfab); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(bx2d, i, j, k, + { + amrex::ignore_unused(k); + // TwoD::terrpoisson_adotx(i, j, yfab2d, xfab2d, dh0, dh1); + }); + } else { + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(bx, i, j, k, + { + terrpoisson_adotx(i, j, k, yfab, xfab, zpa, dx, dy, dz); + }); + } + } + } // mfi + } +} + +template +void +MLTerrainPoissonT::normalize (int /*amrlev*/, int /*mglev*/, MF& /*mf*/) const +{ + BL_PROFILE("MLTerrainPoisson::normalize()"); + amrex::Abort("What am I doing in MLTerrainPoisson::normalize??"); +} + +template +void +MLTerrainPoissonT::Fsmooth (int amrlev, int /*mglev*/, MF& /*sol*/, const MF& /*rhs*/, int /*redblack*/) const +{ + BL_PROFILE("MLTerrainPoisson::Fsmooth()"); + amrex::Abort("What am I doing in MLTerrainPoisson::Fsmooth??"); +} + +template +void +MLTerrainPoissonT::getFluxes (const Vector >& a_flux, + const Vector& a_sol) +{ + BL_PROFILE("MLTerrainPoisson::getFluxes()"); + amrex::Abort("What am I doing in MLTerrainPoisson::getFluxes??"); +} + +template +void +MLTerrainPoissonT::FFlux (int amrlev, const MFIter& mfi, + const Array& flux, + const FAB& sol, Location, const int /*face_only*/) const +{ + BL_PROFILE("MLTerrainPoisson::FFlux()"); + + const int mglev = 0; + const Box& box = mfi.tilebox(); + const Real* dxinv = this->m_geom[amrlev][mglev].InvCellSize(); + + AMREX_D_TERM(const auto& fxarr = flux[0]->array();, + const auto& fyarr = flux[1]->array();, + const auto& fzarr = flux[2]->array();); + + const auto& solarr = sol.array(); + const auto& zpa = m_a_zphys_nd[amrlev].array(mfi); + + RT odx = RT(dxinv[0]); + RT ody = RT(dxinv[1]); + RT odz = RT(dxinv[2]); + + if (this->hiddenDirection() != 0) { + Box bflux = amrex::surroundingNodes(box, 0); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox, + { + terrpoisson_flux_x(tbox, fxarr, solarr, zpa, odx, odz); + }); + } else { + flux[0]->template setVal(RT(0.0)); + } + if (this->hiddenDirection() != 1) { + Box bflux = amrex::surroundingNodes(box, 1); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox, + { + terrpoisson_flux_y(tbox, fyarr, solarr, zpa, ody, odz); + }); + } else { + flux[1]->template setVal(RT(0.0)); + } + if (this->hiddenDirection() != 2) { + Box bflux = amrex::surroundingNodes(box, 2); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox, + { + terrpoisson_flux_z(tbox, fzarr, solarr, zpa, odx, ody, odz); + }); + } else { + flux[2]->template setVal(RT(0.0)); + } +} + +extern template class MLTerrainPoissonT; + +using MLTerrainPoisson = MLTerrainPoissonT; + +} + +#endif diff --git a/Source/Utils/ERF_TerrainPoisson_3D_K.H b/Source/Utils/ERF_TerrainPoisson_3D_K.H new file mode 100644 index 000000000..73e41931a --- /dev/null +++ b/Source/Utils/ERF_TerrainPoisson_3D_K.H @@ -0,0 +1,372 @@ +#ifndef ERF_TERRAINPOISSON_3D_K_H_ +#define ERF_TERRAINPOISSON_3D_K_H_ +#include + +namespace amrex { + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void terrpoisson_adotx (int i, int j, int k, Array4 const& y, + Array4 const& x, + Array4 const& zp, + T odx, T ody, T odz) noexcept +{ +#if 1 + Real h_xi, h_eta, h_zeta; + + // ********************************************************* + // Hi x-face + // ********************************************************* + // On x-face + Real px_hi = x(i+1,j,k) - x(i ,j,k); + + // On y-edges + Real pz_hi_md_hi = Real(0.25) * ( x(i+1,j,k+1) + x(i,j,k+1) + -x(i+1,j,k ) - x(i,j,k ) ); + h_xi = Real(0.25) * ( zp(i+1,j ,k+1) - zp(i-1,j ,k+1) + +zp(i+1,j+1,k+1) - zp(i-1,j+1,k+1) ); + h_zeta = Real(0.25) * ( zp(i+1,j ,k+2) - zp(i+1,j ,k ) + +zp(i+1,j+1,k+2) - zp(i+1,j+1,k ) ); + pz_hi_md_hi *= h_xi / h_zeta; + + // On y-edges + Real pz_hi_md_lo = Real(0.5) * ( x(i+1,j,k ) + x(i,j,k ) + -x(i+1,j,k-1) - x(i,j,k-1) ); + h_xi = Real(0.25) * ( zp(i+1,j ,k ) - zp(i-1,j ,k ) + +zp(i+1,j+1,k ) - zp(i-1,j+1,k ) ); + h_zeta = Real(0.25) * ( zp(i+1,j ,k+1) - zp(i+1,j ,k-1) + +zp(i+1,j+1,k+1) - zp(i+1,j+1,k-1) ); + pz_hi_md_lo *= h_xi / h_zeta; + + // On x-face + px_hi -= Real(0.5) * ( pz_hi_md_hi + pz_hi_md_lo ); + px_hi *= Real(0.5) * ( zp(i+1,j,k+1) + zp(i+1,j+1,k+1) - zp(i+1,j,k) - zp(i+1,j+1,k) ) * odz; + + // ********************************************************* + // Lo x-face + // ********************************************************* + // On x-face + Real px_lo = x(i ,j,k) - x(i-1,j,k); + + // On y-edges + Real pz_lo_md_hi = Real(0.5) * ( x(i,j,k+1) + x(i-1,j,k+1) + -x(i,j,k ) - x(i-1,j,k ) ); + h_xi = Real(0.25) * ( zp(i,j ,k+1) - zp(i-2,j ,k+1) + +zp(i,j+1,k+1) - zp(i-2,j+1,k+1) ); + h_zeta = Real(0.25) * ( zp(i,j ,k+2) - zp(i ,j ,k ) + +zp(i,j+1,k+2) - zp(i ,j+1,k ) ); + pz_lo_md_hi *= h_xi / h_zeta; + + // On y-edges + Real pz_lo_md_lo = Real(0.5) * ( x(i,j,k ) + x(i-1,j,k ) + -x(i,j,k-1) - x(i-1,j,k-1) ); + + h_xi = Real(0.25) * ( zp(i,j ,k ) - zp(i-2,j ,k ) + +zp(i,j+1,k ) - zp(i-2,j+1,k ) ); + h_zeta = Real(0.25) * ( zp(i,j ,k+1) - zp(i ,j ,k-1) + +zp(i,j+1,k+1) - zp(i ,j+1,k-1) ); + pz_lo_md_lo *= h_xi / h_zeta; + + // On x-face + px_lo -= Real(0.5) * ( pz_lo_md_hi + pz_lo_md_lo ); + px_lo *= Real(0.5) * ( zp(i,j,k+1) + zp(i,j+1,k+1) - zp(i,j,k) - zp(i,j+1,k) ) * odz; + + // ********************************************************* + // Hi y-face + // ********************************************************* + // On y-face + Real py_hi = x(i,j+1,k) - x(i,j,k); + + // On x-edges + Real pz_md_hi_hi = Real(0.25) * ( x(i,j+1,k+1) + x(i,j,k+1) + -x(i,j+1,k ) - x(i,j,k ) ); + h_eta = Real(0.25) * ( zp(i ,j+1,k+1) - zp(i ,j-1,k+1) + +zp(i+1,j+1,k+1) - zp(i+1,j-1,k+1) ); + h_zeta = Real(0.25) * ( zp(i ,j+1,k+2) - zp(i ,j+1,k ) + +zp(i+1,j+1,k+2) - zp(i+1,j+1,k ) ); + pz_md_hi_hi *= h_eta/ h_zeta; + + // On x-edges + Real pz_md_hi_lo = Real(0.5) * ( x(i,j+1,k ) + x(i,j,k ) + -x(i,j+1,k-1) - x(i,j,k-1) ); + h_eta = Real(0.25) * ( zp(i ,j+1,k ) - zp(i ,j-1,k) + +zp(i+1,j+1,k ) - zp(i+1,j-1,k) ); + h_zeta = Real(0.25) * ( zp(i ,j+1,k+1) - zp(i ,j+1,k-1) + +zp(i+1,j+1,k+1) - zp(i+1,j+1,k-1) ); + pz_md_hi_lo *= h_eta/ h_zeta; + + // On y-face + py_hi -= Real(0.5) * ( pz_md_hi_hi + pz_md_hi_lo ); + py_hi *= Real(0.5) * ( zp(i+1,j,k+1) + zp(i+1,j+1,k+1) - zp(i+1,j,k) - zp(i+1,j+1,k) ) * odz; + + // ********************************************************* + // Lo y-face + // ********************************************************* + // On y-face + Real py_lo = x(i,j,k) - x(i,j-1,k); + + // On x-edges + Real pz_md_lo_hi = Real(0.5) * ( x(i,j,k+1) + x(i,j-1,k+1) + -x(i,j,k ) - x(i,j-1,k ) ); + h_eta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j-2,k+1) + +zp(i+1,j,k+1) - zp(i+1,j-2,k+1) ); + h_zeta = Real(0.25) * ( zp(i ,j,k+2) - zp(i ,j ,k ) + +zp(i+1,j,k+2) - zp(i+1,j ,k ) ); + pz_md_lo_hi *= h_eta/ h_zeta; + + // On x-edges + Real pz_md_lo_lo = Real(0.5) * ( x(i,j,k ) + x(i,j-1,k ) + -x(i,j,k-1) - x(i,j-1,k-1) ); + + h_eta = Real(0.25) * ( zp(i ,j,k ) - zp(i ,j-2,k ) + +zp(i+1,j,k ) - zp(i+1,j-2,k ) ); + h_zeta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j ,k-1) + +zp(i+1,j,k+1) - zp(i+1,j ,k-1) ); + pz_md_lo_lo *= h_eta/ h_zeta; + + // On y-face + py_lo -= Real(0.5) * ( pz_md_lo_hi + pz_md_lo_lo ); + py_lo *= Real(0.5) * ( zp(i,j,k+1) + zp(i+1,j,k+1) - zp(i,j,k) - zp(i+1,j,k) ) * odz; + + // ********************************************************* + // Hi z-face + // ********************************************************* + // On z-face + Real pz_hi = x(i,j,k+1) - x(i,j,k ); + + // On corners + Real px_hi_md_hi = Real(0.5) * ( x(i+1,j,k+1) - x(i ,j,k+1) + +x(i+1,j,k ) - x(i ,j,k )); + Real px_lo_md_hi = Real(0.5) * ( x(i ,j,k+1) - x(i-1,j,k+1) + +x(i ,j,k ) - x(i-1,j,k )); + Real py_md_hi_hi = Real(0.5) * ( x(i,j+1,k+1) - x(i,j ,k+1) + +x(i,j+1,k ) - x(i,j ,k )); + Real py_md_lo_hi = Real(0.5) * ( x(i,j ,k+1) - x(i,j-1,k+1) + +x(i,j ,k ) - x(i-1, j,k )); + + // On z-face + Real h_xi_on_zhi = 0.5 * ( zp(i+1,j+1,k+1) + zp(i+1,j,k+1) - zp(i,j+1,k+1) - zp(i,j,k+1) ); + Real h_eta_on_zhi = 0.5 * ( zp(i+1,j+1,k+1) + zp(i,j+1,k+1) - zp(i+1,j,k+1) - zp(i,j,k+1) ); + pz_hi -= 0.5 * h_xi_on_zhi * ( (px_hi_md_hi + px_lo_md_hi) - (pz_hi_md_hi + pz_lo_md_hi) ); + pz_hi -= 0.5 * h_eta_on_zhi * ( (py_md_hi_hi + py_md_lo_hi) - (pz_md_hi_hi + pz_md_lo_hi) ); + + // ********************************************************* + // Lo z-face + // ********************************************************* + // On z-face + Real pz_lo = x(i,j,k ) - x(i,j,k-1); + + // On corners + Real px_hi_md_lo = Real(0.5) * ( x(i+1,j ,k ) - x(i ,j ,k ) + +x(i+1,j ,k-1) - x(i ,j ,k-1)); + Real px_lo_md_lo = Real(0.5) * ( x(i ,j ,k ) - x(i-1,j ,k ) + +x(i ,j ,k-1) - x(i-1,j ,k-1)); + Real py_md_hi_lo = Real(0.5) * ( x(i ,j+1,k ) - x(i ,j ,k ) + +x(i ,j+1,k-1) - x(i ,j ,k-1)); + Real py_md_lo_lo = Real(0.5) * ( x(i ,j ,k ) - x(i ,j-1,k ) + +x(i ,j ,k-1) - x(i ,j-1,k-1)); + + // On z-face + Real h_xi_on_zlo = 0.5 * (zp(i+1,j+1,k) + zp(i+1,j,k) - zp(i,j+1,k) - zp(i,j,k)); + Real h_eta_on_zlo = 0.5 * (zp(i+1,j+1,k) + zp(i,j+1,k) - zp(i+1,j,k) - zp(i,j,k)); + pz_lo -= 0.5 * h_xi_on_zlo * ( (px_hi_md_lo + px_lo_md_lo) - (pz_hi_md_lo + pz_lo_md_lo) ); + pz_lo -= 0.5 * h_eta_on_zlo * ( (py_md_hi_lo + py_md_lo_lo) - (pz_md_hi_lo + pz_md_lo_lo) ); +#endif + + // ********************************************************* + // Adotx + // ********************************************************* + px_hi = x(i+1,j,k) - x(i ,j,k); + px_lo = x(i ,j,k) - x(i-1,j,k); + py_hi = x(i,j+1,k) - x(i,j ,k); + py_lo = x(i,j ,k) - x(i,j-1,k); + pz_hi = x(i,j,k+1) - x(i,j,k ); + pz_lo = x(i,j,k ) - x(i,j,k-1); + + y(i,j,k) = odx * odx * (px_hi - px_lo) + + ody * ody * (py_hi - py_lo) + + odz * odz * (pz_hi - pz_lo); +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void terrpoisson_flux_x (Box const& box, Array4 const& fx, + Array4 const& sol, + Array4 const& zp, + T dxinv, T dzinv) noexcept +{ + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + Real h_xi, h_zeta; + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + // On x-face + Real px_lo = (sol(i,j,k) - sol(i-1,j,k)) * dxinv; + +#if 1 + // On y-edges + Real pz_lo_md_hi = Real(0.5) * ( sol(i,j,k+1) + sol(i-1,j,k+1) + -sol(i,j,k ) - sol(i-1,j,k ) ); + h_xi = Real(0.25) * ( zp(i,j ,k+1) - zp(i-2,j ,k+1) + +zp(i,j+1,k+1) - zp(i-2,j+1,k+1) ); + h_zeta = Real(0.25) * ( zp(i,j ,k+2) - zp(i ,j ,k ) + +zp(i,j+1,k+2) - zp(i ,j+1,k ) ); + pz_lo_md_hi *= h_xi / h_zeta; + + // On y-edges + Real pz_lo_md_lo = Real(0.5) * ( sol(i,j,k ) + sol(i-1,j,k ) + -sol(i,j,k-1) - sol(i-1,j,k-1) ); + + h_xi = Real(0.25) * ( zp(i,j ,k ) - zp(i-2,j ,k ) + +zp(i,j+1,k ) - zp(i-2,j+1,k ) ); + h_zeta = Real(0.25) * ( zp(i,j ,k+1) - zp(i ,j ,k-1) + +zp(i,j+1,k+1) - zp(i ,j+1,k-1) ); + pz_lo_md_lo *= h_xi / h_zeta; + + // On x-face + px_lo -= Real(0.5) * ( pz_lo_md_hi + pz_lo_md_lo ); + px_lo *= Real(0.5) * ( zp(i,j,k+1) + zp(i,j+1,k+1) - zp(i,j,k) - zp(i,j+1,k) ) * dzinv; +#endif + // On y-edges + fx(i,j,k) = -px_lo; + } + } + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void terrpoisson_flux_y (Box const& box, Array4 const& fy, + Array4 const& sol, + Array4 const& zp, + T dyinv, T dzinv) noexcept +{ + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + Real h_eta, h_zeta; + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + + Real py_lo = (sol(i,j,k) - sol(i,j-1,k)) * dyinv; + +#if 1 + // On x-edges + Real pz_md_lo_hi = Real(0.5) * ( sol(i,j,k+1) + sol(i,j-1,k+1) + -sol(i,j,k ) - sol(i,j-1,k ) ); + h_eta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j-2,k+1) + +zp(i+1,j,k+1) - zp(i+1,j-2,k+1) ); + h_zeta = Real(0.25) * ( zp(i ,j,k+2) - zp(i ,j ,k ) + +zp(i+1,j,k+2) - zp(i+1,j ,k ) ); + pz_md_lo_hi *= h_eta/ h_zeta; + + // On x-edges + Real pz_md_lo_lo = Real(0.5) * ( sol(i,j,k ) + sol(i,j-1,k ) + -sol(i,j,k-1) - sol(i,j-1,k-1) ); + + h_eta = Real(0.25) * ( zp(i ,j,k ) - zp(i ,j-2,k ) + +zp(i+1,j,k ) - zp(i+1,j-2,k ) ); + h_zeta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j ,k-1) + +zp(i+1,j,k+1) - zp(i+1,j ,k-1) ); + pz_md_lo_lo *= h_eta/ h_zeta; + + // On y-face + py_lo -= Real(0.5) * ( pz_md_lo_hi + pz_md_lo_lo ); + py_lo *= Real(0.5) * ( zp(i,j,k+1) + zp(i+1,j,k+1) - zp(i,j,k) - zp(i+1,j,k) ) * dzinv; +#endif + + fy(i,j,k) = -py_lo; + } + } + } +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void terrpoisson_flux_z (Box const& box, Array4 const& fz, + Array4 const& sol, + Array4 const& zp, + T dxinv, T dyinv, T dzinv) noexcept +{ + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + Real h_xi, h_eta, h_zeta; + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + // On z-face + Real pz_lo = (sol(i,j,k ) - sol(i,j,k-1)) * dzinv; + +#if 1 + // On corners + Real px_hi_md_lo = Real(0.5) * ( sol(i+1,j ,k ) - sol(i ,j ,k ) + +sol(i+1,j ,k-1) - sol(i ,j ,k-1)); + Real px_lo_md_lo = Real(0.5) * ( sol(i ,j ,k ) - sol(i-1,j ,k ) + +sol(i ,j ,k-1) - sol(i-1,j ,k-1)); + Real py_md_hi_lo = Real(0.5) * ( sol(i ,j+1,k ) - sol(i ,j ,k ) + +sol(i ,j+1,k-1) - sol(i ,j ,k-1)); + Real py_md_lo_lo = Real(0.5) * ( sol(i ,j ,k ) - sol(i ,j-1,k ) + +sol(i ,j ,k-1) - sol(i ,j-1,k-1)); + + // On y-edges + Real pz_hi_md_lo = Real(0.5) * ( sol(i+1,j,k ) + sol(i,j,k ) + -sol(i+1,j,k-1) - sol(i,j,k-1) ); + h_xi = Real(0.25) * ( zp(i+1,j ,k ) - zp(i-1,j ,k ) + +zp(i+1,j+1,k ) - zp(i-1,j+1,k ) ); + h_zeta = Real(0.25) * ( zp(i+1,j ,k+1) - zp(i+1,j ,k-1) + +zp(i+1,j+1,k+1) - zp(i+1,j+1,k-1) ); + pz_hi_md_lo *= h_xi / h_zeta; + + // On y-edges + Real pz_lo_md_lo = Real(0.5) * ( sol(i,j,k ) + sol(i-1,j,k ) + -sol(i,j,k-1) - sol(i-1,j,k-1) ); + h_xi = Real(0.25) * ( zp(i,j ,k ) - zp(i-2,j ,k ) + +zp(i,j+1,k ) - zp(i-2,j+1,k ) ); + h_zeta = Real(0.25) * ( zp(i,j ,k+1) - zp(i ,j ,k-1) + +zp(i,j+1,k+1) - zp(i ,j+1,k-1) ); + pz_lo_md_lo *= h_xi / h_zeta; + + // On x-edges + Real pz_md_hi_lo = Real(0.5) * ( sol(i,j+1,k ) + sol(i,j,k ) + -sol(i,j+1,k-1) - sol(i,j,k-1) ); + h_eta = Real(0.25) * ( zp(i ,j+1,k ) - zp(i ,j-1,k) + +zp(i+1,j+1,k ) - zp(i+1,j-1,k) ); + h_zeta = Real(0.25) * ( zp(i ,j+1,k+1) - zp(i ,j+1,k-1) + +zp(i+1,j+1,k+1) - zp(i+1,j+1,k-1) ); + pz_md_hi_lo *= h_eta/ h_zeta; + + // On x-edges + Real pz_md_lo_lo = Real(0.5) * ( sol(i,j,k ) + sol(i,j-1,k ) + -sol(i,j,k-1) - sol(i,j-1,k-1) ); + h_eta = Real(0.25) * ( zp(i ,j,k ) - zp(i ,j-2,k ) + +zp(i+1,j,k ) - zp(i+1,j-2,k ) ); + h_zeta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j ,k-1) + +zp(i+1,j,k+1) - zp(i+1,j ,k-1) ); + pz_md_lo_lo *= h_eta/ h_zeta; + + // On z-face + Real h_xi_on_zlo = 0.5 * (zp(i+1,j+1,k) + zp(i+1,j,k) - zp(i,j+1,k) - zp(i,j,k)) * dxinv; + Real h_eta_on_zlo = 0.5 * (zp(i+1,j+1,k) + zp(i,j+1,k) - zp(i+1,j,k) - zp(i,j,k)) * dyinv; + + pz_lo -= 0.5 * h_xi_on_zlo * ( (px_hi_md_lo + px_lo_md_lo) - (pz_hi_md_lo + pz_lo_md_lo) ); + pz_lo -= 0.5 * h_eta_on_zlo * ( (py_md_hi_lo + py_md_lo_lo) - (pz_md_hi_lo + pz_md_lo_lo) ); + +#endif + fz(i,j,k) = -pz_lo; + } + } + } +} + +} // amrex +#endif diff --git a/Source/Utils/ERF_solve_with_gmres.cpp b/Source/Utils/ERF_solve_with_gmres.cpp index 4fd1539d9..b1b2550f7 100644 --- a/Source/Utils/ERF_solve_with_gmres.cpp +++ b/Source/Utils/ERF_solve_with_gmres.cpp @@ -1,8 +1,8 @@ #include "ERF.H" #include "ERF_Utils.H" +#include "ERF_MLTerrainPoisson.H" #include -//#include #include #include @@ -11,12 +11,10 @@ using namespace amrex; /** * Solve the Poisson equation using GMRES */ -void ERF::solve_with_gmres (int lev, Vector& /*rhs*/, Vector& /*phi*/, Vector>& /*fluxes*/) -//void ERF::solve_with_gmres (int lev, Vector& rhs, Vector& phi, Vector>& fluxes) +void ERF::solve_with_gmres (int lev, Vector& rhs, Vector& phi, Vector>& fluxes) { BL_PROFILE("ERF::solve_with_gmres()"); -#if 0 auto const dom_lo = lbound(geom[lev].Domain()); auto const dom_hi = ubound(geom[lev].Domain()); @@ -36,8 +34,8 @@ void ERF::solve_with_gmres (int lev, Vector& /*rhs*/, Vector auto bclo = get_projection_bc(Orientation::low); auto bchi = get_projection_bc(Orientation::high); - // amrex::Print() << "BCLO " << bclo[0] << " " << bclo[1] << " " << bclo[2] << std::endl; - // amrex::Print() << "BCHI " << bchi[0] << " " << bchi[1] << " " << bchi[2] << std::endl; + amrex::Print() << "BCLO " << bclo[0] << " " << bclo[1] << " " << bclo[2] << std::endl; + amrex::Print() << "BCHI " << bchi[0] << " " << bchi[1] << " " << bchi[2] << std::endl; Real reltol = solverChoice.poisson_reltol; Real abstol = solverChoice.poisson_abstol; @@ -62,5 +60,4 @@ void ERF::solve_with_gmres (int lev, Vector& /*rhs*/, Vector Vector phi_vec; phi_vec.resize(1); phi_vec[0] = &phi[0]; terrpoisson.getFluxes(GetVecOfArrOfPtrs(fluxes), phi_vec); -#endif } diff --git a/Source/Utils/Make.package b/Source/Utils/Make.package index 3fc5cd0ec..b999b05f1 100644 --- a/Source/Utils/Make.package +++ b/Source/Utils/Make.package @@ -10,6 +10,9 @@ CEXE_headers += ERF_TerrainMetrics.H CEXE_headers += ERF_TileNoZ.H CEXE_headers += ERF_Utils.H +CEXE_headers += ERF_TerrainPoisson_3D_K.H +CEXE_headers += ERF_MLTerrainPoisson.H + CEXE_headers += ERF_ParFunctions.H CEXE_headers += ERF_Sat_methods.H From cea5f92dd6d9b58969549976efb5dd0b143ea834 Mon Sep 17 00:00:00 2001 From: Ann Almgren Date: Mon, 18 Nov 2024 14:14:56 -0800 Subject: [PATCH 2/3] this now compiles --- Source/ERF.cpp | 1 + Source/Utils/ERF_MLTerrainPoisson.H | 14 ++++++++------ Source/Utils/ERF_TerrainPoisson_3D_K.H | 16 ++++++++-------- Source/Utils/ERF_solve_with_gmres.cpp | 2 ++ 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/Source/ERF.cpp b/Source/ERF.cpp index 236de3b05..f9fdc92a9 100644 --- a/Source/ERF.cpp +++ b/Source/ERF.cpp @@ -868,6 +868,7 @@ ERF::InitData_post () // Note -- this projection is only defined for no terrain if (solverChoice.project_initial_velocity) { Real dummy_dt = 1.0; + if (verbose) amrex::Print() << "Projecting the initial velocities..." << std::endl; for (int lev = 0; lev <= finest_level; ++lev) { project_velocities(lev, dummy_dt, vars_new[lev], pp_inc[lev]); diff --git a/Source/Utils/ERF_MLTerrainPoisson.H b/Source/Utils/ERF_MLTerrainPoisson.H index 8c6a03ccc..c81539010 100644 --- a/Source/Utils/ERF_MLTerrainPoisson.H +++ b/Source/Utils/ERF_MLTerrainPoisson.H @@ -16,6 +16,8 @@ class MLTerrainPoissonT { public: + using MLCellABecLapT::getFluxes; + using FAB = typename MF::fab_type; using RT = typename MF::value_type; @@ -278,15 +280,15 @@ MLTerrainPoissonT::FFlux (int amrlev, const MFIter& mfi, const auto& solarr = sol.array(); const auto& zpa = m_a_zphys_nd[amrlev].array(mfi); - RT odx = RT(dxinv[0]); - RT ody = RT(dxinv[1]); - RT odz = RT(dxinv[2]); + RT dxinv = RT(dxinv[0]); + RT dyinv = RT(dxinv[1]); + RT dzinv = RT(dxinv[2]); if (this->hiddenDirection() != 0) { Box bflux = amrex::surroundingNodes(box, 0); AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox, { - terrpoisson_flux_x(tbox, fxarr, solarr, zpa, odx, odz); + terrpoisson_flux_x(tbox, fxarr, solarr, zpa, invdx, invdz); }); } else { flux[0]->template setVal(RT(0.0)); @@ -295,7 +297,7 @@ MLTerrainPoissonT::FFlux (int amrlev, const MFIter& mfi, Box bflux = amrex::surroundingNodes(box, 1); AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox, { - terrpoisson_flux_y(tbox, fyarr, solarr, zpa, ody, odz); + terrpoisson_flux_y(tbox, fyarr, solarr, zpa, invdy, invdz); }); } else { flux[1]->template setVal(RT(0.0)); @@ -304,7 +306,7 @@ MLTerrainPoissonT::FFlux (int amrlev, const MFIter& mfi, Box bflux = amrex::surroundingNodes(box, 2); AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox, { - terrpoisson_flux_z(tbox, fzarr, solarr, zpa, odx, ody, odz); + terrpoisson_flux_z(tbox, fzarr, solarr, zpa, invdx, invdy, invdz); }); } else { flux[2]->template setVal(RT(0.0)); diff --git a/Source/Utils/ERF_TerrainPoisson_3D_K.H b/Source/Utils/ERF_TerrainPoisson_3D_K.H index 73e41931a..9d714a28e 100644 --- a/Source/Utils/ERF_TerrainPoisson_3D_K.H +++ b/Source/Utils/ERF_TerrainPoisson_3D_K.H @@ -9,7 +9,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void terrpoisson_adotx (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& zp, - T odx, T ody, T odz) noexcept + T dxinv, T invdy, T dzinv) noexcept { #if 1 Real h_xi, h_eta, h_zeta; @@ -40,7 +40,7 @@ void terrpoisson_adotx (int i, int j, int k, Array4 const& y, // On x-face px_hi -= Real(0.5) * ( pz_hi_md_hi + pz_hi_md_lo ); - px_hi *= Real(0.5) * ( zp(i+1,j,k+1) + zp(i+1,j+1,k+1) - zp(i+1,j,k) - zp(i+1,j+1,k) ) * odz; + px_hi *= Real(0.5) * ( zp(i+1,j,k+1) + zp(i+1,j+1,k+1) - zp(i+1,j,k) - zp(i+1,j+1,k) ) * dzinv; // ********************************************************* // Lo x-face @@ -69,7 +69,7 @@ void terrpoisson_adotx (int i, int j, int k, Array4 const& y, // On x-face px_lo -= Real(0.5) * ( pz_lo_md_hi + pz_lo_md_lo ); - px_lo *= Real(0.5) * ( zp(i,j,k+1) + zp(i,j+1,k+1) - zp(i,j,k) - zp(i,j+1,k) ) * odz; + px_lo *= Real(0.5) * ( zp(i,j,k+1) + zp(i,j+1,k+1) - zp(i,j,k) - zp(i,j+1,k) ) * dzinv; // ********************************************************* // Hi y-face @@ -97,7 +97,7 @@ void terrpoisson_adotx (int i, int j, int k, Array4 const& y, // On y-face py_hi -= Real(0.5) * ( pz_md_hi_hi + pz_md_hi_lo ); - py_hi *= Real(0.5) * ( zp(i+1,j,k+1) + zp(i+1,j+1,k+1) - zp(i+1,j,k) - zp(i+1,j+1,k) ) * odz; + py_hi *= Real(0.5) * ( zp(i+1,j,k+1) + zp(i+1,j+1,k+1) - zp(i+1,j,k) - zp(i+1,j+1,k) ) * dzinv; // ********************************************************* // Lo y-face @@ -126,7 +126,7 @@ void terrpoisson_adotx (int i, int j, int k, Array4 const& y, // On y-face py_lo -= Real(0.5) * ( pz_md_lo_hi + pz_md_lo_lo ); - py_lo *= Real(0.5) * ( zp(i,j,k+1) + zp(i+1,j,k+1) - zp(i,j,k) - zp(i+1,j,k) ) * odz; + py_lo *= Real(0.5) * ( zp(i,j,k+1) + zp(i+1,j,k+1) - zp(i,j,k) - zp(i+1,j,k) ) * dzinv; // ********************************************************* // Hi z-face @@ -183,9 +183,9 @@ void terrpoisson_adotx (int i, int j, int k, Array4 const& y, pz_hi = x(i,j,k+1) - x(i,j,k ); pz_lo = x(i,j,k ) - x(i,j,k-1); - y(i,j,k) = odx * odx * (px_hi - px_lo) - + ody * ody * (py_hi - py_lo) - + odz * odz * (pz_hi - pz_lo); + y(i,j,k) = dxinv * dxinv * (px_hi - px_lo) + + invdy * invdy * (py_hi - py_lo) + + dzinv * dzinv * (pz_hi - pz_lo); } template diff --git a/Source/Utils/ERF_solve_with_gmres.cpp b/Source/Utils/ERF_solve_with_gmres.cpp index b1b2550f7..05f80f9e6 100644 --- a/Source/Utils/ERF_solve_with_gmres.cpp +++ b/Source/Utils/ERF_solve_with_gmres.cpp @@ -8,6 +8,8 @@ using namespace amrex; +template class amrex::MLTerrainPoissonT; + /** * Solve the Poisson equation using GMRES */ From 71242ba173e7cbe97dfc4be2741228847d3f7a4c Mon Sep 17 00:00:00 2001 From: Ann Almgren Date: Mon, 18 Nov 2024 14:17:16 -0800 Subject: [PATCH 3/3] fix typo --- Source/Utils/ERF_MLTerrainPoisson.H | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Utils/ERF_MLTerrainPoisson.H b/Source/Utils/ERF_MLTerrainPoisson.H index c81539010..62d2023a8 100644 --- a/Source/Utils/ERF_MLTerrainPoisson.H +++ b/Source/Utils/ERF_MLTerrainPoisson.H @@ -280,9 +280,9 @@ MLTerrainPoissonT::FFlux (int amrlev, const MFIter& mfi, const auto& solarr = sol.array(); const auto& zpa = m_a_zphys_nd[amrlev].array(mfi); - RT dxinv = RT(dxinv[0]); - RT dyinv = RT(dxinv[1]); - RT dzinv = RT(dxinv[2]); + RT invdx = RT(dxinv[0]); + RT invdy = RT(dxinv[1]); + RT invdz = RT(dxinv[2]); if (this->hiddenDirection() != 0) { Box bflux = amrex::surroundingNodes(box, 0);