From e8c3bd5ea5ad77f33a1281e49a01b525b22a61d0 Mon Sep 17 00:00:00 2001 From: Ann Almgren Date: Sun, 8 Dec 2024 12:42:32 -0800 Subject: [PATCH] fix GMRES solver (#2012) * fix GMRES solver * update so code compiles without FFT * add ignore_unused --- .../LinearSolvers/ERF_ComputeDivergence.cpp | 7 +- Source/LinearSolvers/ERF_FFTUtils.H | 12 +- Source/LinearSolvers/ERF_PoissonSolve.cpp | 18 ++- Source/LinearSolvers/ERF_SolveWithGMRES.cpp | 6 +- Source/LinearSolvers/ERF_TerrainPoisson.H | 15 +-- Source/LinearSolvers/ERF_TerrainPoisson.cpp | 107 +++++++++--------- .../LinearSolvers/ERF_TerrainPoisson_3D_K.H | 82 +++++++------- Submodules/AMReX | 2 +- 8 files changed, 132 insertions(+), 117 deletions(-) diff --git a/Source/LinearSolvers/ERF_ComputeDivergence.cpp b/Source/LinearSolvers/ERF_ComputeDivergence.cpp index 6a96dd8c6..08102e387 100644 --- a/Source/LinearSolvers/ERF_ComputeDivergence.cpp +++ b/Source/LinearSolvers/ERF_ComputeDivergence.cpp @@ -62,14 +62,15 @@ void ERF::compute_divergence (int lev, MultiFab& rhs, Vector& mom_mf, const Array4& ax_arr = ax[lev]->const_array(mfi); const Array4& ay_arr = ay[lev]->const_array(mfi); - const Array4& az_arr = az[lev]->const_array(mfi); const Array4& dJ_arr = detJ_cc[lev]->const_array(mfi); - + // + // az == 1 for terrain-fitted coordinates + // ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { rhs_arr(i,j,k) = ((ax_arr(i+1,j,k)*rho0u_arr(i+1,j,k) - ax_arr(i,j,k)*rho0u_arr(i,j,k)) * dxInv[0] +(ay_arr(i,j+1,k)*rho0v_arr(i,j+1,k) - ay_arr(i,j,k)*rho0v_arr(i,j,k)) * dxInv[1] - +(az_arr(i,j,k+1)*rho0w_arr(i,j,k+1) - az_arr(i,j,k)*rho0w_arr(i,j,k)) * dxInv[2]) / dJ_arr(i,j,k); + +( rho0w_arr(i,j,k+1) - rho0w_arr(i,j,k)) * dxInv[2]) / dJ_arr(i,j,k); }); } // mfi diff --git a/Source/LinearSolvers/ERF_FFTUtils.H b/Source/LinearSolvers/ERF_FFTUtils.H index c5ae0556d..1036be58e 100644 --- a/Source/LinearSolvers/ERF_FFTUtils.H +++ b/Source/LinearSolvers/ERF_FFTUtils.H @@ -21,7 +21,7 @@ get_fft_bc (Geometry const& lev_geom, (lev_geom.Domain().smallEnd(dir) == bounding_box.smallEnd(dir)) && (lev_geom.Domain().bigEnd(dir) == bounding_box.bigEnd(dir)) ) { r[dir] = std::make_pair(FFT::Boundary::periodic,FFT::Boundary::periodic); - amrex::Print() << "SETTING " << dir << " TO PERIODIC " << std::endl; + // amrex::Print() << "SETTING " << dir << " TO PERIODIC " << std::endl; } else if ( (lev_geom.Domain().smallEnd(dir) == bounding_box.smallEnd(dir)) && @@ -30,7 +30,7 @@ get_fft_bc (Geometry const& lev_geom, (bc_type_hi != "Outflow" && bc_type_hi != "Open") ) { r[dir] = std::make_pair(FFT::Boundary::even,FFT::Boundary::even); - amrex::Print() << "SETTING " << dir << " TO EVEN EVEN " << std::endl; + // amrex::Print() << "SETTING " << dir << " TO EVEN EVEN " << std::endl; } else if ( (lev_geom.Domain().smallEnd(dir) == bounding_box.smallEnd(dir)) && @@ -39,26 +39,26 @@ get_fft_bc (Geometry const& lev_geom, (bc_type_hi == "Outflow" || bc_type_hi == "Open") ) { r[dir] = std::make_pair(FFT::Boundary::odd,FFT::Boundary::odd); - amrex::Print() << "SETTING " << dir << " TO ODD ODD " << std::endl; + // amrex::Print() << "SETTING " << dir << " TO ODD ODD " << std::endl; } else if ( (lev_geom.Domain().smallEnd(dir) == bounding_box.smallEnd(dir)) && (bc_type_lo == "Outflow" || bc_type_lo == "Open") ) { r[dir] = std::make_pair(FFT::Boundary::odd,FFT::Boundary::even); - amrex::Print() << "SETTING " << dir << " TO ODD EVEN " << std::endl; + // amrex::Print() << "SETTING " << dir << " TO ODD EVEN " << std::endl; } else if ( (lev_geom.Domain().bigEnd(dir) == bounding_box.bigEnd(dir)) && (bc_type_hi == "Outflow" || bc_type_hi == "Open") ) { r[dir] = std::make_pair(FFT::Boundary::even,FFT::Boundary::odd); - amrex::Print() << "SETTING " << dir << " TO EVEN ODD " << std::endl; + // amrex::Print() << "SETTING " << dir << " TO EVEN ODD " << std::endl; } else { r[dir] = std::make_pair(FFT::Boundary::even,FFT::Boundary::even); - amrex::Print() << "SETTING " << dir << " TO EVEN EVEN " << std::endl; + // amrex::Print() << "SETTING " << dir << " TO EVEN EVEN " << std::endl; } } // dir diff --git a/Source/LinearSolvers/ERF_PoissonSolve.cpp b/Source/LinearSolvers/ERF_PoissonSolve.cpp index 0e8db53a1..34d3ea898 100644 --- a/Source/LinearSolvers/ERF_PoissonSolve.cpp +++ b/Source/LinearSolvers/ERF_PoissonSolve.cpp @@ -77,7 +77,7 @@ void ERF::project_velocities (int lev, Real l_dt, Vector& mom_mf, Mult Real rhsnorm = rhs[0].norm0(); if (mg_verbose > 0) { - Print() << "Max norm of divergence before solve at level " << lev << " : " << rhsnorm << std::endl; + Print() << "Max/L2 norm of divergence before solve at level " << lev << " : " << rhsnorm << " " << rhs[0].norm2() << std::endl; } // **************************************************************************** @@ -214,7 +214,21 @@ void ERF::project_velocities (int lev, Real l_dt, Vector& mom_mf, Mult { compute_divergence(lev, rhs[0], mom_mf, geom_tmp[0]); - amrex::Print() << "Max norm of divergence after solve at level " << lev << " : " << rhs[0].norm0() << std::endl; + Print() << "Max/L2 norm of divergence after solve at level " << lev << " : " << rhs[0].norm0() << " " << rhs[0].norm2() << std::endl; + +#if 0 + // FOR DEBUGGING ONLY + for ( MFIter mfi(rhs[0],TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Array4& rhs_arr = rhs[0].const_array(mfi); + Box bx = mfi.validbox(); + ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { + if (std::abs(rhs_arr(i,j,k)) > 1.e-10) { + amrex::Print() << "RHS AT " << IntVect(i,j,k) << " " << rhs_arr(i,j,k) << std::endl; + } + }); + } // mfi +#endif } // mg_verbose diff --git a/Source/LinearSolvers/ERF_SolveWithGMRES.cpp b/Source/LinearSolvers/ERF_SolveWithGMRES.cpp index 9c05aaac0..43301a5e6 100644 --- a/Source/LinearSolvers/ERF_SolveWithGMRES.cpp +++ b/Source/LinearSolvers/ERF_SolveWithGMRES.cpp @@ -7,11 +7,12 @@ using namespace amrex; /** - * Solve the Poisson equation using GMRES + * Solve the Poisson equation using FFT-preconditioned GMRES */ void ERF::solve_with_gmres (int lev, Vector& rhs, Vector& phi, Vector>& fluxes) { +#ifdef ERF_USE_FFT BL_PROFILE("ERF::solve_with_gmres()"); Real reltol = solverChoice.poisson_reltol; @@ -31,4 +32,7 @@ void ERF::solve_with_gmres (int lev, Vector& rhs, Vector& ph gmsolver.solve(phi[0], rhs[0], reltol, abstol); tp.getFluxes(phi[0], fluxes[0]); +#else + amrex::ignore_unused(lev, rhs, phi, fluxes); +#endif } diff --git a/Source/LinearSolvers/ERF_TerrainPoisson.H b/Source/LinearSolvers/ERF_TerrainPoisson.H index 97efd6afe..31dacc0c3 100644 --- a/Source/LinearSolvers/ERF_TerrainPoisson.H +++ b/Source/LinearSolvers/ERF_TerrainPoisson.H @@ -1,14 +1,13 @@ #ifndef ERF_TERRAIN_POISSON_H_ #define ERF_TERRAIN_POISSON_H_ -#include "ERF_TerrainPoisson_3D_K.H" +#include +#include #ifdef ERF_USE_FFT -#include -#endif -#include -#include +#include "ERF_TerrainPoisson_3D_K.H" +#include class TerrainPoisson { @@ -24,6 +23,8 @@ public: void apply(amrex::MultiFab& lhs, amrex::MultiFab const& rhs); + void apply_bcs(amrex::MultiFab& phi); + void usePrecond(bool precond_flag); void getFluxes(amrex::MultiFab& phi, amrex::Array& fluxes); @@ -57,9 +58,9 @@ private: amrex::Gpu::DeviceVector m_stretched_dz_d; const amrex::MultiFab* m_zphys; -#ifdef ERF_USE_FFT std::unique_ptr> m_2D_fft_precond; -#endif + amrex::Array,AMREX_SPACEDIM> bc_fft; }; #endif +#endif diff --git a/Source/LinearSolvers/ERF_TerrainPoisson.cpp b/Source/LinearSolvers/ERF_TerrainPoisson.cpp index 4302d517c..7eea971f7 100644 --- a/Source/LinearSolvers/ERF_TerrainPoisson.cpp +++ b/Source/LinearSolvers/ERF_TerrainPoisson.cpp @@ -1,3 +1,5 @@ +#ifdef ERF_USE_FFT + #include "ERF_TerrainPoisson.H" #include "ERF_FFTUtils.H" @@ -14,15 +16,11 @@ TerrainPoisson::TerrainPoisson (Geometry const& geom, BoxArray const& ba, m_stretched_dz_d(stretched_dz_lev_d), m_zphys(z_phys_nd) { -#ifdef ERF_USE_FFT if (!m_2D_fft_precond) { Box bounding_box = ba.minimalBox(); - auto bc_fft = get_fft_bc(geom,domain_bc_type,bounding_box); + bc_fft = get_fft_bc(geom,domain_bc_type,bounding_box); m_2D_fft_precond = std::make_unique>(geom,bc_fft); } -#else - amrex::ignore_unused(domain_bc_type); -#endif } void TerrainPoisson::usePrecond (bool use_precond_in) @@ -34,9 +32,6 @@ void TerrainPoisson::apply (MultiFab& lhs, MultiFab const& rhs) { AMREX_ASSERT(rhs.nGrowVect().allGT(0)); - auto domlo = lbound(m_geom.Domain()); - auto domhi = ubound(m_geom.Domain()); - MultiFab& xx = const_cast(rhs); auto const& dxinv = m_geom.InvCellSizeArray(); @@ -44,62 +39,87 @@ void TerrainPoisson::apply (MultiFab& lhs, MultiFab const& rhs) auto const& y = lhs.arrays(); auto const& zpa = m_zphys->const_arrays(); - // Impose periodic and internal boundary conditions - xx.FillBoundary(m_geom.periodicity()); + apply_bcs(xx); + + auto const& xc = xx.const_arrays(); + ParallelFor(rhs, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + terrpoisson_adotx(i, j, k, y[b], xc[b], zpa[b], dxinv[0], dxinv[1]); + }); +} + +void TerrainPoisson::apply_bcs (MultiFab& phi) +{ + auto domlo = lbound(m_geom.Domain()); + auto domhi = ubound(m_geom.Domain()); + + phi.FillBoundary(m_geom.periodicity()); if (!m_geom.isPeriodic(0)) { - for (MFIter mfi(xx,true); mfi.isValid(); ++mfi) + for (MFIter mfi(phi,true); mfi.isValid(); ++mfi) { Box bx = mfi.tilebox(); - const Array4& x_arr = xx.array(mfi); + const Array4& phi_arr = phi.array(mfi); ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) { if (i == domlo.x) { - x_arr(i-1,j,k) = x_arr(i,j,k); - } else if (i == domhi.x) { // OUTFLOW - x_arr(i+1,j,k) = -x_arr(i,j,k); + if (bc_fft[0].first == FFT::Boundary::even) { + phi_arr(i-1,j,k) = phi_arr(i,j,k); + } else if (bc_fft[0].first == FFT::Boundary::odd) { + phi_arr(i-1,j,k) = -phi_arr(i,j,k); + } + } else if (i == domhi.x) { + if (bc_fft[0].second == FFT::Boundary::even) { + phi_arr(i+1,j,k) = phi_arr(i,j,k); + } else if (bc_fft[0].second == FFT::Boundary::odd) { + phi_arr(i+1,j,k) = -phi_arr(i,j,k); + } } }); - } + } // mfi } if (!m_geom.isPeriodic(1)) { - for (MFIter mfi(xx,true); mfi.isValid(); ++mfi) + for (MFIter mfi(phi,true); mfi.isValid(); ++mfi) { Box bx = mfi.tilebox(); Box bx2(bx); bx2.grow(0,1); - const Array4& x_arr = xx.array(mfi); + const Array4& phi_arr = phi.array(mfi); ParallelFor(bx2, [=] AMREX_GPU_DEVICE (int i, int j, int k) { if (j == domlo.y) { - x_arr(i,j-1,k) = x_arr(i,j,k); + if (bc_fft[1].first == FFT::Boundary::even) { + phi_arr(i,j-1,k) = phi_arr(i,j,k); + } else if (bc_fft[1].first == FFT::Boundary::odd) { + phi_arr(i,j-1,k) = -phi_arr(i,j,k); + } } else if (j == domhi.y) { - x_arr(i,j+1,k) = x_arr(i,j,k); + if (bc_fft[1].second == FFT::Boundary::even) { + phi_arr(i,j+1,k) = phi_arr(i,j,k); + } else if (bc_fft[1].second == FFT::Boundary::odd) { + phi_arr(i,j+1,k) = -phi_arr(i,j,k); + } } }); } // mfi } - for (MFIter mfi(xx,true); mfi.isValid(); ++mfi) + for (MFIter mfi(phi,true); mfi.isValid(); ++mfi) { Box bx = mfi.tilebox(); Box bx2(bx); bx2.grow(0,1); Box bx3(bx2); bx3.grow(1,1); - const Array4& x_arr = xx.array(mfi); + const Array4& phi_arr = phi.array(mfi); ParallelFor(bx3, [=] AMREX_GPU_DEVICE (int i, int j, int k) { if (k == domlo.z) { - x_arr(i,j,k-1) = x_arr(i,j,k); + phi_arr(i,j,k-1) = phi_arr(i,j,k); } else if (k == domhi.z) { - x_arr(i,j,k+1) = x_arr(i,j,k); + phi_arr(i,j,k+1) = phi_arr(i,j,k); } }); } // mfi - auto const& xc = xx.const_arrays(); - ParallelFor(rhs, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) - { - terrpoisson_adotx(i, j, k, y[b], xc[b], zpa[b], dxinv[0], dxinv[1]); - }); + phi.FillBoundary(m_geom.periodicity()); } void TerrainPoisson::getFluxes (MultiFab& phi, @@ -113,42 +133,24 @@ void TerrainPoisson::getFluxes (MultiFab& phi, auto const& x = phi.const_arrays(); auto const& zpa = m_zphys->const_arrays(); - phi.FillBoundary(m_geom.periodicity()); + apply_bcs(phi); auto const& fx = fluxes[0].arrays(); ParallelFor(fluxes[0], [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) { - if (i == domlo.x) { - fx[b](i,j,k) = 0.0; - } else if (i == domhi.x+1) { - fx[b](i,j,k) = 0.0; - } else { - fx[b](i,j,k) = terrpoisson_flux_x(i,j,k,x[b],zpa[b],dxinv[0]); - } + fx[b](i,j,k) = terrpoisson_flux_x(i,j,k,x[b],zpa[b],dxinv[0]); }); auto const& fy = fluxes[1].arrays(); ParallelFor(fluxes[1], [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) { - if (j == domlo.y) { - fy[b](i,j,k) = 0.0; - } else if (j == domhi.y+1) { - fy[b](i,j,k) = 0.0; - } else { - fy[b](i,j,k) = terrpoisson_flux_y(i,j,k,x[b],zpa[b],dxinv[1]); - } + fy[b](i,j,k) = terrpoisson_flux_y(i,j,k,x[b],zpa[b],dxinv[1]); }); auto const& fz = fluxes[2].arrays(); ParallelFor(fluxes[2], [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) { - if (k == domlo.z) { - fz[b](i,j,k) = 0.0; - } else if (k == domhi.z+1) { - fz[b](i,j,k) = 0.0; - } else { - fz[b](i,j,k) = terrpoisson_flux_z(i,j,k,x[b],zpa[b],dxinv[0],dxinv[1]); - } + fz[b](i,j,k) = terrpoisson_flux_z(i,j,k,x[b],zpa[b],dxinv[0],dxinv[1]); }); } @@ -199,8 +201,6 @@ void TerrainPoisson::precond (MultiFab& lhs, MultiFab const& rhs) #ifdef ERF_USE_FFT if (m_use_precond) { - amrex::Print() << "Using the hybrid FFT solver as a preconditioner..." << std::endl; - // Make a version that isn't constant MultiFab& rhs_tmp = const_cast(rhs); @@ -249,3 +249,4 @@ void TerrainPoisson::setToZero(MultiFab& v) { v.setVal(0); } +#endif diff --git a/Source/LinearSolvers/ERF_TerrainPoisson_3D_K.H b/Source/LinearSolvers/ERF_TerrainPoisson_3D_K.H index 93a5e6ca4..ab76fb086 100644 --- a/Source/LinearSolvers/ERF_TerrainPoisson_3D_K.H +++ b/Source/LinearSolvers/ERF_TerrainPoisson_3D_K.H @@ -162,7 +162,6 @@ void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, using amrex::Real; Real h_xi, h_eta, h_zeta; -#if 1 // ********************************************************* // Hi x-face // ********************************************************* @@ -170,8 +169,8 @@ void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, Real px_hi = (x(i+1,j,k) - x(i,j,k)) * dxinv; // On y-edges - Real pz_hi_md_hi = Real(0.25) * ( x(i+1,j,k+1) + x(i,j,k+1) - -x(i+1,j,k ) - x(i,j,k ) ); + Real pz_hi_md_hi = Real(0.5) * ( x(i+1,j ,k+1) + x(i ,j ,k+1) + -x(i+1,j ,k ) - x(i ,j ,k ) ); h_xi = Real(0.25) * ( zp(i+1,j ,k+1) - zp(i-1,j ,k+1) +zp(i+1,j+1,k+1) - zp(i-1,j+1,k+1) ) * dxinv; h_zeta = Real(0.25) * ( zp(i+1,j ,k+2) - zp(i+1,j ,k ) @@ -179,8 +178,8 @@ void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, pz_hi_md_hi *= h_xi / h_zeta; // On y-edges - Real pz_hi_md_lo = Real(0.5) * ( x(i+1,j,k ) + x(i,j,k ) - -x(i+1,j,k-1) - x(i,j,k-1) ); + Real pz_hi_md_lo = Real(0.5) * ( x(i+1,j ,k ) + x(i ,j ,k ) + -x(i+1,j ,k-1) - x(i ,j ,k-1) ); h_xi = Real(0.25) * ( zp(i+1,j ,k ) - zp(i-1,j ,k ) +zp(i+1,j+1,k ) - zp(i-1,j+1,k ) ) * dxinv; h_zeta = Real(0.25) * ( zp(i+1,j ,k+1) - zp(i+1,j ,k-1) @@ -196,8 +195,8 @@ void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, Real px_lo = (x(i,j,k) - x(i-1,j,k)) * dxinv; // On y-edges - Real pz_lo_md_hi = Real(0.5) * ( x(i,j,k+1) + x(i-1,j,k+1) - -x(i,j,k ) - x(i-1,j,k ) ); + Real pz_lo_md_hi = Real(0.5) * ( x(i,j,k+1) + x(i-1,j,k+1) + -x(i,j,k ) - x(i-1,j,k ) ); h_xi = Real(0.25) * ( zp(i,j ,k+1) - zp(i-2,j ,k+1) +zp(i,j+1,k+1) - zp(i-2,j+1,k+1) ) * dxinv; h_zeta = Real(0.25) * ( zp(i,j ,k+2) - zp(i ,j ,k ) @@ -205,9 +204,8 @@ void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, pz_lo_md_hi *= h_xi / h_zeta; // On y-edges - Real pz_lo_md_lo = Real(0.5) * ( x(i,j,k ) + x(i-1,j,k ) - -x(i,j,k-1) - x(i-1,j,k-1) ); - + Real pz_lo_md_lo = Real(0.5) * ( x(i,j,k ) + x(i-1,j,k ) + -x(i,j,k-1) - x(i-1,j,k-1) ); h_xi = Real(0.25) * ( zp(i,j ,k ) - zp(i-2,j ,k ) +zp(i,j+1,k ) - zp(i-2,j+1,k ) ) * dxinv; h_zeta = Real(0.25) * ( zp(i,j ,k+1) - zp(i ,j ,k-1) @@ -224,22 +222,19 @@ void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, Real py_hi = (x(i,j+1,k) - x(i,j,k)) * dyinv; // On x-edges - Real pz_md_hi_hi = Real(0.25) * ( x(i,j+1,k+1) + x(i,j,k+1) - -x(i,j+1,k ) - x(i,j,k ) ); + Real pz_md_hi_hi = Real(0.5) * ( x(i,j+1,k+1) + x(i,j,k+1) + -x(i,j+1,k ) - x(i,j,k ) ); h_eta = Real(0.25) * ( zp(i ,j+1,k+1) - zp(i ,j-1,k+1) +zp(i+1,j+1,k+1) - zp(i+1,j-1,k+1) ) * dyinv; - h_zeta = Real(0.25) * ( zp(i ,j+1,k+2) - zp(i ,j+1,k ) +zp(i+1,j+1,k+2) - zp(i+1,j+1,k ) ); pz_md_hi_hi *= h_eta/ h_zeta; // On x-edges - Real pz_md_hi_lo = Real(0.5) * ( x(i,j+1,k ) + x(i,j,k ) - -x(i,j+1,k-1) - x(i,j,k-1) ); - + Real pz_md_hi_lo = Real(0.5) * ( x(i,j+1,k ) + x(i,j,k ) + -x(i,j+1,k-1) - x(i,j,k-1) ); h_eta = Real(0.25) * ( zp(i ,j+1,k ) - zp(i ,j-1,k) +zp(i+1,j+1,k ) - zp(i+1,j-1,k) ) * dyinv; - h_zeta = Real(0.25) * ( zp(i ,j+1,k+1) - zp(i ,j+1,k-1) +zp(i+1,j+1,k+1) - zp(i+1,j+1,k-1) ); pz_md_hi_lo *= h_eta/ h_zeta; @@ -254,23 +249,19 @@ void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, Real py_lo = (x(i,j,k) - x(i,j-1,k)) * dyinv; // On x-edges - Real pz_md_lo_hi = Real(0.5) * ( x(i,j,k+1) + x(i,j-1,k+1) - -x(i,j,k ) - x(i,j-1,k ) ); - + Real pz_md_lo_hi = Real(0.5) * ( x(i ,j,k+1) + x(i ,j-1,k+1) + -x(i ,j,k ) - x(i ,j-1,k ) ); h_eta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j-2,k+1) +zp(i+1,j,k+1) - zp(i+1,j-2,k+1) ) * dyinv; - h_zeta = Real(0.25) * ( zp(i ,j,k+2) - zp(i ,j ,k ) +zp(i+1,j,k+2) - zp(i+1,j ,k ) ); pz_md_lo_hi *= h_eta/ h_zeta; // On x-edges - Real pz_md_lo_lo = Real(0.5) * ( x(i,j,k ) + x(i,j-1,k ) - -x(i,j,k-1) - x(i,j-1,k-1) ); - + Real pz_md_lo_lo = Real(0.5) * ( x(i ,j,k ) + x(i ,j-1,k ) + -x(i ,j,k-1) - x(i ,j-1,k-1) ); h_eta = Real(0.25) * ( zp(i ,j,k ) - zp(i ,j-2,k ) +zp(i+1,j,k ) - zp(i+1,j-2,k ) ) * dyinv; - h_zeta = Real(0.25) * ( zp(i ,j,k+1) - zp(i ,j ,k-1) +zp(i+1,j,k+1) - zp(i+1,j ,k-1) ); pz_md_lo_lo *= h_eta/ h_zeta; @@ -283,6 +274,9 @@ void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, // ********************************************************* // On z-face Real pz_hi = x(i,j,k+1) - x(i,j,k ); + Real hzeta_inv_on_zhi = 8.0 / ( (zp(i,j,k+2) + zp(i+1,j,k+2) + zp(i,j+1,k+2) + zp(i+1,j+1,k+2)) + -(zp(i,j,k ) + zp(i+1,j,k ) + zp(i,j+1,k ) + zp(i+1,j+1,k )) ); + pz_hi *= hzeta_inv_on_zhi; // On corners Real px_hi_md_hi = Real(0.5) * ( x(i+1,j,k+1) - x(i ,j ,k+1) @@ -297,11 +291,9 @@ void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, // On z-face Real h_xi_on_zhi = 0.5 * ( zp(i+1,j+1,k+1) + zp(i+1,j,k+1) - zp(i,j+1,k+1) - zp(i,j,k+1) ) * dxinv; Real h_eta_on_zhi = 0.5 * ( zp(i+1,j+1,k+1) + zp(i,j+1,k+1) - zp(i+1,j,k+1) - zp(i,j,k+1) ) * dyinv; - - Real hzeta_inv_on_zhi = 8.0 / ( (zp(i,j,k+2) + zp(i+1,j,k+2) + zp(i,j+1,k+2) + zp(i+1,j+1,k+2)) - -(zp(i,j,k ) + zp(i+1,j,k ) + zp(i,j+1,k ) + zp(i+1,j+1,k )) ); - pz_hi *= hzeta_inv_on_zhi * (1.0 + h_xi_on_zhi*h_xi_on_zhi + h_eta_on_zhi*h_eta_on_zhi); - + // + // Note we do not need to recalculate pz_...hi here + // pz_hi -= 0.5 * h_xi_on_zhi * ( (px_hi_md_hi + px_lo_md_hi) - (pz_hi_md_hi + pz_lo_md_hi) ); pz_hi -= 0.5 * h_eta_on_zhi * ( (py_md_hi_hi + py_md_lo_hi) - (pz_md_hi_hi + pz_md_lo_hi) ); @@ -310,6 +302,9 @@ void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, // ********************************************************* // On z-face Real pz_lo = x(i,j,k ) - x(i,j,k-1); + Real hzeta_inv_on_zlo = 8.0 / ( (zp(i,j,k+1) + zp(i+1,j,k+1) + zp(i,j+1,k+1) + zp(i+1,j+1,k+1)) + -(zp(i,j,k-1) + zp(i+1,j,k-1) + zp(i,j+1,k-1) + zp(i+1,j+1,k-1)) ); + pz_lo *= hzeta_inv_on_zlo; // On corners Real px_hi_md_lo = Real(0.5) * ( x(i+1,j ,k ) - x(i ,j ,k ) @@ -324,28 +319,27 @@ void terrpoisson_adotx (int i, int j, int k, amrex::Array4 const& y, // On z-face Real h_xi_on_zlo = 0.5 * (zp(i+1,j+1,k) + zp(i+1,j,k) - zp(i,j+1,k) - zp(i,j,k)) * dxinv; Real h_eta_on_zlo = 0.5 * (zp(i+1,j+1,k) + zp(i,j+1,k) - zp(i+1,j,k) - zp(i,j,k)) * dyinv; - - Real hzeta_inv_on_zlo = 8.0 / ( (zp(i,j,k+1) + zp(i+1,j,k+1) + zp(i,j+1,k+1) + zp(i+1,j+1,k+1)) - -(zp(i,j,k-1) + zp(i+1,j,k-1) + zp(i,j+1,k-1) + zp(i+1,j+1,k-1)) ); - pz_lo *= hzeta_inv_on_zlo * (1.0 + h_xi_on_zlo*h_xi_on_zlo + h_eta_on_zlo*h_eta_on_zlo); - + // + // Note we do not need to recalculate pz_...lo here + // pz_lo -= 0.5 * h_xi_on_zlo * ( (px_hi_md_lo + px_lo_md_lo) - (pz_hi_md_lo + pz_lo_md_lo) ); pz_lo -= 0.5 * h_eta_on_zlo * ( (py_md_hi_lo + py_md_lo_lo) - (pz_md_hi_lo + pz_md_lo_lo) ); -#else + // ********************************************************* + // Version which calls flux routines + // ********************************************************* // // This option uses calls to the flux routines so there is // some duplicated computation // This option should give the same answer as above // - Real px_lo = -terrpoisson_flux_x(i ,j,k,x,zp,dxinv); - Real px_hi = -terrpoisson_flux_x(i+1,j,k,x,zp,dxinv); - Real py_lo = -terrpoisson_flux_y(i,j ,k,x,zp,dxinv); - Real py_hi = -terrpoisson_flux_y(i,j+1,k,x,zp,dyinv); - Real pz_lo = -terrpoisson_flux_z(i,j,k ,x,zp,dxinv,dyinv); - Real pz_hi = -terrpoisson_flux_z(i,j,k+1,x,zp,dxinv,dyinv); -#endif - + // Real px_lo = -terrpoisson_flux_x(i ,j,k,x,zp,dxinv); + // Real px_hi = -terrpoisson_flux_x(i+1,j,k,x,zp,dxinv); + // Real py_lo = -terrpoisson_flux_y(i,j ,k,x,zp,dxinv); + // Real py_hi = -terrpoisson_flux_y(i,j+1,k,x,zp,dyinv); + // Real pz_lo = -terrpoisson_flux_z(i,j,k ,x,zp,dxinv,dyinv); + // Real pz_hi = -terrpoisson_flux_z(i,j,k+1,x,zp,dxinv,dyinv); + // // ********************************************************* // Adotx // ********************************************************* diff --git a/Submodules/AMReX b/Submodules/AMReX index f7f3ee9f5..96db0a665 160000 --- a/Submodules/AMReX +++ b/Submodules/AMReX @@ -1 +1 @@ -Subproject commit f7f3ee9f57170e227af1fee1c6b95e5b41a45af8 +Subproject commit 96db0a665ff1e6bbe638490fd02d3aafb9188f6b