diff --git a/Docs/sphinx_doc/TimeAdvance.rst b/Docs/sphinx_doc/TimeAdvance.rst
index e6d12ef33..872c41409 100644
--- a/Docs/sphinx_doc/TimeAdvance.rst
+++ b/Docs/sphinx_doc/TimeAdvance.rst
@@ -135,8 +135,14 @@ Then the acoustic substepping evolves the equations in the form
             - \frac{\partial (\beta_1 W^{\prime \prime, \tau} + \beta_2 W^{\prime \prime, \tau + \delta \tau})}{\partial z} + R^t_{\rho} \right)
 
-where :math:`\beta_1 = 0.5 (1 - \beta_s)` and :math:`\beta_2 = 0.5 (1 + \beta_s)` with :math:`\beta_s = 0.1`.
-:math:`\beta_s` is the acoustic step off-centering coefficient and 0.1 is the typical WRF value. This off-centering is intended to provide damping of both horizontally and vertically propagating sound waves by biasing the time average toward the future time step.
+where :math:`\beta_1 = 0.5 (1 - \beta_s)` and :math:`\beta_2 = 0.5 (1 + \beta_s)`.
+
+:math:`\beta_s` is the acoustic step off-centering coefficient. When we do implicit substepping, we use
+the typical WRF value of 0.1. This off-centering is intended to provide damping of both horizontally
+and vertically propagating sound waves by biasing the time average toward the future time step.
+
+When we do fully explicit substepping, we set :math:`\beta_s = -1.0`, which sets
+:math:`\beta_1 = 1` and :math:`\beta_2 = 0`.
 
 To solve the coupled system, we first evolve the equations for :math:`U^{\prime \prime, \tau + \delta \tau}` and
 :math:`V^{\prime \prime, \tau + \delta \tau}` explicitly using :math:`\Theta^{\prime \prime, \tau}` which is already known.
@@ -149,10 +155,10 @@ to control horizontally propagating sound waves.
 .. math::
 
    p^{\prime\prime,\tau*} = p^{\prime\prime,\tau}
-                          + \beta_d \left( p^{\prime\prime,\tau} + p^{\prime\prime,\tau-\delta\tau} \right)
+                          + \beta_d \left( p^{\prime\prime,\tau} - p^{\prime\prime,\tau-\delta\tau} \right)
 
 where :math:`\tau*` is the forward projected value used in RHS of the acoustic substepping equations
 for horizontal momentum. According to Skamarock et al,
-This is equivalent to including a horizontal diffusion term in the continuity
+this is equivalent to including a horizontal diffusion term in the continuity
 equation. A typical damping coefficient of :math:`\beta_d = 0.1` is used, as in WRF.
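As a quick illustration of the two limits described in the documentation change above, the off-centered average used in the acoustic substep can be written out explicitly. This is a sketch in the notation of the patch, not text from the patch itself:

```latex
% Off-centered time average used in the acoustic substep, with
%   beta_1 = (1 - beta_s)/2  and  beta_2 = (1 + beta_s)/2,  so  beta_1 + beta_2 = 1.
\beta_1 W^{\prime\prime,\tau} + \beta_2 W^{\prime\prime,\tau+\delta\tau}
  = \tfrac{1}{2}\,(1-\beta_s)\, W^{\prime\prime,\tau}
  + \tfrac{1}{2}\,(1+\beta_s)\, W^{\prime\prime,\tau+\delta\tau}

% beta_s = -1.0 : beta_1 = 1, beta_2 = 0, so the average is W''^{tau} (fully explicit).
% beta_s = +1.0 : beta_1 = 0, beta_2 = 1, so the average is W''^{tau+dtau} (fully implicit).
% beta_s =  0.1 : the WRF default used here for implicit substepping; the average is
%                 weighted slightly toward the future substep, which damps sound waves.
```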
diff --git a/Exec/Radiation/CMakeLists.txt b/Exec/DevTests/Radiation/CMakeLists.txt
similarity index 100%
rename from Exec/Radiation/CMakeLists.txt
rename to Exec/DevTests/Radiation/CMakeLists.txt
diff --git a/Exec/Radiation/ERF_prob.H b/Exec/DevTests/Radiation/ERF_prob.H
similarity index 100%
rename from Exec/Radiation/ERF_prob.H
rename to Exec/DevTests/Radiation/ERF_prob.H
diff --git a/Exec/Radiation/ERF_prob.cpp b/Exec/DevTests/Radiation/ERF_prob.cpp
similarity index 100%
rename from Exec/Radiation/ERF_prob.cpp
rename to Exec/DevTests/Radiation/ERF_prob.cpp
diff --git a/Exec/Radiation/GNUmakefile b/Exec/DevTests/Radiation/GNUmakefile
similarity index 100%
rename from Exec/Radiation/GNUmakefile
rename to Exec/DevTests/Radiation/GNUmakefile
diff --git a/Exec/Radiation/Make.package b/Exec/DevTests/Radiation/Make.package
similarity index 100%
rename from Exec/Radiation/Make.package
rename to Exec/DevTests/Radiation/Make.package
diff --git a/Exec/Radiation/README b/Exec/DevTests/Radiation/README
similarity index 100%
rename from Exec/Radiation/README
rename to Exec/DevTests/Radiation/README
diff --git a/Exec/Radiation/inputs_radiation b/Exec/DevTests/Radiation/inputs_radiation
similarity index 100%
rename from Exec/Radiation/inputs_radiation
rename to Exec/DevTests/Radiation/inputs_radiation
diff --git a/Source/TimeIntegration/ERF_TI_fast_headers.H b/Source/TimeIntegration/ERF_TI_fast_headers.H
index 4504db371..5c470d9da 100644
--- a/Source/TimeIntegration/ERF_TI_fast_headers.H
+++ b/Source/TimeIntegration/ERF_TI_fast_headers.H
@@ -37,7 +37,8 @@ void erf_fast_rhs_N (int step, int nrk, int level, int finest_level,
                      std::unique_ptr& mapfac_v,
                      amrex::YAFluxRegister* fr_as_crse,
                      amrex::YAFluxRegister* fr_as_fine,
-                     bool l_use_moisture, bool l_reflux);
+                     bool l_use_moisture, bool l_reflux,
+                     bool l_implicit_substepping);
 
 /**
  * Function for computing the fast RHS with fixed terrain
@@ -64,7 +65,8 @@ void erf_fast_rhs_T (int step, int nrk, int level, int finest_level,
                      std::unique_ptr& mapfac_v,
                      amrex::YAFluxRegister* fr_as_crse,
                      amrex::YAFluxRegister* fr_as_fine,
-                     bool l_use_moisture, bool l_reflux);
+                     bool l_use_moisture, bool l_reflux,
+                     bool l_implicit_substepping);
 
 /**
  * Function for computing the fast RHS with moving terrain
@@ -98,7 +100,8 @@ void erf_fast_rhs_MT (int step, int nrk, int level, int finest_level,
                       std::unique_ptr& mapfac_v,
                       amrex::YAFluxRegister* fr_as_crse,
                       amrex::YAFluxRegister* fr_as_fine,
-                      bool l_use_moisture, bool l_reflux);
+                      bool l_use_moisture, bool l_reflux,
+                      bool l_implicit_substepping);
 
 /**
  * Function for computing the coefficients for the tridiagonal solver used in the fast
diff --git a/Source/TimeIntegration/ERF_TI_fast_rhs_fun.H b/Source/TimeIntegration/ERF_TI_fast_rhs_fun.H
index f61dba333..c3fa90a70 100644
--- a/Source/TimeIntegration/ERF_TI_fast_rhs_fun.H
+++ b/Source/TimeIntegration/ERF_TI_fast_rhs_fun.H
@@ -20,7 +20,13 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
     // Per p2902 of Klemp-Skamarock-Dudhia-2007
     // beta_s = -1.0 : fully explicit
     // beta_s =  1.0 : fully implicit
-    Real beta_s = 0.1;
+    Real beta_s;
+    if (solverChoice.substepping_type[level] == SubsteppingType::Implicit)
+    {
+        beta_s = 0.1;
+    } else { // Fully explicit
+        beta_s = -1.0;
+    }
 
     // *************************************************************************
     // Set up flux registers if using two_way coupling
@@ -99,7 +105,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
                             detJ_cc[level], detJ_cc_new[level], detJ_cc_src[level],
                             dtau, beta_s, inv_fac,
                             mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                            fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                            fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
         } else { // If this is not the first substep we pass in S_data as the previous step's solution
             erf_fast_rhs_MT(fast_step, nrk, level, finest_level,
@@ -111,7 +117,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
                             detJ_cc[level], detJ_cc_new[level], detJ_cc_src[level],
                             dtau, beta_s, inv_fac,
                             mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                            fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                            fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
         }
     } else if (solverChoice.use_terrain && solverChoice.terrain_type == TerrainType::Static) {
         if (fast_step == 0) {
@@ -127,7 +133,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
                            S_data, S_scratch, fine_geom, solverChoice.gravity, Omega,
                            z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac,
                            mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                           fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                           fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
         } else { // If this is not the first substep we pass in S_data as the previous step's solution
             erf_fast_rhs_T(fast_step, nrk, level, finest_level,
@@ -135,7 +141,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
                            S_data, S_scratch, fine_geom, solverChoice.gravity, Omega,
                            z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac,
                            mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                           fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                           fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
         }
     } else {
         if (fast_step == 0) {
@@ -151,7 +157,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
                            S_data, S_scratch, fine_geom, solverChoice.gravity,
                            dtau, beta_s, inv_fac,
                            mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                           fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                           fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
         } else { // If this is not the first substep we pass in S_data as the previous step's solution
             erf_fast_rhs_N(fast_step, nrk, level, finest_level,
@@ -159,7 +165,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
                            S_data, S_scratch, fine_geom, solverChoice.gravity,
                            dtau, beta_s, inv_fac,
                            mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                           fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                           fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
         }
     }
diff --git a/Source/TimeIntegration/ERF_advance_dycore.cpp b/Source/TimeIntegration/ERF_advance_dycore.cpp
index 4761c8646..8c78fcfb3 100644
--- a/Source/TimeIntegration/ERF_advance_dycore.cpp
+++ b/Source/TimeIntegration/ERF_advance_dycore.cpp
@@ -90,6 +90,7 @@ void ERF::advance_dycore(int level,
     bool l_use_kturb    = ( (tc.les_type != LESType::None) ||
                             (tc.pbl_type != PBLType::None) );
     bool l_use_moisture = ( solverChoice.moisture_type != MoistureType::None );
+    bool l_implicit_substepping = ( solverChoice.substepping_type[level] == SubsteppingType::Implicit );
 
     const bool use_most = (m_most != nullptr);
     const bool exp_most = (solverChoice.use_explicit_most);
diff --git a/Source/TimeIntegration/ERF_fast_rhs_MT.cpp b/Source/TimeIntegration/ERF_fast_rhs_MT.cpp
index 1ff3b524b..4bc336b1e 100644
--- a/Source/TimeIntegration/ERF_fast_rhs_MT.cpp
+++ b/Source/TimeIntegration/ERF_fast_rhs_MT.cpp
@@ -71,7 +71,8 @@ void erf_fast_rhs_MT (int step, int nrk,
                       YAFluxRegister* fr_as_crse,
                       YAFluxRegister* fr_as_fine,
                       bool l_use_moisture,
-                      bool l_reflux)
+                      bool l_reflux,
+                      bool l_implicit_substepping)
 {
     BL_PROFILE_REGION("erf_fast_rhs_MT()");
diff --git a/Source/TimeIntegration/ERF_fast_rhs_N.cpp b/Source/TimeIntegration/ERF_fast_rhs_N.cpp
index fec38e38e..46b33d73b 100644
--- a/Source/TimeIntegration/ERF_fast_rhs_N.cpp
+++ b/Source/TimeIntegration/ERF_fast_rhs_N.cpp
@@ -51,7 +51,8 @@ void erf_fast_rhs_N (int step, int nrk,
                      YAFluxRegister* fr_as_crse,
                      YAFluxRegister* fr_as_fine,
                      bool l_use_moisture,
-                     bool l_reflux)
+                     bool l_reflux,
+                     bool l_implicit_substepping)
 {
     BL_PROFILE_REGION("erf_fast_rhs_N()");
 
@@ -368,8 +369,6 @@ void erf_fast_rhs_N (int step, int nrk,
     // *********************************************************************
     // fast_loop_on_shrunk
     // *********************************************************************
-    {
-    BL_PROFILE("fast_loop_on_shrunk");
     //Note we don't act on the bottom or top boundaries of the domain
     ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
     {
@@ -409,90 +408,129 @@ void erf_fast_rhs_N (int step, int nrk,
         // line 1
         RHS_a(i,j,k) = Omega_k + dtau * (slow_rhs_rho_w(i,j,k) + R0_tmp + dtau * beta_2 * R1_tmp);
-    });
-    } // end profile
+    }); // bx_shrunk_in_k
 
     Box b2d = tbz; // Copy constructor
     b2d.setRange(2,0);
 
-    {
-    BL_PROFILE("fast_rhs_b2d_loop");
 #ifdef AMREX_USE_GPU
     auto const lo = lbound(bx);
     auto const hi = ubound(bx);
-    ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
-    {
-        // w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
-        RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
-        // w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
-        // TODO TODO: Note that if we ever change this, we will need to include it in avg_zmom at the top
-        RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
+    if (l_implicit_substepping) {
+
+        ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
+        {
+            // w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
+            RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
+
+            // w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
+            RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
+
+            // w = specified Dirichlet value at k = lo.z
+            soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
+            cur_zmom(i,j,lo.z) = stage_zmom(i,j,lo.z) + soln_a(i,j,lo.z);
+
+            for (int k = lo.z+1; k <= hi.z+1; k++) {
+                soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
+            }
 
-        // w = specified Dirichlet value at k = lo.z
-        soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
-        cur_zmom(i,j,lo.z) = stage_zmom(i,j,lo.z) + soln_a(i,j,lo.z);
+            cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
 
-        for (int k = lo.z+1; k <= hi.z+1; k++) {
-            soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
-        }
+            for (int k = hi.z; k >= lo.z; k--) {
+                soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
+                cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
+            }
+        }); // b2d
 
-        cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
+    } else { // explicit substepping
 
-        for (int k = hi.z; k >= lo.z; k--) {
-            soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
-            cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
-        }
-    }); // b2d
+        ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
+        {
+            // w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
+            RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
+
+            // w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
+            RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
+
+            for (int k = lo.z; k <= hi.z+1; k++) {
+                soln_a(i,j,k) = RHS_a(i,j,k) * inv_coeffB_a(i,j,k);
+                cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
+            }
+        }); // b2d
+    } // end of explicit substepping
 #else
     auto const lo = lbound(bx);
     auto const hi = ubound(bx);
-    for (int j = lo.y; j <= hi.y; ++j) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            // w at bottom boundary of grid is 0 if at domain boundary, otherwise w_old + dtau * slow_rhs
-            RHS_a (i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
-            soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
+
+    if (l_implicit_substepping) {
+
+        for (int j = lo.y; j <= hi.y; ++j) {
+            AMREX_PRAGMA_SIMD
+            for (int i = lo.x; i <= hi.x; ++i) {
+                // w at bottom boundary of grid is 0 if at domain boundary, otherwise w_old + dtau * slow_rhs
+                RHS_a (i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
+                soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
+            }
         }
-    }
-    // Note that if we ever change this, we will need to include it in avg_zmom at the top
-    for (int j = lo.y; j <= hi.y; ++j) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            RHS_a (i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
+        // Note that if we ever change this, we will need to include it in avg_zmom at the top
+        for (int j = lo.y; j <= hi.y; ++j) {
+            AMREX_PRAGMA_SIMD
+            for (int i = lo.x; i <= hi.x; ++i) {
+                RHS_a (i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
+            }
+        }
+        for (int k = lo.z+1; k <= hi.z+1; ++k) {
+            for (int j = lo.y; j <= hi.y; ++j) {
+                AMREX_PRAGMA_SIMD
+                for (int i = lo.x; i <= hi.x; ++i) {
+                    soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
+                }
+            }
         }
-    }
-    for (int k = lo.z+1; k <= hi.z+1; ++k) {
         for (int j = lo.y; j <= hi.y; ++j) {
             AMREX_PRAGMA_SIMD
             for (int i = lo.x; i <= hi.x; ++i) {
-                soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
+                cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
             }
         }
-    }
-    for (int j = lo.y; j <= hi.y; ++j) {
-        AMREX_PRAGMA_SIMD
-        for (int i = lo.x; i <= hi.x; ++i) {
-            cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
+        for (int k = hi.z; k >= lo.z; --k) {
+            for (int j = lo.y; j <= hi.y; ++j) {
+                AMREX_PRAGMA_SIMD
+                for (int i = lo.x; i <= hi.x; ++i) {
+                    soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
+                    cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
+                }
+            }
         }
-    }
-    for (int k = hi.z; k >= lo.z; --k) {
+    } else { // explicit substepping
+
         for (int j = lo.y; j <= hi.y; ++j) {
             AMREX_PRAGMA_SIMD
             for (int i = lo.x; i <= hi.x; ++i) {
-                soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
-                cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
+                RHS_a (i,j,lo.z ) = dtau * slow_rhs_rho_w(i,j,lo.z);
+                RHS_a (i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
             }
         }
-    }
+
+        for (int k = lo.z; k <= hi.z+1; ++k) {
+            for (int j = lo.y; j <= hi.y; ++j) {
+                AMREX_PRAGMA_SIMD
+                for (int i = lo.x; i <= hi.x; ++i) {
+                    AMREX_ASSERT(coeffA_a(i,j,k) == 0.0);
+                    AMREX_ASSERT(coeffC_a(i,j,k) == 0.0);
+                    soln_a(i,j,k) = RHS_a(i,j,k) * inv_coeffB_a(i,j,k);
+                    cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
+                }
+            }
+        }
+
+    } // end of explicit substepping
 #endif
-    } // end profile
 
     // **************************************************************************
     // Define updates in the RHS of rho and (rho theta)
     // **************************************************************************
-    {
-    BL_PROFILE("fast_rho_final_update");
     ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
     {
         Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * old_drho_w(i,j,k );
@@ -512,7 +550,6 @@ void erf_fast_rhs_N (int step, int nrk,
         temp_rhs_arr(i,j,k,RhoTheta_comp) += 0.5 * dzi * ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
                                                          - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) );
     });
-    } // end profile
 
     // We only add to the flux registers in the final RK step
     if (l_reflux && nrk == 2) {
diff --git a/Source/TimeIntegration/ERF_fast_rhs_T.cpp b/Source/TimeIntegration/ERF_fast_rhs_T.cpp
index a26289176..97dd498b7 100644
--- a/Source/TimeIntegration/ERF_fast_rhs_T.cpp
+++ b/Source/TimeIntegration/ERF_fast_rhs_T.cpp
@@ -57,7 +57,8 @@ void erf_fast_rhs_T (int step, int nrk,
                      YAFluxRegister* fr_as_crse,
                      YAFluxRegister* fr_as_fine,
                      bool l_use_moisture,
-                     bool l_reflux)
+                     bool l_reflux,
+                     bool l_implicit_substepping)
 {
     BL_PROFILE_REGION("erf_fast_rhs_T()");
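For readers following the `l_implicit_substepping` branches above, here is a minimal, self-contained C++ sketch (not ERF code; `a`, `b`, `c`, `rhs`, `solve_column_implicit`, and `solve_column_explicit` are hypothetical names) of the per-column vertical solve that the flag toggles: a tridiagonal (Thomas) solve when substepping is implicit, and the diagonal update it collapses to when the off-diagonal coefficients vanish, which is what the `AMREX_ASSERT` calls in the explicit branch check. In ERF the elimination factors are precomputed (`inv_coeffB_a`), so the substep itself only performs the forward and backward sweeps shown in the patch.

```cpp
// Sketch of the column solve behind the implicit/explicit substepping branches.
// Solves a[k]*w[k-1] + b[k]*w[k] + c[k]*w[k+1] = rhs[k] for one vertical column.
#include <cassert>
#include <vector>

// Implicit substepping: classic Thomas algorithm (forward elimination, then
// back substitution), analogous to the two k-loops in the implicit branch.
std::vector<double> solve_column_implicit (const std::vector<double>& a,
                                           const std::vector<double>& b,
                                           const std::vector<double>& c,
                                           const std::vector<double>& rhs)
{
    const int n = static_cast<int>(b.size());
    std::vector<double> w(n), cprime(n);
    cprime[0] = c[0] / b[0];
    w[0]      = rhs[0] / b[0];
    for (int k = 1; k < n; ++k) {                      // forward sweep
        double denom = b[k] - a[k] * cprime[k-1];
        cprime[k] = c[k] / denom;
        w[k]      = (rhs[k] - a[k] * w[k-1]) / denom;
    }
    for (int k = n-2; k >= 0; --k) {                   // back substitution
        w[k] -= cprime[k] * w[k+1];
    }
    return w;
}

// Explicit substepping (beta_s = -1): the vertical coupling drops out, so the
// off-diagonals are zero and each level reduces to a diagonal scaling.
std::vector<double> solve_column_explicit (const std::vector<double>& a,
                                           const std::vector<double>& b,
                                           const std::vector<double>& c,
                                           const std::vector<double>& rhs)
{
    const int n = static_cast<int>(b.size());
    std::vector<double> w(n);
    for (int k = 0; k < n; ++k) {
        assert(a[k] == 0.0 && c[k] == 0.0);            // mirrors the AMREX_ASSERTs above
        w[k] = rhs[k] / b[k];                          // i.e. RHS_a * inv_coeffB_a in the patch
    }
    return w;
}
```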