Skip to content

Commit

Permalink
add option for explicit substepping
Browse files Browse the repository at this point in the history
  • Loading branch information
asalmgren committed Oct 19, 2024
1 parent 6a3cdfb commit 1860ff0
Show file tree
Hide file tree
Showing 14 changed files with 126 additions and 71 deletions.
14 changes: 10 additions & 4 deletions Docs/sphinx_doc/TimeAdvance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,14 @@ Then the acoustic substepping evolves the equations in the form
- \frac{\partial (\beta_1 W^{\prime \prime, \tau} + \beta_2 W^{\prime \prime, \tau + \delta \tau})}{\partial z} + R^t_{\rho}
\right)
where :math:`\beta_1 = 0.5 (1 - \beta_s)` and :math:`\beta_2 = 0.5 (1 + \beta_s)` with :math:`\beta_s = 0.1`.
:math:`\beta_s` is the acoustic step off-centering coefficient and 0.1 is the typical WRF value. This off-centering is intended to provide damping of both horizontally and vertically propagating sound waves by biasing the time average toward the future time step.
where :math:`\beta_1 = 0.5 (1 - \beta_s)` and :math:`\beta_2 = 0.5 (1 + \beta_s)`.

:math:`\beta_s` is the acoustic step off-centering coefficient. When we do implicit substepping, we use
the typical WRF value of 0.1. This off-centering is intended to provide damping of both horizontally
and vertically propagating sound waves by biasing the time average toward the future time step.

When we do fully explicit substepping, we set :math:`\beta_s = -1.0`, which sets
:math:`\beta_1 = 1` and :math:`\beta_2 = 0`.

To solve the coupled system, we first evolve the equations for :math:`U^{\prime \prime, \tau + \delta \tau}` and
:math:`V^{\prime \prime, \tau + \delta \tau}` explicitly using :math:`\Theta^{\prime \prime, \tau}` which is already known.
Expand All @@ -149,10 +155,10 @@ to control horizontally propagating sound waves.
.. math::
p^{\prime\prime,\tau*} = p^{\prime\prime,\tau}
+ \beta_d \left( p^{\prime\prime,\tau} + p^{\prime\prime,\tau-\delta\tau} \right)
+ \beta_d \left( p^{\prime\prime,\tau} - p^{\prime\prime,\tau-\delta\tau} \right)
where :math:`\tau*` is the forward projected value used in RHS of the acoustic
substepping equations for horizontal momentum. According to Skamarock et al,
This is equivalent to including a horizontal diffusion term in the continuity
this is equivalent to including a horizontal diffusion term in the continuity
equation. A typical damping coefficient of :math:`\beta_d = 0.1` is used, as in
WRF.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
9 changes: 6 additions & 3 deletions Source/TimeIntegration/ERF_TI_fast_headers.H
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ void erf_fast_rhs_N (int step, int nrk, int level, int finest_level,
std::unique_ptr<amrex::MultiFab>& mapfac_v,
amrex::YAFluxRegister* fr_as_crse,
amrex::YAFluxRegister* fr_as_fine,
bool l_use_moisture, bool l_reflux);
bool l_use_moisture, bool l_reflux,
bool l_implicit_substepping);

/**
* Function for computing the fast RHS with fixed terrain
Expand All @@ -64,7 +65,8 @@ void erf_fast_rhs_T (int step, int nrk, int level, int finest_level,
std::unique_ptr<amrex::MultiFab>& mapfac_v,
amrex::YAFluxRegister* fr_as_crse,
amrex::YAFluxRegister* fr_as_fine,
bool l_use_moisture, bool l_reflux);
bool l_use_moisture, bool l_reflux,
bool l_implicit_substepping);

/**
* Function for computing the fast RHS with moving terrain
Expand Down Expand Up @@ -98,7 +100,8 @@ void erf_fast_rhs_MT (int step, int nrk, int level, int finest_level,
std::unique_ptr<amrex::MultiFab>& mapfac_v,
amrex::YAFluxRegister* fr_as_crse,
amrex::YAFluxRegister* fr_as_fine,
bool l_use_moisture, bool l_reflux);
bool l_use_moisture, bool l_reflux,
bool l_implicit_substepping);

/**
* Function for computing the coefficients for the tridiagonal solver used in the fast
Expand Down
20 changes: 13 additions & 7 deletions Source/TimeIntegration/ERF_TI_fast_rhs_fun.H
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
// Per p2902 of Klemp-Skamarock-Dudhia-2007
// beta_s = -1.0 : fully explicit
// beta_s = 1.0 : fully implicit
Real beta_s = 0.1;
Real beta_s;
if (solverChoice.substepping_type[level] == SubsteppingType::Implicit)
{
beta_s = 0.1;
} else { // Fully explicit
beta_s = -1.0;
}

// *************************************************************************
// Set up flux registers if using two_way coupling
Expand Down Expand Up @@ -99,7 +105,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
detJ_cc[level], detJ_cc_new[level], detJ_cc_src[level],
dtau, beta_s, inv_fac,
mapfac_m[level], mapfac_u[level], mapfac_v[level],
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
} else {
// If this is not the first substep we pass in S_data as the previous step's solution
erf_fast_rhs_MT(fast_step, nrk, level, finest_level,
Expand All @@ -111,7 +117,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
detJ_cc[level], detJ_cc_new[level], detJ_cc_src[level],
dtau, beta_s, inv_fac,
mapfac_m[level], mapfac_u[level], mapfac_v[level],
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
}
} else if (solverChoice.use_terrain && solverChoice.terrain_type == TerrainType::Static) {
if (fast_step == 0) {
Expand All @@ -127,15 +133,15 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
S_data, S_scratch, fine_geom, solverChoice.gravity, Omega,
z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac,
mapfac_m[level], mapfac_u[level], mapfac_v[level],
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
} else {
// If this is not the first substep we pass in S_data as the previous step's solution
erf_fast_rhs_T(fast_step, nrk, level, finest_level,
S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs,
S_data, S_scratch, fine_geom, solverChoice.gravity, Omega,
z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac,
mapfac_m[level], mapfac_u[level], mapfac_v[level],
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
}
} else {
if (fast_step == 0) {
Expand All @@ -151,15 +157,15 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
S_data, S_scratch, fine_geom, solverChoice.gravity,
dtau, beta_s, inv_fac,
mapfac_m[level], mapfac_u[level], mapfac_v[level],
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
} else {
// If this is not the first substep we pass in S_data as the previous step's solution
erf_fast_rhs_N(fast_step, nrk, level, finest_level,
S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs,
S_data, S_scratch, fine_geom, solverChoice.gravity,
dtau, beta_s, inv_fac,
mapfac_m[level], mapfac_u[level], mapfac_v[level],
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
}
}

Expand Down
1 change: 1 addition & 0 deletions Source/TimeIntegration/ERF_advance_dycore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ void ERF::advance_dycore(int level,
bool l_use_kturb = ( (tc.les_type != LESType::None) ||
(tc.pbl_type != PBLType::None) );
bool l_use_moisture = ( solverChoice.moisture_type != MoistureType::None );
bool l_implicit_substepping = ( solverChoice.substepping_type[level] == SubsteppingType::Implicit );

const bool use_most = (m_most != nullptr);
const bool exp_most = (solverChoice.use_explicit_most);
Expand Down
3 changes: 2 additions & 1 deletion Source/TimeIntegration/ERF_fast_rhs_MT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ void erf_fast_rhs_MT (int step, int nrk,
YAFluxRegister* fr_as_crse,
YAFluxRegister* fr_as_fine,
bool l_use_moisture,
bool l_reflux)
bool l_reflux,
bool l_implicit_substepping)
{
BL_PROFILE_REGION("erf_fast_rhs_MT()");

Expand Down
147 changes: 92 additions & 55 deletions Source/TimeIntegration/ERF_fast_rhs_N.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ void erf_fast_rhs_N (int step, int nrk,
YAFluxRegister* fr_as_crse,
YAFluxRegister* fr_as_fine,
bool l_use_moisture,
bool l_reflux)
bool l_reflux,
bool l_implicit_substepping)
{
BL_PROFILE_REGION("erf_fast_rhs_N()");

Expand Down Expand Up @@ -368,8 +369,6 @@ void erf_fast_rhs_N (int step, int nrk,
// *********************************************************************
// fast_loop_on_shrunk
// *********************************************************************
{
BL_PROFILE("fast_loop_on_shrunk");
//Note we don't act on the bottom or top boundaries of the domain
ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
{
Expand Down Expand Up @@ -409,90 +408,129 @@ void erf_fast_rhs_N (int step, int nrk,

// line 1
RHS_a(i,j,k) = Omega_k + dtau * (slow_rhs_rho_w(i,j,k) + R0_tmp + dtau * beta_2 * R1_tmp);
});
} // end profile
}); // bx_shrunk_in_k

Box b2d = tbz; // Copy constructor
b2d.setRange(2,0);

{
BL_PROFILE("fast_rhs_b2d_loop");
#ifdef AMREX_USE_GPU
auto const lo = lbound(bx);
auto const hi = ubound(bx);
ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
{
// w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);

// w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
// TODO TODO: Note that if we ever change this, we will need to include it in avg_zmom at the top
RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
if (l_implicit_substepping) {

ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
{
// w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);

// w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);

// w = specified Dirichlet value at k = lo.z
soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
cur_zmom(i,j,lo.z) = stage_zmom(i,j,lo.z) + soln_a(i,j,lo.z);

for (int k = lo.z+1; k <= hi.z+1; k++) {
soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
}

// w = specified Dirichlet value at k = lo.z
soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
cur_zmom(i,j,lo.z) = stage_zmom(i,j,lo.z) + soln_a(i,j,lo.z);
cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);

for (int k = lo.z+1; k <= hi.z+1; k++) {
soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
}
for (int k = hi.z; k >= lo.z; k--) {
soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
}
}); // b2d

cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
} else { // explicit substepping

for (int k = hi.z; k >= lo.z; k--) {
soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
}
}); // b2d
ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
{
// w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);

// w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);

for (int k = lo.z; k <= hi.z+1; k++) {
soln_a(i,j,k) = RHS_a(i,j,k) * inv_coeffB_a(i,j,k);
cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
}
}); // b2d
} // end of explicit substepping
#else
auto const lo = lbound(bx);
auto const hi = ubound(bx);
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
// w at bottom boundary of grid is 0 if at domain boundary, otherwise w_old + dtau * slow_rhs
RHS_a (i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);

if (l_implicit_substepping) {

for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
// w at bottom boundary of grid is 0 if at domain boundary, otherwise w_old + dtau * slow_rhs
RHS_a (i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
}
}
}
// Note that if we ever change this, we will need to include it in avg_zmom at the top
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
RHS_a (i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
// Note that if we ever change this, we will need to include it in avg_zmom at the top
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
RHS_a (i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
}
}
for (int k = lo.z+1; k <= hi.z+1; ++k) {
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
}
}
}
}
for (int k = lo.z+1; k <= hi.z+1; ++k) {
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
}
}
}
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
for (int k = hi.z; k >= lo.z; --k) {
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
}
}
}
}
for (int k = hi.z; k >= lo.z; --k) {
} else { // explicit substepping

for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
RHS_a (i,j,lo.z ) = dtau * slow_rhs_rho_w(i,j,lo.z);
RHS_a (i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
}
}
}

for (int k = lo.z; k <= hi.z+1; ++k) {
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
AMREX_ASSERT(coeffA_a(i,j,k) == 0.0);
AMREX_ASSERT(coeffC_a(i,j,k) == 0.0);
soln_a(i,j,k) = RHS_a(i,j,k) * inv_coeffB_a(i,j,k);
cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
}
}
}

} // end of explicit substepping
#endif
} // end profile

// **************************************************************************
// Define updates in the RHS of rho and (rho theta)
// **************************************************************************
{
BL_PROFILE("fast_rho_final_update");
ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
{
Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * old_drho_w(i,j,k );
Expand All @@ -512,7 +550,6 @@ void erf_fast_rhs_N (int step, int nrk,
temp_rhs_arr(i,j,k,RhoTheta_comp) += 0.5 * dzi * ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
- zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) );
});
} // end profile

// We only add to the flux registers in the final RK step
if (l_reflux && nrk == 2) {
Expand Down
3 changes: 2 additions & 1 deletion Source/TimeIntegration/ERF_fast_rhs_T.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ void erf_fast_rhs_T (int step, int nrk,
YAFluxRegister* fr_as_crse,
YAFluxRegister* fr_as_fine,
bool l_use_moisture,
bool l_reflux)
bool l_reflux,
bool l_implicit_substepping)
{
BL_PROFILE_REGION("erf_fast_rhs_T()");

Expand Down

0 comments on commit 1860ff0

Please sign in to comment.