add option for explicit substepping

erf-model · Oct 19, 2024 · 1860ff0 · 1860ff0
1 parent 6a3cdfb
commit 1860ff0
Show file tree

Hide file tree

Showing 14 changed files with 126 additions and 71 deletions.
diff --git a/Docs/sphinx_doc/TimeAdvance.rst b/Docs/sphinx_doc/TimeAdvance.rst
@@ -135,8 +135,14 @@ Then the acoustic substepping evolves the equations in the form
           - \frac{\partial (\beta_1 W^{\prime \prime, \tau} + \beta_2 W^{\prime \prime, \tau + \delta \tau})}{\partial z} +  R^t_{\rho}
             \right)
 
-where :math:`\beta_1 = 0.5 (1 - \beta_s)` and :math:`\beta_2 = 0.5 (1 + \beta_s)` with :math:`\beta_s = 0.1`.
-:math:`\beta_s` is the acoustic step off-centering coefficient and 0.1 is the typical WRF value. This off-centering is intended to provide damping of both horizontally and vertically propagating sound waves by biasing the time average toward the future time step.
+where :math:`\beta_1 = 0.5 (1 - \beta_s)` and :math:`\beta_2 = 0.5 (1 + \beta_s)`.
+
+:math:`\beta_s` is the acoustic step off-centering coefficient.  When we do implicit substepping, we use
+the typical WRF value of 0.1. This off-centering is intended to provide damping of both horizontally
+and vertically propagating sound waves by biasing the time average toward the future time step.
+
+When we do fully explicit substepping, we set :math:`\beta_s = -1.0`, which sets
+:math:`\beta_1 = 1` and :math:`\beta_2 = 0`.
 
 To solve the coupled system, we first evolve the equations for :math:`U^{\prime \prime, \tau + \delta \tau}`  and
 :math:`V^{\prime \prime, \tau + \delta \tau}` explicitly using :math:`\Theta^{\prime \prime, \tau}` which is already known.
@@ -149,10 +155,10 @@ to control horizontally propagating sound waves.
 .. math::
 
    p^{\prime\prime,\tau*} = p^{\prime\prime,\tau}
-     + \beta_d \left( p^{\prime\prime,\tau} + p^{\prime\prime,\tau-\delta\tau} \right)
+     + \beta_d \left( p^{\prime\prime,\tau} - p^{\prime\prime,\tau-\delta\tau} \right)
 
 where :math:`\tau*` is the forward projected value used in RHS of the acoustic
 substepping equations for horizontal momentum. According to Skamarock et al,
-This is equivalent to including a horizontal diffusion term in the continuity
+this is equivalent to including a horizontal diffusion term in the continuity
 equation. A typical damping coefficient of :math:`\beta_d = 0.1` is used, as in
 WRF.
diff --git a/Exec/Radiation/CMakeLists.txt → Exec/DevTests/Radiation/CMakeLists.txt b/Exec/Radiation/CMakeLists.txt → Exec/DevTests/Radiation/CMakeLists.txt
diff --git a/Exec/Radiation/ERF_prob.H → Exec/DevTests/Radiation/ERF_prob.H b/Exec/Radiation/ERF_prob.H → Exec/DevTests/Radiation/ERF_prob.H
diff --git a/Exec/Radiation/ERF_prob.cpp → Exec/DevTests/Radiation/ERF_prob.cpp b/Exec/Radiation/ERF_prob.cpp → Exec/DevTests/Radiation/ERF_prob.cpp
diff --git a/Exec/Radiation/GNUmakefile → Exec/DevTests/Radiation/GNUmakefile b/Exec/Radiation/GNUmakefile → Exec/DevTests/Radiation/GNUmakefile
diff --git a/Exec/Radiation/Make.package → Exec/DevTests/Radiation/Make.package b/Exec/Radiation/Make.package → Exec/DevTests/Radiation/Make.package
diff --git a/Exec/Radiation/README → Exec/DevTests/Radiation/README b/Exec/Radiation/README → Exec/DevTests/Radiation/README
diff --git a/Exec/Radiation/inputs_radiation → Exec/DevTests/Radiation/inputs_radiation b/Exec/Radiation/inputs_radiation → Exec/DevTests/Radiation/inputs_radiation
diff --git a/Source/TimeIntegration/ERF_TI_fast_headers.H b/Source/TimeIntegration/ERF_TI_fast_headers.H
@@ -37,7 +37,8 @@ void erf_fast_rhs_N (int step, int nrk, int level, int finest_level,
                      std::unique_ptr<amrex::MultiFab>& mapfac_v,
                      amrex::YAFluxRegister* fr_as_crse,
                      amrex::YAFluxRegister* fr_as_fine,
-                     bool l_use_moisture, bool l_reflux);
+                     bool l_use_moisture, bool l_reflux,
+                     bool l_implicit_substepping);
 
 /**
  * Function for computing the fast RHS with fixed terrain
@@ -64,7 +65,8 @@ void erf_fast_rhs_T (int step, int nrk, int level, int finest_level,
                      std::unique_ptr<amrex::MultiFab>& mapfac_v,
                      amrex::YAFluxRegister* fr_as_crse,
                      amrex::YAFluxRegister* fr_as_fine,
-                     bool l_use_moisture, bool l_reflux);
+                     bool l_use_moisture, bool l_reflux,
+                     bool l_implicit_substepping);
 
 /**
  * Function for computing the fast RHS with moving terrain
@@ -98,7 +100,8 @@ void erf_fast_rhs_MT (int step, int nrk, int level, int finest_level,
                       std::unique_ptr<amrex::MultiFab>& mapfac_v,
                       amrex::YAFluxRegister* fr_as_crse,
                       amrex::YAFluxRegister* fr_as_fine,
-                      bool l_use_moisture, bool l_reflux);
+                      bool l_use_moisture, bool l_reflux,
+                      bool l_implicit_substepping);
 
 /**
  * Function for computing the coefficients for the tridiagonal solver used in the fast

diff --git a/Source/TimeIntegration/ERF_TI_fast_rhs_fun.H b/Source/TimeIntegration/ERF_TI_fast_rhs_fun.H
@@ -20,7 +20,13 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
         // Per p2902 of Klemp-Skamarock-Dudhia-2007
         // beta_s = -1.0 : fully explicit
         // beta_s =  1.0 : fully implicit
-        Real beta_s = 0.1;
+        Real beta_s;
+        if (solverChoice.substepping_type[level] == SubsteppingType::Implicit)
+        {
+            beta_s = 0.1;
+        } else { // Fully explicit
+            beta_s = -1.0;
+        }
 
         // *************************************************************************
         // Set up flux registers if using two_way coupling
@@ -99,7 +105,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
                                   detJ_cc[level],   detJ_cc_new[level],   detJ_cc_src[level],
                                 dtau, beta_s, inv_fac,
                                 mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                                fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                                fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
             } else {
                 // If this is not the first substep we pass in S_data as the previous step's solution
                 erf_fast_rhs_MT(fast_step, nrk, level, finest_level,
@@ -111,7 +117,7 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
                                   detJ_cc[level],   detJ_cc_new[level],   detJ_cc_src[level],
                                 dtau, beta_s, inv_fac,
                                 mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                                fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                                fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
             }
         } else if (solverChoice.use_terrain && solverChoice.terrain_type == TerrainType::Static) {
             if (fast_step == 0) {
@@ -127,15 +133,15 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
                                S_data, S_scratch, fine_geom, solverChoice.gravity, Omega,
                                z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac,
                                mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                               fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                               fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
             } else {
                 // If this is not the first substep we pass in S_data as the previous step's solution
                 erf_fast_rhs_T(fast_step, nrk, level, finest_level,
                                S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs,
                                S_data, S_scratch, fine_geom, solverChoice.gravity, Omega,
                                z_phys_nd[level], detJ_cc[level], dtau, beta_s, inv_fac,
                                mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                               fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                               fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
             }
         } else {
             if (fast_step == 0) {
@@ -151,15 +157,15 @@ auto fast_rhs_fun = [&](int fast_step, int /*n_sub*/, int nrk,
                                S_data, S_scratch, fine_geom, solverChoice.gravity,
                                dtau, beta_s, inv_fac,
                                mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                               fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                               fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
             } else {
                 // If this is not the first substep we pass in S_data as the previous step's solution
                 erf_fast_rhs_N(fast_step, nrk, level, finest_level,
                                S_slow_rhs, S_data, S_stage, S_prim, pi_stage, fast_coeffs,
                                S_data, S_scratch, fine_geom, solverChoice.gravity,
                                dtau, beta_s, inv_fac,
                                mapfac_m[level], mapfac_u[level], mapfac_v[level],
-                               fr_as_crse, fr_as_fine, l_use_moisture, l_reflux);
+                               fr_as_crse, fr_as_fine, l_use_moisture, l_reflux, l_implicit_substepping);
             }
         }
 

diff --git a/Source/TimeIntegration/ERF_advance_dycore.cpp b/Source/TimeIntegration/ERF_advance_dycore.cpp
@@ -90,6 +90,7 @@ void ERF::advance_dycore(int level,
     bool l_use_kturb   = ( (tc.les_type != LESType::None)   ||
                            (tc.pbl_type != PBLType::None) );
     bool l_use_moisture = ( solverChoice.moisture_type != MoistureType::None );
+    bool l_implicit_substepping = ( solverChoice.substepping_type[level] == SubsteppingType::Implicit );
 
     const bool use_most = (m_most != nullptr);
     const bool exp_most = (solverChoice.use_explicit_most);

diff --git a/Source/TimeIntegration/ERF_fast_rhs_MT.cpp b/Source/TimeIntegration/ERF_fast_rhs_MT.cpp
@@ -71,7 +71,8 @@ void erf_fast_rhs_MT (int step, int nrk,
                       YAFluxRegister* fr_as_crse,
                       YAFluxRegister* fr_as_fine,
                       bool l_use_moisture,
-                      bool l_reflux)
+                      bool l_reflux,
+                      bool l_implicit_substepping)
 {
     BL_PROFILE_REGION("erf_fast_rhs_MT()");
 

diff --git a/Source/TimeIntegration/ERF_fast_rhs_N.cpp b/Source/TimeIntegration/ERF_fast_rhs_N.cpp
@@ -51,7 +51,8 @@ void erf_fast_rhs_N (int step, int nrk,
                      YAFluxRegister* fr_as_crse,
                      YAFluxRegister* fr_as_fine,
                      bool l_use_moisture,
-                     bool l_reflux)
+                     bool l_reflux,
+                     bool l_implicit_substepping)
 {
     BL_PROFILE_REGION("erf_fast_rhs_N()");
 
@@ -368,8 +369,6 @@ void erf_fast_rhs_N (int step, int nrk,
         // *********************************************************************
         // fast_loop_on_shrunk
         // *********************************************************************
-        {
-        BL_PROFILE("fast_loop_on_shrunk");
         //Note we don't act on the bottom or top boundaries of the domain
         ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (int i, int j, int k)
         {
@@ -409,90 +408,129 @@ void erf_fast_rhs_N (int step, int nrk,
 
             // line 1
             RHS_a(i,j,k) = Omega_k + dtau * (slow_rhs_rho_w(i,j,k) + R0_tmp + dtau * beta_2 * R1_tmp);
-        });
-        } // end profile
+        }); // bx_shrunk_in_k
 
         Box b2d = tbz; // Copy constructor
         b2d.setRange(2,0);
 
-        {
-        BL_PROFILE("fast_rhs_b2d_loop");
 #ifdef AMREX_USE_GPU
         auto const lo = lbound(bx);
         auto const hi = ubound(bx);
-        ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
-        {
-          // w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
-          RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
 
-          // w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
-          // TODO TODO: Note that if we ever change this, we will need to include it in avg_zmom at the top
-          RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
+        if (l_implicit_substepping) {
+
+            ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
+            {
+              // w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
+              RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
+
+              // w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
+              RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
+
+              // w = specified Dirichlet value at k = lo.z
+                soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
+              cur_zmom(i,j,lo.z) = stage_zmom(i,j,lo.z) + soln_a(i,j,lo.z);
+
+              for (int k = lo.z+1; k <= hi.z+1; k++) {
+                  soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
+              }
 
-          // w = specified Dirichlet value at k = lo.z
-            soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
-          cur_zmom(i,j,lo.z) = stage_zmom(i,j,lo.z) + soln_a(i,j,lo.z);
+              cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
 
-          for (int k = lo.z+1; k <= hi.z+1; k++) {
-              soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
-          }
+              for (int k = hi.z; k >= lo.z; k--) {
+                  soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
+                  cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
+              }
+            }); // b2d
 
-          cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
+        } else { // explicit substepping
 
-          for (int k = hi.z; k >= lo.z; k--) {
-              soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
-              cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
-          }
-        }); // b2d
+            ParallelFor(b2d, [=] AMREX_GPU_DEVICE (int i, int j, int)
+            {
+              // w at bottom boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
+              RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
+
+              // w at top boundary of grid is 0 if at domain boundary, otherwise w = w_old + dtau * slow_rhs
+              RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
+
+              for (int k = lo.z; k <= hi.z+1; k++) {
+                  soln_a(i,j,k) = RHS_a(i,j,k) * inv_coeffB_a(i,j,k);
+                  cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
+              }
+            }); // b2d
+        } // end of explicit substepping
 #else
         auto const lo = lbound(bx);
         auto const hi = ubound(bx);
-        for (int j = lo.y; j <= hi.y; ++j) {
-            AMREX_PRAGMA_SIMD
-            for (int i = lo.x; i <= hi.x; ++i) {
-                // w at bottom boundary of grid is 0 if at domain boundary, otherwise w_old + dtau * slow_rhs
-                RHS_a (i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
-                soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
+
+        if (l_implicit_substepping) {
+
+            for (int j = lo.y; j <= hi.y; ++j) {
+                AMREX_PRAGMA_SIMD
+                for (int i = lo.x; i <= hi.x; ++i) {
+                    // w at bottom boundary of grid is 0 if at domain boundary, otherwise w_old + dtau * slow_rhs
+                    RHS_a (i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
+                    soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
+                }
             }
-        }
-        // Note that if we ever change this, we will need to include it in avg_zmom at the top
-        for (int j = lo.y; j <= hi.y; ++j) {
-            AMREX_PRAGMA_SIMD
-            for (int i = lo.x; i <= hi.x; ++i) {
-                RHS_a (i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
+            // Note that if we ever change this, we will need to include it in avg_zmom at the top
+            for (int j = lo.y; j <= hi.y; ++j) {
+                AMREX_PRAGMA_SIMD
+                for (int i = lo.x; i <= hi.x; ++i) {
+                    RHS_a (i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
+                }
+            }
+            for (int k = lo.z+1; k <= hi.z+1; ++k) {
+                for (int j = lo.y; j <= hi.y; ++j) {
+                    AMREX_PRAGMA_SIMD
+                    for (int i = lo.x; i <= hi.x; ++i) {
+                        soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
+                    }
+                }
             }
-        }
-        for (int k = lo.z+1; k <= hi.z+1; ++k) {
             for (int j = lo.y; j <= hi.y; ++j) {
                 AMREX_PRAGMA_SIMD
                 for (int i = lo.x; i <= hi.x; ++i) {
-                    soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
+                    cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
                 }
             }
-        }
-        for (int j = lo.y; j <= hi.y; ++j) {
-            AMREX_PRAGMA_SIMD
-            for (int i = lo.x; i <= hi.x; ++i) {
-                cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
+            for (int k = hi.z; k >= lo.z; --k) {
+                for (int j = lo.y; j <= hi.y; ++j) {
+                    AMREX_PRAGMA_SIMD
+                    for (int i = lo.x; i <= hi.x; ++i) {
+                        soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
+                        cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
+                    }
+                }
             }
-        }
-        for (int k = hi.z; k >= lo.z; --k) {
+        } else { // explicit substepping
+
             for (int j = lo.y; j <= hi.y; ++j) {
                 AMREX_PRAGMA_SIMD
                 for (int i = lo.x; i <= hi.x; ++i) {
-                    soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) * soln_a(i,j,k+1);
-                    cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
+                    RHS_a (i,j,lo.z  ) = dtau * slow_rhs_rho_w(i,j,lo.z);
+                    RHS_a (i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
                 }
             }
-        }
+
+            for (int k = lo.z; k <= hi.z+1; ++k) {
+                for (int j = lo.y; j <= hi.y; ++j) {
+                    AMREX_PRAGMA_SIMD
+                    for (int i = lo.x; i <= hi.x; ++i) {
+                          AMREX_ASSERT(coeffA_a(i,j,k) == 0.0);
+                          AMREX_ASSERT(coeffC_a(i,j,k) == 0.0);
+                          soln_a(i,j,k) = RHS_a(i,j,k) * inv_coeffB_a(i,j,k);
+                        cur_zmom(i,j,k) = stage_zmom(i,j,k) + soln_a(i,j,k);
+                    }
+                }
+            }
+
+        } // end of explicit substepping
 #endif
-        } // end profile
 
         // **************************************************************************
         // Define updates in the RHS of rho and (rho theta)
         // **************************************************************************
-        {
-        BL_PROFILE("fast_rho_final_update");
         ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
         {
             Real zflux_lo = beta_2 * soln_a(i,j,k  ) + beta_1 * old_drho_w(i,j,k  );
@@ -512,7 +550,6 @@ void erf_fast_rhs_N (int step, int nrk,
             temp_rhs_arr(i,j,k,RhoTheta_comp) += 0.5 * dzi * ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1))
                                                              - zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) );
         });
-        } // end profile
 
         // We only add to the flux registers in the final RK step
         if (l_reflux && nrk == 2) {

diff --git a/Source/TimeIntegration/ERF_fast_rhs_T.cpp b/Source/TimeIntegration/ERF_fast_rhs_T.cpp
@@ -57,7 +57,8 @@ void erf_fast_rhs_T (int step, int nrk,
                      YAFluxRegister* fr_as_crse,
                      YAFluxRegister* fr_as_fine,
                      bool l_use_moisture,
-                     bool l_reflux)
+                     bool l_reflux,
+                     bool l_implicit_substepping)
 {
     BL_PROFILE_REGION("erf_fast_rhs_T()");