+
65 BL_PROFILE_REGION(
"erf_fast_rhs_T()");
+
+
67 const Box& domain = geom.Domain();
+
68 auto const domlo = lbound(domain);
+
69 auto const domhi = ubound(domain);
-
-
+
71 Real beta_1 = 0.5 * (1.0 - beta_s);
+
72 Real beta_2 = 0.5 * (1.0 + beta_s);
-
74 const Real* dx = geom.CellSize();
-
75 const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
+
+
-
-
-
-
-
81 const auto& dm = S_stage_data[
IntVars::cons].DistributionMap();
-
-
83 MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
-
84 MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
-
85 MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
-
86 MultiFab Delta_rho ( ba , dm, 1, 1);
-
87 MultiFab Delta_rho_theta( ba , dm, 1, 1);
-
-
89 MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
-
90 MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
+
77 const Real* dx = geom.CellSize();
+
78 const GpuArray<Real, AMREX_SPACEDIM> dxInv = geom.InvCellSizeArray();
+
+
+
+
+
+
84 const auto& dm = S_stage_data[
IntVars::cons].DistributionMap();
+
+
86 MultiFab Delta_rho_u( convert(ba,IntVect(1,0,0)), dm, 1, 1);
+
87 MultiFab Delta_rho_v( convert(ba,IntVect(0,1,0)), dm, 1, 1);
+
88 MultiFab Delta_rho_w( convert(ba,IntVect(0,0,1)), dm, 1, IntVect(1,1,0));
+
89 MultiFab Delta_rho ( ba , dm, 1, 1);
+
90 MultiFab Delta_rho_theta( ba , dm, 1, 1);
-
92 MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
-
93 MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
-
94 MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
-
95 MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
-
96 MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
-
-
-
-
100 const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
-
101 const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
-
-
-
-
-
-
-
-
-
110 #pragma omp parallel if (Gpu::notInLaunchRegion())
-
-
112 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
-
-
114 const Array4<Real> & cur_cons = S_data[
IntVars::cons].array(mfi);
-
115 const Array4<const Real>& prev_cons = S_prev[
IntVars::cons].const_array(mfi);
-
116 const Array4<const Real>& stage_cons = S_stage_data[
IntVars::cons].const_array(mfi);
-
117 const Array4<Real>& lagged_delta_rt = S_scratch[
IntVars::cons].array(mfi);
-
-
119 const Array4<Real>& old_drho = Delta_rho.array(mfi);
-
120 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
-
121 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
-
122 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
-
123 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
-
-
125 const Array4<const Real>& prev_xmom = S_prev[
IntVars::xmom].const_array(mfi);
-
126 const Array4<const Real>& prev_ymom = S_prev[
IntVars::ymom].const_array(mfi);
-
127 const Array4<const Real>& prev_zmom = S_prev[
IntVars::zmom].const_array(mfi);
-
-
129 const Array4<const Real>& stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
-
130 const Array4<const Real>& stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
-
131 const Array4<const Real>& stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
-
-
133 Box bx = mfi.validbox();
-
134 Box gbx = mfi.tilebox(); gbx.grow(1);
+
92 MultiFab New_rho_u(convert(ba,IntVect(1,0,0)), dm, 1, 1);
+
93 MultiFab New_rho_v(convert(ba,IntVect(0,1,0)), dm, 1, 1);
+
+
95 MultiFab coeff_A_mf(fast_coeffs, make_alias, 0, 1);
+
96 MultiFab inv_coeff_B_mf(fast_coeffs, make_alias, 1, 1);
+
97 MultiFab coeff_C_mf(fast_coeffs, make_alias, 2, 1);
+
98 MultiFab coeff_P_mf(fast_coeffs, make_alias, 3, 1);
+
99 MultiFab coeff_Q_mf(fast_coeffs, make_alias, 4, 1);
+
+
+
+
103 const Array<Real,AMREX_SPACEDIM> grav{0.0, 0.0, -gravity};
+
104 const GpuArray<Real,AMREX_SPACEDIM> grav_gpu{grav[0], grav[1], grav[2]};
+
+
+
+
+
+
+
+
+
113 #pragma omp parallel if (Gpu::notInLaunchRegion())
+
+
115 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
+
+
117 const Array4<Real> & cur_cons = S_data[
IntVars::cons].array(mfi);
+
118 const Array4<const Real>& prev_cons = S_prev[
IntVars::cons].const_array(mfi);
+
119 const Array4<const Real>& stage_cons = S_stage_data[
IntVars::cons].const_array(mfi);
+
120 const Array4<Real>& lagged_delta_rt = S_scratch[
IntVars::cons].array(mfi);
+
+
122 const Array4<Real>& old_drho = Delta_rho.array(mfi);
+
123 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
+
124 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
+
125 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
+
126 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
+
+
128 const Array4<const Real>& prev_xmom = S_prev[
IntVars::xmom].const_array(mfi);
+
129 const Array4<const Real>& prev_ymom = S_prev[
IntVars::ymom].const_array(mfi);
+
130 const Array4<const Real>& prev_zmom = S_prev[
IntVars::zmom].const_array(mfi);
+
+
132 const Array4<const Real>& stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
+
133 const Array4<const Real>& stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
+
134 const Array4<const Real>& stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
-
-
137 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
-
-
-
-
-
-
143 Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
-
144 Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
-
145 Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
-
-
147 const auto& bx_lo = lbound(bx);
-
148 const auto& bx_hi = ubound(bx);
+
136 Box bx = mfi.validbox();
+
137 Box gbx = mfi.tilebox(); gbx.grow(1);
+
+
+
140 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
+
+
+
+
+
+
146 Box gtbx = mfi.nodaltilebox(0); gtbx.grow(IntVect(1,1,0));
+
147 Box gtby = mfi.nodaltilebox(1); gtby.grow(IntVect(1,1,0));
+
148 Box gtbz = mfi.nodaltilebox(2); gtbz.grow(IntVect(1,1,0));
-
150 ParallelFor(gtbx, gtby, gtbz,
-
151 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
-
152 old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
-
153 if (k == bx_lo.z && k != domlo.z) {
-
154 old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
-
155 }
else if (k == bx_hi.z) {
-
156 old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
-
-
-
159 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
-
160 old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
-
161 if (k == bx_lo.z && k != domlo.z) {
-
162 old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
-
163 }
else if (k == bx_hi.z) {
-
164 old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
-
-
-
167 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
-
168 old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
-
-
-
171 const Array4<Real>& theta_extrap = extrap.array(mfi);
-
-
173 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
-
-
-
-
177 theta_extrap(i,j,k) = old_drho_theta(i,j,k);
-
-
179 theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
-
180 ( old_drho_theta(i,j,k) - lagged_delta_rt(i,j,k,
RhoTheta_comp) );
-
-
-
-
-
-
186 #pragma omp parallel if (Gpu::notInLaunchRegion())
-
-
188 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
-
-
-
191 Box gbx = mfi.tilebox(); gbx.grow(1);
-
192 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
-
193 const Array4<Real>& lagged_delta_rt = S_scratch[
IntVars::cons].array(mfi);
-
194 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
-
195 lagged_delta_rt(i,j,k,
RhoTheta_comp) = old_drho_theta(i,j,k);
-
-
-
-
-
-
-
-
-
204 #pragma omp parallel if (Gpu::notInLaunchRegion())
-
-
206 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
-
-
208 Box bx = mfi.validbox();
-
209 Box tbx = mfi.nodaltilebox(0);
-
210 Box tby = mfi.nodaltilebox(1);
-
-
212 const Array4<const Real> & stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
-
213 const Array4<const Real> & stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
-
214 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
-
-
216 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
-
217 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
+
150 const auto& bx_lo = lbound(bx);
+
151 const auto& bx_hi = ubound(bx);
+
+
153 ParallelFor(gtbx, gtby, gtbz,
+
154 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
+
155 old_drho_u(i,j,k) = prev_xmom(i,j,k) - stage_xmom(i,j,k);
+
156 if (k == bx_lo.z && k != domlo.z) {
+
157 old_drho_u(i,j,k-1) = old_drho_u(i,j,k);
+
158 }
else if (k == bx_hi.z) {
+
159 old_drho_u(i,j,k+1) = old_drho_u(i,j,k);
+
+
+
162 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
+
163 old_drho_v(i,j,k) = prev_ymom(i,j,k) - stage_ymom(i,j,k);
+
164 if (k == bx_lo.z && k != domlo.z) {
+
165 old_drho_v(i,j,k-1) = old_drho_v(i,j,k);
+
166 }
else if (k == bx_hi.z) {
+
167 old_drho_v(i,j,k+1) = old_drho_v(i,j,k);
+
+
+
170 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
+
171 old_drho_w(i,j,k) = prev_zmom(i,j,k) - stage_zmom(i,j,k);
+
+
+
174 const Array4<Real>& theta_extrap = extrap.array(mfi);
+
+
176 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
+
+
+
+
180 theta_extrap(i,j,k) = old_drho_theta(i,j,k);
+
+
182 theta_extrap(i,j,k) = old_drho_theta(i,j,k) + beta_d *
+
183 ( old_drho_theta(i,j,k) - lagged_delta_rt(i,j,k,
RhoTheta_comp) );
+
+
+
+
+
+
189 #pragma omp parallel if (Gpu::notInLaunchRegion())
+
+
191 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
+
+
+
194 Box gbx = mfi.tilebox(); gbx.grow(1);
+
195 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
+
196 const Array4<Real>& lagged_delta_rt = S_scratch[
IntVars::cons].array(mfi);
+
197 ParallelFor(gbx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
+
198 lagged_delta_rt(i,j,k,
RhoTheta_comp) = old_drho_theta(i,j,k);
+
+
+
+
+
+
+
+
+
207 #pragma omp parallel if (Gpu::notInLaunchRegion())
+
+
209 for ( MFIter mfi(S_stage_data[
IntVars::cons],TilingIfNotGPU()); mfi.isValid(); ++mfi)
+
+
211 Box bx = mfi.validbox();
+
212 Box tbx = mfi.nodaltilebox(0);
+
213 Box tby = mfi.nodaltilebox(1);
+
+
215 const Array4<const Real> & stage_xmom = S_stage_data[
IntVars::xmom].const_array(mfi);
+
216 const Array4<const Real> & stage_ymom = S_stage_data[
IntVars::ymom].const_array(mfi);
+
217 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
-
219 const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[
IntVars::xmom].const_array(mfi);
-
220 const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[
IntVars::ymom].const_array(mfi);
+
219 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
+
220 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
-
222 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
-
223 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
+
222 const Array4<const Real>& slow_rhs_rho_u = S_slow_rhs[
IntVars::xmom].const_array(mfi);
+
223 const Array4<const Real>& slow_rhs_rho_v = S_slow_rhs[
IntVars::ymom].const_array(mfi);
-
225 const Array4<Real>& cur_xmom = S_data[
IntVars::xmom].array(mfi);
-
226 const Array4<Real>& cur_ymom = S_data[
IntVars::ymom].array(mfi);
+
225 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
+
226 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
-
-
229 const Array4<Real>& avg_xmom = S_scratch[
IntVars::xmom].array(mfi);
-
230 const Array4<Real>& avg_ymom = S_scratch[
IntVars::ymom].array(mfi);
-
-
232 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
-
-
234 const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
-
-
236 const Array4<Real>& theta_extrap = extrap.array(mfi);
-
-
-
239 const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
-
240 const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
-
-
-
-
-
-
-
-
-
-
-
-
252 BL_PROFILE(
"fast_rhs_xymom_T");
-
-
254 const auto& bx_lo = lbound(bx);
-
255 const auto& bx_hi = ubound(bx);
+
228 const Array4<Real>& cur_xmom = S_data[
IntVars::xmom].array(mfi);
+
229 const Array4<Real>& cur_ymom = S_data[
IntVars::ymom].array(mfi);
+
+
+
232 const Array4<Real>& avg_xmom = S_scratch[
IntVars::xmom].array(mfi);
+
233 const Array4<Real>& avg_ymom = S_scratch[
IntVars::ymom].array(mfi);
+
+
235 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
+
+
237 const Array4<const Real>& pi_stage_ca = pi_stage.const_array(mfi);
+
+
239 const Array4<Real>& theta_extrap = extrap.array(mfi);
+
+
+
242 const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
+
243 const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
+
+
+
+
+
+
+
+
+
+
+
+
255 BL_PROFILE(
"fast_rhs_xymom_T");
-
257 ParallelFor(tbx, tby,
-
258 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
-
-
-
-
263 Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
-
264 Real gp_zeta_on_iface = (k == 0) ?
-
265 0.5 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
-
266 -theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
-
267 0.25 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
-
268 -theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
-
269 Real gpx = gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
-
-
-
272 if (l_use_moisture) {
-
-
-
-
-
-
278 Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
-
279 Real fast_rhs_rho_u = -
Gamma *
R_d * pi_c * gpx;
+
257 const auto& bx_lo = lbound(bx);
+
258 const auto& bx_hi = ubound(bx);
+
+
260 ParallelFor(tbx, tby,
+
261 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+
+
+
+
266 Real gp_xi = (theta_extrap(i,j,k) - theta_extrap(i-1,j,k)) * dxi;
+
267 Real gp_zeta_on_iface = (k == 0) ?
+
268 0.5 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
+
269 -theta_extrap(i-1,j,k ) - theta_extrap(i,j,k ) ) :
+
270 0.25 * dzi * ( theta_extrap(i-1,j,k+1) + theta_extrap(i,j,k+1)
+
271 -theta_extrap(i-1,j,k-1) - theta_extrap(i,j,k-1) );
+
272 Real gpx = gp_xi - (met_h_xi / met_h_zeta) * gp_zeta_on_iface;
+
+
+
275 if (l_use_moisture) {
+
+
+
+
-
281 new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
-
282 + dtau * slow_rhs_rho_u(i,j,k);
-
283 if (k == bx_lo.z && k != domlo.z) {
-
284 new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
-
285 }
else if (k == bx_hi.z) {
-
286 new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
-
-
-
289 avg_xmom(i,j,k) += facinv*new_drho_u(i,j,k);
-
-
291 cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
-
-
293 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
-
-
-
-
298 Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
-
299 Real gp_zeta_on_jface = (k == 0) ?
-
300 0.5 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
-
301 -theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
-
302 0.25 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
-
303 -theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
-
304 Real gpy = gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
-
-
-
307 if (l_use_moisture) {
-
-
-
-
-
-
313 Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
-
314 Real fast_rhs_rho_v = -
Gamma *
R_d * pi_c * gpy;
+
281 Real pi_c = 0.5 * (pi_stage_ca(i-1,j,k,0) + pi_stage_ca(i ,j,k,0));
+
282 Real fast_rhs_rho_u = -
Gamma *
R_d * pi_c * gpx;
+
+
284 new_drho_u(i, j, k) = old_drho_u(i,j,k) + dtau * fast_rhs_rho_u
+
285 + dtau * slow_rhs_rho_u(i,j,k);
+
286 if (k == bx_lo.z && k != domlo.z) {
+
287 new_drho_u(i,j,k-1) = new_drho_u(i,j,k);
+
288 }
else if (k == bx_hi.z) {
+
289 new_drho_u(i,j,k+1) = new_drho_u(i,j,k);
+
+
+
292 avg_xmom(i,j,k) += facinv*new_drho_u(i,j,k);
+
+
294 cur_xmom(i,j,k) = stage_xmom(i,j,k) + new_drho_u(i,j,k);
+
+
296 [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+
+
+
+
301 Real gp_eta = (theta_extrap(i,j,k) -theta_extrap(i,j-1,k)) * dyi;
+
302 Real gp_zeta_on_jface = (k == 0) ?
+
303 0.5 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
+
304 -theta_extrap(i,j,k ) - theta_extrap(i,j-1,k ) ) :
+
305 0.25 * dzi * ( theta_extrap(i,j,k+1) + theta_extrap(i,j-1,k+1)
+
306 -theta_extrap(i,j,k-1) - theta_extrap(i,j-1,k-1) );
+
307 Real gpy = gp_eta - (met_h_eta / met_h_zeta) * gp_zeta_on_jface;
+
+
+
310 if (l_use_moisture) {
+
+
+
+
-
316 new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
-
317 + dtau * slow_rhs_rho_v(i,j,k);
+
316 Real pi_c = 0.5 * (pi_stage_ca(i,j-1,k,0) + pi_stage_ca(i,j ,k,0));
+
317 Real fast_rhs_rho_v = -
Gamma *
R_d * pi_c * gpy;
-
319 if (k == bx_lo.z && k != domlo.z) {
-
320 new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
-
321 }
else if (k == bx_hi.z) {
-
322 new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
-
-
-
325 avg_ymom(i,j,k) += facinv*new_drho_v(i,j,k);
-
-
327 cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
-
-
-
-
-
-
333 #pragma omp parallel if (Gpu::notInLaunchRegion())
-
-
-
336 std::array<FArrayBox,AMREX_SPACEDIM> flux;
-
+
319 new_drho_v(i, j, k) = old_drho_v(i,j,k) + dtau * fast_rhs_rho_v
+
320 + dtau * slow_rhs_rho_v(i,j,k);
+
+
322 if (k == bx_lo.z && k != domlo.z) {
+
323 new_drho_v(i,j,k-1) = new_drho_v(i,j,k);
+
324 }
else if (k == bx_hi.z) {
+
325 new_drho_v(i,j,k+1) = new_drho_v(i,j,k);
+
+
+
328 avg_ymom(i,j,k) += facinv*new_drho_v(i,j,k);
+
+
330 cur_ymom(i,j,k) = stage_ymom(i,j,k) + new_drho_v(i,j,k);
+
+
+
+
+
+
336 #pragma omp parallel if (Gpu::notInLaunchRegion())
+
-
339 Box bx = mfi.tilebox();
-
340 Box tbz = surroundingNodes(bx,2);
-
-
342 Box vbx = mfi.validbox();
-
343 const auto& vbx_hi = ubound(vbx);
+
339 std::array<FArrayBox,AMREX_SPACEDIM> flux;
+
+
+
342 Box bx = mfi.tilebox();
+
343 Box tbz = surroundingNodes(bx,2);
-
345 const Array4<const Real> & stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
-
346 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
+
345 Box vbx = mfi.validbox();
+
346 const auto& vbx_hi = ubound(vbx);
-
348 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
-
349 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
-
350 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
-
351 const Array4<Real>& old_drho = Delta_rho.array(mfi);
-
352 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
-
-
354 const Array4<const Real>& slow_rhs_cons = S_slow_rhs[
IntVars::cons].const_array(mfi);
-
355 const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[
IntVars::zmom].const_array(mfi);
+
348 const Array4<const Real> & stage_zmom = S_stage_data[
IntVars::zmom].const_array(mfi);
+
349 const Array4<const Real> & prim = S_stage_prim.const_array(mfi);
+
+
351 const Array4<Real>& old_drho_u = Delta_rho_u.array(mfi);
+
352 const Array4<Real>& old_drho_v = Delta_rho_v.array(mfi);
+
353 const Array4<Real>& old_drho_w = Delta_rho_w.array(mfi);
+
354 const Array4<Real>& old_drho = Delta_rho.array(mfi);
+
355 const Array4<Real>& old_drho_theta = Delta_rho_theta.array(mfi);
-
357 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
-
358 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
+
357 const Array4<const Real>& slow_rhs_cons = S_slow_rhs[
IntVars::cons].const_array(mfi);
+
358 const Array4<const Real>& slow_rhs_rho_w = S_slow_rhs[
IntVars::zmom].const_array(mfi);
-
360 const Array4<Real>& cur_cons = S_data[
IntVars::cons].array(mfi);
-
361 const Array4<Real>& cur_zmom = S_data[
IntVars::zmom].array(mfi);
+
360 const Array4<Real>& new_drho_u = New_rho_u.array(mfi);
+
361 const Array4<Real>& new_drho_v = New_rho_v.array(mfi);
-
-
364 const Array4<Real>& avg_zmom = S_scratch[
IntVars::zmom].array(mfi);
+
363 const Array4<Real>& cur_cons = S_data[
IntVars::cons].array(mfi);
+
364 const Array4<Real>& cur_zmom = S_data[
IntVars::zmom].array(mfi);
-
366 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
-
367 const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
+
+
367 const Array4<Real>& avg_zmom = S_scratch[
IntVars::zmom].array(mfi);
-
369 const Array4< Real>& omega_arr = Omega.array(mfi);
-
-
-
372 const Array4<const Real>& mf_m = mapfac_m->const_array(mfi);
-
373 const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
-
374 const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
-
-
-
-
-
-
-
-
382 FArrayBox temp_rhs_fab;
-
-
-
-
386 RHS_fab.resize (tbz,1,The_Async_Arena());
-
387 soln_fab.resize (tbz,1,The_Async_Arena());
-
388 temp_rhs_fab.resize(tbz,2,The_Async_Arena());
-
-
390 auto const& RHS_a = RHS_fab.array();
-
391 auto const& soln_a = soln_fab.array();
-
392 auto const& temp_rhs_arr = temp_rhs_fab.array();
-
-
394 auto const& coeffA_a = coeff_A_mf.array(mfi);
-
395 auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
-
396 auto const& coeffC_a = coeff_C_mf.array(mfi);
-
397 auto const& coeffP_a = coeff_P_mf.array(mfi);
-
398 auto const& coeffQ_a = coeff_Q_mf.array(mfi);
-
-
-
-
-
403 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
-
404 flux[dir].resize(surroundingNodes(bx,dir),2);
-
405 flux[dir].setVal<RunOn::Device>(0.);
-
-
407 const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
-
408 flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
-
-
-
-
412 BL_PROFILE(
"fast_T_making_rho_rhs");
-
413 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
-
414 Real h_zeta_cc_xface_hi = 0.5 * dzi *
-
415 ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
-
416 -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
-
-
418 Real h_zeta_cc_xface_lo = 0.5 * dzi *
-
419 ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
-
420 -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
-
-
422 Real h_zeta_cc_yface_hi = 0.5 * dzi *
-
423 ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
-
424 -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
-
-
426 Real h_zeta_cc_yface_lo = 0.5 * dzi *
-
427 ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
-
428 -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
-
-
430 Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_u(i ,j,0);
-
431 Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_u(i+1,j,0);
-
432 Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_v(i,j ,0);
-
433 Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_v(i,j+1,0);
-
-
435 Real mfsq = mf_m(i,j,0) * mf_m(i,j,0);
-
-
-
438 temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
-
439 ( yflux_hi - yflux_lo ) * dyi * mfsq;
-
440 temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
-
441 xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
-
442 ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
-
443 yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
-
-
445 (flx_arr[0])(i,j,k,0) = xflux_lo;
-
446 (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
+
369 const Array4<const Real>& z_nd = z_phys_nd->const_array(mfi);
+
370 const Array4<const Real>& detJ = detJ_cc->const_array(mfi);
+
+
372 const Array4< Real>& omega_arr = Omega.array(mfi);
+
+
+
375 const Array4<const Real>& mf_m = mapfac_m->const_array(mfi);
+
376 const Array4<const Real>& mf_u = mapfac_u->const_array(mfi);
+
377 const Array4<const Real>& mf_v = mapfac_v->const_array(mfi);
+
+
+
+
+
+
+
+
385 FArrayBox temp_rhs_fab;
+
+
+
+
389 RHS_fab.resize (tbz,1,The_Async_Arena());
+
390 soln_fab.resize (tbz,1,The_Async_Arena());
+
391 temp_rhs_fab.resize(tbz,2,The_Async_Arena());
+
+
393 auto const& RHS_a = RHS_fab.array();
+
394 auto const& soln_a = soln_fab.array();
+
395 auto const& temp_rhs_arr = temp_rhs_fab.array();
+
+
397 auto const& coeffA_a = coeff_A_mf.array(mfi);
+
398 auto const& inv_coeffB_a = inv_coeff_B_mf.array(mfi);
+
399 auto const& coeffC_a = coeff_C_mf.array(mfi);
+
400 auto const& coeffP_a = coeff_P_mf.array(mfi);
+
401 auto const& coeffQ_a = coeff_Q_mf.array(mfi);
+
+
+
+
+
406 for (
int dir = 0; dir < AMREX_SPACEDIM; ++dir) {
+
407 flux[dir].resize(surroundingNodes(bx,dir),2);
+
408 flux[dir].setVal<RunOn::Device>(0.);
+
+
410 const GpuArray<const Array4<Real>, AMREX_SPACEDIM>
+
411 flx_arr{{AMREX_D_DECL(flux[0].array(), flux[1].array(), flux[2].array())}};
+
+
+
+
415 BL_PROFILE(
"fast_T_making_rho_rhs");
+
416 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
+
417 Real h_zeta_cc_xface_hi = 0.5 * dzi *
+
418 ( z_nd(i+1,j ,k+1) + z_nd(i+1,j+1,k+1)
+
419 -z_nd(i+1,j ,k ) - z_nd(i+1,j+1,k ) );
+
+
421 Real h_zeta_cc_xface_lo = 0.5 * dzi *
+
422 ( z_nd(i ,j ,k+1) + z_nd(i ,j+1,k+1)
+
423 -z_nd(i ,j ,k ) - z_nd(i ,j+1,k ) );
+
+
425 Real h_zeta_cc_yface_hi = 0.5 * dzi *
+
426 ( z_nd(i ,j+1,k+1) + z_nd(i+1,j+1,k+1)
+
427 -z_nd(i ,j+1,k ) - z_nd(i+1,j+1,k ) );
+
+
429 Real h_zeta_cc_yface_lo = 0.5 * dzi *
+
430 ( z_nd(i ,j ,k+1) + z_nd(i+1,j ,k+1)
+
431 -z_nd(i ,j ,k ) - z_nd(i+1,j ,k ) );
+
+
433 Real xflux_lo = new_drho_u(i ,j,k)*h_zeta_cc_xface_lo / mf_u(i ,j,0);
+
434 Real xflux_hi = new_drho_u(i+1,j,k)*h_zeta_cc_xface_hi / mf_u(i+1,j,0);
+
435 Real yflux_lo = new_drho_v(i,j ,k)*h_zeta_cc_yface_lo / mf_v(i,j ,0);
+
436 Real yflux_hi = new_drho_v(i,j+1,k)*h_zeta_cc_yface_hi / mf_v(i,j+1,0);
+
+
438 Real mfsq = mf_m(i,j,0) * mf_m(i,j,0);
+
+
+
441 temp_rhs_arr(i,j,k,0) = ( xflux_hi - xflux_lo ) * dxi * mfsq +
+
442 ( yflux_hi - yflux_lo ) * dyi * mfsq;
+
443 temp_rhs_arr(i,j,k,1) = (( xflux_hi * (prim(i,j,k,0) + prim(i+1,j,k,0)) -
+
444 xflux_lo * (prim(i,j,k,0) + prim(i-1,j,k,0)) ) * dxi * mfsq+
+
445 ( yflux_hi * (prim(i,j,k,0) + prim(i,j+1,k,0)) -
+
446 yflux_lo * (prim(i,j,k,0) + prim(i,j-1,k,0)) ) * dyi * mfsq) * 0.5;
-
448 (flx_arr[1])(i,j,k,0) = yflux_lo;
-
449 (flx_arr[1])(i,j,k,1) = (flx_arr[0])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
+
448 (flx_arr[0])(i,j,k,0) = xflux_lo;
+
449 (flx_arr[0])(i,j,k,1) = (flx_arr[0])(i ,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i-1,j,k,0));
-
-
452 (flx_arr[0])(i+1,j,k,0) = xflux_hi;
-
453 (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
-
-
-
456 (flx_arr[1])(i,j+1,k,0) = yflux_hi;
-
457 (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
-
-
-
-
+
451 (flx_arr[1])(i,j,k,0) = yflux_lo;
+
452 (flx_arr[1])(i,j,k,1) = (flx_arr[0])(i,j ,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j-1,k,0));
+
+
+
455 (flx_arr[0])(i+1,j,k,0) = xflux_hi;
+
456 (flx_arr[0])(i+1,j,k,1) = (flx_arr[0])(i+1,j,k,0) * 0.5 * (prim(i,j,k,0) + prim(i+1,j,k,0));
+
+
+
459 (flx_arr[1])(i,j+1,k,0) = yflux_hi;
+
460 (flx_arr[1])(i,j+1,k,1) = (flx_arr[1])(i,j+1,k,0) * 0.5 * (prim(i,j,k,0) + prim(i,j+1,k,0));
+
-
-
-
465 Box gbxo = mfi.nodaltilebox(2);
-
-
-
468 if (gbxo.smallEnd(2) == domlo.z) {
-
469 Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
-
470 gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
-
471 ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
-
472 omega_arr(i,j,k) = 0.;
-
-
-
475 if (gbxo.bigEnd(2) == domhi.z+1) {
-
476 Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
-
477 gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
-
478 ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
-
479 omega_arr(i,j,k) = old_drho_w(i,j,k);
-
-
-
482 ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
-
483 omega_arr(i,j,k) =
OmegaFromW(i,j,k,old_drho_w(i,j,k),old_drho_u,old_drho_v,z_nd,dxInv);
-
-
-
-
-
488 Box bx_shrunk_in_k = bx;
-
489 int klo = tbz.smallEnd(2);
-
490 int khi = tbz.bigEnd(2);
-
491 bx_shrunk_in_k.setSmall(2,klo+1);
-
492 bx_shrunk_in_k.setBig(2,khi-1);
-
-
-
-
-
497 Real halfg = std::abs(0.5 * grav_gpu[2]);
-
-
-
500 BL_PROFILE(
"fast_loop_on_shrunk_t");
-
-
502 ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
-
504 Real detJ_on_kface = 0.5 * (detJ(i,j,k) + detJ(i,j,k-1));
-
-
506 Real coeff_P = coeffP_a(i,j,k);
-
507 Real coeff_Q = coeffQ_a(i,j,k);
+
+
+
+
+
+
468 Box gbxo = mfi.nodaltilebox(2);
+
+
+
471 if (gbxo.smallEnd(2) == domlo.z) {
+
472 Box gbxo_lo = gbxo; gbxo_lo.setBig(2,gbxo.smallEnd(2));
+
473 gbxo_mid.setSmall(2,gbxo.smallEnd(2)+1);
+
474 ParallelFor(gbxo_lo, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
+
475 omega_arr(i,j,k) = 0.;
+
+
+
478 if (gbxo.bigEnd(2) == domhi.z+1) {
+
479 Box gbxo_hi = gbxo; gbxo_hi.setSmall(2,gbxo.bigEnd(2));
+
480 gbxo_mid.setBig(2,gbxo.bigEnd(2)-1);
+
481 ParallelFor(gbxo_hi, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
+
482 omega_arr(i,j,k) = old_drho_w(i,j,k);
+
+
+
485 ParallelFor(gbxo_mid, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept {
+
486 omega_arr(i,j,k) =
OmegaFromW(i,j,k,old_drho_w(i,j,k),old_drho_u,old_drho_v,z_nd,dxInv);
+
+
+
+
+
491 Box bx_shrunk_in_k = bx;
+
492 int klo = tbz.smallEnd(2);
+
493 int khi = tbz.bigEnd(2);
+
494 bx_shrunk_in_k.setSmall(2,klo+1);
+
495 bx_shrunk_in_k.setBig(2,khi-1);
+
+
+
+
+
500 Real halfg = std::abs(0.5 * grav_gpu[2]);
+
+
+
503 BL_PROFILE(
"fast_loop_on_shrunk_t");
+
+
505 ParallelFor(bx_shrunk_in_k, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+
507 Real detJ_on_kface = 0.5 * (detJ(i,j,k) + detJ(i,j,k-1));
-
509 if (l_use_moisture) {
-
-
-
512 coeff_P /= (1.0 + q);
-
513 coeff_Q /= (1.0 + q);
-
-
-
-
-
-
-
-
521 Real R0_tmp = (-halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )) * detJ(i,j,k )
-
522 + (-halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1)) * detJ(i,j,k-1);
-
-
-
525 Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,
Rho_comp ) * detJ(i,j,k ) +
-
526 slow_rhs_cons(i,j,k-1,
Rho_comp ) * detJ(i,j,k-1) )
-
527 + ( coeff_P * slow_rhs_cons(i,j,k ,
RhoTheta_comp) * detJ(i,j,k ) +
-
528 coeff_Q * slow_rhs_cons(i,j,k-1,
RhoTheta_comp) * detJ(i,j,k-1) );
-
-
530 Real Omega_kp1 = omega_arr(i,j,k+1);
-
531 Real Omega_k = omega_arr(i,j,k );
-
532 Real Omega_km1 = omega_arr(i,j,k-1);
-
-
-
535 R1_tmp += ( halfg ) *
-
536 ( beta_1 * dzi * (Omega_kp1 - Omega_km1) + temp_rhs_arr(i,j,k,
Rho_comp) + temp_rhs_arr(i,j,k-1,
Rho_comp));
-
-
-
-
540 coeff_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,
RhoTheta_comp) ) +
-
541 coeff_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,
RhoTheta_comp) ) );
-
-
-
544 RHS_a(i,j,k) = detJ_on_kface * old_drho_w(i,j,k) + dtau * (
-
545 detJ_on_kface * slow_rhs_rho_w(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
-
-
-
548 RHS_a(i,j,k) += detJ_on_kface *
OmegaFromW(i,j,k,0.,new_drho_u,new_drho_v,z_nd,dxInv);
-
-
-
-
-
+
509 Real coeff_P = coeffP_a(i,j,k);
+
510 Real coeff_Q = coeffQ_a(i,j,k);
+
+
512 if (l_use_moisture) {
+
+
+
515 coeff_P /= (1.0 + q);
+
516 coeff_Q /= (1.0 + q);
+
+
+
+
+
+
+
+
524 Real R0_tmp = (-halfg * old_drho(i,j,k ) + coeff_P * old_drho_theta(i,j,k )) * detJ(i,j,k )
+
525 + (-halfg * old_drho(i,j,k-1) + coeff_Q * old_drho_theta(i,j,k-1)) * detJ(i,j,k-1);
+
+
+
528 Real R1_tmp = - halfg * ( slow_rhs_cons(i,j,k ,
Rho_comp ) * detJ(i,j,k ) +
+
529 slow_rhs_cons(i,j,k-1,
Rho_comp ) * detJ(i,j,k-1) )
+
530 + ( coeff_P * slow_rhs_cons(i,j,k ,
RhoTheta_comp) * detJ(i,j,k ) +
+
531 coeff_Q * slow_rhs_cons(i,j,k-1,
RhoTheta_comp) * detJ(i,j,k-1) );
+
+
533 Real Omega_kp1 = omega_arr(i,j,k+1);
+
534 Real Omega_k = omega_arr(i,j,k );
+
535 Real Omega_km1 = omega_arr(i,j,k-1);
+
+
+
538 R1_tmp += ( halfg ) *
+
539 ( beta_1 * dzi * (Omega_kp1 - Omega_km1) + temp_rhs_arr(i,j,k,
Rho_comp) + temp_rhs_arr(i,j,k-1,
Rho_comp));
+
+
+
+
543 coeff_P * ( beta_1 * dzi * (Omega_kp1*theta_t_hi - Omega_k*theta_t_mid) + temp_rhs_arr(i,j,k ,
RhoTheta_comp) ) +
+
544 coeff_Q * ( beta_1 * dzi * (Omega_k*theta_t_mid - Omega_km1*theta_t_lo) + temp_rhs_arr(i,j,k-1,
RhoTheta_comp) ) );
+
+
+
547 RHS_a(i,j,k) = detJ_on_kface * old_drho_w(i,j,k) + dtau * (
+
548 detJ_on_kface * slow_rhs_rho_w(i,j,k) + R0_tmp + dtau*beta_2*R1_tmp);
+
+
+
551 RHS_a(i,j,k) += detJ_on_kface *
OmegaFromW(i,j,k,0.,new_drho_u,new_drho_v,z_nd,dxInv);
+
+
-
555 auto const lo = lbound(bx);
-
556 auto const hi = ubound(bx);
+
+
-
-
559 BL_PROFILE(
"fast_rhs_b2d_loop_t");
-
-
561 ParallelFor(b2d, [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
-
-
-
564 RHS_a(i,j,lo.z ) = dtau * slow_rhs_rho_w(i,j,lo.z);
-
565 RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
-
-
-
568 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
+
558 auto const lo = lbound(bx);
+
559 auto const hi = ubound(bx);
+
+
+
562 BL_PROFILE(
"fast_rhs_b2d_loop_t");
+
+
564 ParallelFor(b2d, [=] AMREX_GPU_DEVICE (
int i,
int j,
int)
+
+
+
567 RHS_a(i,j,lo.z ) = dtau * slow_rhs_rho_w(i,j,lo.z);
+
568 RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
-
570 for (
int k = lo.z+1; k <= hi.z+1; k++) {
-
571 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
-
-
-
574 cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
-
575 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
+
+
571 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
+
+
573 for (
int k = lo.z+1; k <= hi.z+1; k++) {
+
574 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
+
-
577 for (
int k = hi.z; k >= lo.z; k--) {
-
578 soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
-
-
-
-
582 for (
int j = lo.y; j <= hi.y; ++j) {
-
-
584 for (
int i = lo.x; i <= hi.x; ++i)
-
-
586 RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
-
587 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
-
-
-
-
591 for (
int i = lo.x; i <= hi.x; ++i)
-
-
593 RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
-
594 soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
-
-
-
-
598 for (
int k = lo.z+1; k <= hi.z; ++k) {
-
599 for (
int j = lo.y; j <= hi.y; ++j) {
-
-
601 for (
int i = lo.x; i <= hi.x; ++i) {
-
602 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
-
-
-
-
606 for (
int k = hi.z; k > lo.z; --k) {
-
607 for (
int j = lo.y; j <= hi.y; ++j) {
-
-
609 for (
int i = lo.x; i <= hi.x; ++i) {
-
610 soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
-
-
-
-
614 if (hi.z == domhi.z) {
-
615 for (
int j = lo.y; j <= hi.y; ++j) {
-
-
617 for (
int i = lo.x; i <= hi.x; ++i) {
-
618 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
-
-
-
-
-
-
-
625 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
-
627 cur_zmom(i,j,k) = stage_zmom(i,j,k);
-
-
-
630 if (lo.z == domlo.z) {
-
631 tbz.setSmall(2,domlo.z+1);
-
-
633 if (hi.z == domhi.z) {
-
634 tbz.setBig(2,domhi.z);
+
577 cur_zmom(i,j,lo.z ) = stage_zmom(i,j,lo.z ) + soln_a(i,j,lo.z );
+
578 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
+
+
580 for (
int k = hi.z; k >= lo.z; k--) {
+
581 soln_a(i,j,k) -= ( coeffC_a(i,j,k) * inv_coeffB_a(i,j,k) ) *soln_a(i,j,k+1);
+
+
+
+
585 for (
int j = lo.y; j <= hi.y; ++j) {
+
+
587 for (
int i = lo.x; i <= hi.x; ++i)
+
+
589 RHS_a(i,j,lo.z) = dtau * slow_rhs_rho_w(i,j,lo.z);
+
590 soln_a(i,j,lo.z) = RHS_a(i,j,lo.z) * inv_coeffB_a(i,j,lo.z);
+
+
+
+
594 for (
int i = lo.x; i <= hi.x; ++i)
+
+
596 RHS_a(i,j,hi.z+1) = dtau * slow_rhs_rho_w(i,j,hi.z+1);
+
597 soln_a(i,j,hi.z+1) = RHS_a(i,j,hi.z+1) * inv_coeffB_a(i,j,hi.z+1);
+
+
+
+
601 for (
int k = lo.z+1; k <= hi.z; ++k) {
+
602 for (
int j = lo.y; j <= hi.y; ++j) {
+
+
604 for (
int i = lo.x; i <= hi.x; ++i) {
+
605 soln_a(i,j,k) = (RHS_a(i,j,k)-coeffA_a(i,j,k)*soln_a(i,j,k-1)) * inv_coeffB_a(i,j,k);
+
+
+
+
609 for (
int k = hi.z; k > lo.z; --k) {
+
610 for (
int j = lo.y; j <= hi.y; ++j) {
+
+
612 for (
int i = lo.x; i <= hi.x; ++i) {
+
613 soln_a(i,j,k) -= (coeffC_a(i,j,k) * inv_coeffB_a(i,j,k)) * soln_a(i,j,k+1);
+
+
+
+
617 if (hi.z == domhi.z) {
+
618 for (
int j = lo.y; j <= hi.y; ++j) {
+
+
620 for (
int i = lo.x; i <= hi.x; ++i) {
+
621 cur_zmom(i,j,hi.z+1) = stage_zmom(i,j,hi.z+1) + soln_a(i,j,hi.z+1);
+
+
+
+
+
+
+
628 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+
630 cur_zmom(i,j,k) = stage_zmom(i,j,k);
+
+
+
633 if (lo.z == domlo.z) {
+
634 tbz.setSmall(2,domlo.z+1);
-
636 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
-
-
638 Real wpp =
WFromOmega(i,j,k,soln_a(i,j,k),new_drho_u,new_drho_v,z_nd,dxInv);
-
639 cur_zmom(i,j,k) += wpp;
-
-
-
-
-
-
-
-
647 BL_PROFILE(
"fast_rho_final_update");
-
648 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept
+
636 if (hi.z == domhi.z) {
+
637 tbz.setBig(2,domhi.z);
+
+
639 ParallelFor(tbz, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k)
+
+
641 Real wpp =
WFromOmega(i,j,k,soln_a(i,j,k),new_drho_u,new_drho_v,z_nd,dxInv);
+
642 cur_zmom(i,j,k) += wpp;
+
+
+
+
+
+
-
650 Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
-
651 Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
-
-
-
-
655 avg_zmom(i,j,k) += facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
-
656 (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
-
-
-
659 avg_zmom(i,j,k+1) += facinv * zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
-
660 (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
-
661 (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
-
-
-
664 Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
-
665 cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
+
650 BL_PROFILE(
"fast_rho_final_update");
+
651 ParallelFor(bx, [=] AMREX_GPU_DEVICE (
int i,
int j,
int k) noexcept
+
+
653 Real zflux_lo = beta_2 * soln_a(i,j,k ) + beta_1 * omega_arr(i,j,k);
+
654 Real zflux_hi = beta_2 * soln_a(i,j,k+1) + beta_1 * omega_arr(i,j,k+1);
+
+
+
+
658 avg_zmom(i,j,k) += facinv*zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
+
659 (flx_arr[2])(i,j,k,0) = zflux_lo / (mf_m(i,j,0) * mf_m(i,j,0));
+
+
+
662 avg_zmom(i,j,k+1) += facinv * zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
+
663 (flx_arr[2])(i,j,k+1,0) = zflux_hi / (mf_m(i,j,0) * mf_m(i,j,0));
+
664 (flx_arr[2])(i,j,k+1,1) = (flx_arr[2])(i,j,k+1,0) * 0.5 * (prim(i,j,k) + prim(i,j,k+1));
+
-
667 Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 *
-
668 ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
-
669 zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
-
-
671 cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
-
-
673 (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
-
-
-
-
-
678 if (l_reflux && nrk == 2) {
-
679 int strt_comp_reflux = 0;
-
-
681 int num_comp_reflux = 1;
-
682 if (level < finest_level) {
-
683 fr_as_crse->CrseAdd(mfi,
-
684 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
-
685 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
-
-
-
688 fr_as_fine->FineAdd(mfi,
-
689 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
-
690 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
-
-
-
-
-
-
696 Gpu::streamSynchronize();
-
-
-
-
-
+
667 Real fast_rhs_rho = -(temp_rhs_arr(i,j,k,0) + ( zflux_hi - zflux_lo ) * dzi) / detJ(i,j,k);
+
668 cur_cons(i,j,k,0) += dtau * (slow_rhs_cons(i,j,k,0) + fast_rhs_rho);
+
+
670 Real fast_rhs_rhotheta = -( temp_rhs_arr(i,j,k,1) + 0.5 *
+
671 ( zflux_hi * (prim(i,j,k) + prim(i,j,k+1)) -
+
672 zflux_lo * (prim(i,j,k) + prim(i,j,k-1)) ) * dzi ) / detJ(i,j,k);
+
+
674 cur_cons(i,j,k,1) += dtau * (slow_rhs_cons(i,j,k,1) + fast_rhs_rhotheta);
+
+
676 (flx_arr[2])(i,j,k,1) = (flx_arr[2])(i,j,k,0) * 0.5 * (prim(i,j,k) + prim(i,j,k-1));
+
+
+
+
+
681 if (l_reflux && nrk == 2) {
+
682 int strt_comp_reflux = 0;
+
+
684 int num_comp_reflux = 1;
+
685 if (level < finest_level) {
+
686 fr_as_crse->CrseAdd(mfi,
+
687 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
+
688 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
+
+
+
691 fr_as_fine->FineAdd(mfi,
+
692 {{AMREX_D_DECL(&(flux[0]), &(flux[1]), &(flux[2]))}},
+
693 dx, dtau, strt_comp_reflux, strt_comp_reflux, num_comp_reflux, RunOn::Device);
+
+
+
+
+
+
699 Gpu::streamSynchronize();
+
+
+
+
+
constexpr amrex::Real R_d
Definition: ERF_Constants.H:10
constexpr amrex::Real Gamma
Definition: ERF_Constants.H:19
#define PrimQ1_comp
Definition: ERF_IndexDefines.H:55
@@ -1006,8 +1014,8 @@