Skip to content

Commit

Permalink
CPU matched benchmark with 4 ranks.
Browse files Browse the repository at this point in the history
  • Loading branch information
AMLattanzi committed Oct 16, 2024
1 parent 6aaf0c5 commit b89ea18
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 124 deletions.
85 changes: 0 additions & 85 deletions Source/Initialization/ERF_Metgrid_utils.H
Original file line number Diff line number Diff line change
Expand Up @@ -99,91 +99,6 @@ init_base_state_from_metgrid (const bool use_moisture,
const amrex::Vector<amrex::FArrayBox>& NC_psfc_fab,
amrex::Vector<amrex::Vector<amrex::FArrayBox>>& fabs_for_bcs);

AMREX_FORCE_INLINE
AMREX_GPU_DEVICE
void
calc_rho_p (const int& kmax,
const int& flag_psfc,
const amrex::Real& psfc,
const amrex::Real& grav,
amrex::Real* Thetad_vec,
amrex::Real* Thetam_vec,
amrex::Real* Q_vec,
amrex::Real* z_vec,
amrex::Real* Rhod_vec,
amrex::Real* Rhom_vec,
amrex::Real* Pd_vec,
amrex::Real* Pm_vec)
{
const int maxiter = 10;
const amrex::Real tol = 1.0e-10;

// Calculate or use moist pressure at the surface.
amrex::Real Psurf;
if (flag_psfc == 1) {
Psurf = psfc;
} else {
amrex::Real t_0 = 290.0; // WRF's model_config_rec%base_temp
amrex::Real a = 50.0; // WRF's model_config_rec%base_lapse
Psurf = p_0*exp(-t_0/a+std::pow((std::pow(t_0/a, 2)-2.0*grav*z_vec[0]/(a*R_d)), 0.5));
}

// Iterations for the first CC point that is 1/2 dz off the surface
{
amrex::Real half_dz = z_vec[0];
amrex::Real qvf = 1.0+(R_v/R_d)*Q_vec[0];
Thetam_vec[0] = Thetad_vec[0]*qvf;
for (int it=0; it<maxiter; it++) {
Pm_vec[0] = Psurf-half_dz*(Rhom_vec[0])*(1.0+Q_vec[0])*grav;
if (Pm_vec[0] < 0.0) Pm_vec[0] = 0.0;
Rhom_vec[0] = (p_0/(R_d*Thetam_vec[0]))*std::pow(Pm_vec[0]/p_0, iGamma);
} // it
}

// Integrate from the first CC point to the top boundary.
for (int k=1; k<=kmax; k++) {

// Assignment only for downward integration in following loop
amrex::Real qvf = 1.0+(R_v/R_d)*Q_vec[k];
Thetam_vec[k] = Thetad_vec[k]*qvf;

// Vertical grid spacing
amrex::Real dz = z_vec[k]-z_vec[k-1];

// Establish known constant
amrex::Real rho_tot_lo = Rhom_vec[k-1] * (1. + Q_vec[k-1]);
amrex::Real C = -Pm_vec[k-1] + 0.5*rho_tot_lo*grav*dz;

// Initial guess and residual
Pm_vec[k] = Pm_vec[k-1];
Rhom_vec[k] = getRhogivenThetaPress(Thetad_vec[k],
Pm_vec[k],
R_d/Cp_d,
Q_vec[k]);
amrex::Real rho_tot_hi = Rhom_vec[k] * (1. + Q_vec[k]);
amrex::Real F = Pm_vec[k] + 0.5*rho_tot_hi*grav*dz + C;

// Do iterations
if (std::abs(F)>tol) HSEutils::Newton_Raphson_hse(tol, R_d/Cp_d, dz,
grav, C, Thetad_vec[k],
Q_vec[k], Q_vec[k],
Pm_vec[k], Rhom_vec[k], F);
} // k

// integrate from the top back down to get dry pressure and density.
Pd_vec[kmax] = Pm_vec[kmax];
Rhod_vec[kmax] = (p_0/(R_d*Thetam_vec[kmax]))*std::pow(Pd_vec[kmax]/p_0, iGamma);
for (int k=kmax-1; k>=0; k--) {
amrex::Real dz = z_vec[k+1]-z_vec[k];
Rhod_vec[k] = Rhod_vec[k+1]; // an initial guess.
for (int it=0; it<maxiter; it++) {
Pd_vec[k] = Pd_vec[k+1]+0.5*dz*(Rhod_vec[k]+Rhod_vec[k+1])*grav;
if (Pd_vec[k] < 0.0) Pd_vec[k] = 0.0;
Rhod_vec[k] = (p_0/(R_d*Thetam_vec[k]))*std::pow(Pd_vec[k]/p_0, iGamma);
} // it
} // k
}

AMREX_FORCE_INLINE
AMREX_GPU_DEVICE
amrex::Real
Expand Down
202 changes: 163 additions & 39 deletions Source/Initialization/ERF_init_from_metgrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -801,16 +801,6 @@ init_base_state_from_metgrid (const bool use_moisture,
gvbx_ylo.makeSlab(1,gvbx_ylo.smallEnd(1)); gvbx_yhi.makeSlab(1,gvbx_yhi.bigEnd(1));
gvbx_zlo.makeSlab(2,gvbx_zlo.smallEnd(2)); gvbx_zhi.makeSlab(2,gvbx_zhi.bigEnd(2));

// Device vectors for columnwise operations
Gpu::DeviceVector<Real> z_vec_d(kmax+2,0); Real* z_vec = z_vec_d.data();
Gpu::DeviceVector<Real> Thetad_vec_d(kmax+1,0); Real* Thetad_vec = Thetad_vec_d.data();
Gpu::DeviceVector<Real> Thetam_vec_d(kmax+1,0); Real* Thetam_vec = Thetam_vec_d.data();
Gpu::DeviceVector<Real> Rhod_vec_d(kmax+1,0); Real* Rhod_vec = Rhod_vec_d.data();
Gpu::DeviceVector<Real> Rhom_vec_d(kmax+1,0); Real* Rhom_vec = Rhom_vec_d.data();
Gpu::DeviceVector<Real> Pd_vec_d(kmax+1,0); Real* Pd_vec = Pd_vec_d.data();
Gpu::DeviceVector<Real> Pm_vec_d(kmax+1,0); Real* Pm_vec = Pm_vec_d.data();
Gpu::DeviceVector<Real> Q_vec_d(kmax+1,0); Real* Q_vec = Q_vec_d.data();

// Device vectors for psfc flags
Gpu::DeviceVector<int>flag_psfc_d(flag_psfc.size());
Gpu::copy(Gpu::hostToDevice, flag_psfc.begin(), flag_psfc.end(), flag_psfc_d.begin());
Expand All @@ -819,8 +809,8 @@ init_base_state_from_metgrid (const bool use_moisture,
// Define the arena to be used for data allocation
Arena* Arena_Used = The_Arena();
#ifdef AMREX_USE_GPU
// Make sure this lives on CPU and GPU
Arena_Used = The_Pinned_Arena();
// Inside MFiter use async arena
Arena_Used = The_Async_Arena();
#endif
// Expose for copy to GPU
Real grav = CONST_GRAV;
Expand All @@ -829,6 +819,7 @@ init_base_state_from_metgrid (const bool use_moisture,
const Array4<Real>& r_hse_arr = r_hse_fab.array();
const Array4<Real>& p_hse_arr = p_hse_fab.array();
const Array4<Real>& pi_hse_arr = pi_hse_fab.array();
auto psfc_flag = flag_psfc_vec[0];

// ********************************************************
// calculate dry density and dry pressure
Expand All @@ -842,20 +833,86 @@ init_base_state_from_metgrid (const bool use_moisture,

ParallelFor(valid_bx2d, [=] AMREX_GPU_DEVICE (int i, int j, int) noexcept
{
for (int k=0; k<=kmax; k++) {
z_vec[k] = new_z(i,j,k);
Thetad_vec[k] = new_data(i,j,k,RhoTheta_comp);
Q_vec[k] = (use_moisture) ? new_data(i,j,k,RhoQ_comp) : 0.0;
const int maxiter = 10;
const amrex::Real tol = 1.0e-10;

// Low and Hi column variables
Real psurf;
Real z_lo, z_hi;
Real p_lo, p_hi;
Real qv_lo, qv_hi;
Real rd_lo, rd_hi;
Real thetad_lo, thetad_hi;

// Calculate or use pressure at the surface.
if (psfc_flag == 1) {
psurf = orig_psfc(i,j,0);
} else {
z_lo = new_z(i,j,0);
Real t_0 = 290.0; // WRF's model_config_rec%base_temp
Real a = 50.0; // WRF's model_config_rec%base_lapse
psurf = p_0*exp(-t_0/a+std::pow((std::pow(t_0/a, 2.)-2.0*grav*z_lo/(a*R_d)), 0.5));
}
z_vec[kmax+1] = new_z(i,j,kmax+1);

calc_rho_p(kmax, flag_psfc_vec[0], orig_psfc(i,j,0),
grav, Thetad_vec, Thetam_vec, Q_vec, z_vec,
Rhod_vec, Rhom_vec, Pd_vec, Pm_vec);
// Iterations for the first CC point that is 1/2 dz off the surface
{
z_lo = new_z(i,j,0);
qv_lo = (use_moisture) ? new_data(i,j,0,RhoQ_comp) : 0.0;
rd_lo = 0.0; // initial guess
thetad_lo = new_data(i,j,0,RhoTheta_comp);
Real half_dz = z_lo;
Real qvf = 1.0+(R_v/R_d)*qv_lo;
Real thetam = thetad_lo*qvf;
for (int it=0; it<maxiter; it++) {
p_lo = psurf-half_dz*rd_lo*(1.0+qv_lo)*grav;
if (p_lo < 0.0) p_lo = 0.0;
rd_lo = (p_0/(R_d*thetam))*std::pow(p_lo/p_0, iGamma);
} // it
p_hse_arr(i,j,0) = p_lo;
r_hse_arr(i,j,0) = rd_lo;
}

for (int k=0; k<=kmax; k++) {
p_hse_arr(i,j,k) = Pm_vec[k];
r_hse_arr(i,j,k) = Rhom_vec[k];
// Iterations for k \in [1 kmax]
for (int k=1; k<=kmax; k++) {
// Known hi data
z_hi = new_z(i,j,k);
qv_hi = (use_moisture) ? new_data(i,j,k,RhoQ_comp) : 0.0;
thetad_hi = new_data(i,j,k,RhoTheta_comp);

// Initial guesses for hi data
p_hi = p_lo;
rd_hi = getRhogivenThetaPress(thetad_hi,
p_hi,
R_d/Cp_d,
qv_hi);

// Vertical grid spacing
Real dz = z_hi - z_lo;

// Establish known constant
Real rho_tot_lo = rd_lo * (1. + qv_lo);
Real C = -p_lo + 0.5*rho_tot_lo*grav*dz;

// Initial residual
Real rho_tot_hi = rd_hi * (1. + qv_hi);
Real F = p_hi + 0.5*rho_tot_hi*grav*dz + C;

// Do iterations
if (std::abs(F)>tol) HSEutils::Newton_Raphson_hse(tol, R_d/Cp_d, dz,
grav, C, thetad_hi,
qv_hi, qv_hi, p_hi,
rd_hi, F);

// Copy solution to base state
p_hse_arr(i,j,k) = p_hi;
r_hse_arr(i,j,k) = rd_hi;

// Copy hi to lo
z_lo = z_hi;
p_lo = p_hi;
qv_lo = qv_hi;
rd_lo = rd_hi;
thetad_lo = thetad_hi;
}
});

Expand Down Expand Up @@ -926,41 +983,108 @@ init_base_state_from_metgrid (const bool use_moisture,
}

int ntimes = NC_psfc_fab.size();
for (int it=0; it<ntimes; it++) {
for (int itime=0; itime<ntimes; itime++) {
FArrayBox p_hse_bcs_fab;
FArrayBox pi_hse_bcs_fab;
p_hse_bcs_fab.resize(state_fab.box(), 1, Arena_Used);
auto psfc_flag = flag_psfc_vec[itime];

// ********************************************************
// calculate dry density and dry pressure
// ********************************************************
// calculate density and dry pressure on the new grid.
Box valid_bx2d = valid_bx;
valid_bx2d.setRange(2,0);
auto const orig_psfc = NC_psfc_fab[it].const_array();
auto const orig_psfc = NC_psfc_fab[itime].const_array();
auto const new_z = z_phys_cc_fab.const_array();
auto Theta_arr = fabs_for_bcs[it][MetGridBdyVars::T].array();
auto Q_arr = (use_moisture ) ? fabs_for_bcs[it][MetGridBdyVars::QV].array() : Array4<Real>{};
auto Theta_arr = fabs_for_bcs[itime][MetGridBdyVars::T].array();
auto Q_arr = (use_moisture ) ? fabs_for_bcs[itime][MetGridBdyVars::QV].array() : Array4<Real>{};
auto p_hse_arr = p_hse_bcs_fab.array();

ParallelFor(valid_bx2d, [=] AMREX_GPU_DEVICE (int i, int j, int) noexcept
{
for (int k=0; k<=kmax; k++) {
z_vec[k] = new_z(i,j,k);
Thetad_vec[k] = Theta_arr(i,j,k);
Q_vec[k] = (use_moisture) ? Q_arr(i,j,k) : 0.0;
const int maxiter = 10;
const amrex::Real tol = 1.0e-10;

// Low and Hi column variables
Real psurf;
Real z_lo, z_hi;
Real p_lo, p_hi;
Real qv_lo, qv_hi;
Real rd_lo, rd_hi;
Real thetad_lo, thetad_hi;

// Calculate or use pressure at the surface.
if (psfc_flag == 1) {
psurf = orig_psfc(i,j,0);
} else {
z_lo = new_z(i,j,0);
Real t_0 = 290.0; // WRF's model_config_rec%base_temp
Real a = 50.0; // WRF's model_config_rec%base_lapse
psurf = p_0*exp(-t_0/a+std::pow((std::pow(t_0/a, 2.)-2.0*grav*z_lo/(a*R_d)), 0.5));
}

// Iterations for the first CC point that is 1/2 dz off the surface
{
z_lo = new_z(i,j,0);
qv_lo = (use_moisture) ? Q_arr(i,j,0) : 0.0;
rd_lo = 0.0; // initial guess
thetad_lo = Theta_arr(i,j,0);
Real half_dz = z_lo;
Real qvf = 1.0+(R_v/R_d)*qv_lo;
Real thetam = thetad_lo*qvf;
for (int it=0; it<maxiter; it++) {
p_lo = psurf-half_dz*rd_lo*(1.0+qv_lo)*grav;
if (p_lo < 0.0) p_lo = 0.0;
rd_lo = (p_0/(R_d*thetam))*std::pow(p_lo/p_0, iGamma);
} // it
p_hse_arr(i,j,0) = p_lo;
}
z_vec[kmax+1] = new_z(i,j,kmax+1);

calc_rho_p(kmax, flag_psfc_vec[it], orig_psfc(i,j,0),
grav, Thetad_vec, Thetam_vec, Q_vec, z_vec,
Rhod_vec, Rhom_vec, Pd_vec, Pm_vec);
// Iterations for k \in [1 kmax]
for (int k=1; k<=kmax; k++) {
// Known hi data
z_hi = new_z(i,j,k);
qv_hi = (use_moisture) ? Q_arr(i,j,k) : 0.0;
thetad_hi = Theta_arr(i,j,k);

// Initial guesses for hi data
p_hi = p_lo;
rd_hi = getRhogivenThetaPress(thetad_hi,
p_hi,
R_d/Cp_d,
qv_hi);

// Vertical grid spacing
Real dz = z_hi - z_lo;

// Establish known constant
Real rho_tot_lo = rd_lo * (1. + qv_lo);
Real C = -p_lo + 0.5*rho_tot_lo*grav*dz;

// Initial residual
Real rho_tot_hi = rd_hi * (1. + qv_hi);
Real F = p_hi + 0.5*rho_tot_hi*grav*dz + C;

// Do iterations
if (std::abs(F)>tol) HSEutils::Newton_Raphson_hse(tol, R_d/Cp_d, dz,
grav, C, thetad_hi,
qv_hi, qv_hi, p_hi,
rd_hi, F);

// Copy solution to base state
p_hse_arr(i,j,k) = p_hi;

// Copy hi to lo
z_lo = z_hi;
p_lo = p_hi;
qv_lo = qv_hi;
rd_lo = rd_hi;
thetad_lo = thetad_hi;
}

for (int k=0; k<=kmax; k++) {
p_hse_arr(i,j,k) = Pm_vec[k];
} // k
});
} // it
} // itime
}


Expand Down

0 comments on commit b89ea18

Please sign in to comment.