diff --git a/Source/Initialization/ERF_Metgrid_utils.H b/Source/Initialization/ERF_Metgrid_utils.H index 2bb002d60..fd6c6fafb 100644 --- a/Source/Initialization/ERF_Metgrid_utils.H +++ b/Source/Initialization/ERF_Metgrid_utils.H @@ -113,8 +113,8 @@ AMREX_FORCE_INLINE AMREX_GPU_DEVICE void lagrange_interp (const int& order, - amrex::Vector<amrex::Real>& x, - amrex::Vector<amrex::Real>& y, + amrex::Real* x, + amrex::Real* y, amrex::Real& new_x, amrex::Real& new_y) { @@ -125,15 +125,15 @@ lagrange_interp (const int& order, // (xk-x0)(xk-x1)...(xk-xk-1)(xk-xk+1)...(xk-xn) amrex::Real Px = 0.0; for (int i=0; i <= order; i++) { - amrex::Real numer = 1.0; - amrex::Real denom = 1.0; + amrex::Real n = 1.0; + amrex::Real d = 1.0; for (int k=0; k <= order; k++) { if (k == i) continue; - numer *= new_x-x[k]; - denom *= x[i]-x[k]; + n *= new_x-x[k]; + d *= x[i]-x[k]; } - if (denom != 0.0) { - Px += y[i]*numer/denom; + if (d != 0.0) { + Px += y[i]*n/d; } } new_y = Px; @@ -147,12 +147,12 @@ lagrange_setup (char var_type, const int& orig_n, const int& new_n, const int& order, - amrex::Vector<amrex::Real>& orig_x_z, - amrex::Vector<amrex::Real>& orig_x_p, - amrex::Vector<amrex::Real>& orig_y, - amrex::Vector<amrex::Real>& new_x_z, - amrex::Vector<amrex::Real>& new_x_p, - amrex::Vector<amrex::Real>& new_y) + amrex::Real* orig_x_z, + amrex::Real* orig_x_p, + amrex::Real* orig_y, + amrex::Real* new_x_z, + amrex::Real* new_x_p, + amrex::Real* new_y) { if (order < 1) amrex::Abort("metgrid initialization, we cannot go lower order than linear"); @@ -166,8 +166,9 @@ lagrange_setup (char var_type, // if ((i == 391) && (j == 3)) debug = true; for (int new_k=0; new_k < new_n; new_k++) { +#ifndef AMREX_USE_GPU if (debug) amrex::Print() << "new_k=" << new_k; - +#endif // Find bounding x values and store the indices. 
bool extrapolating = true; int kl, kr; @@ -206,16 +207,22 @@ lagrange_setup (char var_type, int ksta = kl-((order/2)-1); int kend = ksta+order; amrex::Real new_x; - amrex::Vector<amrex::Real> orig_x_sub; + amrex::GpuArray<amrex::Real,256> orig_x_sub; + amrex::Real* orig_x_sub_p = orig_x_sub.data(); if (exp_interp) { new_x = new_x_p[new_k]; - orig_x_sub = {orig_x_p.begin()+ksta, orig_x_p.begin()+kend}; + orig_x_sub_p[0] = orig_x_p[ksta]; + orig_x_sub_p[1] = orig_x_p[kend]; } else { new_x = new_x_z[new_k]; - orig_x_sub = {orig_x_z.begin()+ksta, orig_x_z.begin()+kend}; + orig_x_sub_p[0] = orig_x_z[ksta]; + orig_x_sub_p[1] = orig_x_z[kend]; } - amrex::Vector<amrex::Real> orig_y_sub = {orig_y.begin()+ksta, orig_y.begin()+kend}; - lagrange_interp(order, orig_x_sub, orig_y_sub, new_x, new_y[new_k]); + amrex::GpuArray<amrex::Real,256> orig_y_sub; + amrex::Real* orig_y_sub_p = orig_y_sub.data(); + orig_y_sub_p[0] = orig_y[ksta]; + orig_y_sub_p[1] = orig_y[kend]; + lagrange_interp(order, orig_x_sub_p, orig_y_sub_p, new_x, new_y[new_k]); } else { amrex::Abort("metgrid initialization, lost in lagrange_setup (odd order)"); } @@ -225,101 +232,135 @@ lagrange_setup (char var_type, { int ksta = kl-(order/2-1); int kend = ksta+order; + int ksize = kend-ksta; +#ifndef AMREX_USE_GPU if (debug) amrex::Print() << " (1a) new_x=" << new_x_z[new_k] << " new_x_p=" << new_x_p[new_k] << " kl=" << kl << " kr=" << kr <<" ksta=" << ksta << " kend=" << kend << std::endl; +#endif amrex::Real new_x; - amrex::Vector<amrex::Real> orig_x_sub, orig_y_sub; + amrex::GpuArray<amrex::Real,256> orig_x_sub; + amrex::GpuArray<amrex::Real,256> orig_y_sub; + amrex::Real* orig_x_sub_p = orig_x_sub.data(); + amrex::Real* orig_y_sub_p = orig_y_sub.data(); if (exp_interp) { new_x = new_x_p[new_k]; - for (int k=ksta; k <= kend; k++) orig_x_sub.push_back(orig_x_p[k]); + for (int k=ksta; k <= kend; k++) { orig_x_sub_p[k-ksta] = orig_x_p[k]; } } else { new_x = new_x_z[new_k]; - for (int k=ksta; k <= kend; k++) orig_x_sub.push_back(orig_x_z[k]); + for (int k=ksta; k <= kend; k++) { orig_x_sub_p[k-ksta] = orig_x_z[k]; } } - 
for (int k=ksta; k <= kend; k++) orig_y_sub.push_back(orig_y[k]); + for (int k=ksta; k <= kend; k++) { orig_y_sub_p[k-ksta] = orig_y[k]; } +#ifndef AMREX_USE_GPU if (debug) { amrex::Print() << " orig_x_sub = ["; - for (int k=0; k < orig_x_sub.size(); k++) amrex::Print() << " " << orig_x_sub[k]; + for (int k=0; k < ksize; k++) amrex::Print() << " " << orig_x_sub_p[k]; amrex::Print() << "]" << std::endl; amrex::Print() << " orig_y_sub = ["; - for (int k=0; k < orig_y_sub.size(); k++) amrex::Print() << " " << orig_y_sub[k]; + for (int k=0; k < ksize; k++) amrex::Print() << " " << orig_y_sub_p[k]; amrex::Print() << "]" << std::endl; } - lagrange_interp(order, orig_x_sub, orig_y_sub, new_x, new_y_l); +#endif + lagrange_interp(order, orig_x_sub_p, orig_y_sub_p, new_x, new_y_l); } { int ksta = kl-order/2; int kend = ksta+order; + int ksize = kend-ksta; +#ifndef AMREX_USE_GPU if (debug) amrex::Print() << "new_k=" << new_k << " (1b) new_x=" << new_x_z[new_k] << " new_x_p=" << new_x_p[new_k] << " kl=" << kl << " kr=" << kr <<" ksta=" << ksta << " kend=" << kend << std::endl; +#endif amrex::Real new_x; - amrex::Vector<amrex::Real> orig_x_sub, orig_y_sub; + amrex::GpuArray<amrex::Real,256> orig_x_sub; + amrex::GpuArray<amrex::Real,256> orig_y_sub; + amrex::Real* orig_x_sub_p = orig_x_sub.data(); + amrex::Real* orig_y_sub_p = orig_y_sub.data(); if (exp_interp) { new_x = new_x_p[new_k]; - for (int k=ksta; k <= kend; k++) orig_x_sub.push_back(orig_x_p[k]); + for (int k=ksta; k <= kend; k++) { orig_x_sub_p[k-ksta] = orig_x_p[k]; } } else { new_x = new_x_z[new_k]; - for (int k=ksta; k <= kend; k++) orig_x_sub.push_back(orig_x_z[k]); + for (int k=ksta; k <= kend; k++) { orig_x_sub_p[k-ksta] = orig_x_z[k]; } } - for (int k=ksta; k <= kend; k++) orig_y_sub.push_back(orig_y[k]); + for (int k=ksta; k <= kend; k++) { orig_y_sub_p[k-ksta] = orig_y[k]; } +#ifndef AMREX_USE_GPU if (debug) { amrex::Print() << " orig_x_sub = ["; - for (int k=0; k < orig_x_sub.size(); k++) amrex::Print() << " " << orig_x_sub[k]; + for (int k=0; k < ksize; 
k++) amrex::Print() << " " << orig_x_sub_p[k]; amrex::Print() << "]" << std::endl; amrex::Print() << " orig_y_sub = ["; - for (int k=0; k < orig_y_sub.size(); k++) amrex::Print() << " " << orig_y_sub[k]; + for (int k=0; k < ksize; k++) amrex::Print() << " " << orig_y_sub_p[k]; amrex::Print() << "]" << std::endl; } - lagrange_interp(order, orig_x_sub, orig_y_sub, new_x, new_y_r); +#endif + lagrange_interp(order, orig_x_sub_p, orig_y_sub_p, new_x, new_y_r); } new_y[new_k] = 0.5*(new_y_l+new_y_r); if (debug) amrex::Print() << " new_y=" << new_y[new_k] << std::endl; } else if ((kl-(order/2-1) >= 0) && (kr+order/2 <= orig_n-1)) { int ksta = kl-(order/2-1); int kend = ksta+order; + int ksize = kend-ksta; +#ifndef AMREX_USE_GPU if (debug) amrex::Print() << " (2) new_x=" << new_x_z[new_k] << " new_x_p=" << new_x_p[new_k] << " kl=" << kl << " kr=" << kr <<" ksta=" << ksta << " kend=" << kend << std::endl; +#endif amrex::Real new_x; - amrex::Vector<amrex::Real> orig_x_sub, orig_y_sub; + amrex::GpuArray<amrex::Real,256> orig_x_sub; + amrex::GpuArray<amrex::Real,256> orig_y_sub; + amrex::Real* orig_x_sub_p = orig_x_sub.data(); + amrex::Real* orig_y_sub_p = orig_y_sub.data(); if (exp_interp) { new_x = new_x_p[new_k]; - for (int k=ksta; k <= kend; k++) orig_x_sub.push_back(orig_x_p[k]); + for (int k=ksta; k <= kend; k++) { orig_x_sub_p[k-ksta] = orig_x_p[k]; } } else { new_x = new_x_z[new_k]; - for (int k=ksta; k <= kend; k++) orig_x_sub.push_back(orig_x_z[k]); + for (int k=ksta; k <= kend; k++) { orig_x_sub_p[k-ksta] = orig_x_z[k]; } } - for (int k=ksta; k <= kend; k++) orig_y_sub.push_back(orig_y[k]); + for (int k=ksta; k <= kend; k++) { orig_y_sub_p[k-ksta] = orig_y[k]; } +#ifndef AMREX_USE_GPU if (debug) { amrex::Print() << " orig_x_sub = ["; - for (int k=0; k < orig_x_sub.size(); k++) amrex::Print() << " " << orig_x_sub[k]; + for (int k=0; k < ksize; k++) amrex::Print() << " " << orig_x_sub_p[k]; amrex::Print() << "]" << std::endl; amrex::Print() << " orig_y_sub = ["; - for (int k=0; k < orig_y_sub.size(); k++) 
amrex::Print() << " " << orig_y_sub[k]; + for (int k=0; k < ksize; k++) amrex::Print() << " " << orig_y_sub_p[k]; amrex::Print() << "]" << std::endl; } - lagrange_interp(order, orig_x_sub, orig_y_sub, new_x, new_y[new_k]); +#endif + lagrange_interp(order, orig_x_sub_p, orig_y_sub_p, new_x, new_y[new_k]); if (debug) amrex::Print() << " new_y=" << new_y[new_k] << std::endl; } else if ((kl-order/2 >= 0) && (kr+(order/2-1) <= orig_n-1)) { int ksta = kl-order/2; int kend = ksta+order; + int ksize = kend-ksta; +#ifndef AMREX_USE_GPU if (debug) amrex::Print() << " (3) new_x=" << new_x_z[new_k] << " new_x_p=" << new_x_p[new_k] << " kl=" << kl << " kr=" << kr <<" ksta=" << ksta << " kend=" << kend << std::endl; +#endif amrex::Real new_x; - amrex::Vector<amrex::Real> orig_x_sub, orig_y_sub; + amrex::GpuArray<amrex::Real,256> orig_x_sub; + amrex::GpuArray<amrex::Real,256> orig_y_sub; + amrex::Real* orig_x_sub_p = orig_x_sub.data(); + amrex::Real* orig_y_sub_p = orig_y_sub.data(); if (exp_interp) { new_x = new_x_p[new_k]; - for (int k=ksta; k <= kend; k++) orig_x_sub.push_back(orig_x_p[k]); + for (int k=ksta; k <= kend; k++) { orig_x_sub_p[k-ksta] = orig_x_p[k]; } } else { new_x = new_x_z[new_k]; - for (int k=ksta; k <= kend; k++) orig_x_sub.push_back(orig_x_z[k]); + for (int k=ksta; k <= kend; k++) { orig_x_sub_p[k-ksta] = orig_x_z[k]; } } - for (int k=ksta; k <= kend; k++) orig_y_sub.push_back(orig_y[k]); + for (int k=ksta; k <= kend; k++) { orig_y_sub_p[k-ksta] = orig_y[k]; } +#ifndef AMREX_USE_GPU if (debug) { amrex::Print() << " orig_x_sub = ["; - for (int k=0; k < orig_x_sub.size(); k++) amrex::Print() << " " << orig_x_sub[k]; + for (int k=0; k < ksize; k++) amrex::Print() << " " << orig_x_sub_p[k]; amrex::Print() << "]" << std::endl; amrex::Print() << " orig_y_sub = ["; - for (int k=0; k < orig_y_sub.size(); k++) amrex::Print() << " " << orig_y_sub[k]; + for (int k=0; k < ksize; k++) amrex::Print() << " " << orig_y_sub_p[k]; amrex::Print() << "]" << std::endl; } - lagrange_interp(order, orig_x_sub, orig_y_sub, 
new_x, new_y[new_k]); +#endif + lagrange_interp(order, orig_x_sub_p, orig_y_sub_p, new_x, new_y[new_k]); +#ifndef AMREX_USE_GPU if (debug) amrex::Print() << " new_y=" << new_y[new_k] << std::endl; +#endif } else { amrex::Abort("metgrid initialization, lost in lagrange_setup (even order)"); } @@ -327,27 +368,37 @@ lagrange_setup (char var_type, // Linear interpolation. int ksta = kl; int kend = kr; + int ksize = kend-ksta; +#ifndef AMREX_USE_GPU if (debug) amrex::Print() << " (4) new_x=" << new_x_z[new_k] << " new_x_p=" << new_x_p[new_k] << " kl=" << kl << " kr=" << kr <<" ksta=" << ksta << " kend=" << kend << std::endl; +#endif amrex::Real new_x; - amrex::Vector<amrex::Real> orig_x_sub, orig_y_sub; + amrex::GpuArray<amrex::Real,256> orig_x_sub; + amrex::GpuArray<amrex::Real,256> orig_y_sub; + amrex::Real* orig_x_sub_p = orig_x_sub.data(); + amrex::Real* orig_y_sub_p = orig_y_sub.data(); if (exp_interp) { new_x = new_x_p[new_k]; - for (int k=ksta; k <= kend; k++) orig_x_sub.push_back(orig_x_p[k]); + for (int k=ksta; k <= kend; k++) { orig_x_sub_p[k-ksta] = orig_x_p[k]; } } else { new_x = new_x_z[new_k]; - for (int k=ksta; k <= kend; k++) orig_x_sub.push_back(orig_x_z[k]); + for (int k=ksta; k <= kend; k++) { orig_x_sub_p[k-ksta] = orig_x_z[k]; } } - for (int k=ksta; k <= kend; k++) orig_y_sub.push_back(orig_y[k]); + for (int k=ksta; k <= kend; k++) { orig_y_sub_p[k-ksta] = orig_y[k]; } +#ifndef AMREX_USE_GPU if (debug) { amrex::Print() << " orig_x_sub = ["; - for (int k=0; k < orig_x_sub.size(); k++) amrex::Print() << " " << orig_x_sub_p[k]; + for (int k=0; k < ksize; k++) amrex::Print() << " " << orig_x_sub_p[k]; amrex::Print() << "]" << std::endl; amrex::Print() << " orig_y_sub = ["; - for (int k=0; k < orig_y_sub.size(); k++) amrex::Print() << " " << orig_y_sub_p[k]; + for (int k=0; k < ksize; k++) amrex::Print() << " " << orig_y_sub_p[k]; amrex::Print() << "]" << std::endl; } - lagrange_interp(1, orig_x_sub, orig_y_sub, new_x, new_y[new_k]); +#endif + lagrange_interp(1, orig_x_sub_p, orig_y_sub_p, new_x, 
new_y[new_k]); +#ifndef AMREX_USE_GPU if (debug) amrex::Print() << " new_y=" << new_y[new_k] << std::endl; +#endif } } } @@ -387,49 +438,54 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, { // Here we closely follow WRF's vert_interp from // dyn_em/module_initialize_real.F, although changes have been - // made to accomodate interpolation relative to height instead of + // made to accommodate interpolation relative to height instead of // pressure. int imax_orig = amrex::ubound(amrex::Box(orig_data)).x; int jmax_orig = amrex::ubound(amrex::Box(orig_data)).y; int kmax_orig = amrex::ubound(amrex::Box(orig_data)).z; int kmax_new = amrex::ubound(amrex::Box(new_z_full)).z; - amrex::Vector<amrex::Real> new_z, new_p, new_data; - new_z.resize(kmax_new); - new_p.resize(kmax_new); - new_data.resize(kmax_new); + AMREX_ASSERT(kmax_orig < 256); + AMREX_ASSERT(kmax_new < 256); + + amrex::GpuArray<amrex::Real,256> new_z; + amrex::GpuArray<amrex::Real,256> new_p; + amrex::GpuArray<amrex::Real,256> new_data; + amrex::Real* new_z_p = new_z.data(); + amrex::Real* new_p_p = new_p.data(); + amrex::Real* new_data_p = new_data.data(); for (int k=0; k < kmax_new; k++) { if (stag == 'X') { - new_z[k] = 0.25*(new_z_full(i,j,k)+new_z_full(i,j+1,k)+new_z_full(i,j,k+1)+new_z_full(i,j+1,k+1)); + new_z_p[k] = 0.25*(new_z_full(i,j,k)+new_z_full(i,j+1,k)+new_z_full(i,j,k+1)+new_z_full(i,j+1,k+1)); } else if (stag == 'Y') { - new_z[k] = 0.25*(new_z_full(i,j,k)+new_z_full(i+1,j,k)+new_z_full(i,j,k+1)+new_z_full(i+1,j,k+1)); + new_z_p[k] = 0.25*(new_z_full(i,j,k)+new_z_full(i+1,j,k)+new_z_full(i,j,k+1)+new_z_full(i+1,j,k+1)); } else if (stag == 'M') { - new_z[k] = 0.125*(new_z_full(i,j,k )+new_z_full(i,j+1,k )+new_z_full(i+1,j,k )+new_z_full(i+1,j+1,k )+ + new_z_p[k] = 0.125*(new_z_full(i,j,k )+new_z_full(i,j+1,k )+new_z_full(i+1,j,k )+new_z_full(i+1,j+1,k )+ new_z_full(i,j,k+1)+new_z_full(i,j+1,k+1)+new_z_full(i+1,j,k+1)+new_z_full(i+1,j+1,k+1)); } - calc_p_isothermal(new_z[k], new_p[k]); + calc_p_isothermal(new_z_p[k], new_p_p[k]); } - 
amrex::Vector<amrex::Real> orig_z; - orig_z.resize(kmax_orig); + amrex::GpuArray<amrex::Real,256> orig_z; + amrex::Real* orig_z_p = orig_z.data(); for (int k=0; k < kmax_orig; k++) { if (stag == 'M') { - orig_z[k] = orig_z_full(i,j,k); + orig_z_p[k] = orig_z_full(i,j,k); } else if (stag == 'X') { if (i == 0) { - orig_z[k] = orig_z_full(i,j,k); + orig_z_p[k] = orig_z_full(i,j,k); } else if (i == imax_orig) { - orig_z[k] = orig_z_full(imax_orig-1,j,k); + orig_z_p[k] = orig_z_full(imax_orig-1,j,k); } else { - orig_z[k] = 0.5*(orig_z_full(i,j,k)+orig_z_full(i-1,j,k)); + orig_z_p[k] = 0.5*(orig_z_full(i,j,k)+orig_z_full(i-1,j,k)); } } else if (stag == 'Y') { if (j == 0) { - orig_z[k] = orig_z_full(i,j,k); + orig_z_p[k] = orig_z_full(i,j,k); } else if (j == jmax_orig) { - orig_z[k] = orig_z_full(i,jmax_orig-1,k); + orig_z_p[k] = orig_z_full(i,jmax_orig-1,k); } else { - orig_z[k] = 0.5*(orig_z_full(i,j,k)+orig_z_full(i,j-1,k)); + orig_z_p[k] = 0.5*(orig_z_full(i,j,k)+orig_z_full(i,j-1,k)); } } } @@ -444,7 +500,7 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, // different than the topography processed by WPS. int k_above_sfc = 0; for (int k=1; k < kmax_orig; k++) { - if (orig_z[k] > orig_z[0]) { + if (orig_z_p[k] > orig_z_p[0]) { k_above_sfc = k; break; } @@ -453,8 +509,12 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, int zap = 0; int zap_below = 0; int zap_above = 0; - amrex::Vector<amrex::Real> ordered_z, ordered_data; - + int kend_order; + amrex::ignore_unused(zap,zap_above); + amrex::GpuArray<amrex::Real,256> ordered_z; + amrex::GpuArray<amrex::Real,256> ordered_data; + amrex::Real* ordered_z_p = ordered_z.data(); + amrex::Real* ordered_data_p = ordered_data.data(); if (k_above_sfc > 1) { // The levels are not monotonically increasing in height, so // we sort and then make "artistic" quality control choices. @@ -462,8 +522,8 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, // Insert levels that are below the surface. 
for (int k=1; k < k_above_sfc; k++) { - ordered_z.push_back(orig_z[k]); - ordered_data.push_back(orig_data(i,j,k)); + ordered_z_p[count] = orig_z_p[k]; + ordered_data_p[count] = orig_data(i,j,k); count++; } @@ -475,8 +535,8 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, // pressure-space. For simplicity, calculate delta P assuming a // baroclinic atmosphere. amrex::Real Pu, Pl; - calc_p_isothermal(orig_z[0], Pu); - calc_p_isothermal(ordered_z[count-1], Pl); + calc_p_isothermal(orig_z_p[0], Pu); + calc_p_isothermal(ordered_z_p[count-1], Pl); if (Pl-Pu < metgrid_proximity) { count--; zap = 1; @@ -484,8 +544,8 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, } // Insert the surface level. - ordered_z.push_back(orig_z[0]); - ordered_data.push_back(orig_data(i,j,0)); + ordered_z_p[count] = orig_z_p[0]; + ordered_data_p[count] = orig_data(i,j,0); count++; // Quoting WRF's comments, the next level to use is at, @@ -498,7 +558,7 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, // ERF levels from the surface. if (metgrid_force_sfc_k > 0) { for (int k=k_above_sfc; k < kmax_orig; k++) { - if (orig_z[k] > new_z[metgrid_force_sfc_k-1]) { + if (orig_z_p[k] > new_z_p[metgrid_force_sfc_k-1]) { knext = k; break; } else { @@ -510,8 +570,8 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, // Check if the level that is nearest to and above the surface // is "too close". If so, we'll ignore that level. - calc_p_isothermal(orig_z[knext], Pu); - calc_p_isothermal(ordered_z[count-1], Pl); + calc_p_isothermal(orig_z_p[knext], Pu); + calc_p_isothermal(ordered_z_p[count-1], Pl); if (Pl-Pu < metgrid_proximity) { knext++; zap++; @@ -520,17 +580,18 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, // Insert levels that are above the surface. 
for (int k=knext; k < kmax_orig; k++) { - ordered_z.push_back(orig_z[k]); - ordered_data.push_back(orig_data(i,j,k)); + ordered_z_p[count] = orig_z_p[k]; + ordered_data_p[count] = orig_data(i,j,k); count++; } + kend_order = count; } else { // The surface is the lowest level in the origin data. // Insert the surface. - ordered_z.push_back(orig_z[0]); - ordered_data.push_back(orig_data(i,j,0)); + ordered_z_p[0] = orig_z[0]; + ordered_data_p[0] = orig_data(i,j,0); // Similar to above, conditionally more strongly use the // surface data. @@ -538,7 +599,7 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, int knext = count; if (metgrid_force_sfc_k > 0) { for (int k=knext; k < kmax_orig; k++) { - if (orig_z[k] > new_z[metgrid_force_sfc_k]) { + if (orig_z_p[k] > new_z_p[metgrid_force_sfc_k]) { knext = k; break; } else { @@ -552,64 +613,63 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, // "too close" to the prior valid level. for (int k=knext; k < kmax_orig; k++) { amrex::Real Pu, Pl; - calc_p_isothermal(orig_z[k], Pu); - calc_p_isothermal(ordered_z[count-1], Pl); + calc_p_isothermal(orig_z_p[k], Pu); + calc_p_isothermal(ordered_z_p[count-1], Pl); if (Pl-Pu < metgrid_proximity) { zap++; zap_above++; continue; } - ordered_z.push_back(orig_z[k]); - ordered_data.push_back(orig_data(i,j,k)); + ordered_z_p[count] = orig_z_p[k]; + ordered_data_p[count] = orig_data(i,j,k); count++; } + kend_order = count; } int ksta, kend; if (metgrid_use_below_sfc && metgrid_use_sfc) { // Use all levels. int count = 0; - for (int k=1; k < ordered_z.size(); k++) { + for (int k=1; k < kend_order; k++) { count++; - if (ordered_z[ordered_z.size()-1] == orig_z[k]) break; + if (ordered_z_p[kend_order-1] == orig_z_p[k]) break; } ksta = 0; - kend = ksta+count-1; + kend = ksta+kend_order-1; } else if (metgrid_use_below_sfc && !metgrid_use_sfc) { // Use all levels except for the surface. 
int ksfc = 0; for (int k=0; k < kmax_orig; k++) { - if (ordered_z[k] == orig_z[0]) { + if (ordered_z_p[k] == orig_z_p[0]) { ksfc = k; break; } } for (int k=ksfc; k < kmax_orig-1; k++) { - ordered_z[k] = ordered_z[k+1]; - ordered_data[k] = ordered_data[k+1]; + ordered_z_p[k] = ordered_z_p[k+1]; + ordered_data_p[k] = ordered_data_p[k+1]; } - ordered_z.resize(kmax_orig-1); - ordered_data.resize(kmax_orig-1); int count = 0; for (int k=0; k < kmax_orig; k++) { count++; - if (orig_z[kmax_orig-1] == ordered_z[k]) break; + if (orig_z_p[kmax_orig-1] == ordered_z_p[k]) break; } ksta = 0; - kend = ksta+count-1; + kend = ksta+kend_order-1; } else if (!metgrid_use_below_sfc && metgrid_use_sfc) { // Use all levels above and including the surface. int kcount = k_above_sfc-1-zap_below; int count = 0; for (int k=0; k < kmax_orig; k++) { - if (ordered_z[kcount] == orig_z[k]) { + if (ordered_z_p[kcount] == orig_z_p[k]) { kcount++; count++; } } ksta = k_above_sfc-1-zap_below; - kend = ksta+count-1; + kend = ksta+kend_order-1; } else { // We shouldn't be in here! 
amrex::Abort("metgrid initialization, !use_levels below_ground && !metgrid_use_sfc"); @@ -628,27 +688,39 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, // amrex::Abort("metgrid initialization, use_trop not yet implemented"); // } - amrex::Vector<amrex::Real> ordered_p; - for (int k=0; k < ordered_z.size(); k++) { - amrex::Real p; - calc_p_isothermal(ordered_z[k], p); - ordered_p.push_back(p); + amrex::GpuArray<amrex::Real,256> ordered_p; + amrex::Real* ordered_p_p = ordered_p.data(); + for (int k=0; k < kend_order; k++) { + calc_p_isothermal(ordered_z_p[k], ordered_p_p[k]); } - int new_n = 1; + int size = 0; + amrex::Real p_tmp = ordered_p[ksta]; + for (int k=ksta+1; k <= kend; k++) { + if ((p_tmp-ordered_p_p[k]) > metgrid_proximity) { + size++; + p_tmp = ordered_p_p[k]; + } + } + int new_n = 0; int zap_final = 0; - amrex::Vector<amrex::Real> final_z, final_p, final_data; - final_z.push_back(ordered_z[ksta]); - final_p.push_back(ordered_p[ksta]); - final_data.push_back(ordered_data[ksta]); + amrex::GpuArray<amrex::Real,256> final_z; + amrex::GpuArray<amrex::Real,256> final_p; + amrex::GpuArray<amrex::Real,256> final_data; + amrex::Real* final_z_p = final_z.data(); + amrex::Real* final_p_p = final_p.data(); + amrex::Real* final_data_p = final_data.data(); + final_z_p[0] = ordered_z[ksta]; + final_p_p[0] = ordered_p[ksta]; + final_data_p[0] = ordered_data[ksta]; for (int k=ksta+1; k <= kend; k++) { - if ((final_p[new_n-1]-ordered_p[k]) < metgrid_proximity) { + if ((final_p_p[new_n]-ordered_p_p[k]) < metgrid_proximity) { zap_final++; } else { new_n++; - final_z.push_back(ordered_z[k]); - final_p.push_back(ordered_p[k]); - final_data.push_back(ordered_data[k]); + final_z_p[new_n] = ordered_z_p[k]; + final_p_p[new_n] = ordered_p_p[k]; + final_data_p[new_n] = ordered_data_p[k]; } } kend -= zap_final; @@ -659,12 +731,12 @@ interpolate_column_metgrid (const bool& metgrid_use_below_sfc, kend-ksta, kmax_new, metgrid_order, - final_z, - final_p, - final_data, - new_z, - new_p, - new_data); + final_z_p, + final_p_p, + final_data_p, + new_z_p, + 
new_p_p, + new_data_p); // Optionally replace the lowest level of data with the surface // field from the origin data.