diff --git a/Source/ERF.H b/Source/ERF.H
index 66e40e58b..61134f455 100644
--- a/Source/ERF.H
+++ b/Source/ERF.H
@@ -161,7 +161,8 @@ public:
     void sample_points (int lev, amrex::Real time, amrex::IntVect cell, amrex::MultiFab& mf);
     void sample_lines (int lev, amrex::Real time, amrex::IntVect cell, amrex::MultiFab& mf);
 
-    void derive_diag_profiles (amrex::Gpu::HostVector<amrex::Real>& h_avg_u , amrex::Gpu::HostVector<amrex::Real>& h_avg_v ,
+    void derive_diag_profiles (amrex::Real time,
+                               amrex::Gpu::HostVector<amrex::Real>& h_avg_u , amrex::Gpu::HostVector<amrex::Real>& h_avg_v ,
                                amrex::Gpu::HostVector<amrex::Real>& h_avg_w , amrex::Gpu::HostVector<amrex::Real>& h_avg_rho,
                                amrex::Gpu::HostVector<amrex::Real>& h_avg_th , amrex::Gpu::HostVector<amrex::Real>& h_avg_ksgs,
                                amrex::Gpu::HostVector<amrex::Real>& h_avg_kturb,
diff --git a/Source/IO/ERF_Write1DProfiles.cpp b/Source/IO/ERF_Write1DProfiles.cpp
index 62f016099..588ba7009 100644
--- a/Source/IO/ERF_Write1DProfiles.cpp
+++ b/Source/IO/ERF_Write1DProfiles.cpp
@@ -34,7 +34,8 @@ ERF::write_1D_profiles (Real time)
     Gpu::HostVector<Real> h_avg_sgshfx, h_avg_sgsdiss; // only output tau_{theta,w} and epsilon for now
 
     if (NumDataLogs() > 1) {
-        derive_diag_profiles(h_avg_u, h_avg_v, h_avg_w,
+        derive_diag_profiles(time,
+                             h_avg_u, h_avg_v, h_avg_w,
                              h_avg_rho, h_avg_th, h_avg_ksgs, h_avg_kturb,
                              h_avg_qv, h_avg_qc, h_avg_qr, h_avg_wqv, h_avg_wqc, h_avg_wqr, h_avg_qi, h_avg_qs, h_avg_qg,
                              h_avg_uu, h_avg_uv, h_avg_uw, h_avg_vv, h_avg_vw, h_avg_ww,
@@ -179,18 +180,21 @@ ERF::write_1D_profiles (Real time)
  * @param h_avg_pw Profile for pressure perturbation * z-velocity on Host
  */
 
-void ERF::derive_diag_profiles(Gpu::HostVector<Real>& h_avg_u , Gpu::HostVector<Real>& h_avg_v , Gpu::HostVector<Real>& h_avg_w,
-                               Gpu::HostVector<Real>& h_avg_rho , Gpu::HostVector<Real>& h_avg_th , Gpu::HostVector<Real>& h_avg_ksgs,
-                               Gpu::HostVector<Real>& h_avg_kturb, Gpu::HostVector<Real>& h_avg_qv , Gpu::HostVector<Real>& h_avg_qc , Gpu::HostVector<Real>& h_avg_qr,
-                               Gpu::HostVector<Real>& h_avg_wqv , Gpu::HostVector<Real>& h_avg_wqc, Gpu::HostVector<Real>& h_avg_wqr,
-                               Gpu::HostVector<Real>& h_avg_qi , Gpu::HostVector<Real>& h_avg_qs , Gpu::HostVector<Real>& h_avg_qg,
-                               Gpu::HostVector<Real>& h_avg_uu , Gpu::HostVector<Real>& h_avg_uv , Gpu::HostVector<Real>& h_avg_uw,
-                               Gpu::HostVector<Real>& h_avg_vv , Gpu::HostVector<Real>& h_avg_vw , Gpu::HostVector<Real>& h_avg_ww,
-                               Gpu::HostVector<Real>& h_avg_uth , Gpu::HostVector<Real>& h_avg_vth, Gpu::HostVector<Real>& h_avg_wth,
-                               Gpu::HostVector<Real>& h_avg_thth,
-                               Gpu::HostVector<Real>& h_avg_uiuiu , Gpu::HostVector<Real>& h_avg_uiuiv , Gpu::HostVector<Real>& h_avg_uiuiw,
-                               Gpu::HostVector<Real>& h_avg_p,
-                               Gpu::HostVector<Real>& h_avg_pu , Gpu::HostVector<Real>& h_avg_pv , Gpu::HostVector<Real>& h_avg_pw)
+void ERF::derive_diag_profiles(Real time,
+                               Gpu::HostVector<Real>& h_avg_u , Gpu::HostVector<Real>& h_avg_v , Gpu::HostVector<Real>& h_avg_w,
+                               Gpu::HostVector<Real>& h_avg_rho , Gpu::HostVector<Real>& h_avg_th , Gpu::HostVector<Real>& h_avg_ksgs,
+                               Gpu::HostVector<Real>& h_avg_kturb, Gpu::HostVector<Real>& h_avg_qv,
+                               Gpu::HostVector<Real>& h_avg_qc , Gpu::HostVector<Real>& h_avg_qr,
+                               Gpu::HostVector<Real>& h_avg_wqv , Gpu::HostVector<Real>& h_avg_wqc, Gpu::HostVector<Real>& h_avg_wqr,
+                               Gpu::HostVector<Real>& h_avg_qi , Gpu::HostVector<Real>& h_avg_qs , Gpu::HostVector<Real>& h_avg_qg,
+                               Gpu::HostVector<Real>& h_avg_uu , Gpu::HostVector<Real>& h_avg_uv , Gpu::HostVector<Real>& h_avg_uw,
+                               Gpu::HostVector<Real>& h_avg_vv , Gpu::HostVector<Real>& h_avg_vw , Gpu::HostVector<Real>& h_avg_ww,
+                               Gpu::HostVector<Real>& h_avg_uth , Gpu::HostVector<Real>& h_avg_vth, Gpu::HostVector<Real>& h_avg_wth,
+                               Gpu::HostVector<Real>& h_avg_thth,
+                               Gpu::HostVector<Real>& h_avg_uiuiu , Gpu::HostVector<Real>& h_avg_uiuiv,
+                               Gpu::HostVector<Real>& h_avg_uiuiw,
+                               Gpu::HostVector<Real>& h_avg_p,
+                               Gpu::HostVector<Real>& h_avg_pu , Gpu::HostVector<Real>& h_avg_pv , Gpu::HostVector<Real>& h_avg_pw)
 {
 
     // We assume that this is always called at level 0
diff --git a/Source/IO/ERF_WriteScalarProfiles.cpp b/Source/IO/ERF_WriteScalarProfiles.cpp
index 0370afa54..48285c369 100644
--- a/Source/IO/ERF_WriteScalarProfiles.cpp
+++ b/Source/IO/ERF_WriteScalarProfiles.cpp
@@ -203,7 +203,20 @@ ERF::cloud_fraction (Real time)
     AMREX_ASSERT(numpts < Long(std::numeric_limits<int>::max));
 
 #if 1
-    ParallelDescriptor::ReduceIntMax(p,static_cast<int>(numpts));
+    if (amrex::UseGpuAwareMpi()) {
+        // GPU-aware MPI: run the max-reduction in place on p.
+        ParallelDescriptor::ReduceIntMax(p,static_cast<int>(numpts));
+    } else {
+        // Otherwise stage the data through a pinned host buffer for the reduction.
+        Gpu::PinnedVector<int> hv(numpts);
+        Gpu::copyAsync(Gpu::deviceToHost, p, p+numpts, hv.data());
+        // The host buffer must be fully populated before the reduction reads it.
+        Gpu::streamSynchronize();
+        ParallelDescriptor::ReduceIntMax(hv.data(),static_cast<int>(numpts));
+        Gpu::copyAsync(Gpu::hostToDevice, hv.data(), hv.data()+numpts, p);
+        // hv is released at the end of this scope; wait for the copy back to finish first.
+        Gpu::streamSynchronize();
+    }
 
     // Sum over component 0
     Long num_cloudy = qc_2d.template sum<RunOn::Device>(0);