diff --git a/Exec/WindFarmTests/SingleTurbine/SimpleActuatorDisk/WithTerrain/ERF_prob.cpp b/Exec/WindFarmTests/SingleTurbine/SimpleActuatorDisk/WithTerrain/ERF_prob.cpp index 4df40c464..093b501e4 100644 --- a/Exec/WindFarmTests/SingleTurbine/SimpleActuatorDisk/WithTerrain/ERF_prob.cpp +++ b/Exec/WindFarmTests/SingleTurbine/SimpleActuatorDisk/WithTerrain/ERF_prob.cpp @@ -180,7 +180,6 @@ Problem::init_custom_terrain ( // Domain cell size and real bounds auto dx = geom.CellSizeArray(); auto ProbLoArr = geom.ProbLoArray(); - auto ProbHiArr = geom.ProbHiArray(); // Domain valid box (z_nd is nodal) const amrex::Box& domain = geom.Domain(); @@ -189,8 +188,6 @@ Problem::init_custom_terrain ( int domlo_z = domain.smallEnd(2); // User function parameters - Real a = 0.5; - Real num = 8 * a * a * a; Real xcen = 500.0; Real ycen = 500.0; diff --git a/Source/WindFarmParametrization/ERF_InitWindFarm.cpp b/Source/WindFarmParametrization/ERF_InitWindFarm.cpp index e4dea885e..df1fe76d9 100644 --- a/Source/WindFarmParametrization/ERF_InitWindFarm.cpp +++ b/Source/WindFarmParametrization/ERF_InitWindFarm.cpp @@ -350,100 +350,29 @@ WindFarm::read_windfarm_airfoil_tables (const std::string windfarm_airfoil_table set_blade_airfoil_spec(bld_airfoil_aoa, bld_airfoil_Cl, bld_airfoil_Cd); } -void -WindFarm::gatherKeyValuePairs(const std::vector>& localData, - std::vector>& globalData) -{ - int myRank = amrex::ParallelDescriptor::MyProc(); - int nProcs = amrex::ParallelDescriptor::NProcs(); - - int localSize = localData.size(); - - // Prepare separate vectors for keys and values - std::vector localKeys(localSize); - std::vector localValues(localSize); - - for (size_t i = 0; i < localData.size(); ++i) { - localKeys[i] = localData[i].first; - localValues[i] = localData[i].second; - } - - // Gather sizes of the arrays from all processes - std::vector allSizes(nProcs); - amrex::ParallelDescriptor::Gather(&localSize, 1, allSizes.data(), 1, 0); - - // Compute displacements for global data - std::vector displs(nProcs, 0); - int totalSize = 0; - if (amrex::ParallelDescriptor::MyProc() == 0) { - for (int i = 0; i < nProcs; ++i) { - displs[i] = totalSize; - totalSize += allSizes[i]; - } - globalData.resize(totalSize); - } - - // Gather keys - std::vector globalKeys(totalSize); - amrex::ParallelDescriptor::Gatherv(localKeys.data(), localSize, - globalKeys.data(), allSizes, - displs, 0); - - // Gather values - std::vector globalValues(totalSize); - amrex::ParallelDescriptor::Gatherv(localValues.data(), localSize, - globalValues.data(), allSizes, - displs, 0); - - // Rank 0 combines keys and values into globalData - if (myRank == 0) { - globalData.clear(); - for (int i = 0; i < totalSize; ++i) { - globalData.emplace_back(globalKeys[i], globalValues[i]); - } - - // Sort global data by keys - std::sort(globalData.begin(), globalData.end()); - } - - // Broadcast the global data to all processes - int globalCount = globalData.size(); - amrex::ParallelDescriptor::Bcast(&globalCount, 1, 0); - - if (myRank != 0) { - globalData.resize(globalCount); - } - - // Broadcast the actual pairs - for (int i = 0; i < globalCount; ++i) { - amrex::ParallelDescriptor::Bcast(&globalData[i].first, 1, 0); - amrex::ParallelDescriptor::Bcast(&globalData[i].second, 1, 0); - } -} - - void WindFarm::fill_Nturb_multifab (const Geometry& geom, MultiFab& mf_Nturb, std::unique_ptr& z_phys_nd) { - zloc.resize(xloc.size(),-1.0); - turb_index.resize(xloc.size(),-1); + zloc.resize(xloc.size(),0.0); + Vector is_counted; + is_counted.resize(xloc.size(),0); amrex::Gpu::DeviceVector d_xloc(xloc.size()); amrex::Gpu::DeviceVector d_yloc(yloc.size()); amrex::Gpu::DeviceVector d_zloc(xloc.size()); - amrex::Gpu::DeviceVector d_turb_index(xloc.size()); + amrex::Gpu::DeviceVector d_is_counted(xloc.size()); amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, xloc.begin(), xloc.end(), d_xloc.begin()); amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, yloc.begin(), yloc.end(), d_yloc.begin()); amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, zloc.begin(), zloc.end(), d_zloc.begin()); - amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, turb_index.begin(), turb_index.end(), d_turb_index.begin()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, is_counted.begin(), is_counted.end(), d_is_counted.begin()); Real* d_xloc_ptr = d_xloc.data(); Real* d_yloc_ptr = d_yloc.data(); Real* d_zloc_ptr = d_zloc.data(); - int* d_turb_index_ptr = d_turb_index.data(); + int* d_is_counted_ptr = d_is_counted.data(); mf_Nturb.setVal(0); @@ -477,39 +406,46 @@ WindFarm::fill_Nturb_multifab (const Geometry& geom, for(int it=0; it x1 and d_xloc_ptr[it]+1e-3 < x2 and d_yloc_ptr[it]+1e-3 > y1 and d_yloc_ptr[it]+1e-3 < y2){ - Nturb_array(i,j,k,0) = Nturb_array(i,j,k,0) + 1; - if(is_terrain) { - d_zloc_ptr[it] = z_nd_arr(i,j,k0); - d_turb_index_ptr[it] = it; - } - else { - d_zloc_ptr[it] = 0.0; - d_turb_index_ptr[it] = it; - } + Nturb_array(i,j,k,0) = Nturb_array(i,j,k,0) + 1; + // Perform atomic operations to ensure "increment only once" + if (is_terrain) { + int expected = 0; + int desired = 1; + // Atomic Compare-And-Swap: Increment only if d_is_counted_ptr[it] was 0 + if (Gpu::Atomic::CAS(&d_is_counted_ptr[it], expected, desired) == expected) { + // The current thread successfully set d_is_counted_ptr[it] from 0 to 1 + Gpu::Atomic::Add(&d_zloc_ptr[it], z_nd_arr(i, j, k0)); + } + } } } }); } Gpu::copy(Gpu::deviceToHost, d_zloc.begin(), d_zloc.end(), zloc.begin()); - Gpu::copy(Gpu::deviceToHost, d_turb_index.begin(), d_turb_index.end(), turb_index.begin()); - - std::vector> turb_index_zloc; - for(int it=0;it> turb_index_zloc_glob; + amrex::ParallelAllReduce::Sum(zloc.data(), + zloc.size(), + amrex::ParallelContext::CommunicatorAll()); - gatherKeyValuePairs(turb_index_zloc, turb_index_zloc_glob); + amrex::ParallelAllReduce::Sum(is_counted.data(), + is_counted.size(), + amrex::ParallelContext::CommunicatorAll()); - // Each process now has the global array - for (const auto& kv : turb_index_zloc_glob) { - //std::cout << "Rank " << amrex::ParallelDescriptor::MyProc() << "Global data" << kv.first << " " << kv.second << "\n"; - zloc[kv.first] = kv.second; + for(int it=0;it>& localData, - std::vector>& globalData); - void advance (const amrex::Geometry& a_geom, const amrex::Real& dt_advance, amrex::MultiFab& cons_in, @@ -154,7 +151,6 @@ public: protected: amrex::Vector xloc, yloc, zloc; - amrex::Vector turb_index; amrex::Real my_turb_disk_angle; amrex::Real hub_height, rotor_rad, thrust_coeff_standing, nominal_power; amrex::Vector wind_speed, thrust_coeff, power;