diff --git a/src/parallel/ALLLoadBalancer.cpp b/src/parallel/ALLLoadBalancer.cpp
index d6ff1c95ea..b58965216a 100644
--- a/src/parallel/ALLLoadBalancer.cpp
+++ b/src/parallel/ALLLoadBalancer.cpp
@@ -5,24 +5,26 @@
  */
 #include "ALLLoadBalancer.h"
-ALLLoadBalancer::ALLLoadBalancer(std::array<double, 3> boxMin, std::array<double, 3> boxMax, double gamma,
-                                 MPI_Comm comm, std::array<size_t, 3> globalSize,
-                                 std::array<size_t, 3> localCoordinates, std::array<double, 3> minimalPartitionSize)
-	: _all(3 /*dim*/, gamma) {
-	std::vector<ALL_Point<double>> points;
-	points.emplace_back(3, boxMin.data());
-	points.emplace_back(3, boxMax.data());
+ALLLoadBalancer::ALLLoadBalancer(const std::array<double, 3> &boxMin, const std::array<double, 3> &boxMax, double gamma,
+                                 MPI_Comm comm, const std::array<size_t, 3> &globalSize,
+                                 const std::array<size_t, 3> &localCoordinates,
+                                 const std::array<double, 3> &minimalPartitionSize)
+	: _all(3 /*dim*/, gamma), _minimalPartitionSize(minimalPartitionSize) {
+	// convert input into non-const vector because that is what ALL expects
+	std::vector<ALL_Point<double>> points {
+		{3, boxMin.data()},
+		{3, boxMax.data()},
+	};
 	_all.set_vertices(points);
-	std::array<int, 3> global_size{static_cast<int>(globalSize[0]), static_cast<int>(globalSize[1]),
+	// convert input into non-const int arrays because that is what ALL expects
+	std::array<int, 3> globalSizeIntArray{static_cast<int>(globalSize[0]), static_cast<int>(globalSize[1]),
 	                               static_cast<int>(globalSize[2])};
 	std::array<int, 3> coords{static_cast<int>(localCoordinates[0]), static_cast<int>(localCoordinates[1]),
 	                          static_cast<int>(localCoordinates[2])};
-	_all.set_proc_grid_params(coords.data(), global_size.data());
+	_all.set_proc_grid_params(coords.data(), globalSizeIntArray.data());
 	_all.set_communicator(comm);
-	_coversWholeDomain = {globalSize[0] == 1, global_size[1] == 1, global_size[2] == 1};
-
-	_minimalPartitionSize = minimalPartitionSize;
+	_coversWholeDomain = {globalSizeIntArray[0] == 1, globalSizeIntArray[1] == 1, globalSizeIntArray[2] == 1};
 }
 std::tuple<std::array<double, 3>, std::array<double, 3>> ALLLoadBalancer::rebalance(double work) {
 	_all.set_work(work);
@@ -30,8 +32,8 @@ std::tuple<std::array<double, 3>, std::array<double, 3>> ALLLoadBalancer::rebala
 	_all.set_min_domain_size(ALL_LB_t::STAGGERED, _minimalPartitionSize.data());
 	_all.balance(ALL_LB_t::STAGGERED);
 	auto resultVertices = _all.get_result_vertices();
-	std::array<double, 3> boxMin{resultVertices[0].x(0), resultVertices[0].x(1), resultVertices[0].x(2)};
-	std::array<double, 3> boxMax{resultVertices[1].x(0), resultVertices[1].x(1), resultVertices[1].x(2)};
 	_all.set_vertices(resultVertices);
+	const std::array<double, 3> boxMin{resultVertices[0].x(0), resultVertices[0].x(1), resultVertices[0].x(2)};
+	const std::array<double, 3> boxMax{resultVertices[1].x(0), resultVertices[1].x(1), resultVertices[1].x(2)};
 	return std::make_tuple(boxMin, boxMax);
 }
diff --git a/src/parallel/ALLLoadBalancer.h b/src/parallel/ALLLoadBalancer.h
index 9284dcfcf2..b70562ef14 100644
--- a/src/parallel/ALLLoadBalancer.h
+++ b/src/parallel/ALLLoadBalancer.h
@@ -11,9 +11,9 @@
 class ALLLoadBalancer : public LoadBalancer {
 public:
-	ALLLoadBalancer(std::array<double, 3> boxMin, std::array<double, 3> boxMax, double gamma, MPI_Comm comm,
-	                std::array<size_t, 3> globalSize, std::array<size_t, 3> localCoordinates,
-	                std::array<double, 3> minimalPartitionSize);
+	ALLLoadBalancer(const std::array<double, 3> &boxMin, const std::array<double, 3> &boxMax, double gamma, MPI_Comm comm,
+	                const std::array<size_t, 3>& globalSize, const std::array<size_t, 3>& localCoordinates,
+	                const std::array<double, 3>& minimalPartitionSize);
 	~ALLLoadBalancer() override = default;
 	std::tuple<std::array<double, 3>, std::array<double, 3>> rebalance(double work) override;
@@ -21,7 +21,7 @@ class ALLLoadBalancer : public LoadBalancer {
 		// nothing yet.
 	}
-	std::array<bool, 3> getCoversWholeDomain() override { return _coversWholeDomain; }
+	const std::array<bool, 3>& getCoversWholeDomain() const override { return _coversWholeDomain; }
 private:
 	ALL _all;
diff --git a/src/parallel/DomainDecomposition.cpp b/src/parallel/DomainDecomposition.cpp
index 4aa51be05a..bf2a87437a 100644
--- a/src/parallel/DomainDecomposition.cpp
+++ b/src/parallel/DomainDecomposition.cpp
@@ -87,15 +87,15 @@ bool DomainDecomposition::queryBalanceAndExchangeNonBlocking(bool /*forceRebalan
 void DomainDecomposition::balanceAndExchange(double /*lastTraversalTime*/, bool /*forceRebalancing*/,
                                              ParticleContainer* moleculeContainer, Domain* domain) {
 	if (sendLeavingWithCopies()) {
-		Log::global_log->debug() << "DD: Sending Leaving and Halos." << std::endl;
+		Log::global_log->debug() << "DD: Sending Leaving and Halos.\n";
 		DomainDecompMPIBase::exchangeMoleculesMPI(moleculeContainer, domain, LEAVING_AND_HALO_COPIES);
 	} else {
-		Log::global_log->debug() << "DD: Sending Leaving." << std::endl;
+		Log::global_log->debug() << "DD: Sending Leaving.\n";
 		DomainDecompMPIBase::exchangeMoleculesMPI(moleculeContainer, domain, LEAVING_ONLY);
 #ifndef MARDYN_AUTOPAS
 		moleculeContainer->deleteOuterParticles();
 #endif
-		Log::global_log->debug() << "DD: Sending Halos." << std::endl;
+		Log::global_log->debug() << "DD: Sending Halos.\n";
 		DomainDecompMPIBase::exchangeMoleculesMPI(moleculeContainer, domain, HALO_COPIES);
 	}
 }
diff --git a/src/parallel/GeneralDomainDecomposition.cpp b/src/parallel/GeneralDomainDecomposition.cpp
index fbae56f0e9..efd6aa750f 100644
--- a/src/parallel/GeneralDomainDecomposition.cpp
+++ b/src/parallel/GeneralDomainDecomposition.cpp
@@ -36,22 +36,23 @@ void GeneralDomainDecomposition::initializeALL() {
 	Log::global_log->info() << "gridSize:" << gridSize[0] << ", " << gridSize[1] << ", " << gridSize[2] << std::endl;
 	Log::global_log->info() << "gridCoords:" << gridCoords[0] << ", " << gridCoords[1] << ", " << gridCoords[2] << std::endl;
 	std::tie(_boxMin, _boxMax) = initializeRegularGrid(_domainLength, gridSize, gridCoords);
-	if (_forceLatchingToLinkedCellsGrid and not _gridSize.has_value()) {
+	if (_forceLatchingToLinkedCellsGrid and not _latchGridSize.has_value()) {
 		std::array<double, 3> forcedGridSize{};
 		for(size_t dim = 0; dim < 3; ++dim){
-			size_t numCells = _domainLength[dim] / _interactionLength;
+			// if we calculate 3.5 cells per dim there is only space for 3 -> floor
+			const auto numCells = std::floor(_domainLength[dim] / _interactionLength);
 			forcedGridSize[dim] = _domainLength[dim] / numCells;
 		}
-		_gridSize = forcedGridSize;
+		_latchGridSize = forcedGridSize;
 	}
-	if (_gridSize.has_value()) {
+	if (_latchGridSize.has_value()) {
 		std::tie(_boxMin, _boxMax) = latchToGridSize(_boxMin, _boxMax);
 	}
 #ifdef ENABLE_ALLLBL
 	// Increased slightly to prevent rounding errors.
 	const double safetyFactor = 1. + 1.e-10;
 	const std::array<double, 3> minimalDomainSize =
-		_gridSize.has_value() ? *_gridSize
+		_latchGridSize.has_value() ? *_latchGridSize
		                      : std::array{_interactionLength * safetyFactor, _interactionLength * safetyFactor,
		                                   _interactionLength * safetyFactor};
@@ -96,17 +97,17 @@ void GeneralDomainDecomposition::balanceAndExchange(double lastTraversalTime, bo
 		moleculeContainer->deleteOuterParticles();
 		// rebalance
-		Log::global_log->info() << "rebalancing..." << std::endl;
+		Log::global_log->debug() << "rebalancing..." << std::endl;
 		Log::global_log->set_mpi_output_all();
 		Log::global_log->debug() << "work:" << lastTraversalTime << std::endl;
 		Log::global_log->set_mpi_output_root(0);
 		auto [newBoxMin, newBoxMax] = _loadBalancer->rebalance(lastTraversalTime);
-		if (_gridSize.has_value()) {
+		if (_latchGridSize.has_value()) {
 			std::tie(newBoxMin, newBoxMax) = latchToGridSize(newBoxMin, newBoxMax);
 		}
 		// migrate the particles, this will rebuild the moleculeContainer!
-		Log::global_log->info() << "migrating particles" << std::endl;
+		Log::global_log->debug() << "migrating particles" << std::endl;
 		migrateParticles(domain, moleculeContainer, newBoxMin, newBoxMax);
 #ifndef MARDYN_AUTOPAS
@@ -119,9 +120,9 @@ void GeneralDomainDecomposition::balanceAndExchange(double lastTraversalTime, bo
 		_boxMax = newBoxMax;
 		// init communication partners
-		Log::global_log->info() << "updating communication partners" << std::endl;
+		Log::global_log->debug() << "updating communication partners" << std::endl;
 		initCommPartners(moleculeContainer, domain);
-		Log::global_log->info() << "rebalancing finished" << std::endl;
+		Log::global_log->debug() << "rebalancing finished" << std::endl;
 		DomainDecompMPIBase::exchangeMoleculesMPI(moleculeContainer, domain, HALO_COPIES);
 	} else {
 		if (sendLeavingWithCopies()) {
@@ -250,7 +251,7 @@ void GeneralDomainDecomposition::migrateParticles(Domain* domain, ParticleContai
 void GeneralDomainDecomposition::initCommPartners(ParticleContainer* moleculeContainer, Domain* domain) {
 	// init communication partners
-	auto coversWholeDomain = _loadBalancer->getCoversWholeDomain();
+	const auto coversWholeDomain = _loadBalancer->getCoversWholeDomain();
 	for (int d = 0; d < DIMgeom; ++d) {
 		// this needs to be updated for proper initialization of the neighbours
 		_neighbourCommunicationScheme->setCoverWholeDomain(d, coversWholeDomain[d]);
@@ -292,12 +293,12 @@ void GeneralDomainDecomposition::readXML(XMLfileUnits& xmlconfig) {
 			                         << strings.size() << "!" << std::endl;
 			mardyn_exit(8134);
 		}
-		_gridSize = {std::stod(strings[0]), std::stod(strings[1]), std::stod(strings[2])};
+		_latchGridSize = {std::stod(strings[0]), std::stod(strings[1]), std::stod(strings[2])};
 	} else {
 		double gridSize = std::stod(gridSizeString);
-		_gridSize = {gridSize, gridSize, gridSize};
+		_latchGridSize = {gridSize, gridSize, gridSize};
 	}
-	for (auto gridSize : *_gridSize) {
+	for (auto gridSize : *_latchGridSize) {
 		if (gridSize < _interactionLength) {
 			Log::global_log->error() << "GeneralDomainDecomposition's gridSize (" << gridSize
 			                         << ") is smaller than the interactionLength (" << _interactionLength
diff --git a/src/parallel/GeneralDomainDecomposition.h b/src/parallel/GeneralDomainDecomposition.h
index 7adbde1a76..12dd37f026 100644
--- a/src/parallel/GeneralDomainDecomposition.h
+++ b/src/parallel/GeneralDomainDecomposition.h
@@ -172,7 +172,7 @@ class GeneralDomainDecomposition : public DomainDecompMPIBase {
 	std::pair<std::array<double, 3>, std::array<double, 3>> latchToGridSize(std::array<double, 3> boxMin,
 	                                                                        std::array<double, 3> boxMax) {
 		for (size_t ind = 0; ind < 3; ++ind) {
-			double currentGridSize = (*_gridSize)[ind];
+			const double currentGridSize = (*_latchGridSize)[ind];
 			// For boxmin, the lower domain boundary is 0, so that's always fine!
 			boxMin[ind] = std::round(boxMin[ind] / currentGridSize) * currentGridSize;
 			// update boxmax only if it isn't at the very top of the domain!
@@ -197,10 +197,10 @@ class GeneralDomainDecomposition : public DomainDecompMPIBase {
 	size_t _initFrequency{500};
 	/**
-	 * Optionally safe a given grid size on which the process boundaries are bound/latched.
+	 * Optionally, give a grid size (= size of one grid cell) on which the process boundaries are bound/latched.
 	 * If no value is given, it is not used.
 	 */
-	std::optional<std::array<double, 3>> _gridSize{};
+	std::optional<std::array<double, 3>> _latchGridSize{};
 	/**
 	 * Bool that indicates whether a grid should be forced even if no gridSize is set.
diff --git a/src/parallel/LoadBalancer.h b/src/parallel/LoadBalancer.h
index 2b11b02498..f6b4e2faf5 100644
--- a/src/parallel/LoadBalancer.h
+++ b/src/parallel/LoadBalancer.h
@@ -40,5 +40,5 @@ class LoadBalancer {
 	 * Indicates if the current process / MPI rank spans the full length of a dimension.
 	 * @return Array of bools, for each dimension one value: true, iff the process spans the entire domain along this dimension.
 	 */
-	virtual std::array<bool, 3> getCoversWholeDomain() = 0;
+	virtual const std::array<bool, 3>& getCoversWholeDomain() const = 0;
 };
diff --git a/src/parallel/NeighborAcquirer.cpp b/src/parallel/NeighborAcquirer.cpp
index 86d69fe757..db4ae4078b 100644
--- a/src/parallel/NeighborAcquirer.cpp
+++ b/src/parallel/NeighborAcquirer.cpp
@@ -18,49 +18,50 @@
  * saved in partners01.
  */
 std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>> NeighborAcquirer::acquireNeighbors(
-	const std::array<double, 3> &globalDomainLength, HaloRegion *ownRegion, std::vector<HaloRegion> &desiredRegions,
+	const std::array<double, 3> &globalDomainLength, HaloRegion *ownRegion, const std::vector<HaloRegion> &desiredRegions,
 	const MPI_Comm &comm, bool excludeOwnRank) {
-	int my_rank; // my rank
+	int my_rank{}; // my rank
 	MPI_Comm_rank(comm, &my_rank);
-	int num_processes; // the number of processes in comm
+	int num_processes{}; // the number of processes in comm
 	MPI_Comm_size(comm, &num_processes);
-	int num_regions = desiredRegions.size(); // the number of regions I would like to acquire from other processes
+	const auto num_regions = desiredRegions.size(); // the number of regions I would like to acquire from other processes
 	// tell the other processes how much you are going to send
-	int num_bytes_send =
-		sizeof(int) * 2 + (sizeof(double) * 3 + sizeof(double) * 3 + sizeof(int) * 3 + sizeof(double) * 1) *
-			num_regions; // how many bytes am I going to send to all the other processes?
-	std::vector<int> num_bytes_receive_vec(num_processes, 0); // vector of number of bytes I am going to receive
-	// MPI_Allreduce(&num_bytes_send, &num_bytes_receive, 1, MPI_INT, MPI_SUM, comm);
-	MPI_Allgather(&num_bytes_send, 1, MPI_INT, num_bytes_receive_vec.data(), 1, MPI_INT, comm);
+	// how many bytes am I going to send to all the other processes
+	const int num_bytes_send =
+		sizeof(int) * 2 + (sizeof(double) * 3 + sizeof(double) * 3 + sizeof(int) * 3 + sizeof(double) * 1) * num_regions;
-	// create byte buffer
+	// create byte send buffer
 	std::vector outgoingDesiredRegionsVector(num_bytes_send); // outgoing byte buffer
-	int i = 0;
-	int p = 0;
 	// msg format: rank | number_of_regions | region_01 | region_02 | ...
-
-	memcpy(outgoingDesiredRegionsVector.data() + i, &my_rank, sizeof(int));
-	i += sizeof(int);
-	memcpy(outgoingDesiredRegionsVector.data() + i, &num_regions, sizeof(int));
-	i += sizeof(int);
+	// fill the buffer
+	int bufferPosition = 0;
+	memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, &my_rank, sizeof(int));
+	bufferPosition += sizeof(int);
+	memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, &num_regions, sizeof(int));
+	bufferPosition += sizeof(int);
 	for (auto &region : desiredRegions) {
 		// filling up the outgoing byte buffer
-		memcpy(outgoingDesiredRegionsVector.data() + i, region.rmin, sizeof(double) * 3);
-		i += sizeof(double) * 3;
-		memcpy(outgoingDesiredRegionsVector.data() + i, region.rmax, sizeof(double) * 3);
-		i += sizeof(double) * 3;
-		memcpy(outgoingDesiredRegionsVector.data() + i, region.offset, sizeof(int) * 3);
-		i += sizeof(int) * 3;
-		memcpy(outgoingDesiredRegionsVector.data() + i, &region.width, sizeof(double));
-		i += sizeof(double);
+		memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, region.rmin, sizeof(double) * 3);
+		bufferPosition += sizeof(double) * 3;
+		memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, region.rmax, sizeof(double) * 3);
+		bufferPosition += sizeof(double) * 3;
+		memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, region.offset, sizeof(int) * 3);
+		bufferPosition += sizeof(int) * 3;
+		memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, &region.width, sizeof(double));
+		bufferPosition += sizeof(double);
 	}
+	// set up structure information data for the Allgatherv operation
+	// vector of number of bytes I am going to receive
+	std::vector<int> num_bytes_receive_vec(num_processes, 0);
+	MPI_Allgather(&num_bytes_send, 1, MPI_INT, num_bytes_receive_vec.data(), 1, MPI_INT, comm);
+	// vector of offsets (=displacement in MPI) in the receive buffer
+	std::vector<int> num_bytes_displacements(num_processes, 0);
 	int num_bytes_receive = 0;
-	std::vector<int> num_bytes_displacements(num_processes, 0); // vector of number of bytes I am going to receive
 	for (int j = 0; j < num_processes; j++) {
 		num_bytes_displacements[j] = num_bytes_receive;
 		num_bytes_receive += num_bytes_receive_vec[j];
@@ -74,38 +75,42 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 	std::vector<int> numberOfRegionsToSendToRank(num_processes, 0); // outgoing row
-	int bytesOneRegion =
+	// parse / deserialize received data
+	constexpr int bytesOneRegion =
 		sizeof(double) * 3 + sizeof(double) * 3 + sizeof(int) * 3 + sizeof(double) + sizeof(double) * 3;
-	std::vector>> sendingList(num_processes); // the regions I own and want to send
-	std::vector<CommunicationPartner> comm_partners02;
+	// the regions I own and want to send: ranks>
+	std::vector>> sendingList(num_processes);
-	i = 0;
-	while (i != num_bytes_receive) {
-		int rank;
-		int regions;
+	std::vector<CommunicationPartner> comm_partners02{};
+	bufferPosition = 0;
+	while (bufferPosition < num_bytes_receive /*== buffer length*/) {
-		memcpy(&rank, incomingDesiredRegionsVector.data() + i, sizeof(int));
-		i += sizeof(int); // 4
-		memcpy(&regions, incomingDesiredRegionsVector.data() + i, sizeof(int));
-		i += sizeof(int); // 4
+		int rank{};
+		memcpy(&rank, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(int));
+		bufferPosition += sizeof(int); // 4
+		int regions{};
+		memcpy(&regions, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(int));
+		bufferPosition += sizeof(int); // 4
-		for (int j = 0; j < regions; j++) {
+		for (int regionId = 0; regionId < regions; ++regionId) {
 			HaloRegion unshiftedRegion{};
-			memcpy(unshiftedRegion.rmin, incomingDesiredRegionsVector.data() + i, sizeof(double) * 3);
-			i += sizeof(double) * 3; // 24
-			memcpy(unshiftedRegion.rmax, incomingDesiredRegionsVector.data() + i, sizeof(double) * 3);
-			i += sizeof(double) * 3; // 24
-			memcpy(unshiftedRegion.offset, incomingDesiredRegionsVector.data() + i, sizeof(int) * 3);
-			i += sizeof(int) * 3; // 12
-			memcpy(&unshiftedRegion.width, incomingDesiredRegionsVector.data() + i, sizeof(double));
-			i += sizeof(double); // 4
+			memcpy(unshiftedRegion.rmin, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(double) * 3);
+			bufferPosition += sizeof(double) * 3; // 24
+			memcpy(unshiftedRegion.rmax, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(double) * 3);
+			bufferPosition += sizeof(double) * 3; // 24
+			memcpy(unshiftedRegion.offset, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(int) * 3);
+			bufferPosition += sizeof(int) * 3; // 12
+			memcpy(&unshiftedRegion.width, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(double));
+			bufferPosition += sizeof(double); // 4
 			// msg format one region: rmin | rmax | offset | width | shift
-			auto shiftedRegionShiftPair = getPotentiallyShiftedRegions(globalDomainLength, unshiftedRegion);
-
-			std::vector<HaloRegion> regionsToTest = shiftedRegionShiftPair.first;
-			std::vector<std::array<double, 3>> shifts = shiftedRegionShiftPair.second;
-
+			const auto [regionsToTest, shifts] = getPotentiallyShiftedRegions(globalDomainLength, unshiftedRegion);
+			// Before every set of push_backs make sure there is enough space for this set + all remaining.
+			// This guarantees that there is enough space for the current set of push_backs, and, if subsequent sets
+			// are smaller, further reallocations can be avoided. This potentially leads to an overestimate but comes
+			// with the advantage of fewer resizes.
+			sendingList[rank].reserve(sendingList[rank].size() + ((regions - regionId) * regionsToTest.size()));
+			comm_partners02.reserve(comm_partners02.size() + ((regions - regionId) * regionsToTest.size()));
 			for(size_t regionIndex = 0; regionIndex < regionsToTest.size(); ++regionIndex){
 				auto regionToTest = regionsToTest[regionIndex];
 				if ((not excludeOwnRank or rank != my_rank) and isIncluded(ownRegion, &regionToTest)) {
@@ -113,16 +118,20 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 					numberOfRegionsToSendToRank[rank]++;
 					// this is a region I will send to rank
-					auto overlappedRegion = overlap(*ownRegion, regionToTest); // different shift for the overlap?
+					const auto overlappedRegion = overlap(*ownRegion, regionToTest); // different shift for the overlap?
 					// make a note in partners02 - don't forget to squeeze partners02
-					bool enlarged[3][2] = {{false}};
-					for (int k = 0; k < 3; k++) currentShift[k] *= -1;
+					constexpr bool enlarged[3][2] = {{false}};
+					for (int k = 0; k < 3; k++) {
+						currentShift[k] *= -1;
+					}
 					comm_partners02.emplace_back(rank, overlappedRegion.rmin, overlappedRegion.rmax, overlappedRegion.rmin,
 					                             overlappedRegion.rmax, currentShift.data(), overlappedRegion.offset, enlarged);
-					for (int k = 0; k < 3; k++) currentShift[k] *= -1;
+					for (int k = 0; k < 3; k++) {
+						currentShift[k] *= -1;
+					}
 					// Undo the shift. So it is again in the perspective of the rank we got this region from.
 					// We cannot use unshiftedRegion, as it is not overlapped and thus potentially too big.
@@ -143,7 +152,7 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 					std::vector singleRegion(bytesOneRegion);
-					p = 0;
+					int p = 0;
 					memcpy(&singleRegion[p], unshiftedOverlappedRegion.rmin, sizeof(double) * 3);
 					p += sizeof(double) * 3;
 					memcpy(&singleRegion[p], unshiftedOverlappedRegion.rmax, sizeof(double) * 3);
@@ -155,7 +164,7 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 					memcpy(&singleRegion[p], currentShift.data(), sizeof(double) * 3);
 					//p += sizeof(double) * 3;
-					sendingList[rank].push_back(std::move(singleRegion));
+					sendingList[rank].emplace_back(std::move(singleRegion));
 				}
 			}
 		}
@@ -218,19 +227,17 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 	std::vector<CommunicationPartner> comm_partners01; // the communication partners
 	// receive data (blocking)
-	int byte_counter = 0;
-
 	/**
 	 * We now receive as many regions as we previously determined that we will receive.
 	 * For that we keep track, how many regions we received and increase this according to the number of regions
 	 * received per MPI operation.
 	 */
-	while (byte_counter < numberOfRegionsToReceive[my_rank] * bytesOneRegion) {
+	for (int byte_counter = 0; byte_counter < numberOfRegionsToReceive[my_rank] * bytesOneRegion; ) {
 		// MPI_PROBE
 		MPI_Probe(MPI_ANY_SOURCE, 1, comm, &probe_status);
 		// interpret probe
-		int source = probe_status.MPI_SOURCE;
-		int bytes;
+		const auto source = probe_status.MPI_SOURCE;
+		int bytes{};
 		MPI_Get_count(&probe_status, MPI_BYTE, &bytes);
 		// we have received `bytes` bytes. So we increase the byte_counter.
 		byte_counter += bytes;
@@ -238,24 +245,26 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 		std::vector raw_neighbours(bytes);
 		MPI_Recv(raw_neighbours.data(), bytes, MPI_BYTE, source, 1, comm, &rec_status);
 		// Interpret Buffer and add neighbours
-		for (int k = 0; k < (bytes / bytesOneRegion); k++) { // number of regions from this process
+		const auto numRegionsToReceive = bytes / bytesOneRegion;
+		comm_partners01.reserve(std::max(comm_partners01.size(), static_cast<size_t>(numberOfRegionsToReceive[my_rank] * numRegionsToReceive)));
+		for (int regionId = 0; regionId < numRegionsToReceive; ++regionId) { // number of regions from this process
 			HaloRegion region{};
-			double shift[3];
-			i = k * bytesOneRegion;
+			bufferPosition = regionId * bytesOneRegion;
-			memcpy(region.rmin, raw_neighbours.data() + i, sizeof(double) * 3);
-			i += sizeof(double) * 3;
-			memcpy(region.rmax, raw_neighbours.data() + i, sizeof(double) * 3);
-			i += sizeof(double) * 3;
-			memcpy(region.offset, raw_neighbours.data() + i, sizeof(int) * 3);
-			i += sizeof(int) * 3;
-			memcpy(&region.width, raw_neighbours.data() + i, sizeof(double));
-			i += sizeof(double);
+			memcpy(region.rmin, raw_neighbours.data() + bufferPosition, sizeof(double) * 3);
+			bufferPosition += sizeof(double) * 3;
+			memcpy(region.rmax, raw_neighbours.data() + bufferPosition, sizeof(double) * 3);
+			bufferPosition += sizeof(double) * 3;
+			memcpy(region.offset, raw_neighbours.data() + bufferPosition, sizeof(int) * 3);
+			bufferPosition += sizeof(int) * 3;
+			memcpy(&region.width, raw_neighbours.data() + bufferPosition, sizeof(double));
+			bufferPosition += sizeof(double);
-			memcpy(shift, raw_neighbours.data() + i, sizeof(double) * 3);
-			i += sizeof(double) * 3;
+			double shift[3];
+			memcpy(shift, raw_neighbours.data() + bufferPosition, sizeof(double) * 3);
+			// bufferPosition += sizeof(double) * 3;
-			bool enlarged[3][2] = {{false}};
+			constexpr bool enlarged[3][2] = {{false}};
 			comm_partners01.emplace_back(source, region.rmin, region.rmax, region.rmin, region.rmax, shift, region.offset,
 			                             enlarged);
diff --git a/src/parallel/NeighborAcquirer.h b/src/parallel/NeighborAcquirer.h
index 5fb9cba960..0f92e35cee 100644
--- a/src/parallel/NeighborAcquirer.h
+++ b/src/parallel/NeighborAcquirer.h
@@ -29,7 +29,7 @@ class NeighborAcquirer {
 	 * second vector will own the particles.
 	 */
 	static std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>> acquireNeighbors(
-		const std::array<double, 3>& globalDomainLength, HaloRegion* ownRegion, std::vector<HaloRegion>& desiredRegions,
+		const std::array<double, 3>& globalDomainLength, HaloRegion* ownRegion, const std::vector<HaloRegion>& desiredRegions,
 		const MPI_Comm& comm, bool excludeOwnRank=true);
 	static std::vector<CommunicationPartner> squeezePartners(const std::vector<CommunicationPartner>& partners);
diff --git a/src/parallel/NeighbourCommunicationScheme.cpp b/src/parallel/NeighbourCommunicationScheme.cpp
index 7eaa8e94ea..ab4190d750 100644
--- a/src/parallel/NeighbourCommunicationScheme.cpp
+++ b/src/parallel/NeighbourCommunicationScheme.cpp
@@ -434,16 +434,17 @@ void NeighbourCommunicationScheme::selectNeighbours(MessageType msgType, bool im
 void DirectNeighbourCommunicationScheme::initCommunicationPartners(double cutoffRadius, Domain * domain,
 		DomainDecompMPIBase* domainDecomp, ParticleContainer* moleculeContainer) {
 	// corners of the process-specific domain
-	double rmin[DIMgeom]; // lower corner
-	double rmax[DIMgeom]; // higher corner
-
-	for (int d = 0; d < DIMgeom; d++) {
-		rmin[d] = domainDecomp->getBoundingBoxMin(d, domain);
-		rmax[d] = domainDecomp->getBoundingBoxMax(d, domain);
-
-		// TODO: this should be safe, as long as molecules don't start flying around
-		// at the speed of one cutoffRadius per time step
-	}
+	static_assert(DIMgeom == 3); // The initialization here assumes 3 dimensions!
+	const std::array localLowerCorner{
+		domainDecomp->getBoundingBoxMin(0, domain),
+		domainDecomp->getBoundingBoxMin(1, domain),
+		domainDecomp->getBoundingBoxMin(2, domain),
+	};
+	const std::array localUpperCorner{
+		domainDecomp->getBoundingBoxMax(0, domain),
+		domainDecomp->getBoundingBoxMax(1, domain),
+		domainDecomp->getBoundingBoxMax(2, domain),
+	};
 	if (_pushPull) {
 		for (unsigned int d = 0; d < _commDimms; d++) { // why free?
@@ -458,18 +459,17 @@ void DirectNeighbourCommunicationScheme::initCommunicationPartners(double cutoff
 		}
 	}
-	HaloRegion ownRegion = {rmin[0], rmin[1], rmin[2], rmax[0], rmax[1], rmax[2], 0, 0, 0, cutoffRadius};
+	HaloRegion ownRegion = {localLowerCorner[0], localLowerCorner[1], localLowerCorner[2], localUpperCorner[0], localUpperCorner[1], localUpperCorner[2], 0, 0, 0, cutoffRadius};
 	if (_pushPull) {
-		double* cellLength = moleculeContainer->getHaloSize();
+		double* const cellLength = moleculeContainer->getHaloSize();
 		// halo/force regions
 		std::vector<HaloRegion> haloOrForceRegions =
 			_zonalMethod->getHaloImportForceExportRegions(ownRegion, cutoffRadius, _coversWholeDomain, cellLength);
 		std::vector<HaloRegion> leavingRegions =
-			_zonalMethod->getLeavingExportRegions(ownRegion, cutoffRadius,
-			                                      _coversWholeDomain);
+			_zonalMethod->getLeavingExportRegions(ownRegion, cutoffRadius, _coversWholeDomain);
-		std::array globalDomainLength{domain->getGlobalLength(0), domain->getGlobalLength(1),
+		const std::array globalDomainLength{domain->getGlobalLength(0), domain->getGlobalLength(1),
		                                    domain->getGlobalLength(2)};
 		// assuming p1 sends regions to p2
 		std::tie((*_haloImportForceExportNeighbours)[0], (*_haloExportForceImportNeighbours)[0]) =
@@ -525,9 +525,10 @@ void IndirectNeighbourCommunicationScheme::initExchangeMoleculesMPI1D(ParticleCo
 	const int numNeighbours = (*_neighbours)[d].size();
 	std::vector dummy;
 	for (int i = 0; i < numNeighbours; ++i) {
-		Log::global_log->debug() << "Rank " << domainDecomp->getRank() << " is initiating communication to" << std::endl;
+		Log::global_log->debug() << "Rank " << domainDecomp->getRank()
+		                         << " is initiating communication to " << (*_neighbours)[d][i].getRank() << "\n";
 		(*_neighbours)[d][i].initSend(moleculeContainer, domainDecomp->getCommunicator(),
-		                              domainDecomp->getMPIParticleType(), msgType, dummy, false, true/*do halo position change*/);
+		                              domainDecomp->getMPIParticleType(), msgType, dummy, false, true/*do halo position check*/);
 	}
 }
diff --git a/src/particleContainer/LinkedCellTraversals/OriginalCellPairTraversal.h b/src/particleContainer/LinkedCellTraversals/OriginalCellPairTraversal.h
index c54e9045fc..16324a213c 100644
--- a/src/particleContainer/LinkedCellTraversals/OriginalCellPairTraversal.h
+++ b/src/particleContainer/LinkedCellTraversals/OriginalCellPairTraversal.h
@@ -123,7 +123,7 @@ inline void OriginalCellPairTraversal<CellTemplate>::computeNeighbourOffsets() {
 	mardyn_assert(forwardNeighbourIndex == 13);
 	mardyn_assert(backwardNeighbourIndex == 13);
-	Log::global_log->info() << "Neighbour offsets are bounded by "
+	Log::global_log->debug() << "Neighbour offsets are bounded by "
 	                        << minNeighbourOffset << ", " << maxNeighbourOffset << std::endl;
 }
diff --git a/src/particleContainer/LinkedCells.cpp b/src/particleContainer/LinkedCells.cpp
index d214872be7..bb03396c28 100644
--- a/src/particleContainer/LinkedCells.cpp
+++ b/src/particleContainer/LinkedCells.cpp
@@ -131,7 +131,7 @@ void LinkedCells::readXML(XMLfileUnits& xmlconfig) {
 }
 bool LinkedCells::rebuild(double bBoxMin[3], double bBoxMax[3]) {
-	Log::global_log->info() << "REBUILD OF LinkedCells" << std::endl;
+	Log::global_log->debug() << "REBUILD OF LinkedCells" << std::endl;
 	for (int i = 0; i < 3; i++) {
 		this->_boundingBoxMin[i] = bBoxMin[i];
@@ -139,13 +139,13 @@ bool LinkedCells::rebuild(double bBoxMin[3], double bBoxMax[3]) {
 		// _haloWidthInNumCells[i] = ::ceil(_cellsInCutoff);
 		_haloWidthInNumCells[i] = _cellsInCutoff;
 	}
-	Log::global_log->info() << "Bounding box: " << "[" << bBoxMin[0] << ", " << bBoxMax[0] << "]" << " x " << "["
+	Log::global_log->debug() << "Bounding box: " << "[" << bBoxMin[0] << ", " << bBoxMax[0] << "]" << " x " << "["
 			<< bBoxMin[1] << ", " << bBoxMax[1] << "]" << " x " << "[" << bBoxMin[2] << ", " << bBoxMax[2] << "]"
 			<< std::endl;
 	int numberOfCells = 1;
-	Log::global_log->info() << "Using " << _cellsInCutoff << " cells in cutoff." << std::endl;
+	Log::global_log->debug() << "Using " << _cellsInCutoff << " cells in cutoff." << std::endl;
 	float rc = (_cutoffRadius / _cellsInCutoff);
 	for (int dim = 0; dim < 3; dim++) {
@@ -171,7 +171,7 @@ bool LinkedCells::rebuild(double bBoxMin[3], double bBoxMax[3]) {
 		_haloBoundingBoxMax[dim] = _boundingBoxMax[dim] + _haloLength[dim];
 	}
-	Log::global_log->info() << "Cells per dimension (incl. halo): " << _cellsPerDimension[0] << " x "
+	Log::global_log->debug() << "Cells per dimension (incl. halo): " << _cellsPerDimension[0] << " x "
 	                        << _cellsPerDimension[1] << " x " << _cellsPerDimension[2] << std::endl;
diff --git a/src/particleContainer/TraversalTuner.h b/src/particleContainer/TraversalTuner.h
index b4f0f27a65..65b072512a 100644
--- a/src/particleContainer/TraversalTuner.h
+++ b/src/particleContainer/TraversalTuner.h
@@ -132,34 +132,35 @@ TraversalTuner<CellTemplate>::~TraversalTuner() {
 template <class CellTemplate>
 void TraversalTuner<CellTemplate>::findOptimalTraversal() {
-	// TODO implement autotuning here! At the moment the traversal is chosen via readXML!
+	// ls1 always uses the traversal selected via the XML
+	// If you want auto tuning activate AutoPas via CMake
 	_optimalTraversal = _traversals[selectedTraversal].first;
 	// log traversal
-	if (dynamic_cast<HalfShellTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using HalfShellTraversal." << std::endl;
-	else if (dynamic_cast<OriginalCellPairTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using OriginalCellPairTraversal." << std::endl;
+	if (dynamic_cast<SlicedCellPairTraversal<CellTemplate> *>(_optimalTraversal))
+		Log::global_log->debug() << "Using SlicedCellPairTraversal." << std::endl;
 	else if (dynamic_cast<C08CellPairTraversal<CellTemplate, false> *>(_optimalTraversal))
-		Log::global_log->info() << "Using C08CellPairTraversal without eighthShell." << std::endl;
-	else if (dynamic_cast<C08CellPairTraversal<CellTemplate, true> *>(_optimalTraversal))
-		Log::global_log->info() << "Using C08CellPairTraversal with eighthShell." << std::endl;
+		Log::global_log->debug() << "Using C08CellPairTraversal without eighthShell." << std::endl;
 	else if (dynamic_cast<C04CellPairTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using C04CellPairTraversal." << std::endl;
+		Log::global_log->debug() << "Using C04CellPairTraversal." << std::endl;
+	else if (dynamic_cast<C08CellPairTraversal<CellTemplate, true> *>(_optimalTraversal))
+		Log::global_log->debug() << "Using C08CellPairTraversal with eighthShell." << std::endl;
+	else if (dynamic_cast<HalfShellTraversal<CellTemplate> *>(_optimalTraversal))
+		Log::global_log->debug() << "Using HalfShellTraversal." << std::endl;
+	else if (dynamic_cast<OriginalCellPairTraversal<CellTemplate> *>(_optimalTraversal))
+		Log::global_log->debug() << "Using OriginalCellPairTraversal." << std::endl;
 	else if (dynamic_cast<MidpointTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using MidpointTraversal." << std::endl;
+		Log::global_log->debug() << "Using MidpointTraversal." << std::endl;
 	else if (dynamic_cast<NeutralTerritoryTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using NeutralTerritoryTraversal." << std::endl;
+		Log::global_log->debug() << "Using NeutralTerritoryTraversal." << std::endl;
 	else if (dynamic_cast<QuickschedTraversal<CellTemplate> *>(_optimalTraversal)) {
-		Log::global_log->info() << "Using QuickschedTraversal." << std::endl;
+		Log::global_log->debug() << "Using QuickschedTraversal." << std::endl;
 #ifndef QUICKSCHED
 		Log::global_log->error() << "MarDyn was compiled without Quicksched Support. Aborting!"
 		                         << std::endl;
 		mardyn_exit(1);
#endif
-	} else if (dynamic_cast<SlicedCellPairTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using SlicedCellPairTraversal." << std::endl;
-	else
+	} else
 		Log::global_log->warning() << "Using unknown traversal." << std::endl;
 	if (_cellsInCutoff > _optimalTraversal->maxCellsInCutoff()) {
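
Some of the arithmetic touched by this patch, written out as small self-contained sketches. The code below is illustrative only (names like latchToGrid, domainLength and interactionLength are made up for the example); it is not part of the patch. The grid latching in GeneralDomainDecomposition snaps rebalanced process boundaries onto a grid whose cell size is either given in the XML or derived from the linked-cells grid via floor(domainLength / interactionLength); the real latchToGridSize additionally keeps boxMax untouched when it already lies on the upper domain boundary.

// Sketch of the grid-latching arithmetic (illustrative names, not the real API).
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdio>

// Snap box corners to the nearest multiple of the latch grid size per dimension.
std::array<double, 3> latchToGrid(std::array<double, 3> corner, const std::array<double, 3>& gridSize) {
	for (std::size_t ind = 0; ind < 3; ++ind) {
		corner[ind] = std::round(corner[ind] / gridSize[ind]) * gridSize[ind];
	}
	return corner;
}

int main() {
	// Forced grid size as in initializeALL(): floor the cell count, then divide.
	const std::array<double, 3> domainLength{10., 10., 10.};
	const double interactionLength = 3.;
	std::array<double, 3> forcedGridSize{};
	for (std::size_t dim = 0; dim < 3; ++dim) {
		// 10 / 3 = 3.33 cells do not fit -> only 3 cells, each 3.33 long
		const auto numCells = std::floor(domainLength[dim] / interactionLength);
		forcedGridSize[dim] = domainLength[dim] / numCells;
	}
	// A rebalanced boundary at 3.1 / 6.5 / 9.9 latches to the nearest cell border.
	const auto latched = latchToGrid({3.1, 6.5, 9.9}, forcedGridSize);
	std::printf("%g %g %g\n", latched[0], latched[1], latched[2]);  // prints: 3.33333 6.66667 10
	return 0;
}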
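The byte layout used by NeighborAcquirer::acquireNeighbors is easiest to follow with the size arithmetic written out. The request message per rank is rank | number_of_regions | region_01 | region_02 | ..., each region serialized as rmin (3 doubles) | rmax (3 doubles) | offset (3 ints) | width (1 double); the answer appends a 3-double shift per region, which is what bytesOneRegion accounts for. A sketch of just the size computation, assuming 8-byte double and 4-byte int:

// Sketch of the message size arithmetic from NeighborAcquirer (assumes 8-byte double, 4-byte int).
#include <cstddef>
#include <cstdio>

int main() {
	const std::size_t numRegions = 4;  // example value of num_regions
	// rank + number_of_regions
	const std::size_t header = sizeof(int) * 2;  // 8 bytes
	// rmin | rmax | offset | width
	const std::size_t requestRegion = sizeof(double) * 3 + sizeof(double) * 3 + sizeof(int) * 3 + sizeof(double);  // 68 bytes
	// the answer additionally carries a shift of 3 doubles per region (= bytesOneRegion)
	const std::size_t answerRegion = requestRegion + sizeof(double) * 3;  // 92 bytes
	std::printf("num_bytes_send = %zu\n", header + requestRegion * numRegions);  // 8 + 68 * 4 = 280
	std::printf("bytesOneRegion = %zu\n", answerRegion);
	return 0;
}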
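The reserve calls added in front of the push_back/emplace_back loops follow the comment in the patch: before each set of insertions, reserve room for this set plus all remaining regions, accepting a possible overestimate in exchange for fewer reallocations. A generic sketch of that pattern with illustrative names and values:

// Sketch of the reserve-ahead pattern used in NeighborAcquirer (illustrative names).
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
	std::vector<int> commPartners;
	const int totalRegions = 5;
	for (int regionId = 0; regionId < totalRegions; ++regionId) {
		const std::size_t candidates = 3;  // e.g. regionsToTest.size() for this region
		// Room for the current set of push_backs plus all remaining regions, assuming each
		// produces at most as many entries. Possibly too much, but avoids repeated growth.
		commPartners.reserve(commPartners.size() + (totalRegions - regionId) * candidates);
		for (std::size_t c = 0; c < candidates; ++c) {
			commPartners.push_back(regionId);  // stand-in for emplace_back(...)
		}
	}
	std::printf("size=%zu capacity=%zu\n", commPartners.size(), commPartners.capacity());
	return 0;
}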