diff --git a/src/parallel/ALLLoadBalancer.cpp b/src/parallel/ALLLoadBalancer.cpp
index d6ff1c95ea..b58965216a 100644
--- a/src/parallel/ALLLoadBalancer.cpp
+++ b/src/parallel/ALLLoadBalancer.cpp
@@ -5,24 +5,26 @@
  */
 #include "ALLLoadBalancer.h"
-ALLLoadBalancer::ALLLoadBalancer(std::array<double, 3> boxMin, std::array<double, 3> boxMax, double gamma,
-                                 MPI_Comm comm, std::array<size_t, 3> globalSize,
-                                 std::array<size_t, 3> localCoordinates, std::array<double, 3> minimalPartitionSize)
-	: _all(3 /*dim*/, gamma) {
-	std::vector<ALL_Point<double>> points;
-	points.emplace_back(3, boxMin.data());
-	points.emplace_back(3, boxMax.data());
+ALLLoadBalancer::ALLLoadBalancer(const std::array<double, 3> &boxMin, const std::array<double, 3> &boxMax, double gamma,
+                                 MPI_Comm comm, const std::array<size_t, 3> &globalSize,
+                                 const std::array<size_t, 3> &localCoordinates,
+                                 const std::array<double, 3> &minimalPartitionSize)
+	: _all(3 /*dim*/, gamma), _minimalPartitionSize(minimalPartitionSize) {
+	// convert input into non-const vector because that is what ALL expects
+	std::vector<ALL_Point<double>> points {
+		{3, boxMin.data()},
+		{3, boxMax.data()},
+	};
 	_all.set_vertices(points);
-	std::array<int, 3> global_size{static_cast<int>(globalSize[0]), static_cast<int>(globalSize[1]),
+	// convert input into non-const int arrays because that is what ALL expects
+	std::array<int, 3> globalSizeIntArray{static_cast<int>(globalSize[0]), static_cast<int>(globalSize[1]),
 	                               static_cast<int>(globalSize[2])};
 	std::array<int, 3> coords{static_cast<int>(localCoordinates[0]), static_cast<int>(localCoordinates[1]),
 	                          static_cast<int>(localCoordinates[2])};
-	_all.set_proc_grid_params(coords.data(), global_size.data());
+	_all.set_proc_grid_params(coords.data(), globalSizeIntArray.data());
 	_all.set_communicator(comm);
-	_coversWholeDomain = {globalSize[0] == 1, global_size[1] == 1, global_size[2] == 1};
-
-	_minimalPartitionSize = minimalPartitionSize;
+	_coversWholeDomain = {globalSizeIntArray[0] == 1, globalSizeIntArray[1] == 1, globalSizeIntArray[2] == 1};
 }
 std::tuple<std::array<double, 3>, std::array<double, 3>> ALLLoadBalancer::rebalance(double work) {
 	_all.set_work(work);
@@ -30,8 +32,8 @@ std::tuple<std::array<double, 3>, std::array<double, 3>> ALLLoadBalancer::rebala
 	_all.set_min_domain_size(ALL_LB_t::STAGGERED, _minimalPartitionSize.data());
 	_all.balance(ALL_LB_t::STAGGERED);
 	auto resultVertices = _all.get_result_vertices();
-	std::array<double, 3> boxMin{resultVertices[0].x(0), resultVertices[0].x(1), resultVertices[0].x(2)};
-	std::array<double, 3> boxMax{resultVertices[1].x(0), resultVertices[1].x(1), resultVertices[1].x(2)};
 	_all.set_vertices(resultVertices);
+	const std::array<double, 3> boxMin{resultVertices[0].x(0), resultVertices[0].x(1), resultVertices[0].x(2)};
+	const std::array<double, 3> boxMax{resultVertices[1].x(0), resultVertices[1].x(1), resultVertices[1].x(2)};
 	return std::make_tuple(boxMin, boxMax);
 }
diff --git a/src/parallel/ALLLoadBalancer.h b/src/parallel/ALLLoadBalancer.h
index 9284dcfcf2..b70562ef14 100644
--- a/src/parallel/ALLLoadBalancer.h
+++ b/src/parallel/ALLLoadBalancer.h
@@ -11,9 +11,9 @@
 class ALLLoadBalancer : public LoadBalancer {
 public:
-	ALLLoadBalancer(std::array<double, 3> boxMin, std::array<double, 3> boxMax, double gamma, MPI_Comm comm,
-	                std::array<size_t, 3> globalSize, std::array<size_t, 3> localCoordinates,
-	                std::array<double, 3> minimalPartitionSize);
+	ALLLoadBalancer(const std::array<double, 3> &boxMin, const std::array<double, 3> &boxMax, double gamma, MPI_Comm comm,
+	                const std::array<size_t, 3>& globalSize, const std::array<size_t, 3>& localCoordinates,
+	                const std::array<double, 3>& minimalPartitionSize);
 	~ALLLoadBalancer() override = default;
 	std::tuple<std::array<double, 3>, std::array<double, 3>> rebalance(double work) override;
@@ -21,7 +21,7 @@ class ALLLoadBalancer : public LoadBalancer {
 		// nothing yet.
 	}
-	std::array<bool, 3> getCoversWholeDomain() override { return _coversWholeDomain; }
+	const std::array<bool, 3>& getCoversWholeDomain() const override { return _coversWholeDomain; }
 private:
 	ALL _all;
diff --git a/src/parallel/DomainDecomposition.cpp b/src/parallel/DomainDecomposition.cpp
index 4aa51be05a..bf2a87437a 100644
--- a/src/parallel/DomainDecomposition.cpp
+++ b/src/parallel/DomainDecomposition.cpp
@@ -87,15 +87,15 @@ bool DomainDecomposition::queryBalanceAndExchangeNonBlocking(bool /*forceRebalan
 void DomainDecomposition::balanceAndExchange(double /*lastTraversalTime*/, bool /*forceRebalancing*/,
                                              ParticleContainer* moleculeContainer, Domain* domain) {
 	if (sendLeavingWithCopies()) {
-		Log::global_log->debug() << "DD: Sending Leaving and Halos." << std::endl;
+		Log::global_log->debug() << "DD: Sending Leaving and Halos.\n";
 		DomainDecompMPIBase::exchangeMoleculesMPI(moleculeContainer, domain, LEAVING_AND_HALO_COPIES);
 	} else {
-		Log::global_log->debug() << "DD: Sending Leaving." << std::endl;
+		Log::global_log->debug() << "DD: Sending Leaving.\n";
 		DomainDecompMPIBase::exchangeMoleculesMPI(moleculeContainer, domain, LEAVING_ONLY);
 #ifndef MARDYN_AUTOPAS
 		moleculeContainer->deleteOuterParticles();
 #endif
-		Log::global_log->debug() << "DD: Sending Halos." << std::endl;
+		Log::global_log->debug() << "DD: Sending Halos.\n";
 		DomainDecompMPIBase::exchangeMoleculesMPI(moleculeContainer, domain, HALO_COPIES);
 	}
 }
diff --git a/src/parallel/GeneralDomainDecomposition.cpp b/src/parallel/GeneralDomainDecomposition.cpp
index fbae56f0e9..efd6aa750f 100644
--- a/src/parallel/GeneralDomainDecomposition.cpp
+++ b/src/parallel/GeneralDomainDecomposition.cpp
@@ -36,22 +36,23 @@ void GeneralDomainDecomposition::initializeALL() {
 	Log::global_log->info() << "gridSize:" << gridSize[0] << ", " << gridSize[1] << ", " << gridSize[2] << std::endl;
 	Log::global_log->info() << "gridCoords:" << gridCoords[0] << ", " << gridCoords[1] << ", " << gridCoords[2] << std::endl;
 	std::tie(_boxMin, _boxMax) = initializeRegularGrid(_domainLength, gridSize, gridCoords);
-	if (_forceLatchingToLinkedCellsGrid and not _gridSize.has_value()) {
+	if (_forceLatchingToLinkedCellsGrid and not _latchGridSize.has_value()) {
 		std::array<double, 3> forcedGridSize{};
 		for(size_t dim = 0; dim < 3; ++dim){
-			size_t numCells = _domainLength[dim] / _interactionLength;
+			// if we calculate 3.5 cells per dim there is only space for 3 -> floor
+			const auto numCells = std::floor(_domainLength[dim] / _interactionLength);
 			forcedGridSize[dim] = _domainLength[dim] / numCells;
 		}
-		_gridSize = forcedGridSize;
+		_latchGridSize = forcedGridSize;
 	}
-	if (_gridSize.has_value()) {
+	if (_latchGridSize.has_value()) {
 		std::tie(_boxMin, _boxMax) = latchToGridSize(_boxMin, _boxMax);
 	}
 #ifdef ENABLE_ALLLBL
 	// Increased slightly to prevent rounding errors.
 	const double safetyFactor = 1. + 1.e-10;
 	const std::array<double, 3> minimalDomainSize =
-		_gridSize.has_value() ? *_gridSize
+		_latchGridSize.has_value() ? *_latchGridSize
		                      : std::array{_interactionLength * safetyFactor, _interactionLength * safetyFactor,
		                                   _interactionLength * safetyFactor};
@@ -96,17 +97,17 @@ void GeneralDomainDecomposition::balanceAndExchange(double lastTraversalTime, bo
 		moleculeContainer->deleteOuterParticles();
 		// rebalance
-		Log::global_log->info() << "rebalancing..." << std::endl;
+		Log::global_log->debug() << "rebalancing..." << std::endl;
 		Log::global_log->set_mpi_output_all();
 		Log::global_log->debug() << "work:" << lastTraversalTime << std::endl;
 		Log::global_log->set_mpi_output_root(0);
 		auto [newBoxMin, newBoxMax] = _loadBalancer->rebalance(lastTraversalTime);
-		if (_gridSize.has_value()) {
+		if (_latchGridSize.has_value()) {
 			std::tie(newBoxMin, newBoxMax) = latchToGridSize(newBoxMin, newBoxMax);
 		}
 		// migrate the particles, this will rebuild the moleculeContainer!
-		Log::global_log->info() << "migrating particles" << std::endl;
+		Log::global_log->debug() << "migrating particles" << std::endl;
 		migrateParticles(domain, moleculeContainer, newBoxMin, newBoxMax);
 #ifndef MARDYN_AUTOPAS
@@ -119,9 +120,9 @@ void GeneralDomainDecomposition::balanceAndExchange(double lastTraversalTime, bo
 		_boxMax = newBoxMax;
 		// init communication partners
-		Log::global_log->info() << "updating communication partners" << std::endl;
+		Log::global_log->debug() << "updating communication partners" << std::endl;
 		initCommPartners(moleculeContainer, domain);
-		Log::global_log->info() << "rebalancing finished" << std::endl;
+		Log::global_log->debug() << "rebalancing finished" << std::endl;
 		DomainDecompMPIBase::exchangeMoleculesMPI(moleculeContainer, domain, HALO_COPIES);
 	} else {
 		if (sendLeavingWithCopies()) {
@@ -250,7 +251,7 @@ void GeneralDomainDecomposition::migrateParticles(Domain* domain, ParticleContai
 void GeneralDomainDecomposition::initCommPartners(ParticleContainer* moleculeContainer, Domain* domain) {
 	// init communication partners
-	auto coversWholeDomain = _loadBalancer->getCoversWholeDomain();
+	const auto coversWholeDomain = _loadBalancer->getCoversWholeDomain();
 	for (int d = 0; d < DIMgeom; ++d) {
 		// this needs to be updated for proper initialization of the neighbours
 		_neighbourCommunicationScheme->setCoverWholeDomain(d, coversWholeDomain[d]);
@@ -292,12 +293,12 @@ void GeneralDomainDecomposition::readXML(XMLfileUnits& xmlconfig) {
 			                         << strings.size() << "!" << std::endl;
 			mardyn_exit(8134);
 		}
-		_gridSize = {std::stod(strings[0]), std::stod(strings[1]), std::stod(strings[2])};
+		_latchGridSize = {std::stod(strings[0]), std::stod(strings[1]), std::stod(strings[2])};
 	} else {
 		double gridSize = std::stod(gridSizeString);
-		_gridSize = {gridSize, gridSize, gridSize};
+		_latchGridSize = {gridSize, gridSize, gridSize};
 	}
-	for (auto gridSize : *_gridSize) {
+	for (auto gridSize : *_latchGridSize) {
 		if (gridSize < _interactionLength) {
 			Log::global_log->error() << "GeneralDomainDecomposition's gridSize (" << gridSize
 			                         << ") is smaller than the interactionLength (" << _interactionLength
diff --git a/src/parallel/GeneralDomainDecomposition.h b/src/parallel/GeneralDomainDecomposition.h
index 7adbde1a76..12dd37f026 100644
--- a/src/parallel/GeneralDomainDecomposition.h
+++ b/src/parallel/GeneralDomainDecomposition.h
@@ -172,7 +172,7 @@ class GeneralDomainDecomposition : public DomainDecompMPIBase {
 	std::pair<std::array<double, 3>, std::array<double, 3>> latchToGridSize(std::array<double, 3> boxMin,
 	                                                                        std::array<double, 3> boxMax) {
 		for (size_t ind = 0; ind < 3; ++ind) {
-			double currentGridSize = (*_gridSize)[ind];
+			const double currentGridSize = (*_latchGridSize)[ind];
 			// For boxmin, the lower domain boundary is 0, so that's always fine!
 			boxMin[ind] = std::round(boxMin[ind] / currentGridSize) * currentGridSize;
 			// update boxmax only if it isn't at the very top of the domain!
@@ -197,10 +197,10 @@ class GeneralDomainDecomposition : public DomainDecompMPIBase {
 	size_t _initFrequency{500};
 	/**
-	 * Optionally safe a given grid size on which the process boundaries are bound/latched.
+	 * Optionally, give a grid size (= size of one grid cell) on which the process boundaries are bound/latched.
 	 * If no value is given, it is not used.
 	 */
-	std::optional<std::array<double, 3>> _gridSize{};
+	std::optional<std::array<double, 3>> _latchGridSize{};
 	/**
 	 * Bool that indicates whether a grid should be forced even if no gridSize is set.
diff --git a/src/parallel/LoadBalancer.h b/src/parallel/LoadBalancer.h
index 2b11b02498..f6b4e2faf5 100644
--- a/src/parallel/LoadBalancer.h
+++ b/src/parallel/LoadBalancer.h
@@ -40,5 +40,5 @@ class LoadBalancer {
 	 * Indicates if the current process / MPI rank spans the full length of a dimension.
 	 * @return Array of bools, for each dimension one value: true, iff the process spans the entire domain along this dimension.
 	 */
-	virtual std::array<bool, 3> getCoversWholeDomain() = 0;
+	virtual const std::array<bool, 3>& getCoversWholeDomain() const = 0;
 };
diff --git a/src/parallel/NeighborAcquirer.cpp b/src/parallel/NeighborAcquirer.cpp
index 86d69fe757..db4ae4078b 100644
--- a/src/parallel/NeighborAcquirer.cpp
+++ b/src/parallel/NeighborAcquirer.cpp
@@ -18,49 +18,50 @@
  * saved in partners01.
  */
 std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>> NeighborAcquirer::acquireNeighbors(
-	const std::array<double, 3> &globalDomainLength, HaloRegion *ownRegion, std::vector<HaloRegion> &desiredRegions,
+	const std::array<double, 3> &globalDomainLength, HaloRegion *ownRegion, const std::vector<HaloRegion> &desiredRegions,
 	const MPI_Comm &comm, bool excludeOwnRank) {
-	int my_rank; // my rank
+	int my_rank{}; // my rank
 	MPI_Comm_rank(comm, &my_rank);
-	int num_processes; // the number of processes in comm
+	int num_processes{}; // the number of processes in comm
 	MPI_Comm_size(comm, &num_processes);
-	int num_regions = desiredRegions.size(); // the number of regions I would like to acquire from other processes
+	const auto num_regions = desiredRegions.size(); // the number of regions I would like to acquire from other processes
 	// tell the other processes how much you are going to send
-	int num_bytes_send =
-		sizeof(int) * 2 + (sizeof(double) * 3 + sizeof(double) * 3 + sizeof(int) * 3 + sizeof(double) * 1) *
-			num_regions; // how many bytes am I going to send to all the other processes?
-	std::vector<int> num_bytes_receive_vec(num_processes, 0); // vector of number of bytes I am going to receive
-	// MPI_Allreduce(&num_bytes_send, &num_bytes_receive, 1, MPI_INT, MPI_SUM, comm);
-	MPI_Allgather(&num_bytes_send, 1, MPI_INT, num_bytes_receive_vec.data(), 1, MPI_INT, comm);
+	// how many bytes am I going to send to all the other processes
+	const int num_bytes_send =
+		sizeof(int) * 2 + (sizeof(double) * 3 + sizeof(double) * 3 + sizeof(int) * 3 + sizeof(double) * 1) * num_regions;
-	// create byte buffer
+	// create byte send buffer
 	std::vector outgoingDesiredRegionsVector(num_bytes_send); // outgoing byte buffer
-	int i = 0;
-	int p = 0;
 	// msg format: rank | number_of_regions | region_01 | region_02 | ...
-
-	memcpy(outgoingDesiredRegionsVector.data() + i, &my_rank, sizeof(int));
-	i += sizeof(int);
-	memcpy(outgoingDesiredRegionsVector.data() + i, &num_regions, sizeof(int));
-	i += sizeof(int);
+	// fill the buffer
+	int bufferPosition = 0;
+	memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, &my_rank, sizeof(int));
+	bufferPosition += sizeof(int);
+	memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, &num_regions, sizeof(int));
+	bufferPosition += sizeof(int);
 	for (auto &region : desiredRegions) {
 		// filling up the outgoing byte buffer
-		memcpy(outgoingDesiredRegionsVector.data() + i, region.rmin, sizeof(double) * 3);
-		i += sizeof(double) * 3;
-		memcpy(outgoingDesiredRegionsVector.data() + i, region.rmax, sizeof(double) * 3);
-		i += sizeof(double) * 3;
-		memcpy(outgoingDesiredRegionsVector.data() + i, region.offset, sizeof(int) * 3);
-		i += sizeof(int) * 3;
-		memcpy(outgoingDesiredRegionsVector.data() + i, &region.width, sizeof(double));
-		i += sizeof(double);
+		memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, region.rmin, sizeof(double) * 3);
+		bufferPosition += sizeof(double) * 3;
+		memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, region.rmax, sizeof(double) * 3);
+		bufferPosition += sizeof(double) * 3;
+		memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, region.offset, sizeof(int) * 3);
+		bufferPosition += sizeof(int) * 3;
+		memcpy(outgoingDesiredRegionsVector.data() + bufferPosition, &region.width, sizeof(double));
+		bufferPosition += sizeof(double);
 	}
+	// set up structure information data for the Allgatherv operation
+	// vector of number of bytes I am going to receive
+	std::vector<int> num_bytes_receive_vec(num_processes, 0);
+	MPI_Allgather(&num_bytes_send, 1, MPI_INT, num_bytes_receive_vec.data(), 1, MPI_INT, comm);
+	// vector of offsets (=displacement in MPI) in the receive buffer
+	std::vector<int> num_bytes_displacements(num_processes, 0);
 	int num_bytes_receive = 0;
-	std::vector<int> num_bytes_displacements(num_processes, 0); // vector of number of bytes I am going to receive
 	for (int j = 0; j < num_processes; j++) {
 		num_bytes_displacements[j] = num_bytes_receive;
 		num_bytes_receive += num_bytes_receive_vec[j];
@@ -74,38 +75,42 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 	std::vector<int> numberOfRegionsToSendToRank(num_processes, 0); // outgoing row
-	int bytesOneRegion =
+	// parse / deserialize received data
+	constexpr int bytesOneRegion =
 		sizeof(double) * 3 + sizeof(double) * 3 + sizeof(int) * 3 + sizeof(double) + sizeof(double) * 3;
-	std::vector>> sendingList(num_processes); // the regions I own and want to send
-	std::vector<CommunicationPartner> comm_partners02;
+	// the regions I own and want to send: ranks>
+	std::vector>> sendingList(num_processes);
-	i = 0;
-	while (i != num_bytes_receive) {
-		int rank;
-		int regions;
+	std::vector<CommunicationPartner> comm_partners02{};
+	bufferPosition = 0;
+	while (bufferPosition < num_bytes_receive /*== buffer length*/) {
-		memcpy(&rank, incomingDesiredRegionsVector.data() + i, sizeof(int));
-		i += sizeof(int); // 4
-		memcpy(&regions, incomingDesiredRegionsVector.data() + i, sizeof(int));
-		i += sizeof(int); // 4
+		int rank{};
+		memcpy(&rank, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(int));
+		bufferPosition += sizeof(int); // 4
+		int regions{};
+		memcpy(&regions, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(int));
+		bufferPosition += sizeof(int); // 4
-		for (int j = 0; j < regions; j++) {
+		for (int regionId = 0; regionId < regions; ++regionId) {
 			HaloRegion unshiftedRegion{};
-			memcpy(unshiftedRegion.rmin, incomingDesiredRegionsVector.data() + i, sizeof(double) * 3);
-			i += sizeof(double) * 3; // 24
-			memcpy(unshiftedRegion.rmax, incomingDesiredRegionsVector.data() + i, sizeof(double) * 3);
-			i += sizeof(double) * 3; // 24
-			memcpy(unshiftedRegion.offset, incomingDesiredRegionsVector.data() + i, sizeof(int) * 3);
-			i += sizeof(int) * 3; // 12
-			memcpy(&unshiftedRegion.width, incomingDesiredRegionsVector.data() + i, sizeof(double));
-			i += sizeof(double); // 4
+			memcpy(unshiftedRegion.rmin, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(double) * 3);
+			bufferPosition += sizeof(double) * 3; // 24
+			memcpy(unshiftedRegion.rmax, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(double) * 3);
+			bufferPosition += sizeof(double) * 3; // 24
+			memcpy(unshiftedRegion.offset, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(int) * 3);
+			bufferPosition += sizeof(int) * 3; // 12
+			memcpy(&unshiftedRegion.width, incomingDesiredRegionsVector.data() + bufferPosition, sizeof(double));
+			bufferPosition += sizeof(double); // 4
 			// msg format one region: rmin | rmax | offset | width | shift
-			auto shiftedRegionShiftPair = getPotentiallyShiftedRegions(globalDomainLength, unshiftedRegion);
-
-			std::vector<HaloRegion> regionsToTest = shiftedRegionShiftPair.first;
-			std::vector<std::array<double, 3>> shifts = shiftedRegionShiftPair.second;
-
+			const auto [regionsToTest, shifts] = getPotentiallyShiftedRegions(globalDomainLength, unshiftedRegion);
+			// Before every set of push_backs make sure there is enough space for this set + all remaining.
+			// This guarantees that there is enough space for the current set of push_backs, and, if subsequent sets
+			// are smaller, further reallocations can be avoided. This potentially leads to an overestimate but comes
+			// with the advantage of fewer resizes.
+			sendingList[rank].reserve(sendingList[rank].size() + ((regions - regionId) * regionsToTest.size()));
+			comm_partners02.reserve(comm_partners02.size() + ((regions - regionId) * regionsToTest.size()));
 			for(size_t regionIndex = 0; regionIndex < regionsToTest.size(); ++regionIndex){
 				auto regionToTest = regionsToTest[regionIndex];
 				if ((not excludeOwnRank or rank != my_rank) and isIncluded(ownRegion, &regionToTest)) {
@@ -113,16 +118,20 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 					numberOfRegionsToSendToRank[rank]++;
 					// this is a region I will send to rank
-					auto overlappedRegion = overlap(*ownRegion, regionToTest); // different shift for the overlap?
+					const auto overlappedRegion = overlap(*ownRegion, regionToTest); // different shift for the overlap?
 					// make a note in partners02 - don't forget to squeeze partners02
-					bool enlarged[3][2] = {{false}};
-					for (int k = 0; k < 3; k++) currentShift[k] *= -1;
+					constexpr bool enlarged[3][2] = {{false}};
+					for (int k = 0; k < 3; k++) {
+						currentShift[k] *= -1;
+					}
 					comm_partners02.emplace_back(rank, overlappedRegion.rmin, overlappedRegion.rmax, overlappedRegion.rmin,
 					                             overlappedRegion.rmax, currentShift.data(), overlappedRegion.offset, enlarged);
-					for (int k = 0; k < 3; k++) currentShift[k] *= -1;
+					for (int k = 0; k < 3; k++) {
+						currentShift[k] *= -1;
+					}
 					// Undo the shift. So it is again in the perspective of the rank we got this region from.
 					// We cannot use unshiftedRegion, as it is not overlapped and thus potentially too big.
@@ -143,7 +152,7 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 					std::vector singleRegion(bytesOneRegion);
-					p = 0;
+					int p = 0;
 					memcpy(&singleRegion[p], unshiftedOverlappedRegion.rmin, sizeof(double) * 3);
 					p += sizeof(double) * 3;
 					memcpy(&singleRegion[p], unshiftedOverlappedRegion.rmax, sizeof(double) * 3);
@@ -155,7 +164,7 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 					memcpy(&singleRegion[p], currentShift.data(), sizeof(double) * 3);
 					//p += sizeof(double) * 3;
-					sendingList[rank].push_back(std::move(singleRegion));
+					sendingList[rank].emplace_back(std::move(singleRegion));
 				}
 			}
 		}
@@ -218,19 +227,17 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 	std::vector<CommunicationPartner> comm_partners01; // the communication partners
 	// receive data (blocking)
-	int byte_counter = 0;
-
 	/**
 	 * We now receive as many regions as we previously determined that we will receive.
 	 * For that we keep track, how many regions we received and increase this according to the number of regions
 	 * received per MPI operation.
 	 */
-	while (byte_counter < numberOfRegionsToReceive[my_rank] * bytesOneRegion) {
+	for (int byte_counter = 0; byte_counter < numberOfRegionsToReceive[my_rank] * bytesOneRegion; ) {
 		// MPI_PROBE
 		MPI_Probe(MPI_ANY_SOURCE, 1, comm, &probe_status);
 		// interpret probe
-		int source = probe_status.MPI_SOURCE;
-		int bytes;
+		const auto source = probe_status.MPI_SOURCE;
+		int bytes{};
 		MPI_Get_count(&probe_status, MPI_BYTE, &bytes);
 		// we have received `bytes` bytes. So we increase the byte_counter.
 		byte_counter += bytes;
@@ -238,24 +245,26 @@ std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>>
 		std::vector raw_neighbours(bytes);
 		MPI_Recv(raw_neighbours.data(), bytes, MPI_BYTE, source, 1, comm, &rec_status);
 		// Interpret Buffer and add neighbours
-		for (int k = 0; k < (bytes / bytesOneRegion); k++) { // number of regions from this process
+		const auto numRegionsToReceive = bytes / bytesOneRegion;
+		comm_partners01.reserve(std::max(comm_partners01.size(), static_cast<size_t>(numberOfRegionsToReceive[my_rank] * numRegionsToReceive)));
+		for (int regionId = 0; regionId < numRegionsToReceive; ++regionId) { // number of regions from this process
 			HaloRegion region{};
-			double shift[3];
-			i = k * bytesOneRegion;
+			bufferPosition = regionId * bytesOneRegion;
-			memcpy(region.rmin, raw_neighbours.data() + i, sizeof(double) * 3);
-			i += sizeof(double) * 3;
-			memcpy(region.rmax, raw_neighbours.data() + i, sizeof(double) * 3);
-			i += sizeof(double) * 3;
-			memcpy(region.offset, raw_neighbours.data() + i, sizeof(int) * 3);
-			i += sizeof(int) * 3;
-			memcpy(&region.width, raw_neighbours.data() + i, sizeof(double));
-			i += sizeof(double);
+			memcpy(region.rmin, raw_neighbours.data() + bufferPosition, sizeof(double) * 3);
+			bufferPosition += sizeof(double) * 3;
+			memcpy(region.rmax, raw_neighbours.data() + bufferPosition, sizeof(double) * 3);
+			bufferPosition += sizeof(double) * 3;
+			memcpy(region.offset, raw_neighbours.data() + bufferPosition, sizeof(int) * 3);
+			bufferPosition += sizeof(int) * 3;
+			memcpy(&region.width, raw_neighbours.data() + bufferPosition, sizeof(double));
+			bufferPosition += sizeof(double);
-			memcpy(shift, raw_neighbours.data() + i, sizeof(double) * 3);
-			i += sizeof(double) * 3;
+			double shift[3];
+			memcpy(shift, raw_neighbours.data() + bufferPosition, sizeof(double) * 3);
+			// bufferPosition += sizeof(double) * 3;
-			bool enlarged[3][2] = {{false}};
+			constexpr bool enlarged[3][2] = {{false}};
 			comm_partners01.emplace_back(source, region.rmin, region.rmax, region.rmin, region.rmax, shift, region.offset,
 			                             enlarged);
diff --git a/src/parallel/NeighborAcquirer.h b/src/parallel/NeighborAcquirer.h
index 5fb9cba960..0f92e35cee 100644
--- a/src/parallel/NeighborAcquirer.h
+++ b/src/parallel/NeighborAcquirer.h
@@ -29,7 +29,7 @@ class NeighborAcquirer {
 	 * second vector will own the particles.
 	 */
 	static std::tuple<std::vector<CommunicationPartner>, std::vector<CommunicationPartner>> acquireNeighbors(
-		const std::array<double, 3>& globalDomainLength, HaloRegion* ownRegion, std::vector<HaloRegion>& desiredRegions,
+		const std::array<double, 3>& globalDomainLength, HaloRegion* ownRegion, const std::vector<HaloRegion>& desiredRegions,
 		const MPI_Comm& comm, bool excludeOwnRank=true);
 	static std::vector<CommunicationPartner> squeezePartners(const std::vector<CommunicationPartner>& partners);
diff --git a/src/parallel/NeighbourCommunicationScheme.cpp b/src/parallel/NeighbourCommunicationScheme.cpp
index 7eaa8e94ea..ab4190d750 100644
--- a/src/parallel/NeighbourCommunicationScheme.cpp
+++ b/src/parallel/NeighbourCommunicationScheme.cpp
@@ -434,16 +434,17 @@ void NeighbourCommunicationScheme::selectNeighbours(MessageType msgType, bool im
 void DirectNeighbourCommunicationScheme::initCommunicationPartners(double cutoffRadius, Domain * domain,
 		DomainDecompMPIBase* domainDecomp, ParticleContainer* moleculeContainer) {
 	// corners of the process-specific domain
-	double rmin[DIMgeom]; // lower corner
-	double rmax[DIMgeom]; // higher corner
-
-	for (int d = 0; d < DIMgeom; d++) {
-		rmin[d] = domainDecomp->getBoundingBoxMin(d, domain);
-		rmax[d] = domainDecomp->getBoundingBoxMax(d, domain);
-
-		// TODO: this should be safe, as long as molecules don't start flying around
-		// at the speed of one cutoffRadius per time step
-	}
+	static_assert(DIMgeom == 3); // The initialization here assumes 3 dimensions!
+	const std::array localLowerCorner{
+		domainDecomp->getBoundingBoxMin(0, domain),
+		domainDecomp->getBoundingBoxMin(1, domain),
+		domainDecomp->getBoundingBoxMin(2, domain),
+	};
+	const std::array localUpperCorner{
+		domainDecomp->getBoundingBoxMax(0, domain),
+		domainDecomp->getBoundingBoxMax(1, domain),
+		domainDecomp->getBoundingBoxMax(2, domain),
+	};
 	if (_pushPull) {
 		for (unsigned int d = 0; d < _commDimms; d++) { // why free?
@@ -458,18 +459,17 @@ void DirectNeighbourCommunicationScheme::initCommunicationPartners(double cutoff
 		}
 	}
-	HaloRegion ownRegion = {rmin[0], rmin[1], rmin[2], rmax[0], rmax[1], rmax[2], 0, 0, 0, cutoffRadius};
+	HaloRegion ownRegion = {localLowerCorner[0], localLowerCorner[1], localLowerCorner[2], localUpperCorner[0], localUpperCorner[1], localUpperCorner[2], 0, 0, 0, cutoffRadius};
 	if (_pushPull) {
-		double* cellLength = moleculeContainer->getHaloSize();
+		double* const cellLength = moleculeContainer->getHaloSize();
 		// halo/force regions
 		std::vector<HaloRegion> haloOrForceRegions =
 			_zonalMethod->getHaloImportForceExportRegions(ownRegion, cutoffRadius, _coversWholeDomain, cellLength);
 		std::vector<HaloRegion> leavingRegions =
-			_zonalMethod->getLeavingExportRegions(ownRegion, cutoffRadius,
-			                                      _coversWholeDomain);
+			_zonalMethod->getLeavingExportRegions(ownRegion, cutoffRadius, _coversWholeDomain);
-		std::array globalDomainLength{domain->getGlobalLength(0), domain->getGlobalLength(1),
+		const std::array globalDomainLength{domain->getGlobalLength(0), domain->getGlobalLength(1),
		                                    domain->getGlobalLength(2)};
 		// assuming p1 sends regions to p2
 		std::tie((*_haloImportForceExportNeighbours)[0], (*_haloExportForceImportNeighbours)[0]) =
@@ -525,9 +525,10 @@ void IndirectNeighbourCommunicationScheme::initExchangeMoleculesMPI1D(ParticleCo
 	const int numNeighbours = (*_neighbours)[d].size();
 	std::vector dummy;
 	for (int i = 0; i < numNeighbours; ++i) {
-		Log::global_log->debug() << "Rank " << domainDecomp->getRank() << " is initiating communication to" << std::endl;
+		Log::global_log->debug() << "Rank " << domainDecomp->getRank()
+		                         << " is initiating communication to " << (*_neighbours)[d][i].getRank() << "\n";
 		(*_neighbours)[d][i].initSend(moleculeContainer, domainDecomp->getCommunicator(),
-		                              domainDecomp->getMPIParticleType(), msgType, dummy, false, true/*do halo position change*/);
+		                              domainDecomp->getMPIParticleType(), msgType, dummy, false, true/*do halo position check*/);
 	}
 }
diff --git a/src/particleContainer/LinkedCellTraversals/OriginalCellPairTraversal.h b/src/particleContainer/LinkedCellTraversals/OriginalCellPairTraversal.h
index c54e9045fc..16324a213c 100644
--- a/src/particleContainer/LinkedCellTraversals/OriginalCellPairTraversal.h
+++ b/src/particleContainer/LinkedCellTraversals/OriginalCellPairTraversal.h
@@ -123,7 +123,7 @@ inline void OriginalCellPairTraversal<CellTemplate>::computeNeighbourOffsets() {
 	mardyn_assert(forwardNeighbourIndex == 13);
 	mardyn_assert(backwardNeighbourIndex == 13);
-	Log::global_log->info() << "Neighbour offsets are bounded by "
+	Log::global_log->debug() << "Neighbour offsets are bounded by "
 	                        << minNeighbourOffset << ", " << maxNeighbourOffset << std::endl;
 }
diff --git a/src/particleContainer/LinkedCells.cpp b/src/particleContainer/LinkedCells.cpp
index d214872be7..bb03396c28 100644
--- a/src/particleContainer/LinkedCells.cpp
+++ b/src/particleContainer/LinkedCells.cpp
@@ -131,7 +131,7 @@ void LinkedCells::readXML(XMLfileUnits& xmlconfig) {
 }
 bool LinkedCells::rebuild(double bBoxMin[3], double bBoxMax[3]) {
-	Log::global_log->info() << "REBUILD OF LinkedCells" << std::endl;
+	Log::global_log->debug() << "REBUILD OF LinkedCells" << std::endl;
 	for (int i = 0; i < 3; i++) {
 		this->_boundingBoxMin[i] = bBoxMin[i];
@@ -139,13 +139,13 @@ bool LinkedCells::rebuild(double bBoxMin[3], double bBoxMax[3]) {
 		// _haloWidthInNumCells[i] = ::ceil(_cellsInCutoff);
 		_haloWidthInNumCells[i] = _cellsInCutoff;
 	}
-	Log::global_log->info() << "Bounding box: " << "[" << bBoxMin[0] << ", " << bBoxMax[0] << "]" << " x " << "["
+	Log::global_log->debug() << "Bounding box: " << "[" << bBoxMin[0] << ", " << bBoxMax[0] << "]" << " x " << "["
 			<< bBoxMin[1] << ", " << bBoxMax[1] << "]" << " x " << "[" << bBoxMin[2] << ", " << bBoxMax[2] << "]"
 			<< std::endl;
 	int numberOfCells = 1;
-	Log::global_log->info() << "Using " << _cellsInCutoff << " cells in cutoff." << std::endl;
+	Log::global_log->debug() << "Using " << _cellsInCutoff << " cells in cutoff." << std::endl;
 	float rc = (_cutoffRadius / _cellsInCutoff);
 	for (int dim = 0; dim < 3; dim++) {
@@ -171,7 +171,7 @@ bool LinkedCells::rebuild(double bBoxMin[3], double bBoxMax[3]) {
 		_haloBoundingBoxMax[dim] = _boundingBoxMax[dim] + _haloLength[dim];
 	}
-	Log::global_log->info() << "Cells per dimension (incl. halo): " << _cellsPerDimension[0] << " x "
+	Log::global_log->debug() << "Cells per dimension (incl. halo): " << _cellsPerDimension[0] << " x "
 	                        << _cellsPerDimension[1] << " x " << _cellsPerDimension[2] << std::endl;
diff --git a/src/particleContainer/TraversalTuner.h b/src/particleContainer/TraversalTuner.h
index b4f0f27a65..65b072512a 100644
--- a/src/particleContainer/TraversalTuner.h
+++ b/src/particleContainer/TraversalTuner.h
@@ -132,34 +132,35 @@ TraversalTuner<CellTemplate>::~TraversalTuner() {
 template <class CellTemplate>
 void TraversalTuner<CellTemplate>::findOptimalTraversal() {
-	// TODO implement autotuning here! At the moment the traversal is chosen via readXML!
+	// ls1 always uses the traversal selected via the XML
+	// If you want auto tuning activate AutoPas via CMake
 	_optimalTraversal = _traversals[selectedTraversal].first;
 	// log traversal
-	if (dynamic_cast<HalfShellTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using HalfShellTraversal." << std::endl;
-	else if (dynamic_cast<OriginalCellPairTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using OriginalCellPairTraversal." << std::endl;
+	if (dynamic_cast<SlicedCellPairTraversal<CellTemplate> *>(_optimalTraversal))
+		Log::global_log->debug() << "Using SlicedCellPairTraversal." << std::endl;
 	else if (dynamic_cast<C08CellPairTraversal<CellTemplate, false> *>(_optimalTraversal))
-		Log::global_log->info() << "Using C08CellPairTraversal without eighthShell." << std::endl;
-	else if (dynamic_cast<C08CellPairTraversal<CellTemplate, true> *>(_optimalTraversal))
-		Log::global_log->info() << "Using C08CellPairTraversal with eighthShell." << std::endl;
+		Log::global_log->debug() << "Using C08CellPairTraversal without eighthShell." << std::endl;
 	else if (dynamic_cast<C04CellPairTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using C04CellPairTraversal." << std::endl;
+		Log::global_log->debug() << "Using C04CellPairTraversal." << std::endl;
+	else if (dynamic_cast<C08CellPairTraversal<CellTemplate, true> *>(_optimalTraversal))
+		Log::global_log->debug() << "Using C08CellPairTraversal with eighthShell." << std::endl;
+	else if (dynamic_cast<HalfShellTraversal<CellTemplate> *>(_optimalTraversal))
+		Log::global_log->debug() << "Using HalfShellTraversal." << std::endl;
+	else if (dynamic_cast<OriginalCellPairTraversal<CellTemplate> *>(_optimalTraversal))
+		Log::global_log->debug() << "Using OriginalCellPairTraversal." << std::endl;
 	else if (dynamic_cast<MidpointTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using MidpointTraversal." << std::endl;
+		Log::global_log->debug() << "Using MidpointTraversal." << std::endl;
 	else if (dynamic_cast<NeutralTerritoryTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using NeutralTerritoryTraversal." << std::endl;
+		Log::global_log->debug() << "Using NeutralTerritoryTraversal." << std::endl;
 	else if (dynamic_cast<QuickschedTraversal<CellTemplate> *>(_optimalTraversal)) {
-		Log::global_log->info() << "Using QuickschedTraversal." << std::endl;
+		Log::global_log->debug() << "Using QuickschedTraversal." << std::endl;
 #ifndef QUICKSCHED
 		Log::global_log->error() << "MarDyn was compiled without Quicksched Support. Aborting!"
 		                         << std::endl;
 		mardyn_exit(1);
#endif
-	} else if (dynamic_cast<SlicedCellPairTraversal<CellTemplate> *>(_optimalTraversal))
-		Log::global_log->info() << "Using SlicedCellPairTraversal." << std::endl;
-	else
+	} else
 		Log::global_log->warning() << "Using unknown traversal." << std::endl;
 	if (_cellsInCutoff > _optimalTraversal->maxCellsInCutoff()) {
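
Some of the arithmetic touched by this patch, written out as small self-contained sketches. The code below is illustrative only (names like latchToGrid, domainLength and interactionLength are made up for the example); it is not part of the patch. The grid latching in GeneralDomainDecomposition snaps rebalanced process boundaries onto a grid whose cell size is either given in the XML or derived from the linked-cells grid via floor(domainLength / interactionLength); the real latchToGridSize additionally keeps boxMax untouched when it already lies on the upper domain boundary.

// Sketch of the grid-latching arithmetic (illustrative names, not the real API).
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdio>

// Snap box corners to the nearest multiple of the latch grid size per dimension.
std::array<double, 3> latchToGrid(std::array<double, 3> corner, const std::array<double, 3>& gridSize) {
	for (std::size_t ind = 0; ind < 3; ++ind) {
		corner[ind] = std::round(corner[ind] / gridSize[ind]) * gridSize[ind];
	}
	return corner;
}

int main() {
	// Forced grid size as in initializeALL(): floor the cell count, then divide.
	const std::array<double, 3> domainLength{10., 10., 10.};
	const double interactionLength = 3.;
	std::array<double, 3> forcedGridSize{};
	for (std::size_t dim = 0; dim < 3; ++dim) {
		// 10 / 3 = 3.33 cells do not fit -> only 3 cells, each 3.33 long
		const auto numCells = std::floor(domainLength[dim] / interactionLength);
		forcedGridSize[dim] = domainLength[dim] / numCells;
	}
	// A rebalanced boundary at 3.1 / 6.5 / 9.9 latches to the nearest cell border.
	const auto latched = latchToGrid({3.1, 6.5, 9.9}, forcedGridSize);
	std::printf("%g %g %g\n", latched[0], latched[1], latched[2]);  // prints: 3.33333 6.66667 10
	return 0;
}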
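The byte layout used by NeighborAcquirer::acquireNeighbors is easiest to follow with the size arithmetic written out. The request message per rank is rank | number_of_regions | region_01 | region_02 | ..., each region serialized as rmin (3 doubles) | rmax (3 doubles) | offset (3 ints) | width (1 double); the answer appends a 3-double shift per region, which is what bytesOneRegion accounts for. A sketch of just the size computation, assuming 8-byte double and 4-byte int:

// Sketch of the message size arithmetic from NeighborAcquirer (assumes 8-byte double, 4-byte int).
#include <cstddef>
#include <cstdio>

int main() {
	const std::size_t numRegions = 4;  // example value of num_regions
	// rank + number_of_regions
	const std::size_t header = sizeof(int) * 2;  // 8 bytes
	// rmin | rmax | offset | width
	const std::size_t requestRegion = sizeof(double) * 3 + sizeof(double) * 3 + sizeof(int) * 3 + sizeof(double);  // 68 bytes
	// the answer additionally carries a shift of 3 doubles per region (= bytesOneRegion)
	const std::size_t answerRegion = requestRegion + sizeof(double) * 3;  // 92 bytes
	std::printf("num_bytes_send = %zu\n", header + requestRegion * numRegions);  // 8 + 68 * 4 = 280
	std::printf("bytesOneRegion = %zu\n", answerRegion);
	return 0;
}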
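The reserve calls added in front of the push_back/emplace_back loops follow the comment in the patch: before each set of insertions, reserve room for this set plus all remaining regions, accepting a possible overestimate in exchange for fewer reallocations. A generic sketch of that pattern with illustrative names and values:

// Sketch of the reserve-ahead pattern used in NeighborAcquirer (illustrative names).
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
	std::vector<int> commPartners;
	const int totalRegions = 5;
	for (int regionId = 0; regionId < totalRegions; ++regionId) {
		const std::size_t candidates = 3;  // e.g. regionsToTest.size() for this region
		// Room for the current set of push_backs plus all remaining regions, assuming each
		// produces at most as many entries. Possibly too much, but avoids repeated growth.
		commPartners.reserve(commPartners.size() + (totalRegions - regionId) * candidates);
		for (std::size_t c = 0; c < candidates; ++c) {
			commPartners.push_back(regionId);  // stand-in for emplace_back(...)
		}
	}
	std::printf("size=%zu capacity=%zu\n", commPartners.size(), commPartners.capacity());
	return 0;
}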