From 4fd225c0046e713fff1eec2ec649b1ded42c2eb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Fri, 21 Jun 2024 08:55:00 +0200 Subject: [PATCH 01/33] Spawn slaves from master --- CMakeLists_files.cmake | 2 + opm/simulators/flow/FlowGenericVanguard.cpp | 2 +- opm/simulators/flow/FlowMain.hpp | 10 +- opm/simulators/flow/Main.cpp | 4 +- opm/simulators/flow/Main.hpp | 2 + .../flow/ReservoirCouplingMaster.cpp | 97 +++++++++++++++++++ .../flow/ReservoirCouplingMaster.hpp | 61 ++++++++++++ .../flow/ReservoirCouplingSlave.cpp | 52 ++++++++++ .../flow/ReservoirCouplingSlave.hpp | 50 ++++++++++ .../flow/SimulatorFullyImplicitBlackoil.cpp | 4 + .../flow/SimulatorFullyImplicitBlackoil.hpp | 42 +++++++- .../utils/UnsupportedFlowKeywords.cpp | 2 - opm/simulators/utils/readDeck.cpp | 41 ++++++-- opm/simulators/utils/readDeck.hpp | 3 +- 14 files changed, 353 insertions(+), 19 deletions(-) create mode 100644 opm/simulators/flow/ReservoirCouplingMaster.cpp create mode 100644 opm/simulators/flow/ReservoirCouplingMaster.hpp create mode 100644 opm/simulators/flow/ReservoirCouplingSlave.cpp create mode 100644 opm/simulators/flow/ReservoirCouplingSlave.hpp diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index cda462e0fa5..0f625a2bfcb 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -112,6 +112,8 @@ list (APPEND MAIN_SOURCE_FILES opm/simulators/flow/RegionPhasePVAverage.cpp opm/simulators/flow/SimulatorConvergenceOutput.cpp opm/simulators/flow/SimulatorFullyImplicitBlackoil.cpp + opm/simulators/flow/ReservoirCouplingMaster.cpp + opm/simulators/flow/ReservoirCouplingSlave.cpp opm/simulators/flow/SimulatorReportBanners.cpp opm/simulators/flow/SimulatorSerializer.cpp opm/simulators/flow/SolutionContainers.cpp diff --git a/opm/simulators/flow/FlowGenericVanguard.cpp b/opm/simulators/flow/FlowGenericVanguard.cpp index 1622411b98f..1f6e0af49e5 100644 --- a/opm/simulators/flow/FlowGenericVanguard.cpp +++ b/opm/simulators/flow/FlowGenericVanguard.cpp @@ -177,7 +177,7 @@ void FlowGenericVanguard::readDeck(const std::string& filename) modelParams_.actionState_, modelParams_.wtestState_, modelParams_.eclSummaryConfig_, - nullptr, "normal", "normal", "100", false, false, false, {}); + nullptr, "normal", "normal", "100", false, false, false, {}, /*slaveMode=*/false); modelParams_.setupTime_ = setupTimer.stop(); } diff --git a/opm/simulators/flow/FlowMain.hpp b/opm/simulators/flow/FlowMain.hpp index 24a888c637a..de789692ab7 100644 --- a/opm/simulators/flow/FlowMain.hpp +++ b/opm/simulators/flow/FlowMain.hpp @@ -50,8 +50,8 @@ namespace Opm::Parameters { // Do not merge parallel output files or warn about them struct EnableLoggingFalloutWarning { static constexpr bool value = false; }; - struct OutputInterval { static constexpr int value = 1; }; +struct Slave { static constexpr bool value = false; }; } // namespace Opm::Parameters @@ -100,6 +100,10 @@ namespace Opm { Parameters::Register ("Developer option to see whether logging was on non-root processors. " "In that case it will be appended to the *.DBG or *.PRT files"); + Parameters::Register + ("Specify if the simulation is a slave simulation in a master-slave simulation"); + Parameters::Hide(); + Simulator::registerParameters(); // register the base parameters registerAllParameters_(/*finalizeRegistration=*/false); @@ -366,7 +370,7 @@ namespace Opm { // Callback that will be called from runSimulatorInitOrRun_(). int runSimulatorRunCallback_() { - SimulatorReport report = simulator_->run(*simtimer_); + SimulatorReport report = simulator_->run(*simtimer_, this->argc_, this->argv_); runSimulatorAfterSim_(report); return report.success.exit_status; } @@ -374,7 +378,7 @@ namespace Opm { // Callback that will be called from runSimulatorInitOrRun_(). int runSimulatorInitCallback_() { - simulator_->init(*simtimer_); + simulator_->init(*simtimer_, this->argc_, this->argv_); return EXIT_SUCCESS; } diff --git a/opm/simulators/flow/Main.cpp b/opm/simulators/flow/Main.cpp index 3abc4c07da7..c592e600710 100644 --- a/opm/simulators/flow/Main.cpp +++ b/opm/simulators/flow/Main.cpp @@ -230,6 +230,7 @@ void Main::readDeck(const std::string& deckFilename, const bool keepKeywords, const std::size_t numThreads, const int output_param, + const bool slaveMode, const std::string& parameters, std::string_view moduleVersion, std::string_view compileTimestamp) @@ -265,7 +266,8 @@ void Main::readDeck(const std::string& deckFilename, init_from_restart_file, outputCout_, keepKeywords, - outputInterval); + outputInterval, + slaveMode); verifyValidCellGeometry(FlowGenericVanguard::comm(), *this->eclipseState_); diff --git a/opm/simulators/flow/Main.hpp b/opm/simulators/flow/Main.hpp index 9a4e0f47c60..f051faa8c14 100644 --- a/opm/simulators/flow/Main.hpp +++ b/opm/simulators/flow/Main.hpp @@ -413,6 +413,7 @@ class Main keepKeywords, getNumThreads(), Parameters::Get(), + Parameters::Get(), cmdline_params, Opm::moduleVersion(), Opm::compileTimestamp()); @@ -697,6 +698,7 @@ class Main const bool keepKeywords, const std::size_t numThreads, const int output_param, + const bool slaveMode, const std::string& parameters, std::string_view moduleVersion, std::string_view compileTimestamp); diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp new file mode 100644 index 00000000000..a0adac86ad7 --- /dev/null +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -0,0 +1,97 @@ +/* + Copyright 2024 Equinor AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#include + +#include + +#include +#include +#include +#include + +#include + +#include + + +#include +#include + +#include + +namespace Opm { + +ReservoirCouplingMaster::ReservoirCouplingMaster( + const Parallel::Communication &comm, + const Schedule &schedule +) : + comm_{comm}, + schedule_{schedule} +{ } + +// NOTE: This functions is executed for all ranks, but only rank 0 will spawn +// the slave processes +void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char **argv) { + const auto& rescoup = this->schedule_[0].rescoup(); + char *flow_program_name = argv[0]; + for (const auto& [slave_name, slave] : rescoup.slaves()) { + auto master_slave_comm = MPI_Comm_Ptr(new MPI_Comm(MPI_COMM_NULL)); + // TODO: Here we should extract the relevant options from the master argv + // and forward them to the slave process, for now we just pass the deck file name + const auto& data_file_name = slave.dataFilename(); + const auto& directory_path = slave.directoryPath(); + // Concatenate the directory path and the data file name to get the full path + std::filesystem::path dir_path(directory_path); + std::filesystem::path data_file(data_file_name); + std::filesystem::path full_path = dir_path / data_file; + std::vector slave_argv(3); + slave_argv[0] = flow_program_name; + slave_argv[1] = const_cast(full_path.c_str()); + slave_argv[2] = nullptr; + auto num_procs = slave.numprocs(); + std::vector errcodes(num_procs); + MPI_Info info; + MPI_Info_create(&info); + const std::string log_file = fmt::format("{}.log", slave_name); + // TODO: We need to decide how to handle the output from the slave processes + // For now we just redirect the output to a log file with the name of the slave + MPI_Info_set(info, "output", log_file.c_str()); + MPI_Info_set(info, "error", log_file.c_str()); + int spawn_result = MPI_Comm_spawn( + slave_argv[0], + slave_argv.data(), + /*maxprocs=*/num_procs, + /*info=*/info, + /*root=*/0, // Rank 0 spawns the slave processes + /*comm=*/this->comm_, + /*intercomm=*/master_slave_comm.get(), + /*array_of_errcodes=*/errcodes.data() + ); + MPI_Info_free(&info); + if (spawn_result != MPI_SUCCESS || (*master_slave_comm == MPI_COMM_NULL)) { + OPM_THROW(std::runtime_error, "Failed to spawn slave process"); + } + this->master_slave_comm_.push_back(std::move(master_slave_comm)); + this->slave_names_.push_back(slave_name); + } +} + +} // namespace Opm + diff --git a/opm/simulators/flow/ReservoirCouplingMaster.hpp b/opm/simulators/flow/ReservoirCouplingMaster.hpp new file mode 100644 index 00000000000..b70cc35c6a5 --- /dev/null +++ b/opm/simulators/flow/ReservoirCouplingMaster.hpp @@ -0,0 +1,61 @@ +/* + Copyright 2024 Equinor AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#ifndef OPM_RESERVOIR_COUPLING_MASTER_HPP +#define OPM_RESERVOIR_COUPLING_MASTER_HPP + +#include +#include +#include + +#include + +#include + +namespace Opm { + +class ReservoirCouplingMaster { +public: + + ReservoirCouplingMaster(const Parallel::Communication &comm, const Schedule &schedule); + + // Custom deleter for MPI_Comm + struct MPI_Comm_Deleter { + void operator()(MPI_Comm* comm) const { + if (*comm != MPI_COMM_NULL) { + MPI_Comm_free(comm); + } + delete comm; + } + }; + using MPI_Comm_Ptr = std::unique_ptr; + + void spawnSlaveProcesses(int argc, char **argv); + + +private: + const Parallel::Communication &comm_; + const Schedule& schedule_; + // MPI communicators for the slave processes + std::vector master_slave_comm_; + std::vector slave_names_; +}; + +} // namespace Opm +#endif // OPM_RESERVOIR_COUPLING_MASTER_HPP diff --git a/opm/simulators/flow/ReservoirCouplingSlave.cpp b/opm/simulators/flow/ReservoirCouplingSlave.cpp new file mode 100644 index 00000000000..3726dd26a9c --- /dev/null +++ b/opm/simulators/flow/ReservoirCouplingSlave.cpp @@ -0,0 +1,52 @@ +/* + Copyright 2024 Equinor AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#include +#include + +#include +#include +#include +#include + +#include + +#include + +namespace Opm { + +ReservoirCouplingSlave::ReservoirCouplingSlave( + const Parallel::Communication &comm, + const Schedule &schedule +) : + comm_{comm}, + schedule_{schedule} +{ } +void ReservoirCouplingSlave::sendSimulationStartDateToMasterProcess() { + // TODO: Implement this function next + + //this->slave_master_comm_ = MPI_Comm_Ptr(new MPI_Comm(MPI_COMM_NULL)); + //MPI_Comm_get_parent(this->slave_master_comm_.get()); + //if (*(this->slave_master_comm_) == MPI_COMM_NULL) { + // OPM_THROW(std::runtime_error, "Slave process is not spawned by a master process"); + //} + OpmLog::info("Sent simulation start date to master process"); +} + +} // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingSlave.hpp b/opm/simulators/flow/ReservoirCouplingSlave.hpp new file mode 100644 index 00000000000..12f1d9590e3 --- /dev/null +++ b/opm/simulators/flow/ReservoirCouplingSlave.hpp @@ -0,0 +1,50 @@ +/* + Copyright 2024 Equinor AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#ifndef OPM_RESERVOIR_COUPLING_SLAVE_HPP +#define OPM_RESERVOIR_COUPLING_SLAVE_HPP + +#include +#include +#include +#include + +#include + +#include + +namespace Opm { + +class ReservoirCouplingSlave { +public: + using MPI_Comm_Ptr = ReservoirCouplingMaster::MPI_Comm_Ptr; + + ReservoirCouplingSlave(const Parallel::Communication &comm, const Schedule &schedule); + void sendSimulationStartDateToMasterProcess(); + +private: + const Parallel::Communication &comm_; + const Schedule& schedule_; + // MPI parent communicator for a slave process + MPI_Comm_Ptr slave_master_comm_{nullptr}; + +}; + +} // namespace Opm +#endif // OPM_RESERVOIR_COUPLING_SLAVE_HPP diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.cpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.cpp index aac95efe23c..e4f2a8a796c 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.cpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.cpp @@ -58,6 +58,10 @@ void registerSimulatorParameters() ("FileName for .OPMRST file used to load serialized state. " "If empty, CASENAME.OPMRST is used."); Parameters::Hide(); + Parameters::Register + ("Specify if the simulation is a slave simulation in a master-slave simulation"); + Parameters::Hide(); + } } // namespace Opm::detail diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp index 42e18dd98c3..1158e352efa 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp @@ -24,6 +24,9 @@ #include +#include +#include +#include #include #include @@ -35,6 +38,8 @@ #include #include #include +#include +#include #include #include #include @@ -63,6 +68,7 @@ struct SaveStep { static constexpr auto* value = ""; }; struct SaveFile { static constexpr auto* value = ""; }; struct LoadFile { static constexpr auto* value = ""; }; struct LoadStep { static constexpr int value = -1; }; +struct Slave { static constexpr bool value = false; }; } // namespace Opm::Parameters @@ -78,6 +84,8 @@ namespace Opm { template class SimulatorFullyImplicitBlackoil : private SerializableSim { +protected: + struct MPI_Comm_Deleter; public: using Simulator = GetPropType; using Grid = GetPropType; @@ -171,9 +179,9 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim /// \param[in,out] timer governs the requested reporting timesteps /// \param[in,out] state state of reservoir: pressure, fluxes /// \return simulation report, with timing data - SimulatorReport run(SimulatorTimer& timer) + SimulatorReport run(SimulatorTimer& timer, int argc, char** argv) { - init(timer); + init(timer, argc, argv); // Make cache up to date. No need for updating it in elementCtx. // NB! Need to be at the correct step in case of restart simulator_.setEpisodeIndex(timer.currentStepNum()); @@ -188,8 +196,32 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim return finalize(); } - void init(SimulatorTimer &timer) + // NOTE: The argc and argv will be used when launching a slave process + void init(SimulatorTimer &timer, int argc, char** argv) { + auto slave_mode = Parameters::get(); + if (slave_mode) { + this->reservoirCouplingSlave_ = + std::make_unique( + FlowGenericVanguard::comm(), + this->schedule() + ); + this->reservoirCouplingSlave_->sendSimulationStartDateToMasterProcess(); + } + else { + // For now, we require that SLAVES and GRUPMAST are defined at the first + // schedule step, so it is enough to check the first step. See the + // keyword handlers in opm-common for more information. + auto master_mode = this->schedule()[0].rescoup().masterMode(); + if (master_mode) { + this->reservoirCouplingMaster_ = + std::make_unique( + FlowGenericVanguard::comm(), + this->schedule() + ); + this->reservoirCouplingMaster_->spawnSlaveProcesses(argc, argv); + } + } simulator_.setEpisodeIndex(-1); // Create timers and file for writing timing info. @@ -555,6 +587,10 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim SimulatorConvergenceOutput convergence_output_; + bool slaveMode_{false}; + std::unique_ptr reservoirCouplingMaster_{nullptr}; + std::unique_ptr reservoirCouplingSlave_{nullptr}; + SimulatorSerializer serializer_; }; diff --git a/opm/simulators/utils/UnsupportedFlowKeywords.cpp b/opm/simulators/utils/UnsupportedFlowKeywords.cpp index d8afce54947..aa737331798 100644 --- a/opm/simulators/utils/UnsupportedFlowKeywords.cpp +++ b/opm/simulators/utils/UnsupportedFlowKeywords.cpp @@ -242,9 +242,7 @@ const KeywordValidation::UnsupportedKeywords& unsupportedKeywords() {"GRAVDRB", {true, std::nullopt}}, {"GRAVDRM", {true, std::nullopt}}, {"GRDREACH", {true, std::nullopt}}, - {"GRUPMAST", {true, std::nullopt}}, {"GRUPRIG", {true, std::nullopt}}, - {"GRUPSLAV", {true, std::nullopt}}, {"GRUPTARG", {true, std::nullopt}}, {"GSATINJE", {true, std::nullopt}}, {"GSEPCOND", {true, std::nullopt}}, diff --git a/opm/simulators/utils/readDeck.cpp b/opm/simulators/utils/readDeck.cpp index bdb16c75a7c..102a9f89eb3 100644 --- a/opm/simulators/utils/readDeck.cpp +++ b/opm/simulators/utils/readDeck.cpp @@ -54,7 +54,11 @@ #include #include +#include +#include +#include #include + #include #include #include @@ -151,7 +155,7 @@ namespace { if (schedule == nullptr) { schedule = std::make_shared (deck, eclipseState, parseContext, errorGuard, - std::move(python), lowActionParsingStrictness, /*slave_mode=*/false, + std::move(python), lowActionParsingStrictness, /*slaveMode=*/false, keepKeywords, outputInterval, init_state); } @@ -180,14 +184,14 @@ namespace { std::unique_ptr& udqState, std::unique_ptr& actionState, std::unique_ptr& wtestState, - Opm::ErrorGuard& errorGuard) + Opm::ErrorGuard& errorGuard, + const bool slaveMode) { if (schedule == nullptr) { schedule = std::make_shared (deck, eclipseState, parseContext, - errorGuard, std::move(python), lowActionParsingStrictness, keepKeywords); + errorGuard, std::move(python), lowActionParsingStrictness, slaveMode, keepKeywords); } - udqState = std::make_unique ((*schedule)[0].udq().params().undefinedValue()); @@ -233,6 +237,24 @@ namespace { #endif } + void inconsistentScheduleError(const std::string& message) + { + OPM_THROW(std::logic_error, + fmt::format("Inconsistent SCHEDULE section: {}", message)); + } + + void checkScheduleKeywordConsistency(const Opm::Schedule& schedule) + { + const auto& final_state = schedule.back(); + const auto& rescoup = final_state.rescoup(); + if (rescoup.slaveCount() > 0 && rescoup.masterGroupCount() == 0) { + inconsistentScheduleError("SLAVES keyword without GRUPMAST keyword"); + } + if (rescoup.slaveCount() == 0 && rescoup.masterGroupCount() > 0) { + inconsistentScheduleError("GRUPMAST keyword without SLAVES keyword"); + } + } + void readOnIORank(Opm::Parallel::Communication comm, const std::string& deckFilename, const Opm::ParseContext* parseContext, @@ -249,7 +271,8 @@ namespace { const bool lowActionParsingStrictness, const bool keepKeywords, const std::optional& outputInterval, - Opm::ErrorGuard& errorGuard) + Opm::ErrorGuard& errorGuard, + const bool slaveMode) { OPM_TIMEBLOCK(readDeck); if (((schedule == nullptr) || (summaryConfig == nullptr)) && @@ -282,9 +305,10 @@ namespace { lowActionParsingStrictness, keepKeywords, std::move(python), schedule, udqState, actionState, wtestState, - errorGuard); + errorGuard, slaveMode); } + checkScheduleKeywordConsistency(*schedule); eclipseState->appendAqufluxSchedule(schedule->getAquiferFluxSchedule()); if (Opm::OpmLog::hasBackend("STDOUT_LOGGER")) { @@ -539,7 +563,8 @@ void Opm::readDeck(Opm::Parallel::Communication comm, const bool initFromRestart, const bool checkDeck, const bool keepKeywords, - const std::optional& outputInterval) + const std::optional& outputInterval, + const bool slaveMode) { auto errorGuard = std::make_unique(); @@ -573,7 +598,7 @@ void Opm::readDeck(Opm::Parallel::Communication comm, eclipseState, schedule, udqState, actionState, wtestState, summaryConfig, std::move(python), initFromRestart, checkDeck, treatCriticalAsNonCritical, lowActionParsingStrictness, - keepKeywords, outputInterval, *errorGuard); + keepKeywords, outputInterval, *errorGuard, slaveMode); // Update schedule so that re-parsing after actions use same strictness assert(schedule); diff --git a/opm/simulators/utils/readDeck.hpp b/opm/simulators/utils/readDeck.hpp index 87da15315fe..dc7692ad471 100644 --- a/opm/simulators/utils/readDeck.hpp +++ b/opm/simulators/utils/readDeck.hpp @@ -99,7 +99,8 @@ void readDeck(Parallel::Communication comm, bool initFromRestart, bool checkDeck, bool keepKeywords, - const std::optional& outputInterval); + const std::optional& outputInterval, + bool slaveMode); void verifyValidCellGeometry(Parallel::Communication comm, const EclipseState& eclipseState); From bf5962061ab7a40c0433a0af8c92e27431072280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Fri, 28 Jun 2024 10:38:56 +0200 Subject: [PATCH 02/33] Do not specify program name twice Do not specify slave program name twice when launching slave process --- opm/simulators/flow/ReservoirCouplingMaster.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index a0adac86ad7..aa613073303 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -61,10 +61,9 @@ void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char std::filesystem::path dir_path(directory_path); std::filesystem::path data_file(data_file_name); std::filesystem::path full_path = dir_path / data_file; - std::vector slave_argv(3); - slave_argv[0] = flow_program_name; - slave_argv[1] = const_cast(full_path.c_str()); - slave_argv[2] = nullptr; + std::vector slave_argv(2); + slave_argv[0] = const_cast(full_path.c_str()); + slave_argv[1] = nullptr; auto num_procs = slave.numprocs(); std::vector errcodes(num_procs); MPI_Info info; @@ -75,7 +74,7 @@ void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char MPI_Info_set(info, "output", log_file.c_str()); MPI_Info_set(info, "error", log_file.c_str()); int spawn_result = MPI_Comm_spawn( - slave_argv[0], + flow_program_name, slave_argv.data(), /*maxprocs=*/num_procs, /*info=*/info, From ba7c1d589bdc0dbd5257fa7117d28a2211cbe3b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Fri, 28 Jun 2024 12:49:25 +0200 Subject: [PATCH 03/33] Open MPI does not support output redirection Open MPI does not support output redirection for spawned child processes. --- opm/simulators/flow/ReservoirCouplingMaster.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index aa613073303..28c7b8d3f13 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -66,24 +66,20 @@ void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char slave_argv[1] = nullptr; auto num_procs = slave.numprocs(); std::vector errcodes(num_procs); - MPI_Info info; - MPI_Info_create(&info); const std::string log_file = fmt::format("{}.log", slave_name); - // TODO: We need to decide how to handle the output from the slave processes - // For now we just redirect the output to a log file with the name of the slave - MPI_Info_set(info, "output", log_file.c_str()); - MPI_Info_set(info, "error", log_file.c_str()); + // TODO: We need to decide how to handle the output from the slave processes.. + // As far as I can tell, open MPI does not support redirecting the output + // to a file, so we might need to implement a custom solution for this int spawn_result = MPI_Comm_spawn( flow_program_name, slave_argv.data(), /*maxprocs=*/num_procs, - /*info=*/info, + /*info=*/MPI_INFO_NULL, /*root=*/0, // Rank 0 spawns the slave processes /*comm=*/this->comm_, /*intercomm=*/master_slave_comm.get(), /*array_of_errcodes=*/errcodes.data() ); - MPI_Info_free(&info); if (spawn_result != MPI_SUCCESS || (*master_slave_comm == MPI_COMM_NULL)) { OPM_THROW(std::runtime_error, "Failed to spawn slave process"); } From 71d06c5bdb7f56180242ddf894811ba74dadccea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Fri, 28 Jun 2024 13:36:10 +0200 Subject: [PATCH 04/33] Pass parameter --slave=true to the slaves --- opm/simulators/flow/ReservoirCouplingMaster.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 28c7b8d3f13..7fd8b022583 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -61,12 +61,13 @@ void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char std::filesystem::path dir_path(directory_path); std::filesystem::path data_file(data_file_name); std::filesystem::path full_path = dir_path / data_file; - std::vector slave_argv(2); - slave_argv[0] = const_cast(full_path.c_str()); - slave_argv[1] = nullptr; + std::vector slave_argv(3); + char enable_slave_arg[] = "--slave=true"; + slave_argv[0] = enable_slave_arg; + slave_argv[1] = const_cast(full_path.c_str()); + slave_argv[2] = nullptr; auto num_procs = slave.numprocs(); std::vector errcodes(num_procs); - const std::string log_file = fmt::format("{}.log", slave_name); // TODO: We need to decide how to handle the output from the slave processes.. // As far as I can tell, open MPI does not support redirecting the output // to a file, so we might need to implement a custom solution for this @@ -81,6 +82,14 @@ void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char /*array_of_errcodes=*/errcodes.data() ); if (spawn_result != MPI_SUCCESS || (*master_slave_comm == MPI_COMM_NULL)) { + for (unsigned int i = 0; i < num_procs; i++) { + if (errcodes[i] != MPI_SUCCESS) { + char error_string[MPI_MAX_ERROR_STRING]; + int length_of_error_string; + MPI_Error_string(errcodes[i], error_string, &length_of_error_string); + std::cerr << "Error spawning process " << i << ": " << error_string << std::endl; + } + } OPM_THROW(std::runtime_error, "Failed to spawn slave process"); } this->master_slave_comm_.push_back(std::move(master_slave_comm)); From e58ada1e7666d74af812d3c4134371d17696804e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Fri, 28 Jun 2024 14:10:55 +0200 Subject: [PATCH 05/33] Copy command line parameters from master Copy command line parameters from master to slave command line. Also replace data file name in master argv with data file name of the slave. --- .../flow/ReservoirCouplingMaster.cpp | 42 ++++++++++++++++--- .../flow/ReservoirCouplingMaster.hpp | 4 ++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 7fd8b022583..419cd512da3 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -46,6 +46,10 @@ ReservoirCouplingMaster::ReservoirCouplingMaster( schedule_{schedule} { } +// ------------------ +// Public methods +// ------------------ + // NOTE: This functions is executed for all ranks, but only rank 0 will spawn // the slave processes void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char **argv) { @@ -61,11 +65,7 @@ void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char std::filesystem::path dir_path(directory_path); std::filesystem::path data_file(data_file_name); std::filesystem::path full_path = dir_path / data_file; - std::vector slave_argv(3); - char enable_slave_arg[] = "--slave=true"; - slave_argv[0] = enable_slave_arg; - slave_argv[1] = const_cast(full_path.c_str()); - slave_argv[2] = nullptr; + std::vector slave_argv = getSlaveArgv(argc, argv, full_path); auto num_procs = slave.numprocs(); std::vector errcodes(num_procs); // TODO: We need to decide how to handle the output from the slave processes.. @@ -87,7 +87,7 @@ void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char char error_string[MPI_MAX_ERROR_STRING]; int length_of_error_string; MPI_Error_string(errcodes[i], error_string, &length_of_error_string); - std::cerr << "Error spawning process " << i << ": " << error_string << std::endl; + OpmLog::info(fmt::format("Error spawning process {}: {}", i, error_string)); } } OPM_THROW(std::runtime_error, "Failed to spawn slave process"); @@ -97,5 +97,35 @@ void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char } } +// ------------------ +// Private methods +// ------------------ + +std::vector ReservoirCouplingMaster::getSlaveArgv( + int argc, char **argv, const std::filesystem::path &data_file +) { + // Calculate the size of the slave_argv vector like this: + // - We will not use the first argument, as it is the program name + // - We will replace the data file name in argv with the data_file path + // - We will also add the argument "--slave=true" to the argv + // - And we will add a nullptr at the end of the argv + // So the size of the slave_argv vector will be argc + 1 + // + // Assume: All parameters will be on the form --parameter=value (as a string without spaces) + std::vector slave_argv(argc + 1); + for (int i = 1; i < argc; i++) { + // Check if the argument starts with "--", if not it will be a positional argument + // and we will replace it with the data file path + if (std::string(argv[i]).substr(0, 2) == "--") { + slave_argv[i-1] = argv[i]; + } else { + slave_argv[i-1] = const_cast(data_file.c_str()); + } + } + slave_argv[argc-1] = const_cast("--slave=true"); + slave_argv[argc] = nullptr; + return slave_argv; +} + } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingMaster.hpp b/opm/simulators/flow/ReservoirCouplingMaster.hpp index b70cc35c6a5..8411e124d46 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.hpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.hpp @@ -26,6 +26,7 @@ #include +#include #include namespace Opm { @@ -50,6 +51,9 @@ class ReservoirCouplingMaster { private: + std::vector getSlaveArgv( + int argc, char **argv, const std::filesystem::path &data_file); + const Parallel::Communication &comm_; const Schedule& schedule_; // MPI communicators for the slave processes From dffe282bf0f6a83a3d6aaae523b300210d36f5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Fri, 28 Jun 2024 15:49:13 +0200 Subject: [PATCH 06/33] Redirect slave standard output to a logfile --- opm/simulators/flow/Main.cpp | 23 +++++++++++ opm/simulators/flow/Main.hpp | 1 + .../flow/ReservoirCouplingMaster.cpp | 41 ++++++++++++++----- .../flow/ReservoirCouplingMaster.hpp | 7 +++- 4 files changed, 60 insertions(+), 12 deletions(-) diff --git a/opm/simulators/flow/Main.cpp b/opm/simulators/flow/Main.cpp index c592e600710..bf4b6bb5ab7 100644 --- a/opm/simulators/flow/Main.cpp +++ b/opm/simulators/flow/Main.cpp @@ -49,6 +49,7 @@ namespace Opm { Main::Main(int argc, char** argv, bool ownMPI) : argc_(argc), argv_(argv), ownMPI_(ownMPI) { + handleReservoirCouplingSlaveStdoutStderr_(); if (ownMPI_) { initMPI(); } @@ -132,6 +133,28 @@ Main::~Main() #endif } +void Main::handleReservoirCouplingSlaveStdoutStderr_() +{ + // If first command line argument is "--slave-log-file=", + // then redirect stdout and stderr to the specified file. + if (this->argc_ >= 2) { + std::string_view arg = this->argv_[1]; + if (arg.substr(0, 17) == "--slave-log-file=") { + std::string filename = std::string(arg.substr(17)); + freopen(filename.c_str(), "w", stdout); + freopen(filename.c_str(), "w", stderr); + this->argc_ -= 1; + char *program_name = this->argv_[0]; + this->argv_ += 1; + // We assume the "argv" array pointers remain valid (not freed) for the lifetime + // of this program, so the following assignment is safe. + // Overwrite the first argument with the program name, i.e. pretend the program + // was called with the same arguments, but without the "--slave-log-file" argument. + this->argv_[0] = program_name; + } + } +} + void Main::setArgvArgc_(const std::string& filename) { this->deckFilename_ = filename; diff --git a/opm/simulators/flow/Main.hpp b/opm/simulators/flow/Main.hpp index f051faa8c14..6974f1665c1 100644 --- a/opm/simulators/flow/Main.hpp +++ b/opm/simulators/flow/Main.hpp @@ -150,6 +150,7 @@ class Main ~Main(); void setArgvArgc_(const std::string& filename); + void handleReservoirCouplingSlaveStdoutStderr_(); void initMPI(); diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 419cd512da3..242eff9cb37 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -52,20 +52,21 @@ ReservoirCouplingMaster::ReservoirCouplingMaster( // NOTE: This functions is executed for all ranks, but only rank 0 will spawn // the slave processes -void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char **argv) { +void ReservoirCouplingMaster::spawnSlaveProcesses(int argc, char **argv) { const auto& rescoup = this->schedule_[0].rescoup(); char *flow_program_name = argv[0]; for (const auto& [slave_name, slave] : rescoup.slaves()) { auto master_slave_comm = MPI_Comm_Ptr(new MPI_Comm(MPI_COMM_NULL)); - // TODO: Here we should extract the relevant options from the master argv - // and forward them to the slave process, for now we just pass the deck file name const auto& data_file_name = slave.dataFilename(); const auto& directory_path = slave.directoryPath(); // Concatenate the directory path and the data file name to get the full path std::filesystem::path dir_path(directory_path); std::filesystem::path data_file(data_file_name); std::filesystem::path full_path = dir_path / data_file; - std::vector slave_argv = getSlaveArgv(argc, argv, full_path); + std::string log_filename; // the getSlaveArgv() function will set this + std::vector slave_argv = getSlaveArgv( + argc, argv, full_path, slave_name, log_filename + ); auto num_procs = slave.numprocs(); std::vector errcodes(num_procs); // TODO: We need to decide how to handle the output from the slave processes.. @@ -92,6 +93,12 @@ void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char } OPM_THROW(std::runtime_error, "Failed to spawn slave process"); } + OpmLog::info( + fmt::format( + "Spawned reservoir coupling slave simulation for slave with name: " + "{}. Standard output logfile name: {}.log", slave_name, slave_name + ) + ); this->master_slave_comm_.push_back(std::move(master_slave_comm)); this->slave_names_.push_back(slave_name); } @@ -102,28 +109,40 @@ void ReservoirCouplingMaster::spawnSlaveProcesses([[maybe_unused]]int argc, char // ------------------ std::vector ReservoirCouplingMaster::getSlaveArgv( - int argc, char **argv, const std::filesystem::path &data_file + int argc, + char **argv, + const std::filesystem::path &data_file, + const std::string &slave_name, + std::string &log_filename ) { // Calculate the size of the slave_argv vector like this: // - We will not use the first argument, as it is the program name // - We will replace the data file name in argv with the data_file path + // - We insert as first argument --slave-log-file=.log // - We will also add the argument "--slave=true" to the argv // - And we will add a nullptr at the end of the argv - // So the size of the slave_argv vector will be argc + 1 + // So the size of the slave_argv vector will be argc + 2 // // Assume: All parameters will be on the form --parameter=value (as a string without spaces) - std::vector slave_argv(argc + 1); + // + // Important: The returned vector will have pointers to argv pointers, + // data_file string buffer, and slave_name string buffer. So the caller + // must ensure that these buffers are not deallocated before the slave_argv has + // been used. + std::vector slave_argv(argc + 2); + log_filename = "--slave-log-file=" + slave_name + ".log"; + slave_argv[0] = const_cast(log_filename.c_str()); for (int i = 1; i < argc; i++) { // Check if the argument starts with "--", if not it will be a positional argument // and we will replace it with the data file path if (std::string(argv[i]).substr(0, 2) == "--") { - slave_argv[i-1] = argv[i]; + slave_argv[i] = argv[i]; } else { - slave_argv[i-1] = const_cast(data_file.c_str()); + slave_argv[i] = const_cast(data_file.c_str()); } } - slave_argv[argc-1] = const_cast("--slave=true"); - slave_argv[argc] = nullptr; + slave_argv[argc] = const_cast("--slave=true"); + slave_argv[argc+1] = nullptr; return slave_argv; } diff --git a/opm/simulators/flow/ReservoirCouplingMaster.hpp b/opm/simulators/flow/ReservoirCouplingMaster.hpp index 8411e124d46..3db6ed845a3 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.hpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.hpp @@ -52,7 +52,12 @@ class ReservoirCouplingMaster { private: std::vector getSlaveArgv( - int argc, char **argv, const std::filesystem::path &data_file); + int argc, + char **argv, + const std::filesystem::path &data_file, + const std::string &slave_name, + std::string &log_filename + ); const Parallel::Communication &comm_; const Schedule& schedule_; From 097f951557895ccd7a589b2928f9ddcb198b3381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 5 Aug 2024 14:43:18 +0200 Subject: [PATCH 07/33] Improved comments --- opm/simulators/flow/ReservoirCouplingMaster.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 242eff9cb37..022c4f258a2 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -116,11 +116,11 @@ std::vector ReservoirCouplingMaster::getSlaveArgv( std::string &log_filename ) { // Calculate the size of the slave_argv vector like this: - // - We will not use the first argument, as it is the program name - // - We will replace the data file name in argv with the data_file path - // - We insert as first argument --slave-log-file=.log - // - We will also add the argument "--slave=true" to the argv - // - And we will add a nullptr at the end of the argv + // - We will not use the first argument in argv, as it is the program name + // - Replace the data file name in argv with the data_file path + // - Insert as first argument --slave-log-file=.log + // - Also add the argument "--slave=true" to the argv + // - Add a nullptr at the end of the argv // So the size of the slave_argv vector will be argc + 2 // // Assume: All parameters will be on the form --parameter=value (as a string without spaces) @@ -133,7 +133,7 @@ std::vector ReservoirCouplingMaster::getSlaveArgv( log_filename = "--slave-log-file=" + slave_name + ".log"; slave_argv[0] = const_cast(log_filename.c_str()); for (int i = 1; i < argc; i++) { - // Check if the argument starts with "--", if not it will be a positional argument + // Check if the argument starts with "--", if not, we will assume it is a positional argument // and we will replace it with the data file path if (std::string(argv[i]).substr(0, 2) == "--") { slave_argv[i] = argv[i]; @@ -141,7 +141,7 @@ std::vector ReservoirCouplingMaster::getSlaveArgv( slave_argv[i] = const_cast(data_file.c_str()); } } - slave_argv[argc] = const_cast("--slave=true"); + slave_argv[argc] = "--slave=true"; slave_argv[argc+1] = nullptr; return slave_argv; } From ef62ea7a7dba4bed2091a5d15bf57ffb203cbdd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Tue, 6 Aug 2024 21:33:35 +0200 Subject: [PATCH 08/33] Add missing header files --- .../flow/ReservoirCouplingMaster.cpp | 2 +- .../flow/SimulatorFullyImplicitBlackoil.hpp | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 022c4f258a2..2d8836eae46 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -141,7 +141,7 @@ std::vector ReservoirCouplingMaster::getSlaveArgv( slave_argv[i] = const_cast(data_file.c_str()); } } - slave_argv[argc] = "--slave=true"; + slave_argv[argc] = const_cast("--slave=true"); slave_argv[argc+1] = nullptr; return slave_argv; } diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp index 1158e352efa..c4be91c7dd2 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp @@ -212,14 +212,16 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim // For now, we require that SLAVES and GRUPMAST are defined at the first // schedule step, so it is enough to check the first step. See the // keyword handlers in opm-common for more information. - auto master_mode = this->schedule()[0].rescoup().masterMode(); - if (master_mode) { - this->reservoirCouplingMaster_ = - std::make_unique( - FlowGenericVanguard::comm(), - this->schedule() - ); - this->reservoirCouplingMaster_->spawnSlaveProcesses(argc, argv); + if (!this->schedule()[0].rescoup.empty()) { + auto master_mode = this->schedule()[0].rescoup().masterMode(); + if (master_mode) { + this->reservoirCouplingMaster_ = + std::make_unique( + FlowGenericVanguard::comm(), + this->schedule() + ); + this->reservoirCouplingMaster_->spawnSlaveProcesses(argc, argv); + } } } simulator_.setEpisodeIndex(-1); From 5dfbf50b7d1d595f264acb7b2a211ea2b681c0e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Tue, 6 Aug 2024 21:49:07 +0200 Subject: [PATCH 09/33] Remove debug code Remove debug code that was introduced by mistake in the previous commit --- opm/simulators/flow/FlowMain.hpp | 2 -- .../flow/SimulatorFullyImplicitBlackoil.hpp | 20 +++++++++---------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/opm/simulators/flow/FlowMain.hpp b/opm/simulators/flow/FlowMain.hpp index de789692ab7..db48ee6b307 100644 --- a/opm/simulators/flow/FlowMain.hpp +++ b/opm/simulators/flow/FlowMain.hpp @@ -51,8 +51,6 @@ namespace Opm::Parameters { // Do not merge parallel output files or warn about them struct EnableLoggingFalloutWarning { static constexpr bool value = false; }; struct OutputInterval { static constexpr int value = 1; }; -struct Slave { static constexpr bool value = false; }; - } // namespace Opm::Parameters namespace Opm { diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp index c4be91c7dd2..fb3350a4be3 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp @@ -199,7 +199,7 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim // NOTE: The argc and argv will be used when launching a slave process void init(SimulatorTimer &timer, int argc, char** argv) { - auto slave_mode = Parameters::get(); + auto slave_mode = Parameters::get(); if (slave_mode) { this->reservoirCouplingSlave_ = std::make_unique( @@ -212,16 +212,14 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim // For now, we require that SLAVES and GRUPMAST are defined at the first // schedule step, so it is enough to check the first step. See the // keyword handlers in opm-common for more information. - if (!this->schedule()[0].rescoup.empty()) { - auto master_mode = this->schedule()[0].rescoup().masterMode(); - if (master_mode) { - this->reservoirCouplingMaster_ = - std::make_unique( - FlowGenericVanguard::comm(), - this->schedule() - ); - this->reservoirCouplingMaster_->spawnSlaveProcesses(argc, argv); - } + auto master_mode = this->schedule()[0].rescoup().masterMode(); + if (master_mode) { + this->reservoirCouplingMaster_ = + std::make_unique( + FlowGenericVanguard::comm(), + this->schedule() + ); + this->reservoirCouplingMaster_->spawnSlaveProcesses(argc, argv); } } simulator_.setEpisodeIndex(-1); From 864b55f99b7e0bdcf3e1a393a3bfe0ae33e75d5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 12 Aug 2024 15:17:38 +0200 Subject: [PATCH 10/33] Rebased, and fixed command line parsing Create one log file for each slave subprocess. Redirect both stdout and stderr to this file --- opm/simulators/flow/Main.cpp | 45 +++++++++++++---- opm/simulators/flow/Main.hpp | 5 +- opm/simulators/flow/ReservoirCoupling.hpp | 49 +++++++++++++++++++ .../flow/ReservoirCouplingMaster.cpp | 2 +- .../flow/ReservoirCouplingMaster.hpp | 13 +---- .../flow/ReservoirCouplingSlave.cpp | 2 + .../flow/SimulatorFullyImplicitBlackoil.hpp | 2 +- 7 files changed, 94 insertions(+), 24 deletions(-) create mode 100644 opm/simulators/flow/ReservoirCoupling.hpp diff --git a/opm/simulators/flow/Main.cpp b/opm/simulators/flow/Main.cpp index bf4b6bb5ab7..2a8d18029cf 100644 --- a/opm/simulators/flow/Main.cpp +++ b/opm/simulators/flow/Main.cpp @@ -44,12 +44,16 @@ #include #endif +#include +#include // for open() +#include // for dup2(), close() + namespace Opm { Main::Main(int argc, char** argv, bool ownMPI) : argc_(argc), argv_(argv), ownMPI_(ownMPI) { - handleReservoirCouplingSlaveStdoutStderr_(); + maybeSaveReservoirCouplingSlaveLogFilename_(); if (ownMPI_) { initMPI(); } @@ -133,16 +137,17 @@ Main::~Main() #endif } -void Main::handleReservoirCouplingSlaveStdoutStderr_() +void Main::maybeSaveReservoirCouplingSlaveLogFilename_() { // If first command line argument is "--slave-log-file=", // then redirect stdout and stderr to the specified file. if (this->argc_ >= 2) { std::string_view arg = this->argv_[1]; if (arg.substr(0, 17) == "--slave-log-file=") { - std::string filename = std::string(arg.substr(17)); - freopen(filename.c_str(), "w", stdout); - freopen(filename.c_str(), "w", stderr); + // For now, just save the basename of the filename and we will append the rank + // to the filename after having called MPI_Init() to avoid all ranks outputting + // to the same file. + this->reservoirCouplingSlaveOutputFilename_ = arg.substr(17); this->argc_ -= 1; char *program_name = this->argv_[0]; this->argv_ += 1; @@ -155,6 +160,27 @@ void Main::handleReservoirCouplingSlaveStdoutStderr_() } } +void Main::maybeRedirectReservoirCouplingSlaveOutput_() { + if (!this->reservoirCouplingSlaveOutputFilename_.empty()) { + std::string filename = this->reservoirCouplingSlaveOutputFilename_ + + "." + std::to_string(FlowGenericVanguard::comm().rank()) + ".log"; + int fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd == -1) { + std::string error_msg = "Slave: Failed to open stdout+stderr file" + filename; + perror(error_msg.c_str()); + MPI_Abort(MPI_COMM_WORLD, /*status=*/1); + } + // Redirect stdout and stderr to the file. + if (dup2(fd, fileno(stdout)) == -1 || dup2(fileno(stdout), fileno(stderr)) == -1) { + std::string error_msg = "Slave: Failed to redirect stdout+stderr to " + filename; + perror(error_msg.c_str()); + close(fd); + MPI_Abort(MPI_COMM_WORLD, /*status=*/1); + } + close(fd); + } +} + void Main::setArgvArgc_(const std::string& filename) { this->deckFilename_ = filename; @@ -187,6 +213,7 @@ void Main::initMPI() FlowGenericVanguard::setCommunication(std::make_unique()); handleTestSplitCommunicatorCmdLine_(); + maybeRedirectReservoirCouplingSlaveOutput_(); #if HAVE_MPI if (test_split_comm_ && FlowGenericVanguard::comm().size() > 1) { @@ -318,8 +345,8 @@ void Main::setupDamaris(const std::string& outputDir ) //const auto find_replace_map; //const auto find_replace_map = Opm::DamarisOutput::DamarisKeywords(EclGenericVanguard::comm(), outputDir); std::map find_replace_map; - find_replace_map = DamarisOutput::getDamarisKeywords(FlowGenericVanguard::comm(), outputDir); - + find_replace_map = Opm::DamarisOutput::getDamarisKeywords(FlowGenericVanguard::comm(), outputDir); + // By default EnableDamarisOutputCollective is true so all simulation results will // be written into one single file for each iteration using Parallel HDF5. // If set to false, FilePerCore mode is used in Damaris, then simulation results in each @@ -331,9 +358,9 @@ void Main::setupDamaris(const std::string& outputDir ) find_replace_map); int is_client; MPI_Comm new_comm; - // damaris_start() is where the Damaris Server ranks will block, until damaris_stop() + // damaris_start() is where the Damaris Server ranks will block, until damaris_stop() // is called from the client ranks - int err = damaris_start(&is_client); + int err = damaris_start(&is_client); isSimulationRank_ = (is_client > 0); if (isSimulationRank_ && err == DAMARIS_OK) { damaris_client_comm_get(&new_comm); diff --git a/opm/simulators/flow/Main.hpp b/opm/simulators/flow/Main.hpp index 6974f1665c1..455bd8eb091 100644 --- a/opm/simulators/flow/Main.hpp +++ b/opm/simulators/flow/Main.hpp @@ -150,8 +150,8 @@ class Main ~Main(); void setArgvArgc_(const std::string& filename); - void handleReservoirCouplingSlaveStdoutStderr_(); - + void maybeSaveReservoirCouplingSlaveLogFilename_(); + void maybeRedirectReservoirCouplingSlaveOutput_(); void initMPI(); int runDynamic() @@ -743,6 +743,7 @@ class Main // To demonstrate run with non_world_comm bool test_split_comm_ = false; bool isSimulationRank_ = true; + std::string reservoirCouplingSlaveOutputFilename_{}; #if HAVE_DAMARIS bool enableDamarisOutput_ = false; #endif diff --git a/opm/simulators/flow/ReservoirCoupling.hpp b/opm/simulators/flow/ReservoirCoupling.hpp new file mode 100644 index 00000000000..9b3f777fab8 --- /dev/null +++ b/opm/simulators/flow/ReservoirCoupling.hpp @@ -0,0 +1,49 @@ +/* + Copyright 2024 Equinor AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#ifndef OPM_RESERVOIR_COUPLING_HPP +#define OPM_RESERVOIR_COUPLING_HPP +#include +#include + +namespace Opm { +namespace ReservoirCoupling { + +enum class MessageTag : int { + SimulationStartDate, + SimulationEndDate, + SlaveProcessTermination +}; + +// Custom deleter for MPI_Comm +struct MPI_Comm_Deleter { + void operator()(MPI_Comm* comm) const { + if (*comm != MPI_COMM_NULL) { + MPI_Comm_free(comm); + } + delete comm; + } +}; + +using MPI_Comm_Ptr = std::unique_ptr; + +} // namespace ReservoirCoupling +} // namespace Opm + +#endif // OPM_RESERVOIR_COUPLING_HPP diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 2d8836eae46..a6a11de1ab0 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -130,7 +130,7 @@ std::vector ReservoirCouplingMaster::getSlaveArgv( // must ensure that these buffers are not deallocated before the slave_argv has // been used. std::vector slave_argv(argc + 2); - log_filename = "--slave-log-file=" + slave_name + ".log"; + log_filename = "--slave-log-file=" + slave_name; // .log extension will be added by the slave process slave_argv[0] = const_cast(log_filename.c_str()); for (int i = 1; i < argc; i++) { // Check if the argument starts with "--", if not, we will assume it is a positional argument diff --git a/opm/simulators/flow/ReservoirCouplingMaster.hpp b/opm/simulators/flow/ReservoirCouplingMaster.hpp index 3db6ed845a3..e9091b7bd48 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.hpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.hpp @@ -21,6 +21,7 @@ #define OPM_RESERVOIR_COUPLING_MASTER_HPP #include +#include #include #include @@ -36,19 +37,9 @@ class ReservoirCouplingMaster { ReservoirCouplingMaster(const Parallel::Communication &comm, const Schedule &schedule); - // Custom deleter for MPI_Comm - struct MPI_Comm_Deleter { - void operator()(MPI_Comm* comm) const { - if (*comm != MPI_COMM_NULL) { - MPI_Comm_free(comm); - } - delete comm; - } - }; - using MPI_Comm_Ptr = std::unique_ptr; - void spawnSlaveProcesses(int argc, char **argv); + using MPI_Comm_Ptr = ReservoirCoupling::MPI_Comm_Ptr; private: std::vector getSlaveArgv( diff --git a/opm/simulators/flow/ReservoirCouplingSlave.cpp b/opm/simulators/flow/ReservoirCouplingSlave.cpp index 3726dd26a9c..690214158e4 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.cpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.cpp @@ -18,6 +18,7 @@ */ #include +#include #include #include @@ -46,6 +47,7 @@ void ReservoirCouplingSlave::sendSimulationStartDateToMasterProcess() { //if (*(this->slave_master_comm_) == MPI_COMM_NULL) { // OPM_THROW(std::runtime_error, "Slave process is not spawned by a master process"); //} + //MPI_Send(&start_date, 1, MPI_INT, 0, 0, parentcomm); OpmLog::info("Sent simulation start date to master process"); } diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp index fb3350a4be3..84d6fb8d80c 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp @@ -199,7 +199,7 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim // NOTE: The argc and argv will be used when launching a slave process void init(SimulatorTimer &timer, int argc, char** argv) { - auto slave_mode = Parameters::get(); + auto slave_mode = Parameters::Get(); if (slave_mode) { this->reservoirCouplingSlave_ = std::make_unique( From d4855b08e1b266bece14f910967e38765ee90d49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 9 Sep 2024 09:16:03 +0200 Subject: [PATCH 11/33] Check if MPI is enabled Exclude the reservoir coupling stuff if MPI is not enabled --- opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp index 84d6fb8d80c..a60a220c06e 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp @@ -24,9 +24,14 @@ #include +#if HAVE_MPI #include #include #include +#include +#include +#endif + #include #include @@ -199,6 +204,7 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim // NOTE: The argc and argv will be used when launching a slave process void init(SimulatorTimer &timer, int argc, char** argv) { +#if HAVE_MPI auto slave_mode = Parameters::Get(); if (slave_mode) { this->reservoirCouplingSlave_ = @@ -222,6 +228,7 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim this->reservoirCouplingMaster_->spawnSlaveProcesses(argc, argv); } } +#endif simulator_.setEpisodeIndex(-1); // Create timers and file for writing timing info. @@ -587,9 +594,11 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim SimulatorConvergenceOutput convergence_output_; +#if HAVE_MPI bool slaveMode_{false}; std::unique_ptr reservoirCouplingMaster_{nullptr}; std::unique_ptr reservoirCouplingSlave_{nullptr}; +#endif SimulatorSerializer serializer_; }; From 7dfc25048c6e2c15898fb4fdc9d9f3cdee30bab5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Tue, 6 Aug 2024 21:33:35 +0200 Subject: [PATCH 12/33] Add missing header files --- .../flow/SimulatorFullyImplicitBlackoil.hpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp index a60a220c06e..1f62bb95bcb 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp @@ -218,14 +218,16 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim // For now, we require that SLAVES and GRUPMAST are defined at the first // schedule step, so it is enough to check the first step. See the // keyword handlers in opm-common for more information. - auto master_mode = this->schedule()[0].rescoup().masterMode(); - if (master_mode) { - this->reservoirCouplingMaster_ = - std::make_unique( - FlowGenericVanguard::comm(), - this->schedule() - ); - this->reservoirCouplingMaster_->spawnSlaveProcesses(argc, argv); + if (!this->schedule()[0].rescoup.empty()) { + auto master_mode = this->schedule()[0].rescoup().masterMode(); + if (master_mode) { + this->reservoirCouplingMaster_ = + std::make_unique( + FlowGenericVanguard::comm(), + this->schedule() + ); + this->reservoirCouplingMaster_->spawnSlaveProcesses(argc, argv); + } } } #endif From e47c89832d321fb008cbc63bb0cb9595076bb8bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 12 Aug 2024 15:17:38 +0200 Subject: [PATCH 13/33] Rebased, and fixed command line parsing Create one log file for each slave subprocess. Redirect both stdout and stderr to this file --- opm/simulators/flow/Main.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/opm/simulators/flow/Main.cpp b/opm/simulators/flow/Main.cpp index 2a8d18029cf..e47c8a8d71d 100644 --- a/opm/simulators/flow/Main.cpp +++ b/opm/simulators/flow/Main.cpp @@ -48,6 +48,10 @@ #include // for open() #include // for dup2(), close() +#include +#include // for open() +#include // for dup2(), close() + namespace Opm { Main::Main(int argc, char** argv, bool ownMPI) From 09aa0be11c3f9aaf114258389cd67b79280b4bea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 12 Aug 2024 15:17:38 +0200 Subject: [PATCH 14/33] Send slave start dates to master --- .../flow/ReservoirCouplingMaster.cpp | 32 ++++++++++++++++++ .../flow/ReservoirCouplingMaster.hpp | 10 +++--- .../flow/ReservoirCouplingSlave.cpp | 33 +++++++++++++------ .../flow/ReservoirCouplingSlave.hpp | 5 +-- .../flow/SimulatorFullyImplicitBlackoil.hpp | 19 +++++------ 5 files changed, 73 insertions(+), 26 deletions(-) diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index a6a11de1ab0..3113dd03975 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -50,6 +50,37 @@ ReservoirCouplingMaster::ReservoirCouplingMaster( // Public methods // ------------------ +void ReservoirCouplingMaster::receiveSimulationStartDateFromSlaves() { + this->slave_start_dates_.resize(this->num_slaves_); + if (this->comm_.rank() == 0) { + // Ensure that std::time_t is of type long since we are sending it over MPI with MPI_LONG + static_assert(std::is_same::value, "std::time_t is not of type long"); + for (unsigned int i = 0; i < this->master_slave_comm_.size(); i++) { + std::time_t start_date; + int result = MPI_Recv( + &start_date, + /*count=*/1, + /*datatype=*/MPI_LONG, + /*source_rank=*/0, + /*tag=*/static_cast(MessageTag::SimulationStartDate), + *this->master_slave_comm_[i].get(), + MPI_STATUS_IGNORE + ); + if (result != MPI_SUCCESS) { + OPM_THROW(std::runtime_error, "Failed to receive simulation start date from slave process"); + } + this->slave_start_dates_[i] = start_date; + OpmLog::info( + fmt::format( + "Received simulation start date from slave process with name: {}. " + "Start date: {}", this->slave_names_[i], start_date + ) + ); + } + } + this->comm_.broadcast(this->slave_start_dates_.data(), this->num_slaves_, /*emitter_rank=*/0); +} + // NOTE: This functions is executed for all ranks, but only rank 0 will spawn // the slave processes void ReservoirCouplingMaster::spawnSlaveProcesses(int argc, char **argv) { @@ -101,6 +132,7 @@ void ReservoirCouplingMaster::spawnSlaveProcesses(int argc, char **argv) { ); this->master_slave_comm_.push_back(std::move(master_slave_comm)); this->slave_names_.push_back(slave_name); + this->num_slaves_++; } } diff --git a/opm/simulators/flow/ReservoirCouplingMaster.hpp b/opm/simulators/flow/ReservoirCouplingMaster.hpp index e9091b7bd48..6d1ed57b18f 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.hpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.hpp @@ -34,12 +34,13 @@ namespace Opm { class ReservoirCouplingMaster { public: + using MPI_Comm_Ptr = ReservoirCoupling::MPI_Comm_Ptr; + using MessageTag = ReservoirCoupling::MessageTag; ReservoirCouplingMaster(const Parallel::Communication &comm, const Schedule &schedule); void spawnSlaveProcesses(int argc, char **argv); - - using MPI_Comm_Ptr = ReservoirCoupling::MPI_Comm_Ptr; + void receiveSimulationStartDateFromSlaves(); private: std::vector getSlaveArgv( @@ -52,9 +53,10 @@ class ReservoirCouplingMaster { const Parallel::Communication &comm_; const Schedule& schedule_; - // MPI communicators for the slave processes - std::vector master_slave_comm_; + std::size_t num_slaves_ = 0; // Initially zero, will be updated in spawnSlaveProcesses() + std::vector master_slave_comm_; // MPI communicators for the slave processes std::vector slave_names_; + std::vector slave_start_dates_; }; } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingSlave.cpp b/opm/simulators/flow/ReservoirCouplingSlave.cpp index 690214158e4..f369b0bf116 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.cpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.cpp @@ -38,17 +38,30 @@ ReservoirCouplingSlave::ReservoirCouplingSlave( ) : comm_{comm}, schedule_{schedule} -{ } +{ + this->slave_master_comm_ = MPI_Comm_Ptr(new MPI_Comm(MPI_COMM_NULL)); + MPI_Comm_get_parent(this->slave_master_comm_.get()); + if (*(this->slave_master_comm_) == MPI_COMM_NULL) { + OPM_THROW(std::runtime_error, "Slave process is not spawned by a master process"); + } +} + void ReservoirCouplingSlave::sendSimulationStartDateToMasterProcess() { - // TODO: Implement this function next - - //this->slave_master_comm_ = MPI_Comm_Ptr(new MPI_Comm(MPI_COMM_NULL)); - //MPI_Comm_get_parent(this->slave_master_comm_.get()); - //if (*(this->slave_master_comm_) == MPI_COMM_NULL) { - // OPM_THROW(std::runtime_error, "Slave process is not spawned by a master process"); - //} - //MPI_Send(&start_date, 1, MPI_INT, 0, 0, parentcomm); - OpmLog::info("Sent simulation start date to master process"); + + if (this->comm_.rank() == 0) { + // Ensure that std::time_t is of type long since we are sending it over MPI with MPI_LONG + static_assert(std::is_same::value, "std::time_t is not of type long"); + std::time_t start_date = this->schedule_.getStartTime(); + MPI_Send( + &start_date, + /*count=*/1, + /*datatype=*/MPI_LONG, + /*dest_rank=*/0, + /*tag=*/static_cast(MessageTag::SimulationStartDate), + *this->slave_master_comm_ + ); + OpmLog::info("Sent simulation start date to master process from rank 0"); + } } } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingSlave.hpp b/opm/simulators/flow/ReservoirCouplingSlave.hpp index 12f1d9590e3..e34b8cfeb7e 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.hpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.hpp @@ -20,7 +20,7 @@ #ifndef OPM_RESERVOIR_COUPLING_SLAVE_HPP #define OPM_RESERVOIR_COUPLING_SLAVE_HPP -#include +#include #include #include #include @@ -33,7 +33,8 @@ namespace Opm { class ReservoirCouplingSlave { public: - using MPI_Comm_Ptr = ReservoirCouplingMaster::MPI_Comm_Ptr; + using MPI_Comm_Ptr = ReservoirCoupling::MPI_Comm_Ptr; + using MessageTag = ReservoirCoupling::MessageTag; ReservoirCouplingSlave(const Parallel::Communication &comm, const Schedule &schedule); void sendSimulationStartDateToMasterProcess(); diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp index 1f62bb95bcb..1a8dbfb40de 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp @@ -218,16 +218,15 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim // For now, we require that SLAVES and GRUPMAST are defined at the first // schedule step, so it is enough to check the first step. See the // keyword handlers in opm-common for more information. - if (!this->schedule()[0].rescoup.empty()) { - auto master_mode = this->schedule()[0].rescoup().masterMode(); - if (master_mode) { - this->reservoirCouplingMaster_ = - std::make_unique( - FlowGenericVanguard::comm(), - this->schedule() - ); - this->reservoirCouplingMaster_->spawnSlaveProcesses(argc, argv); - } + auto master_mode = this->schedule()[0].rescoup().masterMode(); + if (master_mode) { + this->reservoirCouplingMaster_ = + std::make_unique( + FlowGenericVanguard::comm(), + this->schedule() + ); + this->reservoirCouplingMaster_->spawnSlaveProcesses(argc, argv); + this->reservoirCouplingMaster_->receiveSimulationStartDateFromSlaves(); } } #endif From 9ad5b8a7f22dbb6d3c1c80be6788650192a79bdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 12 Aug 2024 15:17:38 +0200 Subject: [PATCH 15/33] Rebased, and fixed command line parsing Create one log file for each slave subprocess. Redirect both stdout and stderr to this file --- opm/simulators/flow/Main.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/opm/simulators/flow/Main.cpp b/opm/simulators/flow/Main.cpp index e47c8a8d71d..2c27ccef18d 100644 --- a/opm/simulators/flow/Main.cpp +++ b/opm/simulators/flow/Main.cpp @@ -52,6 +52,10 @@ #include // for open() #include // for dup2(), close() +#include +#include // for open() +#include // for dup2(), close() + namespace Opm { Main::Main(int argc, char** argv, bool ownMPI) From 48856f9f464458cf195b5e9ff7e45746949100ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 19 Aug 2024 23:45:56 +0200 Subject: [PATCH 16/33] Timestepping for reservoir coupling Implement adaptive time stepping for master and slave procesess when using reservoir coupling. The original adaptive time stepping method is refactored at the same time. --- opm/simulators/flow/FlowMain.hpp | 1 + opm/simulators/flow/ReservoirCoupling.hpp | 157 +- .../flow/ReservoirCouplingMaster.cpp | 112 +- .../flow/ReservoirCouplingMaster.hpp | 7 +- .../flow/ReservoirCouplingSlave.cpp | 54 +- .../flow/ReservoirCouplingSlave.hpp | 9 +- .../flow/SimulatorFullyImplicitBlackoil.hpp | 12 +- .../timestepping/AdaptiveSimulatorTimer.cpp | 31 +- .../timestepping/AdaptiveSimulatorTimer.hpp | 15 +- .../timestepping/AdaptiveTimeStepping.hpp | 333 ++-- .../AdaptiveTimeStepping_impl.hpp | 1535 ++++++++++++----- .../timestepping/SimulatorTimerInterface.hpp | 73 +- opm/simulators/utils/readDeck.cpp | 2 - 13 files changed, 1720 insertions(+), 621 deletions(-) diff --git a/opm/simulators/flow/FlowMain.hpp b/opm/simulators/flow/FlowMain.hpp index db48ee6b307..77a1f140e43 100644 --- a/opm/simulators/flow/FlowMain.hpp +++ b/opm/simulators/flow/FlowMain.hpp @@ -51,6 +51,7 @@ namespace Opm::Parameters { // Do not merge parallel output files or warn about them struct EnableLoggingFalloutWarning { static constexpr bool value = false; }; struct OutputInterval { static constexpr int value = 1; }; + } // namespace Opm::Parameters namespace Opm { diff --git a/opm/simulators/flow/ReservoirCoupling.hpp b/opm/simulators/flow/ReservoirCoupling.hpp index 9b3f777fab8..75cffd73e72 100644 --- a/opm/simulators/flow/ReservoirCoupling.hpp +++ b/opm/simulators/flow/ReservoirCoupling.hpp @@ -20,15 +20,18 @@ #ifndef OPM_RESERVOIR_COUPLING_HPP #define OPM_RESERVOIR_COUPLING_HPP #include +#include +#include #include namespace Opm { namespace ReservoirCoupling { enum class MessageTag : int { - SimulationStartDate, - SimulationEndDate, - SlaveProcessTermination + SlaveSimulationStartDate, + SlaveProcessTermination, + SlaveNextReportDate, + SlaveNextTimeStep, }; // Custom deleter for MPI_Comm @@ -43,6 +46,154 @@ struct MPI_Comm_Deleter { using MPI_Comm_Ptr = std::unique_ptr; +// This class represents a time point. +// It is currently used to represent an epoch time (a double value in seconds since the epoch), +// or an elapsed time (a double value in seconds since the start of the simulation). +// To avoid numerical issues when adding or subtracting time points and then later comparing +// for equality with for example a given report date, we use a tolerance value. +class TimePoint { +private: + double time; + // TODO: Epoch values often lies in the range of [1e9,1e11], so a tolerance value of 1e-10 + // might be a little too small. However, for elapsed time values, the range is often + // in the range of [0, 1e8], so a tolerance value of 1e-10 should be sufficient. + // NOTE: 1 nano-second = 1e-9 seconds + static constexpr double tol = 1e-10; // Tolerance value + +public: + TimePoint() : time(0.0) {} + explicit TimePoint(double t) : time(t) {} + TimePoint(const TimePoint& other) : time(other.time) {} + + // Assignment operator for double + TimePoint& operator=(double t) { + time = t; + return *this; + } + + // Copy assignment operator + TimePoint& operator=(const TimePoint& other) { + if (this != &other) { + time = other.time; + } + return *this; + } + + double getTime() const { return time; } + + // Equality operator + bool operator==(const TimePoint& other) const { + return std::abs(time - other.time) < tol; + } + + // Inequality operator + bool operator!=(const TimePoint& other) const { + return !(*this == other); + } + + // Less than operator + bool operator<(const TimePoint& other) const { + return (time < other.time) && !(*this == other); + } + + // Comparison operator: double < TimePoint + friend bool operator<(double lhs, const TimePoint& rhs) { + return lhs < rhs.time; + } + + // Comparison operator: TimePoint < double + bool operator<(double rhs) const { + return time < rhs; + } + + // Less than or equal to operator + bool operator<=(const TimePoint& other) const { + return (time < other.time) || (*this == other); + } + + // Comparison operator: double <= TimePoint + friend bool operator<=(double lhs, const TimePoint& rhs) { + return lhs <= rhs.time; + } + + // Comparison operator: TimePoint <= double + bool operator<=(double rhs) const { + return time <= rhs; + } + + // Greater than operator + bool operator>(const TimePoint& other) const { + return (time > other.time) && !(*this == other); + } + + // Comparison operator: double > TimePoint + friend bool operator>(double lhs, const TimePoint& rhs) { + return lhs > rhs.time; + } + + // Comparison operator: TimePoint > double + bool operator>(double rhs) const { + return time > rhs; + } + + // Greater than or equal to operator + bool operator>=(const TimePoint& other) const { + return (time > other.time) || (*this == other); + } + + // Comparison operator: TimePoint >= double + bool operator>=(double rhs) const { + return time >= rhs; + } + + // Comparison operator: double >= TimePoint + friend bool operator>=(double lhs, const TimePoint& rhs) { + return lhs >= rhs.time; + } + + // Addition operator: TimePoint + TimePoint (summing their times) + TimePoint operator+(const TimePoint& other) const { + return TimePoint(time + other.time); + } + + // Addition operator: TimePoint + double + TimePoint operator+(double delta) const { + return TimePoint(time + delta); + } + + // Friend addition operator: double + TimePoint + friend TimePoint operator+(double lhs, const TimePoint& rhs) { + return TimePoint(lhs + rhs.time); + } + + // Overload += operator for adding a double + TimePoint& operator+=(double delta) { + time += delta; + return *this; + } + + // Subtraction operator: TimePoint - TimePoint (resulting in a new TimePoint) + TimePoint operator-(const TimePoint& other) const { + return TimePoint(time - other.time); + } + + // Subtraction operator: TimePoint - double + TimePoint operator-(double delta) const { + return TimePoint(time - delta); + } + + // Friend subtraction operator: double - TimePoint + friend TimePoint operator-(double lhs, const TimePoint& rhs) { + return TimePoint(lhs - rhs.time); + } + + // Stream insertion operator for easy printing + friend std::ostream& operator<<(std::ostream& os, const TimePoint& tp) { + os << tp.time; + return os; + } +}; + } // namespace ReservoirCoupling } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 3113dd03975..57bf674fc4b 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -44,41 +44,133 @@ ReservoirCouplingMaster::ReservoirCouplingMaster( ) : comm_{comm}, schedule_{schedule} -{ } +{ + this->start_date_ = this->schedule_.getStartTime(); +} // ------------------ // Public methods // ------------------ +double ReservoirCouplingMaster::maybeChopSubStep( + double suggested_timestep_original, double elapsed_time +) const +{ + // Check if the suggested timestep needs to be adjusted based on the slave processes' + // next report step, or if the slave process has not started yet: the start of a slave process. + double start_date = static_cast(this->start_date_); + TimePoint step_start_date{start_date + elapsed_time}; + TimePoint step_end_date{step_start_date + suggested_timestep_original}; + TimePoint suggested_timestep{suggested_timestep_original}; + for (unsigned int i = 0; i < this->num_slaves_; i++) { + double slave_start_date = static_cast(this->slave_start_dates_[i]); + TimePoint slave_next_report_date{this->slave_next_report_time_offsets_[i] + slave_start_date}; + if (slave_start_date > step_end_date) { + // The slave process has not started yet, and will not start during this timestep + continue; + } + TimePoint slave_elapsed_time; + if (slave_start_date <= step_start_date) { + // The slave process has already started, and will continue during this timestep + if (slave_next_report_date > step_end_date) { + // The slave process will not report during this timestep + continue; + } + // The slave process will report during this timestep + slave_elapsed_time = slave_next_report_date - step_start_date; + } + else { + // The slave process will start during the timestep, but not at the beginning + slave_elapsed_time = slave_start_date - step_start_date; + } + suggested_timestep = slave_elapsed_time; + step_end_date = step_start_date + suggested_timestep; + } + return suggested_timestep.getTime(); +} + +void ReservoirCouplingMaster::sendNextTimeStepToSlaves(double timestep) { + if (this->comm_.rank() == 0) { + for (unsigned int i = 0; i < this->master_slave_comm_.size(); i++) { + MPI_Send( + ×tep, + /*count=*/1, + /*datatype=*/MPI_DOUBLE, + /*dest_rank=*/0, + /*tag=*/static_cast(MessageTag::SlaveNextTimeStep), + *this->master_slave_comm_[i].get() + ); + OpmLog::info(fmt::format( + "Sent next time step {} from master process rank 0 to slave process " + "rank 0 with name: {}", timestep, this->slave_names_[i]) + ); + } + } +} + +void ReservoirCouplingMaster::receiveNextReportDateFromSlaves() { + if (this->comm_.rank() == 0) { + for (unsigned int i = 0; i < this->master_slave_comm_.size(); i++) { + double slave_next_report_time_offset; // Elapsed time from the beginning of the simulation + int result = MPI_Recv( + &slave_next_report_time_offset, + /*count=*/1, + /*datatype=*/MPI_DOUBLE, + /*source_rank=*/0, + /*tag=*/static_cast(MessageTag::SlaveNextReportDate), + *this->master_slave_comm_[i].get(), + MPI_STATUS_IGNORE + ); + if (result != MPI_SUCCESS) { + OPM_THROW(std::runtime_error, "Failed to receive next report date from slave process"); + } + this->slave_next_report_time_offsets_[i] = slave_next_report_time_offset; + OpmLog::info( + fmt::format( + "Received simulation slave next report date from slave process with name: {}. " + "Next report date: {}", this->slave_names_[i], slave_next_report_time_offset + ) + ); + } + } + this->comm_.broadcast( + this->slave_next_report_time_offsets_.data(), this->num_slaves_, /*emitter_rank=*/0 + ); + OpmLog::info("Broadcasted slave next report dates to all ranks"); +} + void ReservoirCouplingMaster::receiveSimulationStartDateFromSlaves() { - this->slave_start_dates_.resize(this->num_slaves_); if (this->comm_.rank() == 0) { // Ensure that std::time_t is of type long since we are sending it over MPI with MPI_LONG static_assert(std::is_same::value, "std::time_t is not of type long"); for (unsigned int i = 0; i < this->master_slave_comm_.size(); i++) { - std::time_t start_date; + std::time_t slave_start_date; int result = MPI_Recv( - &start_date, + &slave_start_date, /*count=*/1, /*datatype=*/MPI_LONG, /*source_rank=*/0, - /*tag=*/static_cast(MessageTag::SimulationStartDate), + /*tag=*/static_cast(MessageTag::SlaveSimulationStartDate), *this->master_slave_comm_[i].get(), MPI_STATUS_IGNORE ); if (result != MPI_SUCCESS) { OPM_THROW(std::runtime_error, "Failed to receive simulation start date from slave process"); } - this->slave_start_dates_[i] = start_date; + if (slave_start_date < this->start_date_) { + OPM_THROW(std::runtime_error, "Slave process start date is before master process start date"); + } + this->slave_start_dates_[i] = slave_start_date; OpmLog::info( fmt::format( "Received simulation start date from slave process with name: {}. " - "Start date: {}", this->slave_names_[i], start_date + "Start date: {}", this->slave_names_[i], slave_start_date ) ); } } this->comm_.broadcast(this->slave_start_dates_.data(), this->num_slaves_, /*emitter_rank=*/0); + OpmLog::info("Broadcasted slave start dates to all ranks"); } // NOTE: This functions is executed for all ranks, but only rank 0 will spawn @@ -91,8 +183,8 @@ void ReservoirCouplingMaster::spawnSlaveProcesses(int argc, char **argv) { const auto& data_file_name = slave.dataFilename(); const auto& directory_path = slave.directoryPath(); // Concatenate the directory path and the data file name to get the full path - std::filesystem::path dir_path(directory_path); - std::filesystem::path data_file(data_file_name); + std::filesystem::path dir_path{directory_path}; + std::filesystem::path data_file{data_file_name}; std::filesystem::path full_path = dir_path / data_file; std::string log_filename; // the getSlaveArgv() function will set this std::vector slave_argv = getSlaveArgv( @@ -134,6 +226,8 @@ void ReservoirCouplingMaster::spawnSlaveProcesses(int argc, char **argv) { this->slave_names_.push_back(slave_name); this->num_slaves_++; } + this->slave_start_dates_.resize(this->num_slaves_); + this->slave_next_report_time_offsets_.resize(this->num_slaves_); } // ------------------ diff --git a/opm/simulators/flow/ReservoirCouplingMaster.hpp b/opm/simulators/flow/ReservoirCouplingMaster.hpp index 6d1ed57b18f..182ec0f0188 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.hpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.hpp @@ -36,11 +36,14 @@ class ReservoirCouplingMaster { public: using MPI_Comm_Ptr = ReservoirCoupling::MPI_Comm_Ptr; using MessageTag = ReservoirCoupling::MessageTag; - + using TimePoint = ReservoirCoupling::TimePoint; ReservoirCouplingMaster(const Parallel::Communication &comm, const Schedule &schedule); + double maybeChopSubStep(double suggested_timestep, double current_time) const; void spawnSlaveProcesses(int argc, char **argv); void receiveSimulationStartDateFromSlaves(); + void receiveNextReportDateFromSlaves(); + void sendNextTimeStepToSlaves(double next_time_step); private: std::vector getSlaveArgv( @@ -53,10 +56,12 @@ class ReservoirCouplingMaster { const Parallel::Communication &comm_; const Schedule& schedule_; + std::time_t start_date_; // Master process' simulation start date std::size_t num_slaves_ = 0; // Initially zero, will be updated in spawnSlaveProcesses() std::vector master_slave_comm_; // MPI communicators for the slave processes std::vector slave_names_; std::vector slave_start_dates_; + std::vector slave_next_report_time_offsets_; // Elapsed time from the beginning of the simulation }; } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingSlave.cpp b/opm/simulators/flow/ReservoirCouplingSlave.cpp index f369b0bf116..5f567feb689 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.cpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.cpp @@ -29,15 +29,18 @@ #include #include +#include namespace Opm { ReservoirCouplingSlave::ReservoirCouplingSlave( const Parallel::Communication &comm, - const Schedule &schedule + const Schedule &schedule, + const SimulatorTimer &timer ) : comm_{comm}, - schedule_{schedule} + schedule_{schedule}, + timer_{timer} { this->slave_master_comm_ = MPI_Comm_Ptr(new MPI_Comm(MPI_COMM_NULL)); MPI_Comm_get_parent(this->slave_master_comm_.get()); @@ -46,8 +49,51 @@ ReservoirCouplingSlave::ReservoirCouplingSlave( } } -void ReservoirCouplingSlave::sendSimulationStartDateToMasterProcess() { +double ReservoirCouplingSlave::receiveNextTimeStepFromMaster() { + double timestep; + if (this->comm_.rank() == 0) { + int result = MPI_Recv( + ×tep, + /*count=*/1, + /*datatype=*/MPI_DOUBLE, + /*source_rank=*/0, + /*tag=*/static_cast(MessageTag::SlaveNextTimeStep), + *this->slave_master_comm_, + MPI_STATUS_IGNORE + ); + if (result != MPI_SUCCESS) { + OPM_THROW(std::runtime_error, "Failed to receive next time step from master"); + } + OpmLog::info( + fmt::format("Slave rank 0 received next timestep {} from master.", timestep) + ); + } + this->comm_.broadcast(×tep, 1, /*emitter_rank=*/0); + OpmLog::info("Broadcasted slave next time step to all ranks"); + return timestep; +} + + +void ReservoirCouplingSlave::sendNextReportDateToMasterProcess() { + if (this->comm_.rank() == 0) { + double elapsed_time = this->timer_.simulationTimeElapsed(); + double current_step_length = this->timer_.currentStepLength(); + // NOTE: This is an offset in seconds from the start date, so it will be 0 if the next report + // would be the start date. In general, it should be a positive number. + double next_report_time_offset = elapsed_time + current_step_length; + MPI_Send( + &next_report_time_offset, + /*count=*/1, + /*datatype=*/MPI_DOUBLE, + /*dest_rank=*/0, + /*tag=*/static_cast(MessageTag::SlaveNextReportDate), + *this->slave_master_comm_ + ); + OpmLog::info("Sent next report date to master process from rank 0"); + } +} +void ReservoirCouplingSlave::sendSimulationStartDateToMasterProcess() { if (this->comm_.rank() == 0) { // Ensure that std::time_t is of type long since we are sending it over MPI with MPI_LONG static_assert(std::is_same::value, "std::time_t is not of type long"); @@ -57,7 +103,7 @@ void ReservoirCouplingSlave::sendSimulationStartDateToMasterProcess() { /*count=*/1, /*datatype=*/MPI_LONG, /*dest_rank=*/0, - /*tag=*/static_cast(MessageTag::SimulationStartDate), + /*tag=*/static_cast(MessageTag::SlaveSimulationStartDate), *this->slave_master_comm_ ); OpmLog::info("Sent simulation start date to master process from rank 0"); diff --git a/opm/simulators/flow/ReservoirCouplingSlave.hpp b/opm/simulators/flow/ReservoirCouplingSlave.hpp index e34b8cfeb7e..b7579a890b2 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.hpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -36,15 +37,19 @@ class ReservoirCouplingSlave { using MPI_Comm_Ptr = ReservoirCoupling::MPI_Comm_Ptr; using MessageTag = ReservoirCoupling::MessageTag; - ReservoirCouplingSlave(const Parallel::Communication &comm, const Schedule &schedule); + ReservoirCouplingSlave( + const Parallel::Communication &comm, const Schedule &schedule, const SimulatorTimer &timer + ); void sendSimulationStartDateToMasterProcess(); + void sendNextReportDateToMasterProcess(); + double receiveNextTimeStepFromMaster(); private: const Parallel::Communication &comm_; const Schedule& schedule_; + const SimulatorTimer &timer_; // MPI parent communicator for a slave process MPI_Comm_Ptr slave_master_comm_{nullptr}; - }; } // namespace Opm diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp index 1a8dbfb40de..07f7bcb1908 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp @@ -206,11 +206,12 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim { #if HAVE_MPI auto slave_mode = Parameters::Get(); + FlowGenericVanguard::comm().barrier(); if (slave_mode) { this->reservoirCouplingSlave_ = std::make_unique( FlowGenericVanguard::comm(), - this->schedule() + this->schedule(), timer ); this->reservoirCouplingSlave_->sendSimulationStartDateToMasterProcess(); } @@ -252,7 +253,14 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim else { adaptiveTimeStepping_ = std::make_unique(unitSystem, max_next_tstep, terminalOutput_); } - +#if HAVE_MPI + if (slave_mode) { + adaptiveTimeStepping_->setReservoirCouplingSlave(this->reservoirCouplingSlave_.get()); + } + else if (this->schedule()[0].rescoup().masterMode()) { + adaptiveTimeStepping_->setReservoirCouplingMaster(this->reservoirCouplingMaster_.get()); + } +#endif if (isRestart()) { // For restarts the simulator may have gotten some information // about the next timestep size from the OPMEXTRA field diff --git a/opm/simulators/timestepping/AdaptiveSimulatorTimer.cpp b/opm/simulators/timestepping/AdaptiveSimulatorTimer.cpp index 2c36efcf56d..4fe45e5ab35 100644 --- a/opm/simulators/timestepping/AdaptiveSimulatorTimer.cpp +++ b/opm/simulators/timestepping/AdaptiveSimulatorTimer.cpp @@ -36,25 +36,28 @@ namespace Opm { AdaptiveSimulatorTimer:: - AdaptiveSimulatorTimer( const SimulatorTimerInterface& timer, - const double lastStepTaken, - const double maxTimeStep ) - : start_date_time_( std::make_shared(timer.startDateTime()) ) - , start_time_( timer.simulationTimeElapsed() ) - , total_time_( start_time_ + timer.currentStepLength() ) - , report_step_( timer.reportStepNum() ) - , max_time_step_( maxTimeStep ) - , current_time_( start_time_ ) - , dt_( 0.0 ) - , current_step_( 0 ) - , steps_() - , lastStepFailed_( false ) + AdaptiveSimulatorTimer( const boost::posix_time::ptime simulation_start_time, + const double step_length, + const double elapsed_time, + const double last_step_taken, + const int report_step, + const double max_time_step ) + : start_date_time_{ std::make_shared(simulation_start_time) } + , start_time_{ elapsed_time } + , total_time_{ start_time_ + step_length } + , report_step_{ report_step } + , max_time_step_{ max_time_step } + , current_time_{ start_time_ } + , dt_{ 0.0 } + , current_step_{ 0 } + , steps_{} + , last_step_failed_{ false } { // reserve memory for sub steps steps_.reserve( 10 ); // set appropriate value for dt_ - provideTimeStepEstimate( lastStepTaken ); + provideTimeStepEstimate( last_step_taken ); } bool AdaptiveSimulatorTimer::initialStep () const diff --git a/opm/simulators/timestepping/AdaptiveSimulatorTimer.hpp b/opm/simulators/timestepping/AdaptiveSimulatorTimer.hpp index 3dffd49084d..31ae49f4ce8 100644 --- a/opm/simulators/timestepping/AdaptiveSimulatorTimer.hpp +++ b/opm/simulators/timestepping/AdaptiveSimulatorTimer.hpp @@ -44,9 +44,12 @@ namespace Opm /// \param timer in case of sub stepping this is the outer timer /// \param lastStepTaken last suggested time step /// \param maxTimeStep maximum time step allowed - AdaptiveSimulatorTimer( const SimulatorTimerInterface& timer, - const double lastStepTaken, - const double maxTimeStep = std::numeric_limits::max() ); + AdaptiveSimulatorTimer( const boost::posix_time::ptime simulation_start_time, + const double step_length, + const double elapsed_time, + const double last_step_taken, + const int report_step, + const double max_time_step = std::numeric_limits::max() ); /// \brief advance time by currentStepLength AdaptiveSimulatorTimer& operator++ (); @@ -101,10 +104,10 @@ namespace Opm boost::posix_time::ptime startDateTime() const; /// \brief Return true if last time step failed - bool lastStepFailed() const {return lastStepFailed_;} + bool lastStepFailed() const {return last_step_failed_;} /// \brief tell the timestepper whether timestep failed or not - void setLastStepFailed(bool lastStepFailed) {lastStepFailed_ = lastStepFailed;} + void setLastStepFailed(bool last_step_failed) {last_step_failed_ = last_step_failed;} /// return copy of object virtual std::unique_ptr< SimulatorTimerInterface > clone() const; @@ -121,7 +124,7 @@ namespace Opm int current_step_; std::vector< double > steps_; - bool lastStepFailed_; + bool last_step_failed_; }; diff --git a/opm/simulators/timestepping/AdaptiveTimeStepping.hpp b/opm/simulators/timestepping/AdaptiveTimeStepping.hpp index 822d5ee8a57..daf0f455330 100644 --- a/opm/simulators/timestepping/AdaptiveTimeStepping.hpp +++ b/opm/simulators/timestepping/AdaptiveTimeStepping.hpp @@ -19,12 +19,23 @@ #include #include -#include +#if HAVE_MPI +#define RESERVOIR_COUPLING_ENABLED +#endif +#ifdef RESERVOIR_COUPLING_ENABLED +#include +#include +#include +#endif + #include #include #include +#include +#include #include #include +#include #include namespace Opm::Parameters { @@ -55,148 +66,208 @@ class UnitSystem; struct StepReport; namespace detail { - -void logTimer(const AdaptiveSimulatorTimer& substepTimer); - -std::set consistentlyFailingWells(const std::vector& sr); - -void registerAdaptiveParameters(); - -std::tuple, - bool> -createController(const UnitSystem& unitSystem); - + void logTimer(const AdaptiveSimulatorTimer& substep_timer); + std::set consistentlyFailingWells(const std::vector& sr); + void registerAdaptiveParameters(); } - // AdaptiveTimeStepping - //--------------------- - template - class AdaptiveTimeStepping +template +class AdaptiveTimeStepping +{ +private: + using Scalar = GetPropType; + template + class SolutionTimeErrorSolverWrapper : public RelativeChangeInterface { - using Scalar = GetPropType; - template - class SolutionTimeErrorSolverWrapper : public RelativeChangeInterface - { - const Solver& solver_; - public: - SolutionTimeErrorSolverWrapper(const Solver& solver) - : solver_(solver) - {} - - /// return || u^n+1 - u^n || / || u^n+1 || - double relativeChange() const - { return solver_.model().relativeChange(); } - }; - - template - void logException_(const E& exception, bool verbose) - { - if (verbose) { - std::string message; - message = "Caught Exception: "; - message += exception.what(); - OpmLog::debug(message); - } - } - public: - AdaptiveTimeStepping() = default; - - //! \brief contructor taking parameter object - explicit AdaptiveTimeStepping(const UnitSystem& unitSystem, - const double max_next_tstep = -1.0, - const bool terminalOutput = true); - - //! \brief contructor taking parameter object - //! \param tuning Pointer to ecl TUNING keyword - //! \param timeStep current report step - AdaptiveTimeStepping(double max_next_tstep, - const Tuning& tuning, - const UnitSystem& unitSystem, - const bool terminalOutput = true); - - static void registerParameters(); - - /** \brief step method that acts like the solver::step method - in a sub cycle of time steps - \param tuningUpdater Function used to update TUNING parameters before each - time step. ACTIONX might change tuning. - */ - template - SimulatorReport step(const SimulatorTimer& simulatorTimer, - Solver& solver, - const bool isEvent, - const std::function tuningUpdater); - - /** \brief Returns the simulator report for the failed substeps of the last - * report step. - */ - double suggestedNextStep() const - { return suggestedNextTimestep_; } - - void setSuggestedNextStep(const double x) - { suggestedNextTimestep_ = x; } + SolutionTimeErrorSolverWrapper(const Solver& solver); + double relativeChange() const; + private: + const Solver& solver_; + }; - void updateTUNING(double max_next_tstep, const Tuning& tuning); + // Forward declaration of SubStepIteration + // TODO: This forward declaration is not enough for GCC version 9.4.0 (released June 2021), + // but it works fine with GCC version 13.2.0 (released July 2023). + template class SubStepIteration; - void updateNEXTSTEP(double max_next_tstep); + template + class SubStepper { + public: + SubStepper( + AdaptiveTimeStepping& adaptive_time_stepping, + const SimulatorTimer& simulator_timer, + Solver& solver, + const bool is_event, + const std::function& tuning_updater + ); + + AdaptiveTimeStepping& getAdaptiveTimerStepper(); + SimulatorReport run(); + friend class AdaptiveTimeStepping::template SubStepIteration; - template - void serializeOp(Serializer& serializer); + private: + bool isReservoirCouplingMaster_() const; + bool isReservoirCouplingSlave_() const; + void maybeModifySuggestedTimeStepAtBeginningOfReportStep_(const double originalTimeStep); + bool maybeUpdateTuning_(double elapsed, double dt, int sub_step_number) const; + double maxTimeStep_() const; + SimulatorReport runStepOriginal_(); +#ifdef RESERVOIR_COUPLING_ENABLED + ReservoirCouplingMaster& reservoirCouplingMaster_(); + ReservoirCouplingSlave& reservoirCouplingSlave_(); + SimulatorReport runStepReservoirCouplingMaster_(); + SimulatorReport runStepReservoirCouplingSlave_(); +#endif + double suggestedNextTimestep_() const; + + AdaptiveTimeStepping& adaptive_time_stepping_; + const SimulatorTimer& simulator_timer_; + Solver& solver_; + const bool is_event_; + const std::function& tuning_updater_; + }; - static AdaptiveTimeStepping serializationTestObjectHardcoded(); - static AdaptiveTimeStepping serializationTestObjectPID(); - static AdaptiveTimeStepping serializationTestObjectPIDIt(); - static AdaptiveTimeStepping serializationTestObjectSimple(); + template + class SubStepIteration { + public: + SubStepIteration( + SubStepper& substepper, + AdaptiveSimulatorTimer& substep_timer, + const double original_time_step, + const bool final_step + ); - bool operator==(const AdaptiveTimeStepping& rhs) const; + SimulatorReport run(); private: - template - static AdaptiveTimeStepping serializationTestObject_(); - - template - void allocAndSerialize(Serializer& serializer) - { - if (!serializer.isSerializing()) { - timeStepControl_ = std::make_unique(); - } - serializer(*static_cast(timeStepControl_.get())); - } - - template - bool castAndComp(const AdaptiveTimeStepping& Rhs) const - { - const T* lhs = static_cast(timeStepControl_.get()); - const T* rhs = static_cast(Rhs.timeStepControl_.get()); - return *lhs == *rhs; - } - - protected: - void init_(const UnitSystem& unitSystem); - - using TimeStepController = std::unique_ptr; - - TimeStepControlType timeStepControlType_{TimeStepControlType::PIDAndIterationCount}; //!< type of time step control object - TimeStepController timeStepControl_{}; //!< time step control object - double restartFactor_{}; //!< factor to multiply time step with when solver fails to converge - double growthFactor_{}; //!< factor to multiply time step when solver recovered from failed convergence - double maxGrowth_{}; //!< factor that limits the maximum growth of a time step - double maxTimeStep_{}; //!< maximal allowed time step size in days - double minTimeStep_{}; //!< minimal allowed time step size before throwing - bool ignoreConvergenceFailure_{false}; //!< continue instead of stop when minimum time step is reached - int solverRestartMax_{}; //!< how many restart of solver are allowed - bool solverVerbose_{false}; //!< solver verbosity - bool timestepVerbose_{false}; //!< timestep verbosity - double suggestedNextTimestep_{}; //!< suggested size of next timestep - bool fullTimestepInitially_{false}; //!< beginning with the size of the time step from data file - double timestepAfterEvent_{}; //!< suggested size of timestep after an event - bool useNewtonIteration_{false}; //!< use newton iteration count for adaptive time step control - double minTimeStepBeforeShuttingProblematicWells_{}; //! < shut problematic wells when time step size in days are less than this + bool checkContinueOnUnconvergedSolution_(double dt) const; + void checkTimeStepMaxRestartLimit_(const int restarts) const; + void checkTimeStepMinLimit_(const int new_time_step) const; + void chopTimeStep_(const double new_time_step); + bool chopTimeStepOrCloseFailingWells_(const int new_time_step); + boost::posix_time::ptime currentDateTime_() const; + int getNumIterations_(const SimulatorReportSingle &substep_report) const; + double growthFactor_() const; + bool ignoreConvergenceFailure_() const; + void maybeReportSubStep_(SimulatorReportSingle substep_report) const; + double maybeRestrictTimeStepGrowth_( + const double dt, double dt_estimate, const int restarts) const; + void maybeUpdateTuningAndTimeStep_(); + double maxGrowth_() const; + double minTimeStepBeforeClosingWells_() const; + double minTimeStep_() const; + double restartFactor_() const; + SimulatorReportSingle runSubStep_(); + int solverRestartMax_() const; + double suggestedNextTimestep_() const; + void setSuggestedNextStep_(double step); + void setTimeStep_(double dt_estimate); + Solver& solver_() const; + bool solverVerbose_() const; + const SimulatorTimer& simulatorTimer_() const; + boost::posix_time::ptime startDateTime_() const; + double timeStepControlComputeEstimate_( + const double dt, const int iterations, double elapsed) const; + bool timeStepVerbose_() const; + void updateSuggestedNextStep_(); + bool useNewtonIteration_() const; + double writeOutput_() const; + + SubStepper& substepper_; + AdaptiveSimulatorTimer& substep_timer_; + const double original_time_step_; + const bool final_step_; + std::string cause_of_failure_; + AdaptiveTimeStepping& adaptive_time_stepping_; }; -} -#include +public: + AdaptiveTimeStepping() = default; + + AdaptiveTimeStepping( + const UnitSystem& unitSystem, + const double max_next_tstep = -1.0, + const bool terminalOutput = true + ); + + AdaptiveTimeStepping( + double max_next_tstep, + const Tuning& tuning, + const UnitSystem& unitSystem, + const bool terminalOutput = true + ); + bool operator==(const AdaptiveTimeStepping& rhs); + + static void registerParameters(); +#ifdef RESERVOIR_COUPLING_ENABLED + void setReservoirCouplingMaster(ReservoirCouplingMaster *reservoir_coupling_master); + void setReservoirCouplingSlave(ReservoirCouplingSlave *reservoir_coupling_slave); +#endif + void setSuggestedNextStep(const double x); + double suggestedNextStep() const; + + template + SimulatorReport step(const SimulatorTimer& simulator_timer, + Solver& solver, + const bool is_event, + const std::function + tuning_updater); + + void updateTUNING(double max_next_tstep, const Tuning& tuning); + void updateNEXTSTEP(double max_next_tstep); + + template + void serializeOp(Serializer& serializer); + + static AdaptiveTimeStepping serializationTestObjectHardcoded(); + static AdaptiveTimeStepping serializationTestObjectPID(); + static AdaptiveTimeStepping serializationTestObjectPIDIt(); + static AdaptiveTimeStepping serializationTestObjectSimple(); + +private: + void maybeModifySuggestedTimeStepAtBeginningOfReportStep_(const double original_time_step, + const bool is_event); + + template + static AdaptiveTimeStepping serializationTestObject_(); + + template + void allocAndSerialize(Serializer& serializer); + + template + bool castAndComp(const AdaptiveTimeStepping& Rhs) const; + +protected: + void init_(const UnitSystem& unitSystem); + + using TimeStepController = std::unique_ptr; + + TimeStepControlType time_step_control_type_; //!< type of time step control object + TimeStepController time_step_control_; //!< time step control object + double restart_factor_; //!< factor to multiply time step with when solver fails to converge + double growth_factor_; //!< factor to multiply time step when solver recovered from failed convergence + double max_growth_; //!< factor that limits the maximum growth of a time step + double max_time_step_; //!< maximal allowed time step size in days + double min_time_step_; //!< minimal allowed time step size before throwing + bool ignore_convergence_failure_; //!< continue instead of stop when minimum time step is reached + int solver_restart_max_; //!< how many restart of solver are allowed + bool solver_verbose_; //!< solver verbosity + bool timestep_verbose_; //!< timestep verbosity + double suggested_next_timestep_; //!< suggested size of next timestep + bool full_timestep_initially_; //!< beginning with the size of the time step from data file + double timestep_after_event_; //!< suggested size of timestep after an event + bool use_newton_iteration_; //!< use newton iteration count for adaptive time step control + + //! < shut problematic wells when time step size in days are less than this + double min_time_step_before_shutting_problematic_wells_; +#ifdef RESERVOIR_COUPLING_ENABLED + ReservoirCouplingMaster *reservoir_coupling_master_ = nullptr; + ReservoirCouplingSlave *reservoir_coupling_slave_ = nullptr; +#endif +}; + +} // namespace Opm +#include #endif // OPM_ADAPTIVE_TIME_STEPPING_HPP diff --git a/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp b/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp index 26343d47ff0..74c6c2c233c 100644 --- a/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp +++ b/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp @@ -1,11 +1,24 @@ /* + Copyright 2024 Equinor ASA. + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . */ + #ifndef OPM_ADAPTIVE_TIME_STEPPING_IMPL_HPP #define OPM_ADAPTIVE_TIME_STEPPING_IMPL_HPP - -// Improve IDE experience -#ifndef OPM_ADAPTIVE_TIME_STEPPING_HPP -#include #include #endif @@ -31,533 +44,1225 @@ #include namespace Opm { +/********************************************* + * Public methods of AdaptiveTimeStepping + * ******************************************/ + + +//! \brief contructor taking parameter object +template +AdaptiveTimeStepping::AdaptiveTimeStepping( + const UnitSystem& unit_system, + const double max_next_tstep, + const bool terminal_output +) + : time_step_control_{} + , restart_factor_{Parameters::Get>()} // 0.33 + , growth_factor_{Parameters::Get>()} // 2.0 + , max_growth_{Parameters::Get>()} // 3.0 + , max_time_step_{ + Parameters::Get>() * 24 * 60 * 60} // 365.25 + , min_time_step_{ + unit_system.to_si(UnitSystem::measure::time, + Parameters::Get>())} // 1e-12; + , ignore_convergence_failure_{ + Parameters::Get()} // false; + , solver_restart_max_{Parameters::Get()} // 10 + , solver_verbose_{Parameters::Get() > 0 && terminal_output} // 2 + , timestep_verbose_{Parameters::Get() > 0 && terminal_output} // 2 + , suggested_next_timestep_{ + (max_next_tstep <= 0 ? Parameters::Get() + : max_next_tstep) * 24 * 60 * 60} // 1.0 + , full_timestep_initially_{Parameters::Get()} // false + , timestep_after_event_{ + Parameters::Get>() * 24 * 60 * 60} // 1e30 + , use_newton_iteration_{false} + , min_time_step_before_shutting_problematic_wells_{ + Parameters::Get() * unit::day} + +{ + init_(unit_system); +} + +//! \brief contructor +//! \param tuning Pointer to ecl TUNING keyword +template +AdaptiveTimeStepping::AdaptiveTimeStepping(double max_next_tstep, + const Tuning& tuning, + const UnitSystem& unit_system, + const bool terminal_output +) + : time_step_control_{} + , restart_factor_{tuning.TSFCNV} + , growth_factor_{tuning.TFDIFF} + , max_growth_{tuning.TSFMAX} + , max_time_step_{tuning.TSMAXZ} // 365.25 + , min_time_step_{tuning.TSFMIN} // 0.1; + , ignore_convergence_failure_{true} + , solver_restart_max_{Parameters::Get()} // 10 + , solver_verbose_{Parameters::Get() > 0 && terminal_output} // 2 + , timestep_verbose_{Parameters::Get() > 0 && terminal_output} // 2 + , suggested_next_timestep_{ + max_next_tstep <= 0 ? Parameters::Get() * 24 * 60 * 60 + : max_next_tstep} // 1.0 + , full_timestep_initially_{Parameters::Get()} // false + , timestep_after_event_{tuning.TMAXWC} // 1e30 + , use_newton_iteration_{false} + , min_time_step_before_shutting_problematic_wells_{ + Parameters::Get() * unit::day} +{ + init_(unit_system); +} template +bool AdaptiveTimeStepping:: -AdaptiveTimeStepping(const UnitSystem& unitSystem, - const double max_next_tstep, - const bool terminalOutput) - : timeStepControl_() - , restartFactor_(Parameters::Get>()) // 0.33 - , growthFactor_(Parameters::Get>()) // 2.0 - , maxGrowth_(Parameters::Get>()) // 3.0 - , maxTimeStep_(Parameters::Get>() * 24 * 60 * 60) // 365.25 - , minTimeStep_(unitSystem.to_si(UnitSystem::measure::time, Parameters::Get>())) // 1e-12; - , ignoreConvergenceFailure_(Parameters::Get()) // false; - , solverRestartMax_(Parameters::Get()) // 10 - , solverVerbose_(Parameters::Get() > 0 && terminalOutput) // 2 - , timestepVerbose_(Parameters::Get() > 0 && terminalOutput) // 2 - , suggestedNextTimestep_((max_next_tstep <= 0 ? Parameters::Get() : max_next_tstep) * 24 * 60 * 60) // 1.0 - , fullTimestepInitially_(Parameters::Get()) // false - , timestepAfterEvent_(Parameters::Get>() * 24 * 60 * 60) // 1e30 - , useNewtonIteration_(false) - , minTimeStepBeforeShuttingProblematicWells_(Parameters::Get() * unit::day) - -{ - init_(unitSystem); +operator==(const AdaptiveTimeStepping& rhs) +{ + if (this->time_step_control_type_ != rhs.time_step_control_type_ || + (this->time_step_control_ && !rhs.time_step_control_) || + (!this->time_step_control_ && rhs.time_step_control_)) { + return false; + } + + bool result = false; + switch (this->time_step_control_type_) { + case TimeStepControlType::HardCodedTimeStep: + result = castAndComp(rhs); + break; + case TimeStepControlType::PIDAndIterationCount: + result = castAndComp(rhs); + break; + case TimeStepControlType::SimpleIterationCount: + result = castAndComp(rhs); + break; + case TimeStepControlType::PID: + result = castAndComp(rhs); + break; + } + + return result && + this->restart_factor_ == rhs.restart_factor_ && + this->growth_factor_ == rhs.growth_factor_ && + this->max_growth_ == rhs.max_growth_ && + this->max_time_step_ == rhs.max_time_step_ && + this->min_time_step_ == rhs.min_time_step_ && + this->ignore_convergence_failure_ == rhs.ignore_convergence_failure_ && + this->solver_restart_max_== rhs.solver_restart_max_ && + this->solver_verbose_ == rhs.solver_verbose_ && + this->full_timestep_initially_ == rhs.full_timestep_initially_ && + this->timestep_after_event_ == rhs.timestep_after_event_ && + this->use_newton_iteration_ == rhs.use_newton_iteration_ && + this->min_time_step_before_shutting_problematic_wells_ == + rhs.min_time_step_before_shutting_problematic_wells_; } template +void AdaptiveTimeStepping:: -AdaptiveTimeStepping(double max_next_tstep, - const Tuning& tuning, - const UnitSystem& unitSystem, - const bool terminalOutput) - : timeStepControl_() - , restartFactor_(tuning.TSFCNV) - , growthFactor_(tuning.TFDIFF) - , maxGrowth_(tuning.TSFMAX) - , maxTimeStep_(tuning.TSMAXZ) // 365.25 - , minTimeStep_(tuning.TSFMIN) // 0.1; - , ignoreConvergenceFailure_(true) - , solverRestartMax_(Parameters::Get()) // 10 - , solverVerbose_(Parameters::Get() > 0 && terminalOutput) // 2 - , timestepVerbose_(Parameters::Get() > 0 && terminalOutput) // 2 - , suggestedNextTimestep_(max_next_tstep <= 0 ? Parameters::Get() * 24 * 60 * 60 : max_next_tstep) // 1.0 - , fullTimestepInitially_(Parameters::Get()) // false - , timestepAfterEvent_(tuning.TMAXWC) // 1e30 - , useNewtonIteration_(false) - , minTimeStepBeforeShuttingProblematicWells_(Parameters::Get() * unit::day) -{ - init_(unitSystem); -} - -template -void AdaptiveTimeStepping::registerParameters() +registerParameters() { registerEclTimeSteppingParameters(); detail::registerAdaptiveParameters(); } +#ifdef RESERVOIR_COUPLING_ENABLED template -template +void +AdaptiveTimeStepping:: +setReservoirCouplingMaster(ReservoirCouplingMaster *reservoir_coupling_master) +{ + this->reservoir_coupling_master_ = reservoir_coupling_master; +} + +template +void +AdaptiveTimeStepping:: +setReservoirCouplingSlave(ReservoirCouplingSlave *reservoir_coupling_slave) +{ + this->reservoir_coupling_slave_ = reservoir_coupling_slave; +} +#endif + +/** \brief step method that acts like the solver::step method + in a sub cycle of time steps + \param tuningUpdater Function used to update TUNING parameters before each + time step. ACTIONX might change tuning. +*/ +template +template SimulatorReport AdaptiveTimeStepping:: -step(const SimulatorTimer& simulatorTimer, +step(const SimulatorTimer& simulator_timer, Solver& solver, - const bool isEvent, - const std::function tuningUpdater) + const bool is_event, + const std::function tuning_updater +) { - // Maybe update tuning - tuningUpdater(simulatorTimer.simulationTimeElapsed(), suggestedNextTimestep_, 0); - SimulatorReport report; - const double timestep = simulatorTimer.currentStepLength(); + SubStepper sub_stepper{ + *this, simulator_timer, solver, is_event, tuning_updater, + }; + return sub_stepper.run(); +} + +template +template +void +AdaptiveTimeStepping:: +serializeOp(Serializer& serializer) +{ + serializer(this->time_step_control_type_); + switch (this->time_step_control_type_) { + case TimeStepControlType::HardCodedTimeStep: + allocAndSerialize(serializer); + break; + case TimeStepControlType::PIDAndIterationCount: + allocAndSerialize(serializer); + break; + case TimeStepControlType::SimpleIterationCount: + allocAndSerialize(serializer); + break; + case TimeStepControlType::PID: + allocAndSerialize(serializer); + break; + } + serializer(this->restart_factor_); + serializer(this->growth_factor_); + serializer(this->max_growth_); + serializer(this->max_time_step_); + serializer(this->min_time_step_); + serializer(this->ignore_convergence_failure_); + serializer(this->solver_restart_max_); + serializer(this->solver_verbose_); + serializer(this->timestep_verbose_); + serializer(this->suggested_next_timestep_); + serializer(this->full_timestep_initially_); + serializer(this->timestep_after_event_); + serializer(this->use_newton_iteration_); + serializer(this->min_time_step_before_shutting_problematic_wells_); +} + +template +AdaptiveTimeStepping +AdaptiveTimeStepping:: +serializationTestObjectHardcoded() +{ + return serializationTestObject_(); +} + +template +AdaptiveTimeStepping +AdaptiveTimeStepping:: +serializationTestObjectPID() +{ + return serializationTestObject_(); +} + +template +AdaptiveTimeStepping +AdaptiveTimeStepping:: +serializationTestObjectPIDIt() +{ + return serializationTestObject_(); +} + +template +AdaptiveTimeStepping +AdaptiveTimeStepping:: +serializationTestObjectSimple() +{ + return serializationTestObject_(); +} + + +template +void +AdaptiveTimeStepping:: +setSuggestedNextStep(const double x) +{ + this->suggested_next_timestep_ = x; +} + +template +double +AdaptiveTimeStepping:: +suggestedNextStep() const +{ + return this->suggested_next_timestep_; +} + + +template +void +AdaptiveTimeStepping:: +updateNEXTSTEP(double max_next_tstep) +{ + // \Note Only update next suggested step if TSINIT was explicitly + // set in TUNING or NEXTSTEP is active. + if (max_next_tstep > 0) { + this->suggested_next_timestep_ = max_next_tstep; + } +} +template +void +AdaptiveTimeStepping:: +updateTUNING(double max_next_tstep, const Tuning& tuning) +{ + this->restart_factor_ = tuning.TSFCNV; + this->growth_factor_ = tuning.TFDIFF; + this->max_growth_ = tuning.TSFMAX; + this->max_time_step_ = tuning.TSMAXZ; + updateNEXTSTEP(max_next_tstep); + this->timestep_after_event_ = tuning.TMAXWC; +} + +/********************************************* + * Private methods of AdaptiveTimeStepping + * ******************************************/ + +template +template +void +AdaptiveTimeStepping:: +allocAndSerialize(Serializer& serializer) +{ + if (!serializer.isSerializing()) { + this->time_step_control_ = std::make_unique(); + } + serializer(*static_cast(this->time_step_control_.get())); +} + +template +template +bool +AdaptiveTimeStepping:: +castAndComp(const AdaptiveTimeStepping& Rhs) const +{ + const T* lhs = static_cast(this->time_step_control_.get()); + const T* rhs = static_cast(Rhs.time_step_control_.get()); + return *lhs == *rhs; +} + +template +void +AdaptiveTimeStepping:: +maybeModifySuggestedTimeStepAtBeginningOfReportStep_(const double original_time_step, + bool is_event) +{ // init last time step as a fraction of the given time step - if (suggestedNextTimestep_ < 0) { - suggestedNextTimestep_ = restartFactor_ * timestep; + if (this->suggested_next_timestep_ < 0) { + this->suggested_next_timestep_ = this->restart_factor_ * original_time_step; } - if (fullTimestepInitially_) { - suggestedNextTimestep_ = timestep; + if (this->full_timestep_initially_) { + this->suggested_next_timestep_ = original_time_step; } // use seperate time step after event - if (isEvent && timestepAfterEvent_ > 0) { - suggestedNextTimestep_ = timestepAfterEvent_; + if (is_event && this->timestep_after_event_ > 0) { + this->suggested_next_timestep_ = this->timestep_after_event_; } +} - auto& simulator = solver.model().simulator(); - auto& problem = simulator.problem(); +template +template +AdaptiveTimeStepping +AdaptiveTimeStepping:: +serializationTestObject_() +{ + AdaptiveTimeStepping result; - // create adaptive step timer with previously used sub step size - AdaptiveSimulatorTimer substepTimer(simulatorTimer, suggestedNextTimestep_, maxTimeStep_); + result.restart_factor_ = 1.0; + result.growth_factor_ = 2.0; + result.max_growth_ = 3.0; + result.max_time_step_ = 4.0; + result.min_time_step_ = 5.0; + result.ignore_convergence_failure_ = true; + result.solver_restart_max_ = 6; + result.solver_verbose_ = true; + result.timestep_verbose_ = true; + result.suggested_next_timestep_ = 7.0; + result.full_timestep_initially_ = true; + result.use_newton_iteration_ = true; + result.min_time_step_before_shutting_problematic_wells_ = 9.0; + result.time_step_control_type_ = Controller::Type; + result.time_step_control_ = + std::make_unique(Controller::serializationTestObject()); - // counter for solver restarts - int restarts = 0; + return result; +} - // sub step time loop - while (!substepTimer.done()) { - // Maybe update tuning - // get current delta t - auto oldValue = suggestedNextTimestep_; - if (tuningUpdater(substepTimer.simulationTimeElapsed(), - substepTimer.currentStepLength(), - substepTimer.currentStepNum())) { - // Use provideTimeStepEstimate to make we sure don't simulate longer than the report step is. - substepTimer.provideTimeStepEstimate(suggestedNextTimestep_); - suggestedNextTimestep_ = oldValue; - } - const double dt = substepTimer.currentStepLength(); - if (timestepVerbose_) { - detail::logTimer(substepTimer); - } +/********************************************* + * Protected methods of AdaptiveTimeStepping + * ******************************************/ - SimulatorReportSingle substepReport; - std::string causeOfFailure; - try { - substepReport = solver.step(substepTimer); +template +void AdaptiveTimeStepping:: +init_(const UnitSystem& unitSystem) +{ + std::tie(time_step_control_type_, + time_step_control_, + use_newton_iteration_) = detail::createController(unitSystem); + // make sure growth factor is something reasonable + if (growthFactor_ < 1.0) { + OPM_THROW(std::runtime_error, + "Growth factor cannot be less than 1."); + } +} - if (solverVerbose_) { - // report number of linear iterations - OpmLog::debug("Overall linear iterations used: " + - std::to_string(substepReport.total_linear_iterations)); - } - } - catch (const TooManyIterations& e) { - substepReport = solver.failureReport(); - causeOfFailure = "Solver convergence failure - Iteration limit reached"; - logException_(e, solverVerbose_); - // since linearIterations is < 0 this will restart the solver + +/************************************************ + * Private class SubStepper public methods + ************************************************/ + +template +template +AdaptiveTimeStepping::SubStepper:: +SubStepper( + AdaptiveTimeStepping& adaptive_time_stepping, + const SimulatorTimer& simulator_timer, + Solver& solver, + const bool is_event, + const std::function& tuning_updater + +) + : adaptive_time_stepping_{adaptive_time_stepping} + , simulator_timer_{simulator_timer} + , solver_{solver} + , is_event_{is_event} + , tuning_updater_{tuning_updater} +{ +} + +template +template +AdaptiveTimeStepping& +AdaptiveTimeStepping::SubStepper:: +getAdaptiveTimerStepper() +{ + return adaptive_time_stepping_; +} + +template +template +SimulatorReport +AdaptiveTimeStepping::SubStepper:: +run() +{ +#ifdef RESERVOIR_COUPLING_ENABLED + if (isReservoirCouplingSlave_() && reservoirCouplingSlave_().activated()) { + return runStepReservoirCouplingSlave_(); + } + else if (isReservoirCouplingMaster_() && reservoirCouplingMaster_().activated()) { + return runStepReservoirCouplingMaster_(); + } + else { + return runStepOriginal_(); + } +#else + return runStepOriginal_(); +#endif +} + +/************************************************ + * Private class SubStepper private methods + ************************************************/ + + +template +template +bool +AdaptiveTimeStepping::SubStepper:: +isReservoirCouplingMaster_() const +{ + return this->adaptive_time_stepping_.reservoir_coupling_master_ != nullptr; +} + +template +template +bool +AdaptiveTimeStepping::SubStepper:: +isReservoirCouplingSlave_() const +{ + return this->adaptive_time_stepping_.reservoir_coupling_slave_ != nullptr; +} + +template +template +void +AdaptiveTimeStepping::SubStepper:: +maybeModifySuggestedTimeStepAtBeginningOfReportStep_(const double original_time_step) +{ + this->adaptive_time_stepping_.maybeModifySuggestedTimeStepAtBeginningOfReportStep_( + original_time_step, this->is_event_ + ); +} + +// The maybeUpdateTuning_() lambda callback is defined in SimulatorFullyImplicitBlackoil::runStep() +// It has to be called for each substep since TUNING might have been changed for next sub step due +// to ACTIONX (via NEXTSTEP) or WCYCLE keywords. +template +template +bool +AdaptiveTimeStepping::SubStepper:: +maybeUpdateTuning_(double elapsed, double dt, int sub_step_number) const +{ + return this->tuning_updater_(elapsed, dt, sub_step_number); +} + +template +template +double +AdaptiveTimeStepping::SubStepper:: +maxTimeStep_() const +{ + return this->adaptive_time_stepping_.max_time_step_; +} + +template +template +SimulatorReport +AdaptiveTimeStepping::SubStepper:: +runStepOriginal_() +{ + auto elapsed = this->simulator_timer_.simulationTimeElapsed(); + auto original_time_step = this->simulator_timer_.currentStepLength(); + auto report_step = this->simulator_timer_.reportStepNum(); + maybeUpdateTuning_(elapsed, original_time_step, report_step); + maybeModifySuggestedTimeStepAtBeginningOfReportStep_(original_time_step); + + AdaptiveSimulatorTimer substep_timer{ + this->simulator_timer_.startDateTime(), + original_time_step, + elapsed, + suggestedNextTimestep_(), + report_step, + maxTimeStep_() + }; + SubStepIteration substepIteration{*this, substep_timer, original_time_step, /*final_step=*/true}; + return substepIteration.run(); +} + +#ifdef RESERVOIR_COUPLING_ENABLED +template +template +ReservoirCouplingMaster& +AdaptiveTimeStepping::SubStepper:: +reservoirCouplingMaster_() +{ + return *adaptive_time_stepping_.reservoir_coupling_master_; +} +#endif + +#ifdef RESERVOIR_COUPLING_ENABLED +template +template +ReservoirCouplingSlave& +AdaptiveTimeStepping::SubStepper:: +reservoirCouplingSlave_() +{ + return *this->adaptive_time_stepping_.reservoir_coupling_slave_; +} +#endif + +#ifdef RESERVOIR_COUPLING_ENABLED +template +template +SimulatorReport +AdaptiveTimeStepping::SubStepper:: +runStepReservoirCouplingMaster_() +{ + bool substep_done = false; + int iteration = 0; + const double original_time_step = this->simulator_timer_.currentStepLength(); + double current_time{this->simulator_timer_.simulationTimeElapsed()}; + double step_end_time = current_time + original_time_step; + auto current_step_length = original_time_step; + SimulatorReport report; + while(!substep_done) { + reservoirCouplingMaster_().receiveNextReportDateFromSlaves(); + if (iteration == 0) { + maybeUpdateTuning_(current_time, current_step_length, /*substep=*/0); } - catch (const ConvergenceMonitorFailure& e) { - substepReport = solver.failureReport(); - causeOfFailure = "Convergence monitor failure"; + current_step_length = reservoirCouplingMaster_().maybeChopSubStep( + current_step_length, current_time); + reservoirCouplingMaster_().sendNextTimeStepToSlaves(current_step_length); + if (iteration == 0) { + maybeModifySuggestedTimeStepAtBeginningOfReportStep_(current_step_length); } - catch (const LinearSolverProblem& e) { - substepReport = solver.failureReport(); - causeOfFailure = "Linear solver convergence failure"; - - logException_(e, solverVerbose_); - // since linearIterations is < 0 this will restart the solver + AdaptiveSimulatorTimer substep_timer{ + this->simulator_timer_.startDateTime(), + /*stepLength=*/current_step_length, + /*elapsedTime=*/current_time, + /*timeStepEstimate=*/suggestedNextTimestep_(), + /*reportStep=*/this->simulator_timer_.reportStepNum(), + maxTimeStep_() + }; + bool final_step = ReservoirCoupling::Seconds::compare_gt_or_eq( + current_time + current_step_length, step_end_time + ); + SubStepIteration substepIteration{*this, substep_timer, current_step_length, final_step}; + auto sub_steps_report = substepIteration.run(); + report += sub_steps_report; + current_time += current_step_length; + if (final_step) { + break; } - catch (const NumericalProblem& e) { - substepReport = solver.failureReport(); - causeOfFailure = "Solver convergence failure - Numerical problem encountered"; + iteration++; + } + return report; +} +#endif - logException_(e, solverVerbose_); - // since linearIterations is < 0 this will restart the solver +#ifdef RESERVOIR_COUPLING_ENABLED +template +template +SimulatorReport +AdaptiveTimeStepping::SubStepper:: +runStepReservoirCouplingSlave_() +{ + bool substep_done = false; + int iteration = 0; + const double original_time_step = this->simulator_timer_.currentStepLength(); + double current_time{this->simulator_timer_.simulationTimeElapsed()}; + double step_end_time = current_time + original_time_step; + SimulatorReport report; + while(!substep_done) { + reservoirCouplingSlave_().sendNextReportDateToMasterProcess(); + auto timestep = reservoirCouplingSlave_().receiveNextTimeStepFromMaster(); + if (iteration == 0) { + maybeUpdateTuning_(current_time, original_time_step, /*substep=*/0); + maybeModifySuggestedTimeStepAtBeginningOfReportStep_(timestep); } - catch (const std::runtime_error& e) { - substepReport = solver.failureReport(); - - logException_(e, solverVerbose_); - // also catch linear solver not converged + AdaptiveSimulatorTimer substep_timer{ + this->simulator_timer_.startDateTime(), + /*step_length=*/timestep, + /*elapsed_time=*/current_time, + /*time_step_estimate=*/suggestedNextTimestep_(), + this->simulator_timer_.reportStepNum(), + maxTimeStep_() + }; + bool final_step = ReservoirCoupling::Seconds::compare_gt_or_eq( + current_time + timestep, step_end_time + ); + SubStepIteration substepIteration{*this, substep_timer, timestep, final_step}; + auto sub_steps_report = substepIteration.run(); + report += sub_steps_report; + current_time += timestep; + if (final_step) { + substep_done = true; + break; } - catch (const Dune::ISTLError& e) { - substepReport = solver.failureReport(); + iteration++; + } + return report; +} - logException_(e, solverVerbose_); - // also catch errors in ISTL AMG that occur when time step is too large - } - catch (const Dune::MatrixBlockError& e) { - substepReport = solver.failureReport(); +#endif + +template +template +double +AdaptiveTimeStepping::SubStepper:: +suggestedNextTimestep_() const +{ + return this->adaptive_time_stepping_.suggestedNextStep(); +} - logException_(e, solverVerbose_); - // this can be thrown by ISTL's ILU0 in block mode, yet is not an ISTLError + + +/************************************************ + * Private class SubStepIteration public methods + ************************************************/ + +template +template +AdaptiveTimeStepping::SubStepIteration:: +SubStepIteration( + SubStepper& substepper, + AdaptiveSimulatorTimer& substep_timer, + const double original_time_step, + bool final_step +) + : substepper_{substepper} + , substep_timer_{substep_timer} + , original_time_step_{original_time_step} + , final_step_{final_step} + , adaptive_time_stepping_{substepper.getAdaptiveTimerStepper()} +{ +} + +template +template +SimulatorReport +AdaptiveTimeStepping::SubStepIteration:: +run() +{ + auto& simulator = solver_().model().simulator(); + auto& problem = simulator.problem(); + // counter for solver restarts + int restarts = 0; + SimulatorReport report; + + // sub step time loop + while (!this->substep_timer_.done()) { + maybeUpdateTuningAndTimeStep_(); + const double dt = this->substep_timer_.currentStepLength(); + if (timeStepVerbose_()) { + detail::logTimer(this->substep_timer_); } + auto substep_report = runSubStep_(); + //Pass substep to eclwriter for summary output - simulator.problem().setSubStepReport(substepReport); - - report += substepReport; - - bool continue_on_uncoverged_solution = ignoreConvergenceFailure_ && - !substepReport.converged && - dt <= minTimeStep_; - - if (continue_on_uncoverged_solution && solverVerbose_) { - const auto msg = fmt::format( - "Solver failed to converge but timestep " - "{} is smaller or equal to {}\n" - "which is the minimum threshold given " - "by option --solver-min-time-step\n", - dt, minTimeStep_ - ); - OpmLog::problem(msg); - } + problem.setSubStepReport(substep_report); - if (substepReport.converged || continue_on_uncoverged_solution) { - - // advance by current dt - ++substepTimer; - - // create object to compute the time error, simply forwards the call to the model - SolutionTimeErrorSolverWrapper relativeChange(solver); - - // compute new time step estimate - const int iterations = useNewtonIteration_ ? substepReport.total_newton_iterations - : substepReport.total_linear_iterations; - double dtEstimate = timeStepControl_->computeTimeStepSize(dt, iterations, relativeChange, - substepTimer.simulationTimeElapsed()); - - assert(dtEstimate > 0); - // limit the growth of the timestep size by the growth factor - dtEstimate = std::min(dtEstimate, double(maxGrowth_ * dt)); - assert(dtEstimate > 0); - // further restrict time step size growth after convergence problems - if (restarts > 0) { - dtEstimate = std::min(growthFactor_ * dt, dtEstimate); - // solver converged, reset restarts counter - restarts = 0; - } + report += substep_report; - if (timestepVerbose_) { - std::ostringstream ss; - substepReport.reportStep(ss); - OpmLog::info(ss.str()); - } + if (substep_report.converged || checkContinueOnUnconvergedSolution_(dt)) { + ++this->substep_timer_; // advance by current dt - // write data if outputWriter was provided - // if the time step is done we do not need - // to write it as this will be done by the simulator - // anyway. - if (!substepTimer.done()) { - time::StopWatch perfTimer; - perfTimer.start(); + const int iterations = getNumIterations_(substep_report); + auto dt_estimate = timeStepControlComputeEstimate_( + dt, iterations, this->substep_timer_.simulationTimeElapsed()); - problem.writeOutput(true); + assert(dt_estimate > 0); + dt_estimate = maybeRestrictTimeStepGrowth_(dt, dt_estimate, restarts); + restarts = 0; // solver converged, reset restarts counter - report.success.output_write_time += perfTimer.secsSinceStart(); + maybeReportSubStep_(substep_report); + if (this->final_step_ && this->substep_timer_.done()) { + // if the time step is done we do not need to write it as this will be done + // by the simulator anyway. + } + else { + report.success.output_write_time += writeOutput_(); } // set new time step length - substepTimer.provideTimeStepEstimate(dtEstimate); - - report.success.converged = substepTimer.done(); - substepTimer.setLastStepFailed(false); + setTimeStep_(dt_estimate); + report.success.converged = this->substep_timer_.done(); + this->substep_timer_.setLastStepFailed(false); } else { // in case of no convergence - substepTimer.setLastStepFailed(true); - - // If we have restarted (i.e. cut the timestep) too - // many times, we have failed and throw an exception. - if (restarts >= solverRestartMax_) { - const auto msg = fmt::format( - "Solver failed to converge after cutting timestep {} times.", - restarts - ); - if (solverVerbose_) { - OpmLog::error(msg); - } - // Use throw directly to prevent file and line - throw TimeSteppingBreakdown{msg}; + this->substep_timer_.setLastStepFailed(true); + checkTimeStepMaxRestartLimit_(restarts); + + const double new_time_step = restartFactor_() * dt; + checkTimeStepMinLimit_(new_time_step); + bool wells_shut = false; + if (new_time_step > minTimeStepBeforeClosingWells_()) { + chopTimeStep_(new_time_step); + } else { + wells_shut = chopTimeStepOrCloseFailingWells_(new_time_step); + } + if (wells_shut) { + setTimeStep_(dt); // retry the old timestep } + else { + restarts++; // only increase if no wells were shut + } + } + problem.setNextTimeStepSize(this->substep_timer_.currentStepLength()); + } + updateSuggestedNextStep_(); + return report; +} - // The new, chopped timestep. - const double newTimeStep = restartFactor_ * dt; +/************************************************ + * Private class SubStepIteration private methods + ************************************************/ - // If we have restarted (i.e. cut the timestep) too - // much, we have failed and throw an exception. - if (newTimeStep < minTimeStep_) { - const auto msg = fmt::format( - "Solver failed to converge after cutting timestep to {}\n" - "which is the minimum threshold given by option --solver-min-time-step\n", - minTimeStep_ - ); - if (solverVerbose_) { - OpmLog::error(msg); - } - // Use throw directly to prevent file and line - throw TimeSteppingBreakdown{msg}; - } - // Define utility function for chopping timestep. - auto chopTimestep = [&]() { - substepTimer.provideTimeStepEstimate(newTimeStep); - if (solverVerbose_) { - const auto msg = fmt::format( - "{}\nTimestep chopped to {} days\n", - causeOfFailure, - unit::convert::to(substepTimer.currentStepLength(), unit::day) - ); - OpmLog::problem(msg); - } - ++restarts; - }; +template +template +bool +AdaptiveTimeStepping::SubStepIteration:: +checkContinueOnUnconvergedSolution_(double dt) const +{ + bool continue_on_uncoverged_solution = ignoreConvergenceFailure_() && dt <= minTimeStep_(); + if (continue_on_uncoverged_solution && solverVerbose_()) { + // NOTE: This method is only called if the solver failed to converge. + const auto msg = fmt::format( + "Solver failed to converge but timestep {} is smaller or equal to {}\n" + "which is the minimum threshold given by option --solver-min-time-step\n", + dt, minTimeStep_() + ); + OpmLog::problem(msg); + } + return continue_on_uncoverged_solution; +} - const double minimumChoppedTimestep = minTimeStepBeforeShuttingProblematicWells_; - if (newTimeStep > minimumChoppedTimestep) { - chopTimestep(); - } else { - // We are below the threshold, and will check if there are any - // wells we should close rather than chopping again. - std::set failing_wells = detail::consistentlyFailingWells(solver.model().stepReports()); - if (failing_wells.empty()) { - // Found no wells to close, chop the timestep as above. - chopTimestep(); - } else { - // Close all consistently failing wells that are not under group control - std::vector shut_wells; - for (const auto& well : failing_wells) { - bool was_shut = solver.model().wellModel().forceShutWellByName( - well, substepTimer.simulationTimeElapsed(), /*dont_shut_grup_wells =*/ true); - if (was_shut) { - shut_wells.push_back(well); - } - } - // If no wells are closed we also try to shut wells under group control - if (shut_wells.empty()) { - for (const auto& well : failing_wells) { - bool was_shut = solver.model().wellModel().forceShutWellByName( - well, substepTimer.simulationTimeElapsed(), /*dont_shut_grup_wells =*/ false); - if (was_shut) { - shut_wells.push_back(well); - } - } - } - // If still no wells are closed we must fall back to chopping again - if (shut_wells.empty()) { - chopTimestep(); - } else { - substepTimer.provideTimeStepEstimate(dt); - if (solverVerbose_) { - const std::string msg = - fmt::format("\nProblematic well(s) were shut: {}" - "(retrying timestep)\n", - fmt::join(shut_wells, " ")); - OpmLog::problem(msg); - } - } +template +template +void +AdaptiveTimeStepping::SubStepIteration:: +checkTimeStepMaxRestartLimit_(const int restarts) const +{ + // If we have restarted (i.e. cut the timestep) too + // many times, we have failed and throw an exception. + if (restarts >= solverRestartMax_()) { + const auto msg = fmt::format( + "Solver failed to converge after cutting timestep {} times.", restarts + ); + if (solverVerbose_()) { + OpmLog::error(msg); + } + // Use throw directly to prevent file and line + throw TimeSteppingBreakdown{msg}; + } +} + +template +template +void +AdaptiveTimeStepping::SubStepIteration:: +checkTimeStepMinLimit_(const int new_time_step) const +{ + // If we have restarted (i.e. cut the timestep) too + // much, we have failed and throw an exception. + if (new_time_step < minTimeStep_()) { + const auto msg = fmt::format( + "Solver failed to converge after cutting timestep to {}\n" + "which is the minimum threshold given by option --solver-min-time-step\n", + minTimeStep_() + ); + if (solverVerbose_()) { + OpmLog::error(msg); + } + // Use throw directly to prevent file and line + throw TimeSteppingBreakdown{msg}; + } +} + +template +template +void +AdaptiveTimeStepping::SubStepIteration:: +chopTimeStep_(const double new_time_step) +{ + setTimeStep_(new_time_step); + if (solverVerbose_()) { + const auto msg = fmt::format("{}\nTimestep chopped to {} days\n", + this->cause_of_failure_, + unit::convert::to(this->substep_timer_.currentStepLength(), unit::day)); + OpmLog::problem(msg); + } +} + +template +template +bool +AdaptiveTimeStepping::SubStepIteration:: +chopTimeStepOrCloseFailingWells_(const int new_time_step) +{ + bool wells_shut = false; + // We are below the threshold, and will check if there are any wells we should close + // rather than chopping again. + std::set failing_wells = detail::consistentlyFailingWells( + solver_().model().stepReports()); + if (failing_wells.empty()) { + // Found no wells to close, chop the timestep + chopTimeStep_(new_time_step); + } else { + // Close all consistently failing wells that are not under group control + std::vector shut_wells; + for (const auto& well : failing_wells) { + bool was_shut = solver_().model().wellModel().forceShutWellByName( + well, this->substep_timer_.simulationTimeElapsed(), /*dont_shut_grup_wells =*/ true); + if (was_shut) { + shut_wells.push_back(well); + } + } + // If no wells are closed we also try to shut wells under group control + if (shut_wells.empty()) { + for (const auto& well : failing_wells) { + bool was_shut = solver_().model().wellModel().forceShutWellByName( + well, this->substep_timer_.simulationTimeElapsed(), /*dont_shut_grup_wells =*/ false); + if (was_shut) { + shut_wells.push_back(well); } } } - problem.setNextTimeStepSize(substepTimer.currentStepLength()); + // If still no wells are closed we must fall back to chopping again + if (shut_wells.empty()) { + chopTimeStep_(new_time_step); + } else { + wells_shut = true; + if (solverVerbose_()) { + const std::string msg = + fmt::format("\nProblematic well(s) were shut: {}" + "(retrying timestep)\n", + fmt::join(shut_wells, " ")); + OpmLog::problem(msg); + } + } } + return wells_shut; +} - // store estimated time step for next reportStep - suggestedNextTimestep_ = substepTimer.currentStepLength(); - if (timestepVerbose_) { - std::ostringstream ss; - substepTimer.report(ss); - ss << "Suggested next step size = " << unit::convert::to(suggestedNextTimestep_, unit::day) << " (days)" << std::endl; - OpmLog::debug(ss.str()); - } +template +template +boost::posix_time::ptime +AdaptiveTimeStepping::SubStepIteration:: +currentDateTime_() const +{ + return simulatorTimer_().currentDateTime(); +} - if (! std::isfinite(suggestedNextTimestep_)) { // check for NaN - suggestedNextTimestep_ = timestep; +template +template +int +AdaptiveTimeStepping::SubStepIteration:: +getNumIterations_(const SimulatorReportSingle &substep_report) const +{ + if (useNewtonIteration_()) { + return substep_report.total_newton_iterations; + } + else { + return substep_report.total_linear_iterations; } - return report; } template -void AdaptiveTimeStepping:: -updateTUNING(double max_next_tstep, const Tuning& tuning) +template +double +AdaptiveTimeStepping::SubStepIteration:: +growthFactor_() const { - restartFactor_ = tuning.TSFCNV; - growthFactor_ = tuning.TFDIFF; - maxGrowth_ = tuning.TSFMAX; - maxTimeStep_ = tuning.TSMAXZ; - updateNEXTSTEP(max_next_tstep); - timestepAfterEvent_ = tuning.TMAXWC; + return this->adaptive_time_stepping_.growth_factor_; } template -void AdaptiveTimeStepping:: -updateNEXTSTEP(double max_next_tstep) +template +bool +AdaptiveTimeStepping::SubStepIteration:: +ignoreConvergenceFailure_() const { - // \Note Only update next suggested step if TSINIT was explicitly set in TUNING or NEXTSTEP is active. - if (max_next_tstep > 0) { - suggestedNextTimestep_ = max_next_tstep; + return adaptive_time_stepping_.ignore_convergence_failure_; +} + +template +template +double +AdaptiveTimeStepping::SubStepIteration:: +maxGrowth_() const +{ + return this->adaptive_time_stepping_.max_growth_; +} + +template +template +void +AdaptiveTimeStepping::SubStepIteration:: +maybeReportSubStep_(SimulatorReportSingle substep_report) const +{ + if (timeStepVerbose_()) { + std::ostringstream ss; + substep_report.reportStep(ss); + OpmLog::info(ss.str()); } } template -template -void AdaptiveTimeStepping:: -serializeOp(Serializer& serializer) +template +double +AdaptiveTimeStepping::SubStepIteration:: +maybeRestrictTimeStepGrowth_(const double dt, double dt_estimate, const int restarts) const { - serializer(timeStepControlType_); - switch (timeStepControlType_) { - case TimeStepControlType::HardCodedTimeStep: - allocAndSerialize(serializer); - break; - case TimeStepControlType::PIDAndIterationCount: - allocAndSerialize(serializer); - break; - case TimeStepControlType::SimpleIterationCount: - allocAndSerialize(serializer); - break; - case TimeStepControlType::PID: - allocAndSerialize(serializer); - break; + // limit the growth of the timestep size by the growth factor + dt_estimate = std::min(dt_estimate, double(maxGrowth_() * dt)); + assert(dt_estimate > 0); + // further restrict time step size growth after convergence problems + if (restarts > 0) { + dt_estimate = std::min(growthFactor_() * dt, dt_estimate); } - serializer(restartFactor_); - serializer(growthFactor_); - serializer(maxGrowth_); - serializer(maxTimeStep_); - serializer(minTimeStep_); - serializer(ignoreConvergenceFailure_); - serializer(solverRestartMax_); - serializer(solverVerbose_); - serializer(timestepVerbose_); - serializer(suggestedNextTimestep_); - serializer(fullTimestepInitially_); - serializer(timestepAfterEvent_); - serializer(useNewtonIteration_); - serializer(minTimeStepBeforeShuttingProblematicWells_); + + return dt_estimate; } +// The maybeUpdateTuning_() lambda callback is defined in SimulatorFullyImplicitBlackoil::runStep() +// It has to be called for each substep since TUNING might have been changed for next sub step due +// to ACTIONX (via NEXTSTEP) or WCYCLE keywords. template -AdaptiveTimeStepping -AdaptiveTimeStepping:: -serializationTestObjectHardcoded() +template +void +AdaptiveTimeStepping::SubStepIteration:: +maybeUpdateTuningAndTimeStep_() { - return serializationTestObject_(); + // TODO: This function is currently only called if NEXTSTEP is activated from ACTIONX or + // if the WCYCLE keyword needs to modify the current timestep. So this method should rather + // be named maybeUpdateTimeStep_() or similar, since it should not update the tuning. However, + // the current definition of the maybeUpdateTuning_() callback is actually calling + // adaptiveTimeStepping_->updateTUNING(max_next_tstep, tuning) which is updating the tuning + // see SimulatorFullyImplicitBlackoil::runStep() for more details. + auto old_value = suggestedNextTimestep_(); + if (this->substepper_.maybeUpdateTuning_(this->substep_timer_.simulationTimeElapsed(), + this->substep_timer_.currentStepLength(), + this->substep_timer_.currentStepNum())) + { + // Either NEXTSTEP and WCYCLE wants to change the current time step, but they cannot + // change the current time step directly. Instead, they change the suggested next time step + // by calling updateNEXTSTEP() via the maybeUpdateTuning() callback. We now need to update + // the current time step to the new suggested time step and reset the suggested time step + // to the old value. + setTimeStep_(suggestedNextTimestep_()); + setSuggestedNextStep_(old_value); + } } template -AdaptiveTimeStepping -AdaptiveTimeStepping:: -serializationTestObjectPID() +template +double +AdaptiveTimeStepping::SubStepIteration:: +minTimeStepBeforeClosingWells_() const { - return serializationTestObject_(); + return this->adaptive_time_stepping_.min_time_step_before_shutting_problematic_wells_; } template -AdaptiveTimeStepping -AdaptiveTimeStepping:: -serializationTestObjectPIDIt() +template +double +AdaptiveTimeStepping::SubStepIteration:: +minTimeStep_() const { - return serializationTestObject_(); + return this->adaptive_time_stepping_.min_time_step_; } template -AdaptiveTimeStepping -AdaptiveTimeStepping:: -serializationTestObjectSimple() +template +double +AdaptiveTimeStepping::SubStepIteration:: +restartFactor_() const { - return serializationTestObject_(); + return this->adaptive_time_stepping_.restart_factor_; } template -bool -AdaptiveTimeStepping:: -operator==(const AdaptiveTimeStepping& rhs) const +template +SimulatorReportSingle +AdaptiveTimeStepping::SubStepIteration:: +runSubStep_() { - if (timeStepControlType_ != rhs.timeStepControlType_ || - (timeStepControl_ && !rhs.timeStepControl_) || - (!timeStepControl_ && rhs.timeStepControl_)) { - return false; + SimulatorReportSingle substep_report; + + auto handleFailure = [this, &substep_report] + (const std::string& failure_reason, const std::exception& e, bool log_exception = true) + { + substep_report = solver_().failureReport(); + this->cause_of_failure_ = failure_reason; + if (log_exception && solverVerbose_()) { + std::string message; + message = "Caught Exception: "; + message += e.what(); + OpmLog::debug(message); + } + }; + + try { + substep_report = solver_().step(this->substep_timer_); + if (solverVerbose_()) { + // report number of linear iterations + OpmLog::debug("Overall linear iterations used: " + + std::to_string(substep_report.total_linear_iterations)); + } } - - bool result = false; - switch (timeStepControlType_) { - case TimeStepControlType::HardCodedTimeStep: - result = castAndComp(rhs); - break; - case TimeStepControlType::PIDAndIterationCount: - result = castAndComp(rhs); - break; - case TimeStepControlType::SimpleIterationCount: - result = castAndComp(rhs); - break; - case TimeStepControlType::PID: - result = castAndComp(rhs); - break; + catch (const TooManyIterations& e) { + handleFailure("Solver convergence failure - Iteration limit reached", e); + } + catch (const ConvergenceMonitorFailure& e) { + handleFailure("Convergence monitor failure", e, /*log_exception=*/false); + } + catch (const LinearSolverProblem& e) { + handleFailure("Linear solver convergence failure", e); + } + catch (const NumericalProblem& e) { + handleFailure("Solver convergence failure - Numerical problem encountered", e); + } + catch (const std::runtime_error& e) { + handleFailure("Runtime error encountered", e); + } + catch (const Dune::ISTLError& e) { + handleFailure("ISTL error - Time step too large", e); + } + catch (const Dune::MatrixBlockError& e) { + handleFailure("Matrix block error", e); } - return result - && this->restartFactor_ == rhs.restartFactor_ - && this->growthFactor_ == rhs.growthFactor_ - && this->maxGrowth_ == rhs.maxGrowth_ - && this->maxTimeStep_ == rhs.maxTimeStep_ - && this->minTimeStep_ == rhs.minTimeStep_ - && this->ignoreConvergenceFailure_ == rhs.ignoreConvergenceFailure_ - && this->solverRestartMax_== rhs.solverRestartMax_ - && this->solverVerbose_ == rhs.solverVerbose_ - && this->fullTimestepInitially_ == rhs.fullTimestepInitially_ - && this->timestepAfterEvent_ == rhs.timestepAfterEvent_ - && this->useNewtonIteration_ == rhs.useNewtonIteration_ - && this->minTimeStepBeforeShuttingProblematicWells_ == - rhs.minTimeStepBeforeShuttingProblematicWells_; + return substep_report; } template -template -AdaptiveTimeStepping -AdaptiveTimeStepping:: -serializationTestObject_() +template +void +AdaptiveTimeStepping::SubStepIteration:: +setTimeStep_(double dt_estimate) { - AdaptiveTimeStepping result; + this->substep_timer_.provideTimeStepEstimate(dt_estimate); +} - result.restartFactor_ = 1.0; - result.growthFactor_ = 2.0; - result.maxGrowth_ = 3.0; - result.maxTimeStep_ = 4.0; - result.minTimeStep_ = 5.0; - result.ignoreConvergenceFailure_ = true; - result.solverRestartMax_ = 6; - result.solverVerbose_ = true; - result.timestepVerbose_ = true; - result.suggestedNextTimestep_ = 7.0; - result.fullTimestepInitially_ = true; - result.useNewtonIteration_ = true; - result.minTimeStepBeforeShuttingProblematicWells_ = 9.0; - result.timeStepControlType_ = Controller::Type; - result.timeStepControl_ = std::make_unique(Controller::serializationTestObject()); +template +template +Solver& +AdaptiveTimeStepping::SubStepIteration:: +solver_() const +{ + return this->substepper_.solver_; +} - return result; + +template +template +int +AdaptiveTimeStepping::SubStepIteration:: +solverRestartMax_() const +{ + return this->adaptive_time_stepping_.solver_restart_max_; } template -void AdaptiveTimeStepping:: -init_(const UnitSystem& unitSystem) +template +void +AdaptiveTimeStepping::SubStepIteration:: +setSuggestedNextStep_(double step) { - std::tie(timeStepControlType_, - timeStepControl_, - useNewtonIteration_) = detail::createController(unitSystem); + this->adaptive_time_stepping_.setSuggestedNextStep(step); +} - // make sure growth factor is something reasonable - if (growthFactor_ < 1.0) { - OPM_THROW(std::runtime_error, - "Growth factor cannot be less than 1."); +template +template +const SimulatorTimer& +AdaptiveTimeStepping::SubStepIteration:: +simulatorTimer_() const +{ + return this->substepper_.simulator_timer_; +} + +template +template +bool +AdaptiveTimeStepping::SubStepIteration:: +solverVerbose_() const +{ + return this->adaptive_time_stepping_.solver_verbose_; +} + +template +template +boost::posix_time::ptime +AdaptiveTimeStepping::SubStepIteration:: +startDateTime_() const +{ + return simulatorTimer_().startDateTime(); +} + +template +template +double +AdaptiveTimeStepping::SubStepIteration:: +suggestedNextTimestep_() const +{ + return this->adaptive_time_stepping_.suggestedNextStep(); +} + +template +template +double +AdaptiveTimeStepping::SubStepIteration:: +timeStepControlComputeEstimate_(const double dt, const int iterations, double elapsed) const +{ + // create object to compute the time error, simply forwards the call to the model + SolutionTimeErrorSolverWrapper relative_change{solver_()}; + return this->adaptive_time_stepping_.time_step_control_->computeTimeStepSize( + dt, iterations, relative_change, elapsed); +} + +template +template +bool +AdaptiveTimeStepping::SubStepIteration:: +timeStepVerbose_() const +{ + return this->adaptive_time_stepping_.timestep_verbose_; +} + +// The suggested time step is the stepsize that will be used as a first try for +// the next sub step. It is updated at the end of each substep. It can also be +// updated by the TUNING or NEXTSTEP keywords at the beginning of each report step or +// at the beginning of each substep by the ACTIONX keyword (via NEXTSTEP), this is +// done by the maybeUpdateTuning_() method which is called at the beginning of each substep +// (and the begginning of each report step). Note that the WCYCLE keyword can also update the +// suggested time step via the maybeUpdateTuning_() method. +template +template +void +AdaptiveTimeStepping::SubStepIteration:: +updateSuggestedNextStep_() +{ + auto suggested_next_step = this->substep_timer_.currentStepLength(); + if (! std::isfinite(suggested_next_step)) { // check for NaN + suggested_next_step = this->original_time_step_; + } + if (timeStepVerbose_()) { + std::ostringstream ss; + this->substep_timer_.report(ss); + ss << "Suggested next step size = " + << unit::convert::to(suggested_next_step, unit::day) << " (days)" << std::endl; + OpmLog::debug(ss.str()); } + setSuggestedNextStep_(suggested_next_step); } -} // namespace Opm +template +template +bool +AdaptiveTimeStepping::SubStepIteration:: +useNewtonIteration_() const +{ + return this->adaptive_time_stepping_.use_newton_iteration_; +} -#endif +template +template +double +AdaptiveTimeStepping::SubStepIteration:: +writeOutput_() const +{ + time::StopWatch perf_timer; + perf_timer.start(); + auto& problem = solver_().model().simulator().problem(); + problem.writeOutput(true); + return perf_timer.secsSinceStart(); +} + +/************************************************ + * Private class SolutionTimeErrorSolverWrapper + * **********************************************/ + +template +template +AdaptiveTimeStepping::SolutionTimeErrorSolverWrapper::SolutionTimeErrorSolverWrapper( + const Solver& solver +) + : solver_{solver} +{} + +template +template +double AdaptiveTimeStepping::SolutionTimeErrorSolverWrapper::relativeChange() const +{ + // returns: || u^n+1 - u^n || / || u^n+1 || + return solver_.model().relativeChange(); +} + +} // namespace Opm diff --git a/opm/simulators/timestepping/SimulatorTimerInterface.hpp b/opm/simulators/timestepping/SimulatorTimerInterface.hpp index 6875db8711d..3948609bffe 100644 --- a/opm/simulators/timestepping/SimulatorTimerInterface.hpp +++ b/opm/simulators/timestepping/SimulatorTimerInterface.hpp @@ -40,14 +40,15 @@ namespace Opm /// destructor virtual ~SimulatorTimerInterface() {} - /// Current step number. This is the number of timesteps that - /// has been completed from the start of the run. The time - /// after initialization but before the simulation has started - /// is timestep number zero. - virtual int currentStepNum() const = 0; + // ----------------------------------------------------------- + // Pure virtual functions to be implemented by derived classes + // ----------------------------------------------------------- - /// Current report step number. This might differ from currentStepNum in case of sub stepping - virtual int reportStepNum() const { return currentStepNum(); } + /// advance time by currentStepLength + virtual void advance() = 0 ; + + /// return copy of current timer instance + virtual std::unique_ptr< SimulatorTimerInterface > clone () const = 0; /// Current step length. This is the length of the step /// the simulator will take in the next iteration. @@ -55,6 +56,25 @@ namespace Opm /// @note if done(), it is an error to call currentStepLength(). virtual double currentStepLength() const = 0; + /// Current step number. This is the number of timesteps that + /// has been completed from the start of the run. The time + /// after initialization but before the simulation has started + /// is timestep number zero. + virtual int currentStepNum() const = 0; + + /// Return true if timer indicates that simulation of timer interval is finished + virtual bool done() const = 0; + + /// Whether the current step is the first step. + virtual bool initialStep() const = 0; + + /// Return true if last time step failed + virtual bool lastStepFailed() const = 0; + + /// Time elapsed since the start of the simulation until the + /// beginning of the current time step [s]. + virtual double simulationTimeElapsed() const = 0; + /// Previous step length. This is the length of the step that /// was taken to arrive at this time. /// @@ -63,30 +83,13 @@ namespace Opm /// it is an error to call stepLengthTaken(). virtual double stepLengthTaken () const = 0; - /// Previous report step length. This is the length of the step that - /// was taken to arrive at this report step time. - /// - /// @note if no increments have been done (i.e. the timer is - /// still in its constructed state and reportStepNum() == 0), - /// it is an error to call stepLengthTaken(). - virtual double reportStepLengthTaken () const { return stepLengthTaken(); } - - /// Time elapsed since the start of the simulation until the - /// beginning of the current time step [s]. - virtual double simulationTimeElapsed() const = 0; - - /// advance time by currentStepLength - virtual void advance() = 0 ; - - /// Return true if timer indicates that simulation of timer interval is finished - virtual bool done() const = 0; - - /// Whether the current step is the first step. - virtual bool initialStep() const = 0; - /// Return start date of simulation virtual boost::posix_time::ptime startDateTime() const = 0; + // ----------------------------------------------------------------- + // Virtual functions (not pure) to allow for default implementations + // ----------------------------------------------------------------- + /// Return the current time as a posix time object. virtual boost::posix_time::ptime currentDateTime() const; @@ -94,11 +97,17 @@ namespace Opm /// 1970) until the current time step begins [s]. virtual time_t currentPosixTime() const; - /// Return true if last time step failed - virtual bool lastStepFailed() const = 0; + /// Previous report step length. This is the length of the step that + /// was taken to arrive at this report step time. + /// + /// @note if no increments have been done (i.e. the timer is + /// still in its constructed state and reportStepNum() == 0), + /// it is an error to call stepLengthTaken(). + virtual double reportStepLengthTaken () const { return stepLengthTaken(); } + + /// Current report step number. This might differ from currentStepNum in case of sub stepping + virtual int reportStepNum() const { return currentStepNum(); } - /// return copy of current timer instance - virtual std::unique_ptr< SimulatorTimerInterface > clone () const = 0; }; diff --git a/opm/simulators/utils/readDeck.cpp b/opm/simulators/utils/readDeck.cpp index 102a9f89eb3..aae46fcd094 100644 --- a/opm/simulators/utils/readDeck.cpp +++ b/opm/simulators/utils/readDeck.cpp @@ -504,7 +504,6 @@ Opm::setupLogging(Parallel::Communication& comm, } logFileStream << ".PRT"; debugFileStream << ".DBG"; - FileOutputMode output; { static std::map stringToOutputMode = @@ -567,7 +566,6 @@ void Opm::readDeck(Opm::Parallel::Communication comm, const bool slaveMode) { auto errorGuard = std::make_unique(); - int parseSuccess = 1; // > 0 is success std::string failureMessage; From 943d7fc2ce3004fe7513283207cea56ad1d4b520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Thu, 21 Nov 2024 11:18:20 +0100 Subject: [PATCH 17/33] Enable start at any report step Enable master and slaves to start at any report step. In the previous commits, only first report step was supported. --- CMakeLists_files.cmake | 5 + opm/simulators/flow/ReservoirCoupling.cpp | 78 +++++ opm/simulators/flow/ReservoirCoupling.hpp | 7 + .../flow/ReservoirCouplingMaster.cpp | 206 ++++------- .../flow/ReservoirCouplingMaster.hpp | 51 ++- .../flow/ReservoirCouplingSlave.cpp | 176 +++++++++- .../flow/ReservoirCouplingSlave.hpp | 14 +- .../flow/ReservoirCouplingSpawnSlaves.cpp | 327 ++++++++++++++++++ .../flow/ReservoirCouplingSpawnSlaves.hpp | 73 ++++ .../flow/SimulatorFullyImplicitBlackoil.hpp | 58 +++- 10 files changed, 823 insertions(+), 172 deletions(-) create mode 100644 opm/simulators/flow/ReservoirCoupling.cpp create mode 100644 opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp create mode 100644 opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 0f625a2bfcb..02860168061 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -112,8 +112,10 @@ list (APPEND MAIN_SOURCE_FILES opm/simulators/flow/RegionPhasePVAverage.cpp opm/simulators/flow/SimulatorConvergenceOutput.cpp opm/simulators/flow/SimulatorFullyImplicitBlackoil.cpp + opm/simulators/flow/ReservoirCoupling.cpp opm/simulators/flow/ReservoirCouplingMaster.cpp opm/simulators/flow/ReservoirCouplingSlave.cpp + opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp opm/simulators/flow/SimulatorReportBanners.cpp opm/simulators/flow/SimulatorSerializer.cpp opm/simulators/flow/SolutionContainers.cpp @@ -851,6 +853,9 @@ list (APPEND PUBLIC_HEADER_FILES opm/simulators/flow/RSTConv.hpp opm/simulators/flow/RegionPhasePVAverage.hpp opm/simulators/flow/SimulatorConvergenceOutput.hpp + opm/simulators/flow/ReservoirCouplingMaster.hpp + opm/simulators/flow/ReservoirCouplingSlave.hpp + opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp opm/simulators/flow/SimulatorReportBanners.hpp opm/simulators/flow/SimulatorSerializer.hpp diff --git a/opm/simulators/flow/ReservoirCoupling.cpp b/opm/simulators/flow/ReservoirCoupling.cpp new file mode 100644 index 00000000000..69c4e3e621f --- /dev/null +++ b/opm/simulators/flow/ReservoirCoupling.cpp @@ -0,0 +1,78 @@ +/* + Copyright 2024 Equinor AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#include + +#include +#include +#include + +#include + +namespace Opm { +namespace ReservoirCoupling { + +void custom_error_handler_(MPI_Comm* comm, int* err, const std::string &msg) +{ + // It can be useful to have a custom error handler for debugging purposes. + // If not, MPI will use the default error handler which aborts the program, and + // it can be difficult to determine exactly where the error occurred. With a custom + // error handler you can at least set a breakpoint here to get a backtrace. + int rank; + MPI_Comm_rank(*comm, &rank); + char err_string[MPI_MAX_ERROR_STRING]; + int len; + MPI_Error_string(*err, err_string, &len); + const std::string error_msg = fmt::format( + "Reservoir coupling MPI error handler {} rank {}: {}", msg, rank, err_string); + // NOTE: The output to terminal vie stderr or stdout has been redirected to files for + // the slaves, see Main.cpp. So the following output will not be visible in the terminal + // if we are called from a slave process. + // std::cerr << error_msg << std::endl; + OpmLog::error(error_msg); // Output to log file, also shows the message on stdout for master. + MPI_Abort(*comm, *err); +} + +void custom_error_handler_slave_(MPI_Comm* comm, int* err, ...) +{ + custom_error_handler_(comm, err, "slave"); +} + +void custom_error_handler_master_(MPI_Comm* comm, int* err, ...) +{ + custom_error_handler_(comm, err, "master"); +} + +void setErrhandler(MPI_Comm comm, bool is_master) +{ + MPI_Errhandler errhandler; + // NOTE: Lambdas with captures cannot be used as C function pointers, also + // converting lambdas that use ellipsis "..." as last argument to a C function pointer + // is not currently possible, so we need to use static functions instead. + if (is_master) { + MPI_Comm_create_errhandler(custom_error_handler_master_, &errhandler); + } + else { + MPI_Comm_create_errhandler(custom_error_handler_slave_, &errhandler); + } + MPI_Comm_set_errhandler(comm, errhandler); +} + +} // namespace ReservoirCoupling +} // namespace Opm diff --git a/opm/simulators/flow/ReservoirCoupling.hpp b/opm/simulators/flow/ReservoirCoupling.hpp index 75cffd73e72..31d5e266abf 100644 --- a/opm/simulators/flow/ReservoirCoupling.hpp +++ b/opm/simulators/flow/ReservoirCoupling.hpp @@ -29,9 +29,12 @@ namespace ReservoirCoupling { enum class MessageTag : int { SlaveSimulationStartDate, + SlaveActivationDate, SlaveProcessTermination, SlaveNextReportDate, SlaveNextTimeStep, + MasterGroupNames, + MasterGroupNamesSize, }; // Custom deleter for MPI_Comm @@ -194,6 +197,10 @@ class TimePoint { } }; +// Helper functions +void custom_error_handler_(MPI_Comm* comm, int* err, const std::string &msg); +void setErrhandler(MPI_Comm comm, bool is_master); + } // namespace ReservoirCoupling } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 57bf674fc4b..1bb61874044 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -38,32 +39,54 @@ namespace Opm { -ReservoirCouplingMaster::ReservoirCouplingMaster( +ReservoirCouplingMaster:: +ReservoirCouplingMaster( const Parallel::Communication &comm, - const Schedule &schedule + const Schedule &schedule, + int argc, char **argv ) : comm_{comm}, - schedule_{schedule} + schedule_{schedule}, + argc_{argc}, + argv_{argv} { - this->start_date_ = this->schedule_.getStartTime(); + this->activation_date_ = this->getMasterActivationDate_(); } // ------------------ // Public methods // ------------------ -double ReservoirCouplingMaster::maybeChopSubStep( - double suggested_timestep_original, double elapsed_time -) const +void +ReservoirCouplingMaster:: +maybeSpawnSlaveProcesses(int report_step) +{ + if (this->numSlavesStarted() > 0) { // We have already spawned the slave processes + return; + } + const auto& rescoup = this->schedule_[report_step].rescoup(); + auto slave_count = rescoup.slaveCount(); + auto master_group_count = rescoup.masterGroupCount(); + if (slave_count > 0 && master_group_count > 0) { + ReservoirCouplingSpawnSlaves spawn_slaves{*this, rescoup, report_step}; + spawn_slaves.spawn(); + } +} + + +double +ReservoirCouplingMaster:: +maybeChopSubStep(double suggested_timestep_original, double elapsed_time) const { // Check if the suggested timestep needs to be adjusted based on the slave processes' // next report step, or if the slave process has not started yet: the start of a slave process. - double start_date = static_cast(this->start_date_); + double start_date = this->schedule_.getStartTime(); TimePoint step_start_date{start_date + elapsed_time}; TimePoint step_end_date{step_start_date + suggested_timestep_original}; TimePoint suggested_timestep{suggested_timestep_original}; - for (unsigned int i = 0; i < this->num_slaves_; i++) { - double slave_start_date = static_cast(this->slave_start_dates_[i]); + auto num_slaves = this->numSlavesStarted(); + for (std::size_t i = 0; i < num_slaves; i++) { + double slave_start_date = this->slave_start_dates_[i]; TimePoint slave_next_report_date{this->slave_next_report_time_offsets_[i] + slave_start_date}; if (slave_start_date > step_end_date) { // The slave process has not started yet, and will not start during this timestep @@ -89,7 +112,10 @@ double ReservoirCouplingMaster::maybeChopSubStep( return suggested_timestep.getTime(); } -void ReservoirCouplingMaster::sendNextTimeStepToSlaves(double timestep) { +void +ReservoirCouplingMaster:: +sendNextTimeStepToSlaves(double timestep) +{ if (this->comm_.rank() == 0) { for (unsigned int i = 0; i < this->master_slave_comm_.size(); i++) { MPI_Send( @@ -98,7 +124,7 @@ void ReservoirCouplingMaster::sendNextTimeStepToSlaves(double timestep) { /*datatype=*/MPI_DOUBLE, /*dest_rank=*/0, /*tag=*/static_cast(MessageTag::SlaveNextTimeStep), - *this->master_slave_comm_[i].get() + this->getSlaveComm(i) ); OpmLog::info(fmt::format( "Sent next time step {} from master process rank 0 to slave process " @@ -108,9 +134,15 @@ void ReservoirCouplingMaster::sendNextTimeStepToSlaves(double timestep) { } } -void ReservoirCouplingMaster::receiveNextReportDateFromSlaves() { + +void +ReservoirCouplingMaster:: +receiveNextReportDateFromSlaves() +{ + auto num_slaves = this->numSlavesStarted(); + OpmLog::info("Receiving next report dates from slave processes"); if (this->comm_.rank() == 0) { - for (unsigned int i = 0; i < this->master_slave_comm_.size(); i++) { + for (unsigned int i = 0; i < num_slaves; i++) { double slave_next_report_time_offset; // Elapsed time from the beginning of the simulation int result = MPI_Recv( &slave_next_report_time_offset, @@ -118,7 +150,7 @@ void ReservoirCouplingMaster::receiveNextReportDateFromSlaves() { /*datatype=*/MPI_DOUBLE, /*source_rank=*/0, /*tag=*/static_cast(MessageTag::SlaveNextReportDate), - *this->master_slave_comm_[i].get(), + this->getSlaveComm(i), MPI_STATUS_IGNORE ); if (result != MPI_SUCCESS) { @@ -134,143 +166,41 @@ void ReservoirCouplingMaster::receiveNextReportDateFromSlaves() { } } this->comm_.broadcast( - this->slave_next_report_time_offsets_.data(), this->num_slaves_, /*emitter_rank=*/0 + this->slave_next_report_time_offsets_.data(), /*count=*/num_slaves, /*emitter_rank=*/0 ); OpmLog::info("Broadcasted slave next report dates to all ranks"); } -void ReservoirCouplingMaster::receiveSimulationStartDateFromSlaves() { - if (this->comm_.rank() == 0) { - // Ensure that std::time_t is of type long since we are sending it over MPI with MPI_LONG - static_assert(std::is_same::value, "std::time_t is not of type long"); - for (unsigned int i = 0; i < this->master_slave_comm_.size(); i++) { - std::time_t slave_start_date; - int result = MPI_Recv( - &slave_start_date, - /*count=*/1, - /*datatype=*/MPI_LONG, - /*source_rank=*/0, - /*tag=*/static_cast(MessageTag::SlaveSimulationStartDate), - *this->master_slave_comm_[i].get(), - MPI_STATUS_IGNORE - ); - if (result != MPI_SUCCESS) { - OPM_THROW(std::runtime_error, "Failed to receive simulation start date from slave process"); - } - if (slave_start_date < this->start_date_) { - OPM_THROW(std::runtime_error, "Slave process start date is before master process start date"); - } - this->slave_start_dates_[i] = slave_start_date; - OpmLog::info( - fmt::format( - "Received simulation start date from slave process with name: {}. " - "Start date: {}", this->slave_names_[i], slave_start_date - ) - ); - } - } - this->comm_.broadcast(this->slave_start_dates_.data(), this->num_slaves_, /*emitter_rank=*/0); - OpmLog::info("Broadcasted slave start dates to all ranks"); -} -// NOTE: This functions is executed for all ranks, but only rank 0 will spawn -// the slave processes -void ReservoirCouplingMaster::spawnSlaveProcesses(int argc, char **argv) { - const auto& rescoup = this->schedule_[0].rescoup(); - char *flow_program_name = argv[0]; - for (const auto& [slave_name, slave] : rescoup.slaves()) { - auto master_slave_comm = MPI_Comm_Ptr(new MPI_Comm(MPI_COMM_NULL)); - const auto& data_file_name = slave.dataFilename(); - const auto& directory_path = slave.directoryPath(); - // Concatenate the directory path and the data file name to get the full path - std::filesystem::path dir_path{directory_path}; - std::filesystem::path data_file{data_file_name}; - std::filesystem::path full_path = dir_path / data_file; - std::string log_filename; // the getSlaveArgv() function will set this - std::vector slave_argv = getSlaveArgv( - argc, argv, full_path, slave_name, log_filename - ); - auto num_procs = slave.numprocs(); - std::vector errcodes(num_procs); - // TODO: We need to decide how to handle the output from the slave processes.. - // As far as I can tell, open MPI does not support redirecting the output - // to a file, so we might need to implement a custom solution for this - int spawn_result = MPI_Comm_spawn( - flow_program_name, - slave_argv.data(), - /*maxprocs=*/num_procs, - /*info=*/MPI_INFO_NULL, - /*root=*/0, // Rank 0 spawns the slave processes - /*comm=*/this->comm_, - /*intercomm=*/master_slave_comm.get(), - /*array_of_errcodes=*/errcodes.data() - ); - if (spawn_result != MPI_SUCCESS || (*master_slave_comm == MPI_COMM_NULL)) { - for (unsigned int i = 0; i < num_procs; i++) { - if (errcodes[i] != MPI_SUCCESS) { - char error_string[MPI_MAX_ERROR_STRING]; - int length_of_error_string; - MPI_Error_string(errcodes[i], error_string, &length_of_error_string); - OpmLog::info(fmt::format("Error spawning process {}: {}", i, error_string)); - } - } - OPM_THROW(std::runtime_error, "Failed to spawn slave process"); - } - OpmLog::info( - fmt::format( - "Spawned reservoir coupling slave simulation for slave with name: " - "{}. Standard output logfile name: {}.log", slave_name, slave_name - ) - ); - this->master_slave_comm_.push_back(std::move(master_slave_comm)); - this->slave_names_.push_back(slave_name); - this->num_slaves_++; - } - this->slave_start_dates_.resize(this->num_slaves_); - this->slave_next_report_time_offsets_.resize(this->num_slaves_); +std::size_t +ReservoirCouplingMaster:: +numSlavesStarted() const +{ + return this->slave_names_.size(); } // ------------------ // Private methods // ------------------ -std::vector ReservoirCouplingMaster::getSlaveArgv( - int argc, - char **argv, - const std::filesystem::path &data_file, - const std::string &slave_name, - std::string &log_filename -) { - // Calculate the size of the slave_argv vector like this: - // - We will not use the first argument in argv, as it is the program name - // - Replace the data file name in argv with the data_file path - // - Insert as first argument --slave-log-file=.log - // - Also add the argument "--slave=true" to the argv - // - Add a nullptr at the end of the argv - // So the size of the slave_argv vector will be argc + 2 - // - // Assume: All parameters will be on the form --parameter=value (as a string without spaces) - // - // Important: The returned vector will have pointers to argv pointers, - // data_file string buffer, and slave_name string buffer. So the caller - // must ensure that these buffers are not deallocated before the slave_argv has - // been used. - std::vector slave_argv(argc + 2); - log_filename = "--slave-log-file=" + slave_name; // .log extension will be added by the slave process - slave_argv[0] = const_cast(log_filename.c_str()); - for (int i = 1; i < argc; i++) { - // Check if the argument starts with "--", if not, we will assume it is a positional argument - // and we will replace it with the data file path - if (std::string(argv[i]).substr(0, 2) == "--") { - slave_argv[i] = argv[i]; - } else { - slave_argv[i] = const_cast(data_file.c_str()); +double +ReservoirCouplingMaster:: +getMasterActivationDate_() const +{ + // Assume master mode is activated when the first SLAVES keyword is encountered in the schedule + double start_date = this->schedule_.getStartTime(); + for (std::size_t report_step = 0; report_step < this->schedule_.size(); ++report_step) { + auto rescoup = this->schedule_[report_step].rescoup(); + if (rescoup.slaveCount() > 0) { + return start_date + this->schedule_.seconds(report_step); } } - slave_argv[argc] = const_cast("--slave=true"); - slave_argv[argc+1] = nullptr; - return slave_argv; + // NOTE: Consistency between SLAVES and GRUPMAST keywords has already been checked in + // init() in SimulatorFullyImplicitBlackoil.hpp + OPM_THROW(std::runtime_error, "Reservoir coupling: Failed to find master activation time: " + "No SLAVES keyword found in schedule"); } + } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingMaster.hpp b/opm/simulators/flow/ReservoirCouplingMaster.hpp index 182ec0f0188..a79ff1bcaef 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.hpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.hpp @@ -37,31 +37,54 @@ class ReservoirCouplingMaster { using MPI_Comm_Ptr = ReservoirCoupling::MPI_Comm_Ptr; using MessageTag = ReservoirCoupling::MessageTag; using TimePoint = ReservoirCoupling::TimePoint; - ReservoirCouplingMaster(const Parallel::Communication &comm, const Schedule &schedule); + ReservoirCouplingMaster( + const Parallel::Communication &comm, + const Schedule &schedule, + int argc, char **argv + ); + bool activated() { return this->numSlavesStarted() > 0; } + void addSlaveCommunicator(MPI_Comm_Ptr comm) { + this->master_slave_comm_.push_back(std::move(comm)); + } + void addSlaveName(const std::string &name) { this->slave_names_.push_back(name); } + void addSlaveStartDate(std::time_t date) { this->slave_start_dates_.push_back(date); } + double getActivationDate() const { return this->activation_date_; } + int getArgc() const { return this->argc_; } + char *getArgv(int index) const { return this->argv_[index]; } + char **getArgv() const { return this->argv_; } + const Parallel::Communication &getComm() const { return this->comm_; } + double getSimulationStartDate() const { return this->schedule_.getStartTime(); } + const MPI_Comm &getSlaveComm(int index) const { return *this->master_slave_comm_[index].get(); } + const std::string &getSlaveName(int index) const { return this->slave_names_[index]; } + const double *getSlaveStartDates() { return this->slave_start_dates_.data(); } double maybeChopSubStep(double suggested_timestep, double current_time) const; - void spawnSlaveProcesses(int argc, char **argv); - void receiveSimulationStartDateFromSlaves(); + void maybeSpawnSlaveProcesses(int report_step); + std::size_t numSlavesStarted() const; void receiveNextReportDateFromSlaves(); + void resizeSlaveStartDates(int size) { this->slave_start_dates_.resize(size); } + void resizeNextReportDates(int size) { this->slave_next_report_time_offsets_.resize(size); } void sendNextTimeStepToSlaves(double next_time_step); private: - std::vector getSlaveArgv( - int argc, - char **argv, - const std::filesystem::path &data_file, - const std::string &slave_name, - std::string &log_filename - ); + double getMasterActivationDate_() const; const Parallel::Communication &comm_; const Schedule& schedule_; - std::time_t start_date_; // Master process' simulation start date - std::size_t num_slaves_ = 0; // Initially zero, will be updated in spawnSlaveProcesses() + int argc_; + char **argv_; std::vector master_slave_comm_; // MPI communicators for the slave processes std::vector slave_names_; - std::vector slave_start_dates_; - std::vector slave_next_report_time_offsets_; // Elapsed time from the beginning of the simulation + // The start dates are in whole seconds since the epoch. We use a double to store the value + // since both schedule_.getStartTime() and schedule_.stepLength(report_step) returns + // a double value representing whole seconds. + // However, note that schedule_[report_step].start_time() returns a time_point + // which can include milliseconds. The double values are also convenient when we need to + // to add fractions of seconds for sub steps to the start date. + std::vector slave_start_dates_; + // Elapsed time from the beginning of the simulation + std::vector slave_next_report_time_offsets_; + double activation_date_{0.0}; // The date when SLAVES is encountered in the schedule }; } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingSlave.cpp b/opm/simulators/flow/ReservoirCouplingSlave.cpp index 5f567feb689..45f0c65205a 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.cpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.cpp @@ -33,7 +33,8 @@ namespace Opm { -ReservoirCouplingSlave::ReservoirCouplingSlave( +ReservoirCouplingSlave:: +ReservoirCouplingSlave( const Parallel::Communication &comm, const Schedule &schedule, const SimulatorTimer &timer @@ -47,9 +48,12 @@ ReservoirCouplingSlave::ReservoirCouplingSlave( if (*(this->slave_master_comm_) == MPI_COMM_NULL) { OPM_THROW(std::runtime_error, "Slave process is not spawned by a master process"); } + ReservoirCoupling::setErrhandler(*this->slave_master_comm_, /*is_master=*/false); } -double ReservoirCouplingSlave::receiveNextTimeStepFromMaster() { +double +ReservoirCouplingSlave:: +receiveNextTimeStepFromMaster() { double timestep; if (this->comm_.rank() == 0) { int result = MPI_Recv( @@ -68,13 +72,64 @@ double ReservoirCouplingSlave::receiveNextTimeStepFromMaster() { fmt::format("Slave rank 0 received next timestep {} from master.", timestep) ); } - this->comm_.broadcast(×tep, 1, /*emitter_rank=*/0); + this->comm_.broadcast(×tep, /*count=*/1, /*emitter_rank=*/0); OpmLog::info("Broadcasted slave next time step to all ranks"); return timestep; } +void +ReservoirCouplingSlave:: +receiveMasterGroupNamesFromMasterProcess() { + std::size_t size; + std::vector group_names; + if (this->comm_.rank() == 0) { + MPI_Aint asize = 0; + int result = MPI_Recv( + &asize, + /*count=*/1, + /*datatype=*/MPI_AINT, + /*source_rank=*/0, + /*tag=*/static_cast(MessageTag::MasterGroupNamesSize), + *this->slave_master_comm_, + MPI_STATUS_IGNORE + ); + OpmLog::info("Received master group names size from master process rank 0"); + if (result != MPI_SUCCESS) { + OPM_THROW(std::runtime_error, + "Failed to receive master group names (size) from master process"); + } + // NOTE: MPI_Aint and std::size_t should be compatible on most systems, but we will + // cast it to std::size_t to avoid any potential issues + size = static_cast(asize); + group_names.resize(size); + int result2 = MPI_Recv( + group_names.data(), + /*count=*/size, + /*datatype=*/MPI_CHAR, + /*source_rank=*/0, + /*tag=*/static_cast(MessageTag::MasterGroupNames), + *this->slave_master_comm_, + MPI_STATUS_IGNORE + ); + if (result2 != MPI_SUCCESS) { + OPM_THROW(std::runtime_error, + "Failed to receive master group names from master process"); + } + OpmLog::info("Received master group names from master process rank 0"); + } + this->comm_.broadcast(&size, /*count=*/1, /*emitter_rank=*/0); + if (this->comm_.rank() != 0) { + group_names.resize(size); + } + this->comm_.broadcast(group_names.data(), /*count=*/size, /*emitter_rank=*/0); + this->saveMasterGroupNamesAsMap_(group_names); + this->checkGrupSlavGroupNames_(); +} -void ReservoirCouplingSlave::sendNextReportDateToMasterProcess() { +void +ReservoirCouplingSlave:: +sendNextReportDateToMasterProcess() const +{ if (this->comm_.rank() == 0) { double elapsed_time = this->timer_.simulationTimeElapsed(); double current_step_length = this->timer_.currentStepLength(); @@ -93,15 +148,36 @@ void ReservoirCouplingSlave::sendNextReportDateToMasterProcess() { } } -void ReservoirCouplingSlave::sendSimulationStartDateToMasterProcess() { +void +ReservoirCouplingSlave:: +sendActivationDateToMasterProcess() const +{ + if (this->comm_.rank() == 0) { + // NOTE: The master process needs the s + double activation_date = this->getGrupSlavActivationDate_(); + MPI_Send( + &activation_date, + /*count=*/1, + /*datatype=*/MPI_DOUBLE, + /*dest_rank=*/0, + /*tag=*/static_cast(MessageTag::SlaveActivationDate), + *this->slave_master_comm_ + ); + OpmLog::info("Sent simulation start date to master process from rank 0"); + } +} + +void +ReservoirCouplingSlave:: +sendSimulationStartDateToMasterProcess() const +{ if (this->comm_.rank() == 0) { - // Ensure that std::time_t is of type long since we are sending it over MPI with MPI_LONG - static_assert(std::is_same::value, "std::time_t is not of type long"); - std::time_t start_date = this->schedule_.getStartTime(); + // NOTE: The master process needs the s + double start_date = this->schedule_.getStartTime(); MPI_Send( &start_date, /*count=*/1, - /*datatype=*/MPI_LONG, + /*datatype=*/MPI_DOUBLE, /*dest_rank=*/0, /*tag=*/static_cast(MessageTag::SlaveSimulationStartDate), *this->slave_master_comm_ @@ -110,4 +186,86 @@ void ReservoirCouplingSlave::sendSimulationStartDateToMasterProcess() { } } +// ------------------ +// Private methods +// ------------------ + +void +ReservoirCouplingSlave:: +checkGrupSlavGroupNames_() +{ + // Validate that each slave group name has a corresponding master group name + bool grup_slav_found = false; + for (std::size_t report_step = 0; report_step < this->schedule_.size(); ++report_step) { + auto rescoup = this->schedule_[report_step].rescoup(); + if (rescoup.grupSlavCount() > 0) { + grup_slav_found = true; + auto grup_slavs = rescoup.grupSlavs(); + for (const auto& [slave_group_name, grup_slav] : grup_slavs) { + auto master_group_name_it = this->master_group_names_.find(slave_group_name); + if (master_group_name_it == this->master_group_names_.end()) { + OPM_THROW(std::runtime_error, + "Reservoir coupling: Failed to find master group name for slave group: " + + slave_group_name); + } + else { + auto master_group_name = master_group_name_it->second; + if (grup_slav.masterGroupName() != master_group_name) { + OPM_THROW(std::runtime_error, + "Reservoir coupling: Inconsistent master group name for slave group: " + + slave_group_name); + } + } + } + } + } + if (!grup_slav_found) { + OPM_THROW(std::runtime_error, "Reservoir coupling: Failed to find slave group names: " + "No GRUPSLAV keyword found in schedule"); + } +} + +double +ReservoirCouplingSlave:: +getGrupSlavActivationDate_() const +{ + double start_date = this->schedule_.getStartTime(); + for (std::size_t report_step = 0; report_step < this->schedule_.size(); ++report_step) { + auto rescoup = this->schedule_[report_step].rescoup(); + if (rescoup.grupSlavCount() > 0) { + return start_date + this->schedule_.seconds(report_step); + } + } + OPM_THROW(std::runtime_error, "Reservoir coupling: Failed to find slave activation time: " + "No GRUPSLAV keyword found in schedule"); +} + +void +ReservoirCouplingSlave:: +maybeActivate(int report_step) { + if (!this->activated()) { + auto rescoup = this->schedule_[report_step].rescoup(); + if (rescoup.grupSlavCount() > 0) { + this->activated_ = true; + } + } +} + +void +ReservoirCouplingSlave:: +saveMasterGroupNamesAsMap_(const std::vector& group_names) { + // Deserialize the group names vector into a map of slavegroup names -> mastergroup names + auto total_size = group_names.size(); + std::size_t offset = 0; + while (offset < total_size) { + std::string master_group{group_names.data() + offset}; + offset += master_group.size() + 1; + assert(offset < total_size); + std::string slave_group{group_names.data() + offset}; + offset += slave_group.size() + 1; + this->master_group_names_[slave_group] = master_group; + } +} + + } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingSlave.hpp b/opm/simulators/flow/ReservoirCouplingSlave.hpp index b7579a890b2..17cb5c02e55 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.hpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.hpp @@ -40,16 +40,26 @@ class ReservoirCouplingSlave { ReservoirCouplingSlave( const Parallel::Communication &comm, const Schedule &schedule, const SimulatorTimer &timer ); - void sendSimulationStartDateToMasterProcess(); - void sendNextReportDateToMasterProcess(); + bool activated() const { return activated_; } + void maybeActivate(int report_step); + void sendActivationDateToMasterProcess() const; + void sendNextReportDateToMasterProcess() const; + void sendSimulationStartDateToMasterProcess() const; + void receiveMasterGroupNamesFromMasterProcess(); double receiveNextTimeStepFromMaster(); private: + void checkGrupSlavGroupNames_(); + double getGrupSlavActivationDate_() const; + void saveMasterGroupNamesAsMap_(const std::vector& group_names); + const Parallel::Communication &comm_; const Schedule& schedule_; const SimulatorTimer &timer_; // MPI parent communicator for a slave process MPI_Comm_Ptr slave_master_comm_{nullptr}; + std::map master_group_names_; + bool activated_{false}; }; } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp new file mode 100644 index 00000000000..be5f4973805 --- /dev/null +++ b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp @@ -0,0 +1,327 @@ +/* + Copyright 2024 Equinor AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#include + +#include + +#include +#include +#include +#include + +#include + +#include + + +#include +#include + +#include + +namespace Opm { + +// ------------------ +// Public methods +// ------------------ + +ReservoirCouplingSpawnSlaves:: +ReservoirCouplingSpawnSlaves( + ReservoirCouplingMaster &master, + const ReservoirCoupling::CouplingInfo &rescoup, + const int report_step +) : + master_{master}, + rescoup_{rescoup}, + report_step_{report_step}, + comm_{master.getComm()} +{ +} + +void +ReservoirCouplingSpawnSlaves:: +spawn() +{ + // spawn the slave processes and get the simulation start date from the slaves, + // and finally send the master group names to the slaves + this->spawnSlaveProcesses_(); + this->receiveActivationDateFromSlaves_(); + this->receiveSimulationStartDateFromSlaves_(); + this->sendMasterGroupNamesToSlaves_(); + this->prepareTimeStepping_(); + OpmLog::info("Reservoir coupling slave processes was spawned successfully"); +} + + +// ------------------ +// Private methods +// ------------------ + +std::vector ReservoirCouplingSpawnSlaves:: +getSlaveArgv_( + const std::filesystem::path &data_file, + const std::string &slave_name, + std::string &log_filename) const +{ + // Calculate the size of the slave_argv vector like this: + // - We will not use the first argument in argv, as it is the program name + // - Replace the data file name in argv with the data_file path + // - Insert as first argument --slave-log-file=.log + // - Also add the argument "--slave=true" to the argv + // - Add a nullptr at the end of the argv + // So the size of the slave_argv vector will be argc + 2 + // + // Assume: All parameters will be on the form --parameter=value (as a string without spaces) + // + // Important: The returned vector will have pointers to argv pointers, + // data_file string buffer, and slave_name string buffer. So the caller + // must ensure that these buffers are not deallocated before the slave_argv has + // been used. + auto argc = this->master_.getArgc(); + auto argv = this->master_.getArgv(); + std::vector slave_argv(argc + 2); + log_filename = "--slave-log-file=" + slave_name; // .log extension will be added by the slave process + slave_argv[0] = const_cast(log_filename.c_str()); + for (int i = 1; i < argc; i++) { + // Check if the argument starts with "--", if not, we will assume it is a positional argument + // and we will replace it with the data file path + if (std::string(argv[i]).substr(0, 2) == "--") { + slave_argv[i] = argv[i]; + } else { + slave_argv[i] = const_cast(data_file.c_str()); + } + } + slave_argv[argc] = const_cast("--slave=true"); + slave_argv[argc+1] = nullptr; + return slave_argv; +} + +std::pair, std::size_t> +ReservoirCouplingSpawnSlaves:: +getMasterGroupNamesForSlave_(const std::string &slave_name) const +{ + // For the given slave name, get all pairs of master group names and slave group names + // Serialize the data such that it can be sent over MPI in one chunk + auto master_groups = this->rescoup_.masterGroups(); + std::vector data; + std::vector master_group_names; + for (const auto& [group_name, master_group] : master_groups) { + if (master_group.slaveName() == slave_name) { + data.push_back(group_name); + data.push_back(master_group.slaveGroupName()); + } + } + assert(data.size() % 2 == 0); + assert(data.size() > 0); + return this->serializeStrings_(std::move(data)); +} + +void +ReservoirCouplingSpawnSlaves:: +prepareTimeStepping_() +{ + // Prepare the time stepping for the master process + // This is done after the slave processes have been spawned + // and the master group names have been sent to the slaves + auto num_slaves = this->master_.numSlavesStarted(); + this->master_.resizeNextReportDates(num_slaves); +} + +void +ReservoirCouplingSpawnSlaves:: +receiveActivationDateFromSlaves_() +{ + // Currently, we only use the activation date to check that no slave process + // starts before the master process. + auto num_slaves = this->master_.numSlavesStarted(); + if (this->comm_.rank() == 0) { + for (unsigned int i = 0; i < num_slaves; i++) { + double slave_activation_date; + int result = MPI_Recv( + &slave_activation_date, + /*count=*/1, + /*datatype=*/MPI_DOUBLE, + /*source_rank=*/0, + /*tag=*/static_cast(MessageTag::SlaveActivationDate), + this->master_.getSlaveComm(i), + MPI_STATUS_IGNORE + ); + if (result != MPI_SUCCESS) { + OPM_THROW(std::runtime_error, "Failed to receive activation date from slave process"); + } + if (slave_activation_date < this->master_.getActivationDate()) { + OPM_THROW(std::runtime_error, "Slave process start date is earlier than " + "the master process' activation date"); + } + OpmLog::info( + fmt::format( + "Received activation date from slave process with name: {}. " + "Activation date: {}", this->master_.getSlaveName(i), slave_activation_date + ) + ); + } + } +} + +void +ReservoirCouplingSpawnSlaves:: +receiveSimulationStartDateFromSlaves_() +{ + auto num_slaves = this->master_.numSlavesStarted(); + if (this->comm_.rank() == 0) { + for (unsigned int i = 0; i < num_slaves; i++) { + double slave_start_date; + int result = MPI_Recv( + &slave_start_date, + /*count=*/1, + /*datatype=*/MPI_DOUBLE, + /*source_rank=*/0, + /*tag=*/static_cast(MessageTag::SlaveSimulationStartDate), + this->master_.getSlaveComm(i), + MPI_STATUS_IGNORE + ); + if (result != MPI_SUCCESS) { + OPM_THROW(std::runtime_error, "Failed to receive start date from slave process"); + } + this->master_.addSlaveStartDate(slave_start_date); + OpmLog::info( + fmt::format( + "Received start date from slave process with name: {}. " + "Start date: {}", this->master_.getSlaveName(i), slave_start_date + ) + ); + } + } + if (this->comm_.rank() != 0) { + // Ensure that all ranks have the same number of slave activation dates + this->master_.resizeSlaveStartDates(num_slaves); + } + const double* data = this->master_.getSlaveStartDates(); + this->comm_.broadcast(const_cast(data), /*count=*/num_slaves, /*emitter_rank=*/0); + OpmLog::info("Broadcasted slave start dates to all ranks"); +} + +void +ReservoirCouplingSpawnSlaves:: +sendMasterGroupNamesToSlaves_() +{ + if (this->comm_.rank() == 0) { + auto num_slaves = this->master_.numSlavesStarted(); + for (unsigned int i = 0; i < num_slaves; i++) { + auto slave_name = this->master_.getSlaveName(i); + auto [group_names, size] = this->getMasterGroupNamesForSlave_(slave_name); + // NOTE: size should be of type std::size_t, so we can safely cast it to MPI_AINT + MPI_Send( + &size, + /*count=*/1, + /*datatype=*/MPI_AINT, + /*dest_rank=*/0, + /*tag=*/static_cast(MessageTag::MasterGroupNamesSize), + this->master_.getSlaveComm(i) + ); + MPI_Send( + group_names.data(), + /*count=*/size, + /*datatype=*/MPI_CHAR, + /*dest_rank=*/0, + /*tag=*/static_cast(MessageTag::MasterGroupNames), + this->master_.getSlaveComm(i) + ); + OpmLog::info(fmt::format( + "Sent master group names to slave process rank 0 with name: {}", slave_name) + ); + } + } +} + +std::pair, std::size_t> +ReservoirCouplingSpawnSlaves:: +serializeStrings_(std::vector data) const +{ + std::size_t total_size = 0; + std::vector serialized_data; + for (const auto& str: data) { + for (const auto& c: str) { + serialized_data.push_back(c); + } + serialized_data.push_back('\0'); + total_size += str.size() + 1; + } + return {serialized_data, total_size}; +} + +// NOTE: This functions is executed for all ranks, but only rank 0 will spawn +// the slave processes +void +ReservoirCouplingSpawnSlaves:: +spawnSlaveProcesses_() +{ + char *flow_program_name = this->master_.getArgv(0); + for (const auto& [slave_name, slave] : this->rescoup_.slaves()) { + auto master_slave_comm = MPI_Comm_Ptr(new MPI_Comm(MPI_COMM_NULL)); + const auto& data_file_name = slave.dataFilename(); + const auto& directory_path = slave.directoryPath(); + // Concatenate the directory path and the data file name to get the full path + std::filesystem::path dir_path{directory_path}; + std::filesystem::path data_file{data_file_name}; + std::filesystem::path full_path = dir_path / data_file; + std::string log_filename; // the getSlaveArgv() function will set this + std::vector slave_argv = this->getSlaveArgv_( + full_path, slave_name, log_filename + ); + auto num_procs = slave.numprocs(); + std::vector errcodes(num_procs); + // TODO: We need to decide how to handle the output from the slave processes.. + // As far as I can tell, open MPI does not support redirecting the output + // to a file, so we might need to implement a custom solution for this + int spawn_result = MPI_Comm_spawn( + flow_program_name, + slave_argv.data(), + /*maxprocs=*/num_procs, + /*info=*/MPI_INFO_NULL, + /*root=*/0, // Rank 0 spawns the slave processes + /*comm=*/this->comm_, + /*intercomm=*/master_slave_comm.get(), + /*array_of_errcodes=*/errcodes.data() + ); + if (spawn_result != MPI_SUCCESS || (*master_slave_comm == MPI_COMM_NULL)) { + for (unsigned int i = 0; i < num_procs; i++) { + if (errcodes[i] != MPI_SUCCESS) { + char error_string[MPI_MAX_ERROR_STRING]; + int length_of_error_string; + MPI_Error_string(errcodes[i], error_string, &length_of_error_string); + OpmLog::info(fmt::format("Error spawning process {}: {}", i, error_string)); + } + } + OPM_THROW(std::runtime_error, "Failed to spawn slave process"); + } + ReservoirCoupling::setErrhandler(*master_slave_comm, /*is_master=*/true); + OpmLog::info( + fmt::format( + "Spawned reservoir coupling slave simulation for slave with name: " + "{}. Standard output logfile name: {}.log", slave_name, slave_name + ) + ); + this->master_.addSlaveCommunicator(std::move(master_slave_comm)); + this->master_.addSlaveName(slave_name); + } +} + +} // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp new file mode 100644 index 00000000000..f599da02a0a --- /dev/null +++ b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp @@ -0,0 +1,73 @@ +/* + Copyright 2024 Equinor AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#ifndef OPM_RESERVOIR_COUPLING_SPAWN_SLAVES_HPP +#define OPM_RESERVOIR_COUPLING_SPAWN_SLAVES_HPP + +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +namespace Opm { + +class ReservoirCouplingSpawnSlaves { +public: + using MPI_Comm_Ptr = ReservoirCoupling::MPI_Comm_Ptr; + using MessageTag = ReservoirCoupling::MessageTag; + + ReservoirCouplingSpawnSlaves( + ReservoirCouplingMaster &master, + const ReservoirCoupling::CouplingInfo &rescoup, + const int report_step + ); + + void spawn(); + +private: + std::pair, std::size_t> + getMasterGroupNamesForSlave_(const std::string &slave_name) const; + std::vector getSlaveArgv_( + const std::filesystem::path &data_file, + const std::string &slave_name, + std::string &log_filename) const; + void prepareTimeStepping_(); + void receiveActivationDateFromSlaves_(); + void receiveSimulationStartDateFromSlaves_(); + void sendMasterGroupNamesToSlaves_(); + std::pair, std::size_t> + serializeStrings_(std::vector data) const; + void spawnSlaveProcesses_(); + + ReservoirCouplingMaster &master_; + const ReservoirCoupling::CouplingInfo &rescoup_; + const int report_step_; + const Parallel::Communication &comm_; +}; + +} // namespace Opm +#endif // OPM_RESERVOIR_COUPLING_SPAWN_SLAVES_HPP diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp index 07f7bcb1908..6c020c290a6 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp @@ -30,6 +30,7 @@ #include #include #include +#include #endif #include @@ -201,6 +202,39 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim return finalize(); } +#if HAVE_MPI + // This method should only be called if slave mode (i.e. Parameters::Get()) + // is false. We try to determine if this is a normal flow simulation or a reservoir + // coupling master. It is a normal flow simulation if the schedule does not contain + // any SLAVES and GRUPMAST keywords. + bool checkRunningAsReservoirCouplingMaster() + { + for (std::size_t report_step = 0; report_step < this->schedule().size(); ++report_step) { + auto rescoup = this->schedule()[report_step].rescoup(); + auto slave_count = rescoup.slaveCount(); + auto master_group_count = rescoup.masterGroupCount(); + // - GRUPMAST and SLAVES keywords need to be specified at the same report step + // - They can only occur once in the schedule + if (slave_count > 0 && master_group_count > 0) { + return true; + } + else if (slave_count > 0 && master_group_count == 0) { + throw ReservoirCouplingError( + "Inconsistent reservoir coupling master schedule: " + "Slave count is greater than 0 but master group count is 0" + ); + } + else if (slave_count == 0 && master_group_count > 0) { + throw ReservoirCouplingError( + "Inconsistent reservoir coupling master schedule: " + "Master group count is greater than 0 but slave count is 0" + ); + } + } + return false; + } +#endif + // NOTE: The argc and argv will be used when launching a slave process void init(SimulatorTimer &timer, int argc, char** argv) { @@ -213,21 +247,19 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim FlowGenericVanguard::comm(), this->schedule(), timer ); + this->reservoirCouplingSlave_->sendActivationDateToMasterProcess(); this->reservoirCouplingSlave_->sendSimulationStartDateToMasterProcess(); + this->reservoirCouplingSlave_->receiveMasterGroupNamesFromMasterProcess(); } else { - // For now, we require that SLAVES and GRUPMAST are defined at the first - // schedule step, so it is enough to check the first step. See the - // keyword handlers in opm-common for more information. - auto master_mode = this->schedule()[0].rescoup().masterMode(); + auto master_mode = checkRunningAsReservoirCouplingMaster(); if (master_mode) { this->reservoirCouplingMaster_ = std::make_unique( FlowGenericVanguard::comm(), - this->schedule() + this->schedule(), + argc, argv ); - this->reservoirCouplingMaster_->spawnSlaveProcesses(argc, argv); - this->reservoirCouplingMaster_->receiveSimulationStartDateFromSlaves(); } } #endif @@ -254,10 +286,10 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim adaptiveTimeStepping_ = std::make_unique(unitSystem, max_next_tstep, terminalOutput_); } #if HAVE_MPI - if (slave_mode) { + if (this->reservoirCouplingSlave_) { adaptiveTimeStepping_->setReservoirCouplingSlave(this->reservoirCouplingSlave_.get()); } - else if (this->schedule()[0].rescoup().masterMode()) { + else if (this->reservoirCouplingMaster_) { adaptiveTimeStepping_->setReservoirCouplingMaster(this->reservoirCouplingMaster_.get()); } #endif @@ -406,6 +438,14 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim tuningUpdater(timer.simulationTimeElapsed(), this->adaptiveTimeStepping_->suggestedNextStep(), 0); +#if HAVE_MPI + if (this->reservoirCouplingMaster_) { + this->reservoirCouplingMaster_->maybeSpawnSlaveProcesses(timer.currentStepNum()); + } + else if (this->reservoirCouplingSlave_) { + this->reservoirCouplingSlave_->maybeActivate(timer.currentStepNum()); + } +#endif const auto& events = schedule()[timer.currentStepNum()].events(); bool event = events.hasEvent(ScheduleEvents::NEW_WELL) || events.hasEvent(ScheduleEvents::INJECTION_TYPE_CHANGED) || From 402bb85103a99009383e96306adc184b3cca2104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Sun, 1 Dec 2024 15:55:34 +0100 Subject: [PATCH 18/33] Enable building without MPI --- CMakeLists_files.cmake | 15 +++++++++++- opm/simulators/flow/FlowMain.hpp | 11 +++++++++ opm/simulators/flow/Main.cpp | 9 ++++++-- opm/simulators/flow/Main.hpp | 2 ++ .../flow/SimulatorFullyImplicitBlackoil.hpp | 23 ++++++++++++++----- 5 files changed, 51 insertions(+), 9 deletions(-) diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 02860168061..efd0d3f45d2 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -1179,7 +1179,20 @@ if(dune-alugrid_FOUND) examples/fracture_discretefracture.cpp ) endif() - +if(USE_MPI) + list (APPEND MAIN_SOURCE_FILES + opm/simulators/flow/ReservoirCoupling.cpp + opm/simulators/flow/ReservoirCouplingMaster.cpp + opm/simulators/flow/ReservoirCouplingSlave.cpp + opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp + ) + list (APPEND PUBLIC_HEADER_FILES + opm/simulators/flow/ReservoirCoupling.hpp + opm/simulators/flow/ReservoirCouplingMaster.hpp + opm/simulators/flow/ReservoirCouplingSlave.hpp + opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp + ) +endif() if(HYPRE_FOUND) list(APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/HyprePreconditioner.hpp diff --git a/opm/simulators/flow/FlowMain.hpp b/opm/simulators/flow/FlowMain.hpp index 77a1f140e43..f5589763679 100644 --- a/opm/simulators/flow/FlowMain.hpp +++ b/opm/simulators/flow/FlowMain.hpp @@ -32,6 +32,9 @@ #include #include +#if HAVE_MPI +#define RESERVOIR_COUPLING_ENABLED +#endif #if HAVE_DUNE_FEM #include #else @@ -369,7 +372,11 @@ namespace Opm { // Callback that will be called from runSimulatorInitOrRun_(). int runSimulatorRunCallback_() { +#ifdef RESERVOIR_COUPLING_ENABLED SimulatorReport report = simulator_->run(*simtimer_, this->argc_, this->argv_); +#else + SimulatorReport report = simulator_->run(*simtimer_); +#endif runSimulatorAfterSim_(report); return report.success.exit_status; } @@ -377,7 +384,11 @@ namespace Opm { // Callback that will be called from runSimulatorInitOrRun_(). int runSimulatorInitCallback_() { +#ifdef RESERVOIR_COUPLING_ENABLED simulator_->init(*simtimer_, this->argc_, this->argv_); +#else + simulator_->init(*simtimer_); +#endif return EXIT_SUCCESS; } diff --git a/opm/simulators/flow/Main.cpp b/opm/simulators/flow/Main.cpp index 2c27ccef18d..9ed9ccd0d31 100644 --- a/opm/simulators/flow/Main.cpp +++ b/opm/simulators/flow/Main.cpp @@ -61,7 +61,9 @@ namespace Opm { Main::Main(int argc, char** argv, bool ownMPI) : argc_(argc), argv_(argv), ownMPI_(ownMPI) { +#if HAVE_MPI maybeSaveReservoirCouplingSlaveLogFilename_(); +#endif if (ownMPI_) { initMPI(); } @@ -145,6 +147,7 @@ Main::~Main() #endif } +#if HAVE_MPI void Main::maybeSaveReservoirCouplingSlaveLogFilename_() { // If first command line argument is "--slave-log-file=", @@ -167,7 +170,8 @@ void Main::maybeSaveReservoirCouplingSlaveLogFilename_() } } } - +#endif +#if HAVE_MPI void Main::maybeRedirectReservoirCouplingSlaveOutput_() { if (!this->reservoirCouplingSlaveOutputFilename_.empty()) { std::string filename = this->reservoirCouplingSlaveOutputFilename_ @@ -188,6 +192,7 @@ void Main::maybeRedirectReservoirCouplingSlaveOutput_() { close(fd); } } +#endif void Main::setArgvArgc_(const std::string& filename) { @@ -221,9 +226,9 @@ void Main::initMPI() FlowGenericVanguard::setCommunication(std::make_unique()); handleTestSplitCommunicatorCmdLine_(); - maybeRedirectReservoirCouplingSlaveOutput_(); #if HAVE_MPI + maybeRedirectReservoirCouplingSlaveOutput_(); if (test_split_comm_ && FlowGenericVanguard::comm().size() > 1) { int world_rank = FlowGenericVanguard::comm().rank(); int color = (world_rank == 0); diff --git a/opm/simulators/flow/Main.hpp b/opm/simulators/flow/Main.hpp index 455bd8eb091..6a9e1a7b8e8 100644 --- a/opm/simulators/flow/Main.hpp +++ b/opm/simulators/flow/Main.hpp @@ -743,7 +743,9 @@ class Main // To demonstrate run with non_world_comm bool test_split_comm_ = false; bool isSimulationRank_ = true; +#if HAVE_MPI std::string reservoirCouplingSlaveOutputFilename_{}; +#endif #if HAVE_DAMARIS bool enableDamarisOutput_ = false; #endif diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp index 6c020c290a6..8303cc461d1 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp @@ -25,6 +25,9 @@ #include #if HAVE_MPI +#define RESERVOIR_COUPLING_ENABLED +#endif +#ifdef RESERVOIR_COUPLING_ENABLED #include #include #include @@ -185,9 +188,15 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim /// \param[in,out] timer governs the requested reporting timesteps /// \param[in,out] state state of reservoir: pressure, fluxes /// \return simulation report, with timing data +#ifdef RESERVOIR_COUPLING_ENABLED SimulatorReport run(SimulatorTimer& timer, int argc, char** argv) { init(timer, argc, argv); +#else + SimulatorReport run(SimulatorTimer& timer) + { + init(timer); +#endif // Make cache up to date. No need for updating it in elementCtx. // NB! Need to be at the correct step in case of restart simulator_.setEpisodeIndex(timer.currentStepNum()); @@ -202,7 +211,7 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim return finalize(); } -#if HAVE_MPI +#ifdef RESERVOIR_COUPLING_ENABLED // This method should only be called if slave mode (i.e. Parameters::Get()) // is false. We try to determine if this is a normal flow simulation or a reservoir // coupling master. It is a normal flow simulation if the schedule does not contain @@ -235,12 +244,11 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim } #endif +#ifdef RESERVOIR_COUPLING_ENABLED // NOTE: The argc and argv will be used when launching a slave process void init(SimulatorTimer &timer, int argc, char** argv) { -#if HAVE_MPI auto slave_mode = Parameters::Get(); - FlowGenericVanguard::comm().barrier(); if (slave_mode) { this->reservoirCouplingSlave_ = std::make_unique( @@ -262,6 +270,9 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim ); } } +#else + void init(SimulatorTimer &timer) + { #endif simulator_.setEpisodeIndex(-1); @@ -285,7 +296,7 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim else { adaptiveTimeStepping_ = std::make_unique(unitSystem, max_next_tstep, terminalOutput_); } -#if HAVE_MPI +#ifdef RESERVOIR_COUPLING_ENABLED if (this->reservoirCouplingSlave_) { adaptiveTimeStepping_->setReservoirCouplingSlave(this->reservoirCouplingSlave_.get()); } @@ -438,7 +449,7 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim tuningUpdater(timer.simulationTimeElapsed(), this->adaptiveTimeStepping_->suggestedNextStep(), 0); -#if HAVE_MPI +#ifdef RESERVOIR_COUPLING_ENABLED if (this->reservoirCouplingMaster_) { this->reservoirCouplingMaster_->maybeSpawnSlaveProcesses(timer.currentStepNum()); } @@ -643,7 +654,7 @@ class SimulatorFullyImplicitBlackoil : private SerializableSim SimulatorConvergenceOutput convergence_output_; -#if HAVE_MPI +#ifdef RESERVOIR_COUPLING_ENABLED bool slaveMode_{false}; std::unique_ptr reservoirCouplingMaster_{nullptr}; std::unique_ptr reservoirCouplingSlave_{nullptr}; From 5ae50c90e1ab6ab9142ca7369a6c62e6f24fd508 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 2 Dec 2024 14:05:17 +0100 Subject: [PATCH 19/33] Simplify storage of communicators We don't need unique ptrs for the communicators. These are just simple integers that can be copied into a std::vector. --- opm/simulators/flow/ReservoirCoupling.hpp | 12 ----------- .../flow/ReservoirCouplingMaster.hpp | 10 +++++----- .../flow/ReservoirCouplingSlave.cpp | 20 +++++++++---------- .../flow/ReservoirCouplingSlave.hpp | 3 +-- .../flow/ReservoirCouplingSpawnSlaves.cpp | 10 +++++----- .../flow/ReservoirCouplingSpawnSlaves.hpp | 1 - 6 files changed, 21 insertions(+), 35 deletions(-) diff --git a/opm/simulators/flow/ReservoirCoupling.hpp b/opm/simulators/flow/ReservoirCoupling.hpp index 31d5e266abf..0eb312cbd2e 100644 --- a/opm/simulators/flow/ReservoirCoupling.hpp +++ b/opm/simulators/flow/ReservoirCoupling.hpp @@ -37,18 +37,6 @@ enum class MessageTag : int { MasterGroupNamesSize, }; -// Custom deleter for MPI_Comm -struct MPI_Comm_Deleter { - void operator()(MPI_Comm* comm) const { - if (*comm != MPI_COMM_NULL) { - MPI_Comm_free(comm); - } - delete comm; - } -}; - -using MPI_Comm_Ptr = std::unique_ptr; - // This class represents a time point. // It is currently used to represent an epoch time (a double value in seconds since the epoch), // or an elapsed time (a double value in seconds since the start of the simulation). diff --git a/opm/simulators/flow/ReservoirCouplingMaster.hpp b/opm/simulators/flow/ReservoirCouplingMaster.hpp index a79ff1bcaef..9f11a1ed968 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.hpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.hpp @@ -34,7 +34,6 @@ namespace Opm { class ReservoirCouplingMaster { public: - using MPI_Comm_Ptr = ReservoirCoupling::MPI_Comm_Ptr; using MessageTag = ReservoirCoupling::MessageTag; using TimePoint = ReservoirCoupling::TimePoint; ReservoirCouplingMaster( @@ -44,8 +43,8 @@ class ReservoirCouplingMaster { ); bool activated() { return this->numSlavesStarted() > 0; } - void addSlaveCommunicator(MPI_Comm_Ptr comm) { - this->master_slave_comm_.push_back(std::move(comm)); + void addSlaveCommunicator(MPI_Comm comm) { + this->master_slave_comm_.push_back(comm); } void addSlaveName(const std::string &name) { this->slave_names_.push_back(name); } void addSlaveStartDate(std::time_t date) { this->slave_start_dates_.push_back(date); } @@ -55,7 +54,7 @@ class ReservoirCouplingMaster { char **getArgv() const { return this->argv_; } const Parallel::Communication &getComm() const { return this->comm_; } double getSimulationStartDate() const { return this->schedule_.getStartTime(); } - const MPI_Comm &getSlaveComm(int index) const { return *this->master_slave_comm_[index].get(); } + MPI_Comm getSlaveComm(int index) const { return this->master_slave_comm_[index]; } const std::string &getSlaveName(int index) const { return this->slave_names_[index]; } const double *getSlaveStartDates() { return this->slave_start_dates_.data(); } double maybeChopSubStep(double suggested_timestep, double current_time) const; @@ -73,7 +72,8 @@ class ReservoirCouplingMaster { const Schedule& schedule_; int argc_; char **argv_; - std::vector master_slave_comm_; // MPI communicators for the slave processes + // NOTE: MPI_Comm is just an integer handle, so we can just copy it into the vector + std::vector master_slave_comm_; // MPI communicators for the slave processes std::vector slave_names_; // The start dates are in whole seconds since the epoch. We use a double to store the value // since both schedule_.getStartTime() and schedule_.stepLength(report_step) returns diff --git a/opm/simulators/flow/ReservoirCouplingSlave.cpp b/opm/simulators/flow/ReservoirCouplingSlave.cpp index 45f0c65205a..c735d09b70d 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.cpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.cpp @@ -43,12 +43,12 @@ ReservoirCouplingSlave( schedule_{schedule}, timer_{timer} { - this->slave_master_comm_ = MPI_Comm_Ptr(new MPI_Comm(MPI_COMM_NULL)); - MPI_Comm_get_parent(this->slave_master_comm_.get()); - if (*(this->slave_master_comm_) == MPI_COMM_NULL) { + this->slave_master_comm_ = MPI_COMM_NULL; + MPI_Comm_get_parent(&this->slave_master_comm_); + if (this->slave_master_comm_ == MPI_COMM_NULL) { OPM_THROW(std::runtime_error, "Slave process is not spawned by a master process"); } - ReservoirCoupling::setErrhandler(*this->slave_master_comm_, /*is_master=*/false); + ReservoirCoupling::setErrhandler(this->slave_master_comm_, /*is_master=*/false); } double @@ -62,7 +62,7 @@ receiveNextTimeStepFromMaster() { /*datatype=*/MPI_DOUBLE, /*source_rank=*/0, /*tag=*/static_cast(MessageTag::SlaveNextTimeStep), - *this->slave_master_comm_, + this->slave_master_comm_, MPI_STATUS_IGNORE ); if (result != MPI_SUCCESS) { @@ -90,7 +90,7 @@ receiveMasterGroupNamesFromMasterProcess() { /*datatype=*/MPI_AINT, /*source_rank=*/0, /*tag=*/static_cast(MessageTag::MasterGroupNamesSize), - *this->slave_master_comm_, + this->slave_master_comm_, MPI_STATUS_IGNORE ); OpmLog::info("Received master group names size from master process rank 0"); @@ -108,7 +108,7 @@ receiveMasterGroupNamesFromMasterProcess() { /*datatype=*/MPI_CHAR, /*source_rank=*/0, /*tag=*/static_cast(MessageTag::MasterGroupNames), - *this->slave_master_comm_, + this->slave_master_comm_, MPI_STATUS_IGNORE ); if (result2 != MPI_SUCCESS) { @@ -142,7 +142,7 @@ sendNextReportDateToMasterProcess() const /*datatype=*/MPI_DOUBLE, /*dest_rank=*/0, /*tag=*/static_cast(MessageTag::SlaveNextReportDate), - *this->slave_master_comm_ + this->slave_master_comm_ ); OpmLog::info("Sent next report date to master process from rank 0"); } @@ -161,7 +161,7 @@ sendActivationDateToMasterProcess() const /*datatype=*/MPI_DOUBLE, /*dest_rank=*/0, /*tag=*/static_cast(MessageTag::SlaveActivationDate), - *this->slave_master_comm_ + this->slave_master_comm_ ); OpmLog::info("Sent simulation start date to master process from rank 0"); } @@ -180,7 +180,7 @@ sendSimulationStartDateToMasterProcess() const /*datatype=*/MPI_DOUBLE, /*dest_rank=*/0, /*tag=*/static_cast(MessageTag::SlaveSimulationStartDate), - *this->slave_master_comm_ + this->slave_master_comm_ ); OpmLog::info("Sent simulation start date to master process from rank 0"); } diff --git a/opm/simulators/flow/ReservoirCouplingSlave.hpp b/opm/simulators/flow/ReservoirCouplingSlave.hpp index 17cb5c02e55..5bd72fbc603 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.hpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.hpp @@ -34,7 +34,6 @@ namespace Opm { class ReservoirCouplingSlave { public: - using MPI_Comm_Ptr = ReservoirCoupling::MPI_Comm_Ptr; using MessageTag = ReservoirCoupling::MessageTag; ReservoirCouplingSlave( @@ -57,7 +56,7 @@ class ReservoirCouplingSlave { const Schedule& schedule_; const SimulatorTimer &timer_; // MPI parent communicator for a slave process - MPI_Comm_Ptr slave_master_comm_{nullptr}; + MPI_Comm slave_master_comm_{MPI_COMM_NULL}; std::map master_group_names_; bool activated_{false}; }; diff --git a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp index be5f4973805..34f051954e4 100644 --- a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp +++ b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp @@ -275,7 +275,7 @@ spawnSlaveProcesses_() { char *flow_program_name = this->master_.getArgv(0); for (const auto& [slave_name, slave] : this->rescoup_.slaves()) { - auto master_slave_comm = MPI_Comm_Ptr(new MPI_Comm(MPI_COMM_NULL)); + MPI_Comm master_slave_comm = MPI_COMM_NULL; const auto& data_file_name = slave.dataFilename(); const auto& directory_path = slave.directoryPath(); // Concatenate the directory path and the data file name to get the full path @@ -298,10 +298,10 @@ spawnSlaveProcesses_() /*info=*/MPI_INFO_NULL, /*root=*/0, // Rank 0 spawns the slave processes /*comm=*/this->comm_, - /*intercomm=*/master_slave_comm.get(), + /*intercomm=*/&master_slave_comm, /*array_of_errcodes=*/errcodes.data() ); - if (spawn_result != MPI_SUCCESS || (*master_slave_comm == MPI_COMM_NULL)) { + if (spawn_result != MPI_SUCCESS || (master_slave_comm == MPI_COMM_NULL)) { for (unsigned int i = 0; i < num_procs; i++) { if (errcodes[i] != MPI_SUCCESS) { char error_string[MPI_MAX_ERROR_STRING]; @@ -312,14 +312,14 @@ spawnSlaveProcesses_() } OPM_THROW(std::runtime_error, "Failed to spawn slave process"); } - ReservoirCoupling::setErrhandler(*master_slave_comm, /*is_master=*/true); + ReservoirCoupling::setErrhandler(master_slave_comm, /*is_master=*/true); OpmLog::info( fmt::format( "Spawned reservoir coupling slave simulation for slave with name: " "{}. Standard output logfile name: {}.log", slave_name, slave_name ) ); - this->master_.addSlaveCommunicator(std::move(master_slave_comm)); + this->master_.addSlaveCommunicator(master_slave_comm); this->master_.addSlaveName(slave_name); } } diff --git a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp index f599da02a0a..2f59e6dec98 100644 --- a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp +++ b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp @@ -37,7 +37,6 @@ namespace Opm { class ReservoirCouplingSpawnSlaves { public: - using MPI_Comm_Ptr = ReservoirCoupling::MPI_Comm_Ptr; using MessageTag = ReservoirCoupling::MessageTag; ReservoirCouplingSpawnSlaves( From 879fa72ce840e52db0c5cba755e5343f1cd29db7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 2 Dec 2024 22:30:21 +0100 Subject: [PATCH 20/33] Eliminate TimePoint class Make fuzzy comparison between two dates explicit. --- opm/simulators/flow/ReservoirCoupling.cpp | 30 +++ opm/simulators/flow/ReservoirCoupling.hpp | 177 +++--------------- .../flow/ReservoirCouplingMaster.cpp | 18 +- .../flow/ReservoirCouplingMaster.hpp | 2 +- 4 files changed, 69 insertions(+), 158 deletions(-) diff --git a/opm/simulators/flow/ReservoirCoupling.cpp b/opm/simulators/flow/ReservoirCoupling.cpp index 69c4e3e621f..1e25c7b5977 100644 --- a/opm/simulators/flow/ReservoirCoupling.cpp +++ b/opm/simulators/flow/ReservoirCoupling.cpp @@ -74,5 +74,35 @@ void setErrhandler(MPI_Comm comm, bool is_master) MPI_Comm_set_errhandler(comm, errhandler); } +bool Seconds::compare_eq(double a, double b) +{ + // Are a and b equal? + return std::abs(a - b) < std::max(abstol, reltol * std::max(std::abs(a), std::abs(b))); +} + +bool Seconds::compare_gt_or_eq(double a, double b) +{ + // Is a greater than or equal to b? + if (compare_eq(a, b)) { + return true; + } + return a > b; +} + +bool Seconds::compare_gt(double a, double b) +{ + // Is a greater than b? + return !compare_eq(a, b) && a > b; +} + +bool Seconds::compare_lt_or_eq(double a, double b) +{ + // Is a less than or equal to b? + if (compare_eq(a, b)) { + return true; + } + return a < b; +} + } // namespace ReservoirCoupling } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCoupling.hpp b/opm/simulators/flow/ReservoirCoupling.hpp index 0eb312cbd2e..0d69c6a3a2b 100644 --- a/opm/simulators/flow/ReservoirCoupling.hpp +++ b/opm/simulators/flow/ReservoirCoupling.hpp @@ -37,158 +37,39 @@ enum class MessageTag : int { MasterGroupNamesSize, }; -// This class represents a time point. -// It is currently used to represent an epoch time (a double value in seconds since the epoch), -// or an elapsed time (a double value in seconds since the start of the simulation). -// To avoid numerical issues when adding or subtracting time points and then later comparing -// for equality with for example a given report date, we use a tolerance value. -class TimePoint { -private: - double time; - // TODO: Epoch values often lies in the range of [1e9,1e11], so a tolerance value of 1e-10 - // might be a little too small. However, for elapsed time values, the range is often - // in the range of [0, 1e8], so a tolerance value of 1e-10 should be sufficient. - // NOTE: 1 nano-second = 1e-9 seconds - static constexpr double tol = 1e-10; // Tolerance value - -public: - TimePoint() : time(0.0) {} - explicit TimePoint(double t) : time(t) {} - TimePoint(const TimePoint& other) : time(other.time) {} - - // Assignment operator for double - TimePoint& operator=(double t) { - time = t; - return *this; - } - - // Copy assignment operator - TimePoint& operator=(const TimePoint& other) { - if (this != &other) { - time = other.time; - } - return *this; - } - - double getTime() const { return time; } - - // Equality operator - bool operator==(const TimePoint& other) const { - return std::abs(time - other.time) < tol; - } - - // Inequality operator - bool operator!=(const TimePoint& other) const { - return !(*this == other); - } - - // Less than operator - bool operator<(const TimePoint& other) const { - return (time < other.time) && !(*this == other); - } - - // Comparison operator: double < TimePoint - friend bool operator<(double lhs, const TimePoint& rhs) { - return lhs < rhs.time; - } - - // Comparison operator: TimePoint < double - bool operator<(double rhs) const { - return time < rhs; - } - - // Less than or equal to operator - bool operator<=(const TimePoint& other) const { - return (time < other.time) || (*this == other); - } - - // Comparison operator: double <= TimePoint - friend bool operator<=(double lhs, const TimePoint& rhs) { - return lhs <= rhs.time; - } - - // Comparison operator: TimePoint <= double - bool operator<=(double rhs) const { - return time <= rhs; - } - - // Greater than operator - bool operator>(const TimePoint& other) const { - return (time > other.time) && !(*this == other); - } - - // Comparison operator: double > TimePoint - friend bool operator>(double lhs, const TimePoint& rhs) { - return lhs > rhs.time; - } - - // Comparison operator: TimePoint > double - bool operator>(double rhs) const { - return time > rhs; - } - - // Greater than or equal to operator - bool operator>=(const TimePoint& other) const { - return (time > other.time) || (*this == other); - } - - // Comparison operator: TimePoint >= double - bool operator>=(double rhs) const { - return time >= rhs; - } - - // Comparison operator: double >= TimePoint - friend bool operator>=(double lhs, const TimePoint& rhs) { - return lhs >= rhs.time; - } - - // Addition operator: TimePoint + TimePoint (summing their times) - TimePoint operator+(const TimePoint& other) const { - return TimePoint(time + other.time); - } - - // Addition operator: TimePoint + double - TimePoint operator+(double delta) const { - return TimePoint(time + delta); - } - - // Friend addition operator: double + TimePoint - friend TimePoint operator+(double lhs, const TimePoint& rhs) { - return TimePoint(lhs + rhs.time); - } - - // Overload += operator for adding a double - TimePoint& operator+=(double delta) { - time += delta; - return *this; - } - - // Subtraction operator: TimePoint - TimePoint (resulting in a new TimePoint) - TimePoint operator-(const TimePoint& other) const { - return TimePoint(time - other.time); - } - - // Subtraction operator: TimePoint - double - TimePoint operator-(double delta) const { - return TimePoint(time - delta); - } - - // Friend subtraction operator: double - TimePoint - friend TimePoint operator-(double lhs, const TimePoint& rhs) { - return TimePoint(lhs - rhs.time); - } - - // Stream insertion operator for easy printing - friend std::ostream& operator<<(std::ostream& os, const TimePoint& tp) { - os << tp.time; - return os; - } -}; - // Helper functions void custom_error_handler_(MPI_Comm* comm, int* err, const std::string &msg); void setErrhandler(MPI_Comm comm, bool is_master); +// Utility class for comparing double values representing epoch dates (seconds since +// unix epoch) or elapsed time (seconds since the start of the simulation). +// NOTE: It is important that when comparing against start of a report step or similar, that +// that we do not miss these due to numerical issues. This is because communication between +// master and slave processes are based on these points in time. +// NOTE: Epoch values in this century (2000-2100) lies in the range of [1e9,4e9], and a double variable cannot +// represent such large values with high precision. For example, the date 01-01-2020 is equal +// to 1.5778368e9 seconds and adding 1e-7 seconds to this value will not change the value. +// So microseconds (1e-6) is approximately the smallest time unit we can represent for such a number. +// NOTE: Report steps seems to have a maximum resolution of whole seconds, see stepLength() in +// Schedule.cpp in opm-common, which returns the step length in seconds. +struct Seconds { + static constexpr double abstol = 1e-15; + static constexpr double reltol = 1e-15; + // We will will use the following expression to determine if two values a and b are equal: + // |a - b| <= tol = abstol + reltol * max(|a|, |b|) + // For example, assume abstol = reltol = 1e-15, then the following holds: + // - If |a| and |b| are below 1, then the absolute tolerance applies. + // - If a and b are above 1, then the relative tolerance applies. + // For example, for dates in the range 01-01-2000 to 01-01-2100, epoch values will be in the range + // [1e9, 4e9]. And we have 1e-15 * 1e9 = 1e-6, so numbers differing below one microsecond will + // be considered equal. + // NOTE: The above is not true for numbers close to zero, but we do not expect to compare such numbers. + static bool compare_eq(double a, double b); + static bool compare_gt(double a, double b); + static bool compare_gt_or_eq(double a, double b); + static bool compare_lt_or_eq(double a, double b); +}; + } // namespace ReservoirCoupling } // namespace Opm diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 1bb61874044..93e230312b3 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -81,21 +81,21 @@ maybeChopSubStep(double suggested_timestep_original, double elapsed_time) const // Check if the suggested timestep needs to be adjusted based on the slave processes' // next report step, or if the slave process has not started yet: the start of a slave process. double start_date = this->schedule_.getStartTime(); - TimePoint step_start_date{start_date + elapsed_time}; - TimePoint step_end_date{step_start_date + suggested_timestep_original}; - TimePoint suggested_timestep{suggested_timestep_original}; + double step_start_date{start_date + elapsed_time}; + double step_end_date{step_start_date + suggested_timestep_original}; + double suggested_timestep{suggested_timestep_original}; auto num_slaves = this->numSlavesStarted(); for (std::size_t i = 0; i < num_slaves; i++) { double slave_start_date = this->slave_start_dates_[i]; - TimePoint slave_next_report_date{this->slave_next_report_time_offsets_[i] + slave_start_date}; - if (slave_start_date > step_end_date) { + double slave_next_report_date{this->slave_next_report_time_offsets_[i] + slave_start_date}; + if (Seconds::compare_gt_or_eq(slave_start_date, step_end_date)) { // The slave process has not started yet, and will not start during this timestep continue; } - TimePoint slave_elapsed_time; - if (slave_start_date <= step_start_date) { + double slave_elapsed_time; + if (Seconds::compare_lt_or_eq(slave_start_date,step_start_date)) { // The slave process has already started, and will continue during this timestep - if (slave_next_report_date > step_end_date) { + if (Seconds::compare_gt(slave_next_report_date, step_end_date)) { // The slave process will not report during this timestep continue; } @@ -109,7 +109,7 @@ maybeChopSubStep(double suggested_timestep_original, double elapsed_time) const suggested_timestep = slave_elapsed_time; step_end_date = step_start_date + suggested_timestep; } - return suggested_timestep.getTime(); + return suggested_timestep; } void diff --git a/opm/simulators/flow/ReservoirCouplingMaster.hpp b/opm/simulators/flow/ReservoirCouplingMaster.hpp index 9f11a1ed968..a5090686377 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.hpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.hpp @@ -35,7 +35,7 @@ namespace Opm { class ReservoirCouplingMaster { public: using MessageTag = ReservoirCoupling::MessageTag; - using TimePoint = ReservoirCoupling::TimePoint; + using Seconds = ReservoirCoupling::Seconds; ReservoirCouplingMaster( const Parallel::Communication &comm, const Schedule &schedule, From 00be5ed47ecbeac933a05a41155c55cb8de94e3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Tue, 3 Dec 2024 10:21:32 +0100 Subject: [PATCH 21/33] Conversion of std::time_t to double Clarify the limits for conversion of std::time_t to double --- opm/simulators/flow/ReservoirCouplingMaster.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 93e230312b3..ffac355d371 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -80,6 +80,9 @@ maybeChopSubStep(double suggested_timestep_original, double elapsed_time) const { // Check if the suggested timestep needs to be adjusted based on the slave processes' // next report step, or if the slave process has not started yet: the start of a slave process. + // NOTE: getStartTime() returns a std::time_t value, which is typically a long integer. It should + // be possible to represent reasonable epoch values within a double. See comment for + // getMasterActivationDate_() for more information. double start_date = this->schedule_.getStartTime(); double step_start_date{start_date + elapsed_time}; double step_end_date{step_start_date + suggested_timestep_original}; @@ -188,6 +191,11 @@ ReservoirCouplingMaster:: getMasterActivationDate_() const { // Assume master mode is activated when the first SLAVES keyword is encountered in the schedule + // NOTE: getStartTime() returns a std::time_t value, which is typically a long integer representing + // the number of seconds since the epoch (1970-01-01 00:00:00 UTC) + // The maximum integer that can be represented by a double is 2^53 - 1, which is approximately + // 9e15. This corresponds to a date in the year 2.85e8 or 285 million years into the future. + // So we should be able to represent reasonable epoch values within a double. double start_date = this->schedule_.getStartTime(); for (std::size_t report_step = 0; report_step < this->schedule_.size(); ++report_step) { auto rescoup = this->schedule_[report_step].rescoup(); From afab98a5a4f0cb87f55cfd0f8fb5705717d9b428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Tue, 3 Dec 2024 12:44:03 +0100 Subject: [PATCH 22/33] Clarify how the timestep is selected Clarify how the master timestep is computed based on the slaves next report date or the slaves start date. --- opm/simulators/flow/ReservoirCouplingMaster.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index ffac355d371..688182b1371 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -88,6 +88,9 @@ maybeChopSubStep(double suggested_timestep_original, double elapsed_time) const double step_end_date{step_start_date + suggested_timestep_original}; double suggested_timestep{suggested_timestep_original}; auto num_slaves = this->numSlavesStarted(); + // Determine the minimum step_end_date and the corresponding suggested_timestep such that no + // slave process will report or start during the timestep [step_start_date, step_end_date] + // where suggested_timestep = step_end_date - step_start_date for (std::size_t i = 0; i < num_slaves; i++) { double slave_start_date = this->slave_start_dates_[i]; double slave_next_report_date{this->slave_next_report_time_offsets_[i] + slave_start_date}; From 8da3c203f3428e658210509018827abd1c7e39b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Tue, 3 Dec 2024 14:05:31 +0100 Subject: [PATCH 23/33] Do not check return values for MPI calls The custom error handlers for each slave-master communicator will handle errors in MPI_Recv() and MPI_Send() and eventually call MPI_Abort(). So there is no need to check return values for these MPI calls. --- .../flow/ReservoirCouplingMaster.cpp | 8 ++--- .../flow/ReservoirCouplingSlave.cpp | 35 +++++++++++-------- .../flow/ReservoirCouplingSpawnSlaves.cpp | 22 +++++++----- 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 688182b1371..000c6cee179 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -150,7 +150,10 @@ receiveNextReportDateFromSlaves() if (this->comm_.rank() == 0) { for (unsigned int i = 0; i < num_slaves; i++) { double slave_next_report_time_offset; // Elapsed time from the beginning of the simulation - int result = MPI_Recv( + // NOTE: All slave-master communicators have set a custom error handler, which eventually + // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv() + // or MPI_Send() calls. + MPI_Recv( &slave_next_report_time_offset, /*count=*/1, /*datatype=*/MPI_DOUBLE, @@ -159,9 +162,6 @@ receiveNextReportDateFromSlaves() this->getSlaveComm(i), MPI_STATUS_IGNORE ); - if (result != MPI_SUCCESS) { - OPM_THROW(std::runtime_error, "Failed to receive next report date from slave process"); - } this->slave_next_report_time_offsets_[i] = slave_next_report_time_offset; OpmLog::info( fmt::format( diff --git a/opm/simulators/flow/ReservoirCouplingSlave.cpp b/opm/simulators/flow/ReservoirCouplingSlave.cpp index c735d09b70d..f98932c1c4d 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.cpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.cpp @@ -48,6 +48,9 @@ ReservoirCouplingSlave( if (this->slave_master_comm_ == MPI_COMM_NULL) { OPM_THROW(std::runtime_error, "Slave process is not spawned by a master process"); } + // NOTE: By installing a custom error handler for all slave-master communicators, which + // eventually will call MPI_Abort(), there is no need to check the return value of any + // MPI_Recv() or MPI_Send() calls as errors will be caught by the error handler. ReservoirCoupling::setErrhandler(this->slave_master_comm_, /*is_master=*/false); } @@ -56,7 +59,10 @@ ReservoirCouplingSlave:: receiveNextTimeStepFromMaster() { double timestep; if (this->comm_.rank() == 0) { - int result = MPI_Recv( + // NOTE: All slave-master communicators have set a custom error handler, which eventually + // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv() + // or MPI_Send() calls. + MPI_Recv( ×tep, /*count=*/1, /*datatype=*/MPI_DOUBLE, @@ -65,9 +71,6 @@ receiveNextTimeStepFromMaster() { this->slave_master_comm_, MPI_STATUS_IGNORE ); - if (result != MPI_SUCCESS) { - OPM_THROW(std::runtime_error, "Failed to receive next time step from master"); - } OpmLog::info( fmt::format("Slave rank 0 received next timestep {} from master.", timestep) ); @@ -84,7 +87,10 @@ receiveMasterGroupNamesFromMasterProcess() { std::vector group_names; if (this->comm_.rank() == 0) { MPI_Aint asize = 0; - int result = MPI_Recv( + // NOTE: All slave-master communicators have set a custom error handler, which eventually + // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv() + // or MPI_Send() calls. + MPI_Recv( &asize, /*count=*/1, /*datatype=*/MPI_AINT, @@ -94,15 +100,11 @@ receiveMasterGroupNamesFromMasterProcess() { MPI_STATUS_IGNORE ); OpmLog::info("Received master group names size from master process rank 0"); - if (result != MPI_SUCCESS) { - OPM_THROW(std::runtime_error, - "Failed to receive master group names (size) from master process"); - } // NOTE: MPI_Aint and std::size_t should be compatible on most systems, but we will // cast it to std::size_t to avoid any potential issues size = static_cast(asize); group_names.resize(size); - int result2 = MPI_Recv( + MPI_Recv( group_names.data(), /*count=*/size, /*datatype=*/MPI_CHAR, @@ -111,10 +113,6 @@ receiveMasterGroupNamesFromMasterProcess() { this->slave_master_comm_, MPI_STATUS_IGNORE ); - if (result2 != MPI_SUCCESS) { - OPM_THROW(std::runtime_error, - "Failed to receive master group names from master process"); - } OpmLog::info("Received master group names from master process rank 0"); } this->comm_.broadcast(&size, /*count=*/1, /*emitter_rank=*/0); @@ -136,6 +134,9 @@ sendNextReportDateToMasterProcess() const // NOTE: This is an offset in seconds from the start date, so it will be 0 if the next report // would be the start date. In general, it should be a positive number. double next_report_time_offset = elapsed_time + current_step_length; + // NOTE: All slave-master communicators have set a custom error handler, which eventually + // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv() + // or MPI_Send() calls. MPI_Send( &next_report_time_offset, /*count=*/1, @@ -155,6 +156,9 @@ sendActivationDateToMasterProcess() const if (this->comm_.rank() == 0) { // NOTE: The master process needs the s double activation_date = this->getGrupSlavActivationDate_(); + // NOTE: All slave-master communicators have set a custom error handler, which eventually + // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv() + // or MPI_Send() calls. MPI_Send( &activation_date, /*count=*/1, @@ -174,6 +178,9 @@ sendSimulationStartDateToMasterProcess() const if (this->comm_.rank() == 0) { // NOTE: The master process needs the s double start_date = this->schedule_.getStartTime(); + // NOTE: All slave-master communicators have set a custom error handler, which eventually + // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv() + // or MPI_Send() calls. MPI_Send( &start_date, /*count=*/1, diff --git a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp index 34f051954e4..1d9c6bfcea9 100644 --- a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp +++ b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp @@ -154,7 +154,10 @@ receiveActivationDateFromSlaves_() if (this->comm_.rank() == 0) { for (unsigned int i = 0; i < num_slaves; i++) { double slave_activation_date; - int result = MPI_Recv( + // NOTE: All slave-master communicators have set a custom error handler, which eventually + // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv() + // or MPI_Send() calls. + MPI_Recv( &slave_activation_date, /*count=*/1, /*datatype=*/MPI_DOUBLE, @@ -163,9 +166,6 @@ receiveActivationDateFromSlaves_() this->master_.getSlaveComm(i), MPI_STATUS_IGNORE ); - if (result != MPI_SUCCESS) { - OPM_THROW(std::runtime_error, "Failed to receive activation date from slave process"); - } if (slave_activation_date < this->master_.getActivationDate()) { OPM_THROW(std::runtime_error, "Slave process start date is earlier than " "the master process' activation date"); @@ -188,7 +188,10 @@ receiveSimulationStartDateFromSlaves_() if (this->comm_.rank() == 0) { for (unsigned int i = 0; i < num_slaves; i++) { double slave_start_date; - int result = MPI_Recv( + // NOTE: All slave-master communicators have set a custom error handler, which eventually + // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv() + // or MPI_Send() calls. + MPI_Recv( &slave_start_date, /*count=*/1, /*datatype=*/MPI_DOUBLE, @@ -197,9 +200,6 @@ receiveSimulationStartDateFromSlaves_() this->master_.getSlaveComm(i), MPI_STATUS_IGNORE ); - if (result != MPI_SUCCESS) { - OPM_THROW(std::runtime_error, "Failed to receive start date from slave process"); - } this->master_.addSlaveStartDate(slave_start_date); OpmLog::info( fmt::format( @@ -227,6 +227,9 @@ sendMasterGroupNamesToSlaves_() for (unsigned int i = 0; i < num_slaves; i++) { auto slave_name = this->master_.getSlaveName(i); auto [group_names, size] = this->getMasterGroupNamesForSlave_(slave_name); + // NOTE: All slave-master communicators have set a custom error handler, which eventually + // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv() + // or MPI_Send() calls. // NOTE: size should be of type std::size_t, so we can safely cast it to MPI_AINT MPI_Send( &size, @@ -312,6 +315,9 @@ spawnSlaveProcesses_() } OPM_THROW(std::runtime_error, "Failed to spawn slave process"); } + // NOTE: By installing a custom error handler for all slave-master communicators, which + // eventually will call MPI_Abort(), there is no need to check the return value of any + // MPI_Recv() or MPI_Send() calls as errors will be caught by the error handler. ReservoirCoupling::setErrhandler(master_slave_comm, /*is_master=*/true); OpmLog::info( fmt::format( From 407424544ec50d1ea7ad64a5a028efafd3ed1693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Sat, 7 Dec 2024 16:28:09 +0100 Subject: [PATCH 24/33] Fix typo in Equinor ASA --- opm/simulators/flow/ReservoirCoupling.cpp | 2 +- opm/simulators/flow/ReservoirCoupling.hpp | 2 +- opm/simulators/flow/ReservoirCouplingMaster.cpp | 2 +- opm/simulators/flow/ReservoirCouplingMaster.hpp | 2 +- opm/simulators/flow/ReservoirCouplingSlave.cpp | 2 +- opm/simulators/flow/ReservoirCouplingSlave.hpp | 2 +- opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp | 2 +- opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/opm/simulators/flow/ReservoirCoupling.cpp b/opm/simulators/flow/ReservoirCoupling.cpp index 1e25c7b5977..0ca3f321263 100644 --- a/opm/simulators/flow/ReservoirCoupling.cpp +++ b/opm/simulators/flow/ReservoirCoupling.cpp @@ -1,5 +1,5 @@ /* - Copyright 2024 Equinor AS + Copyright 2024 Equinor ASA This file is part of the Open Porous Media project (OPM). diff --git a/opm/simulators/flow/ReservoirCoupling.hpp b/opm/simulators/flow/ReservoirCoupling.hpp index 0d69c6a3a2b..08ad81fa0ae 100644 --- a/opm/simulators/flow/ReservoirCoupling.hpp +++ b/opm/simulators/flow/ReservoirCoupling.hpp @@ -1,5 +1,5 @@ /* - Copyright 2024 Equinor AS + Copyright 2024 Equinor ASA This file is part of the Open Porous Media project (OPM). diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp index 000c6cee179..c0e82fe3279 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.cpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp @@ -1,5 +1,5 @@ /* - Copyright 2024 Equinor AS + Copyright 2024 Equinor ASA This file is part of the Open Porous Media project (OPM). diff --git a/opm/simulators/flow/ReservoirCouplingMaster.hpp b/opm/simulators/flow/ReservoirCouplingMaster.hpp index a5090686377..c8de9316ffc 100644 --- a/opm/simulators/flow/ReservoirCouplingMaster.hpp +++ b/opm/simulators/flow/ReservoirCouplingMaster.hpp @@ -1,5 +1,5 @@ /* - Copyright 2024 Equinor AS + Copyright 2024 Equinor ASA This file is part of the Open Porous Media project (OPM). diff --git a/opm/simulators/flow/ReservoirCouplingSlave.cpp b/opm/simulators/flow/ReservoirCouplingSlave.cpp index f98932c1c4d..742e8e11489 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.cpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.cpp @@ -1,5 +1,5 @@ /* - Copyright 2024 Equinor AS + Copyright 2024 Equinor ASA This file is part of the Open Porous Media project (OPM). diff --git a/opm/simulators/flow/ReservoirCouplingSlave.hpp b/opm/simulators/flow/ReservoirCouplingSlave.hpp index 5bd72fbc603..61a8d3c0cd8 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.hpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.hpp @@ -1,5 +1,5 @@ /* - Copyright 2024 Equinor AS + Copyright 2024 Equinor ASA This file is part of the Open Porous Media project (OPM). diff --git a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp index 1d9c6bfcea9..17cce1e8e06 100644 --- a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp +++ b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp @@ -1,5 +1,5 @@ /* - Copyright 2024 Equinor AS + Copyright 2024 Equinor ASA This file is part of the Open Porous Media project (OPM). diff --git a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp index 2f59e6dec98..054f9939b23 100644 --- a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp +++ b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp @@ -1,5 +1,5 @@ /* - Copyright 2024 Equinor AS + Copyright 2024 Equinor ASA This file is part of the Open Porous Media project (OPM). From 89dc1930ef62449d46a2fe3e37cd5214521040e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Sat, 7 Dec 2024 16:34:27 +0100 Subject: [PATCH 25/33] Mark GRUPSLAV and GRUPMAST as unsupported Don't remove these entries from the map before reservoir coupling is completely supported. --- opm/simulators/utils/UnsupportedFlowKeywords.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/opm/simulators/utils/UnsupportedFlowKeywords.cpp b/opm/simulators/utils/UnsupportedFlowKeywords.cpp index aa737331798..d8afce54947 100644 --- a/opm/simulators/utils/UnsupportedFlowKeywords.cpp +++ b/opm/simulators/utils/UnsupportedFlowKeywords.cpp @@ -242,7 +242,9 @@ const KeywordValidation::UnsupportedKeywords& unsupportedKeywords() {"GRAVDRB", {true, std::nullopt}}, {"GRAVDRM", {true, std::nullopt}}, {"GRDREACH", {true, std::nullopt}}, + {"GRUPMAST", {true, std::nullopt}}, {"GRUPRIG", {true, std::nullopt}}, + {"GRUPSLAV", {true, std::nullopt}}, {"GRUPTARG", {true, std::nullopt}}, {"GSATINJE", {true, std::nullopt}}, {"GSEPCOND", {true, std::nullopt}}, From 46406a24d6ce691d518743c99c27901d5ef2805a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Sat, 7 Dec 2024 22:33:00 +0100 Subject: [PATCH 26/33] Use Dune::MPITraits to determine MPI datatype Determine size of std::size_t correctly for all platforms using Dune::MPITraits::getType() --- opm/simulators/flow/ReservoirCouplingSlave.cpp | 12 +++++------- opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp | 6 ++++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/opm/simulators/flow/ReservoirCouplingSlave.cpp b/opm/simulators/flow/ReservoirCouplingSlave.cpp index 742e8e11489..313f4e2ee8c 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.cpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.cpp @@ -25,9 +25,10 @@ #include #include #include - #include +#include + #include #include @@ -86,23 +87,20 @@ receiveMasterGroupNamesFromMasterProcess() { std::size_t size; std::vector group_names; if (this->comm_.rank() == 0) { - MPI_Aint asize = 0; + auto MPI_SIZE_T_TYPE = Dune::MPITraits::getType(); // NOTE: All slave-master communicators have set a custom error handler, which eventually // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv() // or MPI_Send() calls. MPI_Recv( - &asize, + &size, /*count=*/1, - /*datatype=*/MPI_AINT, + /*datatype=*/MPI_SIZE_T_TYPE, /*source_rank=*/0, /*tag=*/static_cast(MessageTag::MasterGroupNamesSize), this->slave_master_comm_, MPI_STATUS_IGNORE ); OpmLog::info("Received master group names size from master process rank 0"); - // NOTE: MPI_Aint and std::size_t should be compatible on most systems, but we will - // cast it to std::size_t to avoid any potential issues - size = static_cast(asize); group_names.resize(size); MPI_Recv( group_names.data(), diff --git a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp index 17cce1e8e06..741bbb224aa 100644 --- a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp +++ b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp @@ -30,6 +30,7 @@ #include +#include #include #include @@ -230,11 +231,12 @@ sendMasterGroupNamesToSlaves_() // NOTE: All slave-master communicators have set a custom error handler, which eventually // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv() // or MPI_Send() calls. - // NOTE: size should be of type std::size_t, so we can safely cast it to MPI_AINT + static_assert(std::is_same_v, "size must be of type std::size_t"); + auto MPI_SIZE_T_TYPE = Dune::MPITraits::getType(); MPI_Send( &size, /*count=*/1, - /*datatype=*/MPI_AINT, + /*datatype=*/MPI_SIZE_T_TYPE, /*dest_rank=*/0, /*tag=*/static_cast(MessageTag::MasterGroupNamesSize), this->master_.getSlaveComm(i) From dfbafd9b238bd0110c77b133ad7e4e2b818108f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Sun, 8 Dec 2024 14:23:51 +0100 Subject: [PATCH 27/33] Clearify that errhandler is a handle It is safe to free the error handler after MPI_Comm_set_errhandler() has been called --- opm/simulators/flow/ReservoirCoupling.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/opm/simulators/flow/ReservoirCoupling.cpp b/opm/simulators/flow/ReservoirCoupling.cpp index 0ca3f321263..8c4499a87ba 100644 --- a/opm/simulators/flow/ReservoirCoupling.cpp +++ b/opm/simulators/flow/ReservoirCoupling.cpp @@ -71,7 +71,15 @@ void setErrhandler(MPI_Comm comm, bool is_master) else { MPI_Comm_create_errhandler(custom_error_handler_slave_, &errhandler); } + // NOTE: The errhandler is a handle (an integer) that is associated with the communicator + // that is why we pass this by value below. And it is safe to free the errhandler after it has + // been associated with the communicator. MPI_Comm_set_errhandler(comm, errhandler); + // Mark the error handler for deletion. According to the documentation: "The error handler will + // be deallocated after all the objects associated with it (communicator, window, or file) have + // been deallocated." So the error handler will still be in effect until the communicator is + // deallocated. + MPI_Errhandler_free(&errhandler); } bool Seconds::compare_eq(double a, double b) From 18a03da1a8c8f94ee5180eca63d3937ad94b52e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Sun, 8 Dec 2024 22:31:31 +0100 Subject: [PATCH 28/33] Add doxygen comments Convert comment blocks into doxygen type comments --- opm/simulators/flow/ReservoirCoupling.hpp | 77 +++++++++++++++++------ 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/opm/simulators/flow/ReservoirCoupling.hpp b/opm/simulators/flow/ReservoirCoupling.hpp index 08ad81fa0ae..2308d4ce454 100644 --- a/opm/simulators/flow/ReservoirCoupling.hpp +++ b/opm/simulators/flow/ReservoirCoupling.hpp @@ -41,32 +41,69 @@ enum class MessageTag : int { void custom_error_handler_(MPI_Comm* comm, int* err, const std::string &msg); void setErrhandler(MPI_Comm comm, bool is_master); -// Utility class for comparing double values representing epoch dates (seconds since -// unix epoch) or elapsed time (seconds since the start of the simulation). -// NOTE: It is important that when comparing against start of a report step or similar, that -// that we do not miss these due to numerical issues. This is because communication between -// master and slave processes are based on these points in time. -// NOTE: Epoch values in this century (2000-2100) lies in the range of [1e9,4e9], and a double variable cannot -// represent such large values with high precision. For example, the date 01-01-2020 is equal -// to 1.5778368e9 seconds and adding 1e-7 seconds to this value will not change the value. -// So microseconds (1e-6) is approximately the smallest time unit we can represent for such a number. -// NOTE: Report steps seems to have a maximum resolution of whole seconds, see stepLength() in -// Schedule.cpp in opm-common, which returns the step length in seconds. +/// \brief Utility class for comparing double values representing epoch dates or elapsed time. +/// +/// This class is used to compare double values that represent: +/// - Epoch dates (seconds since the Unix epoch) +/// - Elapsed time (seconds since the start of the simulation) +/// +/// \note When comparing against the start of a report step or similar, it is important not to miss +/// these points due to numerical issues. Communication between master and slave processes is based +/// on these specific points in time. +/// +/// \note Epoch values in this century (2000-2100) fall within the range [1e9, 4e9]. A double variable +/// cannot represent such large values with high precision. For example, the date 01-01-2020 corresponds +/// to 1.5778368e9 seconds, and adding 1e-7 seconds to this value does not change it. Microseconds (1e-6) +/// are approximately the smallest time units that can be represented accurately for such numbers. +/// +/// \note Report steps appear to have a maximum resolution of whole seconds. See the `stepLength()` +/// function in `Schedule.cpp` in the `opm-common` module, which returns the step length in seconds. struct Seconds { + /// \brief Absolute tolerance used for comparisons. static constexpr double abstol = 1e-15; + + /// \brief Relative tolerance used for comparisons. static constexpr double reltol = 1e-15; - // We will will use the following expression to determine if two values a and b are equal: - // |a - b| <= tol = abstol + reltol * max(|a|, |b|) - // For example, assume abstol = reltol = 1e-15, then the following holds: - // - If |a| and |b| are below 1, then the absolute tolerance applies. - // - If a and b are above 1, then the relative tolerance applies. - // For example, for dates in the range 01-01-2000 to 01-01-2100, epoch values will be in the range - // [1e9, 4e9]. And we have 1e-15 * 1e9 = 1e-6, so numbers differing below one microsecond will - // be considered equal. - // NOTE: The above is not true for numbers close to zero, but we do not expect to compare such numbers. + + /// \brief Determines if two double values are equal within a specified tolerance. + /// + /// Two values \a a and \a b are considered equal if: + /// \f[ |a - b| \leq \text{tol} = \text{abstol} + \text{reltol} \times \max(|a|, |b|) \f] + /// + /// For example, if \a abstol = \a reltol = 1e-15: + /// - If \a |a| and \a |b| are below 1, the absolute tolerance applies. + /// - If \a a and \a b are above 1, the relative tolerance applies. + /// + /// \note For epoch dates between 01-01-2000 and 01-01-2100, epoch values are in the range [1e9, 4e9]. + /// Therefore, \f$ 10^{-15} \times 10^9 = 10^{-6} \f$, meaning differences smaller than one microsecond + /// will be considered equal. + /// + /// \note This approach is not accurate for numbers close to zero, but such comparisons are not expected. + /// + /// \param a First double value. + /// \param b Second double value. + /// \return True if \a a and \a b are considered equal within the tolerance. static bool compare_eq(double a, double b); + + /// \brief Determines if \a a is greater than \a b within the specified tolerance. + /// + /// \param a First double value. + /// \param b Second double value. + /// \return True if \a a is greater than \a b. static bool compare_gt(double a, double b); + + /// \brief Determines if \a a is greater than \a b within the specified tolerance. + /// + /// \param a First double value. + /// \param b Second double value. + /// \return True if \a a is greater than \a b. static bool compare_gt_or_eq(double a, double b); + + /// \brief Determines if \a a is less than or equal to \a b within the specified tolerance. + /// + /// \param a First double value. + /// \param b Second double value. + /// \return True if \a a is less than or equal to \a b. static bool compare_lt_or_eq(double a, double b); }; From ac7e77bedbe1c561764e932e058d47e3da830770 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 9 Dec 2024 13:20:18 +0100 Subject: [PATCH 29/33] Remove duplicate headers --- opm/simulators/flow/Main.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/opm/simulators/flow/Main.cpp b/opm/simulators/flow/Main.cpp index 9ed9ccd0d31..a90aacd78e7 100644 --- a/opm/simulators/flow/Main.cpp +++ b/opm/simulators/flow/Main.cpp @@ -45,16 +45,11 @@ #endif #include +// NOTE: There is no C++ header replacement for these C posix headers (as of C++17) #include // for open() #include // for dup2(), close() #include -#include // for open() -#include // for dup2(), close() - -#include -#include // for open() -#include // for dup2(), close() namespace Opm { From f867f9a977a8779e02008ec710499cf619894f6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Sat, 4 Jan 2025 16:24:08 +0100 Subject: [PATCH 30/33] Cleanup after rebase on master After rebasing on master some changes to AdaptiveTimeStepping.hpp and AdaptiveTimeStepping_impl.hpp were missed --- opm/simulators/timestepping/AdaptiveTimeStepping.hpp | 2 ++ .../timestepping/AdaptiveTimeStepping_impl.hpp | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/opm/simulators/timestepping/AdaptiveTimeStepping.hpp b/opm/simulators/timestepping/AdaptiveTimeStepping.hpp index daf0f455330..a90aefd8e70 100644 --- a/opm/simulators/timestepping/AdaptiveTimeStepping.hpp +++ b/opm/simulators/timestepping/AdaptiveTimeStepping.hpp @@ -69,6 +69,8 @@ namespace detail { void logTimer(const AdaptiveSimulatorTimer& substep_timer); std::set consistentlyFailingWells(const std::vector& sr); void registerAdaptiveParameters(); + std::tuple, bool> + createController(const UnitSystem& unitSystem); } template diff --git a/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp b/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp index 74c6c2c233c..fa169a06244 100644 --- a/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp +++ b/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp @@ -19,6 +19,10 @@ #ifndef OPM_ADAPTIVE_TIME_STEPPING_IMPL_HPP #define OPM_ADAPTIVE_TIME_STEPPING_IMPL_HPP + +// Improve IDE experience +#ifndef OPM_ADAPTIVE_TIME_STEPPING_HPP +#include #include #endif @@ -40,6 +44,7 @@ #include #include +#include #include #include @@ -408,7 +413,7 @@ init_(const UnitSystem& unitSystem) time_step_control_, use_newton_iteration_) = detail::createController(unitSystem); // make sure growth factor is something reasonable - if (growthFactor_ < 1.0) { + if (this->growth_factor_ < 1.0) { OPM_THROW(std::runtime_error, "Growth factor cannot be less than 1."); } @@ -1266,3 +1271,5 @@ double AdaptiveTimeStepping::SolutionTimeErrorSolverWrapper::re } } // namespace Opm + +#endif // OPM_ADAPTIVE_TIME_STEPPING_IMPL_HPP From 93eda52576ca780aa6225e11e1b9b6f99b58c388 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Sun, 12 Jan 2025 13:53:29 +0100 Subject: [PATCH 31/33] Fix typo --- opm/simulators/flow/ReservoirCouplingSlave.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opm/simulators/flow/ReservoirCouplingSlave.cpp b/opm/simulators/flow/ReservoirCouplingSlave.cpp index 313f4e2ee8c..eb16256af37 100644 --- a/opm/simulators/flow/ReservoirCouplingSlave.cpp +++ b/opm/simulators/flow/ReservoirCouplingSlave.cpp @@ -165,7 +165,7 @@ sendActivationDateToMasterProcess() const /*tag=*/static_cast(MessageTag::SlaveActivationDate), this->slave_master_comm_ ); - OpmLog::info("Sent simulation start date to master process from rank 0"); + OpmLog::info("Sent simulation activation date to master process from rank 0"); } } From b4192b06c36b5ba57040c060e64447ac4834b0a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Fri, 17 Jan 2025 22:09:23 +0100 Subject: [PATCH 32/33] Fix rebase problem --- CMakeLists_files.cmake | 7 ------- opm/simulators/flow/FlowMain.hpp | 4 ---- opm/simulators/flow/Main.cpp | 6 +++--- opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp | 2 -- 4 files changed, 3 insertions(+), 16 deletions(-) diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index efd0d3f45d2..4d8cd0c39da 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -112,10 +112,6 @@ list (APPEND MAIN_SOURCE_FILES opm/simulators/flow/RegionPhasePVAverage.cpp opm/simulators/flow/SimulatorConvergenceOutput.cpp opm/simulators/flow/SimulatorFullyImplicitBlackoil.cpp - opm/simulators/flow/ReservoirCoupling.cpp - opm/simulators/flow/ReservoirCouplingMaster.cpp - opm/simulators/flow/ReservoirCouplingSlave.cpp - opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp opm/simulators/flow/SimulatorReportBanners.cpp opm/simulators/flow/SimulatorSerializer.cpp opm/simulators/flow/SolutionContainers.cpp @@ -853,9 +849,6 @@ list (APPEND PUBLIC_HEADER_FILES opm/simulators/flow/RSTConv.hpp opm/simulators/flow/RegionPhasePVAverage.hpp opm/simulators/flow/SimulatorConvergenceOutput.hpp - opm/simulators/flow/ReservoirCouplingMaster.hpp - opm/simulators/flow/ReservoirCouplingSlave.hpp - opm/simulators/flow/ReservoirCouplingSpawnSlaves.hpp opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp opm/simulators/flow/SimulatorReportBanners.hpp opm/simulators/flow/SimulatorSerializer.hpp diff --git a/opm/simulators/flow/FlowMain.hpp b/opm/simulators/flow/FlowMain.hpp index f5589763679..610b735e5fe 100644 --- a/opm/simulators/flow/FlowMain.hpp +++ b/opm/simulators/flow/FlowMain.hpp @@ -102,10 +102,6 @@ namespace Opm { Parameters::Register ("Developer option to see whether logging was on non-root processors. " "In that case it will be appended to the *.DBG or *.PRT files"); - Parameters::Register - ("Specify if the simulation is a slave simulation in a master-slave simulation"); - Parameters::Hide(); - Simulator::registerParameters(); // register the base parameters registerAllParameters_(/*finalizeRegistration=*/false); diff --git a/opm/simulators/flow/Main.cpp b/opm/simulators/flow/Main.cpp index a90aacd78e7..9ce461cce0e 100644 --- a/opm/simulators/flow/Main.cpp +++ b/opm/simulators/flow/Main.cpp @@ -353,7 +353,7 @@ void Main::setupDamaris(const std::string& outputDir ) //const auto find_replace_map; //const auto find_replace_map = Opm::DamarisOutput::DamarisKeywords(EclGenericVanguard::comm(), outputDir); std::map find_replace_map; - find_replace_map = Opm::DamarisOutput::getDamarisKeywords(FlowGenericVanguard::comm(), outputDir); + find_replace_map = DamarisOutput::getDamarisKeywords(FlowGenericVanguard::comm(), outputDir); // By default EnableDamarisOutputCollective is true so all simulation results will // be written into one single file for each iteration using Parallel HDF5. @@ -366,9 +366,9 @@ void Main::setupDamaris(const std::string& outputDir ) find_replace_map); int is_client; MPI_Comm new_comm; - // damaris_start() is where the Damaris Server ranks will block, until damaris_stop() + // damaris_start() is where the Damaris Server ranks will block, until damaris_stop() // is called from the client ranks - int err = damaris_start(&is_client); + int err = damaris_start(&is_client); isSimulationRank_ = (is_client > 0); if (isSimulationRank_ && err == DAMARIS_OK) { damaris_client_comm_get(&new_comm); diff --git a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp index 8303cc461d1..4198cc3777c 100644 --- a/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp +++ b/opm/simulators/flow/SimulatorFullyImplicitBlackoil.hpp @@ -47,8 +47,6 @@ #include #include #include -#include -#include #include #include #include From 18d35cb820b787654fc7747d1155379f0cd5eed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kon=20H=C3=A6gland?= Date: Mon, 20 Jan 2025 23:01:39 +0100 Subject: [PATCH 33/33] Explain the timestepping Adds developer documentation about the timestepping procedure. --- .../AdaptiveTimeStepping_impl.hpp | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp b/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp index fa169a06244..4e113fe5e93 100644 --- a/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp +++ b/opm/simulators/timestepping/AdaptiveTimeStepping_impl.hpp @@ -579,6 +579,36 @@ reservoirCouplingSlave_() #endif #ifdef RESERVOIR_COUPLING_ENABLED +// Description of the reservoir coupling master and slave substep loop +// ------------------------------------------------------------------- +// The master and slave processes attempts to reach the end of the report step using a series of substeps +// (also called timesteps). Each substep have an upper limit that is roughly determined by a combination +// of the keywords TUNING (through the TSINIT, TSMAXZ values), NEXSTEP, WCYCLE, and the start of the +// next report step (the last substep needs to coincide with this time). Note that NEXTSTEP can be +// updated from an ACTIONX keyword. Although, this comment focuses on the maximum substep limit, +// note that there is also a lower limit on the substep length. And the substep sizes will be adjusted +// automatically (or retried) based on the convergence behavior of the solver and other criteria. +// +// When using reservoir coupling, the upper limit on each substep is further limited to: a) not overshoot +// next report date of a slave reservoir, and b) to keep the flow rate of the slave groups within +// certain limits. To determine this potential further limitation on the substep, the following procedure +// is used at the beginning of each master substep: +// - First, the slaves sends the master the date of their next report step +// - The master receives the date of the next report step from the slaves +// - If necessary, the master computes a modified substep length based on the received dates +// TODO: explain how the substep is limited to keep the flow rate of the slave groups within certain +// limits. Since this is not implemented yet, this part of the procedure is not explained here. +// +// Then, after the master has determined the substep length using the above procedure, it sends it +// to the slaves. The slaves will now use the end of this substep as a fixed point (like a mini report +// step), that they will try to reach using a single substep or multiple substeps. The end of the +// substep received from the master will either conincide with the end of its current report step, or +// it will end before (it cannot not end after since the master has already adjusted the step to not +// exceed any report time in a slave). If the end of this substep is earlier in time than its next report +// date, the slave will enter a loop and wait for the master to send it a new substep. +// The loop is finished when the end of the received time step conincides with the end of its current +// report step. + template template SimulatorReport