From e0c871b3c9bd58aaaa3062a14cd6d0c6a754a9aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Berland?= Date: Fri, 24 Nov 2023 13:48:31 +0100 Subject: [PATCH] Fix bugs uncovered in simulation context Poking an internal variable in the queue, _queue_stopped. There should perhaps be an official way to stop the queue from a sync context --- src/ert/job_queue/driver.py | 8 +++++++- src/ert/job_queue/realization_state.py | 6 +++++- src/ert/simulator/simulation_context.py | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/ert/job_queue/driver.py b/src/ert/job_queue/driver.py index a18711a3788..858d04a90ef 100644 --- a/src/ert/job_queue/driver.py +++ b/src/ert/job_queue/driver.py @@ -93,7 +93,13 @@ async def submit(self, realization: "RealizationState") -> None: if process.returncode == 0: if output: logger.info(output) - realization.runend() + if str(realization.current_state.id) == "RUNNING": + realization.runend() + else: + logger.debug( + f"Realization {realization.realization.run_arg.iens} finished " + f"successfully but was in state {realization.current_state.id}" + ) else: if output: logger.error(output) diff --git a/src/ert/job_queue/realization_state.py b/src/ert/job_queue/realization_state.py index 1943739d9ee..23e3b2ef5d7 100644 --- a/src/ert/job_queue/realization_state.py +++ b/src/ert/job_queue/realization_state.py @@ -143,7 +143,11 @@ def on_enter_EXIT(self) -> None: failed_job = exit_file.find("job").text error_reason = exit_file.find("reason").text stderr_capture = exit_file.find("stderr").text - stderr_file = exit_file.find("stderr_file").text + + stderr_file = "" + if (stderr_file_node := exit_file.find("stderr_file")) is not None: + stderr_file = stderr_file_node.text + logger.error( f"job {failed_job} failed with: '{error_reason}'\n" f"\tstderr file: '{stderr_file}',\n" diff --git a/src/ert/simulator/simulation_context.py b/src/ert/simulator/simulation_context.py index b1c8aa1e512..1a72689e913 100644 --- 
a/src/ert/simulator/simulation_context.py +++ b/src/ert/simulator/simulation_context.py @@ -195,7 +195,7 @@ def get_sim_fs(self) -> EnsembleAccessor: return self._run_context.sim_fs def stop(self) -> None: - self.job_queue.kill_all_jobs() + self.job_queue._queue_stopped = True self._sim_thread.join() def job_progress(self, iens: int) -> Optional[ForwardModelStatus]: