diff --git a/src/_ert/events.py b/src/_ert/events.py index 99033c7e833..780a0f8fb0c 100644 --- a/src/_ert/events.py +++ b/src/_ert/events.py @@ -110,6 +110,7 @@ class RealizationBaseEvent(BaseEvent): real: str ensemble: Union[str, None] = None queue_event_type: Union[str, None] = None + exec_hosts: Union[str, None] = None class RealizationPending(RealizationBaseEvent): diff --git a/src/ert/ensemble_evaluator/snapshot.py b/src/ert/ensemble_evaluator/snapshot.py index 35e0157944f..5bb57019bbf 100644 --- a/src/ert/ensemble_evaluator/snapshot.py +++ b/src/ert/ensemble_evaluator/snapshot.py @@ -252,6 +252,7 @@ def update_realization( status: str, start_time: Optional[datetime] = None, end_time: Optional[datetime] = None, + exec_hosts: Optional[str] = None, callback_status_message: Optional[str] = None, ) -> "EnsembleSnapshot": self._realization_snapshots[real_id].update( @@ -260,6 +261,7 @@ def update_realization( status=status, start_time=start_time, end_time=end_time, + exec_hosts=exec_hosts, callback_status_message=callback_status_message, ) ) @@ -279,10 +281,12 @@ def update_from_event( status = _FM_TYPE_EVENT_TO_STATUS[type(event)] start_time = None end_time = None + exec_hosts = None callback_status_message = None if e_type is RealizationRunning: start_time = convert_iso8601_to_datetime(timestamp) + exec_hosts = event.exec_hosts elif e_type in { RealizationSuccess, RealizationFailed, @@ -296,6 +300,7 @@ def update_from_event( status, start_time, end_time, + exec_hosts, callback_status_message, ) @@ -397,6 +402,7 @@ class RealizationSnapshot(TypedDict, total=False): active: Optional[bool] start_time: Optional[datetime] end_time: Optional[datetime] + exec_hosts: Optional[str] fm_steps: Dict[str, FMStepSnapshot] callback_status_message: Optional[str] @@ -411,6 +417,7 @@ def _realization_dict_to_realization_snapshot( end_time=source.get("end_time"), callback_status_message=source.get("callback_status_message"), fm_steps=source.get("fm_steps", {}), + exec_hosts=source.get("exec_hosts"), ) return _filter_nones(realization) diff --git a/src/ert/gui/model/node.py b/src/ert/gui/model/node.py index 90a3772faff..29e294cf003 100644 --- a/src/ert/gui/model/node.py +++ b/src/ert/gui/model/node.py @@ -74,6 +74,7 @@ class RealNodeData: real_status_color: Optional[QColor] = None current_memory_usage: Optional[int] = None max_memory_usage: Optional[int] = None + exec_hosts: Optional[str] = None stderr: Optional[str] = None callback_status_message: Optional[str] = None diff --git a/src/ert/gui/model/snapshot.py b/src/ert/gui/model/snapshot.py index e5509888930..6ea4ba6e351 100644 --- a/src/ert/gui/model/snapshot.py +++ b/src/ert/gui/model/snapshot.py @@ -168,6 +168,8 @@ def _update_snapshot(self, snapshot: EnsembleSnapshot, iter_: str) -> None: data = real_node.data if real_status := real.get("status"): data.status = real_status + if real_exec_hosts := real.get("exec_hosts"): + data.exec_hosts = real_exec_hosts for real_fm_step_id, color in ( metadata["aggr_fm_step_status_colors"].get(real_id, {}).items() ): diff --git a/src/ert/gui/simulation/run_dialog.py b/src/ert/gui/simulation/run_dialog.py index 56cba507be2..be5e34eb51f 100644 --- a/src/ert/gui/simulation/run_dialog.py +++ b/src/ert/gui/simulation/run_dialog.py @@ -335,10 +335,21 @@ def on_snapshot_new_iteration( def _select_real(self, index: QModelIndex) -> None: real = index.row() iter_ = index.model().get_iter() # type: ignore + exec_hosts = None + + iter_node = self._snapshot_model.root.children.get(str(iter_), None) + if iter_node: + real_node = iter_node.children.get(str(real), None) + if real_node: + exec_hosts = real_node.data.exec_hosts + self._fm_step_overview.set_realization(iter_, real) - self._fm_step_label.setText( + text = ( f"Realization id {index.data(RealIens)} in iteration {index.data(IterNum)}" ) + if exec_hosts and exec_hosts != "-": + text += f", assigned to host: [{exec_hosts}]" + self._fm_step_label.setText(text) def closeEvent(self, a0: Optional[QCloseEvent]) -> None: if not self._notifier.is_simulation_running: diff --git a/src/ert/scheduler/event.py b/src/ert/scheduler/event.py index 9eff9c81be7..470e861fe23 100644 --- a/src/ert/scheduler/event.py +++ b/src/ert/scheduler/event.py @@ -7,12 +7,14 @@ @dataclass class StartedEvent: iens: int + exec_hosts: str = "" @dataclass class FinishedEvent: iens: int returncode: int + exec_hosts: str = "" Event = Union[StartedEvent, FinishedEvent] diff --git a/src/ert/scheduler/job.py b/src/ert/scheduler/job.py index 831de54e85f..5a4316cf6f2 100644 --- a/src/ert/scheduler/job.py +++ b/src/ert/scheduler/job.py @@ -62,6 +62,7 @@ def __init__(self, scheduler: Scheduler, real: Realization) -> None: self.real = real self.state = JobState.WAITING self.started = asyncio.Event() + self.exec_hosts: str = "-" self.returncode: asyncio.Future[int] = asyncio.Future() self._aborted = False self._scheduler: Scheduler = scheduler @@ -263,6 +264,7 @@ async def _send(self, state: JobState) -> None: "event_type": _queue_jobstate_event_type[state], "queue_event_type": state, "real": str(self.iens), + "exec_hosts": self.exec_hosts, } self.state = state if state == JobState.FAILED: diff --git a/src/ert/scheduler/lsf_driver.py b/src/ert/scheduler/lsf_driver.py index 941e05a2c0f..97297b443b1 100644 --- a/src/ert/scheduler/lsf_driver.py +++ b/src/ert/scheduler/lsf_driver.py @@ -373,6 +373,7 @@ async def submit( iens=iens, job_state=QueuedJob(job_state="PEND"), submitted_timestamp=time.time(), + exec_hosts="-", ) self._iens2jobid[iens] = job_id @@ -500,11 +501,15 @@ async def _process_job_update(self, job_id: str, new_state: AnyJob) -> None: event: Optional[Event] = None if isinstance(new_state, RunningJob): logger.debug(f"Realization {iens} is running") - event = StartedEvent(iens=iens) + event = StartedEvent(iens=iens, exec_hosts=self._jobs[job_id].exec_hosts) elif isinstance(new_state, FinishedJobFailure): logger.info(f"Realization {iens} (LSF-id: {self._iens2jobid[iens]}) failed") exit_code = await self._get_exit_code(job_id) - event = FinishedEvent(iens=iens, returncode=exit_code) + event = FinishedEvent( + iens=iens, + returncode=exit_code, + exec_hosts=self._jobs[job_id].exec_hosts, + ) elif isinstance(new_state, FinishedJobSuccess): logger.info( f"Realization {iens} (LSF-id: {self._iens2jobid[iens]}) succeeded" diff --git a/src/ert/scheduler/scheduler.py b/src/ert/scheduler/scheduler.py index ae96556421e..ed5fc816ccf 100644 --- a/src/ert/scheduler/scheduler.py +++ b/src/ert/scheduler/scheduler.py @@ -28,7 +28,7 @@ from ert.constant_filenames import CERT_FILE from .driver import Driver -from .event import FinishedEvent +from .event import FinishedEvent, StartedEvent from .job import Job, JobState if TYPE_CHECKING: @@ -308,6 +308,9 @@ async def _process_event_queue(self) -> None: # Any event implies the job has at least started job.started.set() + if isinstance(event, (StartedEvent, FinishedEvent)) and event.exec_hosts: + self._jobs[event.iens].exec_hosts = event.exec_hosts + if ( isinstance(event, FinishedEvent) and not self._cancelled