From 791fd8da705afaf7ca95b372d970e8ce0574f46d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Berland?= Date: Thu, 9 Jan 2025 08:35:15 +0100 Subject: [PATCH] Allow the bhist command to be temporarily unavailable This makes the handling of a FileNotFoundError on bhist similar to the handling of FileNotFoundError from bjobs. It is important not to crash on potentially intermittent failures in code that is rerun every 2 seconds. --- src/ert/scheduler/lsf_driver.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/ert/scheduler/lsf_driver.py b/src/ert/scheduler/lsf_driver.py index 57c465c62f5..d39816b0cff 100644 --- a/src/ert/scheduler/lsf_driver.py +++ b/src/ert/scheduler/lsf_driver.py @@ -584,12 +584,17 @@ async def _poll_once_by_bhist( if time.time() - self._bhist_cache_timestamp < self._bhist_required_cache_age: return {} - process = await asyncio.create_subprocess_exec( - self._bhist_cmd, - *[str(job_id) for job_id in missing_job_ids], - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) + try: + process = await asyncio.create_subprocess_exec( + self._bhist_cmd, + *[str(job_id) for job_id in missing_job_ids], + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + except FileNotFoundError as e: + logger.error(str(e)) + return {} + stdout, stderr = await process.communicate() if process.returncode: logger.error(