Skip to content

Commit

Permalink
Ignore OSErrors on subprocess call of bjobs
Browse files Browse the repository at this point in the history
Pretend these kinds of issues are flaky.
  • Loading branch information
berland committed Jan 10, 2025
1 parent 2a3f4f0 commit 102ad15
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/ert/scheduler/lsf_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,9 +444,10 @@ async def poll(self) -> None:
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
except FileNotFoundError as e:
except OSError as e:
logger.error(str(e))
return
await asyncio.sleep(self._poll_period)
continue

stdout, stderr = await process.communicate()
if process.returncode:
Expand Down
26 changes: 26 additions & 0 deletions tests/ert/unit_tests/scheduler/test_lsf_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1269,6 +1269,32 @@ def mock_poll_once_by_bhist(*args, **kwargs):
assert job_id in driver._bhist_cache


async def test_no_exception_when_bjobs_does_not_exist(caplog, job_name):
    """Check that polling tolerates a bjobs command that cannot be found.

    The driver is pointed at a non-existing bjobs path. poll() is expected
    to still be running when wait_for() gives up (hence the TimeoutError),
    and the failure must show up in the captured log rather than as an
    exception, so a temporarily flaky filesystem does not take the driver
    down.
    """
    lsf_driver = LsfDriver(bjobs_cmd="/bin_foo/not_existing")
    # Poll quickly so at least one poll attempt fits inside the timeout below.
    lsf_driver._poll_period = 0.01
    await lsf_driver.submit(0, "sh", "-c", "sleep 1", name=job_name)

    bounded_poll = asyncio.wait_for(lsf_driver.poll(), timeout=0.1)
    with pytest.raises(asyncio.TimeoutError):
        await bounded_poll

    expected_log = "No such file or directory: '/bin_foo/not_existing'"
    assert expected_log in caplog.text


@pytest.mark.integration_test
async def test_no_exception_when_no_access_to_bjobs_executable(
    not_found_bjobs, caplog, job_name
):
    """Check that polling tolerates a bjobs executable it may not execute.

    All permission bits are removed from ``bin/bjobs`` before polling.
    poll() is expected to still be running when wait_for() gives up (hence
    the TimeoutError), and "Permission denied" must appear in the captured
    log rather than surface as an exception, so a temporarily flaky
    filesystem does not take the driver down.
    """
    driver = LsfDriver()
    # Poll quickly so at least one poll attempt fits inside the timeout below.
    driver._poll_period = 0.01
    # Modify the bjobs from the fixture: strip every permission bit.
    # NOTE(review): presumably the not_found_bjobs fixture creates bin/bjobs
    # relative to the current directory -- confirm against the fixture.
    # File modes are written in octal by convention (was the hex literal 0x0).
    Path("bin/bjobs").chmod(0o000)
    await driver.submit(0, "sh", "-c", "echo", name=job_name)
    with pytest.raises(asyncio.TimeoutError):
        await asyncio.wait_for(driver.poll(), timeout=0.1)
    assert "Permission denied" in caplog.text


@pytest.mark.integration_test
async def test_that_kill_before_submit_is_finished_works(tmp_path, monkeypatch, caplog):
"""This test asserts that it is possible to issue a kill command
Expand Down

0 comments on commit 102ad15

Please sign in to comment.