Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add error message for rsync not found in image #2250

Merged
merged 4 commits into from
Jul 20, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions sky/backends/cloud_vm_ray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,11 @@ def _update_blocklist_on_gcp_error(
logger.warning(f'Got \'resource not found\' in {zone.name}.')
self._blocked_resources.add(
launchable_resources.copy(zone=zone.name))
elif 'rsync: command not found' in stderr:
with ux_utils.print_exception_no_traceback():
raise RuntimeError(
'rsync is not installed on the specific image. '
'Please install rsync and try again.')
cblmemo marked this conversation as resolved.
Show resolved Hide resolved
else:
logger.info('====== stdout ======')
for s in stdout.split('\n'):
Expand Down Expand Up @@ -806,6 +811,11 @@ def _update_blocklist_on_aws_error(
line.startswith('<1/1> Setting up head node')
for line in stdout_splits + stderr_splits)
if not errors or head_node_up:
if 'rsync: command not found' in stderr:
with ux_utils.print_exception_no_traceback():
raise RuntimeError(
'rsync is not installed on the specific image. '
'Please install rsync and try again.')
# TODO: Got transient 'Failed to create security group' that goes
# away after a few minutes. Should we auto retry other regions, or
# let the user retry.
Expand Down Expand Up @@ -858,6 +868,11 @@ def _update_blocklist_on_azure_error(
in s.strip() or '(ReadOnlyDisabledSubscription)' in s.strip())
]
if not errors:
if 'rsync: command not found' in stderr:
with ux_utils.print_exception_no_traceback():
raise RuntimeError(
'rsync is not installed on the specific image. '
'Please install rsync and try again.')
logger.info('====== stdout ======')
for s in stdout_splits:
print(s)
Expand Down Expand Up @@ -891,6 +906,11 @@ def _update_blocklist_on_lambda_error(
if 'LambdaCloudError:' in s.strip()
]
if not errors:
if 'rsync: command not found' in stderr:
with ux_utils.print_exception_no_traceback():
raise RuntimeError(
'rsync is not installed on the specific image. '
'Please install rsync and try again.')
logger.info('====== stdout ======')
for s in stdout_splits:
print(s)
Expand Down Expand Up @@ -927,6 +947,11 @@ def _update_blocklist_on_scp_error(
if 'SCPError:' in s.strip()
]
if not errors:
if 'rsync: command not found' in stderr:
with ux_utils.print_exception_no_traceback():
raise RuntimeError(
'rsync is not installed on the specific image. '
'Please install rsync and try again.')
logger.info('====== stdout ======')
for s in stdout_splits:
print(s)
Expand Down Expand Up @@ -964,6 +989,11 @@ def _update_blocklist_on_ibm_error(
if 'ERR' in s.strip() or 'PANIC' in s.strip()
]
if not errors:
if 'rsync: command not found' in stderr:
with ux_utils.print_exception_no_traceback():
raise RuntimeError(
'rsync is not installed on the specific image. '
'Please install rsync and try again.')
logger.info('====== stdout ======')
for s in stdout_splits:
print(s)
Expand Down Expand Up @@ -995,6 +1025,11 @@ def _update_blocklist_on_local_error(
if 'ERR' in s.strip() or 'PANIC' in s.strip()
]
if not errors:
if 'rsync: command not found' in stderr:
with ux_utils.print_exception_no_traceback():
raise RuntimeError(
'rsync is not installed on the specific image. '
'Please install rsync and try again.')
logger.info('====== stdout ======')
for s in stdout_splits:
print(s)
Expand Down Expand Up @@ -1030,6 +1065,11 @@ def _update_blocklist_on_oci_error(
'LimitExceeded' in s.strip() or 'NotAuthenticated' in s.strip()))
]
if not errors:
if 'rsync: command not found' in stderr:
with ux_utils.print_exception_no_traceback():
raise RuntimeError(
'rsync is not installed on the specific image. '
'Please install rsync and try again.')
logger.info('====== stdout ======')
for s in stdout_splits:
print(s)
Expand Down Expand Up @@ -1758,6 +1798,11 @@ def need_ray_up(
'error.')
return True

if 'rsync: command not found' in stderr:
logger.info('Skipping retry due to `rsync` not found in '
'the specified image.')
return False

if ('Processing file mounts' in stdout and
'Running setup commands' not in stdout and
'Failed to setup head node.' in stderr):
Expand Down