From bc777e28d4d45d5043678b1f352f9130a38fa205 Mon Sep 17 00:00:00 2001
From: Kaiyuan Eric Chen
Date: Thu, 9 Jan 2025 10:47:17 -0800
Subject: [PATCH] [Jobs] Refactor dashboard controller launching with systemd (#4538)

* convert to systemd

* Refactor jobs-controller.yaml.j2 to use systemd user service for skypilot-dashboard. Added user-specific environment variables and log redirection. Updated service management commands to operate in user mode.

* Enhance jobs-controller.yaml.j2 to check for systemd user service availability before executing service management commands. If systemd is not found, implement a manual setup for the SkyPilot dashboard, including process termination and background launch. This improves robustness and user experience during setup.

* streamline sky/templates/jobs-controller.yaml.j2

Co-authored-by: Christopher Cooper

* Refactor jobs-controller.yaml.j2 to remove Flask installation check and update systemd service configuration. Added Flask as a dependency in controller_utils.py for the dashboard. Changed service target from multi-user to default for improved service management.

---------

Co-authored-by: Christopher Cooper
---
 sky/templates/jobs-controller.yaml.j2 | 38 ++++++++++++++++++++++++---
 sky/utils/controller_utils.py         |  3 +++
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/sky/templates/jobs-controller.yaml.j2 b/sky/templates/jobs-controller.yaml.j2
index 45cdb5141d4..71c808fdd0f 100644
--- a/sky/templates/jobs-controller.yaml.j2
+++ b/sky/templates/jobs-controller.yaml.j2
@@ -26,10 +26,40 @@ setup: |
   echo 'export SKYPILOT_DEV=1' >> ~/.bashrc
   {% endif %}
 
-  # Dashboard.
-  ps aux | grep -v nohup | grep -v grep | grep -- "-m sky.spot.dashboard" | awk '{print $2}' | xargs kill > /dev/null 2>&1 || true
-  pip list | grep flask > /dev/null 2>&1 || pip install flask 2>&1 > /dev/null
-  ((ps aux | grep -v nohup | grep -v grep | grep -q -- "-m sky.jobs.dashboard.dashboard") || (nohup {{ sky_python_cmd }} -m sky.jobs.dashboard.dashboard >> ~/.sky/job-dashboard.log 2>&1 &));
+  # Create the systemd user unit directory (per-user services live here).
+  mkdir -p ~/.config/systemd/user/
+
+  # Write the unit file. $PATH and $HOME are expanded by the shell now, because systemd does not expand variables in Environment= values.
+  cat << EOF > ~/.config/systemd/user/skypilot-dashboard.service
+  [Unit]
+  Description=SkyPilot Jobs Dashboard
+  After=network.target
+
+  [Service]
+  Environment="PATH={{ sky_python_env_path }}:$PATH"
+  Environment="SKYPILOT_USER_ID={{controller_envs.SKYPILOT_USER_ID}}"
+  Environment="SKYPILOT_USER={{controller_envs.SKYPILOT_USER}}"
+  Restart=always
+  StandardOutput=append:$HOME/.sky/job-dashboard.log
+  StandardError=append:$HOME/.sky/job-dashboard.log
+  ExecStart={{ sky_python_cmd }} -m sky.jobs.dashboard.dashboard
+
+  [Install]
+  WantedBy=default.target
+  EOF
+
+  if command -v systemctl &>/dev/null && systemctl --user show &>/dev/null; then
+    systemctl --user daemon-reload
+    systemctl --user enable --now skypilot-dashboard
+  else
+    echo "Systemd user services not found. Setting up SkyPilot dashboard manually."
+    # Kill any old dashboard processes
+    ps aux | grep -v nohup | grep -v grep | grep -- '-m sky.jobs.dashboard.dashboard' \
+      | awk '{print $2}' | xargs kill > /dev/null 2>&1 || true
+    # Launch the dashboard in the background if not already running
+    (ps aux | grep -v nohup | grep -v grep | grep -q -- '-m sky.jobs.dashboard.dashboard') || \
+    (nohup {{ sky_python_cmd }} -m sky.jobs.dashboard.dashboard >> ~/.sky/job-dashboard.log 2>&1 &)
+  fi
 
 run: |
   {{ sky_activate_python_env }}
diff --git a/sky/utils/controller_utils.py b/sky/utils/controller_utils.py
index 39623085bbb..acb636893a5 100644
--- a/sky/utils/controller_utils.py
+++ b/sky/utils/controller_utils.py
@@ -206,6 +206,9 @@ def _get_cloud_dependencies_installation_commands(
     # installed, so we don't check that.
     python_packages: Set[str] = set()
 
+    # add flask to the controller dependencies for dashboard
+    python_packages.add('flask')
+
     step_prefix = prefix_str.replace('<step>', str(len(commands) + 1))
     commands.append(f'echo -en "\\r{step_prefix}uv{empty_str}" &&'
                     f'{constants.SKY_UV_INSTALL_CMD} >/dev/null 2>&1')