Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core] [4/N] Parameter change for ray cluster start with physical mode #48838

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions python/ray/_private/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,7 @@ def start_raylet(
object_store_memory: int,
use_valgrind: bool = False,
use_profiler: bool = False,
enable_physical_mode: bool = False,
):
"""Start the raylet.

Expand Down Expand Up @@ -1240,6 +1241,7 @@ def start_raylet(
node_name=self._ray_params.node_name,
webui=self._webui_url,
labels=self._get_node_labels(),
enable_physical_mode=enable_physical_mode,
)
assert ray_constants.PROCESS_TYPE_RAYLET not in self.all_processes
self.all_processes[ray_constants.PROCESS_TYPE_RAYLET] = [process_info]
Expand Down
4 changes: 4 additions & 0 deletions python/ray/_private/parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ class RayParams:
session_name: The name of the session of the ray cluster.
webui: The url of the UI.
cluster_id: The cluster ID in hex string.
enable_physical_mode: Whether physical mode is enabled, which applies
constraints to tasks' resource consumption.
"""

def __init__(
Expand Down Expand Up @@ -193,6 +195,7 @@ def __init__(
webui: Optional[str] = None,
cluster_id: Optional[str] = None,
node_id: Optional[str] = None,
enable_physical_mode: bool = False,
):
self.redis_address = redis_address
self.gcs_address = gcs_address
Expand Down Expand Up @@ -256,6 +259,7 @@ def __init__(
self._check_usage()
self.cluster_id = cluster_id
self.node_id = node_id
self.enable_physical_mode = enable_physical_mode

# Set the internal config options for object reconstruction.
if enable_object_reconstruction:
Expand Down
4 changes: 4 additions & 0 deletions python/ray/_private/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -1549,6 +1549,7 @@ def start_raylet(
node_name: Optional[str] = None,
webui: Optional[str] = None,
labels: Optional[dict] = None,
enable_physical_mode: bool = False,
):
"""Start a raylet, which is a combined local scheduler and object manager.

Expand Down Expand Up @@ -1605,6 +1606,8 @@ def start_raylet(
available externally to this node.
env_updates: Environment variable overrides.
labels: The key-value labels of the node.
enable_physical_mode: Whether physical mode is enabled, which applies
constraints to tasks' resource consumption.
Returns:
ProcessInfo for the process that was started.
"""
Expand Down Expand Up @@ -1695,6 +1698,7 @@ def start_raylet(
f"--temp-dir={temp_dir}",
f"--webui={webui}",
f"--cluster-id={cluster_id}",
f"--enable_physical_mode={enable_physical_mode}",
]
)

Expand Down
6 changes: 6 additions & 0 deletions python/ray/scripts/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,8 @@ def start(
labels,
):
"""Start Ray processes manually on the local machine."""
# TODO(hjiang): Expose physical mode interface to ray cluster start command after
# all features are implemented.

if gcs_server_port is not None:
cli_logger.error(
Expand Down Expand Up @@ -756,6 +758,7 @@ def start(
no_monitor=no_monitor,
tracing_startup_hook=tracing_startup_hook,
ray_debugger_external=ray_debugger_external,
enable_physical_mode=False,
)

if ray_constants.RAY_START_HOOK in os.environ:
Expand Down Expand Up @@ -1008,6 +1011,9 @@ def start(
)
temp_dir = node.get_temp_dir_path()

# TODO(hjiang): Validate whether specified resource is true for physical
# resource.

# Ray and Python versions should probably be checked before
# initializing Node.
node.check_version_info()
Expand Down
10 changes: 10 additions & 0 deletions src/ray/raylet/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ DEFINE_int64(object_store_memory, -1, "The initial memory of the object store.")
DEFINE_string(node_name, "", "The user-provided identifier or name for this node.");
DEFINE_string(session_name, "", "Session name (ClusterID) of the cluster.");
DEFINE_string(cluster_id, "", "ID of the cluster, separate from observability.");
DEFINE_bool(enable_physical_mode,
false,
"Whether physical mode is enaled, which applies constraint to tasks' "
"resource consumption.");

#ifdef __linux__
DEFINE_string(plasma_directory,
Expand Down Expand Up @@ -180,6 +184,12 @@ int main(int argc, char *argv[]) {
RAY_LOG(INFO) << "Setting cluster ID to: " << cluster_id;
gflags::ShutDownCommandLineFlags();

// Setup cgroup preparation if specified.
// TODO(hjiang): Depends on
// - https://github.com/ray-project/ray/pull/48833, which checks cgroup V2 availability.
// - https://github.com/ray-project/ray/pull/48828, which sets up cgroup preparation for
// cgroup related operations.

// Configuration for the node manager.
ray::raylet::NodeManagerConfig node_manager_config;
absl::flat_hash_map<std::string, double> static_resource_conf;
Expand Down