From 9ee73aa44d3ceab8b6a47d7cc9f7fb887721e6f5 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 21 Jan 2025 07:30:29 +0000 Subject: [PATCH] fix comment --- llumnix/constants.py | 3 ++ llumnix/entrypoints/bladellm/utils.py | 14 ++++---- llumnix/entrypoints/vllm/arg_utils.py | 1 - .../global_scheduler/dispatch_scheduler.py | 6 ++-- llumnix/global_scheduler/migration_policy.py | 1 - llumnix/instance_info.py | 35 ++++++++++--------- llumnix/launcher.py | 10 +++--- 7 files changed, 35 insertions(+), 35 deletions(-) diff --git a/llumnix/constants.py b/llumnix/constants.py index bf4b723c..bd174c5e 100644 --- a/llumnix/constants.py +++ b/llumnix/constants.py @@ -21,6 +21,9 @@ WATCH_DEPLOYMENT_INTERVAL: float = 10.0 WATCH_DEPLOYMENT_INTERVAL_PENDING_INSTANCE: float = 120.0 +# llumnix/global_scheduler/dispatch_scheduler.py +DISPATCH_LOG_FREQUENCY = 100 + # llumnix/entrypoints/setup.py MAX_RAY_RESTARTS: int = 10 RAY_RESTART_INTERVAL: float = 10.0 diff --git a/llumnix/entrypoints/bladellm/utils.py b/llumnix/entrypoints/bladellm/utils.py index 7d6675db..a26571d8 100644 --- a/llumnix/entrypoints/bladellm/utils.py +++ b/llumnix/entrypoints/bladellm/utils.py @@ -13,15 +13,13 @@ from blade_llm.service.args import ServingArgs -from llumnix.arg_utils import EntrypointsArgs, ManagerArgs from llumnix.logging.logger import init_logger +from llumnix.backends.backend_interface import BackendType +from llumnix.arg_utils import EntrypointsArgs, ManagerArgs, InstanceArgs, LaunchMode logger = init_logger(__name__) -from llumnix.backends.backend_interface import BackendType -from llumnix.arg_utils import EntrypointsArgs, ManagerArgs, InstanceArgs, LaunchMode - def detect_unsupported_engine_feature(engine_args: ServingArgs) -> None: unsupported_feature = None if engine_args.enable_lora: @@ -56,9 +54,9 @@ def get_args(llumnix_cfg, llumnix_parser, engine_args: ServingArgs): detect_unsupported_engine_feature(engine_args) - logger.info("entrypoints_args: {}", entrypoints_args) - logger.info("manager_args: {}", manager_args) - logger.info("instance_args: {}", instance_args) - logger.info("engine_args: {}", engine_args) + logger.info("entrypoints_args: {}".format(entrypoints_args)) + logger.info("manager_args: {}".format(manager_args)) + logger.info("instance_args: {}".format(instance_args)) + logger.info("engine_args: {}".format(engine_args)) return entrypoints_args, manager_args, instance_args, engine_args diff --git a/llumnix/entrypoints/vllm/arg_utils.py b/llumnix/entrypoints/vllm/arg_utils.py index ac35d308..6d1e07aa 100644 --- a/llumnix/entrypoints/vllm/arg_utils.py +++ b/llumnix/entrypoints/vllm/arg_utils.py @@ -1,6 +1,5 @@ from vllm.engine.arg_utils import AsyncEngineArgs -from llumnix.arg_utils import EntrypointsArgs, ManagerArgs from llumnix.logging.logger import init_logger from llumnix.backends.backend_interface import BackendType from llumnix.backends.vllm.utils import check_engine_args diff --git a/llumnix/global_scheduler/dispatch_scheduler.py b/llumnix/global_scheduler/dispatch_scheduler.py index 665a82b1..a4aab518 100644 --- a/llumnix/global_scheduler/dispatch_scheduler.py +++ b/llumnix/global_scheduler/dispatch_scheduler.py @@ -16,14 +16,12 @@ import random from llumnix.logging.logger import init_logger -from llumnix.instance_info import InstanceInfo -from llumnix.instance_info import InstanceInfo +from llumnix.instance_info import InstanceInfo, InstanceType from llumnix.arg_utils import InstanceArgs -from llumnix.instance_info import InstanceType +from llumnix.constants import DISPATCH_LOG_FREQUENCY logger = init_logger(__name__) -DISPATCH_LOG_FREQUENCY = 100 class DispatchScheduler: def __init__(self, dispatch_policy: str,) -> None: diff --git a/llumnix/global_scheduler/migration_policy.py b/llumnix/global_scheduler/migration_policy.py index 6ade3fcb..63989da7 100644 --- a/llumnix/global_scheduler/migration_policy.py +++ b/llumnix/global_scheduler/migration_policy.py @@ -18,7 +18,6 @@ from llumnix.logging.logger import init_logger from llumnix.instance_info import InstanceInfo -from llumnix.instance_info import InstanceInfo logger = init_logger(__name__) diff --git a/llumnix/instance_info.py b/llumnix/instance_info.py index b20f4ed0..c79049c0 100644 --- a/llumnix/instance_info.py +++ b/llumnix/instance_info.py @@ -90,25 +90,26 @@ def __init__(self, load_metric: str, enable_defrag: bool = False) -> None: self.enable_defrag = enable_defrag @abstractmethod - def compute_instance_load(self, i: InstanceInfo) -> float: + def compute_instance_load(self, instance_info: InstanceInfo) -> float: pass class DispatchLoadComputation(LoadComputationStrategy): - def compute_instance_load(self, i: InstanceInfo) -> float: + def compute_instance_load(self, instance_info: InstanceInfo) -> float: instance_load = -np.inf if self.load_metric == 'usage_ratio': - instance_load = (i.num_used_gpu_blocks + i.num_blocks_all_waiting_requests) / i.num_total_gpu_blocks + instance_load = (instance_info.num_used_gpu_blocks + instance_info.num_blocks_all_waiting_requests) \ + / instance_info.num_total_gpu_blocks elif self.load_metric == 'remaining_steps': - num_requests = i.num_running_requests + i.num_waiting_requests - num_available_gpu_blocks = i.num_available_gpu_blocks - i.num_blocks_all_waiting_requests + num_requests = instance_info.num_running_requests + instance_info.num_waiting_requests + num_available_gpu_blocks = instance_info.num_available_gpu_blocks - instance_info.num_blocks_all_waiting_requests if num_requests == 0: return -np.inf instance_load = (num_available_gpu_blocks / num_requests)*(-1) return instance_load class MigrationLoadComputation(LoadComputationStrategy): - def compute_instance_load_after_migrate(self, i: InstanceInfo, is_migrate_in: bool) -> float: - instance_info_after_migrate = copy.deepcopy(i) + def compute_instance_load_after_migrate(self, instance_info: InstanceInfo, is_migrate_in: bool) -> float: + instance_info_after_migrate = copy.deepcopy(instance_info) num_blocks_last_running_request = instance_info_after_migrate.num_blocks_last_running_request if is_migrate_in: @@ -120,19 +121,21 @@ def compute_instance_load_after_migrate(self, i: InstanceInfo, is_migrate_in: bo return self.compute_instance_load(instance_info_after_migrate) - def compute_instance_load(self, i: InstanceInfo) -> float: + def compute_instance_load(self, instance_info: InstanceInfo) -> float: instance_load = -np.inf if self.load_metric == 'usage_ratio': - instance_load = (i.num_used_gpu_blocks + i.num_blocks_first_waiting_request) / i.num_total_gpu_blocks + instance_load = (instance_info.num_used_gpu_blocks + instance_info.num_blocks_first_waiting_request) \ + / instance_info.num_total_gpu_blocks elif self.load_metric == 'remaining_steps': if not self.enable_defrag: - num_requests = i.num_running_requests - num_available_gpu_blocks = i.num_available_gpu_blocks + num_requests = instance_info.num_running_requests + num_available_gpu_blocks = instance_info.num_available_gpu_blocks else: - num_requests = i.num_running_requests - if i.num_waiting_requests != 0: + num_requests = instance_info.num_running_requests + if instance_info.num_waiting_requests != 0: num_requests += 1 - num_available_gpu_blocks = i.num_available_gpu_blocks - i.num_blocks_first_waiting_request + num_available_gpu_blocks = instance_info.num_available_gpu_blocks - \ + instance_info.num_blocks_first_waiting_request if num_requests == 0: return -np.inf instance_load = (num_available_gpu_blocks / num_requests) * (-1) @@ -145,6 +148,6 @@ def __init__(self, load_metric): super().__init__(load_metric) self.load_calculator = DispatchLoadComputation(load_metric) - def compute_instance_load(self, i: InstanceInfo) -> float: - return self.load_calculator.compute_instance_load(i) + def compute_instance_load(self, instance_info: InstanceInfo) -> float: + return self.load_calculator.compute_instance_load(instance_info) \ No newline at end of file diff --git a/llumnix/launcher.py b/llumnix/launcher.py index 7978ea78..429e9ba7 100644 --- a/llumnix/launcher.py +++ b/llumnix/launcher.py @@ -20,14 +20,14 @@ from ray.util.state import list_placement_groups, list_actors from ray.util.placement_group import PlacementGroup -from llumnix.logger import init_logger +from llumnix.logging.logger import init_logger from llumnix.instance_info import InstanceType from llumnix.global_scheduler.global_scheduler import GlobalScheduler from llumnix.llumlet.llumlet import Llumlet from llumnix.queue.queue_type import QueueType from llumnix.backends.backend_interface import BackendType from llumnix.arg_utils import EntrypointsArgs, InstanceArgs -from llumnix.entrypoints.vllm.api_server_actor import FastAPIServerActor +from llumnix.entrypoints.vllm.api_server_actor import APIServerActor from llumnix.backends.utils import get_engine_world_size from llumnix.utils import (remove_placement_group, get_manager_name, INSTANCE_NAME_PREFIX, get_instance_name, SERVER_NAME_PREFIX, kill_server, kill_instance, get_actor_data_from_ray_internal_kv, @@ -86,7 +86,7 @@ def get_instance_deployment_states(self, instance_id: str): return pg_created, server_alive, instance_alive - def get_cluster_deployment(self) -> Tuple[Dict[str, PlacementGroup], Dict[str, FastAPIServerActor], Dict[str, Llumlet]]: + def get_cluster_deployment(self) -> Tuple[Dict[str, PlacementGroup], Dict[str, APIServerActor], Dict[str, Llumlet]]: curr_pgs: Dict[str, PlacementGroup] = {} curr_servers: Dict[str, PlacementGroup] = {} curr_instances: Dict[str, Llumlet] = {} @@ -201,8 +201,8 @@ async def done_scale_up(instance_args: InstanceArgs, entrypoint_args: Entrypoint asyncio.create_task(done_scale_up(next_instance_args, next_entrypoints_args)) def init_server(self, server_name: str, placement_group: PlacementGroup, - entrypoints_args: EntrypointsArgs) -> FastAPIServerActor: - fastapi_server = FastAPIServerActor.from_args(server_name, placement_group, entrypoints_args) + entrypoints_args: EntrypointsArgs) -> APIServerActor: + fastapi_server = APIServerActor.from_args(server_name, placement_group, entrypoints_args) return fastapi_server def init_instance(self,