
Commit

fix comment
KuilongCui committed Jan 21, 2025
1 parent 1da6e0a commit 9ee73aa
Showing 7 changed files with 35 additions and 35 deletions.
3 changes: 3 additions & 0 deletions llumnix/constants.py
@@ -21,6 +21,9 @@
WATCH_DEPLOYMENT_INTERVAL: float = 10.0
WATCH_DEPLOYMENT_INTERVAL_PENDING_INSTANCE: float = 120.0

+# llumnix/global_scheduler/dispatch_scheduler.py
+DISPATCH_LOG_FREQUENCY = 100

# llumnix/entrypoints/setup.py
MAX_RAY_RESTARTS: int = 10
RAY_RESTART_INTERVAL: float = 10.0
14 changes: 6 additions & 8 deletions llumnix/entrypoints/bladellm/utils.py
@@ -13,15 +13,13 @@

from blade_llm.service.args import ServingArgs

-from llumnix.arg_utils import EntrypointsArgs, ManagerArgs
from llumnix.logging.logger import init_logger
+from llumnix.backends.backend_interface import BackendType
+from llumnix.arg_utils import EntrypointsArgs, ManagerArgs, InstanceArgs, LaunchMode

logger = init_logger(__name__)


-from llumnix.backends.backend_interface import BackendType
-from llumnix.arg_utils import EntrypointsArgs, ManagerArgs, InstanceArgs, LaunchMode

def detect_unsupported_engine_feature(engine_args: ServingArgs) -> None:
unsupported_feature = None
if engine_args.enable_lora:
@@ -56,9 +54,9 @@ def get_args(llumnix_cfg, llumnix_parser, engine_args: ServingArgs):

detect_unsupported_engine_feature(engine_args)

logger.info("entrypoints_args: {}", entrypoints_args)
logger.info("manager_args: {}", manager_args)
logger.info("instance_args: {}", instance_args)
logger.info("engine_args: {}", engine_args)
logger.info("entrypoints_args: {}".format(entrypoints_args))
logger.info("manager_args: {}".format(manager_args))
logger.info("instance_args: {}".format(instance_args))
logger.info("engine_args: {}".format(engine_args))

return entrypoints_args, manager_args, instance_args, engine_args
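A side note on why the logging calls change shape in this hunk: assuming init_logger returns a standard-library logging.Logger (the llumnix.logging.logger module appears to follow the vLLM-style stdlib setup), lazy interpolation only understands %-style placeholders, so logger.info("entrypoints_args: {}", entrypoints_args) leaves the braces unfilled and can even trip a formatting error at emit time. A minimal sketch of the two forms that do work; the dict is a stand-in for the real args objects:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("llumnix.demo")

entrypoints_args = {"host": "127.0.0.1", "port": 8000}  # stand-in for the real EntrypointsArgs

# Eager formatting, as the diff switches to:
logger.info("entrypoints_args: {}".format(entrypoints_args))
# Lazy stdlib-style interpolation, the usual alternative:
logger.info("entrypoints_args: %s", entrypoints_args)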
1 change: 0 additions & 1 deletion llumnix/entrypoints/vllm/arg_utils.py
@@ -1,6 +1,5 @@
from vllm.engine.arg_utils import AsyncEngineArgs

-from llumnix.arg_utils import EntrypointsArgs, ManagerArgs
from llumnix.logging.logger import init_logger
from llumnix.backends.backend_interface import BackendType
from llumnix.backends.vllm.utils import check_engine_args
6 changes: 2 additions & 4 deletions llumnix/global_scheduler/dispatch_scheduler.py
@@ -16,14 +16,12 @@
import random

from llumnix.logging.logger import init_logger
-from llumnix.instance_info import InstanceInfo
-from llumnix.instance_info import InstanceInfo
+from llumnix.instance_info import InstanceInfo, InstanceType
from llumnix.arg_utils import InstanceArgs
-from llumnix.instance_info import InstanceType
+from llumnix.constants import DISPATCH_LOG_FREQUENCY

logger = init_logger(__name__)

-DISPATCH_LOG_FREQUENCY = 100

class DispatchScheduler:
def __init__(self, dispatch_policy: str,) -> None:
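The relocated DISPATCH_LOG_FREQUENCY is just a log-throttling knob. The dispatch loop itself is outside this hunk, so the following is only a hypothetical sketch of how such a frequency constant is typically consumed: count dispatches and emit a summary line every Nth one.

DISPATCH_LOG_FREQUENCY = 100  # now imported from llumnix.constants in the new layout

class DispatchLogThrottle:
    """Hypothetical helper, not part of the diff: logs once per N dispatches."""

    def __init__(self) -> None:
        self.total_dispatched = 0

    def record(self, instance_id: str) -> None:
        self.total_dispatched += 1
        if self.total_dispatched % DISPATCH_LOG_FREQUENCY == 0:
            print(f"dispatched {self.total_dispatched} requests, last instance: {instance_id}")

throttle = DispatchLogThrottle()
for i in range(250):
    throttle.record(f"instance_{i % 4}")  # prints after 100 and 200 dispatches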
1 change: 0 additions & 1 deletion llumnix/global_scheduler/migration_policy.py
@@ -18,7 +18,6 @@

from llumnix.logging.logger import init_logger
from llumnix.instance_info import InstanceInfo
-from llumnix.instance_info import InstanceInfo

logger = init_logger(__name__)

35 changes: 19 additions & 16 deletions llumnix/instance_info.py
@@ -90,25 +90,26 @@ def __init__(self, load_metric: str, enable_defrag: bool = False) -> None:
self.enable_defrag = enable_defrag

@abstractmethod
-def compute_instance_load(self, i: InstanceInfo) -> float:
+def compute_instance_load(self, instance_info: InstanceInfo) -> float:
pass

class DispatchLoadComputation(LoadComputationStrategy):
-def compute_instance_load(self, i: InstanceInfo) -> float:
+def compute_instance_load(self, instance_info: InstanceInfo) -> float:
instance_load = -np.inf
if self.load_metric == 'usage_ratio':
-instance_load = (i.num_used_gpu_blocks + i.num_blocks_all_waiting_requests) / i.num_total_gpu_blocks
+instance_load = (instance_info.num_used_gpu_blocks + instance_info.num_blocks_all_waiting_requests) \
+    / instance_info.num_total_gpu_blocks
elif self.load_metric == 'remaining_steps':
-num_requests = i.num_running_requests + i.num_waiting_requests
-num_available_gpu_blocks = i.num_available_gpu_blocks - i.num_blocks_all_waiting_requests
+num_requests = instance_info.num_running_requests + instance_info.num_waiting_requests
+num_available_gpu_blocks = instance_info.num_available_gpu_blocks - instance_info.num_blocks_all_waiting_requests
if num_requests == 0:
return -np.inf
instance_load = (num_available_gpu_blocks / num_requests)*(-1)
return instance_load

class MigrationLoadComputation(LoadComputationStrategy):
-def compute_instance_load_after_migrate(self, i: InstanceInfo, is_migrate_in: bool) -> float:
-instance_info_after_migrate = copy.deepcopy(i)
+def compute_instance_load_after_migrate(self, instance_info: InstanceInfo, is_migrate_in: bool) -> float:
+instance_info_after_migrate = copy.deepcopy(instance_info)
num_blocks_last_running_request = instance_info_after_migrate.num_blocks_last_running_request

if is_migrate_in:
@@ -120,19 +121,21 @@ def compute_instance_load_after_migrate(self, i: InstanceInfo, is_migrate_in: bo

return self.compute_instance_load(instance_info_after_migrate)

-def compute_instance_load(self, i: InstanceInfo) -> float:
+def compute_instance_load(self, instance_info: InstanceInfo) -> float:
instance_load = -np.inf
if self.load_metric == 'usage_ratio':
-instance_load = (i.num_used_gpu_blocks + i.num_blocks_first_waiting_request) / i.num_total_gpu_blocks
+instance_load = (instance_info.num_used_gpu_blocks + instance_info.num_blocks_first_waiting_request) \
+    / instance_info.num_total_gpu_blocks
elif self.load_metric == 'remaining_steps':
if not self.enable_defrag:
-num_requests = i.num_running_requests
-num_available_gpu_blocks = i.num_available_gpu_blocks
+num_requests = instance_info.num_running_requests
+num_available_gpu_blocks = instance_info.num_available_gpu_blocks
else:
-num_requests = i.num_running_requests
-if i.num_waiting_requests != 0:
+num_requests = instance_info.num_running_requests
+if instance_info.num_waiting_requests != 0:
num_requests += 1
-num_available_gpu_blocks = i.num_available_gpu_blocks - i.num_blocks_first_waiting_request
+num_available_gpu_blocks = instance_info.num_available_gpu_blocks - \
+    instance_info.num_blocks_first_waiting_request
if num_requests == 0:
return -np.inf
instance_load = (num_available_gpu_blocks / num_requests) * (-1)
@@ -145,6 +148,6 @@ def __init__(self, load_metric):
super().__init__(load_metric)
self.load_calculator = DispatchLoadComputation(load_metric)

-def compute_instance_load(self, i: InstanceInfo) -> float:
-return self.load_calculator.compute_instance_load(i)
+def compute_instance_load(self, instance_info: InstanceInfo) -> float:
+return self.load_calculator.compute_instance_load(instance_info)
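To make the remaining_steps branch above concrete, here is a minimal standalone sketch of the dispatch-load formula: free GPU blocks per pending request, negated so that a more loaded instance gets a larger value. It mirrors DispatchLoadComputation.compute_instance_load from this diff; the numbers are made up for illustration.

import numpy as np

def dispatch_load_remaining_steps(num_running_requests: int,
                                  num_waiting_requests: int,
                                  num_available_gpu_blocks: int,
                                  num_blocks_all_waiting_requests: int) -> float:
    # Mirrors the 'remaining_steps' branch of DispatchLoadComputation above.
    num_requests = num_running_requests + num_waiting_requests
    free_blocks = num_available_gpu_blocks - num_blocks_all_waiting_requests
    if num_requests == 0:
        return -np.inf
    # Fewer free blocks per request -> larger (less negative) load.
    return (free_blocks / num_requests) * (-1)

print(dispatch_load_remaining_steps(8, 2, 500, 100))  # -40.0: (500 - 100) / (8 + 2), negated
print(dispatch_load_remaining_steps(0, 0, 500, 0))    # -inf: an idle instance sorts as least loaded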

10 changes: 5 additions & 5 deletions llumnix/launcher.py
@@ -20,14 +20,14 @@
from ray.util.state import list_placement_groups, list_actors
from ray.util.placement_group import PlacementGroup

-from llumnix.logger import init_logger
+from llumnix.logging.logger import init_logger
from llumnix.instance_info import InstanceType
from llumnix.global_scheduler.global_scheduler import GlobalScheduler
from llumnix.llumlet.llumlet import Llumlet
from llumnix.queue.queue_type import QueueType
from llumnix.backends.backend_interface import BackendType
from llumnix.arg_utils import EntrypointsArgs, InstanceArgs
-from llumnix.entrypoints.vllm.api_server_actor import FastAPIServerActor
+from llumnix.entrypoints.vllm.api_server_actor import APIServerActor
from llumnix.backends.utils import get_engine_world_size
from llumnix.utils import (remove_placement_group, get_manager_name, INSTANCE_NAME_PREFIX, get_instance_name,
SERVER_NAME_PREFIX, kill_server, kill_instance, get_actor_data_from_ray_internal_kv,
@@ -86,7 +86,7 @@ def get_instance_deployment_states(self, instance_id: str):

return pg_created, server_alive, instance_alive

-def get_cluster_deployment(self) -> Tuple[Dict[str, PlacementGroup], Dict[str, FastAPIServerActor], Dict[str, Llumlet]]:
+def get_cluster_deployment(self) -> Tuple[Dict[str, PlacementGroup], Dict[str, APIServerActor], Dict[str, Llumlet]]:
curr_pgs: Dict[str, PlacementGroup] = {}
curr_servers: Dict[str, PlacementGroup] = {}
curr_instances: Dict[str, Llumlet] = {}
@@ -201,8 +201,8 @@ async def done_scale_up(instance_args: InstanceArgs, entrypoint_args: Entrypoint
asyncio.create_task(done_scale_up(next_instance_args, next_entrypoints_args))

def init_server(self, server_name: str, placement_group: PlacementGroup,
-entrypoints_args: EntrypointsArgs) -> FastAPIServerActor:
-fastapi_server = FastAPIServerActor.from_args(server_name, placement_group, entrypoints_args)
+entrypoints_args: EntrypointsArgs) -> APIServerActor:
+fastapi_server = APIServerActor.from_args(server_name, placement_group, entrypoints_args)
return fastapi_server

def init_instance(self,
