Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor: move ensure_server() to vllm.py
ensure_server is used only by vllm. Move it to reduce the circular dependency between backends.py and vllm.py. Slightly adapt the function to its new location. Diff of ensure_server adaptations: -def ensure_server( - backend: str, - api_base: str, - http_client=None, - host="localhost", - port=8000, - background=True, - foreground_allowed=False, - server_process_func=None, - max_startup_attempts=None, -) -> Tuple[ - Optional[multiprocessing.Process], Optional[subprocess.Popen], Optional[str] -]: - """Checks if server is running, if not starts one as a subprocess. Returns the server process - and the URL where it's available.""" + def _ensure_server( + self, + http_client=None, + background=True, + foreground_allowed=False, + ) -> Tuple[ + Optional[subprocess.Popen], Optional[str] + ]: + """Checks if server is running, if not starts one as a subprocess. Returns the server process + and the URL where it's available.""" - logger.info(f"Trying to connect to model server at {api_base}") - if check_api_base(api_base, http_client): - return (None, None, api_base) - port = free_tcp_ipv4_port(host) - logger.debug(f"Using available port {port} for temporary model serving.") + logger.info(f"Trying to connect to model server at {self.api_base}") + if check_api_base(self.api_base, http_client): + return (None, self.api_base) + port = free_tcp_ipv4_port(self.host) + logger.debug(f"Using available port {port} for temporary model serving.") - host_port = f"{host}:{port}" - temp_api_base = get_api_base(host_port) - vllm_server_process = None + host_port = f"{self.host}:{port}" + temp_api_base = get_api_base(host_port) - - if backend == VLLM: - # TODO: resolve how the hostname is getting passed around the class and this function - vllm_server_process = server_process_func(port, background) + vllm_server_process = self.create_server_process(port, background) logger.info("Starting a temporary vLLM server at %s", temp_api_base) count = 0 # Each call to check_api_base takes >2s + 2s sleep # 
Default to 120 if not specified (~8 mins of wait time) - vllm_startup_max_attempts = max_startup_attempts or 120 - start_time_secs = time() + vllm_startup_max_attempts = self.max_startup_attempts or 120 + start_time_secs = time.time() while count < vllm_startup_max_attempts: count += 1 # Check if the process is still alive @@ -61,20 +59,11 @@ def ensure_server( temp_api_base, vllm_startup_max_attempts, ) - duration = round(time() - start_time_secs, 1) + duration = round(time.time() - start_time_secs, 1) shutdown_process(vllm_server_process, 20) # pylint: disable=raise-missing-from raise ServerException(f"vLLM failed to start up in {duration} seconds") - sleep(2) - return (None, vllm_server_process, temp_api_base) + time.sleep(2) + return (vllm_server_process, temp_api_base) References: - https://en.wikipedia.org/wiki/Circular_dependency - https://en.wikipedia.org/wiki/Acyclic_dependencies_principle Signed-off-by: Costa Shulyupin <[email protected]>
- Loading branch information