forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Bugfix] Fix request cancellation without polling (vllm-project#11190)
- Loading branch information
Showing
12 changed files
with
164 additions
and
103 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import asyncio | ||
import functools | ||
|
||
from fastapi import Request | ||
|
||
|
||
async def listen_for_disconnect(request: Request) -> None: | ||
"""Returns if a disconnect message is received""" | ||
while True: | ||
message = await request.receive() | ||
if message["type"] == "http.disconnect": | ||
break | ||
|
||
|
||
def with_cancellation(handler_func): | ||
"""Decorator that allows a route handler to be cancelled by client | ||
disconnections. | ||
This does _not_ use request.is_disconnected, which does not work with | ||
middleware. Instead this follows the pattern from | ||
starlette.StreamingResponse, which simultaneously awaits on two tasks- one | ||
to wait for an http disconnect message, and the other to do the work that we | ||
want done. When the first task finishes, the other is cancelled. | ||
A core assumption of this method is that the body of the request has already | ||
been read. This is a safe assumption to make for fastapi handlers that have | ||
already parsed the body of the request into a pydantic model for us. | ||
This decorator is unsafe to use elsewhere, as it will consume and throw away | ||
all incoming messages for the request while it looks for a disconnect | ||
message. | ||
In the case where a `StreamingResponse` is returned by the handler, this | ||
wrapper will stop listening for disconnects and instead the response object | ||
will start listening for disconnects. | ||
""" | ||
|
||
# Functools.wraps is required for this wrapper to appear to fastapi as a | ||
# normal route handler, with the correct request type hinting. | ||
@functools.wraps(handler_func) | ||
async def wrapper(*args, **kwargs): | ||
|
||
# The request is either the second positional arg or `raw_request` | ||
request = args[1] if len(args) > 1 else kwargs["raw_request"] | ||
|
||
handler_task = asyncio.create_task(handler_func(*args, **kwargs)) | ||
cancellation_task = asyncio.create_task(listen_for_disconnect(request)) | ||
|
||
done, pending = await asyncio.wait([handler_task, cancellation_task], | ||
return_when=asyncio.FIRST_COMPLETED) | ||
for task in pending: | ||
task.cancel() | ||
|
||
if handler_task in done: | ||
return handler_task.result() | ||
return None | ||
|
||
return wrapper |
Oops, something went wrong.