Skip to content

Commit

Permalink
♻️ cleanup from PR comments
Browse files (browse the repository at this point in the history)
Signed-off-by: Joe Runde <[email protected]>
  • Loading branch information
joerunde committed May 20, 2024
1 parent 4b44ba6 commit 0281fcc
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 26 deletions.
8 changes: 3 additions & 5 deletions vllm/entrypoints/grpc/grpc_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,13 @@ async def _handle_exception(e: Exception, func, *args, **kwargs):
# So we only add special handling for other types of errors
if not isinstance(e, AbortError):
if type(e).__name__ == "torch.cuda.OutOfMemoryError": #TODO check
context = kwargs.get("context", None) or args[-1]
logger.exception("%s caused GPU OOM error", func.__name__)
service_metrics.count_request_failure(FailureReasonLabel.OOM)
await context.abort(StatusCode.RESOURCE_EXHAUSTED, str(e))
elif is_generate_fn:
service_metrics.count_request_failure(FailureReasonLabel.GENERATE)
else:
if is_generate_fn:
service_metrics.count_request_failure(FailureReasonLabel.GENERATE)
else:
service_metrics.count_request_failure(FailureReasonLabel.UNKNOWN)
service_metrics.count_request_failure(FailureReasonLabel.UNKNOWN)
logger.exception("%s failed", func.__name__)
raise e

Expand Down
40 changes: 19 additions & 21 deletions vllm/tgis_utils/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,25 @@ def log_response(
else:
kind_log = (f"Sub-request {sub_request_num} from batch of "
f"{request_count}")
_log_response(inputs=[r.text for r in request.requests],
response=response,
params=request.params,
prefix_id=request.prefix_id,
engine_metrics=engine_metrics,
start_time=start_time,
kind_log=kind_log,
method_str="generate",
logger=logger)
inputs = [r.text for r in request.requests]
method_str = "generate"
else:
# streaming case
_log_response(inputs=[request.request.text],
response=response,
params=request.params,
prefix_id=request.prefix_id,
engine_metrics=engine_metrics,
start_time=start_time,
kind_log="Streaming response",
method_str="generate_stream",
logger=logger)
inputs = [request.request.text]
kind_log = "Streaming response"
method_str = "generate_stream"

_log_response(
inputs=inputs,
response=response,
params=request.params,
prefix_id=request.prefix_id,
engine_metrics=engine_metrics,
start_time=start_time,
kind_log=kind_log,
method_str=method_str,
logger=logger,
)


def log_error(request: Union[BatchedGenerationRequest,
Expand All @@ -56,8 +55,7 @@ def log_error(request: Union[BatchedGenerationRequest,
"""Logs errors similar to how the TGIS server does"""
# NB: We don't actually log the `Exception` here to match the TGIS behavior
# of just logging the simple string representation of the error
params = request.params
paramstr = text_format.MessageToString(params, as_one_line=True)
param_str = text_format.MessageToString(request.params, as_one_line=True)
prefix_id = request.prefix_id

if isinstance(request, BatchedGenerationRequest):
Expand All @@ -71,7 +69,7 @@ def log_error(request: Union[BatchedGenerationRequest,
input_chars = sum(len(input_) for input_ in inputs)

span_str = (f"{method_str}{{input={short_input} prefix_id={prefix_id} "
f"input_chars=[{input_chars}] params={paramstr}")
f"input_chars=[{input_chars}] params={param_str}")

logger.error("%s: %s", span_str, exception_str)

Expand Down

0 comments on commit 0281fcc

Please sign in to comment.