Skip to content

Commit

Permalink
[fix] vllm-online-benchmark first token latency error (intel-analytic…
Browse files Browse the repository at this point in the history
  • Loading branch information
ACupofAir authored Oct 29, 2024
1 parent 0bbc04b commit 3700e81
Showing 1 changed file with 1 addition and 7 deletions.
8 changes: 1 addition & 7 deletions docker/llm/serving/xpu/docker/vllm_online_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,13 +270,7 @@ def perform_request(session, url, payload, headers):
json_data = json.loads(data)
if 'choices' in json_data and len(json_data['choices']) > 0:
choice = json_data['choices'][0]
if 'finish_reason' in choice and (choice['finish_reason'] == 'length' or choice['finish_reason'] == 'stop'):
if 'first_token_time' in choice and isinstance(choice['first_token_time'], float):
first_token_inference_time = choice['first_token_time']
if 'rest_token_time' in choice and isinstance(choice['rest_token_time'], float):
next_token_inference_time = choice['rest_token_time']
else:
# Record the time of the first token
if 'text' in choice:
if first_token_time is None:
first_token_time = token_time
else:
Expand Down

0 comments on commit 3700e81

Please sign in to comment.