Skip to content

Commit

Permalink
Add option to warm up (#11)
Browse files Browse the repository at this point in the history
  • Loading branch information
qihqi authored Mar 15, 2024
1 parent 2b9db52 commit e04302c
Showing 1 changed file with 45 additions and 1 deletion.
46 changes: 45 additions & 1 deletion benchmarks/benchmark_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ def calculate_metrics(

def grpc_sync_request(api_url: str, request: Any) -> tuple[list[str], float, float]:
"""Send grpc synchronous request since the current grpc server is sync."""
with grpc.insecure_channel(api_url) as channel:
options = [("grpc.keepalive_timeout_ms", 10000)]
with grpc.insecure_channel(api_url, options=options) as channel:
grpc.channel_ready_future(channel).result()
stub = jetstream_pb2_grpc.OrchestratorStub(channel)
print("Making request")
Expand Down Expand Up @@ -374,6 +375,24 @@ def mock_requests(total_mock_requests: int):
return data


def sample_warmup_requests(requests):
    """Yield one representative request per prompt-length bucket.

    Consecutive boundaries 0, 16, 32, ..., 1024 form ranges ``(low, high]``.
    For each range, in ascending order, the first entry of ``requests`` whose
    ``prompt_len`` lies inside the range is yielded. Ranges with no matching
    request contribute nothing, and requests whose ``prompt_len`` is at most
    0 or greater than 1024 are never selected.

    Args:
        requests: Iterable of objects exposing a ``prompt_len`` attribute.
            It is scanned once per range.

    Yields:
        At most one request per range, in ascending range order.
    """
    bucket_edges = (0, 16, 32, 64, 128, 256, 512, 1024)
    # Private sentinel so "no request in this range" is unambiguous.
    no_match = object()

    for low, high in zip(bucket_edges, bucket_edges[1:]):
        candidates = (req for req in requests if low < req.prompt_len <= high)
        chosen = next(candidates, no_match)
        if chosen is not no_match:
            yield chosen


def main(args: argparse.Namespace):
print(args)
random.seed(args.seed)
Expand All @@ -390,6 +409,23 @@ def main(args: argparse.Namespace):
else:
input_requests = sample_requests(args.dataset, args.num_prompts, tokenizer, args.max_output_length)

if args.warmup_first:
print('Warm up start:' )
warmup_requests = list(sample_warmup_requests(input_requests)) * 2
benchmark_result, request_outputs = asyncio.run(
benchmark(
api_url=api_url,
tokenizer=tokenizer,
input_requests=warmup_requests,
request_rate=args.request_rate,
disable_tqdm=args.disable_tqdm,
session_cache=args.session_cache,
priority=args.priority,
threads=args.threads,
)
)
print('Warm up done')

benchmark_result, request_outputs = asyncio.run(
benchmark(
api_url=api_url,
Expand Down Expand Up @@ -551,6 +587,14 @@ def main(args: argparse.Namespace):
"File path to store request outputs"
),
)
# NOTE: argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
# non-empty value -- including "False" and "0" -- would enable warmup. Parse
# the text instead: the usual "off" spellings (and an empty value) disable
# warmup; any other value enables it, exactly as it did before.
parser.add_argument(
    "--warmup-first",
    type=lambda value: value.strip().lower()
    not in {"", "0", "false", "f", "no", "n", "off"},
    default=False,
    help=(
        "Whether to send warmup req first"
    ),
)

args = parser.parse_args()
main(args)

0 comments on commit e04302c

Please sign in to comment.