From 36ddb962d874509dd2aa5fbba20edcd71578e689 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Fri, 23 Feb 2024 10:09:19 +0100 Subject: [PATCH] server: tests: parallel fix server is started twice, add colors to help to monitor in the CI jobs --- examples/server/tests/features/environment.py | 1 + examples/server/tests/features/parallel.feature | 9 +-------- examples/server/tests/features/steps/steps.py | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py index 01877cd10d486..de2f9c9828d02 100644 --- a/examples/server/tests/features/environment.py +++ b/examples/server/tests/features/environment.py @@ -8,6 +8,7 @@ def before_scenario(context, scenario): + print(f"\x1b[33;42mStarting new scenario: {scenario.name}!\x1b[0m") port = 8080 if 'PORT' in os.environ: port = int(os.environ['PORT']) diff --git a/examples/server/tests/features/parallel.feature b/examples/server/tests/features/parallel.feature index d4d403eade843..07be39ef58a4b 100644 --- a/examples/server/tests/features/parallel.feature +++ b/examples/server/tests/features/parallel.feature @@ -50,17 +50,10 @@ Feature: Parallel Then all prompts are predicted with tokens Examples: | streaming | n_predict | - | disabled | 64 | + | disabled | 128 | #| enabled | 64 | FIXME: phymbert: need to investigate why in aiohttp with streaming only one token is generated Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size #3969 - Given a server listening on localhost:8080 - And a model file stories260K.gguf - And 42 as server seed - And 2 slots - And 64 KV cache size - Then the server is starting - Then the server is healthy Given a prompt: """ Write a very long story about AI. diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 7375fcba92a5c..cfe03e799c2e7 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -88,7 +88,7 @@ def step_start_server(context): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: result = sock.connect_ex((context.server_fqdn, context.server_port)) if result == 0: - print("\x1b[33;42mserver started!\x1b[0m") + print("\x1b[33;46mserver started!\x1b[0m") return attempts += 1 if attempts > 20: