From a8c555a9d0b49b1ce1d4e8e475180e403f97786e Mon Sep 17 00:00:00 2001 From: Finley McIlwaine Date: Thu, 17 Oct 2024 17:04:13 -0700 Subject: [PATCH] WIP Stress tests --- docs/stress-tests.md | 96 +++++++++++++++-------- test-stress/Test/Stress/Client.hs | 10 +-- test-stress/Test/Stress/Driver.hs | 30 ++++++- test-stress/Test/Stress/Driver/Summary.hs | 12 ++- test-stress/Test/Stress/Server.hs | 7 -- 5 files changed, 104 insertions(+), 51 deletions(-) diff --git a/docs/stress-tests.md b/docs/stress-tests.md index 7ae3587f..98f941ab 100644 --- a/docs/stress-tests.md +++ b/docs/stress-tests.md @@ -47,9 +47,9 @@ The stress test client and server communicate via the four major types of RPCs: and client take turns sending one message back and forth until each has sent `N` messages. -All communications between a client and a server happens in one of those four -patterns. The messages sent back and forth are random lazy bytestrings ranging -in length from 128 to 256 bytes. +These are the "atoms" of communication between the stress test clients and +servers. The messages sent back and forth are random lazy bytestrings ranging in +length from 128 to 256 bytes (see [here](../test-stress/Test/Stress/Common.hs)). # The Server @@ -69,41 +69,71 @@ documentation](./demo-server.md) for more information. # The Client The client takes options that specify the server it should connect to, how many -times it should connect to the server, and a list of calls it should execute on -those connections. For example, to run a client that opens 3 connections to an +times it should connect to the server, and what calls it should execute on those +connections. For example, to run a client that opens 3 connections to an insecure server at port 50051 and makes a client streaming call with 1234 -messages followed by a server streaming call with 500 messages on those -connections: +messages and a server streaming call with 500 messages, repeating those calls on +each connection 5 times: 
```bash -cabal run test-stress -- client --port 50051 --num-connections 3 --client-streaming 1234 --server-streaming 500 +cabal run test-stress -- client \ + --port 50051 \ + --num-connections 3 \ + --num-calls 5 \ + --client-streaming 1234 \ + --server-streaming 500 ``` -Clients also support running every connection concurrently via the -`--concurrent` option. Clients can connect to secure servers (using the default -certificates) using the `--secure` option, but can be configured to use -non-default certificates via other command line options just like the demo -client. See the `--help` client option and the [demo client's -documentation](./demo-client.md) for more information. +Clients also support running each connection concurrently via the `--concurrent` +option. Clients can connect to secure servers (using the default certificates) +using the `--secure` option, but can be configured to use non-default +certificates via other command line options just like the demo client. See the +`--help` client option and the [demo client's documentation](./demo-client.md) +for more information. # The Driver -The driver spawns a matrix of servers and a matrix of clients for a total of 60 -seconds. The server matrix is two-dimensional: - -1. **Stability:** Unstable servers are killed and restarted intermittently. - Stable servers are never killed. -2. **Security:** Secure servers use TLS, insecure servers do not. - -The client matrix is three-dimensional: - -1. **Stability:** Unstable clients are killed and restarted intermittently. - Stable clients are never killed. -2. **Security:** Secure clients connect over TLS, insecure clients do not. -3. **Compression:** Clients insist on either no compression, gzip compression, - deflate compression, or snappy compression. - -We limit the heap usage of all spawned processes, and the test fails if any of -them exceed their limits. 
We also profile the heap usage of all stable -components, generate graphs from those profiles and stitch them into an HTML -summary document at the end of the test. +The driver spawns a variety of servers and clients in separate processes, and +runs for a total of 60 seconds. Each process is run with a specific heap limit +(via the `-M` RTS flag), and the application will terminate with a non-zero exit +code if any of the processes are killed with a `heap overflow` exception. + +## Servers + +The driver spawns four total server processes. Each server is either secure or +insecure, and either stable or unstable. Secure servers require TLS; insecure +servers require non-TLS. Unstable servers are killed and restarted intermittently; +stable servers are left running for the duration of the driver's execution. + +## Clients + +The driver spawns 56 total client processes. Similar to the servers, each client +is either secure or insecure and stable or unstable. Each client only +communicates with one of the servers. Obviously, (in)secure clients only +communicate with (in)secure servers. Each client-server pair only +communicates in one of the following "patterns": + +* **Many connections:** Open a new connection, make a single non-streaming call + on it, repeat indefinitely. Think of this as calling `withConnection` over and over. +* **Many non-streaming calls:** Open a single connection. Make a single non-streaming + call on it, repeat the call indefinitely. Think of this as calling `withRPC` and sending a + single message back and forth on a single connection over and over. +* **Client streaming:** Open a connection. Make a non-stop client streaming + call. +* **Many client streaming calls:** Open a connection. Make a client streaming + call with a few messages, repeat indefinitely. +* **Server streaming:** Same as client streaming, but server sends messages + non-stop. +* **Many server streaming calls:** Same as many client streaming calls, but the server streams. 
+* **Bidirectional streaming:** Same as client streaming, but both client and + server send messages indefinitely. +* **Many bidirectional streaming calls:** Same as many client streaming calls, but both + client and server stream messages. + +## Summary chart generation + +When passed the `--gen-charts` flag, the stress test driver creates summary heap +profile charts for the stable components after the test is finished. The flag +causes each stable component to emit an event log with heap profiling events. +The driver then parses the event logs and generates SVG plots of the memory +usage over time. diff --git a/test-stress/Test/Stress/Client.hs b/test-stress/Test/Stress/Client.hs index bc91eeef..38f5a44b 100644 --- a/test-stress/Test/Stress/Client.hs +++ b/test-stress/Test/Stress/Client.hs @@ -105,7 +105,7 @@ runCalls v mServerValidation serverPort compr callNum (connNum, calls) = do allowCertainFailures = handle $ \case e | Just ServerDisconnected{} <- fromException e -> - say' v serverPort "server disconnected" + say' v serverPort $ "server disconnected: " ++ show e | Just IOError{} <- fromException e -> say' v serverPort "failed to connect" | Just ConnectionIsTimeout <- fromException e -> @@ -168,7 +168,7 @@ clientStreaming v p conn n = do void $ sendNextInput call msg sendFinalInput call msg void $ recvFinalOutput call - say' v p $ "received final output for client streaming call" + say' v p "received final output for client streaming call" -- | Server streaming -- @@ -178,10 +178,10 @@ serverStreaming v p conn n = do say' v p "initiating server streaming call" withRPC conn def (Proxy @(Trivial' "server-streaming")) $ \call -> do say' v p $ "receiving " ++ show n ++ " messages" - sendNextInput call $ BS.Char8.pack (show n) + sendFinalInput call $ BS.Char8.pack (show n) forM_ [1 .. 
n-1] $ \_ -> void $ recvNextOutput call void $ recvFinalOutput call - say' v p $ "received final output for server streaming call" + say' v p "received final output for server streaming call" -- | Bidirectional streaming -- @@ -199,7 +199,7 @@ bidiStreaming v p conn n = do void $ recvNextOutput call sendFinalInput call msg void $ recvFinalOutput call - say' v p $ "sent and received final messages for bidi streaming call" + say' v p "sent and received final messages for bidi streaming call" ------------------------------------------------------------------------------- -- Utils diff --git a/test-stress/Test/Stress/Driver.hs b/test-stress/Test/Stress/Driver.hs index 8b3204aa..6db23a23 100644 --- a/test-stress/Test/Stress/Driver.hs +++ b/test-stress/Test/Stress/Driver.hs @@ -175,6 +175,10 @@ runComponent v genCharts mwd c@Component{..} = do cmd v componentType , [ "--port=" ++ show componentPort ] , [ "--secure" | componentSecure ] + , if componentType == Server then + [ "+RTS", "-N", "-RTS" ] + else + [] , filter (const $ componentStable && genCharts) [ "+RTS" , "-l" @@ -264,7 +268,7 @@ servers = [ , componentPort = 50001 , componentSecure = True , componentStable = False - , componentLimit = Just 80 + , componentLimit = Just 100 , componentName = "server-unstable-secure" } , Component { @@ -280,7 +284,7 @@ servers = [ , componentPort = 50003 , componentSecure = True , componentStable = True - , componentLimit = Just 80 + , componentLimit = Just 100 , componentName = "server-stable-secure" } ] @@ -305,20 +309,40 @@ clients = [ ( [ indefinitely "--num-connections" , "--non-streaming" ] - , "connections" + , "non-streaming-many-connections" + ) + , ( [ indefinitely "--num-calls" + , "--non-streaming" + ] + , "non-streaming-many-calls" ) , ( [ indefinitely "--client-streaming" ] , "client-stream" ) + , ( [ indefinitely "--num-calls" + , "--client-streaming=10" + ] + , "client-stream-many-calls" + ) , ( [ indefinitely "--server-streaming" ] , "server-stream" ) + , ( [ 
indefinitely "--num-calls" + , "--server-streaming=10" + ] + , "server-stream-many-calls" + ) , ( [ indefinitely "--bidi-streaming" ] , "bidi-stream" ) + , ( [ indefinitely "--num-calls" + , "--bidi-streaming=10" + ] + , "bidi-stream-many-calls" + ) ] , portSecurity <- [ (False, 50000) diff --git a/test-stress/Test/Stress/Driver/Summary.hs b/test-stress/Test/Stress/Driver/Summary.hs index 958f1c97..b9644cbe 100644 --- a/test-stress/Test/Stress/Driver/Summary.hs +++ b/test-stress/Test/Stress/Driver/Summary.hs @@ -72,15 +72,18 @@ eventlogToSvg v elFile = do case ei of HeapLive _ s -> return $ acc { samplesLiveBytes = - (timeConv t, sizeConv s) : samplesLiveBytes acc + insertBy byFirst + (timeConv t, sizeConv s) (samplesLiveBytes acc) } BlocksSize _ s -> return $ acc { samplesBlocksSize = - (timeConv t, sizeConv s) : samplesBlocksSize acc + insertBy byFirst + (timeConv t, sizeConv s) (samplesBlocksSize acc) } HeapSize _ s -> return $ acc { samplesHeapSize = - (timeConv t, sizeConv s) : samplesHeapSize acc + insertBy byFirst + (timeConv t, sizeConv s) (samplesHeapSize acc) } _ -> return acc where @@ -92,6 +95,9 @@ eventlogToSvg v elFile = do say' :: Bool -> String -> IO () say' v = say v . 
("(summary) " ++) +byFirst :: Ord a => (a, b) -> (a, b) -> Ordering +byFirst (x1, _) (x2, _) = compare x1 x2 + ------------------------------------------------------------------------------- -- Internal auxiliary ------------------------------------------------------------------------------- diff --git a/test-stress/Test/Stress/Server.hs b/test-stress/Test/Stress/Server.hs index 4b423b39..a98904f0 100644 --- a/test-stress/Test/Stress/Server.hs +++ b/test-stress/Test/Stress/Server.hs @@ -6,7 +6,6 @@ import Control.Exception import Control.Monad import Data.ByteString.Lazy.Char8 qualified as BS.Char8 import Data.IORef -import GHC.IO import Network.GRPC.Common import Network.GRPC.Server @@ -62,8 +61,6 @@ handlers v idRef = [ nonStreaming call = do say' "handling non-streaming call" msg <- recvFinalInput call - -- uncomment to cause a leak that fails -M15m for many connections - -- () <- atomicModifyIORef _ref ((,()) . (():)) sendFinalOutput call $ (msg, NoMetadata) say' "sent final output for non-streaming call" @@ -118,7 +115,3 @@ handlers v idRef = [ say' msg = do sid <- readIORef idRef say v $ "(server " ++ sid ++ ") " ++ msg - -{-# NOINLINE _ref #-} -_ref :: IORef [()] -_ref = unsafePerformIO $ newIORef []