Skip to content

Commit

Permalink
Merge pull request #5875 from IntersectMBO/mgalazyn/test/fix-flaky-te…
Browse files Browse the repository at this point in the history
…st-cleanup

[cardano-testnet] Fix flaky test workspace cleanup and node port allocation
  • Loading branch information
carbolymer authored Jul 31, 2024
2 parents f215bf6 + 53a3f65 commit 8829321
Show file tree
Hide file tree
Showing 10 changed files with 286 additions and 132 deletions.
3 changes: 2 additions & 1 deletion cardano-testnet/cardano-testnet.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,16 @@ library
, ouroboros-network-api
, prettyprinter
, process
, random
, resourcet
, retry
, safe-exceptions
, scientific
, si-timers
, stm
, tasty ^>= 1.5
, tasty-expected-failure
, tasty-hedgehog
, temporary
, text
, time
, transformers
Expand Down
2 changes: 1 addition & 1 deletion cardano-testnet/src/Cardano/Testnet.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module Cardano.Testnet (
-- ** Start a testnet
cardanoTestnet,
cardanoTestnetDefault,
requestAvailablePortNumbers,
retryOnAddressInUseError,

-- ** Testnet options
CardanoTestnetOptions(..),
Expand Down
21 changes: 18 additions & 3 deletions cardano-testnet/src/Testnet/Ping.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module Testnet.Ping
( pingNode
, checkSprocket
, waitForSprocket
, waitForPortClosed
, TestnetMagic
, PingClientError(..)
) where
Expand All @@ -20,13 +22,14 @@ import Cardano.Network.Ping (HandshakeFailure, NodeVersion (..), hands

import qualified Codec.CBOR.Read as CBOR
import Control.Exception.Safe
import Control.Monad (when)
import Control.Monad
import Control.Monad.Class.MonadTime.SI (Time)
import qualified Control.Monad.Class.MonadTimer.SI as MT
import Control.Monad.IO.Class
import qualified Control.Retry as R
import Control.Tracer (nullTracer)
import qualified Data.ByteString.Lazy as LBS
import Data.Either (isLeft)
import Data.Either
import Data.IORef
import qualified Data.List as L
import Data.Word (Word32)
Expand All @@ -35,10 +38,11 @@ import Network.Mux.Timeout (TimeoutFn, withTimeoutSerial)
import Network.Mux.Types (MiniProtocolDir (InitiatorDir), MiniProtocolNum (..),
MuxBearer (read, write), MuxSDU (..), MuxSDUHeader (..),
RemoteClockModel (RemoteClockModel))
import Network.Socket (AddrInfo (..), StructLinger (..))
import Network.Socket (AddrInfo (..), PortNumber, StructLinger (..))
import qualified Network.Socket as Socket
import Prettyprinter

import qualified Hedgehog.Extras.Stock.IO.Network.Socket as IO
import qualified Hedgehog.Extras.Stock.IO.Network.Sprocket as IO

type TestnetMagic = Word32
Expand Down Expand Up @@ -167,6 +171,17 @@ sprocketToAddrInfo sprocket = do
[] Socket.AF_UNIX Socket.Stream
Socket.defaultProtocol (Socket.SockAddrUnix socketAbsPath) Nothing

-- | Wait until port gets closed.
waitForPortClosed
:: MonadIO m
=> MT.DiffTime -- ^ timeout
-> MT.DiffTime -- ^ check interval
-> PortNumber
-> m Bool -- ^ 'True' if port is closed, 'False' if timeout was reached before that
waitForPortClosed timeout interval portNumber = liftIO $ do
let retryPolicy = R.constantDelay (round @Double $ realToFrac interval) <> R.limitRetries (ceiling $ toRational timeout / toRational interval)
fmap not . R.retrying retryPolicy (const pure) $ \_ ->
liftIO (IO.isPortOpen (fromIntegral portNumber))

data PingClientError
= PceDecodingError
Expand Down
29 changes: 17 additions & 12 deletions cardano-testnet/src/Testnet/Process/Run.hs
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,11 @@ procCli = GHC.withFrozenCallStack $ H.procFlex "cardano-cli" "CARDANO_CLI"
-- | Create a 'CreateProcess' describing how to start the cardano-node process
-- and an argument list.
procNode
:: [String]
:: HasCallStack
=> MonadIO m
=> [String]
-- ^ Arguments to the CLI command
-> ExceptT ExecutableError IO CreateProcess
-> ExceptT ExecutableError m CreateProcess
-- ^ Captured stdout
procNode = GHC.withFrozenCallStack $ procFlexNew "cardano-node" "CARDANO_NODE"

Expand Down Expand Up @@ -234,31 +236,32 @@ resourceAndIOExceptionHandlers = [ Handler $ pure . ProcessIOException
, Handler $ pure . ResourceException
]


procFlexNew
:: String
:: MonadIO m
=> String
-- ^ Cabal package name corresponding to the executable
-> String
-- ^ Environment variable pointing to the binary to run
-> [String]
-- ^ Arguments to the CLI command
-> ExceptT ExecutableError IO CreateProcess
-> ExceptT ExecutableError m CreateProcess
-- ^ Captured stdout
procFlexNew = procFlexNew' H.defaultExecConfig

procFlexNew'
:: H.ExecConfig
:: MonadIO m
=> H.ExecConfig
-> String
-- ^ Cabal package name corresponding to the executable
-> String
-- ^ Environment variable pointing to the binary to run
-> [String]
-- ^ Arguments to the CLI command
-> ExceptT ExecutableError IO CreateProcess
-> ExceptT ExecutableError m CreateProcess
-- ^ Captured stdout
procFlexNew' execConfig pkg binaryEnv arguments = GHC.withFrozenCallStack $ do
bin <- binFlexNew pkg binaryEnv
return (IO.proc bin arguments)
pure (IO.proc bin arguments)
{ IO.env = getLast $ H.execConfigEnv execConfig
, IO.cwd = getLast $ H.execConfigCwd execConfig
-- this allows sending signals to the created processes, without killing the test-suite process
Expand All @@ -267,11 +270,12 @@ procFlexNew' execConfig pkg binaryEnv arguments = GHC.withFrozenCallStack $ do

-- | Compute the path to the binary given a package name or an environment variable override.
binFlexNew
:: String
:: MonadIO m
=> String
-- ^ Package name
-> String
-- ^ Environment variable pointing to the binary to run
-> ExceptT ExecutableError IO FilePath
-> ExceptT ExecutableError m FilePath
-- ^ Path to executable
binFlexNew pkg binaryEnv = do
maybeEnvBin <- liftIO $ IO.lookupEnv binaryEnv
Expand Down Expand Up @@ -316,9 +320,10 @@ data ExecutableError
-- to a haskell package. It is assumed that the project has already been configured and the
-- executable has been built.
binDist
:: String
:: MonadIO m
=> String
-- ^ Package name
-> ExceptT ExecutableError IO FilePath
-> ExceptT ExecutableError m FilePath
-- ^ Path to executable
binDist pkg = do
pJsonFp <- handleIOExceptT RetrievePlanJsonFailure planJsonFile
Expand Down
52 changes: 49 additions & 3 deletions cardano-testnet/src/Testnet/Property/Util.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}

Expand All @@ -15,14 +16,23 @@ module Testnet.Property.Util

import Cardano.Api

import Control.Exception.Safe
import Control.Monad
import Control.Monad.Trans.Resource
import qualified Control.Retry as R
import qualified Data.Aeson as Aeson
import GHC.Stack
import qualified System.Directory as IO
import qualified System.Environment as IO
import System.FilePath ((</>))
import System.Info (os)
import qualified System.IO as IO
import qualified System.IO.Temp as IO
import qualified System.IO.Unsafe as IO

import qualified Hedgehog as H
import qualified Hedgehog.Extras as H
import qualified Hedgehog.Extras.Stock.CallStack as H
import Hedgehog.Internal.Property (MonadTest)


Expand All @@ -43,16 +53,52 @@ integrationRetryWorkspace n workspaceName f = withFrozenCallStack $
if disableRetries
then
integration $
H.runFinallies $ H.workspace (workspaceName <> "-no-retries") f
H.runFinallies $ workspace (workspaceName <> "-no-retries") f
else
integration $ H.retry n $ \i ->
H.runFinallies $ H.workspace (workspaceName <> "-" <> show i) f
H.runFinallies $ workspace (workspaceName <> "-" <> show i) f

-- | Create a workspace directory which will exist for at least the duration of
-- the supplied block.
--
-- The directory will have the supplied prefix but contain a generated random
-- suffix to prevent interference between tests
--
-- The directory will be deleted if the block succeeds, but left behind if
-- the block fails.
-- TODO: this is a version which retries deleting of a workspace on exception - upstream to hedgehog-extras
workspace
:: MonadTest m
=> HasCallStack
=> MonadResource m
=> FilePath
-> (FilePath -> m ())
-> m ()
workspace prefixPath f = withFrozenCallStack $ do
systemTemp <- H.evalIO IO.getCanonicalTemporaryDirectory
maybeKeepWorkspace <- H.evalIO $ IO.lookupEnv "KEEP_WORKSPACE"
ws <- H.evalIO $ IO.createTempDirectory systemTemp $ prefixPath <> "-test"
H.annotate $ "Workspace: " <> ws
H.evalIO $ IO.writeFile (ws </> "module") H.callerModuleName
f ws
when (os /= "mingw32" && maybeKeepWorkspace /= Just "1") $ do
-- try to delete the directory 5 times, 100ms apart
let retryPolicy = R.constantDelay 100_000 <> R.limitRetries 10
-- retry only on IOExceptions
ioExH _ = Handler $ \(_ :: IOException) -> pure True
-- For some reason, the temporary directory removal sometimes fails.
-- Lets wrap this in MonadResource try multiple times before we fail.
void
. register
. R.recovering retryPolicy [ioExH]
. const
$ IO.removePathForcibly ws

-- | The 'FilePath' in '(FilePath -> H.Integration ())' is the work space directory.
-- This is created (and returned) via 'H.workspace'.
integrationWorkspace :: HasCallStack => FilePath -> (FilePath -> H.Integration ()) -> H.Property
integrationWorkspace workspaceName f = withFrozenCallStack $
integration $ H.runFinallies $ H.workspace workspaceName f
integration $ H.runFinallies $ workspace workspaceName f

isLinux :: Bool
isLinux = os == "linux"
Expand Down
Loading

0 comments on commit 8829321

Please sign in to comment.