Skip to content

Commit

Permalink
fix(v7): prevent concurrent hermes operations on the same chain (#1099)
Browse files Browse the repository at this point in the history
* fix(v7): prevent concurrent hermes operations on the same chain

Allowing them can result in "hermes create ..." commands failing because
there's sequence numbers being modified by simultaneous commands.

* Remove DefaultBlockDatabaseFilepath from tests

This can break in CI
  • Loading branch information
fastfadingviolets authored May 10, 2024
1 parent e7c0c39 commit 9a88474
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ jobs:
# run tests
- name: run unit tests
# -short flag purposefully omitted because there are some longer unit tests
run: go test -race -timeout 10m -failfast -p 2 $(go list ./... | grep -v /cmd | grep -v /examples)
run: go test -race -timeout 30m -failfast -p 2 $(go list ./... | grep -v /cmd | grep -v /examples)
9 changes: 4 additions & 5 deletions examples/cosmos/sdk_boundary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,10 @@ func TestSDKBoundaries(t *testing.T) {
rep := testreporter.NewNopReporter()

require.NoError(t, ic.Build(ctx, rep.RelayerExecReporter(t), interchaintest.InterchainBuildOptions{
TestName: t.Name(),
Client: client,
NetworkID: network,
BlockDatabaseFile: interchaintest.DefaultBlockDatabaseFilepath(),
SkipPathCreation: false,
TestName: t.Name(),
Client: client,
NetworkID: network,
SkipPathCreation: false,
}))
t.Cleanup(func() {
_ = ic.Close()
Expand Down
7 changes: 3 additions & 4 deletions examples/ibc/ics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,9 @@ func TestICS(t *testing.T) {

// Build interchain
err = ic.Build(ctx, eRep, interchaintest.InterchainBuildOptions{
TestName: t.Name(),
Client: client,
NetworkID: network,
BlockDatabaseFile: interchaintest.DefaultBlockDatabaseFilepath(),
TestName: t.Name(),
Client: client,
NetworkID: network,

SkipPathCreation: false,
})
Expand Down
91 changes: 91 additions & 0 deletions interchain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"testing"
"time"

"cosmossdk.io/math"
"github.com/cosmos/cosmos-sdk/codec"
Expand Down Expand Up @@ -255,6 +256,96 @@ func TestCosmosChain_BroadcastTx_HermesRelayer(t *testing.T) {
broadcastTxCosmosChainTest(t, ibc.Hermes)
}

func TestInterchain_ConcurrentRelayerOps(t *testing.T) {
type relayerTest struct {
relayer ibc.RelayerImplementation
name string
}

const (
denom = "uatom"
chains = 4
)

relayers := []relayerTest{
{
relayer: ibc.CosmosRly,
name: "Cosmos Relayer",
},
{
relayer: ibc.Hermes,
name: "Hermes",
},
}

numFullNodes := 0
numValidators := 1

for _, rly := range relayers {
rly := rly
t.Run(rly.name, func(t *testing.T) {
client, network := interchaintest.DockerSetup(t)
f, err := interchaintest.CreateLogFile(fmt.Sprintf("%d.json", time.Now().Unix()))
require.NoError(t, err)
// Reporter/logs
rep := testreporter.NewReporter(f)
eRep := rep.RelayerExecReporter(t)
ctx := context.Background()

chainSpecs := make([]*interchaintest.ChainSpec, chains)
for i := 0; i < chains; i++ {
chainSpecs[i] = &interchaintest.ChainSpec{
Name: "gaia",
ChainName: fmt.Sprintf("g%d", i+1),
Version: "v7.0.1",
NumValidators: &numValidators,
NumFullNodes: &numFullNodes,
ChainConfig: ibc.ChainConfig{
GasPrices: "0" + denom,
Denom: denom,
},
}
}
r := interchaintest.NewBuiltinRelayerFactory(rly.relayer, zaptest.NewLogger(t)).Build(
t, client, network,
)

cf := interchaintest.NewBuiltinChainFactory(zaptest.NewLogger(t), chainSpecs)
chains, err := cf.Chains(t.Name())
require.NoError(t, err)
ic := interchaintest.NewInterchain()
for _, chain := range chains {
require.NoError(t, err)
ic.AddChain(chain)
}
ic.AddRelayer(r, "relayer")
for i, chainI := range chains {
for j := i + 1; j < len(chains); j++ {
ic.AddLink(interchaintest.InterchainLink{
Chain1: chainI,
Chain2: chains[j],
Relayer: r,
Path: getIBCPath(chainI, chains[j]),
})
}
}
err = ic.Build(ctx, eRep, interchaintest.InterchainBuildOptions{
TestName: t.Name(),
Client: client,
NetworkID: network,
})
require.NoError(t, err)
t.Cleanup(func() {
ic.Close()
})
})
}
}

func getIBCPath(chainA, chainB ibc.Chain) string {
return chainA.Config().ChainID + "-" + chainB.Config().ChainID
}

func broadcastTxCosmosChainTest(t *testing.T, relayerImpl ibc.RelayerImplementation) {
if testing.Short() {
t.Skip("skipping in short mode")
Expand Down
78 changes: 70 additions & 8 deletions relayer/hermes/hermes_relayer.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"regexp"
"strings"
"sync"
"time"

"github.com/docker/docker/client"
Expand Down Expand Up @@ -35,8 +36,12 @@ var (
// Relayer is the ibc.Relayer implementation for hermes.
type Relayer struct {
*relayer.DockerRelayer

// lock protects the relayer's state
lock sync.RWMutex
paths map[string]*pathConfiguration
chainConfigs []ChainConfig
chainLocks map[string]*sync.Mutex
}

// ChainConfig holds all values required to write an entry in the "chains" section in the hermes config file.
Expand Down Expand Up @@ -72,6 +77,7 @@ func NewHermesRelayer(log *zap.Logger, testName string, cli *client.Client, netw

return &Relayer{
DockerRelayer: dr,
chainLocks: map[string]*sync.Mutex{},
}
}

Expand All @@ -87,7 +93,13 @@ func (r *Relayer) AddChainConfiguration(ctx context.Context, rep ibc.RelayerExec
return fmt.Errorf("failed to write hermes config: %w", err)
}

return r.validateConfig(ctx, rep)
if err := r.validateConfig(ctx, rep); err != nil {
return err
}
r.lock.Lock()
defer r.lock.Unlock()
r.chainLocks[chainConfig.ChainID] = &sync.Mutex{}
return nil
}

func (r *Relayer) MarkChainAsConsumer(ctx context.Context, chainID string) error {
Expand Down Expand Up @@ -121,7 +133,9 @@ func (r *Relayer) MarkChainAsConsumer(ctx context.Context, chainID string) error
// LinkPath performs the operations that happen when a path is linked. This includes creating clients, creating connections
// and establishing a channel. This happens across multiple operations rather than a single link path cli command.
func (r *Relayer) LinkPath(ctx context.Context, rep ibc.RelayerExecReporter, pathName string, channelOpts ibc.CreateChannelOptions, clientOpts ibc.CreateClientOptions) error {
r.lock.RLock()
_, ok := r.paths[pathName]
r.lock.RUnlock()
if !ok {
return fmt.Errorf("path %s not found", pathName)
}
Expand All @@ -142,7 +156,12 @@ func (r *Relayer) LinkPath(ctx context.Context, rep ibc.RelayerExecReporter, pat
}

func (r *Relayer) CreateChannel(ctx context.Context, rep ibc.RelayerExecReporter, pathName string, opts ibc.CreateChannelOptions) error {
pathConfig := r.paths[pathName]
pathConfig, unlock, err := r.getAndLockPath(pathName)
if err != nil {
return err
}
defer unlock()

cmd := []string{hermes, "--json", "create", "channel", "--order", opts.Order.String(), "--a-chain", pathConfig.chainA.chainID, "--a-port", opts.SourcePortName, "--b-port", opts.DestPortName, "--a-connection", pathConfig.chainA.connectionID}
if opts.Version != "" {
cmd = append(cmd, "--channel-version", opts.Version)
Expand All @@ -157,7 +176,12 @@ func (r *Relayer) CreateChannel(ctx context.Context, rep ibc.RelayerExecReporter
}

func (r *Relayer) CreateConnections(ctx context.Context, rep ibc.RelayerExecReporter, pathName string) error {
pathConfig := r.paths[pathName]
pathConfig, unlock, err := r.getAndLockPath(pathName)
if err != nil {
return err
}
defer unlock()

cmd := []string{hermes, "--json", "create", "connection", "--a-chain", pathConfig.chainA.chainID, "--a-client", pathConfig.chainA.clientID, "--b-client", pathConfig.chainB.clientID}

res := r.Exec(ctx, rep, cmd, nil)
Expand All @@ -175,10 +199,12 @@ func (r *Relayer) CreateConnections(ctx context.Context, rep ibc.RelayerExecRepo
}

func (r *Relayer) UpdateClients(ctx context.Context, rep ibc.RelayerExecReporter, pathName string) error {
pathConfig, ok := r.paths[pathName]
if !ok {
return fmt.Errorf("path %s not found", pathName)
pathConfig, unlock, err := r.getAndLockPath(pathName)
if err != nil {
return err
}
defer unlock()

updateChainACmd := []string{hermes, "--json", "update", "client", "--host-chain", pathConfig.chainA.chainID, "--client", pathConfig.chainA.clientID}
res := r.Exec(ctx, rep, updateChainACmd, nil)
if res.Err != nil {
Expand All @@ -192,7 +218,12 @@ func (r *Relayer) UpdateClients(ctx context.Context, rep ibc.RelayerExecReporter
// Note: in the go relayer this can be done with a single command using the path reference,
// however in Hermes this needs to be done as two separate commands.
func (r *Relayer) CreateClients(ctx context.Context, rep ibc.RelayerExecReporter, pathName string, opts ibc.CreateClientOptions) error {
pathConfig := r.paths[pathName]
pathConfig, unlock, err := r.getAndLockPath(pathName)
if err != nil {
return err
}
defer unlock()

chainACreateClientCmd := []string{hermes, "--json", "create", "client", "--host-chain", pathConfig.chainA.chainID, "--reference-chain", pathConfig.chainB.chainID}
if opts.TrustingPeriod != "" {
chainACreateClientCmd = append(chainACreateClientCmd, "--trusting-period", opts.TrustingPeriod)
Expand Down Expand Up @@ -233,7 +264,11 @@ func (r *Relayer) CreateClients(ctx context.Context, rep ibc.RelayerExecReporter
}

func (r *Relayer) CreateClient(ctx context.Context, rep ibc.RelayerExecReporter, srcChainID, dstChainID, pathName string, opts ibc.CreateClientOptions) error {
pathConfig := r.paths[pathName]
pathConfig, unlock, err := r.getAndLockPath(pathName)
if err != nil {
return err
}
defer unlock()

createClientCmd := []string{hermes, "--json", "create", "client", "--host-chain", srcChainID, "--reference-chain", dstChainID}
if opts.TrustingPeriod != "" {
Expand Down Expand Up @@ -290,6 +325,8 @@ func (r *Relayer) RestoreKey(ctx context.Context, rep ibc.RelayerExecReporter, c
}

func (r *Relayer) UpdatePath(ctx context.Context, rep ibc.RelayerExecReporter, pathName string, opts ibc.PathUpdateOptions) error {
r.lock.Lock()
defer r.lock.Unlock()
// the concept of paths doesn't exist in hermes, but update our in-memory paths so we can use them elsewhere
path, ok := r.paths[pathName]
if !ok {
Expand Down Expand Up @@ -317,7 +354,9 @@ func (r *Relayer) UpdatePath(ctx context.Context, rep ibc.RelayerExecReporter, p
}

func (r *Relayer) Flush(ctx context.Context, rep ibc.RelayerExecReporter, pathName string, channelID string) error {
r.lock.RLock()
path := r.paths[pathName]
r.lock.RUnlock()
cmd := []string{hermes, "clear", "packets", "--chain", path.chainA.chainID, "--channel", channelID, "--port", path.chainA.portID}
res := r.Exec(ctx, rep, cmd, nil)
return res.Err
Expand All @@ -326,6 +365,8 @@ func (r *Relayer) Flush(ctx context.Context, rep ibc.RelayerExecReporter, pathNa
// GeneratePath establishes an in memory path representation. The concept does not exist in hermes, so it is handled
// at the interchain test level.
func (r *Relayer) GeneratePath(ctx context.Context, rep ibc.RelayerExecReporter, srcChainID, dstChainID, pathName string) error {
r.lock.Lock()
defer r.lock.Unlock()
if r.paths == nil {
r.paths = map[string]*pathConfiguration{}
}
Expand All @@ -344,6 +385,8 @@ func (r *Relayer) GeneratePath(ctx context.Context, rep ibc.RelayerExecReporter,
// rather than multiple config files, we need to maintain a list of chain configs each time they are added to write the
// full correct file update calling Relayer.AddChainConfiguration.
func (r *Relayer) configContent(cfg ibc.ChainConfig, keyName, rpcAddr, grpcAddr string) ([]byte, error) {
r.lock.Lock()
defer r.lock.Unlock()
r.chainConfigs = append(r.chainConfigs, ChainConfig{
cfg: cfg,
keyName: keyName,
Expand Down Expand Up @@ -381,6 +424,25 @@ func extractJsonResult(stdout []byte) []byte {
return []byte(jsonOutput)
}

func (r *Relayer) getAndLockPath(pathName string) (*pathConfiguration, func(), error) {
// we don't get an RLock here because we could deadlock while trying to get the chain locks
r.lock.Lock()
path, ok := r.paths[pathName]
defer r.lock.Unlock()
if !ok {
return nil, nil, fmt.Errorf("path %s not found", pathName)
}
chainALock := r.chainLocks[path.chainA.chainID]
chainBLock := r.chainLocks[path.chainB.chainID]
chainALock.Lock()
chainBLock.Lock()
unlock := func() {
chainALock.Unlock()
chainBLock.Unlock()
}
return path, unlock, nil
}

// GetClientIdFromStdout extracts the client ID from stdout.
func GetClientIdFromStdout(stdout []byte) (string, error) {
var clientCreationResult ClientCreationResponse
Expand Down

0 comments on commit 9a88474

Please sign in to comment.