
Commit

Fix: increased settlement submit batch retry delay (#236)
* Updated manager sync test.

* Increased SL retry delay.

* Fixed bug in syncing an aggregator upon start and removed redundant test.
omritoptix authored Feb 11, 2023
1 parent 18cb856 commit 9789c75
Showing 2 changed files with 17 additions and 66 deletions.
13 changes: 8 additions & 5 deletions block/manager.go
@@ -36,6 +36,7 @@ type blockSource string
const (
defaultDABlockTime = 30 * time.Second
DABatchRetryDelay = 20 * time.Second
SLBatchRetryDelay = 10 * time.Second
)

const (
@@ -198,7 +199,8 @@ func (m *Manager) getLatestBatchFromSL(ctx context.Context) (*settlement.ResultR

}

// waitForSync waits until we are synced before it unblocks.
// waitForSync enforces that the aggregator is synced before it can produce blocks.
// It requires the retriveBlockLoop to be running.
func (m *Manager) waitForSync(ctx context.Context) error {
resultRetrieveBatch, err := m.getLatestBatchFromSL(ctx)
// Set the syncTarget according to the result
@@ -212,7 +214,7 @@ func (m *Manager) waitForSync(ctx context.Context) error {
m.logger.Error("failed to retrieve batch from SL", "err", err)
return err
} else {
atomic.StoreUint64(&m.syncTarget, resultRetrieveBatch.EndHeight)
m.updateSyncParams(ctx, resultRetrieveBatch.EndHeight)
}
// Wait until isSynced is true and then call the PublishBlockLoop
m.isSyncedCond.L.Lock()
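
For context, the pattern waitForSync relies on is a condition variable guarding an atomically updated sync target. Below is a minimal, self-contained sketch of that pattern — not the dymint code itself; the names height, syncTarget and isSyncedCond mirror the diff, while the simulated retrieve loop and the rest of the program are hypothetical.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"
)

func main() {
	var height, syncTarget uint64
	atomic.StoreUint64(&syncTarget, 5) // pretend the SL reported a batch ending at height 5

	isSyncedCond := sync.NewCond(new(sync.Mutex))

	// Stand-in for the retrieve/apply loop: advances the height and signals
	// once the sync target has been reached.
	go func() {
		for atomic.LoadUint64(&height) < atomic.LoadUint64(&syncTarget) {
			atomic.AddUint64(&height, 1)
			time.Sleep(10 * time.Millisecond)
		}
		isSyncedCond.L.Lock()
		isSyncedCond.Signal()
		isSyncedCond.L.Unlock()
	}()

	// waitForSync-style blocking: the producer waits here until the applied
	// height catches up with the sync target.
	isSyncedCond.L.Lock()
	for atomic.LoadUint64(&height) < atomic.LoadUint64(&syncTarget) {
		isSyncedCond.Wait()
	}
	isSyncedCond.L.Unlock()

	fmt.Println("synced at height", atomic.LoadUint64(&height))
}

Because the predicate is re-checked in a loop around Wait(), a signal that fires before the producer starts waiting is not lost.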
@@ -268,8 +270,9 @@ func (m *Manager) ProduceBlockLoop(ctx context.Context) {
}
}

// SyncTargetLoop is responsible for updating the syncTarget as read from the SL
// to a ring buffer which will later be used by retrieveLoop for actually syncing until this target
// SyncTargetLoop is responsible for getting real-time updates about batch submissions.
// For a non-aggregator: it updates the sync target, which the retrieveLoop uses to sync up to that target.
// For an aggregator: it gets notified that a batch has been accepted, so the next batch can be sent.
func (m *Manager) SyncTargetLoop(ctx context.Context) {
m.logger.Info("Started sync target loop")
subscription, err := m.pubsub.Subscribe(ctx, "syncTargetLoop", settlement.EventQueryNewSettlementBatchAccepted)
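
The real loop subscribes to settlement.EventQueryNewSettlementBatchAccepted on the node's pubsub server. The sketch below swaps that subscription for a plain channel of batch end heights, so the event plumbing is illustrative only; what it keeps from the diff is the idea that each accepted batch atomically advances the sync target.

package main

import (
	"context"
	"fmt"
	"sync/atomic"
	"time"
)

// syncTargetLoop is a schematic stand-in for Manager.SyncTargetLoop: each
// accepted-batch event carries the batch end height, which becomes the new
// sync target used by the retrieve loop.
func syncTargetLoop(ctx context.Context, batchAccepted <-chan uint64, syncTarget *uint64) {
	for {
		select {
		case <-ctx.Done():
			return
		case endHeight := <-batchAccepted:
			atomic.StoreUint64(syncTarget, endHeight)
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
	defer cancel()

	var syncTarget uint64
	events := make(chan uint64, 1)
	go syncTargetLoop(ctx, events, &syncTarget)

	events <- 10 // simulate "batch accepted" with end height 10
	time.Sleep(50 * time.Millisecond)
	fmt.Println("sync target:", atomic.LoadUint64(&syncTarget))
}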
@@ -705,7 +708,7 @@ func (m *Manager) submitBatchToSL(ctx context.Context, batch *types.Batch, resul
return err
}
return nil
}, retry.Context(ctx), retry.LastErrorOnly(true))
}, retry.Context(ctx), retry.LastErrorOnly(true), retry.Delay(SLBatchRetryDelay))
if err != nil {
m.logger.Error("Failed to submit batch to SL Layer", batch, err)
panic(err)
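
The functional change here is the added retry.Delay(SLBatchRetryDelay) option; without it, avast/retry-go falls back to its much shorter default back-off delay. Below is a self-contained sketch of that option set, assuming the github.com/avast/retry-go package and using a stand-in submit function in place of the real settlement-layer client.

package main

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/avast/retry-go"
)

const SLBatchRetryDelay = 10 * time.Second

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()

	attempts := 0
	// submit stands in for the real settlement-layer submission call; it
	// fails twice before succeeding.
	submit := func() error {
		attempts++
		if attempts < 3 {
			return errors.New("settlement layer unavailable")
		}
		return nil
	}

	// Same option set as the diff: abort when ctx is cancelled, surface only
	// the last error, and wait SLBatchRetryDelay between attempts.
	err := retry.Do(submit,
		retry.Context(ctx),
		retry.LastErrorOnly(true),
		retry.Delay(SLBatchRetryDelay),
	)
	fmt.Println("attempts:", attempts, "err:", err)
}

With LastErrorOnly(true), only the final attempt's error is returned rather than the aggregated list of all attempt errors.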
70 changes: 9 additions & 61 deletions block/manager_test.go
@@ -113,68 +113,17 @@ func TestInitialState(t *testing.T) {
}
}

// TestWaitUntilSynced tests that we don't start producing blocks until we're synced.
// 1. Validate blocks are produced.
// 2. Add a batch which takes the manager out of sync
// 3. Validate blocks are not produced.
func TestWaitUntilSynced(t *testing.T) {
storeLastBlockHeight := uint64(0)
manager, err := getManager(nil, nil, 1, 1, int64(storeLastBlockHeight), nil, nil)
require.NoError(t, err)
require.NotNil(t, manager)

// Manager should produce blocks as it's the first to write batches.
ctx, cancel := context.WithTimeout(context.Background(), time.Second*3)
defer cancel()
// Run syncTargetLoop so that we update the syncTarget.
go manager.SyncTargetLoop(ctx)
go manager.ProduceBlockLoop(ctx)
select {
case <-ctx.Done():
// Validate some blocks produced
assert.Greater(t, manager.store.Height(), storeLastBlockHeight)
}
// As the publishBlock function doesn't stop upon context termination (only PublishBlockLoop),
// wait for it to finish before taking the manager out of sync.
time.Sleep(1 * time.Second)

// Take the manager out of sync.
t.Log("Taking the manager out of sync by submitting a batch")
startHeight := atomic.LoadUint64(&manager.syncTarget) + 1
endHeight := startHeight + uint64(defaultBatchSize-1)*2
batch, err := testutil.GenerateBatch(startHeight, endHeight, manager.proposerKey)
require.NoError(t, err)
daResult := &da.ResultSubmitBatch{
BaseResult: da.BaseResult{
DAHeight: 1,
},
}
resultSubmitBatch := manager.settlementClient.SubmitBatch(batch, manager.dalc.GetClientType(), daResult)
assert.Equal(t, resultSubmitBatch.Code, settlement.StatusSuccess)

// Validate blocks are not produced.
t.Log("Validating blocks are not produced")
storeHeight := manager.store.Height()
ctx, cancel = context.WithTimeout(context.Background(), time.Second*3)
defer cancel()
go manager.ProduceBlockLoop(ctx)
select {
case <-ctx.Done():
assert.Equal(t, storeHeight, manager.store.Height())
}
}

// TestPublishAfterSynced should test that we are resuming publishing blocks after we are synced
// 1. Validate blocks are not produced by adding a batch and outsyncing the manager
// TestProduceOnlyAfterSynced should test that we are resuming publishing blocks after we are synced
// 1. Submit a batch and outsync the manager
// 2. Fail to produce blocks
// 2. Sync the manager
// 3. Validate blocks are produced.
func TestPublishAfterSynced(t *testing.T) {
// 3. Succeed to produce blocks
func TestProduceOnlyAfterSynced(t *testing.T) {
manager, err := getManager(nil, nil, 1, 1, 0, nil, nil)
require.NoError(t, err)
require.NotNil(t, manager)

// Validate blocks are not produced by adding a batch and outsyncing the manager.
// Submit batch
t.Log("Taking the manager out of sync by submitting a batch")
lastStoreHeight := manager.store.Height()
numBatchesToAdd := 2
nextBatchStartHeight := atomic.LoadUint64(&manager.syncTarget) + 1
@@ -191,7 +140,7 @@ func TestPublishAfterSynced(t *testing.T) {
time.Sleep(time.Millisecond * 500)
}

// Check manager is out of sync
t.Log("Validating manager can't produce blocks")
ctx, cancel := context.WithTimeout(context.Background(), time.Second*1)
defer cancel()
go manager.ProduceBlockLoop(ctx)
@@ -200,10 +149,9 @@ func TestPublishAfterSynced(t *testing.T) {
assert.Equal(t, lastStoreHeight, manager.store.Height())
}

// Sync the manager
t.Log("Sync the manager")
ctx, cancel = context.WithTimeout(context.Background(), time.Second*2)
defer cancel()
go manager.SyncTargetLoop(ctx)
go manager.RetriveLoop(ctx)
go manager.ApplyBlockLoop(ctx)
select {
@@ -212,7 +160,7 @@ func TestPublishAfterSynced(t *testing.T) {
assert.Equal(t, batch.EndHeight, manager.store.Height())
}

// Validate blocks are produced
t.Log("Validate blocks are produced")
ctx, cancel = context.WithTimeout(context.Background(), time.Second*3)
defer cancel()
go manager.ProduceBlockLoop(ctx)
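The test above repeatedly uses the same bounding pattern: run a loop goroutine under a context with a timeout, block until the context expires, then assert on the resulting store height. A standalone sketch of that pattern, with a plain counter standing in for the manager's store:

package main

import (
	"context"
	"fmt"
	"sync/atomic"
	"time"
)

func main() {
	var height uint64

	// produceLoop stands in for ProduceBlockLoop: it "produces a block"
	// periodically until the context expires.
	produceLoop := func(ctx context.Context) {
		ticker := time.NewTicker(50 * time.Millisecond)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				atomic.AddUint64(&height, 1)
			}
		}
	}

	before := atomic.LoadUint64(&height)
	ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond)
	defer cancel()
	go produceLoop(ctx)
	<-ctx.Done()

	// In the real test this is an assert on manager.store.Height().
	fmt.Println("blocks produced while running:", atomic.LoadUint64(&height)-before)
}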
