Skip to content

Commit

Permalink
fix: slow shutdown after signal (#169)
Browse files Browse the repository at this point in the history
Fix shutdown after signal to allow the last client disconnect to trigger
a prune check. Previous this would always waiting for the shutdown timeout,
which by default is 10 minutes, resulting in an unnecessarily slow shutdown.
  • Loading branch information
stevenh authored Oct 14, 2024
1 parent f1eb68a commit cbdc142
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 27 deletions.
4 changes: 3 additions & 1 deletion reaper.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,9 @@ func (r *reaper) pruneWait(ctx context.Context) (*resources, error) {
case addr := <-r.disconnected:
clients--
r.logger.Info("client disconnected", fieldAddress, addr, fieldClients, clients)
if clients == 0 && done != nil {
if clients == 0 {
// No clients connected, trigger prune check overriding
// any timeout set by shutdown signal.
pruneCheck.Reset(r.cfg.ReconnectionTimeout)
}
case <-done:
Expand Down
93 changes: 67 additions & 26 deletions reaper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -516,38 +516,79 @@ func TestAbortedClient(t *testing.T) {
}

func TestShutdownTimeout(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
t.Cleanup(cancel)

var log safeBuffer
logger := withLogger(slog.New(slog.NewTextHandler(&log, &slog.HandlerOptions{
Level: slog.LevelDebug,
})))
tc := newRunTest()
cli := newMockClient(tc)
r, err := newReaper(ctx, logger, withClient(cli), testConfig)
require.NoError(t, err)
t.Run("slow-timeout", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*3)
t.Cleanup(cancel)

var log safeBuffer
logger := withLogger(slog.New(slog.NewTextHandler(&log, &slog.HandlerOptions{
Level: slog.LevelDebug,
})))
tc := newRunTest()
cli := newMockClient(tc)
r, err := newReaper(ctx, logger, withClient(cli), testConfig)
require.NoError(t, err)

errCh := make(chan error, 1)
runCtx, runCancel := context.WithCancel(ctx)
t.Cleanup(runCancel)
go func() {
errCh <- r.run(runCtx)
}()
errCh := make(chan error, 1)
runCtx, runCancel := context.WithCancel(ctx)
t.Cleanup(runCancel)
go func() {
errCh <- r.run(runCtx)
}()

testConnect(ctx, t, r.listener.Addr().String())
runCancel()

select {
case err = <-errCh:
require.NoError(t, err)
case <-ctx.Done():
t.Fatal("timeout", log.String())
}

require.NoError(t, err)
data := log.String()
require.Contains(t, data, "signal received")
require.Contains(t, data, "shutdown timeout")
require.Contains(t, data, "done")
})

testConnect(ctx, t, r.listener.Addr().String())
runCancel()
t.Run("fast-client-disconnect", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*1)
t.Cleanup(cancel)

select {
case err = <-errCh:
var log safeBuffer
logger := withLogger(slog.New(slog.NewTextHandler(&log, &slog.HandlerOptions{
Level: slog.LevelDebug,
})))
tc := newRunTest()
cli := newMockClient(tc)
r, err := newReaper(ctx, logger, withClient(cli), testConfig)
require.NoError(t, err)
case <-ctx.Done():
t.Fatal("timeout", log.String())
}

require.Contains(t, log.String(), "shutdown timeout")
errCh := make(chan error, 1)
runCtx, runCancel := context.WithCancel(ctx)
t.Cleanup(runCancel)
go func() {
errCh <- r.run(runCtx)
}()

connectCtx, connectCancel := context.WithTimeout(ctx, time.Millisecond*100)
t.Cleanup(connectCancel)
testConnect(connectCtx, t, r.listener.Addr().String())
runCancel()

select {
case err = <-errCh:
require.NoError(t, err)
case <-ctx.Done():
t.Fatal("timeout", log.String())
}

data := log.String()
require.Contains(t, data, "signal received")
require.NotContains(t, data, "shutdown timeout")
require.Contains(t, data, "done")
})
}

func TestReapContainer(t *testing.T) {
Expand Down

0 comments on commit cbdc142

Please sign in to comment.