fix: don't panic when logs waits for more than 5 seconds (v2) #1278

Closed
wants to merge 5 commits
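The gist of the diff below: the log producer goroutine used to be stopped through a `stopProducer chan bool` and panicked whenever `ContainerLogs` or a header read failed; it is now stopped by cancelling a context, and failures are logged and retried. A rough standalone sketch of that pattern, not the PR code itself (`producer` and `fetch` are made-up names):

```go
// Sketch of the stop/retry pattern adopted by this PR: cancel a context to
// stop the goroutine, retry transient errors instead of panicking.
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

func producer(ctx context.Context, fetch func(context.Context) error) {
	for {
		if ctx.Err() != nil {
			return // the caller cancelled the context, i.e. StopLogProducer
		}
		if err := fetch(ctx); err != nil {
			if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
				continue // the check at the top of the loop exits once the outer ctx is done
			}
			// transient failure: log and retry instead of panicking
			fmt.Println("fetch failed, retrying in 1s:", err)
			time.Sleep(time.Second)
		}
	}
}

func main() {
	ctx, stop := context.WithCancel(context.Background())
	go producer(ctx, func(context.Context) error { return errors.New("boom") })

	time.Sleep(3 * time.Second)
	stop() // the equivalent of StopLogProducer: just cancel the context
	time.Sleep(100 * time.Millisecond)
}
```

Cancelling the context both unblocks any pending request and lets the loop exit on its next iteration, which is why StopLogProducer below reduces to a single cancel call.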
58 changes: 34 additions & 24 deletions docker.go
@@ -51,7 +51,7 @@ const (
ReaperDefault = "reaper_default" // Default network name when bridge is not available
packagePath = "github.com/testcontainers/testcontainers-go"

logStoppedForOutOfSyncMessage = "Stopping log consumer: Headers out of sync"
logRestartedForOutOfSyncMessage = "headers out of sync, will retry"
)

// DockerContainer represents a container started using Docker
@@ -68,7 +68,7 @@ type DockerContainer struct {
terminationSignal chan bool
consumers []LogConsumer
raw *types.ContainerJSON
stopProducer chan bool
stopProducer context.CancelFunc
logger Logging
lifecycleHooks []ContainerLifecycleHooks
}
@@ -617,9 +617,9 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
return errors.New("log producer already started")
}

c.stopProducer = make(chan bool)
ctx, c.stopProducer = context.WithCancel(ctx)

go func(stop <-chan bool) {
go func() {
since := ""
// if the socket is closed we will make additional logs request with updated Since timestamp
BEGIN:
@@ -630,26 +630,30 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
Since: since,
}

ctx, cancel := context.WithTimeout(ctx, time.Second*5)
defer cancel()

r, err := c.provider.client.ContainerLogs(ctx, c.GetContainerID(), options)
if err != nil {
// if we can't get the logs, panic, we can't return an error to anything
// from within this goroutine
panic(err)
// if we can't get the logs, retry in one second.
if ctx.Err() != nil {
// context done.
return
}
c.logger.Printf("cannot get logs for container %q: %v", c.ID, err)
time.Sleep(1 * time.Second)
goto BEGIN
}
defer c.provider.Close()

for {
select {
case <-stop:
if ctx.Err() != nil {
err := r.Close()
if err != nil {
// we can't close the read closer, this should never happen
panic(err)
}
return
}
select {
case <-ctx.Done():
continue
default:
h := make([]byte, 8)
_, err := io.ReadFull(r, h)
@@ -660,13 +664,15 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
since = fmt.Sprintf("%d.%09d", now.Unix(), int64(now.Nanosecond()))
goto BEGIN
}
if errors.Is(err, context.DeadlineExceeded) {
// Probably safe to continue here
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
// If the outer context is done, loop will exit in the next iteration.
continue
}
_, _ = fmt.Fprintf(os.Stderr, "container log error: %+v. %s", err, logStoppedForOutOfSyncMessage)
_, _ = fmt.Fprintf(os.Stderr, "read log header: %+v. %s", err, logRestartedForOutOfSyncMessage)
// if we would continue here, the next header-read will result into random data...
return
// we need to restart the whole request.
time.Sleep(1 * time.Second)
goto BEGIN
}

count := binary.BigEndian.Uint32(h[4:])
@@ -688,13 +694,16 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
if err != nil {
// TODO: add-logger: use logger to log out this error
_, _ = fmt.Fprintf(os.Stderr, "error occurred reading log with known length %s", err.Error())
if errors.Is(err, context.DeadlineExceeded) {
// Probably safe to continue here

if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
// If the outer context is done, loop will exit in the next iteration.
continue
}
// we can not continue here as the next read most likely will not be the next header
_, _ = fmt.Fprintln(os.Stderr, logStoppedForOutOfSyncMessage)
return
// if we would continue here, the next header-read will result into random data...
// we need to restart the whole request.
_, _ = fmt.Fprintf(os.Stderr, "read log message: %+v. %s", err, logRestartedForOutOfSyncMessage)
time.Sleep(1 * time.Second)
goto BEGIN
}
for _, c := range c.consumers {
c.Accept(Log{
Expand All @@ -704,7 +713,7 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
}
}
}
}(c.stopProducer)
}()

return nil
}
@@ -713,7 +722,8 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context) error {
// and sending them to each added LogConsumer
func (c *DockerContainer) StopLogProducer() error {
if c.stopProducer != nil {
c.stopProducer <- true
// Cancel the producer's context.
c.stopProducer()
c.stopProducer = nil
}
return nil
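For context on the `h := make([]byte, 8)` reads in the producer loop above: when a container runs without a TTY, the Docker daemon multiplexes stdout and stderr onto one stream and prefixes every frame with an 8-byte header, whose last four bytes carry the payload length as a big-endian uint32. A small self-contained sketch of reading one such frame (the frame bytes here are fabricated for illustration):

```go
// Sketch of the 8-byte frame header the producer loop parses.
// Layout (TTY disabled): byte 0 = stream type (1 stdout, 2 stderr),
// bytes 1-3 reserved, bytes 4-7 = payload length as a big-endian uint32.
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

func readFrame(r io.Reader) (stream byte, payload []byte, err error) {
	h := make([]byte, 8)
	if _, err = io.ReadFull(r, h); err != nil {
		return 0, nil, err // a failed or partial header read is where "out of sync" shows up
	}
	count := binary.BigEndian.Uint32(h[4:])
	payload = make([]byte, count)
	_, err = io.ReadFull(r, payload)
	return h[0], payload, err
}

func main() {
	var buf bytes.Buffer
	buf.Write([]byte{1, 0, 0, 0, 0, 0, 0, 6}) // stdout frame announcing 6 payload bytes
	buf.WriteString("hello\n")

	stream, payload, err := readFrame(&buf)
	fmt.Printf("stream=%d payload=%q err=%v\n", stream, payload, err)
	// prints: stream=1 payload="hello\n" err=<nil>
}
```

If a read lands in the middle of a payload, the next eight bytes are not a header, which is why the PR restarts the whole logs request rather than continuing to parse.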
99 changes: 75 additions & 24 deletions logconsumer_test.go
@@ -7,6 +7,7 @@ import (
"io"
"net/http"
"strings"
"sync"
"testing"
"time"

@@ -20,8 +21,18 @@ import (
const lastMessage = "DONE"

type TestLogConsumer struct {
Msgs []string
Ack chan bool
mu sync.Mutex
msgs []string
Ack chan bool
waitingFor string
ackWait chan bool
}

func NewTestLogConsumer() *TestLogConsumer {
return &TestLogConsumer{
msgs: []string{},
Ack: make(chan bool),
}
}

func (g *TestLogConsumer) Accept(l Log) {
Expand All @@ -31,7 +42,43 @@ func (g *TestLogConsumer) Accept(l Log) {
return
}

g.Msgs = append(g.Msgs, s)
// Accept is called from a different goroutine than WaitFor.
// We need to synchronize access and notify the waiting goroutine so that it always sees the updated msgs.
g.mu.Lock()
Reviewer comment (Member) on this line: Could we add a comment here explaining why the synchronisation must be done at the test level? You already added it in #947 :)
defer g.mu.Unlock()
g.msgs = append(g.msgs, s)
if g.waitingFor != "" && s == fmt.Sprintf("echo %s\n", g.waitingFor) {
close(g.ackWait)
g.waitingFor = ""
}
}

// WaitFor waits for s to appear in the output.
// It returns an error if another wait is already in progress or the context is canceled.
func (g *TestLogConsumer) WaitFor(ctx context.Context, s string) error {
g.mu.Lock()
if g.waitingFor != "" {
g.mu.Unlock()
return fmt.Errorf("already waiting")
}
g.waitingFor = s
g.ackWait = make(chan bool)
g.mu.Unlock()
select {
case <-ctx.Done():
return ctx.Err()
case <-g.ackWait:
return nil
}
}

// Msgs returns messages received so far.
// The caller must not modify the contents of the slice.
func (g *TestLogConsumer) Msgs() []string {
g.mu.Lock()
v := g.msgs[0:len(g.msgs):len(g.msgs)]
g.mu.Unlock()
return v
}
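The new WaitFor helper lets tests block until a specific echoed line has actually reached the consumer instead of sleeping for a fixed interval, and Msgs hands out a capacity-limited view so callers cannot append into the consumer's backing array. A sketch of how a test might wrap the wait, assuming the same test package as this diff (the waitForEcho helper and its parameters are hypothetical; the /stdout?echo= endpoint is the one the existing tests already hit):

```go
// Hypothetical test helper: trigger a log line in the container, then block
// until the consumer has seen it or the timeout expires.
func waitForEcho(t *testing.T, ctx context.Context, g *TestLogConsumer, ep, msg string) {
	t.Helper()

	_, err := http.Get(ep + "/stdout?echo=" + msg)
	require.NoError(t, err)

	waitCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()
	require.NoError(t, g.WaitFor(waitCtx, msg), "consumer never received %q", msg)
}
```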

func Test_LogConsumerGetsCalled(t *testing.T) {
Expand All @@ -56,12 +103,9 @@ func Test_LogConsumerGetsCalled(t *testing.T) {
ep, err := c.Endpoint(ctx, "http")
require.NoError(t, err)

g := TestLogConsumer{
Msgs: []string{},
Ack: make(chan bool),
}
g := NewTestLogConsumer()

c.FollowOutput(&g)
c.FollowOutput(g)

err = c.StartLogProducer(ctx)
require.NoError(t, err)
@@ -81,7 +125,7 @@ func Test_LogConsumerGetsCalled(t *testing.T) {
t.Fatal("never received final log message")
}
assert.Nil(t, c.StopLogProducer())
assert.Equal(t, []string{"ready\n", "echo hello\n", "echo there\n"}, g.Msgs)
assert.Equal(t, []string{"ready\n", "echo hello\n", "echo there\n"}, g.Msgs())

terminateContainerOnEnd(t, ctx, c)
}
@@ -173,11 +217,11 @@ func Test_MultipleLogConsumers(t *testing.T) {
ep, err := c.Endpoint(ctx, "http")
require.NoError(t, err)

first := TestLogConsumer{Msgs: []string{}, Ack: make(chan bool)}
second := TestLogConsumer{Msgs: []string{}, Ack: make(chan bool)}
first := NewTestLogConsumer()
second := NewTestLogConsumer()

c.FollowOutput(&first)
c.FollowOutput(&second)
c.FollowOutput(first)
c.FollowOutput(second)

err = c.StartLogProducer(ctx)
require.NoError(t, err)
@@ -192,8 +236,8 @@ func Test_MultipleLogConsumers(t *testing.T) {
<-second.Ack
assert.Nil(t, c.StopLogProducer())

assert.Equal(t, []string{"ready\n", "echo mlem\n"}, first.Msgs)
assert.Equal(t, []string{"ready\n", "echo mlem\n"}, second.Msgs)
assert.Equal(t, []string{"ready\n", "echo mlem\n"}, first.Msgs())
assert.Equal(t, []string{"ready\n", "echo mlem\n"}, second.Msgs())
assert.Nil(t, c.Terminate(ctx))
}

@@ -219,9 +263,9 @@ func Test_StartStop(t *testing.T) {
ep, err := c.Endpoint(ctx, "http")
require.NoError(t, err)

g := TestLogConsumer{Msgs: []string{}, Ack: make(chan bool)}
g := NewTestLogConsumer()

c.FollowOutput(&g)
c.FollowOutput(g)

require.NoError(t, c.StopLogProducer(), "nothing should happen even if the producer is not started")
require.NoError(t, c.StartLogProducer(ctx))
@@ -230,6 +274,11 @@ func Test_StartStop(t *testing.T) {
_, err = http.Get(ep + "/stdout?echo=mlem")
require.NoError(t, err)

waitCtx, cancelWait := context.WithTimeout(ctx, 5*time.Second)
err = g.WaitFor(waitCtx, "mlem")
cancelWait()
require.NoError(t, err)

require.NoError(t, c.StopLogProducer())
require.NoError(t, c.StartLogProducer(ctx))

@@ -248,7 +297,7 @@ func Test_StartStop(t *testing.T) {
"ready\n",
"echo mlem\n",
"echo mlem2\n",
}, g.Msgs)
}, g.Msgs())
assert.Nil(t, c.Terminate(ctx))
}

@@ -339,7 +388,7 @@ func TestContainerLogWithErrClosed(t *testing.T) {

// Gather the initial container logs
time.Sleep(time.Second * 1)
existingLogs := len(consumer.Msgs)
existingLogs := len(consumer.Msgs())

hitNginx := func() {
i, _, err := dind.Exec(ctx, []string{"wget", "--spider", "localhost:" + port.Port()})
@@ -350,10 +399,11 @@ func TestContainerLogWithErrClosed(t *testing.T) {

hitNginx()
time.Sleep(time.Second * 1)
if len(consumer.Msgs)-existingLogs != 1 {
t.Fatalf("logConsumer should have 1 new log message, instead has: %v", consumer.Msgs[existingLogs:])
logs2 := consumer.Msgs()
if len(logs2)-existingLogs != 1 {
t.Fatalf("logConsumer should have 1 new log message, instead has: %v", logs2[existingLogs:])
}
existingLogs = len(consumer.Msgs)
existingLogs = len(consumer.Msgs())

iptableArgs := []string{
"INPUT", "-p", "tcp", "--dport", "2375",
@@ -373,10 +423,11 @@ func TestContainerLogWithErrClosed(t *testing.T) {
hitNginx()
hitNginx()
time.Sleep(time.Second * 1)
if len(consumer.Msgs)-existingLogs != 2 {
logs3 := consumer.Msgs()
if len(logs3)-existingLogs != 2 {
t.Fatalf(
"LogConsumer should have 2 new log messages after detecting closed connection and"+
" re-requesting logs. Instead has:\n%s", consumer.Msgs[existingLogs:],
" re-requesting logs. Instead has:\n%s", logs3[existingLogs:],
)
}
}
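One more note on the recovery path this last test exercises: when the log stream drops with io.EOF, the producer does not give up; it records the current time in the seconds.nanoseconds form the Docker API accepts for Since and issues a fresh ContainerLogs request from that point. A tiny sketch of just that timestamp bookkeeping (the options literal in the comment is illustrative):

```go
// Sketch of the Since value the producer rebuilds before re-requesting logs,
// so the new request resumes from "now" instead of replaying everything.
package main

import (
	"fmt"
	"time"
)

func main() {
	now := time.Now()
	since := fmt.Sprintf("%d.%09d", now.Unix(), int64(now.Nanosecond()))
	fmt.Println("re-request logs with Since =", since)
	// e.g. options := types.ContainerLogsOptions{ShowStdout: true, ShowStderr: true, Follow: true, Since: since}
}
```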