Skip to content

Commit

Permalink
chore: add debugging for termination
Browse files Browse the repository at this point in the history
Add debug logging to help identify issue

More debug to see if reaper is cause the issue.

Disable ryuk to see if that fixes the tests.

Add docker version info to see if there's a bug in the action runners
docker version.

Add docker info, config to see if we can enable debug on docker daemon.

Restart docker with debug so we can get request logs.
  • Loading branch information
stevenh committed Jul 28, 2024
1 parent 5730bbe commit d5092d0
Show file tree
Hide file tree
Showing 3 changed files with 259 additions and 13 deletions.
47 changes: 38 additions & 9 deletions docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,10 @@ func (c *DockerContainer) Stop(ctx context.Context, timeout *time.Duration) erro
// without exposing the ability to fully initialize the container state.
// See: https://github.com/testcontainers/testcontainers-go/issues/2667
// TODO: Add a check for isRunning when the above issue is resolved.
debugPrintln("XXX: DockerContainer.Stop", c.ID)
err := c.stoppingHook(ctx)
if err != nil {
debugPrintln("XXX: DockerContainer.Stop stoppingHook error:", err, c.ID)
return fmt.Errorf("stopping hook: %w", err)
}

Expand All @@ -280,7 +282,9 @@ func (c *DockerContainer) Stop(ctx context.Context, timeout *time.Duration) erro
options.Timeout = &timeoutSeconds
}

debugPrintln("XXX: DockerContainer.Stop ContainerStop", c.ID)
if err := c.provider.client.ContainerStop(ctx, c.ID, options); err != nil {
debugPrintln("XXX: DockerContainer.Stop ContainerStop error:", err)
return fmt.Errorf("container stop: %w", err)
}

Expand All @@ -293,8 +297,10 @@ func (c *DockerContainer) Stop(ctx context.Context, timeout *time.Duration) erro

err = c.stoppedHook(ctx)
if err != nil {
debugPrintln("XXX: DockerContainer.Stop stoppedHook error:", err, c.ID)
return fmt.Errorf("stopped hook: %w", err)
}
debugPrintln("XXX: DockerContainer.Stop done", c.ID)

return nil
}
Expand All @@ -310,6 +316,9 @@ func (c *DockerContainer) Terminate(ctx context.Context) error {
// ContainerRemove hardcodes stop timeout to 3 seconds which is too short
// to ensure that child containers are stopped so we manually call stop.
// TODO: make this configurable via a functional option.
// time.Sleep(10 * time.Millisecond)
debugPrintln("XXX: DockerContainer.Terminate", c.ID)

timeout := 10 * time.Second
err := c.Stop(ctx, &timeout)
if err != nil && !isCleanupSafe(err) {
Expand All @@ -329,14 +338,17 @@ func (c *DockerContainer) Terminate(ctx context.Context) error {

// TODO: Handle errors from ContainerRemove more correctly, e.g. should we
// run the terminated hook?
errs := []error{
c.terminatingHook(ctx),
c.provider.client.ContainerRemove(ctx, c.GetContainerID(), container.RemoveOptions{
RemoveVolumes: true,
Force: true,
}),
c.terminatedHook(ctx),
}

errs := []error{c.terminatingHook(ctx)}

// time.Sleep(10 * time.Millisecond)
debugPrintln("XXX: DockerContainer.ContainerRemove", c.ID)
err = c.provider.client.ContainerRemove(ctx, c.GetContainerID(), container.RemoveOptions{
RemoveVolumes: true,
Force: true,
})
debugPrintln("XXX: DockerContainer.ContainerRemove done", c.ID, err)
errs = append(errs, err, c.terminatedHook(ctx))

if c.imageWasBuilt && !c.keepBuiltImage {
_, err := c.provider.client.ImageRemove(ctx, c.Image, image.RemoveOptions{
Expand Down Expand Up @@ -747,6 +759,7 @@ func (c *DockerContainer) StartLogProducer(ctx context.Context, opts ...LogProdu
// Use functional option WithLogProductionTimeout() to override default timeout. If it's
// lower than 5s and greater than 60s it will be set to 5s or 60s respectively.
func (c *DockerContainer) startLogProduction(ctx context.Context, opts ...LogProductionOption) error {
debugPrintln("XXX: DockerContainer.startLogProduction", c.ID)
for _, opt := range opts {
opt(c)
}
Expand Down Expand Up @@ -782,6 +795,9 @@ func (c *DockerContainer) startLogProduction(ctx context.Context, opts ...LogPro
// - A fatal error occurs
// - No more logs are available
func (c *DockerContainer) logProducer(stdout, stderr io.Writer) error {
debugPrintln("XXX: DockerContainer.startLogProduction go", c.ID)
defer debugPrintln("XXX: DockerContainer.startLogProduction done", c.ID)

// Clean up idle client connections.
defer c.provider.Close()

Expand All @@ -797,6 +813,7 @@ func (c *DockerContainer) logProducer(stdout, stderr io.Writer) error {
defer cancel()

err := c.copyLogs(timeoutCtx, options, stdout, stderr)
debugPrintln("XXX: DockerContainer.logProducerSince:", err, c.ID)
switch {
case err == nil:
// No more logs available.
Expand All @@ -810,22 +827,25 @@ func (c *DockerContainer) logProducer(stdout, stderr io.Writer) error {
// Unexpected error, retry.
Logger.Printf("Unexpected error reading logs: %v", err)
}
debugPrintln("XXX: DockerContainer.logProducerAll:", err, c.ID)

// Retry from the last log received.
now := time.Now()
options.Since = fmt.Sprintf("%d.%09d", now.Unix(), int64(now.Nanosecond()))
}
}

// copyLogs copies logs from the container to stdout and stderr.
// copyLogs copies logs from the container received using options to stdout and stderr.
func (c *DockerContainer) copyLogs(ctx context.Context, options container.LogsOptions, stdout, stderr io.Writer) error {
rc, err := c.provider.client.ContainerLogs(ctx, c.GetContainerID(), options)
if err != nil {
debugPrintln("XXX: DockerContainer.startLogProduction ContainerLogs:", err)
return fmt.Errorf("container logs: %w", err)
}
defer rc.Close()

if _, err = stdcopy.StdCopy(stdout, stderr, rc); err != nil {
debugPrintln("XXX: DockerContainer.startLogProduction stdcopy.StdCopy:", err)
return fmt.Errorf("stdcopy: %w", err)
}

Expand All @@ -837,17 +857,25 @@ func (c *DockerContainer) StopLogProducer() error {
return c.stopLogProduction()
}

// debugPrintln is a function that can be overridden for debugging purposes.
var debugPrintln = func(a ...any) {}

// stopLogProduction will stop the concurrent process that is reading logs
// and sending them to each added LogConsumer
func (c *DockerContainer) stopLogProduction() error {
debugPrintln("XXX: DockerContainer.stopLogProduction", c.ID)

if c.logProductionCancel == nil {
return nil
}

// Signal the log production to stop.
c.logProductionCancel(errLogProductionStop)

debugPrintln("XXX: DockerContainer.stopLogProduction signalled", c.ID)

if err := context.Cause(c.logProductionCtx); err != nil {
debugPrintln("XXX: DockerContainer.stopLogProduction err:", err, c.ID)
switch {
case errors.Is(err, errLogProductionStop):
// Log production was stopped.
Expand All @@ -861,6 +889,7 @@ func (c *DockerContainer) stopLogProduction() error {
}
}

debugPrintln("XXX: DockerContainer.stopLogProduction done", c.ID)
return nil
}

Expand Down
Loading

0 comments on commit d5092d0

Please sign in to comment.