From 77e51657fe5bf7bf67251037c8a4b16391c84aab Mon Sep 17 00:00:00 2001 From: Albin Kerouanton Date: Tue, 24 Sep 2024 16:03:21 +0200 Subject: [PATCH] cmd/docker-proxy: re-add SO_REUSEADDR Since commit b3fabede, the Engine creates the listening sockets used by docker-proxy by making raw syscalls (i.e. socket, setsockopt, bind). Before that commit, those sockets were created by docker-proxy through Go's `net.ListenX` functions. Unlike `net.ListenX` functions, the raw syscall code doesn't set the `SO_REUSEADDR` option. This option is typically used by TCP servers to make sure that they can be restarted even if there are client sockets referencing the server port as their source port (e.g. in TIME_WAIT state, or any other state). Citing UNIX Network Programming, Section 7.5 (p210): > By default, when the listening server is restarted by calling socket, > bind, and listen, the call to bind fails because the listening server > is trying to bind a port that is part of an existing connection [...] > _All_ TCP servers should specify this socket option to allow the > server to be restarted in this situation. Signed-off-by: Albin Kerouanton --- .../networking/port_mapping_linux_test.go | 53 +++++++++++++++++++ .../drivers/bridge/port_mapping_linux.go | 4 ++ 2 files changed, 57 insertions(+) diff --git a/integration/networking/port_mapping_linux_test.go b/integration/networking/port_mapping_linux_test.go index cb0955497adbe..b3e59e02db644 100644 --- a/integration/networking/port_mapping_linux_test.go +++ b/integration/networking/port_mapping_linux_test.go @@ -400,3 +400,56 @@ func TestAccessPublishedPortFromRemoteHost(t *testing.T) { } } } + +// TestRestartUserlandProxyUnder2MSL checks that a container can be restarted +// while previous connections to the proxy are still in TIME_WAIT state. 
+func TestRestartUserlandProxyUnder2MSL(t *testing.T) { + skip.If(t, testEnv.IsRootless()) + + ctx := setupTest(t) + + d := daemon.New(t) + d.StartWithBusybox(ctx, t) + defer d.Stop(t) + + c := d.NewClientT(t) + defer c.Close() + + const netName = "nat-time-wait" + network.CreateNoError(ctx, t, c, netName, + network.WithDriver("bridge"), + network.WithOption(bridge.BridgeName, netName)) + defer network.RemoveNoError(ctx, t, c, netName) + + ctrName := sanitizeCtrName(t.Name() + "-server") + ctrOpts := []func(*container.TestContainerConfig){ + container.WithName(ctrName), + container.WithExposedPorts("80/tcp"), + container.WithPortMap(nat.PortMap{"80/tcp": {{HostPort: "1780"}}}), + container.WithCmd("httpd", "-f"), + container.WithNetworkMode(netName), + } + + container.Run(ctx, t, c, ctrOpts...) + defer c.ContainerRemove(ctx, ctrName, containertypes.RemoveOptions{Force: true}) + + // Make an HTTP request to open a TCP connection to the proxy. We don't + // care about the HTTP response, just that the connection is established. + // So, check that we receive a 404 to make sure we've a working full-duplex + // TCP connection. + httpClient := &http.Client{Timeout: 3 * time.Second} + resp, err := httpClient.Get("http://127.0.0.1:1780") + assert.NilError(t, err) + assert.Check(t, is.Equal(resp.StatusCode, 404)) + + // Removing the container will kill the userland proxy, and the connection + // opened by the previous HTTP request will be properly closed (ie. on both + // sides). Thus, that connection will transition to the TIME_WAIT state. + assert.NilError(t, c.ContainerRemove(ctx, ctrName, containertypes.RemoveOptions{Force: true})) + + // Make sure the container can be restarted. [container.Run] checks that + // the ContainerStart API call doesn't return an error. We don't need to + // make another TCP connection either, that's out of scope. Hence, we don't + // need to check anything after this call. + container.Run(ctx, t, c, ctrOpts...) 
+} diff --git a/libnetwork/drivers/bridge/port_mapping_linux.go b/libnetwork/drivers/bridge/port_mapping_linux.go index 95b3db45da52d..0302cb7fff066 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux.go +++ b/libnetwork/drivers/bridge/port_mapping_linux.go @@ -598,6 +598,10 @@ func bindTCPOrUDP(cfg portBindingReq, port, typ, proto int) (_ portBinding, retE } }() + if err := syscall.SetsockoptInt(sd, syscall.SOL_SOCKET, syscall.SO_REUSEADDR, 1); err != nil { + return portBinding{}, fmt.Errorf("failed to setsockopt(SO_REUSEADDR) for %s: %w", cfg, err) + } + if domain == syscall.AF_INET6 { syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1) }