From 0548fe251c8d5df99347cba52dd5cffbcd9d913d Mon Sep 17 00:00:00 2001 From: Rob Murray Date: Wed, 16 Oct 2024 12:05:17 +0100 Subject: [PATCH] Enable bridge netfiltering if userland-proxy=false In release 27.0, ip6tables was enabled by default. That caused a problem on some hosts where iptables was explicitly disabled and loading the br_netfilter module (which loads with its nf-call-iptables settings enabled) caused user-defined iptables rules to block traffic on bridges, breaking inter-container communication. In 27.3.0, commit 5c499fc4b249708619b4225de00c50c861bcfc08 delayed loading of the br_netfilter module until it was needed. The load now happens in the function that sets bridge-nf-call-ip[6]tables when needed. It was only called for icc=false networks. However, br_netfilter is also needed when userland-proxy=false. Without it, packets addressed to a host-mapped port for a container on the same network are not DNAT'd properly (responses have the server container's address instead of the host's). That means, in all releases including 26.x, if br_netfilter was loaded before the daemon started - and the OS/user/other-application had disabled bridge-nf-call-ip[6]tables, it would not be enabled by the daemon. So, ICC would fail for host-mapped ports with the userland-proxy disabled. The change in 27.3.0 made this worse - previously, loading br_netfilter whenever iptables/ip6tables was enabled meant that bridge-netfiltering got enabled, even though the daemon didn't check it was enabled. So... check that br_netfilter is loaded, with bridge-nf-call-ip[6]tables enabled, if userland-proxy=false. Signed-off-by: Rob Murray --- .../networking/port_mapping_linux_test.go | 89 +++++++++++++++++++ libnetwork/drivers/bridge/bridge_linux.go | 10 ++- 2 files changed, 96 insertions(+), 3 deletions(-) diff --git a/integration/networking/port_mapping_linux_test.go b/integration/networking/port_mapping_linux_test.go index b882a3f49bab6..e35088feeb851 100644 --- a/integration/networking/port_mapping_linux_test.go +++ b/integration/networking/port_mapping_linux_test.go @@ -6,6 +6,7 @@ import ( "net" "net/http" "net/netip" + "os" "os/exec" "strconv" "strings" @@ -408,6 +409,94 @@ func TestAccessPublishedPortFromRemoteHost(t *testing.T) { } } +func TestAccessPublishedPortFromCtr(t *testing.T) { + // This test makes changes to the host's "/proc/sys/net/bridge/bridge-nf-call-iptables", + // which would have no effect on rootlesskit's netns. + skip.If(t, testEnv.IsRootless, "rootlesskit has its own netns") + + testcases := []struct { + name string + daemonOpts []string + disableBrNfCall bool + }{ + { + name: "with-proxy", + }, + { + name: "no-proxy", + daemonOpts: []string{"--userland-proxy=false"}, + }, + { + // Before starting the daemon, disable bridge-nf-call-iptables. It should + // be enabled by the daemon because, without docker-proxy, it's needed to + // DNAT packets crossing the bridge between containers. + // Regression test for https://github.com/moby/moby/issues/48664 + name: "no-proxy no-brNfCall", + daemonOpts: []string{"--userland-proxy=false"}, + disableBrNfCall: true, + }, + } + + // Find an address on the test host. + hostAddr := func() string { + conn, err := net.Dial("tcp4", "hub.docker.com:80") + assert.NilError(t, err) + defer conn.Close() + return conn.LocalAddr().(*net.TCPAddr).IP.String() + }() + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + ctx := setupTest(t) + + if tc.disableBrNfCall { + // Only run this test if br_netfilter is loaded, and enabled for IPv4. + const procFile = "/proc/sys/net/bridge/bridge-nf-call-iptables" + val, err := os.ReadFile(procFile) + if err != nil { + t.Skipf("Cannot read %s, br_netfilter not loaded? (%s)", procFile, err) + } + if val[0] != '1' { + t.Skipf("bridge-nf-call-iptables=%v", val[0]) + } + err = os.WriteFile(procFile, []byte{'0', '\n'}, 0o644) + assert.NilError(t, err) + defer os.WriteFile(procFile, []byte{'1', '\n'}, 0o644) + } + + d := daemon.New(t) + d.StartWithBusybox(ctx, t, tc.daemonOpts...) + defer d.Stop(t) + c := d.NewClientT(t) + defer c.Close() + + const netName = "tappfcnet" + network.CreateNoError(ctx, t, c, netName) + defer network.RemoveNoError(ctx, t, c, netName) + + serverId := container.Run(ctx, t, c, + container.WithNetworkMode(netName), + container.WithExposedPorts("80"), + container.WithPortMap(nat.PortMap{"80": {{HostIP: "0.0.0.0"}}}), + container.WithCmd("httpd", "-f"), + ) + defer c.ContainerRemove(ctx, serverId, containertypes.RemoveOptions{Force: true}) + + inspect := container.Inspect(ctx, t, c, serverId) + hostPort := inspect.NetworkSettings.Ports["80/tcp"][0].HostPort + + clientCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + res := container.RunAttach(clientCtx, t, c, + container.WithNetworkMode(netName), + container.WithCmd("wget", "http://"+net.JoinHostPort(hostAddr, hostPort)), + ) + defer c.ContainerRemove(ctx, res.ContainerID, containertypes.RemoveOptions{Force: true}) + assert.Check(t, is.Contains(res.Stderr.String(), "404 Not Found")) + }) + } +} + // TestRestartUserlandProxyUnder2MSL checks that a container can be restarted // while previous connections to the proxy are still in TIME_WAIT state. func TestRestartUserlandProxyUnder2MSL(t *testing.T) { diff --git a/libnetwork/drivers/bridge/bridge_linux.go b/libnetwork/drivers/bridge/bridge_linux.go index 39bd117877339..218e1a1ef3f3d 100644 --- a/libnetwork/drivers/bridge/bridge_linux.go +++ b/libnetwork/drivers/bridge/bridge_linux.go @@ -876,6 +876,10 @@ func (d *driver) createNetwork(config *networkConfiguration) (err error) { enableIPv6Forwarding := config.EnableIPv6 && d.config.EnableIPForwarding + // Module br_netfilter needs to be loaded with net.bridge.bridge-nf-call-ip[6]tables + // enabled to implement icc=false, or DNAT when the userland-proxy is disabled. + enableBrNfCallIptables := !config.EnableICC || !d.config.EnableUserlandProxy + // Conditionally queue setup steps depending on configuration values. for _, step := range []struct { Condition bool @@ -918,9 +922,9 @@ func (d *driver) createNetwork(config *networkConfiguration) (err error) { // Add inter-network communication rules. {d.config.EnableIPTables || d.config.EnableIP6Tables, setupNetworkIsolationRules}, - // Configure bridge networking filtering if ICC is off and IP tables are enabled - {!config.EnableICC && d.config.EnableIPTables, setupIPv4BridgeNetFiltering}, - {!config.EnableICC && d.config.EnableIP6Tables, setupIPv6BridgeNetFiltering}, + // Configure bridge networking filtering if needed and IP tables are enabled + {enableBrNfCallIptables && d.config.EnableIPTables, setupIPv4BridgeNetFiltering}, + {enableBrNfCallIptables && d.config.EnableIP6Tables, setupIPv6BridgeNetFiltering}, } { if step.Condition { bridgeSetup.queueStep(step.Fn)