diff --git a/integration/network/bridge/iptablesdoc/generated/new-daemon.md b/integration/network/bridge/iptablesdoc/generated/new-daemon.md index 74087516a22e4..0a68fda23553f 100644 --- a/integration/network/bridge/iptablesdoc/generated/new-daemon.md +++ b/integration/network/bridge/iptablesdoc/generated/new-daemon.md @@ -22,6 +22,7 @@ Table `filter`: Chain DOCKER (1 references) num pkts bytes target prot opt in out source destination + 1 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0 Chain DOCKER-ISOLATION-STAGE-1 (1 references) num pkts bytes target prot opt in out source destination @@ -54,6 +55,7 @@ Table `filter`: -A FORWARD -o docker0 -j DOCKER -A FORWARD -i docker0 ! -o docker0 -j ACCEPT -A FORWARD -i docker0 -o docker0 -j ACCEPT + -A DOCKER ! -i docker0 -o docker0 -j DROP -A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2 -A DOCKER-ISOLATION-STAGE-1 -j RETURN -A DOCKER-ISOLATION-STAGE-2 -o docker0 -j DROP @@ -108,7 +110,12 @@ But, when ICC is disabled, rule 6 is DROP, so it would need to be placed before rule 5. Because the rules are generated in different places, that's a slightly bigger change than it should be._ -The DOCKER chain is empty, because there are no containers with port mappings yet. +The DOCKER chain has a single DROP rule for the bridge network, to drop any +packets routed to the network that have not originated in the network. Added by +[defaultDrop][21]. +_This means there is no dependency on the filter-FORWARD chain's default policy. +Even if it is ACCEPT, packets will be dropped unless container ports/protocols +are published._ The DOCKER-ISOLATION chains implement inter-network isolation, all (unrelated) packets are processed by these chains. The rule are inserted at the head of the @@ -119,6 +126,7 @@ chain when a network is created, in [setINC][20]. packets that are destined for any other network are dropped. 
[20]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L369 +[21]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L252 Table nat: diff --git a/integration/network/bridge/iptablesdoc/generated/usernet-internal.md b/integration/network/bridge/iptablesdoc/generated/usernet-internal.md index 0d1ce16af9594..16f98d128fbe0 100644 --- a/integration/network/bridge/iptablesdoc/generated/usernet-internal.md +++ b/integration/network/bridge/iptablesdoc/generated/usernet-internal.md @@ -28,6 +28,7 @@ The filter table is updated as follows: Chain DOCKER (1 references) num pkts bytes target prot opt in out source destination + 1 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0 Chain DOCKER-ISOLATION-STAGE-1 (1 references) num pkts bytes target prot opt in out source destination @@ -63,6 +64,7 @@ The filter table is updated as follows: -A FORWARD -o docker0 -j DOCKER -A FORWARD -i docker0 ! -o docker0 -j ACCEPT -A FORWARD -i docker0 -o docker0 -j ACCEPT + -A DOCKER ! -i docker0 -o docker0 -j DROP -A DOCKER-ISOLATION-STAGE-1 ! -s 192.0.2.0/24 -o bridge1 -j DROP -A DOCKER-ISOLATION-STAGE-1 ! -d 192.0.2.0/24 -i bridge1 -j DROP -A DOCKER-ISOLATION-STAGE-1 -i docker0 ! 
-o docker0 -j DOCKER-ISOLATION-STAGE-2 diff --git a/integration/network/bridge/iptablesdoc/generated/usernet-portmap-noicc.md b/integration/network/bridge/iptablesdoc/generated/usernet-portmap-noicc.md index 34984075bc30d..992778f426d0a 100644 --- a/integration/network/bridge/iptablesdoc/generated/usernet-portmap-noicc.md +++ b/integration/network/bridge/iptablesdoc/generated/usernet-portmap-noicc.md @@ -32,6 +32,8 @@ The filter table is: Chain DOCKER (2 references) num pkts bytes target prot opt in out source destination 1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 192.0.2.2 tcp dpt:80 + 2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0 + 3 0 0 DROP 0 -- !bridge1 bridge1 0.0.0.0/0 0.0.0.0/0 Chain DOCKER-ISOLATION-STAGE-1 (1 references) num pkts bytes target prot opt in out source destination @@ -71,6 +73,8 @@ The filter table is: -A FORWARD -i docker0 -o docker0 -j ACCEPT -A FORWARD -i bridge1 -o bridge1 -j DROP -A DOCKER -d 192.0.2.2/32 ! -i bridge1 -o bridge1 -p tcp -m tcp --dport 80 -j ACCEPT + -A DOCKER ! -i docker0 -o docker0 -j DROP + -A DOCKER ! -i bridge1 -o bridge1 -j DROP -A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2 -A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2 -A DOCKER-ISOLATION-STAGE-1 -j RETURN diff --git a/integration/network/bridge/iptablesdoc/generated/usernet-portmap-noproxy.md b/integration/network/bridge/iptablesdoc/generated/usernet-portmap-noproxy.md index b4fa87ae47f28..1f8975a16b030 100644 --- a/integration/network/bridge/iptablesdoc/generated/usernet-portmap-noproxy.md +++ b/integration/network/bridge/iptablesdoc/generated/usernet-portmap-noproxy.md @@ -35,6 +35,8 @@ The filter table is the same as with the userland proxy enabled. 
Chain DOCKER (2 references) num pkts bytes target prot opt in out source destination 1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 192.0.2.2 tcp dpt:80 + 2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0 + 3 0 0 DROP 0 -- !bridge1 bridge1 0.0.0.0/0 0.0.0.0/0 Chain DOCKER-ISOLATION-STAGE-1 (1 references) num pkts bytes target prot opt in out source destination @@ -71,6 +73,8 @@ The filter table is the same as with the userland proxy enabled. -A FORWARD -i docker0 ! -o docker0 -j ACCEPT -A FORWARD -i docker0 -o docker0 -j ACCEPT -A DOCKER -d 192.0.2.2/32 ! -i bridge1 -o bridge1 -p tcp -m tcp --dport 80 -j ACCEPT + -A DOCKER ! -i docker0 -o docker0 -j DROP + -A DOCKER ! -i bridge1 -o bridge1 -j DROP -A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2 -A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2 -A DOCKER-ISOLATION-STAGE-1 -j RETURN diff --git a/integration/network/bridge/iptablesdoc/generated/usernet-portmap-routed.md b/integration/network/bridge/iptablesdoc/generated/usernet-portmap-routed.md index 76b97cfc18401..0ba094df10632 100644 --- a/integration/network/bridge/iptablesdoc/generated/usernet-portmap-routed.md +++ b/integration/network/bridge/iptablesdoc/generated/usernet-portmap-routed.md @@ -8,7 +8,7 @@ Running the daemon with the userland proxy disabled then, as before, adding a ne --subnet 192.0.2.0/24 --gateway 192.0.2.1 bridge1 docker run --network bridge1 -p 8080:80 --name c1 busybox -The filter table is the same as with the userland proxy enabled. +The filter table is largely the same as with the userland proxy enabled. 
_Note that this means inter-network communication is disabled as-normal so, although published ports will be directly accessible from a remote host @@ -40,6 +40,9 @@ on the same host._ Chain DOCKER (2 references) num pkts bytes target prot opt in out source destination 1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 192.0.2.2 tcp dpt:80 + 2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0 + 3 0 0 ACCEPT 1 -- * bridge1 0.0.0.0/0 0.0.0.0/0 + 4 0 0 DROP 0 -- !bridge1 bridge1 0.0.0.0/0 0.0.0.0/0 Chain DOCKER-ISOLATION-STAGE-1 (1 references) num pkts bytes target prot opt in out source destination @@ -76,6 +79,9 @@ on the same host._ -A FORWARD -i docker0 ! -o docker0 -j ACCEPT -A FORWARD -i docker0 -o docker0 -j ACCEPT -A DOCKER -d 192.0.2.2/32 ! -i bridge1 -o bridge1 -p tcp -m tcp --dport 80 -j ACCEPT + -A DOCKER ! -i docker0 -o docker0 -j DROP + -A DOCKER -o bridge1 -p icmp -j ACCEPT + -A DOCKER ! -i bridge1 -o bridge1 -j DROP -A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2 -A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2 -A DOCKER-ISOLATION-STAGE-1 -j RETURN @@ -87,6 +93,39 @@ on the same host._ +However, a rule is added by [setICMP][5] to the DOCKER chain (shown below) to +allow ICMP. The equivalent IPv6 rule uses `-p icmpv6` rather than `-p icmp`, +so *ALL* ICMP message types are allowed. + +_The ACCEPT rule as shown by `iptables -L` looks alarming until you spot that it's +for `prot 1`._ + +Because the ICMP rule (rule 3) is per-network, it is appended to the chain along +with the default-DROP rule (rule 4). So, it is likely to be separated from +per-port/protocol ACCEPT rules for published ports on the same network. But it +will always appear before the default-DROP. + +_[RFC 4890 section 4.3][6] makes recommendations for filtering ICMPv6. These +have been considered, but the host firewall is not a network boundary in the +sense used by the RFC. 
So, Node Information and Router Renumbering messages are +not discarded, and experimental/unused types are allowed because they may be +needed._ + + Chain DOCKER (2 references) + num pkts bytes target prot opt in out source destination + 1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 192.0.2.2 tcp dpt:80 + 2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0 + 3 0 0 ACCEPT 1 -- * bridge1 0.0.0.0/0 0.0.0.0/0 + 4 0 0 DROP 0 -- !bridge1 bridge1 0.0.0.0/0 0.0.0.0/0 + + + -N DOCKER + -A DOCKER -d 192.0.2.2/32 ! -i bridge1 -o bridge1 -p tcp -m tcp --dport 80 -j ACCEPT + -A DOCKER ! -i docker0 -o docker0 -j DROP + -A DOCKER -o bridge1 -p icmp -j ACCEPT + -A DOCKER ! -i bridge1 -o bridge1 -j DROP + + The nat table is: Chain PREROUTING (policy ACCEPT 0 packets, 0 bytes) @@ -141,3 +180,5 @@ _And, the userland proxy won't be started for mapped ports._ [2]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L294 [3]: https://github.com/moby/moby/blob/675c2ac2db93e38bb9c5a6615d4155a969535fd9/libnetwork/drivers/bridge/port_mapping_linux.go#L477-L479 [4]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L290 +[5]: https://github.com/robmry/moby/blob/d456d79cfc12cd7c801eebce0550b645c5343ca6/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L390-L395 +[6]: https://www.rfc-editor.org/rfc/rfc4890#section-4.3 diff --git a/integration/network/bridge/iptablesdoc/generated/usernet-portmap.md b/integration/network/bridge/iptablesdoc/generated/usernet-portmap.md index 6eea52b3b9c69..e2e2f440bc8b2 100644 --- a/integration/network/bridge/iptablesdoc/generated/usernet-portmap.md +++ b/integration/network/bridge/iptablesdoc/generated/usernet-portmap.md @@ -31,6 +31,8 @@ The filter table is updated as follows: Chain DOCKER (2 references) num pkts bytes target prot opt in out source destination 1 0 0 ACCEPT 6 -- !bridge1 bridge1 0.0.0.0/0 
192.0.2.2 tcp dpt:80 + 2 0 0 DROP 0 -- !docker0 docker0 0.0.0.0/0 0.0.0.0/0 + 3 0 0 DROP 0 -- !bridge1 bridge1 0.0.0.0/0 0.0.0.0/0 Chain DOCKER-ISOLATION-STAGE-1 (1 references) num pkts bytes target prot opt in out source destination @@ -70,6 +72,8 @@ The filter table is updated as follows: -A FORWARD -i docker0 ! -o docker0 -j ACCEPT -A FORWARD -i docker0 -o docker0 -j ACCEPT -A DOCKER -d 192.0.2.2/32 ! -i bridge1 -o bridge1 -p tcp -m tcp --dport 80 -j ACCEPT + -A DOCKER ! -i docker0 -o docker0 -j DROP + -A DOCKER ! -i bridge1 -o bridge1 -j DROP -A DOCKER-ISOLATION-STAGE-1 -i bridge1 ! -o bridge1 -j DOCKER-ISOLATION-STAGE-2 -A DOCKER-ISOLATION-STAGE-1 -i docker0 ! -o docker0 -j DOCKER-ISOLATION-STAGE-2 -A DOCKER-ISOLATION-STAGE-1 -j RETURN @@ -93,8 +97,14 @@ Note that: to the container's address. This rule is added when the container is created (unlike all the other rules so-far, which were created during driver or network initialisation). [setPerPortForwarding][1] + - These per-port rules are inserted at the head of the chain, so that they + appear before the network's DROP rule [defaultDrop][2] which is always + appended to the end of the chain. In this case, because `docker0` was + created before `bridge1`, the `bridge1` rules appear above and below the + `docker0` DROP rule. 
[1]: https://github.com/moby/moby/blob/675c2ac2db93e38bb9c5a6615d4155a969535fd9/libnetwork/drivers/bridge/port_mapping_linux.go#L795 +[2]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L252 And the corresponding nat table: diff --git a/integration/network/bridge/iptablesdoc/iptablesdoc_linux_test.go b/integration/network/bridge/iptablesdoc/iptablesdoc_linux_test.go index 60efde5e9bf0c..780ec31aef5fc 100644 --- a/integration/network/bridge/iptablesdoc/iptablesdoc_linux_test.go +++ b/integration/network/bridge/iptablesdoc/iptablesdoc_linux_test.go @@ -142,6 +142,7 @@ type iptCmdType = string const ( iptCmdLFilter4 iptCmdType = "LFilter4" iptCmdSFilter4 iptCmdType = "SFilter4" + iptCmdLFilterDocker4 iptCmdType = "LFilterDocker4" iptCmdSFilterForward4 iptCmdType = "SFilterForward4" iptCmdSFilterDocker4 iptCmdType = "SFilterDocker4" iptCmdLNat4 iptCmdType = "LNat4" @@ -152,6 +153,7 @@ var iptCmds = map[iptCmdType][]string{ iptCmdLFilter4: {"iptables", "-nvL", "--line-numbers", "-t", "filter"}, iptCmdSFilter4: {"iptables", "-S", "-t", "filter"}, iptCmdSFilterForward4: {"iptables", "-S", "FORWARD"}, + iptCmdLFilterDocker4: {"iptables", "-nvL", "DOCKER", "--line-numbers", "-t", "filter"}, iptCmdSFilterDocker4: {"iptables", "-S", "DOCKER"}, iptCmdLNat4: {"iptables", "-nvL", "--line-numbers", "-t", "nat"}, iptCmdSNat4: {"iptables", "-S", "-t", "nat"}, diff --git a/integration/network/bridge/iptablesdoc/templates/new-daemon.md b/integration/network/bridge/iptablesdoc/templates/new-daemon.md index 2fc16648ecfe4..330b36c7d0020 100644 --- a/integration/network/bridge/iptablesdoc/templates/new-daemon.md +++ b/integration/network/bridge/iptablesdoc/templates/new-daemon.md @@ -59,7 +59,12 @@ But, when ICC is disabled, rule 6 is DROP, so it would need to be placed before rule 5. 
Because the rules are generated in different places, that's a slightly bigger change than it should be._ -The DOCKER chain is empty, because there are no containers with port mappings yet. +The DOCKER chain has a single DROP rule for the bridge network, to drop any +packets routed to the network that have not originated in the network. Added by +[defaultDrop][21]. +_This means there is no dependency on the filter-FORWARD chain's default policy. +Even if it is ACCEPT, packets will be dropped unless container ports/protocols +are published._ The DOCKER-ISOLATION chains implement inter-network isolation, all (unrelated) packets are processed by these chains. The rule are inserted at the head of the @@ -70,6 +75,7 @@ chain when a network is created, in [setINC][20]. packets that are destined for any other network are dropped. [20]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L369 +[21]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L252 Table nat: diff --git a/integration/network/bridge/iptablesdoc/templates/usernet-portmap-routed.md b/integration/network/bridge/iptablesdoc/templates/usernet-portmap-routed.md index f0098813966f7..3054b92e5ceb1 100644 --- a/integration/network/bridge/iptablesdoc/templates/usernet-portmap-routed.md +++ b/integration/network/bridge/iptablesdoc/templates/usernet-portmap-routed.md @@ -8,7 +8,7 @@ Running the daemon with the userland proxy disabled then, as before, adding a ne --subnet 192.0.2.0/24 --gateway 192.0.2.1 bridge1 docker run --network bridge1 -p 8080:80 --name c1 busybox -The filter table is the same as with the userland proxy enabled. +The filter table is largely the same as with the userland proxy enabled. 
_Note that this means inter-network communication is disabled as-normal so, although published ports will be directly accessible from a remote host @@ -24,6 +24,28 @@ on the same host._ +However, a rule is added by [setICMP][5] to the DOCKER chain (shown below) to +allow ICMP. The equivalent IPv6 rule uses `-p icmpv6` rather than `-p icmp`, +so *ALL* ICMP message types are allowed. + +_The ACCEPT rule as shown by `iptables -L` looks alarming until you spot that it's +for `prot 1`._ + +Because the ICMP rule (rule 3) is per-network, it is appended to the chain along +with the default-DROP rule (rule 4). So, it is likely to be separated from +per-port/protocol ACCEPT rules for published ports on the same network. But it +will always appear before the default-DROP. + +_[RFC 4890 section 4.3][6] makes recommendations for filtering ICMPv6. These +have been considered, but the host firewall is not a network boundary in the +sense used by the RFC. So, Node Information and Router Renumbering messages are +not discarded, and experimental/unused types are allowed because they may be +needed._ + + {{index . "LFilterDocker4"}} + + {{index . "SFilterDocker4"}} + The nat table is: {{index . 
"LNat4"}} @@ -51,3 +73,5 @@ _And, the userland proxy won't be started for mapped ports._ [2]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L294 [3]: https://github.com/moby/moby/blob/675c2ac2db93e38bb9c5a6615d4155a969535fd9/libnetwork/drivers/bridge/port_mapping_linux.go#L477-L479 [4]: https://github.com/moby/moby/blob/333cfa640239153477bf635a8131734d0e9d099d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L290 +[5]: https://github.com/robmry/moby/blob/d456d79cfc12cd7c801eebce0550b645c5343ca6/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L390-L395 +[6]: https://www.rfc-editor.org/rfc/rfc4890#section-4.3 diff --git a/integration/network/bridge/iptablesdoc/templates/usernet-portmap.md b/integration/network/bridge/iptablesdoc/templates/usernet-portmap.md index 7e9bae42e7169..d537068ee1c84 100644 --- a/integration/network/bridge/iptablesdoc/templates/usernet-portmap.md +++ b/integration/network/bridge/iptablesdoc/templates/usernet-portmap.md @@ -30,8 +30,14 @@ Note that: to the container's address. This rule is added when the container is created (unlike all the other rules so-far, which were created during driver or network initialisation). [setPerPortForwarding][1] + - These per-port rules are inserted at the head of the chain, so that they + appear before the network's DROP rule [defaultDrop][2] which is always + appended to the end of the chain. In this case, because `docker0` was + created before `bridge1`, the `bridge1` rules appear above and below the + `docker0` DROP rule. 
[1]: https://github.com/moby/moby/blob/675c2ac2db93e38bb9c5a6615d4155a969535fd9/libnetwork/drivers/bridge/port_mapping_linux.go#L795 +[2]: https://github.com/robmry/moby/blob/52c89d467fc5326149e4bbb8903d23589b66ff0d/libnetwork/drivers/bridge/setup_ip_tables_linux.go#L252 And the corresponding nat table:
diff --git a/integration/networking/port_mapping_linux_test.go b/integration/networking/port_mapping_linux_test.go
index 8ec8f7f856f0a..f623afa38523b 100644
--- a/integration/networking/port_mapping_linux_test.go
+++ b/integration/networking/port_mapping_linux_test.go
@@ -599,3 +599,151 @@ func TestRestartUserlandProxyUnder2MSL(t *testing.T) {
 	// need to check anything after this call.
 	container.Run(ctx, t, c, ctrOpts...)
 }
+
+// TestDirectRoutingOpenPorts checks direct routing from a remote host (one
+// that routes to the container networks and addresses containers directly),
+// for combinations of:
+// - Filter FORWARD default policy: ACCEPT/DROP - shouldn't affect behaviour
+// - Gateway mode: nat/routed
+// For each combination, test:
+// - ping (expected to get a reply in routed mode only)
+// - http access to an open (mapped) container port (expected to connect)
+// - http access to an unmapped container port (expected to time out)
+func TestDirectRoutingOpenPorts(t *testing.T) {
+	skip.If(t, testEnv.IsRootless())
+	ctx := setupTest(t)
+
+	d := daemon.New(t)
+	d.StartWithBusybox(ctx, t)
+	t.Cleanup(func() { d.Stop(t) })
+
+	c := d.NewClientT(t)
+	t.Cleanup(func() { c.Close() })
+
+	// Create the L3 segment that the "docker" and "remote" hosts are added to.
+
+	l3 := networking.NewL3Segment(t, "test-routed-open-ports",
+		netip.MustParsePrefix("192.168.124.1/24"),
+		netip.MustParsePrefix("fdc0:36dc:a4dd::1/64"))
+	t.Cleanup(func() { l3.Destroy(t) })
+
+	// "docker" is the host where dockerd is running.
+	l3.AddHost(t, "docker", networking.CurrentNetns, "eth-test",
+		netip.MustParsePrefix("192.168.124.2/24"),
+		netip.MustParsePrefix("fdc0:36dc:a4dd::2/64"))
+	// "remote" simulates the remote host.
+	l3.AddHost(t, "remote", "test-remote-host", "eth0",
+		netip.MustParsePrefix("192.168.124.3/24"),
+		netip.MustParsePrefix("fdc0:36dc:a4dd::3/64"))
+	// On "remote", add IPv4 and IPv6 default routes via the "docker" host's addresses.
+	l3.Hosts["remote"].Run(t, "ip", "route", "add", "default", "via", "192.168.124.2")
+	l3.Hosts["remote"].Run(t, "ip", "-6", "route", "add", "default", "via", "fdc0:36dc:a4dd::2")
+
+	type ctrDesc struct {
+		id   string
+		ipv4 string
+		ipv6 string
+	}
+
+	// Create a dual-stack bridge network in the given gateway mode, and run a container
+	// on it with http servers on ports 80 and 81, but only map/open port 80.
+	createNet := func(gwMode string) ctrDesc {
+		netName := "test-" + gwMode
+		network.CreateNoError(ctx, t, c, netName,
+			network.WithDriver("bridge"),
+			network.WithIPv6(),
+			network.WithOption(bridge.BridgeName, "br-"+gwMode),
+			network.WithOption(bridge.IPv4GatewayMode, gwMode),
+			network.WithOption(bridge.IPv6GatewayMode, gwMode),
+		)
+		t.Cleanup(func() {
+			network.RemoveNoError(ctx, t, c, netName)
+		})
+
+		ctrId := container.Run(ctx, t, c,
+			container.WithNetworkMode(netName),
+			container.WithName("ctr-"+gwMode),
+			container.WithExposedPorts("80/tcp"),
+			container.WithPortMap(nat.PortMap{"80/tcp": {}}),
+		)
+		t.Cleanup(func() { // cleanups run last-added-first, so the container is removed before its network
+			c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true})
+		})
+
+		container.ExecT(ctx, t, c, ctrId, []string{"httpd", "-p", "80"})
+		container.ExecT(ctx, t, c, ctrId, []string{"httpd", "-p", "81"})
+
+		insp := container.Inspect(ctx, t, c, ctrId)
+		return ctrDesc{
+			id:   ctrId,
+			ipv4: insp.NetworkSettings.Networks[netName].IPAddress,
+			ipv6: insp.NetworkSettings.Networks[netName].GlobalIPv6Address,
+		}
+	}
+
+	const (
+		httpSuccess = "404 Not Found"        // presumably httpd has nothing to serve, so a 404 shows it was reached
+		httpFail    = "Connection timed out" // both are matched as substrings of curl's combined output
+		pingSuccess = 0
+		pingFail    = 1
+	)
+
+	networks := map[string]ctrDesc{
+		"nat":    createNet("nat"),
+		"routed": createNet("routed"),
+	}
+	expPingExit := map[string]int{
+		"nat":    pingFail,
+		"routed": pingSuccess,
+	}
+
+	testPing := func(t *testing.T, cmd, addr string, expExit int) {
+		t.Helper()
+		t.Parallel() // the caller's arguments have already been evaluated by this point
+		l3.Hosts["remote"].Do(t, func() {
+			t.Helper()
+			pingRes := icmd.RunCommand(cmd, "--numeric", "--count=1", "--timeout=3", addr)
+			assert.Check(t, pingRes.ExitCode == expExit, "%s %s -> out:%s err:%s",
+				cmd, addr, pingRes.Stdout(), pingRes.Stderr())
+		})
+	}
+	testHttp := func(t *testing.T, addr, port, expOut string) {
+		t.Helper()
+		t.Parallel()
+		l3.Hosts["remote"].Do(t, func() {
+			t.Helper()
+			u := "http://" + net.JoinHostPort(addr, port)
+			res := icmd.RunCommand("curl", "--max-time", "3", "--show-error", "--silent", u)
+			assert.Check(t, is.Contains(res.Combined(), expOut), "url:%s", u)
+		})
+	}
+
+	// Run the ping and http tests in two parallel groups, rather than waiting for
+	// ping/http timeouts separately. (The iptables filter-FORWARD policy affects the
+	// whole host, so ACCEPT/DROP tests can't be parallelized).
+	for _, fwdPolicy := range []string{"ACCEPT", "DROP"} {
+		networking.SetFilterForwardPolicies(t, fwdPolicy)
+		t.Run(fwdPolicy, func(t *testing.T) { // returns only once the parallel subtests started below have finished
+			for _, gwMode := range []string{"nat", "routed"} {
+				t.Run(gwMode+"/v4/ping", func(t *testing.T) {
+					testPing(t, "ping", networks[gwMode].ipv4, expPingExit[gwMode])
+				})
+				t.Run(gwMode+"/v6/ping", func(t *testing.T) {
+					testPing(t, "ping6", networks[gwMode].ipv6, expPingExit[gwMode])
+				})
+				t.Run(gwMode+"/v4/http/80", func(t *testing.T) {
+					testHttp(t, networks[gwMode].ipv4, "80", httpSuccess)
+				})
+				t.Run(gwMode+"/v4/http/81", func(t *testing.T) {
+					testHttp(t, networks[gwMode].ipv4, "81", httpFail)
+				})
+				t.Run(gwMode+"/v6/http/80", func(t *testing.T) {
+					testHttp(t, networks[gwMode].ipv6, "80", httpSuccess)
+				})
+				t.Run(gwMode+"/v6/http/81", func(t *testing.T) {
+					testHttp(t, networks[gwMode].ipv6, "81", httpFail)
+				})
+			}
+		})
+	}
+}
diff --git a/internal/testutils/networking/iptables.go b/internal/testutils/networking/iptables.go new file mode 100644 index 0000000000000..a28c98daf8c65 --- /dev/null +++
b/internal/testutils/networking/iptables.go
@@ -0,0 +1,53 @@
+package networking
+
+import (
+	"os/exec"
+	"regexp"
+	"testing"
+)
+
+// rePolicy captures the policy name from the chain header printed by
+// "iptables -L", for example "Chain FORWARD (policy ACCEPT)".
+var rePolicy = regexp.MustCompile("policy ([A-Z]+)")
+
+// SetFilterForwardPolicies sets the default policy for the FORWARD chain in
+// the filter tables for both IPv4 and IPv6. If a table's policy is changed, the
+// original policy is restored using t.Cleanup(). A failure to read or set a
+// policy fails the test immediately; a failure to restore is only logged.
+//
+// There's only one filter-FORWARD policy, so this won't behave well if used by
+// tests running in parallel in a single network namespace that expect different
+// behaviour.
+func SetFilterForwardPolicies(t *testing.T, policy string) {
+	t.Helper()
+
+	for _, iptablesCmd := range []string{"iptables", "ip6tables"} {
+		// Take a per-iteration copy: the cleanup closure below runs after the
+		// loop has finished and, before Go 1.22, all iterations share a single
+		// loop variable - so both closures would restore using "ip6tables".
+		iptablesCmd := iptablesCmd
+
+		// Only the chain header is needed; "-n" avoids name lookups for rules.
+		out, err := exec.Command(iptablesCmd, "-n", "-L", "FORWARD").Output()
+		if err != nil {
+			t.Fatalf("Failed to get %s FORWARD policy: %v", iptablesCmd, err)
+		}
+		opMatch := rePolicy.FindSubmatch(out)
+		if len(opMatch) != 2 {
+			t.Fatalf("Failed to find %s FORWARD policy in: %s", iptablesCmd, out)
+		}
+		origPolicy := string(opMatch[1])
+		if origPolicy == policy {
+			// Already as requested, so there is nothing to change or restore.
+			continue
+		}
+		if err := exec.Command(iptablesCmd, "-P", "FORWARD", policy).Run(); err != nil {
+			t.Fatalf("Failed to set %s FORWARD policy: %v", iptablesCmd, err)
+		}
+		t.Cleanup(func() {
+			if err := exec.Command(iptablesCmd, "-P", "FORWARD", origPolicy).Run(); err != nil {
+				t.Logf("Failed to restore %s FORWARD policy: %v", iptablesCmd, err)
+			}
+		})
+	}
+}
diff --git a/libnetwork/drivers/bridge/port_mapping_linux.go b/libnetwork/drivers/bridge/port_mapping_linux.go index 510b7d08fe160..132a204623195 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux.go +++ b/libnetwork/drivers/bridge/port_mapping_linux.go @@ -828,6 +828,10 @@ func setPerPortNAT(b portBinding, ipv iptables.IPVersion, proxyPath string, brid } func setPerPortForwarding(b portBinding, ipv iptables.IPVersion, bridgeName string, enable bool) error { + // Insert rules for open
ports at the top of the filter table's DOCKER + // chain (a per-network DROP rule, which must come after these per-port + // per-container ACCEPT rules, is appended to the chain when the network + // is created). args := []string{ "!", "-i", bridgeName, "-o", bridgeName, @@ -837,7 +841,7 @@ func setPerPortForwarding(b portBinding, ipv iptables.IPVersion, bridgeName stri "-j", "ACCEPT", } rule := iptRule{ipv: ipv, table: iptables.Filter, chain: DockerChain, args: args} - if err := appendOrDelChainRule(rule, "OPEN PORT", enable); err != nil { + if err := programChainRule(rule, "OPEN PORT", enable); err != nil { return err } diff --git a/libnetwork/drivers/bridge/setup_ip_tables_linux.go b/libnetwork/drivers/bridge/setup_ip_tables_linux.go index af09e1011611b..a61cbdd6d1cd7 100644 --- a/libnetwork/drivers/bridge/setup_ip_tables_linux.go +++ b/libnetwork/drivers/bridge/setup_ip_tables_linux.go @@ -177,6 +177,7 @@ func (n *bridgeNetwork) setupIPTables(ipVersion iptables.IPVersion, maskedAddr * n.registerIptCleanFunc(func() error { return setupIPTablesInternal(ipVersion, config, maskedAddr, hairpinMode, false) }) + natChain, filterChain, _, _, err := n.getDriverChains(ipVersion) if err != nil { return fmt.Errorf("Failed to setup IP tables, cannot acquire chain info %s", err.Error()) @@ -191,10 +192,16 @@ func (n *bridgeNetwork) setupIPTables(ipVersion iptables.IPVersion, maskedAddr * if err != nil { return fmt.Errorf("Failed to program FILTER chain: %s", err.Error()) } - n.registerIptCleanFunc(func() error { return iptable.ProgramChain(filterChain, config.BridgeName, hairpinMode, false) }) + + if err := defaultDrop(ipVersion, config.BridgeName, true); err != nil { + return fmt.Errorf("failed to add default-drop rule: %s", err.Error()) + } + n.registerIptCleanFunc(func() error { + return defaultDrop(ipVersion, config.BridgeName, false) + }) } d.Lock() @@ -203,6 +210,30 @@ func (n *bridgeNetwork) setupIPTables(ipVersion iptables.IPVersion, maskedAddr * return err } +func 
setICMP(ipv iptables.IPVersion, bridgeName string, enable bool) error { // ACCEPT rule for all ICMP sent out to bridgeName
+	icmpProto := "icmp"
+	if ipv == iptables.IPv6 { // the IPv6 rule matches "icmpv6" instead
+		icmpProto = "icmpv6"
+	}
+	icmpRule := iptRule{ipv: ipv, table: iptables.Filter, chain: DockerChain, args: []string{
+		"-o", bridgeName,
+		"-p", icmpProto,
+		"-j", "ACCEPT",
+	}}
+	return appendOrDelChainRule(icmpRule, "ICMP", enable) // presumably appends when enable is true, else deletes - helper not shown
+}
+
+// defaultDrop programs the per-network default DROP rule in the filter table's DOCKER
+// chain. It is appended, so it follows per-port ACCEPT rules inserted at the top.
+func defaultDrop(ipv iptables.IPVersion, bridgeName string, enable bool) error {
+	dropRule := iptRule{ipv: ipv, table: iptables.Filter, chain: DockerChain, args: []string{
+		"!", "-i", bridgeName, // packets that did not arrive on the bridge ...
+		"-o", bridgeName,     // ... but are being sent out to it
+		"-j", "DROP",
+	}}
+	return appendOrDelChainRule(dropRule, "DEFAULT DROP", enable)
+}
+
 type iptRule struct { ipv iptables.IPVersion table iptables.Table @@ -308,6 +339,13 @@ func setupIPTablesInternal(ipVer iptables.IPVersion, config *networkConfiguratio return err } + // Allow ICMP in routed mode. + if !nat { + if err := setICMP(ipVer, config.BridgeName, enable); err != nil { + return err + } + } + // Set Accept on all non-intercontainer outgoing packets. return programChainRule(outRule, "ACCEPT NON_ICC OUTGOING", enable) }