From 91df504bf152fc549b8783f155a471e00b909e2f Mon Sep 17 00:00:00 2001 From: Albin Kerouanton Date: Thu, 19 Sep 2024 15:48:45 +0200 Subject: [PATCH 1/3] inte/networking: move port mapping tests into a dedicated file Signed-off-by: Albin Kerouanton --- integration/networking/bridge_linux_test.go | 163 ---------------- .../networking/port_mapping_linux_test.go | 180 ++++++++++++++++++ 2 files changed, 180 insertions(+), 163 deletions(-) create mode 100644 integration/networking/port_mapping_linux_test.go diff --git a/integration/networking/bridge_linux_test.go b/integration/networking/bridge_linux_test.go index 5f047bbbe8cb1..076d09ea4e673 100644 --- a/integration/networking/bridge_linux_test.go +++ b/integration/networking/bridge_linux_test.go @@ -3,8 +3,6 @@ package networking import ( "context" "fmt" - "net" - "net/http" "os/exec" "regexp" "strings" @@ -16,11 +14,9 @@ import ( "github.com/docker/docker/client" "github.com/docker/docker/integration/internal/container" "github.com/docker/docker/integration/internal/network" - "github.com/docker/docker/libnetwork/drivers/bridge" "github.com/docker/docker/libnetwork/netlabel" "github.com/docker/docker/testutil" "github.com/docker/docker/testutil/daemon" - "github.com/docker/go-connections/nat" "github.com/google/go-cmp/cmp/cmpopts" "gotest.tools/v3/assert" is "gotest.tools/v3/assert/cmp" @@ -838,162 +834,3 @@ func TestSetEndpointSysctl(t *testing.T) { } } } - -func TestDisableNAT(t *testing.T) { - ctx := setupTest(t) - d := daemon.New(t) - d.StartWithBusybox(ctx, t) - defer d.Stop(t) - - c := d.NewClientT(t) - defer c.Close() - - testcases := []struct { - name string - gwMode4 string - gwMode6 string - expPortMap nat.PortMap - }{ - { - name: "defaults", - expPortMap: nat.PortMap{ - "80/tcp": []nat.PortBinding{ - {HostIP: "0.0.0.0", HostPort: "8080"}, - {HostIP: "::", HostPort: "8080"}, - }, - }, - }, - { - name: "nat4 routed6", - gwMode4: "nat", - gwMode6: "routed", - expPortMap: nat.PortMap{ - "80/tcp": []nat.PortBinding{ - {HostIP: "0.0.0.0", HostPort: "8080"}, - {HostIP: "::", HostPort: ""}, - }, - }, - }, - { - name: "nat6 routed4", - gwMode4: "routed", - gwMode6: "nat", - expPortMap: nat.PortMap{ - "80/tcp": []nat.PortBinding{ - {HostIP: "0.0.0.0", HostPort: ""}, - {HostIP: "::", HostPort: "8080"}, - }, - }, - }, - } - - for _, tc := range testcases { - t.Run(tc.name, func(t *testing.T) { - ctx := testutil.StartSpan(ctx, t) - - const netName = "testnet" - nwOpts := []func(options *networktypes.CreateOptions){ - network.WithIPv6(), - network.WithIPAM("fd2a:a2c3:4448::/64", "fd2a:a2c3:4448::1"), - } - if tc.gwMode4 != "" { - nwOpts = append(nwOpts, network.WithOption(bridge.IPv4GatewayMode, tc.gwMode4)) - } - if tc.gwMode6 != "" { - nwOpts = append(nwOpts, network.WithOption(bridge.IPv6GatewayMode, tc.gwMode6)) - } - network.CreateNoError(ctx, t, c, netName, nwOpts...) - defer network.RemoveNoError(ctx, t, c, netName) - - id := container.Run(ctx, t, c, - container.WithNetworkMode(netName), - container.WithExposedPorts("80/tcp"), - container.WithPortMap(nat.PortMap{"80/tcp": {{HostPort: "8080"}}}), - ) - defer c.ContainerRemove(ctx, id, containertypes.RemoveOptions{Force: true}) - - inspect := container.Inspect(ctx, t, c, id) - assert.Check(t, is.DeepEqual(inspect.NetworkSettings.Ports, tc.expPortMap)) - }) - } -} - -// Check that a container on one network can reach a service in a container on -// another network, via a mapped port on the host. -func TestPortMappedHairpin(t *testing.T) { - skip.If(t, testEnv.IsRootless) - - ctx := setupTest(t) - d := daemon.New(t) - d.StartWithBusybox(ctx, t) - defer d.Stop(t) - c := d.NewClientT(t) - defer c.Close() - - // Find an address on the test host. - conn, err := net.Dial("tcp4", "hub.docker.com:80") - assert.NilError(t, err) - hostAddr := conn.LocalAddr().(*net.TCPAddr).IP.String() - conn.Close() - - const serverNetName = "servernet" - network.CreateNoError(ctx, t, c, serverNetName) - defer network.RemoveNoError(ctx, t, c, serverNetName) - const clientNetName = "clientnet" - network.CreateNoError(ctx, t, c, clientNetName) - defer network.RemoveNoError(ctx, t, c, clientNetName) - - serverId := container.Run(ctx, t, c, - container.WithNetworkMode(serverNetName), - container.WithExposedPorts("80"), - container.WithPortMap(nat.PortMap{"80": {{HostIP: "0.0.0.0"}}}), - container.WithCmd("httpd", "-f"), - ) - defer c.ContainerRemove(ctx, serverId, containertypes.RemoveOptions{Force: true}) - - inspect := container.Inspect(ctx, t, c, serverId) - hostPort := inspect.NetworkSettings.Ports["80/tcp"][0].HostPort - - clientCtx, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() - res := container.RunAttach(clientCtx, t, c, - container.WithNetworkMode(clientNetName), - container.WithCmd("wget", "http://"+hostAddr+":"+hostPort), - ) - defer c.ContainerRemove(ctx, res.ContainerID, containertypes.RemoveOptions{Force: true}) - assert.Check(t, is.Contains(res.Stderr.String(), "404 Not Found")) -} - -// Check that a container on an IPv4-only network can have a port mapping -// from a specific IPv6 host address (using docker-proxy). -// Regression test for https://github.com/moby/moby/issues/48067 (which -// is about incorrectly reporting this as invalid config). -func TestProxy4To6(t *testing.T) { - skip.If(t, testEnv.IsRootless) - - ctx := setupTest(t) - d := daemon.New(t) - d.StartWithBusybox(ctx, t) - defer d.Stop(t) - - c := d.NewClientT(t) - defer c.Close() - - const netName = "ipv4net" - network.CreateNoError(ctx, t, c, netName) - - serverId := container.Run(ctx, t, c, - container.WithNetworkMode(netName), - container.WithExposedPorts("80"), - container.WithPortMap(nat.PortMap{"80": {{HostIP: "::1"}}}), - container.WithCmd("httpd", "-f"), - ) - defer c.ContainerRemove(ctx, serverId, containertypes.RemoveOptions{Force: true}) - - inspect := container.Inspect(ctx, t, c, serverId) - hostPort := inspect.NetworkSettings.Ports["80/tcp"][0].HostPort - - resp, err := http.Get("http://[::1]:" + hostPort) - assert.NilError(t, err) - assert.Check(t, is.Equal(resp.StatusCode, 404)) -} diff --git a/integration/networking/port_mapping_linux_test.go b/integration/networking/port_mapping_linux_test.go new file mode 100644 index 0000000000000..7af1d7dcc13c7 --- /dev/null +++ b/integration/networking/port_mapping_linux_test.go @@ -0,0 +1,180 @@ +package networking + +import ( + "context" + "net" + "net/http" + "testing" + "time" + + containertypes "github.com/docker/docker/api/types/container" + networktypes "github.com/docker/docker/api/types/network" + "github.com/docker/docker/integration/internal/container" + "github.com/docker/docker/integration/internal/network" + "github.com/docker/docker/libnetwork/drivers/bridge" + "github.com/docker/docker/testutil" + "github.com/docker/docker/testutil/daemon" + "github.com/docker/go-connections/nat" + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" + "gotest.tools/v3/skip" +) + +func TestDisableNAT(t *testing.T) { + ctx := setupTest(t) + d := daemon.New(t) + d.StartWithBusybox(ctx, t) + defer d.Stop(t) + + c := d.NewClientT(t) + defer c.Close() + + testcases := []struct { + name string + gwMode4 string + gwMode6 string + expPortMap nat.PortMap + }{ + { + name: "defaults", + expPortMap: nat.PortMap{ + "80/tcp": []nat.PortBinding{ + {HostIP: "0.0.0.0", HostPort: "8080"}, + {HostIP: "::", HostPort: "8080"}, + }, + }, + }, + { + name: "nat4 routed6", + gwMode4: "nat", + gwMode6: "routed", + expPortMap: nat.PortMap{ + "80/tcp": []nat.PortBinding{ + {HostIP: "0.0.0.0", HostPort: "8080"}, + {HostIP: "::", HostPort: ""}, + }, + }, + }, + { + name: "nat6 routed4", + gwMode4: "routed", + gwMode6: "nat", + expPortMap: nat.PortMap{ + "80/tcp": []nat.PortBinding{ + {HostIP: "0.0.0.0", HostPort: ""}, + {HostIP: "::", HostPort: "8080"}, + }, + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + ctx := testutil.StartSpan(ctx, t) + + const netName = "testnet" + nwOpts := []func(options *networktypes.CreateOptions){ + network.WithIPv6(), + network.WithIPAM("fd2a:a2c3:4448::/64", "fd2a:a2c3:4448::1"), + } + if tc.gwMode4 != "" { + nwOpts = append(nwOpts, network.WithOption(bridge.IPv4GatewayMode, tc.gwMode4)) + } + if tc.gwMode6 != "" { + nwOpts = append(nwOpts, network.WithOption(bridge.IPv6GatewayMode, tc.gwMode6)) + } + network.CreateNoError(ctx, t, c, netName, nwOpts...) + defer network.RemoveNoError(ctx, t, c, netName) + + id := container.Run(ctx, t, c, + container.WithNetworkMode(netName), + container.WithExposedPorts("80/tcp"), + container.WithPortMap(nat.PortMap{"80/tcp": {{HostPort: "8080"}}}), + ) + defer c.ContainerRemove(ctx, id, containertypes.RemoveOptions{Force: true}) + + inspect := container.Inspect(ctx, t, c, id) + assert.Check(t, is.DeepEqual(inspect.NetworkSettings.Ports, tc.expPortMap)) + }) + } +} + +// Check that a container on one network can reach a service in a container on +// another network, via a mapped port on the host. +func TestPortMappedHairpin(t *testing.T) { + skip.If(t, testEnv.IsRootless) + + ctx := setupTest(t) + d := daemon.New(t) + d.StartWithBusybox(ctx, t) + defer d.Stop(t) + c := d.NewClientT(t) + defer c.Close() + + // Find an address on the test host. + conn, err := net.Dial("tcp4", "hub.docker.com:80") + assert.NilError(t, err) + hostAddr := conn.LocalAddr().(*net.TCPAddr).IP.String() + conn.Close() + + const serverNetName = "servernet" + network.CreateNoError(ctx, t, c, serverNetName) + defer network.RemoveNoError(ctx, t, c, serverNetName) + const clientNetName = "clientnet" + network.CreateNoError(ctx, t, c, clientNetName) + defer network.RemoveNoError(ctx, t, c, clientNetName) + + serverId := container.Run(ctx, t, c, + container.WithNetworkMode(serverNetName), + container.WithExposedPorts("80"), + container.WithPortMap(nat.PortMap{"80": {{HostIP: "0.0.0.0"}}}), + container.WithCmd("httpd", "-f"), + ) + defer c.ContainerRemove(ctx, serverId, containertypes.RemoveOptions{Force: true}) + + inspect := container.Inspect(ctx, t, c, serverId) + hostPort := inspect.NetworkSettings.Ports["80/tcp"][0].HostPort + + clientCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + res := container.RunAttach(clientCtx, t, c, + container.WithNetworkMode(clientNetName), + container.WithCmd("wget", "http://"+hostAddr+":"+hostPort), + ) + defer c.ContainerRemove(ctx, res.ContainerID, containertypes.RemoveOptions{Force: true}) + assert.Check(t, is.Contains(res.Stderr.String(), "404 Not Found")) +} + +// Check that a container on an IPv4-only network can have a port mapping +// from a specific IPv6 host address (using docker-proxy). +// Regression test for https://github.com/moby/moby/issues/48067 (which +// is about incorrectly reporting this as invalid config). +func TestProxy4To6(t *testing.T) { + skip.If(t, testEnv.IsRootless) + + ctx := setupTest(t) + d := daemon.New(t) + d.StartWithBusybox(ctx, t) + defer d.Stop(t) + + c := d.NewClientT(t) + defer c.Close() + + const netName = "ipv4net" + network.CreateNoError(ctx, t, c, netName) + + serverId := container.Run(ctx, t, c, + container.WithNetworkMode(netName), + container.WithExposedPorts("80"), + container.WithPortMap(nat.PortMap{"80": {{HostIP: "::1"}}}), + container.WithCmd("httpd", "-f"), + ) + defer c.ContainerRemove(ctx, serverId, containertypes.RemoveOptions{Force: true}) + + inspect := container.Inspect(ctx, t, c, serverId) + hostPort := inspect.NetworkSettings.Ports["80/tcp"][0].HostPort + + resp, err := http.Get("http://[::1]:" + hostPort) + assert.NilError(t, err) + assert.Check(t, is.Equal(resp.StatusCode, 404)) +} From 2552782c1d5a02a817dd3519cc33409ef4b81af4 Mon Sep 17 00:00:00 2001 From: Albin Kerouanton Date: Thu, 19 Sep 2024 23:44:01 +0200 Subject: [PATCH 2/3] inte/networking: test accessing a published port from the host Signed-off-by: Albin Kerouanton --- .../networking/port_mapping_linux_test.go | 157 ++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/integration/networking/port_mapping_linux_test.go b/integration/networking/port_mapping_linux_test.go index 7af1d7dcc13c7..6435ccd1e3efe 100644 --- a/integration/networking/port_mapping_linux_test.go +++ b/integration/networking/port_mapping_linux_test.go @@ -2,8 +2,12 @@ package networking import ( "context" + "fmt" "net" "net/http" + "os/exec" + "strconv" + "strings" "testing" "time" @@ -20,6 +24,28 @@ import ( "gotest.tools/v3/skip" ) +func getIfaceAddrs(t *testing.T, name string, ipv6 bool) []net.IP { + t.Helper() + + iface, err := net.InterfaceByName(name) + assert.NilError(t, err) + + addrs, err := iface.Addrs() + assert.NilError(t, err) + + var ips []net.IP + + for _, netaddr := range addrs { + addr := netaddr.(*net.IPNet) + if (addr.IP.To4() != nil && !ipv6) || (addr.IP.To4() == nil && ipv6) { + ips = append(ips, addr.IP) + } + } + + assert.Check(t, len(ips) > 0) + return ips +} + func TestDisableNAT(t *testing.T) { ctx := setupTest(t) d := daemon.New(t) @@ -178,3 +204,134 @@ func TestProxy4To6(t *testing.T) { assert.NilError(t, err) assert.Check(t, is.Equal(resp.StatusCode, 404)) } + +func enableIPv6OnAll(t *testing.T) func() { + t.Helper() + + out, err := exec.Command("sysctl", "net.ipv6.conf").Output() + assert.NilError(t, err) + + ifaces := map[string]string{} + var allVal string + + sysctls := strings.Split(string(out), "\n") + for _, sysctl := range sysctls { + if sysctl == "" { + continue + } + + kv := strings.Split(sysctl, " = ") + sub := strings.Split(kv[0], ".") + if sub[4] == "disable_ipv6" { + if sub[3] == "all" { + allVal = kv[1] + continue + } + ifaces[sub[3]] = kv[1] + } + } + + assert.NilError(t, exec.Command("sysctl", "net.ipv6.conf.all.disable_ipv6=0").Run()) + + return func() { + if allVal == "1" { + assert.NilError(t, exec.Command("sysctl", "net.ipv6.conf.all.disable_ipv6=1").Run()) + } + + for iface, val := range ifaces { + assert.NilError(t, exec.Command("sysctl", fmt.Sprintf("net.ipv6.conf.%s.disable_ipv6=%s", iface, val)).Run()) + } + } +} + +// TestAccessPublishedPortFromHost checks whether published ports are +// accessible from the host. +func TestAccessPublishedPortFromHost(t *testing.T) { + // Both IPv6 test cases are currently failing in rootless mode. This needs further investigation. + skip.If(t, testEnv.IsRootless) + + ctx := setupTest(t) + + revertIPv6OnAll := enableIPv6OnAll(t) + defer revertIPv6OnAll() + assert.NilError(t, exec.Command("ip", "addr", "add", "fdfb:5cbb:29bf::2/64", "dev", "eth0", "nodad").Run()) + defer assert.NilError(t, exec.Command("ip", "addr", "del", "fdfb:5cbb:29bf::2/64", "dev", "eth0").Run()) + + testcases := []struct { + ulpEnabled bool + ipv6 bool + }{ + { + ulpEnabled: true, + ipv6: false, + }, + { + ulpEnabled: false, + ipv6: false, + }, + { + ulpEnabled: true, + ipv6: true, + }, + { + ulpEnabled: false, + ipv6: true, + }, + } + + for tcID, tc := range testcases { + t.Run(fmt.Sprintf("userland-proxy=%t/IPv6=%t", tc.ulpEnabled, tc.ipv6), func(t *testing.T) { + ctx := testutil.StartSpan(ctx, t) + + d := daemon.New(t) + d.StartWithBusybox(ctx, t, fmt.Sprintf("--userland-proxy=%t", tc.ulpEnabled)) + defer d.Stop(t) + + c := d.NewClientT(t) + defer c.Close() + + bridgeName := fmt.Sprintf("nat-from-host-%d", tcID) + bridgeOpts := []func(options *networktypes.CreateOptions){ + network.WithDriver("bridge"), + network.WithOption(bridge.BridgeName, bridgeName), + } + if tc.ipv6 { + bridgeOpts = append(bridgeOpts, + network.WithIPv6(), + network.WithIPAM("fd31:1c42:6f59::/64", "fd31:1c42:6f59::1")) + } + + network.CreateNoError(ctx, t, c, bridgeName, bridgeOpts...) + defer network.RemoveNoError(ctx, t, c, bridgeName) + + hostPort := strconv.Itoa(1234 + tcID) + serverID := container.Run(ctx, t, c, + container.WithName(sanitizeCtrName(t.Name()+"-server")), + container.WithExposedPorts("80/tcp"), + container.WithPortMap(nat.PortMap{"80/tcp": {{HostPort: hostPort}}}), + container.WithCmd("httpd", "-f"), + container.WithNetworkMode(bridgeName)) + defer c.ContainerRemove(ctx, serverID, containertypes.RemoveOptions{Force: true}) + + for _, iface := range []string{"lo", "eth0"} { + for _, hostAddr := range getIfaceAddrs(t, iface, tc.ipv6) { + if !tc.ulpEnabled && hostAddr.To4() == nil && hostAddr.IsLoopback() { + // iptables can't DNAT packets addressed to the IPv6 + // loopback address. + continue + } + if hostAddr.IsLinkLocalUnicast() { + // Mapping ports on link-local addresses is currently + // unsupported. + continue + } + + httpClient := &http.Client{Timeout: 3 * time.Second} + resp, err := httpClient.Get("http://" + net.JoinHostPort(hostAddr.String(), hostPort)) + assert.NilError(t, err) + assert.Check(t, is.Equal(resp.StatusCode, 404)) + } + } + }) + } +} From 5875b6e8cf1c5dd46c0e0ed08223d0117e762c01 Mon Sep 17 00:00:00 2001 From: Albin Kerouanton Date: Wed, 25 Sep 2024 16:10:43 +0200 Subject: [PATCH 3/3] inte/networking: test access to published ports from remote host Along with this new test, a new struct `L3Segment` is introduced to simulate hosts connected on a same switched network. This struct will let us test various scenarios where published ports and containers should or should not be accessible from remote hosts. The new test introduced, `TestAccessPublishedPortFromRemoteHost`, skips link-local address as port publishing doesn't work on those addresses currently. This will be fixed in a future commit. Signed-off-by: Albin Kerouanton --- .../networking/port_mapping_linux_test.go | 65 +++++++ .../testutils/networking/l3_segment_linux.go | 160 ++++++++++++++++++ 2 files changed, 225 insertions(+) create mode 100644 internal/testutils/networking/l3_segment_linux.go diff --git a/integration/networking/port_mapping_linux_test.go b/integration/networking/port_mapping_linux_test.go index 6435ccd1e3efe..cb0955497adbe 100644 --- a/integration/networking/port_mapping_linux_test.go +++ b/integration/networking/port_mapping_linux_test.go @@ -5,6 +5,7 @@ import ( "fmt" "net" "net/http" + "net/netip" "os/exec" "strconv" "strings" @@ -15,12 +16,14 @@ import ( networktypes "github.com/docker/docker/api/types/network" "github.com/docker/docker/integration/internal/container" "github.com/docker/docker/integration/internal/network" + "github.com/docker/docker/internal/testutils/networking" "github.com/docker/docker/libnetwork/drivers/bridge" "github.com/docker/docker/testutil" "github.com/docker/docker/testutil/daemon" "github.com/docker/go-connections/nat" "gotest.tools/v3/assert" is "gotest.tools/v3/assert/cmp" + "gotest.tools/v3/icmd" "gotest.tools/v3/skip" ) @@ -335,3 +338,65 @@ func TestAccessPublishedPortFromHost(t *testing.T) { }) } } + +func TestAccessPublishedPortFromRemoteHost(t *testing.T) { + // IPv6 test case is currently failing in rootless mode. This needs further investigation. + skip.If(t, testEnv.IsRootless) + + ctx := setupTest(t) + + l3 := networking.NewL3Segment(t, "test-pbs-remote-br", + netip.MustParsePrefix("192.168.120.1/24"), + netip.MustParsePrefix("fd30:e631:f886::1/64")) + defer l3.Destroy(t) + + // "docker" is the host where dockerd is running and where ports will be + // published. + l3.AddHost(t, "docker", networking.CurrentNetns, "eth-test", + netip.MustParsePrefix("192.168.120.2/24"), + netip.MustParsePrefix("fd30:e631:f886::2/64")) + l3.AddHost(t, "neigh", "test-pbs-remote-neighbor", "eth0", + netip.MustParsePrefix("192.168.120.3/24"), + netip.MustParsePrefix("fd30:e631:f886::3/64")) + + d := daemon.New(t) + d.StartWithBusybox(ctx, t) + defer d.Stop(t) + + c := d.NewClientT(t) + defer c.Close() + + bridgeName := "nat-remote" + network.CreateNoError(ctx, t, c, bridgeName, + network.WithDriver("bridge"), + network.WithOption(bridge.BridgeName, bridgeName), + network.WithIPv6(), + network.WithIPAM("fdd8:c9fe:1a25::/64", "fdd8:c9fe:1a25::1")) + defer network.RemoveNoError(ctx, t, c, bridgeName) + + hostPort := "1780" + serverID := container.Run(ctx, t, c, + container.WithName(sanitizeCtrName(t.Name()+"-server")), + container.WithExposedPorts("80/tcp"), + container.WithPortMap(nat.PortMap{"80/tcp": {{HostPort: hostPort}}}), + container.WithCmd("httpd", "-f"), + container.WithNetworkMode(bridgeName)) + defer c.ContainerRemove(ctx, serverID, containertypes.RemoveOptions{Force: true}) + + for _, ipv6 := range []bool{true, false} { + for _, hostAddr := range getIfaceAddrs(t, l3.Hosts["docker"].Iface, ipv6) { + if hostAddr.IsLinkLocalUnicast() { + // For some reason, hosts in a L3Segment can't communicate + // using link-local addresses. + continue + } + + l3.Hosts["neigh"].Do(t, func() { + url := "http://" + net.JoinHostPort(hostAddr.String(), hostPort) + t.Logf("Sending a request to %s", url) + + icmd.RunCommand("curl", url).Assert(t, icmd.Success) + }) + } + } +} diff --git a/internal/testutils/networking/l3_segment_linux.go b/internal/testutils/networking/l3_segment_linux.go new file mode 100644 index 0000000000000..68d020815db6c --- /dev/null +++ b/internal/testutils/networking/l3_segment_linux.go @@ -0,0 +1,160 @@ +package networking + +import ( + "bytes" + "net/netip" + "os/exec" + "runtime" + "strings" + "syscall" + "testing" + + "github.com/vishvananda/netns" +) + +// CurrentNetns can be passed to L3Segment.AddHost to indicate that the +// host lives in the current network namespace (eg. where dockerd runs). +const CurrentNetns = "" + +func runCommand(t *testing.T, cmd string, args ...string) { + t.Log(strings.Join(append([]string{cmd}, args...), " ")) + + var b bytes.Buffer + c := exec.Command(cmd, args...) + c.Stdout = &b + c.Stderr = &b + err := c.Run() + if err != nil { + t.Log(b.String()) + t.Fatalf("Error: %v", err) + } +} + +// L3Segment simulates a switched, dual-stack capable network that +// interconnects multiple hosts running in their own network namespace. +type L3Segment struct { + Hosts map[string]Host + bridge Host +} + +// NewL3Segment creates a new L3Segment. The bridge interface interconnecting +// all the hosts is created in a new network namespace named nsName and it's +// assigned one or more IP addresses. Those need to be unmasked netip.Prefix. +func NewL3Segment(t *testing.T, nsName string, addrs ...netip.Prefix) *L3Segment { + t.Helper() + + l3 := &L3Segment{ + Hosts: map[string]Host{}, + } + + l3.bridge = newHost(t, nsName, "br0") + defer func() { + if t.Failed() { + l3.Destroy(t) + } + }() + + l3.bridge.Run(t, "ip", "link", "add", l3.bridge.Iface, "type", "bridge") + for _, addr := range addrs { + l3.bridge.Run(t, "ip", "addr", "add", addr.String(), "dev", l3.bridge.Iface, "nodad") + l3.bridge.Run(t, "ip", "link", "set", l3.bridge.Iface, "up") + } + + return l3 +} + +func (l3 *L3Segment) AddHost(t *testing.T, hostname, nsName, ifname string, addrs ...netip.Prefix) { + t.Helper() + + if len(hostname) >= syscall.IFNAMSIZ { + // hostname is reused as the name for the veth interface added to the + // bridge. Hence, it needs to be shorter than ifnamsiz. + t.Fatalf("hostname too long") + } + + host := newHost(t, nsName, ifname) + l3.Hosts[hostname] = host + + host.Run(t, "ip", "link", "add", hostname, "netns", l3.bridge.ns, "type", "veth", "peer", "name", host.Iface) + l3.bridge.Run(t, "ip", "link", "set", hostname, "up", "master", l3.bridge.Iface) + host.Run(t, "ip", "link", "set", host.Iface, "up") + + for _, addr := range addrs { + host.Run(t, "ip", "addr", "add", addr.String(), "dev", host.Iface, "nodad") + } +} + +func (l3 *L3Segment) Destroy(t *testing.T) { + for _, host := range l3.Hosts { + host.Destroy(t) + } + l3.bridge.Destroy(t) +} + +type Host struct { + Iface string // Iface is the interface name in the host network namespace. + ns string // ns is the network namespace name. +} + +func newHost(t *testing.T, nsName, ifname string) Host { + t.Helper() + + if len(ifname) >= syscall.IFNAMSIZ { + t.Fatalf("ifname too long") + } + + if nsName != CurrentNetns { + runCommand(t, "ip", "netns", "add", nsName) + } + + return Host{ + Iface: ifname, + ns: nsName, + } +} + +// Run executes the provided command in the host's network namespace. +func (h Host) Run(t *testing.T, cmd string, args ...string) { + t.Helper() + + if h.ns != CurrentNetns { + args = append([]string{"netns", "exec", h.ns, cmd}, args...) + cmd = "ip" + } + runCommand(t, cmd, args...) +} + +// Do run the provided function in the host's network namespace. +func (h Host) Do(t *testing.T, fn func()) { + t.Helper() + + targetNs, err := netns.GetFromName(h.ns) + if err != nil { + t.Fatalf("failed to get netns handle: %v", err) + } + defer targetNs.Close() + + origNs, err := netns.Get() + if err != nil { + t.Fatalf("failed to get current netns: %v", err) + } + defer origNs.Close() + + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + if err := netns.Set(targetNs); err != nil { + t.Fatalf("failed to enter netns: %v", err) + } + defer netns.Set(origNs) + + fn() +} + +func (h Host) Destroy(t *testing.T) { + t.Helper() + + if h.ns != CurrentNetns { + runCommand(t, "ip", "netns", "delete", h.ns) + } +}