fix(e2e::network): refactor NoSNAT test

Signed-off-by: knight42 <anonymousknight96@gmail.com>
knight42 2020-07-30 15:18:00 +08:00
parent a5090a8ff2
commit 7b07b33094
3 changed files with 60 additions and 194 deletions

test/e2e/network/no_snat.go

@@ -19,116 +19,43 @@ package network
import (
"context"
"fmt"
"io/ioutil"
"net/http"
"strconv"
"strings"
"net"
"time"
"github.com/onsi/ginkgo"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/test/e2e/framework"
"github.com/onsi/ginkgo"
imageutils "k8s.io/kubernetes/test/utils/image"
)
const (
-testPodPort = 8080
-testProxyPort = 31235 // Firewall rule allows external traffic on ports 30000-32767. I just picked a random one.
+testPodPort = "8080"
+noSNATTestName = "no-snat-test"
)
var (
testPod = v1.Pod{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "no-snat-test",
GenerateName: noSNATTestName,
Labels: map[string]string{
"no-snat-test": "",
noSNATTestName: "",
},
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "no-snat-test",
Name: noSNATTestName,
Image: imageutils.GetE2EImage(imageutils.Agnhost),
Args: []string{"no-snat-test", "--port", strconv.Itoa(testPodPort)},
Env: []v1.EnvVar{
{
Name: "POD_IP",
ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "status.podIP"}},
},
},
},
},
},
}
testProxyPod = v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "no-snat-test-proxy",
},
Spec: v1.PodSpec{
HostNetwork: true,
Containers: []v1.Container{
{
Name: "no-snat-test-proxy",
Image: imageutils.GetE2EImage(imageutils.Agnhost),
Args: []string{"no-snat-test-proxy", "--port", strconv.Itoa(testProxyPort)},
Ports: []v1.ContainerPort{
{
ContainerPort: testProxyPort,
HostPort: testProxyPort,
},
},
Args: []string{"netexec", "--http-port", testPodPort},
},
},
},
}
)
-// Produces a pod spec that passes nip as NODE_IP env var using downward API
-func newTestPod(nodename string, nip string) *v1.Pod {
-pod := testPod
-nodeIP := v1.EnvVar{
-Name: "NODE_IP",
-Value: nip,
-}
-pod.Spec.Containers[0].Env = append(pod.Spec.Containers[0].Env, nodeIP)
-pod.Spec.NodeName = nodename
-return &pod
-}
-func newTestProxyPod(nodename string) *v1.Pod {
-pod := testProxyPod
-pod.Spec.NodeName = nodename
-return &pod
-}
-func getIP(iptype v1.NodeAddressType, node *v1.Node) (string, error) {
-for _, addr := range node.Status.Addresses {
-if addr.Type == iptype {
-return addr.Address, nil
-}
-}
-return "", fmt.Errorf("did not find %s on Node", iptype)
-}
-func getSchedulable(nodes []v1.Node) (*v1.Node, error) {
-for _, node := range nodes {
-if !node.Spec.Unschedulable {
-return &node, nil
-}
-}
-return nil, fmt.Errorf("all Nodes were unschedulable")
-}
-func checknosnatURL(proxy, pip string, ips []string) string {
-return fmt.Sprintf("http://%s/checknosnat?target=%s&ips=%s", proxy, pip, strings.Join(ips, ","))
-}
// This test verifies that a Pod on each node in a cluster can talk to Pods on every other node without SNAT.
// We use the [Feature:NoSNAT] tag so that most jobs will skip this test by default.
var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
@@ -144,39 +71,15 @@ var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
framework.ExpectNotEqual(len(nodes.Items), 0, "no Nodes in the cluster")
for _, node := range nodes.Items {
-// find the Node's internal ip address to feed to the Pod
-inIP, err := getIP(v1.NodeInternalIP, &node)
-framework.ExpectNoError(err)
-// target Pod at Node and feed Pod Node's InternalIP
-pod := newTestPod(node.Name, inIP)
-_, err = pc.Create(context.TODO(), pod, metav1.CreateOptions{})
+// target Pod at Node
+testPod.Spec.NodeName = node.Name
+_, err = pc.Create(context.TODO(), &testPod, metav1.CreateOptions{})
framework.ExpectNoError(err)
}
-// In some (most?) scenarios, the test harness doesn't run in the same network as the Pods,
-// which means it can't query Pods using their cluster-internal IPs. To get around this,
-// we create a Pod in a Node's host network, and have that Pod serve on a specific port of that Node.
-// We can then ask this proxy Pod to query the internal endpoints served by the test Pods.
-// Find the first schedulable node; masters are marked unschedulable. We don't put the proxy on the master
-// because in some (most?) deployments firewall rules don't allow external traffic to hit ports 30000-32767
-// on the master, but do allow this on the nodes.
-node, err := getSchedulable(nodes.Items)
-framework.ExpectNoError(err)
-ginkgo.By("creating a no-snat-test-proxy Pod on Node " + node.Name + " port " + strconv.Itoa(testProxyPort) +
-" so we can target our test Pods through this Node's ExternalIP")
-extIP, err := getIP(v1.NodeExternalIP, node)
-framework.ExpectNoError(err)
-proxyNodeIP := extIP + ":" + strconv.Itoa(testProxyPort)
-_, err = pc.Create(context.TODO(), newTestProxyPod(node.Name), metav1.CreateOptions{})
-framework.ExpectNoError(err)
ginkgo.By("waiting for all of the no-snat-test pods to be scheduled and running")
err = wait.PollImmediate(10*time.Second, 1*time.Minute, func() (bool, error) {
-pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: "no-snat-test"})
+pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: noSNATTestName})
if err != nil {
return false, err
}
@@ -194,64 +97,22 @@ var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
})
framework.ExpectNoError(err)
ginkgo.By("waiting for the no-snat-test-proxy Pod to be scheduled and running")
err = wait.PollImmediate(10*time.Second, 1*time.Minute, func() (bool, error) {
pod, err := pc.Get(context.TODO(), "no-snat-test-proxy", metav1.GetOptions{})
if err != nil {
return false, err
}
if pod.Status.Phase != v1.PodRunning {
if pod.Status.Phase != v1.PodPending {
return false, fmt.Errorf("expected pod to be in phase \"Pending\" or \"Running\"")
}
return false, nil // pod is still pending
}
return true, nil // pod is running
})
framework.ExpectNoError(err)
ginkgo.By("sending traffic from each pod to the others and checking that SNAT does not occur")
-pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: "no-snat-test"})
+pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: noSNATTestName})
framework.ExpectNoError(err)
-// collect pod IPs
-podIPs := []string{}
-for _, pod := range pods.Items {
-podIPs = append(podIPs, pod.Status.PodIP+":"+strconv.Itoa(testPodPort))
-}
-// hit the /checknosnat endpoint on each Pod, tell each Pod to check all the other Pods
+// hit the /clientip endpoint on every other Pods to check if source ip is preserved
// this test is O(n^2) but it doesn't matter because we only run this test on small clusters (~3 nodes)
-errs := []string{}
-client := http.Client{
-Timeout: 5 * time.Minute,
-}
-for _, pip := range podIPs {
-ips := []string{}
-for _, ip := range podIPs {
-if ip == pip {
+for _, sourcePod := range pods.Items {
+for _, targetPod := range pods.Items {
+if targetPod.Name == sourcePod.Name {
continue
}
-ips = append(ips, ip)
+targetAddr := net.JoinHostPort(targetPod.Status.PodIP, testPodPort)
+sourceIP, execPodIP := execSourceIPTest(sourcePod, targetAddr)
+ginkgo.By("Verifying the preserved source ip")
+framework.ExpectEqual(sourceIP, execPodIP)
}
-// hit /checknosnat on pip, via proxy
-resp, err := client.Get(checknosnatURL(proxyNodeIP, pip, ips))
-framework.ExpectNoError(err)
-// check error code on the response, if 500 record the body, which will describe the error
-if resp.StatusCode == 500 {
-body, err := ioutil.ReadAll(resp.Body)
-framework.ExpectNoError(err)
-errs = append(errs, string(body))
-}
-resp.Body.Close()
}
-// report the errors all at the end
-if len(errs) > 0 {
-str := strings.Join(errs, "\n")
-err := fmt.Errorf("/checknosnat failed in the following cases:\n%s", str)
-framework.ExpectNoError(err)
-}
})
})
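For context on what the refactored test now checks: each no-snat-test pod runs agnhost "netexec", whose /clientip endpoint reports the source address of the incoming request, and the test curls every peer pod and compares that reported address with the sending pod's PodIP. Below is a minimal standalone sketch of the same idea; the httptest handler that echoes r.RemoteAddr stands in for netexec, and plain net/http stands in for the framework's curl-through-a-pod plumbing, so none of this is the e2e code itself.

package main

import (
	"fmt"
	"io"
	"net"
	"net/http"
	"net/http/httptest"
)

func main() {
	// Stand-in for a target test pod: report the client address exactly as the
	// server saw it ("ip:port" for IPv4, "[ip]:port" for IPv6).
	target := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprint(w, r.RemoteAddr)
	}))
	defer target.Close()

	// The equivalent of "curl http://<target>/clientip" issued from the source side.
	resp, err := http.Get(target.URL + "/clientip")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}

	// Keep only the host part of the reply and compare it with the address the
	// caller believes it has; if SNAT had rewritten the traffic, the two would differ.
	seenIP, _, err := net.SplitHostPort(string(body))
	if err != nil {
		panic(err)
	}
	fmt.Printf("target saw %s as the client source address\n", seenIP)
}

In the real test the expected value is sourcePod.Status.PodIP and the request is executed inside the source pod via framework.RunHostCmd and curl, as shown in the util.go hunk below.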

test/e2e/network/service.go

@@ -1019,7 +1019,7 @@ var _ = SIGDescribe("Services", func() {
serviceAddress := net.JoinHostPort(serviceIP, strconv.Itoa(servicePort))
for _, pausePod := range pausePods.Items {
-sourceIP, execPodIP := execSourceipTest(pausePod, serviceAddress)
+sourceIP, execPodIP := execSourceIPTest(pausePod, serviceAddress)
ginkgo.By("Verifying the preserved source ip")
framework.ExpectEqual(sourceIP, execPodIP)
}
@@ -3382,38 +3382,6 @@ var _ = SIGDescribe("ESIPP [Slow]", func() {
})
})
-func execSourceipTest(pausePod v1.Pod, serviceAddress string) (string, string) {
-var err error
-var stdout string
-timeout := 2 * time.Minute
-framework.Logf("Waiting up to %v to get response from %s", timeout, serviceAddress)
-cmd := fmt.Sprintf(`curl -q -s --connect-timeout 30 %s/clientip`, serviceAddress)
-for start := time.Now(); time.Since(start) < timeout; time.Sleep(2 * time.Second) {
-stdout, err = framework.RunHostCmd(pausePod.Namespace, pausePod.Name, cmd)
-if err != nil {
-framework.Logf("got err: %v, retry until timeout", err)
-continue
-}
-// Need to check output because it might omit in case of error.
-if strings.TrimSpace(stdout) == "" {
-framework.Logf("got empty stdout, retry until timeout")
-continue
-}
-break
-}
-framework.ExpectNoError(err)
-// The stdout return from RunHostCmd is in this format: x.x.x.x:port or [xx:xx:xx::x]:port
-host, _, err := net.SplitHostPort(stdout)
-if err != nil {
-// ginkgo.Fail the test if output format is unexpected.
-framework.Failf("exec pod returned unexpected stdout: [%v]\n", stdout)
-}
-return pausePod.Status.PodIP, host
-}
// execAffinityTestForSessionAffinityTimeout is a helper function that wrap the logic of
// affinity test for non-load-balancer services. Session afinity will be
// enabled when the service is created and a short timeout will be configured so

test/e2e/network/util.go

@@ -19,6 +19,7 @@ package network
import (
"bytes"
"fmt"
"net"
"regexp"
"strings"
"time"
@@ -125,3 +126,39 @@ func CheckSCTPModuleLoadedOnNodes(f *framework.Framework, nodes *v1.NodeList) bo
}
return false
}
+// execSourceIPTest executes curl to access "/clientip" endpoint on target address
+// from given Pod to check if source ip is preserved.
+func execSourceIPTest(sourcePod v1.Pod, targetAddr string) (string, string) {
+var (
+err error
+stdout string
+timeout = 2 * time.Minute
+)
+framework.Logf("Waiting up to %v to get response from %s", timeout, targetAddr)
+cmd := fmt.Sprintf(`curl -q -s --connect-timeout 30 %s/clientip`, targetAddr)
+for start := time.Now(); time.Since(start) < timeout; time.Sleep(2 * time.Second) {
+stdout, err = framework.RunHostCmd(sourcePod.Namespace, sourcePod.Name, cmd)
+if err != nil {
+framework.Logf("got err: %v, retry until timeout", err)
+continue
+}
+// Need to check output because it might omit in case of error.
+if strings.TrimSpace(stdout) == "" {
+framework.Logf("got empty stdout, retry until timeout")
+continue
+}
+break
+}
+framework.ExpectNoError(err)
+// The stdout return from RunHostCmd is in this format: x.x.x.x:port or [xx:xx:xx::x]:port
+host, _, err := net.SplitHostPort(stdout)
+if err != nil {
+// ginkgo.Fail the test if output format is unexpected.
+framework.Failf("exec pod returned unexpected stdout: [%v]\n", stdout)
+}
+return sourcePod.Status.PodIP, host
+}
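A side note on the address handling in this helper: callers build targetAddr with net.JoinHostPort (see the no_snat.go and service.go hunks above) and the reply is parsed with net.SplitHostPort, which keeps the comparison working for both IPv4 and IPv6 pod IPs; naive concatenation and splitting on ":" would break on IPv6 literals. A small illustration with made-up addresses, not taken from the test:

package main

import (
	"fmt"
	"net"
)

func main() {
	// JoinHostPort adds the brackets an IPv6 literal needs; IPv4 is left as-is.
	fmt.Println(net.JoinHostPort("10.0.0.5", "8080")) // 10.0.0.5:8080
	fmt.Println(net.JoinHostPort("fd00::5", "8080"))  // [fd00::5]:8080

	// SplitHostPort undoes either form, so the host part can be compared
	// directly against a pod IP.
	for _, addr := range []string{"10.0.0.5:8080", "[fd00::5]:8080"} {
		host, port, err := net.SplitHostPort(addr)
		fmt.Println(host, port, err)
	}
}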