mirror of https://github.com/k3s-io/kubernetes.git
fix(e2e::network): refactor NoSNAT test
Signed-off-by: knight42 <anonymousknight96@gmail.com>
parent a5090a8ff2
commit 7b07b33094
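In outline, the refactor drops the hostNetwork no-snat-test-proxy Pod, the NODE_IP/POD_IP plumbing, and the /checknosnat endpoint: each test Pod now just runs agnhost "netexec", and the test execs curl from every Pod to every other Pod's /clientip endpoint through the shared execSourceIPTest helper added in the last hunk. Below is a minimal sketch of the resulting check, assuming the package context of the diff; the verifyNoSNAT wrapper is illustrative only and not part of this commit, while testPodPort, execSourceIPTest, and framework.ExpectEqual come from the diff itself.

package network

import (
	"net"

	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/test/e2e/framework"
)

// verifyNoSNAT is an illustrative wrapper, not code from this commit.
// For every ordered pair of test Pods, it asks the source Pod to curl the
// target's /clientip endpoint and expects the reported client IP to equal
// the source Pod's IP, i.e. no SNAT on the pod-to-pod path.
func verifyNoSNAT(pods []v1.Pod) {
	for _, sourcePod := range pods {
		for _, targetPod := range pods {
			if targetPod.Name == sourcePod.Name {
				continue
			}
			targetAddr := net.JoinHostPort(targetPod.Status.PodIP, testPodPort)
			sourceIP, clientIP := execSourceIPTest(sourcePod, targetAddr)
			framework.ExpectEqual(sourceIP, clientIP)
		}
	}
}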
@@ -19,116 +19,43 @@ package network
import (
"context"
"fmt"
"io/ioutil"
"net/http"
"strconv"
"strings"
"net"
"time"

"github.com/onsi/ginkgo"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"

"k8s.io/kubernetes/test/e2e/framework"

"github.com/onsi/ginkgo"

imageutils "k8s.io/kubernetes/test/utils/image"
)

const (
testPodPort = 8080

testProxyPort = 31235 // Firewall rule allows external traffic on ports 30000-32767. I just picked a random one.
testPodPort = "8080"
noSNATTestName = "no-snat-test"
)

var (
testPod = v1.Pod{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "no-snat-test",
GenerateName: noSNATTestName,
Labels: map[string]string{
"no-snat-test": "",
noSNATTestName: "",
},
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "no-snat-test",
Name: noSNATTestName,
Image: imageutils.GetE2EImage(imageutils.Agnhost),
Args: []string{"no-snat-test", "--port", strconv.Itoa(testPodPort)},
Env: []v1.EnvVar{
{
Name: "POD_IP",
ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "status.podIP"}},
},
},
},
},
},
}

testProxyPod = v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "no-snat-test-proxy",
},
Spec: v1.PodSpec{
HostNetwork: true,
Containers: []v1.Container{
{
Name: "no-snat-test-proxy",
Image: imageutils.GetE2EImage(imageutils.Agnhost),
Args: []string{"no-snat-test-proxy", "--port", strconv.Itoa(testProxyPort)},
Ports: []v1.ContainerPort{
{
ContainerPort: testProxyPort,
HostPort: testProxyPort,
},
},
Args: []string{"netexec", "--http-port", testPodPort},
},
},
},
}
)
// Produces a pod spec that passes nip as NODE_IP env var using downward API
func newTestPod(nodename string, nip string) *v1.Pod {
pod := testPod
nodeIP := v1.EnvVar{
Name: "NODE_IP",
Value: nip,
}
pod.Spec.Containers[0].Env = append(pod.Spec.Containers[0].Env, nodeIP)
pod.Spec.NodeName = nodename
return &pod
}

func newTestProxyPod(nodename string) *v1.Pod {
pod := testProxyPod
pod.Spec.NodeName = nodename
return &pod
}

func getIP(iptype v1.NodeAddressType, node *v1.Node) (string, error) {
for _, addr := range node.Status.Addresses {
if addr.Type == iptype {
return addr.Address, nil
}
}
return "", fmt.Errorf("did not find %s on Node", iptype)
}

func getSchedulable(nodes []v1.Node) (*v1.Node, error) {
for _, node := range nodes {
if !node.Spec.Unschedulable {
return &node, nil
}
}
return nil, fmt.Errorf("all Nodes were unschedulable")
}

func checknosnatURL(proxy, pip string, ips []string) string {
return fmt.Sprintf("http://%s/checknosnat?target=%s&ips=%s", proxy, pip, strings.Join(ips, ","))
}

// This test verifies that a Pod on each node in a cluster can talk to Pods on every other node without SNAT.
// We use the [Feature:NoSNAT] tag so that most jobs will skip this test by default.
var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
@@ -144,39 +71,15 @@ var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
framework.ExpectNotEqual(len(nodes.Items), 0, "no Nodes in the cluster")

for _, node := range nodes.Items {
// find the Node's internal ip address to feed to the Pod
inIP, err := getIP(v1.NodeInternalIP, &node)
framework.ExpectNoError(err)

// target Pod at Node and feed Pod Node's InternalIP
pod := newTestPod(node.Name, inIP)
_, err = pc.Create(context.TODO(), pod, metav1.CreateOptions{})
// target Pod at Node
testPod.Spec.NodeName = node.Name
_, err = pc.Create(context.TODO(), &testPod, metav1.CreateOptions{})
framework.ExpectNoError(err)
}

// In some (most?) scenarios, the test harness doesn't run in the same network as the Pods,
// which means it can't query Pods using their cluster-internal IPs. To get around this,
// we create a Pod in a Node's host network, and have that Pod serve on a specific port of that Node.
// We can then ask this proxy Pod to query the internal endpoints served by the test Pods.

// Find the first schedulable node; masters are marked unschedulable. We don't put the proxy on the master
// because in some (most?) deployments firewall rules don't allow external traffic to hit ports 30000-32767
// on the master, but do allow this on the nodes.
node, err := getSchedulable(nodes.Items)
framework.ExpectNoError(err)
ginkgo.By("creating a no-snat-test-proxy Pod on Node " + node.Name + " port " + strconv.Itoa(testProxyPort) +
" so we can target our test Pods through this Node's ExternalIP")

extIP, err := getIP(v1.NodeExternalIP, node)
framework.ExpectNoError(err)
proxyNodeIP := extIP + ":" + strconv.Itoa(testProxyPort)

_, err = pc.Create(context.TODO(), newTestProxyPod(node.Name), metav1.CreateOptions{})
framework.ExpectNoError(err)

ginkgo.By("waiting for all of the no-snat-test pods to be scheduled and running")
err = wait.PollImmediate(10*time.Second, 1*time.Minute, func() (bool, error) {
pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: "no-snat-test"})
pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: noSNATTestName})
if err != nil {
return false, err
}
@@ -194,64 +97,22 @@ var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
})
framework.ExpectNoError(err)

ginkgo.By("waiting for the no-snat-test-proxy Pod to be scheduled and running")
err = wait.PollImmediate(10*time.Second, 1*time.Minute, func() (bool, error) {
pod, err := pc.Get(context.TODO(), "no-snat-test-proxy", metav1.GetOptions{})
if err != nil {
return false, err
}
if pod.Status.Phase != v1.PodRunning {
if pod.Status.Phase != v1.PodPending {
return false, fmt.Errorf("expected pod to be in phase \"Pending\" or \"Running\"")
}
return false, nil // pod is still pending
}
return true, nil // pod is running
})
framework.ExpectNoError(err)

ginkgo.By("sending traffic from each pod to the others and checking that SNAT does not occur")
pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: "no-snat-test"})
pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: noSNATTestName})
framework.ExpectNoError(err)

// collect pod IPs
podIPs := []string{}
for _, pod := range pods.Items {
podIPs = append(podIPs, pod.Status.PodIP+":"+strconv.Itoa(testPodPort))
}
// hit the /checknosnat endpoint on each Pod, tell each Pod to check all the other Pods
// hit the /clientip endpoint on every other Pod to check whether the source IP is preserved
// this test is O(n^2) but it doesn't matter because we only run this test on small clusters (~3 nodes)
errs := []string{}
client := http.Client{
Timeout: 5 * time.Minute,
}
for _, pip := range podIPs {
ips := []string{}
for _, ip := range podIPs {
if ip == pip {
for _, sourcePod := range pods.Items {
for _, targetPod := range pods.Items {
if targetPod.Name == sourcePod.Name {
continue
}
ips = append(ips, ip)
targetAddr := net.JoinHostPort(targetPod.Status.PodIP, testPodPort)
sourceIP, execPodIP := execSourceIPTest(sourcePod, targetAddr)
ginkgo.By("Verifying the preserved source ip")
framework.ExpectEqual(sourceIP, execPodIP)
}
// hit /checknosnat on pip, via proxy
resp, err := client.Get(checknosnatURL(proxyNodeIP, pip, ips))
framework.ExpectNoError(err)

// check error code on the response, if 500 record the body, which will describe the error
if resp.StatusCode == 500 {
body, err := ioutil.ReadAll(resp.Body)
framework.ExpectNoError(err)
errs = append(errs, string(body))
}
resp.Body.Close()
}

// report the errors all at the end
if len(errs) > 0 {
str := strings.Join(errs, "\n")
err := fmt.Errorf("/checknosnat failed in the following cases:\n%s", str)
framework.ExpectNoError(err)
}
})
})
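What lets the refactor drop the hostNetwork proxy is that the check now runs from inside the source Pod rather than from the test harness: the harness execs curl in the Pod through the e2e framework, so it never needs network reachability to Pod IPs itself. A minimal sketch of that pattern follows; the function name and the namespace/pod/address values it would receive are hypothetical, while framework.RunHostCmd and the curl command line are taken from the diff.

package network

import "k8s.io/kubernetes/test/e2e/framework"

// logClientIPSeenBy is illustrative only; the namespace, Pod name, and target
// address would come from the test. It shows the exec-inside-the-Pod pattern
// that replaces the old hostNetwork proxy, the same mechanism used by
// execSourceIPTest in the utilities hunk further down.
func logClientIPSeenBy(ns, podName, targetAddr string) {
	cmd := "curl -q -s --connect-timeout 30 " + targetAddr + "/clientip"
	stdout, err := framework.RunHostCmd(ns, podName, cmd)
	if err != nil {
		framework.Logf("curl via exec failed: %v", err)
		return
	}
	framework.Logf("client address seen by %s: %s", targetAddr, stdout)
}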
@@ -1019,7 +1019,7 @@ var _ = SIGDescribe("Services", func() {
serviceAddress := net.JoinHostPort(serviceIP, strconv.Itoa(servicePort))

for _, pausePod := range pausePods.Items {
sourceIP, execPodIP := execSourceipTest(pausePod, serviceAddress)
sourceIP, execPodIP := execSourceIPTest(pausePod, serviceAddress)
ginkgo.By("Verifying the preserved source ip")
framework.ExpectEqual(sourceIP, execPodIP)
}
@@ -3382,38 +3382,6 @@ var _ = SIGDescribe("ESIPP [Slow]", func() {
})
})
func execSourceipTest(pausePod v1.Pod, serviceAddress string) (string, string) {
var err error
var stdout string
timeout := 2 * time.Minute

framework.Logf("Waiting up to %v to get response from %s", timeout, serviceAddress)
cmd := fmt.Sprintf(`curl -q -s --connect-timeout 30 %s/clientip`, serviceAddress)
for start := time.Now(); time.Since(start) < timeout; time.Sleep(2 * time.Second) {
stdout, err = framework.RunHostCmd(pausePod.Namespace, pausePod.Name, cmd)
if err != nil {
framework.Logf("got err: %v, retry until timeout", err)
continue
}
// Need to check output because it might omit in case of error.
if strings.TrimSpace(stdout) == "" {
framework.Logf("got empty stdout, retry until timeout")
continue
}
break
}

framework.ExpectNoError(err)

// The stdout return from RunHostCmd is in this format: x.x.x.x:port or [xx:xx:xx::x]:port
host, _, err := net.SplitHostPort(stdout)
if err != nil {
// ginkgo.Fail the test if output format is unexpected.
framework.Failf("exec pod returned unexpected stdout: [%v]\n", stdout)
}
return pausePod.Status.PodIP, host
}
// execAffinityTestForSessionAffinityTimeout is a helper function that wraps the logic of
// the affinity test for non-load-balancer services. Session affinity will be
// enabled when the service is created and a short timeout will be configured so
@@ -19,6 +19,7 @@ package network
import (
"bytes"
"fmt"
"net"
"regexp"
"strings"
"time"
@@ -125,3 +126,39 @@ func CheckSCTPModuleLoadedOnNodes(f *framework.Framework, nodes *v1.NodeList) bo
}
return false
}
// execSourceIPTest executes curl against the "/clientip" endpoint on the target address
// from the given Pod to check whether the source IP is preserved.
func execSourceIPTest(sourcePod v1.Pod, targetAddr string) (string, string) {
var (
err error
stdout string
timeout = 2 * time.Minute
)

framework.Logf("Waiting up to %v to get response from %s", timeout, targetAddr)
cmd := fmt.Sprintf(`curl -q -s --connect-timeout 30 %s/clientip`, targetAddr)
for start := time.Now(); time.Since(start) < timeout; time.Sleep(2 * time.Second) {
stdout, err = framework.RunHostCmd(sourcePod.Namespace, sourcePod.Name, cmd)
if err != nil {
framework.Logf("got err: %v, retry until timeout", err)
continue
}
// Need to check the output because it might be empty in case of error.
if strings.TrimSpace(stdout) == "" {
framework.Logf("got empty stdout, retry until timeout")
continue
}
break
}

framework.ExpectNoError(err)

// The stdout return from RunHostCmd is in this format: x.x.x.x:port or [xx:xx:xx::x]:port
host, _, err := net.SplitHostPort(stdout)
if err != nil {
// ginkgo.Fail the test if output format is unexpected.
framework.Failf("exec pod returned unexpected stdout: [%v]\n", stdout)
}
return sourcePod.Status.PodIP, host
}
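As the comment in execSourceIPTest notes, the curl output is a host:port pair in either the IPv4 or the bracketed IPv6 form, and net.SplitHostPort handles both, which is why the helper does not parse the string by hand. A small standalone illustration with made-up addresses:

package main

import (
	"fmt"
	"net"
)

func main() {
	// Both forms mentioned in the helper's comment: x.x.x.x:port and [xx:xx:xx::x]:port.
	for _, s := range []string{"10.244.1.5:38712", "[fd00:10:244::5]:38712"} {
		host, port, err := net.SplitHostPort(s)
		fmt.Println(host, port, err)
	}
}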