fix(e2e::network): refactor NoSNAT test

Signed-off-by: knight42 <anonymousknight96@gmail.com>
knight42 2020-07-30 15:18:00 +08:00
parent a5090a8ff2
commit 7b07b33094
3 changed files with 60 additions and 194 deletions

test/e2e/network/no_snat.go

@@ -19,116 +19,43 @@ package network
import (
"context"
"fmt"
"io/ioutil"
"net/http"
"strconv"
"strings"
"net"
"time"
"github.com/onsi/ginkgo"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/test/e2e/framework"
"github.com/onsi/ginkgo"
imageutils "k8s.io/kubernetes/test/utils/image"
)
const (
-testPodPort = 8080
-testProxyPort = 31235 // Firewall rule allows external traffic on ports 30000-32767. I just picked a random one.
+testPodPort = "8080"
+noSNATTestName = "no-snat-test"
)
var (
testPod = v1.Pod{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "no-snat-test",
GenerateName: noSNATTestName,
Labels: map[string]string{
"no-snat-test": "",
noSNATTestName: "",
},
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "no-snat-test",
Name: noSNATTestName,
Image: imageutils.GetE2EImage(imageutils.Agnhost),
Args: []string{"no-snat-test", "--port", strconv.Itoa(testPodPort)},
Env: []v1.EnvVar{
{
Name: "POD_IP",
ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "status.podIP"}},
},
},
},
},
},
}
testProxyPod = v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "no-snat-test-proxy",
},
Spec: v1.PodSpec{
HostNetwork: true,
Containers: []v1.Container{
{
Name: "no-snat-test-proxy",
Image: imageutils.GetE2EImage(imageutils.Agnhost),
Args: []string{"no-snat-test-proxy", "--port", strconv.Itoa(testProxyPort)},
Ports: []v1.ContainerPort{
{
ContainerPort: testProxyPort,
HostPort: testProxyPort,
},
},
Args: []string{"netexec", "--http-port", testPodPort},
},
},
},
}
)
-// Produces a pod spec that passes nip as NODE_IP env var using downward API
-func newTestPod(nodename string, nip string) *v1.Pod {
-pod := testPod
-nodeIP := v1.EnvVar{
-Name: "NODE_IP",
-Value: nip,
-}
-pod.Spec.Containers[0].Env = append(pod.Spec.Containers[0].Env, nodeIP)
-pod.Spec.NodeName = nodename
-return &pod
-}
-func newTestProxyPod(nodename string) *v1.Pod {
-pod := testProxyPod
-pod.Spec.NodeName = nodename
-return &pod
-}
-func getIP(iptype v1.NodeAddressType, node *v1.Node) (string, error) {
-for _, addr := range node.Status.Addresses {
-if addr.Type == iptype {
-return addr.Address, nil
-}
-}
-return "", fmt.Errorf("did not find %s on Node", iptype)
-}
-func getSchedulable(nodes []v1.Node) (*v1.Node, error) {
-for _, node := range nodes {
-if !node.Spec.Unschedulable {
-return &node, nil
-}
-}
-return nil, fmt.Errorf("all Nodes were unschedulable")
-}
-func checknosnatURL(proxy, pip string, ips []string) string {
-return fmt.Sprintf("http://%s/checknosnat?target=%s&ips=%s", proxy, pip, strings.Join(ips, ","))
-}
// This test verifies that a Pod on each node in a cluster can talk to Pods on every other node without SNAT.
// We use the [Feature:NoSNAT] tag so that most jobs will skip this test by default.
var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
@@ -144,39 +71,15 @@ var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
framework.ExpectNotEqual(len(nodes.Items), 0, "no Nodes in the cluster")
for _, node := range nodes.Items {
-// find the Node's internal ip address to feed to the Pod
-inIP, err := getIP(v1.NodeInternalIP, &node)
-framework.ExpectNoError(err)
-// target Pod at Node and feed Pod Node's InternalIP
-pod := newTestPod(node.Name, inIP)
-_, err = pc.Create(context.TODO(), pod, metav1.CreateOptions{})
+// target Pod at Node
+testPod.Spec.NodeName = node.Name
+_, err = pc.Create(context.TODO(), &testPod, metav1.CreateOptions{})
framework.ExpectNoError(err)
}
-// In some (most?) scenarios, the test harness doesn't run in the same network as the Pods,
-// which means it can't query Pods using their cluster-internal IPs. To get around this,
-// we create a Pod in a Node's host network, and have that Pod serve on a specific port of that Node.
-// We can then ask this proxy Pod to query the internal endpoints served by the test Pods.
-// Find the first schedulable node; masters are marked unschedulable. We don't put the proxy on the master
-// because in some (most?) deployments firewall rules don't allow external traffic to hit ports 30000-32767
-// on the master, but do allow this on the nodes.
-node, err := getSchedulable(nodes.Items)
-framework.ExpectNoError(err)
-ginkgo.By("creating a no-snat-test-proxy Pod on Node " + node.Name + " port " + strconv.Itoa(testProxyPort) +
-" so we can target our test Pods through this Node's ExternalIP")
-extIP, err := getIP(v1.NodeExternalIP, node)
-framework.ExpectNoError(err)
-proxyNodeIP := extIP + ":" + strconv.Itoa(testProxyPort)
-_, err = pc.Create(context.TODO(), newTestProxyPod(node.Name), metav1.CreateOptions{})
-framework.ExpectNoError(err)
ginkgo.By("waiting for all of the no-snat-test pods to be scheduled and running")
err = wait.PollImmediate(10*time.Second, 1*time.Minute, func() (bool, error) {
-pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: "no-snat-test"})
+pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: noSNATTestName})
if err != nil {
return false, err
}
@@ -194,64 +97,22 @@ var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
})
framework.ExpectNoError(err)
ginkgo.By("waiting for the no-snat-test-proxy Pod to be scheduled and running")
err = wait.PollImmediate(10*time.Second, 1*time.Minute, func() (bool, error) {
pod, err := pc.Get(context.TODO(), "no-snat-test-proxy", metav1.GetOptions{})
if err != nil {
return false, err
}
if pod.Status.Phase != v1.PodRunning {
if pod.Status.Phase != v1.PodPending {
return false, fmt.Errorf("expected pod to be in phase \"Pending\" or \"Running\"")
}
return false, nil // pod is still pending
}
return true, nil // pod is running
})
framework.ExpectNoError(err)
ginkgo.By("sending traffic from each pod to the others and checking that SNAT does not occur")
-pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: "no-snat-test"})
+pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: noSNATTestName})
framework.ExpectNoError(err)
-// collect pod IPs
-podIPs := []string{}
-for _, pod := range pods.Items {
-podIPs = append(podIPs, pod.Status.PodIP+":"+strconv.Itoa(testPodPort))
-}
-// hit the /checknosnat endpoint on each Pod, tell each Pod to check all the other Pods
+// hit the /clientip endpoint on every other Pods to check if source ip is preserved
// this test is O(n^2) but it doesn't matter because we only run this test on small clusters (~3 nodes)
-errs := []string{}
-client := http.Client{
-Timeout: 5 * time.Minute,
-}
-for _, pip := range podIPs {
-ips := []string{}
-for _, ip := range podIPs {
-if ip == pip {
+for _, sourcePod := range pods.Items {
+for _, targetPod := range pods.Items {
+if targetPod.Name == sourcePod.Name {
continue
}
-ips = append(ips, ip)
+targetAddr := net.JoinHostPort(targetPod.Status.PodIP, testPodPort)
+sourceIP, execPodIP := execSourceIPTest(sourcePod, targetAddr)
+ginkgo.By("Verifying the preserved source ip")
+framework.ExpectEqual(sourceIP, execPodIP)
}
-// hit /checknosnat on pip, via proxy
-resp, err := client.Get(checknosnatURL(proxyNodeIP, pip, ips))
-framework.ExpectNoError(err)
-// check error code on the response, if 500 record the body, which will describe the error
-if resp.StatusCode == 500 {
-body, err := ioutil.ReadAll(resp.Body)
-framework.ExpectNoError(err)
-errs = append(errs, string(body))
-}
-resp.Body.Close()
}
-// report the errors all at the end
-if len(errs) > 0 {
-str := strings.Join(errs, "\n")
-err := fmt.Errorf("/checknosnat failed in the following cases:\n%s", str)
-framework.ExpectNoError(err)
-}
})
})
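For context on what the refactored test now checks: each no-snat-test pod runs agnhost "netexec", whose /clientip endpoint reports the source address of the incoming request, and the test curls every peer pod and compares that reported address with the sending pod's PodIP. Below is a minimal standalone sketch of the same idea; the httptest handler that echoes r.RemoteAddr stands in for netexec, and plain net/http stands in for the framework's curl-through-a-pod plumbing, so none of this is the e2e code itself.

package main

import (
	"fmt"
	"io"
	"net"
	"net/http"
	"net/http/httptest"
)

func main() {
	// Stand-in for a target test pod: report the client address exactly as the
	// server saw it ("ip:port" for IPv4, "[ip]:port" for IPv6).
	target := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprint(w, r.RemoteAddr)
	}))
	defer target.Close()

	// The equivalent of "curl http://<target>/clientip" issued from the source side.
	resp, err := http.Get(target.URL + "/clientip")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}

	// Keep only the host part of the reply and compare it with the address the
	// caller believes it has; if SNAT had rewritten the traffic, the two would differ.
	seenIP, _, err := net.SplitHostPort(string(body))
	if err != nil {
		panic(err)
	}
	fmt.Printf("target saw %s as the client source address\n", seenIP)
}

In the real test the expected value is sourcePod.Status.PodIP and the request is executed inside the source pod via framework.RunHostCmd and curl, as shown in the util.go hunk below.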

test/e2e/network/service.go

@@ -1019,7 +1019,7 @@ var _ = SIGDescribe("Services", func() {
serviceAddress := net.JoinHostPort(serviceIP, strconv.Itoa(servicePort))
for _, pausePod := range pausePods.Items {
-sourceIP, execPodIP := execSourceipTest(pausePod, serviceAddress)
+sourceIP, execPodIP := execSourceIPTest(pausePod, serviceAddress)
ginkgo.By("Verifying the preserved source ip")
framework.ExpectEqual(sourceIP, execPodIP)
}
@@ -3382,38 +3382,6 @@ var _ = SIGDescribe("ESIPP [Slow]", func() {
})
})
-func execSourceipTest(pausePod v1.Pod, serviceAddress string) (string, string) {
-var err error
-var stdout string
-timeout := 2 * time.Minute
-framework.Logf("Waiting up to %v to get response from %s", timeout, serviceAddress)
-cmd := fmt.Sprintf(`curl -q -s --connect-timeout 30 %s/clientip`, serviceAddress)
-for start := time.Now(); time.Since(start) < timeout; time.Sleep(2 * time.Second) {
-stdout, err = framework.RunHostCmd(pausePod.Namespace, pausePod.Name, cmd)
-if err != nil {
-framework.Logf("got err: %v, retry until timeout", err)
-continue
-}
-// Need to check output because it might omit in case of error.
-if strings.TrimSpace(stdout) == "" {
-framework.Logf("got empty stdout, retry until timeout")
-continue
-}
-break
-}
-framework.ExpectNoError(err)
-// The stdout return from RunHostCmd is in this format: x.x.x.x:port or [xx:xx:xx::x]:port
-host, _, err := net.SplitHostPort(stdout)
-if err != nil {
-// ginkgo.Fail the test if output format is unexpected.
-framework.Failf("exec pod returned unexpected stdout: [%v]\n", stdout)
-}
-return pausePod.Status.PodIP, host
-}
// execAffinityTestForSessionAffinityTimeout is a helper function that wrap the logic of
// affinity test for non-load-balancer services. Session afinity will be
// enabled when the service is created and a short timeout will be configured so

test/e2e/network/util.go

@@ -19,6 +19,7 @@ package network
import (
"bytes"
"fmt"
"net"
"regexp"
"strings"
"time"
@@ -125,3 +126,39 @@ func CheckSCTPModuleLoadedOnNodes(f *framework.Framework, nodes *v1.NodeList) bo
}
return false
}
+// execSourceIPTest executes curl to access "/clientip" endpoint on target address
+// from given Pod to check if source ip is preserved.
+func execSourceIPTest(sourcePod v1.Pod, targetAddr string) (string, string) {
+var (
+err error
+stdout string
+timeout = 2 * time.Minute
+)
+framework.Logf("Waiting up to %v to get response from %s", timeout, targetAddr)
+cmd := fmt.Sprintf(`curl -q -s --connect-timeout 30 %s/clientip`, targetAddr)
+for start := time.Now(); time.Since(start) < timeout; time.Sleep(2 * time.Second) {
+stdout, err = framework.RunHostCmd(sourcePod.Namespace, sourcePod.Name, cmd)
+if err != nil {
+framework.Logf("got err: %v, retry until timeout", err)
+continue
+}
+// Need to check output because it might omit in case of error.
+if strings.TrimSpace(stdout) == "" {
+framework.Logf("got empty stdout, retry until timeout")
+continue
+}
+break
+}
+framework.ExpectNoError(err)
+// The stdout return from RunHostCmd is in this format: x.x.x.x:port or [xx:xx:xx::x]:port
+host, _, err := net.SplitHostPort(stdout)
+if err != nil {
+// ginkgo.Fail the test if output format is unexpected.
+framework.Failf("exec pod returned unexpected stdout: [%v]\n", stdout)
+}
+return sourcePod.Status.PodIP, host
+}
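A side note on the address handling in this helper: callers build targetAddr with net.JoinHostPort (see the no_snat.go and service.go hunks above) and the reply is parsed with net.SplitHostPort, which keeps the comparison working for both IPv4 and IPv6 pod IPs; naive concatenation and splitting on ":" would break on IPv6 literals. A small illustration with made-up addresses, not taken from the test:

package main

import (
	"fmt"
	"net"
)

func main() {
	// JoinHostPort adds the brackets an IPv6 literal needs; IPv4 is left as-is.
	fmt.Println(net.JoinHostPort("10.0.0.5", "8080")) // 10.0.0.5:8080
	fmt.Println(net.JoinHostPort("fd00::5", "8080"))  // [fd00::5]:8080

	// SplitHostPort undoes either form, so the host part can be compared
	// directly against a pod IP.
	for _, addr := range []string{"10.0.0.5:8080", "[fd00::5]:8080"} {
		host, port, err := net.SplitHostPort(addr)
		fmt.Println(host, port, err)
	}
}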