Merge pull request #93557 from knight42/fix/snat-test

fix(e2e::network): refactor NoSNAT test
Commit f0485f5098 by Kubernetes Prow Robot, 2020-08-01 20:21:41 -07:00, committed by GitHub (GPG key ID: 4AEE18F83AFDEB23)
3 changed files with 60 additions and 194 deletions
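
Overview: the refactor replaces the old two-part harness (a no-snat-test container exposing /checknosnat plus a host-network proxy Pod on a port in the NodePort range) with a single agnhost netexec server per node. Each test Pod curls every other Pod's /clientip endpoint, which echoes back the caller's address as the server saw it, and the test asserts that the reported IP equals the source Pod's own IP. A minimal standalone sketch of that check, outside the e2e framework (names and addresses are illustrative, not part of this diff):

package main

import (
	"fmt"
	"io/ioutil"
	"net"
	"net/http"
)

// clientIPSeenBy asks a netexec-style server what source address it saw.
// The /clientip endpoint replies with "ip:port" (IPv6 as "[ip]:port").
func clientIPSeenBy(targetAddr string) (string, error) {
	resp, err := http.Get("http://" + targetAddr + "/clientip")
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	// Keep only the ip from the "ip:port" reply.
	host, _, err := net.SplitHostPort(string(body))
	return host, err
}

func main() {
	seen, err := clientIPSeenBy("10.244.2.7:8080") // hypothetical target Pod address
	if err != nil {
		panic(err)
	}
	// If SNAT occurred on the path, 'seen' would be a node or gateway IP
	// rather than the source Pod's IP.
	fmt.Println("source address seen by target:", seen)
}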

test/e2e/network/no_snat.go

@@ -19,116 +19,43 @@ package network
 import (
 	"context"
 	"fmt"
-	"io/ioutil"
-	"net/http"
-	"strconv"
-	"strings"
+	"net"
 	"time"
 
+	"github.com/onsi/ginkgo"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/kubernetes/test/e2e/framework"
-
-	"github.com/onsi/ginkgo"
-
 	imageutils "k8s.io/kubernetes/test/utils/image"
 )
 
 const (
-	testPodPort = 8080
-
-	testProxyPort = 31235 // Firewall rule allows external traffic on ports 30000-32767. I just picked a random one.
+	testPodPort    = "8080"
+	noSNATTestName = "no-snat-test"
 )
 
 var (
 	testPod = v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
-			GenerateName: "no-snat-test",
+			GenerateName: noSNATTestName,
 			Labels: map[string]string{
-				"no-snat-test": "",
+				noSNATTestName: "",
 			},
 		},
 		Spec: v1.PodSpec{
 			Containers: []v1.Container{
 				{
-					Name:  "no-snat-test",
+					Name:  noSNATTestName,
 					Image: imageutils.GetE2EImage(imageutils.Agnhost),
-					Args:  []string{"no-snat-test", "--port", strconv.Itoa(testPodPort)},
-					Env: []v1.EnvVar{
-						{
-							Name:      "POD_IP",
-							ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "status.podIP"}},
-						},
-					},
+					Args:  []string{"netexec", "--http-port", testPodPort},
 				},
 			},
 		},
 	}
-
-	testProxyPod = v1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: "no-snat-test-proxy",
-		},
-		Spec: v1.PodSpec{
-			HostNetwork: true,
-			Containers: []v1.Container{
-				{
-					Name:  "no-snat-test-proxy",
-					Image: imageutils.GetE2EImage(imageutils.Agnhost),
-					Args:  []string{"no-snat-test-proxy", "--port", strconv.Itoa(testProxyPort)},
-					Ports: []v1.ContainerPort{
-						{
-							ContainerPort: testProxyPort,
-							HostPort:      testProxyPort,
-						},
-					},
-				},
-			},
-		},
-	}
 )
 
-// Produces a pod spec that passes nip as NODE_IP env var using downward API
-func newTestPod(nodename string, nip string) *v1.Pod {
-	pod := testPod
-	nodeIP := v1.EnvVar{
-		Name:  "NODE_IP",
-		Value: nip,
-	}
-	pod.Spec.Containers[0].Env = append(pod.Spec.Containers[0].Env, nodeIP)
-	pod.Spec.NodeName = nodename
-	return &pod
-}
-
-func newTestProxyPod(nodename string) *v1.Pod {
-	pod := testProxyPod
-	pod.Spec.NodeName = nodename
-	return &pod
-}
-
-func getIP(iptype v1.NodeAddressType, node *v1.Node) (string, error) {
-	for _, addr := range node.Status.Addresses {
-		if addr.Type == iptype {
-			return addr.Address, nil
-		}
-	}
-	return "", fmt.Errorf("did not find %s on Node", iptype)
-}
-
-func getSchedulable(nodes []v1.Node) (*v1.Node, error) {
-	for _, node := range nodes {
-		if !node.Spec.Unschedulable {
-			return &node, nil
-		}
-	}
-	return nil, fmt.Errorf("all Nodes were unschedulable")
-}
-
-func checknosnatURL(proxy, pip string, ips []string) string {
-	return fmt.Sprintf("http://%s/checknosnat?target=%s&ips=%s", proxy, pip, strings.Join(ips, ","))
-}
-
 // This test verifies that a Pod on each node in a cluster can talk to Pods on every other node without SNAT.
 // We use the [Feature:NoSNAT] tag so that most jobs will skip this test by default.
 var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
@@ -144,39 +71,15 @@ var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
 		framework.ExpectNotEqual(len(nodes.Items), 0, "no Nodes in the cluster")
 
 		for _, node := range nodes.Items {
-			// find the Node's internal ip address to feed to the Pod
-			inIP, err := getIP(v1.NodeInternalIP, &node)
-			framework.ExpectNoError(err)
-
-			// target Pod at Node and feed Pod Node's InternalIP
-			pod := newTestPod(node.Name, inIP)
-			_, err = pc.Create(context.TODO(), pod, metav1.CreateOptions{})
+			// target Pod at Node
+			testPod.Spec.NodeName = node.Name
+			_, err = pc.Create(context.TODO(), &testPod, metav1.CreateOptions{})
 			framework.ExpectNoError(err)
 		}
 
-		// In some (most?) scenarios, the test harness doesn't run in the same network as the Pods,
-		// which means it can't query Pods using their cluster-internal IPs. To get around this,
-		// we create a Pod in a Node's host network, and have that Pod serve on a specific port of that Node.
-		// We can then ask this proxy Pod to query the internal endpoints served by the test Pods.
-		// Find the first schedulable node; masters are marked unschedulable. We don't put the proxy on the master
-		// because in some (most?) deployments firewall rules don't allow external traffic to hit ports 30000-32767
-		// on the master, but do allow this on the nodes.
-		node, err := getSchedulable(nodes.Items)
-		framework.ExpectNoError(err)
-		ginkgo.By("creating a no-snat-test-proxy Pod on Node " + node.Name + " port " + strconv.Itoa(testProxyPort) +
-			" so we can target our test Pods through this Node's ExternalIP")
-
-		extIP, err := getIP(v1.NodeExternalIP, node)
-		framework.ExpectNoError(err)
-		proxyNodeIP := extIP + ":" + strconv.Itoa(testProxyPort)
-
-		_, err = pc.Create(context.TODO(), newTestProxyPod(node.Name), metav1.CreateOptions{})
-		framework.ExpectNoError(err)
-
 		ginkgo.By("waiting for all of the no-snat-test pods to be scheduled and running")
 		err = wait.PollImmediate(10*time.Second, 1*time.Minute, func() (bool, error) {
-			pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: "no-snat-test"})
+			pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: noSNATTestName})
 			if err != nil {
 				return false, err
 			}
@@ -194,64 +97,22 @@ var _ = SIGDescribe("NoSNAT [Feature:NoSNAT] [Slow]", func() {
 		})
 		framework.ExpectNoError(err)
 
-		ginkgo.By("waiting for the no-snat-test-proxy Pod to be scheduled and running")
-		err = wait.PollImmediate(10*time.Second, 1*time.Minute, func() (bool, error) {
-			pod, err := pc.Get(context.TODO(), "no-snat-test-proxy", metav1.GetOptions{})
-			if err != nil {
-				return false, err
-			}
-			if pod.Status.Phase != v1.PodRunning {
-				if pod.Status.Phase != v1.PodPending {
-					return false, fmt.Errorf("expected pod to be in phase \"Pending\" or \"Running\"")
-				}
-				return false, nil // pod is still pending
-			}
-			return true, nil // pod is running
-		})
-		framework.ExpectNoError(err)
-
 		ginkgo.By("sending traffic from each pod to the others and checking that SNAT does not occur")
-		pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: "no-snat-test"})
+		pods, err := pc.List(context.TODO(), metav1.ListOptions{LabelSelector: noSNATTestName})
 		framework.ExpectNoError(err)
 
-		// collect pod IPs
-		podIPs := []string{}
-		for _, pod := range pods.Items {
-			podIPs = append(podIPs, pod.Status.PodIP+":"+strconv.Itoa(testPodPort))
-		}
-
-		// hit the /checknosnat endpoint on each Pod, tell each Pod to check all the other Pods
+		// hit the /clientip endpoint on every other Pods to check if source ip is preserved
 		// this test is O(n^2) but it doesn't matter because we only run this test on small clusters (~3 nodes)
-		errs := []string{}
-		client := http.Client{
-			Timeout: 5 * time.Minute,
-		}
-		for _, pip := range podIPs {
-			ips := []string{}
-			for _, ip := range podIPs {
-				if ip == pip {
+		for _, sourcePod := range pods.Items {
+			for _, targetPod := range pods.Items {
+				if targetPod.Name == sourcePod.Name {
 					continue
 				}
-				ips = append(ips, ip)
+				targetAddr := net.JoinHostPort(targetPod.Status.PodIP, testPodPort)
+				sourceIP, execPodIP := execSourceIPTest(sourcePod, targetAddr)
+				ginkgo.By("Verifying the preserved source ip")
+				framework.ExpectEqual(sourceIP, execPodIP)
 			}
-
-			// hit /checknosnat on pip, via proxy
-			resp, err := client.Get(checknosnatURL(proxyNodeIP, pip, ips))
-			framework.ExpectNoError(err)
-
-			// check error code on the response, if 500 record the body, which will describe the error
-			if resp.StatusCode == 500 {
-				body, err := ioutil.ReadAll(resp.Body)
-				framework.ExpectNoError(err)
-				errs = append(errs, string(body))
-			}
-			resp.Body.Close()
-		}
-
-		// report the errors all at the end
-		if len(errs) > 0 {
-			str := strings.Join(errs, "\n")
-			err := fmt.Errorf("/checknosnat failed in the following cases:\n%s", str)
-			framework.ExpectNoError(err)
 		}
 	})
 })
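
Two details of the new loop are worth calling out. First, the test now reaches Pod IPs by exec'ing curl inside the source Pod (framework.RunHostCmd, i.e. kubectl exec through the API server), so the harness no longer needs direct network reachability to cluster-internal IPs and the entire proxy-Pod machinery removed above becomes unnecessary. Second, addresses are built with net.JoinHostPort rather than string concatenation, which keeps the test correct on IPv6 clusters. A quick standalone illustration of the latter (values are illustrative):

package main

import (
	"fmt"
	"net"
)

func main() {
	// JoinHostPort brackets IPv6 literals as required by host:port syntax;
	// a naive ip + ":" + port would produce an unparseable IPv6 address.
	fmt.Println(net.JoinHostPort("10.244.1.5", "8080")) // 10.244.1.5:8080
	fmt.Println(net.JoinHostPort("fd00::5", "8080"))    // [fd00::5]:8080
}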

test/e2e/network/service.go

@@ -1019,7 +1019,7 @@ var _ = SIGDescribe("Services", func() {
 		serviceAddress := net.JoinHostPort(serviceIP, strconv.Itoa(servicePort))
 
 		for _, pausePod := range pausePods.Items {
-			sourceIP, execPodIP := execSourceipTest(pausePod, serviceAddress)
+			sourceIP, execPodIP := execSourceIPTest(pausePod, serviceAddress)
 			ginkgo.By("Verifying the preserved source ip")
 			framework.ExpectEqual(sourceIP, execPodIP)
 		}
@@ -3382,38 +3382,6 @@ var _ = SIGDescribe("ESIPP [Slow]", func() {
 	})
 })
 
-func execSourceipTest(pausePod v1.Pod, serviceAddress string) (string, string) {
-	var err error
-	var stdout string
-	timeout := 2 * time.Minute
-	framework.Logf("Waiting up to %v to get response from %s", timeout, serviceAddress)
-	cmd := fmt.Sprintf(`curl -q -s --connect-timeout 30 %s/clientip`, serviceAddress)
-	for start := time.Now(); time.Since(start) < timeout; time.Sleep(2 * time.Second) {
-		stdout, err = framework.RunHostCmd(pausePod.Namespace, pausePod.Name, cmd)
-		if err != nil {
-			framework.Logf("got err: %v, retry until timeout", err)
-			continue
-		}
-		// Need to check output because it might omit in case of error.
-		if strings.TrimSpace(stdout) == "" {
-			framework.Logf("got empty stdout, retry until timeout")
-			continue
-		}
-		break
-	}
-	framework.ExpectNoError(err)
-
-	// The stdout return from RunHostCmd is in this format: x.x.x.x:port or [xx:xx:xx::x]:port
-	host, _, err := net.SplitHostPort(stdout)
-	if err != nil {
-		// ginkgo.Fail the test if output format is unexpected.
-		framework.Failf("exec pod returned unexpected stdout: [%v]\n", stdout)
-	}
-	return pausePod.Status.PodIP, host
-}
-
 // execAffinityTestForSessionAffinityTimeout is a helper function that wrap the logic of
 // affinity test for non-load-balancer services. Session afinity will be
 // enabled when the service is created and a short timeout will be configured so

test/e2e/network/util.go

@@ -19,6 +19,7 @@ package network
 import (
 	"bytes"
 	"fmt"
+	"net"
 	"regexp"
 	"strings"
 	"time"
@@ -125,3 +126,39 @@ func CheckSCTPModuleLoadedOnNodes(f *framework.Framework, nodes *v1.NodeList) bool {
 	}
 	return false
 }
+
+// execSourceIPTest executes curl to access "/clientip" endpoint on target address
+// from given Pod to check if source ip is preserved.
+func execSourceIPTest(sourcePod v1.Pod, targetAddr string) (string, string) {
+	var (
+		err     error
+		stdout  string
+		timeout = 2 * time.Minute
+	)
+
+	framework.Logf("Waiting up to %v to get response from %s", timeout, targetAddr)
+	cmd := fmt.Sprintf(`curl -q -s --connect-timeout 30 %s/clientip`, targetAddr)
+	for start := time.Now(); time.Since(start) < timeout; time.Sleep(2 * time.Second) {
+		stdout, err = framework.RunHostCmd(sourcePod.Namespace, sourcePod.Name, cmd)
+		if err != nil {
+			framework.Logf("got err: %v, retry until timeout", err)
+			continue
+		}
+		// Need to check output because it might omit in case of error.
+		if strings.TrimSpace(stdout) == "" {
+			framework.Logf("got empty stdout, retry until timeout")
+			continue
+		}
+		break
+	}
+	framework.ExpectNoError(err)
+
+	// The stdout return from RunHostCmd is in this format: x.x.x.x:port or [xx:xx:xx::x]:port
+	host, _, err := net.SplitHostPort(stdout)
+	if err != nil {
+		// ginkgo.Fail the test if output format is unexpected.
+		framework.Failf("exec pod returned unexpected stdout: [%v]\n", stdout)
+	}
+	return sourcePod.Status.PodIP, host
+}
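
For context, a hedged sketch of the helper's contract as a caller sees it. This assumes the surrounding package network (v1, net, framework imports and the testPodPort constant from no_snat.go); assertNoSNAT is a hypothetical name, not part of this commit:

// assertNoSNAT illustrates the contract of execSourceIPTest: it returns
// (the source Pod's own IP, the client IP the target reported), and the
// two match exactly when no SNAT happens on the path between the Pods.
func assertNoSNAT(sourcePod, targetPod v1.Pod) {
	targetAddr := net.JoinHostPort(targetPod.Status.PodIP, testPodPort)
	sourceIP, clientIP := execSourceIPTest(sourcePod, targetAddr)
	framework.ExpectEqual(sourceIP, clientIP) // equal => source IP preserved
}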