Abstract the logic of the TrafficDistribution test

Split the logic of creating the clients and the servers apart from the
logic of checking which clients connect to which servers. Add some
extra complexity to support additional use cases (like multiple
endpoints on the same node).
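
For orientation, here is a minimal sketch of the model this commit introduces: clients and servers become small structs, and each client records exactly the set of endpoints it is expected to reach. The struct names match the diff below; the node names, the "server-N" names, and the printout are illustrative only, not part of the test.

package main

import "fmt"

// Simplified stand-ins for the test's structures (the real ones hold
// *v1.Node and *v1.Pod; plain strings are used here for illustration).
type serverPod struct {
	node string
	name string
}

type clientPod struct {
	node      string
	endpoints []*serverPod // exactly the servers this client may reach
}

func main() {
	// Endpoints in two zones, clients in three, mirroring the test.
	servers := []*serverPod{
		{node: "node-in-zone-a", name: "server-0"},
		{node: "node-in-zone-b", name: "server-1"},
	}
	clients := []*clientPod{
		{node: "node-in-zone-a", endpoints: servers[:1]}, // local endpoint only
		{node: "node-in-zone-b", endpoints: servers[1:]}, // local endpoint only
		{node: "node-in-zone-c", endpoints: servers},     // no local endpoint: all
	}
	for _, c := range clients {
		for _, s := range c.endpoints {
			fmt.Printf("client on %s may reach %s\n", c.node, s.name)
		}
	}
}

The same structures cover the "multiple endpoints on the same node" case mentioned above: point two serverPods at one node and list both in the expectations of whichever clients should reach them.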
Author: Dan Winship
Date:   2025-03-24 08:46:02 -04:00
parent b1a0fea4c6
commit bc81a860b0


@@ -27,6 +27,7 @@ import (
 	discoveryv1 "k8s.io/api/discovery/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/apimachinery/pkg/util/sets"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
@@ -92,6 +93,18 @@ var _ = common.SIGDescribe("Traffic Distribution", func() {
 		}
 	}
 
+	// Data structures for tracking server and client pods
+	type serverPod struct {
+		node *v1.Node
+		pod  *v1.Pod
+	}
+
+	type clientPod struct {
+		node      *v1.Node
+		endpoints []*serverPod
+		pod       *v1.Pod
+	}
+
 	////////////////////////////////////////////////////////////////////////////
 	// Main test specifications.
 	////////////////////////////////////////////////////////////////////////////
@@ -109,13 +122,13 @@ var _ = common.SIGDescribe("Traffic Distribution", func() {
 		ginkgo.By(fmt.Sprintf("finding a node in each of the chosen 3 zones %v", zones))
 		nodeList, err := e2enode.GetReadySchedulableNodes(ctx, c)
 		framework.ExpectNoError(err)
-		nodeForZone := make(map[string]string)
+		nodeForZone := make(map[string]*v1.Node)
 		for _, zone := range zones {
 			found := false
 			for _, node := range nodeList.Items {
 				if zone == node.Labels[v1.LabelTopologyZone] {
 					found = true
-					nodeForZone[zone] = node.GetName()
+					nodeForZone[zone] = &node
 				}
 			}
 			if !found {
@@ -123,20 +136,42 @@ var _ = common.SIGDescribe("Traffic Distribution", func() {
 			}
 		}
 
-		ginkgo.By(fmt.Sprintf("creating 1 pod each in 2 zones %v (out of the total 3 zones)", zones[:2]))
-		zoneForServingPod := make(map[string]string)
-		var servingPods []*v1.Pod
+		var clientPods []*clientPod
+		var serverPods []*serverPod
+
+		// We want clients in all three zones
+		for _, node := range nodeForZone {
+			clientPods = append(clientPods, &clientPod{node: node})
+		}
+
+		// and endpoints in the first two zones
+		serverPods = []*serverPod{
+			{node: clientPods[0].node},
+			{node: clientPods[1].node},
+		}
+
+		// The clients with an endpoint in the same zone should only connect to
+		// that endpoint. The client with no endpoint in its zone should connect
+		// to both endpoints.
+		clientPods[0].endpoints = []*serverPod{serverPods[0]}
+		clientPods[1].endpoints = []*serverPod{serverPods[1]}
+		clientPods[2].endpoints = serverPods
 
+		var podsToCreate []*v1.Pod
 		servingPodLabels := map[string]string{"app": f.UniqueName}
-		for _, zone := range zones[:2] {
-			pod := e2epod.NewAgnhostPod(f.Namespace.Name, "serving-pod-in-"+zone, nil, nil, nil, "serve-hostname")
-			nodeSelection := e2epod.NodeSelection{Name: nodeForZone[zone]}
+		for i, sp := range serverPods {
+			node := sp.node.Name
+			zone := sp.node.Labels[v1.LabelTopologyZone]
+			pod := e2epod.NewAgnhostPod(f.Namespace.Name, fmt.Sprintf("server-%d-%s", i, node), nil, nil, nil, "serve-hostname")
+			ginkgo.By(fmt.Sprintf("creating a server pod %q on node %q in zone %q", pod.Name, node, zone))
+			nodeSelection := e2epod.NodeSelection{Name: node}
 			e2epod.SetNodeSelection(&pod.Spec, nodeSelection)
 			pod.Labels = servingPodLabels
-			servingPods = append(servingPods, pod)
-			zoneForServingPod[pod.Name] = zone
+
+			sp.pod = pod
+			podsToCreate = append(podsToCreate, pod)
 		}
-		e2epod.NewPodClient(f).CreateBatch(ctx, servingPods)
+		e2epod.NewPodClient(f).CreateBatch(ctx, podsToCreate)
 
 		trafficDist := v1.ServiceTrafficDistributionPreferClose
 		svc := createServiceReportErr(ctx, c, f.Namespace.Name, &v1.Service{
@@ -156,95 +191,63 @@ var _ = common.SIGDescribe("Traffic Distribution", func() {
 		ginkgo.By(fmt.Sprintf("creating a service=%q with trafficDistribution=%v", svc.GetName(), *svc.Spec.TrafficDistribution))
 
 		ginkgo.By("waiting for EndpointSlices to be created")
-		err = framework.WaitForServiceEndpointsNum(ctx, c, svc.Namespace, svc.Name, len(servingPods), 1*time.Second, e2eservice.ServiceEndpointsTimeout)
+		err = framework.WaitForServiceEndpointsNum(ctx, c, svc.Namespace, svc.Name, len(serverPods), 1*time.Second, e2eservice.ServiceEndpointsTimeout)
 		framework.ExpectNoError(err)
 		slices := endpointSlicesForService(svc.Name)
 		framework.Logf("got slices:\n%v", format.Object(slices, 1))
 
-		ginkgo.By("keeping traffic within the same zone as the client, when serving pods exist in the same zone")
-
-		createClientPod := func(ctx context.Context, zone string) *v1.Pod {
-			pod := e2epod.NewAgnhostPod(f.Namespace.Name, "client-pod-in-"+zone, nil, nil, nil)
-			nodeSelection := e2epod.NodeSelection{Name: nodeForZone[zone]}
+		podsToCreate = nil
+		for i, cp := range clientPods {
+			node := cp.node.Name
+			zone := cp.node.Labels[v1.LabelTopologyZone]
+			pod := e2epod.NewAgnhostPod(f.Namespace.Name, fmt.Sprintf("client-%d-%s", i, node), nil, nil, nil)
+			ginkgo.By(fmt.Sprintf("creating a client pod %q on node %q in zone %q", pod.Name, node, zone))
+			nodeSelection := e2epod.NodeSelection{Name: node}
 			e2epod.SetNodeSelection(&pod.Spec, nodeSelection)
 			cmd := fmt.Sprintf(`date; for i in $(seq 1 3000); do sleep 1; echo "Date: $(date) Try: ${i}"; curl -q -s --connect-timeout 2 http://%s:80/ ; echo; done`, svc.Name)
 			pod.Spec.Containers[0].Command = []string{"/bin/sh", "-c", cmd}
 			pod.Spec.Containers[0].Name = pod.Name
-			return e2epod.NewPodClient(f).CreateSync(ctx, pod)
+
+			cp.pod = pod
+			podsToCreate = append(podsToCreate, pod)
 		}
+		e2epod.NewPodClient(f).CreateBatch(ctx, podsToCreate)
 
-		for _, clientZone := range zones[:2] {
-			framework.Logf("creating a client pod for probing the service from zone=%q which also has a serving pod", clientZone)
-			clientPod := createClientPod(ctx, clientZone)
+		for _, cp := range clientPods {
+			wantedEndpoints := sets.New[string]()
+			for _, sp := range cp.endpoints {
+				wantedEndpoints.Insert(sp.pod.Name)
+			}
+			unreachedEndpoints := wantedEndpoints.Clone()
 
-			framework.Logf("ensuring that requests from clientPod=%q on zone=%q stay in the same zone", clientPod.Name, clientZone)
+			ginkgo.By(fmt.Sprintf("ensuring that requests from %s on %s go to the endpoint(s) %v", cp.pod.Name, cp.node.Name, wantedEndpoints.UnsortedList()))
 
-			requestsSucceedAndStayInSameZone := framework.MakeMatcher(func(reverseChronologicalLogLines []string) (func() string, error) {
+			requestsSucceed := framework.MakeMatcher(func(reverseChronologicalLogLines []string) (func() string, error) {
 				logLines := reverseChronologicalLogLines
 				if len(logLines) < 20 {
 					return gomegaCustomError("got %d log lines, waiting for at least 20\nreverseChronologicalLogLines=\n%v", len(logLines), strings.Join(reverseChronologicalLogLines, "\n")), nil
 				}
-				consecutiveSameZone := 0
+				consecutiveSuccessfulRequests := 0
 				for _, logLine := range logLines {
 					if logLine == "" || strings.HasPrefix(logLine, "Date:") {
 						continue
 					}
-					destZone, ok := zoneForServingPod[logLine]
-					if !ok {
-						return gomegaCustomError("could not determine dest zone from log line: %s\nreverseChronologicalLogLines=\n%v", logLine, strings.Join(reverseChronologicalLogLines, "\n")), nil
+					destEndpoint := logLine
+					if !wantedEndpoints.Has(destEndpoint) {
+						return gomegaCustomError("request from %s should not have reached %s\nreverseChronologicalLogLines=\n%v", cp.pod.Name, destEndpoint, strings.Join(reverseChronologicalLogLines, "\n")), nil
 					}
-					if clientZone != destZone {
-						return gomegaCustomError("expected request from clientPod=%q to stay in it's zone=%q, delivered to zone=%q\nreverseChronologicalLogLines=\n%v", clientPod.Name, clientZone, destZone, strings.Join(reverseChronologicalLogLines, "\n")), nil
-					}
-					consecutiveSameZone++
-					if consecutiveSameZone >= 10 {
+					consecutiveSuccessfulRequests++
+					unreachedEndpoints.Delete(destEndpoint)
+					if consecutiveSuccessfulRequests >= 10 && len(unreachedEndpoints) == 0 {
 						return nil, nil // Pass condition.
 					}
 				}
 				// Ideally, the matcher would never reach this condition
-				return gomegaCustomError("requests didn't meet the required criteria to stay in same zone\nreverseChronologicalLogLines=\n%v", strings.Join(reverseChronologicalLogLines, "\n")), nil
+				return gomegaCustomError("requests didn't meet the required criteria to reach all endpoints %v\nreverseChronologicalLogLines=\n%v", wantedEndpoints.UnsortedList(), strings.Join(reverseChronologicalLogLines, "\n")), nil
 			})
 
-			gomega.Eventually(ctx, requestsFromClient(clientPod)).WithPolling(5 * time.Second).WithTimeout(e2eservice.KubeProxyLagTimeout).Should(requestsSucceedAndStayInSameZone)
+			gomega.Eventually(ctx, requestsFromClient(cp.pod)).WithPolling(5 * time.Second).WithTimeout(e2eservice.KubeProxyLagTimeout).Should(requestsSucceed)
 		}
-
-		ginkgo.By("routing traffic cluster-wide, when there are no serving pods in the same zone as the client")
-
-		clientZone := zones[2]
-		framework.Logf("creating a client pod for probing the service from zone=%q which DOES NOT has a serving pod", clientZone)
-		clientPod := createClientPod(ctx, clientZone)
-
-		framework.Logf("ensuring that requests from clientPod=%q on zone=%q (without a serving pod) are not dropped, and get routed to one of the serving pods anywhere in the cluster", clientPod.Name, clientZone)
-
-		requestsSucceedByReachingAnyServingPod := framework.MakeMatcher(func(reverseChronologicalLogLines []string) (func() string, error) {
-			logLines := reverseChronologicalLogLines
-			if len(logLines) < 20 {
-				return gomegaCustomError("got %d log lines, waiting for at least 20\nreverseChronologicalLogLines=\n%v", len(logLines), strings.Join(reverseChronologicalLogLines, "\n")), nil
-			}
-
-			// Requests are counted as successful when the response read from the log
-			// lines is the name of a recognizable serving pod.
-			consecutiveSuccessfulRequests := 0
-			for _, logLine := range logLines {
-				if logLine == "" || strings.HasPrefix(logLine, "Date:") {
-					continue
-				}
-				_, servingPodExists := zoneForServingPod[logLine]
-				if !servingPodExists {
-					return gomegaCustomError("request from client pod likely failed because we got an unrecognizable response = %v; want response to be one of the serving pod names\nreverseChronologicalLogLines=\n%v", logLine, strings.Join(reverseChronologicalLogLines, "\n")), nil
-				}
-				consecutiveSuccessfulRequests++
-				if consecutiveSuccessfulRequests >= 10 {
-					return nil, nil // Pass condition
-				}
-			}
-			// Ideally, the matcher would never reach this condition
-			return gomegaCustomError("requests didn't meet the required criteria to reach a serving pod\nreverseChronologicalLogLines=\n%v", strings.Join(reverseChronologicalLogLines, "\n")), nil
-		})
-		gomega.Eventually(ctx, requestsFromClient(clientPod)).WithPolling(5 * time.Second).WithTimeout(e2eservice.KubeProxyLagTimeout).Should(requestsSucceedByReachingAnyServingPod)
 	})
 })
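
Distilled from the new matcher, the pass condition is: every observed response names a wanted endpoint, at least 10 consecutive responses succeed, and every wanted endpoint is seen at least once. A standalone sketch of that check follows; the helper name, the plain-slice input, and the sample data are illustrative, not part of the commit.

package main

import (
	"fmt"
	"strings"

	"k8s.io/apimachinery/pkg/util/sets"
)

// reachedAllEndpoints reports whether the client's log shows at least
// minConsecutive consecutive responses from wanted endpoints while also
// having reached every wanted endpoint at least once.
func reachedAllEndpoints(logLines []string, wanted sets.Set[string], minConsecutive int) (bool, error) {
	unreached := wanted.Clone()
	consecutive := 0
	for _, line := range logLines {
		// Skip blanks and the timestamp lines the client loop interleaves.
		if line == "" || strings.HasPrefix(line, "Date:") {
			continue
		}
		if !wanted.Has(line) {
			return false, fmt.Errorf("unexpected endpoint %q", line)
		}
		consecutive++
		unreached.Delete(line)
		if consecutive >= minConsecutive && unreached.Len() == 0 {
			return true, nil
		}
	}
	return false, nil // not enough evidence yet; the matcher polls again
}

func main() {
	wanted := sets.New("server-0", "server-1")
	logs := []string{"Date: ...", "server-0", "server-1", "server-0"}
	ok, err := reachedAllEndpoints(logs, wanted, 3)
	fmt.Println(ok, err) // true <nil>
}

As in the diff, the log lines are scanned newest-first (reverseChronologicalLogLines), so the consecutive counter measures the most recent behavior rather than stale failures from before the endpoints were programmed.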