diff --git a/test/e2e/storage/BUILD b/test/e2e/storage/BUILD
index abe4cfdbc43..a9133cd6d04 100644
--- a/test/e2e/storage/BUILD
+++ b/test/e2e/storage/BUILD
@@ -58,6 +58,7 @@ go_library(
         "//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/rand:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/util/strategicpatch:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/version:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
diff --git a/test/e2e/storage/regional_pd.go b/test/e2e/storage/regional_pd.go
index 4c3a517721d..bbd196b1051 100644
--- a/test/e2e/storage/regional_pd.go
+++ b/test/e2e/storage/regional_pd.go
@@ -24,20 +24,24 @@ import (
 	"strings"
 	"time"
 
+	"encoding/json"
+
 	appsv1 "k8s.io/api/apps/v1"
 	"k8s.io/api/core/v1"
 	storage "k8s.io/api/storage/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/strategicpatch"
+	"k8s.io/apimachinery/pkg/util/wait"
 	clientset "k8s.io/client-go/kubernetes"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
 	"k8s.io/kubernetes/pkg/kubelet/apis"
 	kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
 	"k8s.io/kubernetes/pkg/volume/util"
 	"k8s.io/kubernetes/test/e2e/framework"
-	"k8s.io/kubernetes/test/e2e/framework/providers/gce"
 	"k8s.io/kubernetes/test/e2e/storage/testsuites"
 	"k8s.io/kubernetes/test/e2e/storage/utils"
 	imageutils "k8s.io/kubernetes/test/utils/image"
@@ -46,6 +50,7 @@ import (
 const (
 	pvDeletionTimeout       = 3 * time.Minute
 	statefulSetReadyTimeout = 3 * time.Minute
+	taintKeyPrefix          = "zoneTaint_"
 )
 
 var _ = utils.SIGDescribe("Regional PD", func() {
@@ -144,9 +149,6 @@ func testVolumeProvisioning(c clientset.Interface, ns string) {
 }
 
 func testZonalFailover(c clientset.Interface, ns string) {
-	nodes := framework.GetReadySchedulableNodesOrDie(c)
-	nodeCount := len(nodes.Items)
-
 	cloudZones := getTwoRandomZones(c)
 	class := newRegionalStorageClass(ns, cloudZones)
 	claimTemplate := newClaimTemplate(ns)
@@ -203,41 +205,40 @@ func testZonalFailover(c clientset.Interface, ns string) {
 	Expect(err).ToNot(HaveOccurred())
 	podZone := node.Labels[apis.LabelZoneFailureDomain]
 
-	// TODO (verult) Consider using node taints to simulate zonal failure instead.
-	By("deleting instance group belonging to pod's zone")
-
-	// Asynchronously detect a pod reschedule is triggered during/after instance group deletion.
-	waitStatus := make(chan error)
-	go func() {
-		waitStatus <- waitForStatefulSetReplicasNotReady(statefulSet.Name, ns, c)
-	}()
-
-	cloud, err := gce.GetGCECloud()
-	if err != nil {
-		Expect(err).NotTo(HaveOccurred())
-	}
-	instanceGroupName := framework.TestContext.CloudConfig.NodeInstanceGroup
-	instanceGroup, err := cloud.GetInstanceGroup(instanceGroupName, podZone)
-	Expect(err).NotTo(HaveOccurred(),
-		"Error getting instance group %s in zone %s", instanceGroupName, podZone)
-	templateName, err := framework.GetManagedInstanceGroupTemplateName(podZone)
-	Expect(err).NotTo(HaveOccurred(),
-		"Error getting instance group template in zone %s", podZone)
-	err = framework.DeleteManagedInstanceGroup(podZone)
-	Expect(err).NotTo(HaveOccurred(),
-		"Error deleting instance group in zone %s", podZone)
+	By("tainting nodes in the zone the pod is scheduled in")
+	selector := labels.SelectorFromSet(labels.Set(map[string]string{apis.LabelZoneFailureDomain: podZone}))
+	nodesInZone, err := c.CoreV1().Nodes().List(metav1.ListOptions{LabelSelector: selector.String()})
+	Expect(err).ToNot(HaveOccurred())
+	removeTaintFunc := addTaint(c, ns, nodesInZone.Items, podZone)
 
 	defer func() {
-		framework.Logf("recreating instance group %s", instanceGroup.Name)
-
-		framework.ExpectNoError(framework.CreateManagedInstanceGroup(instanceGroup.Size, podZone, templateName),
-			"Error recreating instance group %s in zone %s", instanceGroup.Name, podZone)
-		framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount, framework.RestartNodeReadyAgainTimeout),
-			"Error waiting for nodes from the new instance group to become ready.")
+		framework.Logf("removing previously added node taints")
+		removeTaintFunc()
 	}()
 
-	err = <-waitStatus
-	Expect(err).ToNot(HaveOccurred(), "Error waiting for replica to be deleted during failover: %v", err)
+	By("deleting StatefulSet pod")
+	err = c.CoreV1().Pods(ns).Delete(pod.Name, &metav1.DeleteOptions{})
+
+	// Verify the pod is scheduled in the other zone.
+	By("verifying the pod is scheduled in a different zone.")
+	var otherZone string
+	if cloudZones[0] == podZone {
+		otherZone = cloudZones[1]
+	} else {
+		otherZone = cloudZones[0]
+	}
+	err = wait.PollImmediate(framework.Poll, statefulSetReadyTimeout, func() (bool, error) {
+		framework.Logf("checking whether new pod is scheduled in zone %q", otherZone)
+		pod = getPod(c, ns, regionalPDLabels)
+		nodeName = pod.Spec.NodeName
+		node, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
+		if err != nil {
+			return false, nil
+		}
+		newPodZone := node.Labels[apis.LabelZoneFailureDomain]
+		return newPodZone == otherZone, nil
+	})
+	Expect(err).NotTo(HaveOccurred(), "Error waiting for pod to be scheduled in a different zone (%q): %v", otherZone, err)
 
 	err = framework.WaitForStatefulSetReplicasReady(statefulSet.Name, ns, c, 3*time.Second, framework.RestartPodReadyAgainTimeout)
 	if err != nil {
@@ -252,7 +253,6 @@ func testZonalFailover(c clientset.Interface, ns string) {
 		"The same PVC should be used after failover.")
 
 	By("verifying the container output has 2 lines, indicating the pod has been created twice using the same regional PD.")
-	pod = getPod(c, ns, regionalPDLabels)
 	logs, err := framework.GetPodLogs(c, ns, pod.Name, "")
 	Expect(err).NotTo(HaveOccurred(),
 		"Error getting logs from pod %s in namespace %s", pod.Name, ns)
@@ -261,21 +261,40 @@ func testZonalFailover(c clientset.Interface, ns string) {
 	Expect(lineCount).To(Equal(expectedLineCount),
 		"Line count of the written file should be %d.", expectedLineCount)
 
-	// Verify the pod is scheduled in the other zone.
-	By("verifying the pod is scheduled in a different zone.")
-	var otherZone string
-	if cloudZones[0] == podZone {
-		otherZone = cloudZones[1]
-	} else {
-		otherZone = cloudZones[0]
-	}
-	nodeName = pod.Spec.NodeName
-	node, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
-	Expect(err).ToNot(HaveOccurred())
-	newPodZone := node.Labels[apis.LabelZoneFailureDomain]
-	Expect(newPodZone).To(Equal(otherZone),
-		"The pod should be scheduled in zone %s after all nodes in zone %s have been deleted", otherZone, podZone)
+}
+
+func addTaint(c clientset.Interface, ns string, nodes []v1.Node, podZone string) (removeTaint func()) {
+	reversePatches := make(map[string][]byte)
+	for _, node := range nodes {
+		oldData, err := json.Marshal(node)
+		Expect(err).NotTo(HaveOccurred())
+
+		node.Spec.Taints = append(node.Spec.Taints, v1.Taint{
+			Key:    taintKeyPrefix + ns,
+			Value:  podZone,
+			Effect: v1.TaintEffectNoSchedule,
+		})
+
+		newData, err := json.Marshal(node)
+		Expect(err).NotTo(HaveOccurred())
+
+		patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, v1.Node{})
+		Expect(err).NotTo(HaveOccurred())
+
+		reversePatchBytes, err := strategicpatch.CreateTwoWayMergePatch(newData, oldData, v1.Node{})
+		Expect(err).NotTo(HaveOccurred())
+		reversePatches[node.Name] = reversePatchBytes
+
+		_, err = c.CoreV1().Nodes().Patch(node.Name, types.StrategicMergePatchType, patchBytes)
+		Expect(err).ToNot(HaveOccurred())
+	}
+
+	return func() {
+		for nodeName, reversePatch := range reversePatches {
+			_, err := c.CoreV1().Nodes().Patch(nodeName, types.StrategicMergePatchType, reversePatch)
+			Expect(err).ToNot(HaveOccurred())
+		}
+	}
 }
 
 func testRegionalDelayedBinding(c clientset.Interface, ns string) {