diff --git a/test/e2e/apps/job.go b/test/e2e/apps/job.go
index 83fdf70ee2e..8e25dc433b9 100644
--- a/test/e2e/apps/job.go
+++ b/test/e2e/apps/job.go
@@ -185,22 +185,57 @@ var _ = SIGDescribe("Job", func() {
 	// 4. Make sure the 0-indexed pod is running
 	// 5. Evict the 0-indexed pod
 	// 6. Await for the job to successfully complete
-	ginkgo.It("should allow to use the pod failure policy to not count pod disruption towards the backoffLimit", func() {
-		mode := batchv1.IndexedCompletion
+	ginkgo.DescribeTable("Using a pod failure policy to not count some failures towards the backoffLimit",
+		func(policy *batchv1.PodFailurePolicy) {
+			mode := batchv1.IndexedCompletion
 
-		// We set the backoffLimit to 0 so that any pod failure would trigger
-		// job failure if not for the pod failure policy to ignore the failed
-		// pods from counting them towards the backoffLimit.
-		backoffLimit := int32(0)
+			// We set the backoffLimit to 0 so that any pod failure would trigger
+			// job failure if not for the pod failure policy to ignore the failed
+			// pods from counting them towards the backoffLimit.
+			backoffLimit := int32(0)
 
-		ginkgo.By("Looking for a node to schedule job pods")
-		node, err := e2enode.GetRandomReadySchedulableNode(f.ClientSet)
-		framework.ExpectNoError(err)
+			ginkgo.By("Looking for a node to schedule job pods")
+			node, err := e2enode.GetRandomReadySchedulableNode(f.ClientSet)
+			framework.ExpectNoError(err)
 
-		ginkgo.By("Creating a job")
-		job := e2ejob.NewTestJobOnNode("notTerminateOnce", "pod-disruption-failure-ignore", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
-		job.Spec.CompletionMode = &mode
-		job.Spec.PodFailurePolicy = &batchv1.PodFailurePolicy{
+			ginkgo.By("Creating a job")
+			job := e2ejob.NewTestJobOnNode("notTerminateOnce", "pod-disruption-failure-ignore", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
+			job.Spec.CompletionMode = &mode
+			job.Spec.PodFailurePolicy = policy
+			job, err = e2ejob.CreateJob(f.ClientSet, f.Namespace.Name, job)
+			framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
+
+			ginkgo.By("Awaiting for all non 0-indexed pods to succeed to ensure the marker file is created")
+			err = e2ejob.WaitForJobPodsSucceeded(f.ClientSet, f.Namespace.Name, job.Name, completions-1)
+			framework.ExpectNoError(err, "failed to await for all non 0-indexed pods to succeed for job: %s/%s", job.Name, job.Namespace)
+
+			ginkgo.By("Awaiting for the 0-indexed pod to be running")
+			err = e2ejob.WaitForJobPodsRunning(f.ClientSet, f.Namespace.Name, job.Name, 1)
+			framework.ExpectNoError(err, "failed to await for the 0-indexed pod to be running for the job: %s/%s", job.Name, job.Namespace)
+
+			pods, err := e2ejob.GetAllRunningJobPods(f.ClientSet, f.Namespace.Name, job.Name)
+			framework.ExpectNoError(err, "failed to get running pods for the job: %s/%s", job.Name, job.Namespace)
+			framework.ExpectEqual(len(pods), 1, "Exactly one running pod is expected")
+			pod := pods[0]
+			ginkgo.By(fmt.Sprintf("Evicting the running pod: %s/%s", pod.Name, pod.Namespace))
+			evictTarget := &policyv1.Eviction{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      pod.Name,
+					Namespace: pod.Namespace,
+				},
+			}
+			err = f.ClientSet.CoreV1().Pods(pod.Namespace).EvictV1(context.TODO(), evictTarget)
+			framework.ExpectNoError(err, "failed to evict the pod: %s/%s", pod.Name, pod.Namespace)
+
+			ginkgo.By(fmt.Sprintf("Awaiting for the pod: %s/%s to be deleted", pod.Name, pod.Namespace))
+			err = e2epod.WaitForPodNotFoundInNamespace(f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodDelete)
+			framework.ExpectNoError(err, "failed to await for the pod to be deleted: %s/%s", pod.Name, pod.Namespace)
+
+			ginkgo.By("Ensuring job reaches completions")
+			err = e2ejob.WaitForJobComplete(f.ClientSet, f.Namespace.Name, job.Name, completions)
+			framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
+		},
+		ginkgo.Entry("Ignore DisruptionTarget condition", &batchv1.PodFailurePolicy{
 			Rules: []batchv1.PodFailurePolicyRule{
 				{
 					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
@@ -221,40 +256,21 @@ var _ = SIGDescribe("Job", func() {
 					},
 				},
 			},
-		}
-		job, err = e2ejob.CreateJob(f.ClientSet, f.Namespace.Name, job)
-		framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
-
-		ginkgo.By("Awaiting for all non 0-indexed pods to succeed to ensure the marker file is created")
-		err = e2ejob.WaitForJobPodsSucceeded(f.ClientSet, f.Namespace.Name, job.Name, completions-1)
-		framework.ExpectNoError(err, "failed to await for all non 0-indexed pods to succeed for job: %s/%s", job.Name, job.Namespace)
-
-		ginkgo.By("Awaiting for the 0-indexed pod to be running")
-		err = e2ejob.WaitForJobPodsRunning(f.ClientSet, f.Namespace.Name, job.Name, 1)
-		framework.ExpectNoError(err, "failed to await for the 0-indexed pod to be running for the job: %s/%s", job.Name, job.Namespace)
-
-		pods, err := e2ejob.GetAllRunningJobPods(f.ClientSet, f.Namespace.Name, job.Name)
-		framework.ExpectNoError(err, "failed to get running pods for the job: %s/%s", job.Name, job.Namespace)
-		framework.ExpectEqual(len(pods), 1, "Exactly one running pod is expected")
-		pod := pods[0]
-		ginkgo.By(fmt.Sprintf("Evicting the running pod: %s/%s", pod.Name, pod.Namespace))
-		evictTarget := &policyv1.Eviction{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:      pod.Name,
-				Namespace: pod.Namespace,
+		}),
+		ginkgo.Entry("Ignore exit code 137", &batchv1.PodFailurePolicy{
+			Rules: []batchv1.PodFailurePolicyRule{
+				{
+					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
+					// And the 137 in the 0-indexed pod due to eviction.
+					Action: batchv1.PodFailurePolicyActionIgnore,
+					OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
+						Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
+						Values:   []int32{1, 137},
+					},
+				},
 			},
-		}
-		f.ClientSet.CoreV1().Pods(pod.Namespace).EvictV1(context.TODO(), evictTarget)
-		framework.ExpectNoError(err, "failed to evict the pod: %s/%s", pod.Name, pod.Namespace)
-
-		ginkgo.By(fmt.Sprintf("Awaiting for the pod: %s/%s to be deleted", pod.Name, pod.Namespace))
-		err = e2epod.WaitForPodNotFoundInNamespace(f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodDelete)
-		framework.ExpectNoError(err, "failed to await for the pod to be deleted: %s/%s", pod.Name, pod.Namespace)
-
-		ginkgo.By("Ensuring job reaches completions")
-		err = e2ejob.WaitForJobComplete(f.ClientSet, f.Namespace.Name, job.Name, completions)
-		framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
-	})
+		}),
+	)
 
 	ginkgo.It("should not create pods when created in suspend state", func() {
 		ginkgo.By("Creating a job with suspend=true")