From af772412be6a91f8257f485b175ee1b1bf6ee338 Mon Sep 17 00:00:00 2001
From: Michal Wozniak
Date: Fri, 25 Oct 2024 11:39:22 +0200
Subject: [PATCH] Job Pod Failure policy refactor e2e test to pass on Windows

---
 test/e2e/apps/job.go               | 43 +++++------------------------
 test/e2e/framework/job/fixtures.go | 15 +++++++++++
 2 files changed, 21 insertions(+), 37 deletions(-)

diff --git a/test/e2e/apps/job.go b/test/e2e/apps/job.go
index 03a5f043f47..e8d878a53d0 100644
--- a/test/e2e/apps/job.go
+++ b/test/e2e/apps/job.go
@@ -146,10 +146,10 @@ var _ = SIGDescribe("Job", func() {
 	/*
 		Testname: Ensure pod failure policy allows to ignore failure matching on the exit code
 		Description: This test is using an indexed job. The pod corresponding to each index
-		creates a marker file on the host and runs 'forever' until evicted. Once
-		the marker file is created the pod succeeds seeing it on restart. Thus,
-		we trigger one failure per index due to eviction, so the Job would be
-		marked as failed, if not for the ignore rule matching on exit codes.
+		creates a marker file on the host and fails. Once the marker file is
+		created, the pod succeeds on restart upon seeing it. Thus, we trigger
+		one failure per index, so the Job would be marked as failed, if not
+		for the ignore rule matching on exit codes.
 	*/
 	ginkgo.It("should allow to use a pod failure policy to ignore failure matching on exit code", func(ctx context.Context) {
 		// We set the backoffLimit = numPods-1 so that we can tolerate random
@@ -165,17 +165,15 @@ var _ = SIGDescribe("Job", func() {
 			framework.ExpectNoError(err)
 
 			ginkgo.By("Creating a job")
-			job := e2ejob.NewTestJobOnNode("notTerminateOncePerIndex", "evicted-pod-ignore-on-exit-code", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
+			job := e2ejob.NewTestJobOnNode("failOncePerIndex", "fail-pod-ignore-on-exit-code", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
 			job.Spec.CompletionMode = ptr.To(batchv1.IndexedCompletion)
 			job.Spec.PodFailurePolicy = &batchv1.PodFailurePolicy{
 				Rules: []batchv1.PodFailurePolicyRule{
 					{
-						// Ignore the pod failure caused by the eviction based on the
-						// exit code corresponding to SIGKILL.
 						Action: batchv1.PodFailurePolicyActionIgnore,
 						OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
 							Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
-							Values:   []int32{137},
+							Values:   []int32{42},
 						},
 					},
 				},
@@ -183,35 +181,6 @@ var _ = SIGDescribe("Job", func() {
 			job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
 			framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
 
-			ginkgo.By("Waiting for all the pods to be ready")
-			err = e2ejob.WaitForJobReady(ctx, f.ClientSet, f.Namespace.Name, job.Name, ptr.To(int32(numPods)))
-			framework.ExpectNoError(err, "failed to await for all pods to be ready for job: %s/%s", job.Name, job.Namespace)
-
-			ginkgo.By("Fetch all running pods")
-			pods, err := e2ejob.GetAllRunningJobPods(ctx, f.ClientSet, f.Namespace.Name, job.Name)
-			framework.ExpectNoError(err, "failed to get running pods for the job: %s/%s", job.Name, job.Namespace)
-			gomega.Expect(pods).To(gomega.HaveLen(numPods), "Number of running pods doesn't match parallelism")
-
-			ginkgo.By("Evict all the Pods")
-			workqueue.ParallelizeUntil(ctx, numPods, numPods, func(index int) {
-				defer ginkgo.GinkgoRecover()
-
-				pod := pods[index]
-				ginkgo.By(fmt.Sprintf("Evicting the running pod: %s/%s", pod.Name, pod.Namespace))
-				evictTarget := &policyv1.Eviction{
-					ObjectMeta: metav1.ObjectMeta{
-						Name:      pod.Name,
-						Namespace: pod.Namespace,
-					},
-				}
-				err = f.ClientSet.CoreV1().Pods(pod.Namespace).EvictV1(ctx, evictTarget)
-				framework.ExpectNoError(err, "failed to evict the pod: %s/%s", pod.Name, pod.Namespace)
-
-				ginkgo.By(fmt.Sprintf("Awaiting for the pod: %s/%s to be deleted", pod.Name, pod.Namespace))
-				err = e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodDelete)
-				framework.ExpectNoError(err, "failed to await for all pods to be deleted: %s/%s", pod.Name, pod.Namespace)
-			})
-
 			ginkgo.By("Ensuring job reaches completions")
 			err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, nil, completions)
 			framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
diff --git a/test/e2e/framework/job/fixtures.go b/test/e2e/framework/job/fixtures.go
index 9ea21b92808..c1fbe581c9b 100644
--- a/test/e2e/framework/job/fixtures.go
+++ b/test/e2e/framework/job/fixtures.go
@@ -136,6 +136,21 @@ func NewTestJobOnNode(behavior, name string, rPol v1.RestartPolicy, parallelism,
 			exit 1
 		fi
 		`}
+	case "failOncePerIndex":
+		// Use marker files per index. If the given marker file already exists
+		// then terminate successfully. Otherwise create the marker file and
+		// fail with exit code 42.
+		setupHostPathDirectory(job)
+		job.Spec.Template.Spec.Containers[0].Command = []string{"/bin/sh", "-c"}
+		job.Spec.Template.Spec.Containers[0].Args = []string{`
+		if [[ -r /data/foo-$JOB_COMPLETION_INDEX ]]
+		then
+			exit 0
+		else
+			touch /data/foo-$JOB_COMPLETION_INDEX
+			exit 42
+		fi
+		`}
 	case "notTerminateOncePerIndex":
 		// Use marker files per index. If the given marker file already exists
 		// then terminate successfully. Otherwise create the marker file and
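
---

Reviewer note (text after the diff is ignored by git-am, so this does not
affect applying the patch): the eviction flow is dropped presumably because
exit code 137 comes from the Linux 128+SIGKILL convention, which Windows
containers do not follow; having the container itself exit with 42 on its
first run makes the failure deterministic on both platforms. Below is a
minimal standalone sketch of the policy semantics the refactored test relies
on. It is illustrative only, not part of the patch, and assumes nothing
beyond the published k8s.io/api/batch/v1 types already used in the diff:

    package main

    import (
        "fmt"

        batchv1 "k8s.io/api/batch/v1"
    )

    func main() {
        // Ignore rule: a container exit code of 42 does not count against
        // the Job's backoffLimit, so each index may fail once (while
        // creating its marker file) and still complete via the replacement
        // pod that finds the marker and exits 0.
        policy := &batchv1.PodFailurePolicy{
            Rules: []batchv1.PodFailurePolicyRule{{
                Action: batchv1.PodFailurePolicyActionIgnore,
                OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
                    Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
                    Values:   []int32{42},
                },
            }},
        }
        fmt.Printf("%+v\n", policy)
    }

Because ignored failures are never charged to backoffLimit, the test's
backoffLimit of numPods-1 remains free to absorb unrelated random failures,
which is what the comment in the first hunk refers to.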