Merge pull request #113927 from alculquicondor/job-wait-finish

Add e2e test to ignore failures with 137 exit code
Kubernetes Prow Robot, 2022-11-15 12:32:48 -08:00, committed by GitHub
commit e39a0af5ce


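Background for the new test case: exit code 137 is what a container reports after being killed with SIGKILL (128 + 9), which is typically what happens to the containers of an evicted pod once the termination grace period expires. The table entry added below therefore exercises an OnExitCodes rule matching 137, alongside the existing case that matches the DisruptionTarget pod condition. As a rough, standalone sketch (not part of this change, assuming the JobPodFailurePolicy feature gate is enabled, and with placeholder object, container, and image names), an equivalent policy on an ordinary Job could be written as:

package main

import (
	"encoding/json"
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/pointer"
)

// exampleJob builds a Job whose pod failure policy ignores container exit
// codes 1 and 137 (SIGKILL), so such failures do not count towards the
// backoffLimit. All names below are illustrative, not taken from the PR.
func exampleJob() *batchv1.Job {
	return &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{Name: "example-job"},
		Spec: batchv1.JobSpec{
			BackoffLimit: pointer.Int32(0),
			PodFailurePolicy: &batchv1.PodFailurePolicy{
				Rules: []batchv1.PodFailurePolicyRule{
					{
						Action: batchv1.PodFailurePolicyActionIgnore,
						OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
							Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
							Values:   []int32{1, 137},
						},
					},
				},
			},
			Template: v1.PodTemplateSpec{
				Spec: v1.PodSpec{
					// A pod failure policy requires restartPolicy: Never.
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{Name: "main", Image: "busybox", Command: []string{"sh", "-c", "exit 0"}},
					},
				},
			},
		},
	}
}

func main() {
	// Print the manifest; in a real test it would be submitted with a clientset,
	// e.g. clientset.BatchV1().Jobs(namespace).Create(ctx, exampleJob(), metav1.CreateOptions{}).
	out, _ := json.MarshalIndent(exampleJob(), "", "  ")
	fmt.Println(string(out))
}
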
@@ -185,7 +185,8 @@ var _ = SIGDescribe("Job", func() {
 	// 4. Make sure the 0-indexed pod is running
 	// 5. Evict the 0-indexed pod
 	// 6. Await for the job to successfully complete
-	ginkgo.It("should allow to use the pod failure policy to not count pod disruption towards the backoffLimit", func() {
+	ginkgo.DescribeTable("Using a pod failure policy to not count some failures towards the backoffLimit",
+		func(policy *batchv1.PodFailurePolicy) {
 			mode := batchv1.IndexedCompletion
 			// We set the backoffLimit to 0 so that any pod failure would trigger
@@ -200,28 +201,7 @@
 			ginkgo.By("Creating a job")
 			job := e2ejob.NewTestJobOnNode("notTerminateOnce", "pod-disruption-failure-ignore", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
 			job.Spec.CompletionMode = &mode
-			job.Spec.PodFailurePolicy = &batchv1.PodFailurePolicy{
-				Rules: []batchv1.PodFailurePolicyRule{
-					{
-						// Ignore failures of the non 0-indexed pods which fail until the marker file is created
-						Action: batchv1.PodFailurePolicyActionIgnore,
-						OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
-							Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
-							Values: []int32{1},
-						},
-					},
-					{
-						// Ignore the pod failure caused by the eviction
-						Action: batchv1.PodFailurePolicyActionIgnore,
-						OnPodConditions: []batchv1.PodFailurePolicyOnPodConditionsPattern{
-							{
-								Type: v1.DisruptionTarget,
-								Status: v1.ConditionTrue,
-							},
-						},
-					},
-				},
-			}
+			job.Spec.PodFailurePolicy = policy
 			job, err = e2ejob.CreateJob(f.ClientSet, f.Namespace.Name, job)
 			framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
@@ -254,7 +234,43 @@
 			ginkgo.By("Ensuring job reaches completions")
 			err = e2ejob.WaitForJobComplete(f.ClientSet, f.Namespace.Name, job.Name, completions)
 			framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
-	})
+		},
+		ginkgo.Entry("Ignore DisruptionTarget condition", &batchv1.PodFailurePolicy{
+			Rules: []batchv1.PodFailurePolicyRule{
+				{
+					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
+					Action: batchv1.PodFailurePolicyActionIgnore,
+					OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
+						Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
+						Values: []int32{1},
+					},
+				},
+				{
+					// Ignore the pod failure caused by the eviction
+					Action: batchv1.PodFailurePolicyActionIgnore,
+					OnPodConditions: []batchv1.PodFailurePolicyOnPodConditionsPattern{
+						{
+							Type: v1.DisruptionTarget,
+							Status: v1.ConditionTrue,
+						},
+					},
+				},
+			},
+		}),
+		ginkgo.Entry("Ignore exit code 137", &batchv1.PodFailurePolicy{
+			Rules: []batchv1.PodFailurePolicyRule{
+				{
+					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
+					// and the 137 in the 0-indexed pod due to eviction.
+					Action: batchv1.PodFailurePolicyActionIgnore,
+					OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
+						Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
+						Values: []int32{1, 137},
+					},
+				},
+			},
+		}),
+	)
 	ginkgo.It("should not create pods when created in suspend state", func() {
 		ginkgo.By("Creating a job with suspend=true")