Merge pull request #113927 from alculquicondor/job-wait-finish

Add e2e test to ignore failures with 137 exit code
2025-09-05 19:21:37 +00:00 · 2022-11-15 12:32:48 -08:00
parent 591fc0d8ab f40debc8c5
commit e39a0af5ce
1 changed files with 62 additions and 46 deletions
--- a/test/e2e/apps/job.go
+++ b/test/e2e/apps/job.go
@@ -185,7 +185,8 @@ var _ = SIGDescribe("Job", func() {
 	// 4. Make sure the 0-indexed pod is running
 	// 5. Evict the 0-indexed pod
 	// 6. Wait for the job to complete successfully
-	ginkgo.It("should allow to use the pod failure policy to not count pod disruption towards the backoffLimit", func() {
+	ginkgo.DescribeTable("Using a pod failure policy to not count some failures towards the backoffLimit",
 		func(policy *batchv1.PodFailurePolicy) {
 			mode := batchv1.IndexedCompletion
 			// We set the backoffLimit to 0 so that any pod failure would trigger
@@ -200,28 +201,7 @@ var _ = SIGDescribe("Job", func() {
 			ginkgo.By("Creating a job")
 			job := e2ejob.NewTestJobOnNode("notTerminateOnce", "pod-disruption-failure-ignore", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
 			job.Spec.CompletionMode = &mode
-		job.Spec.PodFailurePolicy = &batchv1.PodFailurePolicy{
+			job.Spec.PodFailurePolicy = policy
-			Rules: []batchv1.PodFailurePolicyRule{
-				{
-					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
-					Action: batchv1.PodFailurePolicyActionIgnore,
-					OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
-						Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
-						Values:   []int32{1},
-					},
-				},
-				{
-					// Ignore the pod failure caused by the eviction
-					Action: batchv1.PodFailurePolicyActionIgnore,
-					OnPodConditions: []batchv1.PodFailurePolicyOnPodConditionsPattern{
-						{
-							Type:   v1.DisruptionTarget,
-							Status: v1.ConditionTrue,
-						},
-					},
-				},
-			},
-		}
 			job, err = e2ejob.CreateJob(f.ClientSet, f.Namespace.Name, job)
 			framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
@@ -254,7 +234,43 @@ var _ = SIGDescribe("Job", func() {
 			ginkgo.By("Ensuring job reaches completions")
 			err = e2ejob.WaitForJobComplete(f.ClientSet, f.Namespace.Name, job.Name, completions)
 			framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
-	})
+		},
 		ginkgo.Entry("Ignore DisruptionTarget condition", &batchv1.PodFailurePolicy{
 			Rules: []batchv1.PodFailurePolicyRule{
 				{
 					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
 					Action: batchv1.PodFailurePolicyActionIgnore,
 					OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
 						Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
 						Values:   []int32{1},
 					},
 				},
 				{
 					// Ignore the pod failure caused by the eviction
 					Action: batchv1.PodFailurePolicyActionIgnore,
 					OnPodConditions: []batchv1.PodFailurePolicyOnPodConditionsPattern{
 						{
 							Type:   v1.DisruptionTarget,
 							Status: v1.ConditionTrue,
 						},
 					},
 				},
 			},
 		}),
 		ginkgo.Entry("Ignore exit code 137", &batchv1.PodFailurePolicy{
 			Rules: []batchv1.PodFailurePolicyRule{
 				{
 					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
 					// And the 137 in the 0-indexed pod due to eviction.
 					Action: batchv1.PodFailurePolicyActionIgnore,
 					OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
 						Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
 						Values:   []int32{1, 137},
 					},
 				},
 			},
 		}),
 	)
 	ginkgo.It("should not create pods when created in suspend state", func() {
 		ginkgo.By("Creating a job with suspend=true")