Merge pull request #113927 from alculquicondor/job-wait-finish

Add e2e test to ignore failures with 137 exit code
Kubernetes Prow Robot, 2022-11-15 12:32:48 -08:00, committed by GitHub
commit e39a0af5ce


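Background for the new test case: exit code 137 is what a container reports after being killed with SIGKILL (128 + 9), which is typically what happens to the containers of an evicted pod once the termination grace period expires. The table entry added below therefore exercises an OnExitCodes rule matching 137, alongside the existing case that matches the DisruptionTarget pod condition. As a rough, standalone sketch (not part of this change, assuming the JobPodFailurePolicy feature gate is enabled, and with placeholder object, container, and image names), an equivalent policy on an ordinary Job could be written as:

package main

import (
	"encoding/json"
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/pointer"
)

// exampleJob builds a Job whose pod failure policy ignores container exit
// codes 1 and 137 (SIGKILL), so such failures do not count towards the
// backoffLimit. All names below are illustrative, not taken from the PR.
func exampleJob() *batchv1.Job {
	return &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{Name: "example-job"},
		Spec: batchv1.JobSpec{
			BackoffLimit: pointer.Int32(0),
			PodFailurePolicy: &batchv1.PodFailurePolicy{
				Rules: []batchv1.PodFailurePolicyRule{
					{
						Action: batchv1.PodFailurePolicyActionIgnore,
						OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
							Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
							Values:   []int32{1, 137},
						},
					},
				},
			},
			Template: v1.PodTemplateSpec{
				Spec: v1.PodSpec{
					// A pod failure policy requires restartPolicy: Never.
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{Name: "main", Image: "busybox", Command: []string{"sh", "-c", "exit 0"}},
					},
				},
			},
		},
	}
}

func main() {
	// Print the manifest; in a real test it would be submitted with a clientset,
	// e.g. clientset.BatchV1().Jobs(namespace).Create(ctx, exampleJob(), metav1.CreateOptions{}).
	out, _ := json.MarshalIndent(exampleJob(), "", "  ")
	fmt.Println(string(out))
}
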
@@ -185,7 +185,8 @@ var _ = SIGDescribe("Job", func() {
 	// 4. Make sure the 0-indexed pod is running
 	// 5. Evict the 0-indexed pod
 	// 6. Await for the job to successfully complete
-	ginkgo.It("should allow to use the pod failure policy to not count pod disruption towards the backoffLimit", func() {
+	ginkgo.DescribeTable("Using a pod failure policy to not count some failures towards the backoffLimit",
+		func(policy *batchv1.PodFailurePolicy) {
 			mode := batchv1.IndexedCompletion
 			// We set the backoffLimit to 0 so that any pod failure would trigger
@@ -200,28 +201,7 @@
 			ginkgo.By("Creating a job")
 			job := e2ejob.NewTestJobOnNode("notTerminateOnce", "pod-disruption-failure-ignore", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
 			job.Spec.CompletionMode = &mode
-			job.Spec.PodFailurePolicy = &batchv1.PodFailurePolicy{
-				Rules: []batchv1.PodFailurePolicyRule{
-					{
-						// Ignore failures of the non 0-indexed pods which fail until the marker file is created
-						Action: batchv1.PodFailurePolicyActionIgnore,
-						OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
-							Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
-							Values: []int32{1},
-						},
-					},
-					{
-						// Ignore the pod failure caused by the eviction
-						Action: batchv1.PodFailurePolicyActionIgnore,
-						OnPodConditions: []batchv1.PodFailurePolicyOnPodConditionsPattern{
-							{
-								Type: v1.DisruptionTarget,
-								Status: v1.ConditionTrue,
-							},
-						},
-					},
-				},
-			}
+			job.Spec.PodFailurePolicy = policy
 			job, err = e2ejob.CreateJob(f.ClientSet, f.Namespace.Name, job)
 			framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
@@ -254,7 +234,43 @@
 			ginkgo.By("Ensuring job reaches completions")
 			err = e2ejob.WaitForJobComplete(f.ClientSet, f.Namespace.Name, job.Name, completions)
 			framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
-	})
+		},
+		ginkgo.Entry("Ignore DisruptionTarget condition", &batchv1.PodFailurePolicy{
+			Rules: []batchv1.PodFailurePolicyRule{
+				{
+					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
+					Action: batchv1.PodFailurePolicyActionIgnore,
+					OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
+						Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
+						Values: []int32{1},
+					},
+				},
+				{
+					// Ignore the pod failure caused by the eviction
+					Action: batchv1.PodFailurePolicyActionIgnore,
+					OnPodConditions: []batchv1.PodFailurePolicyOnPodConditionsPattern{
+						{
+							Type: v1.DisruptionTarget,
+							Status: v1.ConditionTrue,
+						},
+					},
+				},
+			},
+		}),
+		ginkgo.Entry("Ignore exit code 137", &batchv1.PodFailurePolicy{
+			Rules: []batchv1.PodFailurePolicyRule{
+				{
+					// Ignore failures of the non 0-indexed pods which fail until the marker file is created
+					// and the 137 in the 0-indexed pod due to eviction.
+					Action: batchv1.PodFailurePolicyActionIgnore,
+					OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
+						Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
+						Values: []int32{1, 137},
+					},
+				},
+			},
+		}),
+	)
 	ginkgo.It("should not create pods when created in suspend state", func() {
 		ginkgo.By("Creating a job with suspend=true")