Support handling of pod failures with respect to the specified rules

Michal Wozniak
2022-08-04 08:21:32 +02:00
parent c8edeab234
commit bf9ce70de3
43 changed files with 5934 additions and 127 deletions
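For orientation: the pod failure policy this commit wires in lets a Job react to each pod failure with an action such as FailJob, Ignore, or Count. Below is a minimal, self-contained Go sketch of that matching idea only; the types and the match function are simplified stand-ins, not the batch/v1 API and not the matchPodFailurePolicy helper added by this commit.

package main

import "fmt"

// Simplified stand-ins for the pod failure policy shapes; the real types live
// in the batch/v1 API and carry more fields (operators, pod conditions, ...).
type action string

const (
    actionFailJob action = "FailJob"
    actionIgnore  action = "Ignore"
    actionCount   action = "Count"
)

type rule struct {
    action    action
    exitCodes []int32 // matched with an "In"-style operator in this sketch
}

// match returns the action of the first rule matching the container exit code,
// defaulting to Count so unmatched failures still count against backoffLimit.
func match(rules []rule, exitCode int32) action {
    for _, r := range rules {
        for _, c := range r.exitCodes {
            if c == exitCode {
                return r.action
            }
        }
    }
    return actionCount
}

func main() {
    rules := []rule{
        {action: actionIgnore, exitCodes: []int32{137}}, // e.g. a disruption-style exit to tolerate
        {action: actionFailJob, exitCodes: []int32{42}}, // e.g. an unrecoverable software bug
    }
    for _, code := range []int32{137, 42, 1} {
        fmt.Printf("exit code %d -> %s\n", code, match(rules, code))
    }
}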


@@ -61,6 +61,12 @@ import (
// a Job. It is used if the feature gate JobReadyPods is enabled.
const podUpdateBatchPeriod = time.Second
const (
// jobConditionReasonPodFailurePolicy indicates that a job failure condition
// was added because a failed pod matched a pod failure policy rule.
jobConditionReasonPodFailurePolicy = "PodFailurePolicy"
)
// controllerKind contains the schema.GroupVersionKind for this controller type.
var controllerKind = batch.SchemeGroupVersion.WithKind("Job")
@@ -758,16 +764,31 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (forget bool, rEr
exceedsBackoffLimit := jobHasNewFailure && (active != *job.Spec.Parallelism) &&
(failed > *job.Spec.BackoffLimit)
if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
// check if the number of pod restarts exceeds the backoff limit (for restartPolicy OnFailure only)
// OR if the number of failed pods increased since the last syncJob
finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "BackoffLimitExceeded", "Job has reached the specified backoff limit")
} else if pastActiveDeadline(&job) {
finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "DeadlineExceeded", "Job was active longer than specified deadline")
} else if job.Spec.ActiveDeadlineSeconds != nil && !jobSuspended(&job) {
syncDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second - time.Since(job.Status.StartTime.Time)
klog.V(2).InfoS("Job has activeDeadlineSeconds configuration. Will sync this job again", "job", key, "nextSyncIn", syncDuration)
jm.queue.AddAfter(key, syncDuration)
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
if failureTargetCondition := findConditionByType(job.Status.Conditions, batch.AlphaNoCompatGuaranteeJobFailureTarget); failureTargetCondition != nil {
finishedCondition = newFailedConditionForFailureTarget(failureTargetCondition)
} else if failJobMessage := getFailJobMessage(&job, pods, uncounted.Failed()); failJobMessage != nil {
if uncounted != nil {
// Prepare the interim FailureTarget condition to record the failure message before the finalizers, which block removal of the pods, are removed.
finishedCondition = newCondition(batch.AlphaNoCompatGuaranteeJobFailureTarget, v1.ConditionTrue, jobConditionReasonPodFailurePolicy, *failJobMessage)
} else {
// Prepare the Failed job condition for the legacy path without finalizers (don't use the interim FailureTarget condition).
finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, jobConditionReasonPodFailurePolicy, *failJobMessage)
}
}
}
if finishedCondition == nil {
if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
// check if the number of pod restarts exceeds the backoff limit (for restartPolicy OnFailure only)
// OR if the number of failed pods increased since the last syncJob
finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "BackoffLimitExceeded", "Job has reached the specified backoff limit")
} else if pastActiveDeadline(&job) {
finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "DeadlineExceeded", "Job was active longer than specified deadline")
} else if job.Spec.ActiveDeadlineSeconds != nil && !jobSuspended(&job) {
syncDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second - time.Since(job.Status.StartTime.Time)
klog.V(2).InfoS("Job has activeDeadlineSeconds configuration. Will sync this job again", "job", key, "nextSyncIn", syncDuration)
jm.queue.AddAfter(key, syncDuration)
}
}
var prevSucceededIndexes, succeededIndexes orderedIntervals
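Read as prose, the new precedence above is: with the JobPodFailurePolicy gate enabled, an already-recorded interim FailureTarget condition is promoted to the final Failed condition; otherwise a failed pod matching a FailJob rule yields either the interim FailureTarget condition (when tracking with finalizers, i.e. uncounted != nil) or a Failed condition directly (legacy path); only if none of that set finishedCondition do the pre-existing backoff-limit and active-deadline checks run. A compressed sketch of that ordering with made-up boolean inputs (decide is a hypothetical helper, not controller code):

package main

import "fmt"

// decide mirrors the precedence of the finishedCondition logic above, using
// plain strings instead of JobCondition values.
func decide(gateOn, hasFailureTarget bool, failJobMessage string, withFinalizers, exceedsBackoff, pastDeadline bool) string {
    if gateOn {
        if hasFailureTarget {
            return "Failed (promoted from interim FailureTarget)"
        }
        if failJobMessage != "" {
            if withFinalizers {
                return "FailureTarget (interim, reason=PodFailurePolicy): " + failJobMessage
            }
            return "Failed (reason=PodFailurePolicy): " + failJobMessage
        }
    }
    // The pod failure policy produced nothing; fall back to the pre-existing checks.
    if exceedsBackoff {
        return "Failed (reason=BackoffLimitExceeded)"
    }
    if pastDeadline {
        return "Failed (reason=DeadlineExceeded)"
    }
    return "no finished condition yet"
}

func main() {
    fmt.Println(decide(true, false, "container exited with code 42", true, true, false))
    fmt.Println(decide(true, false, "", false, true, false))
    fmt.Println(decide(false, false, "ignored when the gate is off", false, false, true))
}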
@@ -1039,8 +1060,16 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
} else if pod.Status.Phase == v1.PodFailed || podTerminating {
ix := getCompletionIndex(pod.Annotations)
if !uncounted.failed.Has(string(pod.UID)) && (!isIndexed || (ix != unknownCompletionIndex && ix < int(*job.Spec.Completions))) {
needsFlush = true
uncountedStatus.Failed = append(uncountedStatus.Failed, pod.UID)
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
_, countFailed := matchPodFailurePolicy(job.Spec.PodFailurePolicy, pod)
if countFailed {
needsFlush = true
uncountedStatus.Failed = append(uncountedStatus.Failed, pod.UID)
}
} else {
needsFlush = true
uncountedStatus.Failed = append(uncountedStatus.Failed, pod.UID)
}
}
}
if len(newSucceededIndexes)+len(uncountedStatus.Succeeded)+len(uncountedStatus.Failed) >= MaxUncountedPods {
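The counting change above boils down to: with the gate on and a policy present, a failed pod is appended to uncountedStatus.Failed only when matchPodFailurePolicy reports that it should count (so Ignore rules suppress it); otherwise every failed pod counts as before. A tiny sketch of that gate, where policyCount stands in for the second return value of matchPodFailurePolicy:

package main

import "fmt"

// countFailedPod mimics the gating above: policyCount is a stand-in for the
// "count this failure" result of matchPodFailurePolicy.
func countFailedPod(gateOn, hasPolicy, policyCount bool) bool {
    if gateOn && hasPolicy {
        return policyCount
    }
    return true // legacy behaviour: every failed pod is counted
}

func main() {
    fmt.Println(countFailedPod(true, true, false)) // matched an Ignore rule: not counted
    fmt.Println(countFailedPod(true, true, true))  // matched a Count/FailJob rule (or no rule): counted
    fmt.Println(countFailedPod(false, true, true)) // gate off: counted regardless of the policy
}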
@@ -1060,6 +1089,18 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
job.Status.Succeeded = int32(succeededIndexes.total())
job.Status.CompletedIndexes = succeededIndexes.String()
}
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
if finishedCond != nil && finishedCond.Type == batch.AlphaNoCompatGuaranteeJobFailureTarget {
// Append the interim FailureTarget condition so the job status is updated with it before the finalizers are removed.
job.Status.Conditions = append(job.Status.Conditions, *finishedCond)
needsFlush = true
// Prepare the final Failed condition, used to update the job status once the finalizers are removed.
// It is also used by the enactJobFinished function for reporting.
finishedCond = newFailedConditionForFailureTarget(finishedCond)
}
}
var err error
if job, needsFlush, err = jm.flushUncountedAndRemoveFinalizers(ctx, job, podsToRemoveFinalizer, uidsWithFinalizer, &oldCounters, needsFlush); err != nil {
return err
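The block above is a two-step hand-off: the interim FailureTarget condition is appended and flushed to the API server before pod finalizers are removed, so the failure reason and message survive a controller restart, and only afterwards is the same reason and message re-expressed as the final Failed condition (via newFailedConditionForFailureTarget). A minimal sketch of that sequencing; condition, failJob and the callbacks are placeholders, not the controller's types:

package main

import "fmt"

type condition struct{ Type, Reason, Message string }

// failJob sketches the two-phase update: persist the interim FailureTarget
// first, drop the pod finalizers, then record the final Failed condition
// carrying the same reason and message.
func failJob(reason, message string, flush func(condition), removeFinalizers func()) {
    interim := condition{Type: "FailureTarget", Reason: reason, Message: message}
    flush(interim)     // status update before finalizer removal
    removeFinalizers() // pods can now be removed
    final := condition{Type: "Failed", Reason: interim.Reason, Message: interim.Message}
    flush(final) // terminal condition after the finalizers are gone
}

func main() {
    failJob("PodFailurePolicy", "container main exited with code 42",
        func(c condition) { fmt.Printf("flush %s (%s): %s\n", c.Type, c.Reason, c.Message) },
        func() { fmt.Println("remove pod finalizers") },
    )
}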
@@ -1077,7 +1118,8 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
}
// flushUncountedAndRemoveFinalizers does:
// 1. flush the Job status that might include new uncounted Pod UIDs.
// 1. flush the Job status that might include new uncounted Pod UIDs. Also flush the interim FailureTarget condition
// if present.
// 2. perform the removal of finalizers from Pods which are in the uncounted
// lists.
// 3. update the counters based on the Pods for which it successfully removed
@@ -1231,6 +1273,12 @@ func filterInUncountedUIDs(uncounted []types.UID, include sets.String) []types.U
return newUncounted
}
// newFailedConditionForFailureTarget creates a job Failed condition based on
// the interim FailureTarget condition.
func newFailedConditionForFailureTarget(condition *batch.JobCondition) *batch.JobCondition {
return newCondition(batch.JobFailed, v1.ConditionTrue, condition.Reason, condition.Message)
}
// pastBackoffLimitOnFailure checks whether the sum of container restart counts exceeds BackoffLimit.
// This method applies only to pods with restartPolicy == OnFailure.
func pastBackoffLimitOnFailure(job *batch.Job, pods []*v1.Pod) bool {
@@ -1282,7 +1330,24 @@ func newCondition(conditionType batch.JobConditionType, status v1.ConditionStatu
}
}
// getStatus returns number of succeeded and failed pods running a job
// getFailJobMessage returns a job failure message if the job should fail with the current counters
func getFailJobMessage(job *batch.Job, pods []*v1.Pod, uncounted sets.String) *string {
if !feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) || job.Spec.PodFailurePolicy == nil {
return nil
}
for _, p := range pods {
if isPodFailed(p, uncounted != nil) {
jobFailureMessage, _ := matchPodFailurePolicy(job.Spec.PodFailurePolicy, p)
if jobFailureMessage != nil {
return jobFailureMessage
}
}
}
return nil
}
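As used above, getFailJobMessage scans the job's pods and returns the message of the first failed pod whose failure matches a FailJob rule, or nil when nothing matches. A small stand-alone sketch of that first-match behaviour; matchRule is a made-up stand-in for matchPodFailurePolicy and the exit-code rule is assumed purely for illustration:

package main

import "fmt"

type failedPod struct {
    name     string
    exitCode int32
}

// matchRule is a stand-in for matchPodFailurePolicy: it returns a non-nil
// message only when the failure should fail the whole Job (a FailJob rule).
func matchRule(p failedPod) *string {
    if p.exitCode == 42 { // assumed rule: FailJob when the main container exits with 42
        msg := fmt.Sprintf("pod %s failed with exit code 42 matching a FailJob rule", p.name)
        return &msg
    }
    return nil
}

// getFailJobMessage returns the message of the first failed pod matching a
// FailJob rule, mirroring the helper above.
func getFailJobMessage(pods []failedPod) *string {
    for _, p := range pods {
        if msg := matchRule(p); msg != nil {
            return msg
        }
    }
    return nil
}

func main() {
    pods := []failedPod{{"job-x-1", 1}, {"job-x-2", 42}}
    if msg := getFailJobMessage(pods); msg != nil {
        fmt.Println(*msg)
    }
}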
// getStatus returns number of succeeded and failed pods running a job. The number
// of failed pods can be affected by the podFailurePolicy.
func getStatus(job *batch.Job, pods []*v1.Pod, uncounted *uncountedTerminatedPods, expectedRmFinalizers sets.String) (succeeded, failed int32) {
if uncounted != nil {
succeeded = job.Status.Succeeded
@@ -1292,13 +1357,15 @@ func getStatus(job *batch.Job, pods []*v1.Pod, uncounted *uncountedTerminatedPod
return p.Status.Phase == v1.PodSucceeded
}))
failed += int32(countValidPodsWithFilter(job, pods, uncounted.Failed(), expectedRmFinalizers, func(p *v1.Pod) bool {
if p.Status.Phase == v1.PodFailed {
return true
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && job.Spec.PodFailurePolicy != nil {
if !isPodFailed(p, uncounted != nil) {
return false
}
_, countFailed := matchPodFailurePolicy(job.Spec.PodFailurePolicy, p)
return countFailed
} else {
return isPodFailed(p, uncounted != nil)
}
// When tracking with finalizers: counting deleted Pods as failures to
// account for orphan Pods that never have a chance to reach the Failed
// phase.
return uncounted != nil && p.DeletionTimestamp != nil && p.Status.Phase != v1.PodSucceeded
}))
return succeeded, failed
}
@@ -1667,6 +1734,16 @@ func ensureJobConditionStatus(list []batch.JobCondition, cType batch.JobConditio
return list, false
}
func isPodFailed(p *v1.Pod, wFinalizers bool) bool {
if p.Status.Phase == v1.PodFailed {
return true
}
// When tracking with finalizers: counting deleted Pods as failures to
// account for orphan Pods that never have a chance to reach the Failed
// phase.
return wFinalizers && p.DeletionTimestamp != nil && p.Status.Phase != v1.PodSucceeded
}
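isPodFailed widens what counts as failed when tracking with finalizers: a deleted pod that never reached a terminal phase (an orphan) is treated as failed, while a deleted pod that already succeeded is not. A quick self-contained check of those cases against a simplified pod shape (not the v1.Pod type):

package main

import "fmt"

type pod struct {
    phase    string // "Failed", "Succeeded", "Running", ...
    deleting bool   // stand-in for a non-nil DeletionTimestamp
}

// isPodFailed mirrors the helper above: a Failed phase always counts, and when
// tracking with finalizers a deleted, not-yet-succeeded pod counts as well.
func isPodFailed(p pod, withFinalizers bool) bool {
    if p.phase == "Failed" {
        return true
    }
    return withFinalizers && p.deleting && p.phase != "Succeeded"
}

func main() {
    fmt.Println(isPodFailed(pod{phase: "Failed"}, false))                   // true
    fmt.Println(isPodFailed(pod{phase: "Running", deleting: true}, true))   // true: orphaned before reaching Failed
    fmt.Println(isPodFailed(pod{phase: "Succeeded", deleting: true}, true)) // false
}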
func findConditionByType(list []batch.JobCondition, cType batch.JobConditionType) *batch.JobCondition {
for i := range list {
if list[i].Type == cType {