Include ignored pods when computing backoff delay for Job pod failures
parent 90c362b343
commit 35d0af9243
@@ -794,7 +794,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
 	active := int32(len(jobCtx.activePods))
 	newSucceededPods, newFailedPods := getNewFinishedPods(jobCtx)
 	jobCtx.succeeded = job.Status.Succeeded + int32(len(newSucceededPods)) + int32(len(jobCtx.uncounted.succeeded))
-	failed := job.Status.Failed + int32(len(newFailedPods)) + int32(len(jobCtx.uncounted.failed))
+	failed := job.Status.Failed + int32(nonIgnoredFailedPodsCount(jobCtx, newFailedPods)) + int32(len(jobCtx.uncounted.failed))
 	var ready *int32
 	if feature.DefaultFeatureGate.Enabled(features.JobReadyPods) {
 		ready = pointer.Int32(countReadyPods(jobCtx.activePods))
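
Why the counted/ignored distinction matters: the controller derives its requeue delay from the number of counted pod failures, roughly doubling the delay per failure up to a cap. A minimal, self-contained sketch of that exponential backoff, assuming the upstream defaults of a 10-second base and a 6-minute cap (backoffDelay is an illustrative stand-in, not the controller's actual helper):

package main

import (
	"fmt"
	"time"
)

// backoffDelay doubles the delay with each counted pod failure and caps
// it at max; a stand-in for the Job controller's rate-limited requeue.
func backoffDelay(failures int, base, max time.Duration) time.Duration {
	if failures == 0 {
		return 0
	}
	delay := base << (failures - 1) // base * 2^(failures-1)
	if delay > max || delay < 0 {   // delay < 0 guards against shift overflow
		return max
	}
	return delay
}

func main() {
	for f := 0; f <= 7; f++ {
		fmt.Printf("failures=%d -> delay=%v\n", f, backoffDelay(f, 10*time.Second, 6*time.Minute))
	}
}

Before this commit, failures matched by an Ignore rule were filtered out before they reached this computation, so they neither incremented status.failed nor slowed pod recreation; after it, they stay out of status.failed (via nonIgnoredFailedPodsCount) but do feed the backoff delay, as the commit title says.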
@@ -951,6 +951,19 @@ func (jm *Controller) deleteActivePods(ctx context.Context, job *batch.Job, pods
 	return successfulDeletes, errorFromChannel(errCh)
 }
 
+func nonIgnoredFailedPodsCount(jobCtx *syncJobCtx, failedPods []*v1.Pod) int {
+	result := len(failedPods)
+	if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
+		for _, p := range failedPods {
+			_, countFailed, _ := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, p)
+			if !countFailed {
+				result--
+			}
+		}
+	}
+	return result
+}
+
 // deleteJobPods deletes the pods, returns the number of successful removals
 // and any error.
 func (jm *Controller) deleteJobPods(ctx context.Context, job *batch.Job, jobKey string, pods []*v1.Pod) (int32, error) {
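
The new helper delegates the per-pod decision to matchPodFailurePolicy, whose second return value reports whether the failure should count toward status.failed. A reduced sketch of the Ignore-rule matching it relies on, limited to OnPodConditions patterns (the real matcher also handles container exit-code rules and other actions; the types below are simplified stand-ins, not the API types):

package main

import "fmt"

// Simplified stand-ins for the pod failure policy API types.
type PodCondition struct {
	Type   string
	Status string
}

type IgnoreRule struct {
	OnPodConditions []PodCondition // patterns to match against a failed pod
}

// countFailed reports whether a failed pod should count toward
// status.failed: it does not count if any Ignore rule pattern matches
// one of the pod's conditions.
func countFailed(rules []IgnoreRule, conds []PodCondition) bool {
	for _, r := range rules {
		for _, pat := range r.OnPodConditions {
			for _, c := range conds {
				if c.Type == pat.Type && c.Status == pat.Status {
					return false // matched an Ignore rule: do not count
				}
			}
		}
	}
	return true
}

func main() {
	rules := []IgnoreRule{{OnPodConditions: []PodCondition{{Type: "DisruptionTarget", Status: "True"}}}}
	disrupted := []PodCondition{{Type: "DisruptionTarget", Status: "True"}}
	fmt.Println(countFailed(rules, disrupted))           // false: ignored
	fmt.Println(countFailed(rules, []PodCondition(nil))) // true: counted
}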
@@ -1406,15 +1419,7 @@ func getNewFinishedPods(jobCtx *syncJobCtx) (succeededPods, failedPods []*v1.Pod
 		return p.Status.Phase == v1.PodSucceeded
 	})
 	failedPods = getValidPodsWithFilter(jobCtx, jobCtx.uncounted.Failed(), func(p *v1.Pod) bool {
-		if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
-			if !isPodFailed(p, jobCtx.job) {
-				return false
-			}
-			_, countFailed, _ := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, p)
-			return countFailed
-		} else {
-			return isPodFailed(p, jobCtx.job)
-		}
+		return isPodFailed(p, jobCtx.job)
 	})
 	return succeededPods, failedPods
 }
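
Taken together, the two hunks split one count in two: getNewFinishedPods now returns every newly failed pod, ignored or not, so the failures feeding the backoff delay include ignored ones, while nonIgnoredFailedPodsCount keeps those same pods out of status.failed. A toy illustration of the split (hypothetical verdicts, not controller code):

package main

import "fmt"

func main() {
	// Per-pod countFailed verdicts for two newly failed pods: the second
	// matched an Ignore rule (e.g. DisruptionTarget=True).
	countVerdicts := []bool{true, false}

	nonIgnored := 0
	for _, counted := range countVerdicts {
		if counted {
			nonIgnored++
		}
	}

	fmt.Println("failures feeding the backoff delay:", len(countVerdicts)) // 2
	fmt.Println("failures added to status.failed:", nonIgnored)            // 1
}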
@@ -3019,6 +3019,53 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
 			wantStatusFailed:    0,
 			wantStatusSucceeded: 0,
 		},
+		"ignore pod failure based on OnPodConditions, ignored failures delays pod recreation": {
+			enableJobPodFailurePolicy: true,
+			job: batch.Job{
+				TypeMeta:   metav1.TypeMeta{Kind: "Job"},
+				ObjectMeta: validObjectMeta,
+				Spec: batch.JobSpec{
+					Selector:     validSelector,
+					Template:     validTemplate,
+					Parallelism:  pointer.Int32(1),
+					Completions:  pointer.Int32(1),
+					BackoffLimit: pointer.Int32(0),
+					PodFailurePolicy: &batch.PodFailurePolicy{
+						Rules: []batch.PodFailurePolicyRule{
+							{
+								Action: batch.PodFailurePolicyActionIgnore,
+								OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
+									{
+										Type:   v1.DisruptionTarget,
+										Status: v1.ConditionTrue,
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			pods: []v1.Pod{
+				{
+					ObjectMeta: metav1.ObjectMeta{
+						DeletionTimestamp: &now,
+					},
+					Status: v1.PodStatus{
+						Phase: v1.PodFailed,
+						Conditions: []v1.PodCondition{
+							{
+								Type:   v1.DisruptionTarget,
+								Status: v1.ConditionTrue,
+							},
+						},
+					},
+				},
+			},
+			wantConditions:      nil,
+			wantStatusActive:    0,
+			wantStatusFailed:    0,
+			wantStatusSucceeded: 0,
+		},
 		"fail job based on OnPodConditions": {
 			enableJobPodFailurePolicy: true,
 			job: batch.Job{
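
The new table entry expects wantStatusFailed: 0 even though the pod is in PodFailed phase, because its DisruptionTarget=True condition matches the Ignore rule. A trimmed, hypothetical mirror of how such an entry can be asserted (the real TestSyncJobWithJobPodFailurePolicy drives the controller's syncJob against a fake clientset):

package job_test

import "testing"

// Hypothetical, self-contained mirror of the table entry above: one pod
// in PodFailed phase whose DisruptionTarget=True condition matches an
// Ignore rule, so it must not be counted in status.failed.
func TestIgnoredFailureNotCounted(t *testing.T) {
	type pod struct{ failed, disrupted bool }

	pods := []pod{{failed: true, disrupted: true}}
	wantStatusFailed := 0

	statusFailed := 0
	for _, p := range pods {
		if p.failed && !p.disrupted { // the Ignore rule suppresses counting
			statusFailed++
		}
	}
	if statusFailed != wantStatusFailed {
		t.Errorf("status.failed = %d, want %d", statusFailed, wantStatusFailed)
	}
}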