mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-06 10:43:56 +00:00
Include ignored pods when computing backoff delay for Job pod failures
This commit is contained in:
parent
90c362b343
commit
35d0af9243
@ -794,7 +794,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
|
|||||||
active := int32(len(jobCtx.activePods))
|
active := int32(len(jobCtx.activePods))
|
||||||
newSucceededPods, newFailedPods := getNewFinishedPods(jobCtx)
|
newSucceededPods, newFailedPods := getNewFinishedPods(jobCtx)
|
||||||
jobCtx.succeeded = job.Status.Succeeded + int32(len(newSucceededPods)) + int32(len(jobCtx.uncounted.succeeded))
|
jobCtx.succeeded = job.Status.Succeeded + int32(len(newSucceededPods)) + int32(len(jobCtx.uncounted.succeeded))
|
||||||
failed := job.Status.Failed + int32(len(newFailedPods)) + int32(len(jobCtx.uncounted.failed))
|
failed := job.Status.Failed + int32(nonIgnoredFailedPodsCount(jobCtx, newFailedPods)) + int32(len(jobCtx.uncounted.failed))
|
||||||
var ready *int32
|
var ready *int32
|
||||||
if feature.DefaultFeatureGate.Enabled(features.JobReadyPods) {
|
if feature.DefaultFeatureGate.Enabled(features.JobReadyPods) {
|
||||||
ready = pointer.Int32(countReadyPods(jobCtx.activePods))
|
ready = pointer.Int32(countReadyPods(jobCtx.activePods))
|
||||||
@ -951,6 +951,19 @@ func (jm *Controller) deleteActivePods(ctx context.Context, job *batch.Job, pods
|
|||||||
return successfulDeletes, errorFromChannel(errCh)
|
return successfulDeletes, errorFromChannel(errCh)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// nonIgnoredFailedPodsCount returns how many of the given failedPods should
// be counted as failed.
//
// It starts from len(failedPods). When the JobPodFailurePolicy feature gate
// is enabled and the job in jobCtx specifies a PodFailurePolicy, each pod is
// evaluated with matchPodFailurePolicy and the count is decremented for every
// pod for which that helper reports countFailed == false. In all other cases
// the result is simply len(failedPods).
func nonIgnoredFailedPodsCount(jobCtx *syncJobCtx, failedPods []*v1.Pod) int {
|
||||||
|
// Assume every pod counts; subtract the ones the policy says not to count.
result := len(failedPods)
|
||||||
|
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
|
||||||
|
for _, p := range failedPods {
|
||||||
|
// Only the second return value (countFailed) is used here; the first and
// third results of matchPodFailurePolicy are deliberately discarded.
_, countFailed, _ := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, p)
|
||||||
|
if !countFailed {
|
||||||
|
result--
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
// deleteJobPods deletes the pods, returns the number of successful removals
|
// deleteJobPods deletes the pods, returns the number of successful removals
|
||||||
// and any error.
|
// and any error.
|
||||||
func (jm *Controller) deleteJobPods(ctx context.Context, job *batch.Job, jobKey string, pods []*v1.Pod) (int32, error) {
|
func (jm *Controller) deleteJobPods(ctx context.Context, job *batch.Job, jobKey string, pods []*v1.Pod) (int32, error) {
|
||||||
@ -1406,15 +1419,7 @@ func getNewFinishedPods(jobCtx *syncJobCtx) (succeededPods, failedPods []*v1.Pod
|
|||||||
return p.Status.Phase == v1.PodSucceeded
|
return p.Status.Phase == v1.PodSucceeded
|
||||||
})
|
})
|
||||||
failedPods = getValidPodsWithFilter(jobCtx, jobCtx.uncounted.Failed(), func(p *v1.Pod) bool {
|
failedPods = getValidPodsWithFilter(jobCtx, jobCtx.uncounted.Failed(), func(p *v1.Pod) bool {
|
||||||
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) && jobCtx.job.Spec.PodFailurePolicy != nil {
|
return isPodFailed(p, jobCtx.job)
|
||||||
if !isPodFailed(p, jobCtx.job) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
_, countFailed, _ := matchPodFailurePolicy(jobCtx.job.Spec.PodFailurePolicy, p)
|
|
||||||
return countFailed
|
|
||||||
} else {
|
|
||||||
return isPodFailed(p, jobCtx.job)
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
return succeededPods, failedPods
|
return succeededPods, failedPods
|
||||||
}
|
}
|
||||||
|
@ -3019,6 +3019,53 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
|
|||||||
wantStatusFailed: 0,
|
wantStatusFailed: 0,
|
||||||
wantStatusSucceeded: 0,
|
wantStatusSucceeded: 0,
|
||||||
},
|
},
|
||||||
|
"ignore pod failure based on OnPodConditions, ignored failures delays pod recreation": {
|
||||||
|
enableJobPodFailurePolicy: true,
|
||||||
|
job: batch.Job{
|
||||||
|
TypeMeta: metav1.TypeMeta{Kind: "Job"},
|
||||||
|
ObjectMeta: validObjectMeta,
|
||||||
|
Spec: batch.JobSpec{
|
||||||
|
Selector: validSelector,
|
||||||
|
Template: validTemplate,
|
||||||
|
Parallelism: pointer.Int32(1),
|
||||||
|
Completions: pointer.Int32(1),
|
||||||
|
BackoffLimit: pointer.Int32(0),
|
||||||
|
PodFailurePolicy: &batch.PodFailurePolicy{
|
||||||
|
Rules: []batch.PodFailurePolicyRule{
|
||||||
|
{
|
||||||
|
Action: batch.PodFailurePolicyActionIgnore,
|
||||||
|
OnPodConditions: []batch.PodFailurePolicyOnPodConditionsPattern{
|
||||||
|
{
|
||||||
|
Type: v1.DisruptionTarget,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
pods: []v1.Pod{
|
||||||
|
{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
DeletionTimestamp: &now,
|
||||||
|
},
|
||||||
|
Status: v1.PodStatus{
|
||||||
|
Phase: v1.PodFailed,
|
||||||
|
Conditions: []v1.PodCondition{
|
||||||
|
{
|
||||||
|
Type: v1.DisruptionTarget,
|
||||||
|
Status: v1.ConditionTrue,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantConditions: nil,
|
||||||
|
wantStatusActive: 0,
|
||||||
|
wantStatusFailed: 0,
|
||||||
|
wantStatusSucceeded: 0,
|
||||||
|
},
|
||||||
"fail job based on OnPodConditions": {
|
"fail job based on OnPodConditions": {
|
||||||
enableJobPodFailurePolicy: true,
|
enableJobPodFailurePolicy: true,
|
||||||
job: batch.Job{
|
job: batch.Job{
|
||||||
|
Loading…
Reference in New Issue
Block a user