mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-07 19:23:40 +00:00
Only declare job as finished after removing all finalizers
Change-Id: Id4b01b0e6fabe24134e57e687356e0fc613cead4
This commit is contained in:
parent
7581ae8123
commit
f7a1fb76f4
@@ -792,12 +792,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
|
|||||||
var manageJobErr error
|
var manageJobErr error
|
||||||
var finishedCondition *batch.JobCondition
|
var finishedCondition *batch.JobCondition
|
||||||
|
|
||||||
jobHasNewFailure := failed > job.Status.Failed
|
exceedsBackoffLimit := failed > *job.Spec.BackoffLimit
|
||||||
// new failures happen when status does not reflect the failures and active
|
|
||||||
// is different than parallelism, otherwise the previous controller loop
|
|
||||||
// failed updating status so even if we pick up failure it is not a new one
|
|
||||||
exceedsBackoffLimit := jobHasNewFailure && (active != *job.Spec.Parallelism) &&
|
|
||||||
(failed > *job.Spec.BackoffLimit)
|
|
||||||
|
|
||||||
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
|
if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) {
|
||||||
if failureTargetCondition := findConditionByType(job.Status.Conditions, batch.JobFailureTarget); failureTargetCondition != nil {
|
if failureTargetCondition := findConditionByType(job.Status.Conditions, batch.JobFailureTarget); failureTargetCondition != nil {
|
||||||
@@ -999,6 +994,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
|
|||||||
needsFlush = true
|
needsFlush = true
|
||||||
}
|
}
|
||||||
podFailureCountByPolicyAction := map[string]int{}
|
podFailureCountByPolicyAction := map[string]int{}
|
||||||
|
reachedMaxUncountedPods := false
|
||||||
for _, pod := range pods {
|
for _, pod := range pods {
|
||||||
if !hasJobTrackingFinalizer(pod) || expectedRmFinalizers.Has(string(pod.UID)) {
|
if !hasJobTrackingFinalizer(pod) || expectedRmFinalizers.Has(string(pod.UID)) {
|
||||||
// This pod was processed in a previous sync.
|
// This pod was processed in a previous sync.
|
||||||
@@ -1049,6 +1045,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
|
|||||||
//
|
//
|
||||||
// The job will be synced again because the Job status and Pod updates
|
// The job will be synced again because the Job status and Pod updates
|
||||||
// will put the Job back to the work queue.
|
// will put the Job back to the work queue.
|
||||||
|
reachedMaxUncountedPods = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1077,7 +1074,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
|
|||||||
if job, needsFlush, err = jm.flushUncountedAndRemoveFinalizers(ctx, job, podsToRemoveFinalizer, uidsWithFinalizer, &oldCounters, podFailureCountByPolicyAction, needsFlush, newBackoffRecord); err != nil {
|
if job, needsFlush, err = jm.flushUncountedAndRemoveFinalizers(ctx, job, podsToRemoveFinalizer, uidsWithFinalizer, &oldCounters, podFailureCountByPolicyAction, needsFlush, newBackoffRecord); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
jobFinished := jm.enactJobFinished(job, finishedCond)
|
jobFinished := !reachedMaxUncountedPods && jm.enactJobFinished(job, finishedCond)
|
||||||
if jobFinished {
|
if jobFinished {
|
||||||
needsFlush = true
|
needsFlush = true
|
||||||
}
|
}
|
||||||
|
@@ -1341,6 +1341,9 @@ func TestOrphanPodsFinalizersClearedWithGC(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestFinalizersClearedWhenBackoffLimitExceeded(t *testing.T) {
|
func TestFinalizersClearedWhenBackoffLimitExceeded(t *testing.T) {
|
||||||
|
// Set a maximum number of uncounted pods below parallelism, to ensure it
|
||||||
|
// doesn't affect the termination of pods.
|
||||||
|
t.Cleanup(setDuringTest(&jobcontroller.MaxUncountedPods, 50))
|
||||||
closeFn, restConfig, clientSet, ns := setup(t, "simple")
|
closeFn, restConfig, clientSet, ns := setup(t, "simple")
|
||||||
defer closeFn()
|
defer closeFn()
|
||||||
ctx, cancel := startJobControllerAndWaitForCaches(restConfig)
|
ctx, cancel := startJobControllerAndWaitForCaches(restConfig)
|
||||||
|
Loading…
Reference in New Issue
Block a user