From 53aa05df3ae7b2db0894699d5b8103fcff44e81c Mon Sep 17 00:00:00 2001 From: Aldo Culquicondor Date: Thu, 14 Apr 2022 15:07:09 -0400 Subject: [PATCH] Don't mark job as failed until expectations are satisfied Change-Id: I99206f35f6f145054c005ab362c792e71b9b15f4 --- pkg/controller/job/job_controller.go | 8 ++++---- pkg/controller/job/job_controller_test.go | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go index 01be1a951f7..e75162f446c 100644 --- a/pkg/controller/job/job_controller.go +++ b/pkg/controller/job/job_controller.go @@ -725,7 +725,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (forget bool, rEr // Check the expectations of the job before counting active pods, otherwise a new pod can sneak in // and update the expectations after we've retrieved active pods from the store. If a new pod enters // the store after we've checked the expectation, the job sync is just deferred till the next relist. - jobNeedsSync := jm.expectations.SatisfiedExpectations(key) + satisfiedExpectations := jm.expectations.SatisfiedExpectations(key) pods, err := jm.getPodsForJob(ctx, &job, uncounted != nil) if err != nil { @@ -782,9 +782,9 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (forget bool, rEr if uncounted == nil { // Legacy behavior: pretend all active pods were successfully removed. deleted = active - } else if deleted != active { + } else if deleted != active || !satisfiedExpectations { // Can't declare the Job as finished yet, as there might be remaining - // pod finalizers. + // pod finalizers or pods that are not in the informer's cache yet. finishedCondition = nil } active -= deleted @@ -792,7 +792,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (forget bool, rEr manageJobErr = err } else { manageJobCalled := false - if jobNeedsSync && job.DeletionTimestamp == nil { + if satisfiedExpectations && job.DeletionTimestamp == nil { active, action, manageJobErr = jm.manageJob(ctx, &job, activePods, succeeded, succeededIndexes) manageJobCalled = true } diff --git a/pkg/controller/job/job_controller_test.go b/pkg/controller/job/job_controller_test.go index 9f406c9011b..93e4ac78617 100644 --- a/pkg/controller/job/job_controller_test.go +++ b/pkg/controller/job/job_controller_test.go @@ -196,6 +196,9 @@ func TestControllerSyncJob(t *testing.T) { wasSuspended bool suspend bool + // If set, it means that the case is exclusive to tracking with/without finalizers. + wFinalizersExclusive *bool + // pod setup podControllerError error jobKeyForget bool @@ -480,6 +483,16 @@ func TestControllerSyncJob(t *testing.T) { expectedConditionReason: "BackoffLimitExceeded", expectedPodPatches: 1, }, + "job failures, unsatisfied expectations": { + wFinalizersExclusive: pointer.Bool(true), + parallelism: 2, + completions: 5, + deleting: true, + failedPods: 1, + fakeExpectationAtCreation: 1, + expectedFailed: 1, + expectedPodPatches: 1, + }, "indexed job start": { parallelism: 2, completions: 5, @@ -706,6 +719,9 @@ func TestControllerSyncJob(t *testing.T) { if wFinalizers && tc.podControllerError != nil { t.Skip("Can't track status if finalizers can't be removed") } + if tc.wFinalizersExclusive != nil && *tc.wFinalizersExclusive != wFinalizers { + t.Skipf("Test is exclusive for wFinalizers=%t", *tc.wFinalizersExclusive) + } defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobReadyPods, tc.jobReadyPodsEnabled)() defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, wFinalizers)()