diff --git a/pkg/controller/controller_ref_manager.go b/pkg/controller/controller_ref_manager.go index 6840b6a45a1..b21281dcf1e 100644 --- a/pkg/controller/controller_ref_manager.go +++ b/pkg/controller/controller_ref_manager.go @@ -124,6 +124,7 @@ type PodControllerRefManager struct { BaseControllerRefManager controllerKind schema.GroupVersionKind podControl PodControlInterface + finalizers []string } // NewPodControllerRefManager returns a PodControllerRefManager that exposes @@ -143,6 +144,7 @@ func NewPodControllerRefManager( selector labels.Selector, controllerKind schema.GroupVersionKind, canAdopt func() error, + finalizers ...string, ) *PodControllerRefManager { return &PodControllerRefManager{ BaseControllerRefManager: BaseControllerRefManager{ @@ -152,6 +154,7 @@ func NewPodControllerRefManager( }, controllerKind: controllerKind, podControl: podControl, + finalizers: finalizers, } } @@ -216,7 +219,7 @@ func (m *PodControllerRefManager) AdoptPod(pod *v1.Pod) error { // Note that ValidateOwnerReferences() will reject this patch if another // OwnerReference exists with controller=true. - patchBytes, err := ownerRefControllerPatch(m.Controller, m.controllerKind, pod.UID) + patchBytes, err := ownerRefControllerPatch(m.Controller, m.controllerKind, pod.UID, m.finalizers...) if err != nil { return err } @@ -228,7 +231,7 @@ func (m *PodControllerRefManager) AdoptPod(pod *v1.Pod) error { func (m *PodControllerRefManager) ReleasePod(pod *v1.Pod) error { klog.V(2).Infof("patching pod %s_%s to remove its controllerRef to %s/%s:%s", pod.Namespace, pod.Name, m.controllerKind.GroupVersion(), m.controllerKind.Kind, m.Controller.GetName()) - patchBytes, err := deleteOwnerRefStrategicMergePatch(pod.UID, m.Controller.GetUID()) + patchBytes, err := deleteOwnerRefStrategicMergePatch(pod.UID, m.Controller.GetUID(), m.finalizers...) 
if err != nil { return err } @@ -518,19 +521,22 @@ type objectForDeleteOwnerRefStrategicMergePatch struct { } type objectMetaForMergePatch struct { - UID types.UID `json:"uid"` - OwnerReferences []map[string]string `json:"ownerReferences"` + UID types.UID `json:"uid"` + OwnerReferences []map[string]string `json:"ownerReferences"` + DeleteFinalizers []string `json:"$deleteFromPrimitiveList/finalizers,omitempty"` } -func deleteOwnerRefStrategicMergePatch(dependentUID types.UID, ownerUIDs ...types.UID) ([]byte, error) { - var pieces []map[string]string - for _, ownerUID := range ownerUIDs { - pieces = append(pieces, map[string]string{"$patch": "delete", "uid": string(ownerUID)}) - } +func deleteOwnerRefStrategicMergePatch(dependentUID types.UID, ownerUID types.UID, finalizers ...string) ([]byte, error) { patch := objectForDeleteOwnerRefStrategicMergePatch{ Metadata: objectMetaForMergePatch{ - UID: dependentUID, - OwnerReferences: pieces, + UID: dependentUID, + OwnerReferences: []map[string]string{ + { + "$patch": "delete", + "uid": string(ownerUID), + }, + }, + DeleteFinalizers: finalizers, }, } patchBytes, err := json.Marshal(&patch) @@ -547,9 +553,10 @@ type objectForAddOwnerRefPatch struct { type objectMetaForPatch struct { OwnerReferences []metav1.OwnerReference `json:"ownerReferences"` UID types.UID `json:"uid"` + Finalizers []string `json:"finalizers,omitempty"` } -func ownerRefControllerPatch(controller metav1.Object, controllerKind schema.GroupVersionKind, uid types.UID) ([]byte, error) { +func ownerRefControllerPatch(controller metav1.Object, controllerKind schema.GroupVersionKind, uid types.UID, finalizers ...string) ([]byte, error) { blockOwnerDeletion := true isController := true addControllerPatch := objectForAddOwnerRefPatch{ @@ -565,6 +572,7 @@ func ownerRefControllerPatch(controller metav1.Object, controllerKind schema.Gro BlockOwnerDeletion: &blockOwnerDeletion, }, }, + Finalizers: finalizers, }, } patchBytes, err := json.Marshal(&addControllerPatch) diff --git a/pkg/controller/controller_ref_manager_test.go b/pkg/controller/controller_ref_manager_test.go index 4967314a223..9fa2fd9b3f1 100644 --- a/pkg/controller/controller_ref_manager_test.go +++ b/pkg/controller/controller_ref_manager_test.go @@ -17,9 +17,10 @@ limitations under the License. 
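For reference, the payloads these two helpers now emit look roughly like the sketch below (a standalone illustration, not part of the patch; the UIDs and the finalizer value are stand-ins, and plain maps are used instead of the typed structs above). Adoption adds metadata.finalizers next to the controller ownerReference; release deletes the ownerReference and uses the strategic-merge directive $deleteFromPrimitiveList/finalizers to drop the finalizer again.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Shape of the patch produced by ownerRefControllerPatch(..., finalizers...).
	adopt := map[string]interface{}{
		"metadata": map[string]interface{}{
			"uid": "pod-uid-123", // hypothetical dependent (pod) UID
			"ownerReferences": []map[string]interface{}{{
				"apiVersion":         "batch/v1",
				"kind":               "Job",
				"name":               "sample-job",
				"uid":                "job-uid-456", // hypothetical controller UID
				"controller":         true,
				"blockOwnerDeletion": true,
			}},
			"finalizers": []string{"batch.kubernetes.io/job-tracking"},
		},
	}
	// Shape of the patch produced by deleteOwnerRefStrategicMergePatch(..., finalizers...).
	release := map[string]interface{}{
		"metadata": map[string]interface{}{
			"uid": "pod-uid-123",
			"ownerReferences": []map[string]interface{}{
				{"$patch": "delete", "uid": "job-uid-456"},
			},
			"$deleteFromPrimitiveList/finalizers": []string{"batch.kubernetes.io/job-tracking"},
		},
	}
	for _, p := range []map[string]interface{}{adopt, release} {
		b, _ := json.Marshal(p)
		fmt.Println(string(b))
	}
}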
package controller import ( - "reflect" + "strings" "testing" + "github.com/google/go-cmp/cmp" apps "k8s.io/api/apps/v1" "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -63,6 +64,7 @@ func TestClaimPods(t *testing.T) { manager *PodControllerRefManager pods []*v1.Pod claimed []*v1.Pod + patches int } var tests = []test{ { @@ -74,6 +76,7 @@ func TestClaimPods(t *testing.T) { func() error { return nil }), pods: []*v1.Pod{newPod("pod1", productionLabel, nil), newPod("pod2", testLabel, nil)}, claimed: []*v1.Pod{newPod("pod1", productionLabel, nil)}, + patches: 1, }, func() test { controller := v1.ReplicationController{} @@ -135,6 +138,7 @@ func TestClaimPods(t *testing.T) { func() error { return nil }), pods: []*v1.Pod{newPod("pod1", productionLabel, &controller), newPod("pod2", testLabel, &controller)}, claimed: []*v1.Pod{newPod("pod1", productionLabel, &controller)}, + patches: 1, } }(), func() test { @@ -157,22 +161,50 @@ func TestClaimPods(t *testing.T) { claimed: []*v1.Pod{podToDelete1}, } }(), + func() test { + controller := v1.ReplicationController{} + controller.UID = types.UID(controllerUID) + return test{ + name: "Controller claims or release pods according to selector with finalizers", + manager: NewPodControllerRefManager(&FakePodControl{}, + &controller, + productionLabelSelector, + controllerKind, + func() error { return nil }, + "foo-finalizer", "bar-finalizer"), + pods: []*v1.Pod{newPod("pod1", productionLabel, &controller), newPod("pod2", testLabel, &controller), newPod("pod3", productionLabel, nil)}, + claimed: []*v1.Pod{newPod("pod1", productionLabel, &controller), newPod("pod3", productionLabel, nil)}, + patches: 2, + } + }(), } for _, test := range tests { - claimed, err := test.manager.ClaimPods(test.pods) - if err != nil { - t.Errorf("Test case `%s`, unexpected error: %v", test.name, err) - } else if !reflect.DeepEqual(test.claimed, claimed) { - t.Errorf("Test case `%s`, claimed wrong pods. 
Expected %v, got %v", test.name, podToStringSlice(test.claimed), podToStringSlice(claimed)) - } - + t.Run(test.name, func(t *testing.T) { + claimed, err := test.manager.ClaimPods(test.pods) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if diff := cmp.Diff(test.claimed, claimed); diff != "" { + t.Errorf("Claimed wrong pods (-want,+got):\n%s", diff) + } + fakePodControl, ok := test.manager.podControl.(*FakePodControl) + if !ok { + return + } + if p := len(fakePodControl.Patches); p != test.patches { + t.Errorf("ClaimPods issues %d patches, want %d", p, test.patches) + } + for _, p := range fakePodControl.Patches { + patch := string(p) + if uid := string(test.manager.Controller.GetUID()); !strings.Contains(patch, uid) { + t.Errorf("Patch doesn't contain controller UID %s", uid) + } + for _, f := range test.manager.finalizers { + if !strings.Contains(patch, f) { + t.Errorf("Patch doesn't contain finalizer %q", f) + } + } + } + }) } } - -func podToStringSlice(pods []*v1.Pod) []string { - var names []string - for _, pod := range pods { - names = append(names, pod.Name) - } - return names -} diff --git a/pkg/controller/job/indexed_job_utils.go b/pkg/controller/job/indexed_job_utils.go index d456ffeb830..7fa9bdc22b4 100644 --- a/pkg/controller/job/indexed_job_utils.go +++ b/pkg/controller/job/indexed_job_utils.go @@ -18,7 +18,6 @@ package job import ( "fmt" - "math" "sort" "strconv" "strings" @@ -27,6 +26,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apiserver/pkg/storage/names" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/controller" ) @@ -39,85 +39,184 @@ func isIndexedJob(job *batch.Job) bool { return job.Spec.CompletionMode != nil && *job.Spec.CompletionMode == batch.IndexedCompletion } -// calculateSucceededIndexes returns a string representation of the list of -// succeeded indexes in compressed format and the number of succeeded indexes. -func calculateSucceededIndexes(pods []*v1.Pod, completions int32) (string, int32) { - sort.Sort(byCompletionIndex(pods)) - var result strings.Builder - var lastSucceeded int - var count int32 - firstSucceeded := math.MinInt32 - for _, p := range pods { - ix := getCompletionIndex(p.Annotations) - if ix == unknownCompletionIndex { - continue - } - if ix >= int(completions) { - break - } - if p.Status.Phase == v1.PodSucceeded { - if firstSucceeded == math.MinInt32 { - firstSucceeded = ix - } else if ix > lastSucceeded+1 { - addSingleOrRangeStr(&result, firstSucceeded, lastSucceeded) - count += int32(lastSucceeded - firstSucceeded + 1) - firstSucceeded = ix - } - lastSucceeded = ix - } - } - if firstSucceeded != math.MinInt32 { - addSingleOrRangeStr(&result, firstSucceeded, lastSucceeded) - count += int32(lastSucceeded - firstSucceeded + 1) - } - return result.String(), count +type interval struct { + First int + Last int } -func addSingleOrRangeStr(builder *strings.Builder, first, last int) { - if builder.Len() > 0 { - builder.WriteRune(',') +type orderedIntervals []interval + +// calculateSucceededIndexes returns the old and new list of succeeded indexes +// in compressed format (intervals). +// The old list is solely based off .status.completedIndexes, but returns an +// empty list if this Job is not tracked with finalizers. The new list includes +// the indexes that succeeded since the last sync. 
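As a rough illustration of how these interval helpers compose (the indexes here are arbitrary), a check along these lines could sit next to the existing tests in this package; only an fmt import is assumed:

// Lives in package job alongside the tests; relies on the orderedIntervals
// helpers introduced in this file.
func ExampleOrderedIntervals() {
	prev := orderedIntervals{{1, 3}, {6, 6}}       // decoded from completedIndexes "1-3,6"
	merged := prev.withOrderedIndexes([]int{4, 8}) // indexes that newly succeeded
	fmt.Println(merged.String(), merged.total(), merged.has(5))
	// Output: 1-4,6,8 6 false
}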
+func calculateSucceededIndexes(job *batch.Job, pods []*v1.Pod) (orderedIntervals, orderedIntervals) { + var prevIntervals orderedIntervals + withFinalizers := trackingUncountedPods(job) + if withFinalizers { + prevIntervals = succeededIndexesFromJob(job) } - builder.WriteString(strconv.Itoa(first)) - if last > first { - if last == first+1 { - builder.WriteRune(',') - } else { - builder.WriteRune('-') + newSucceeded := sets.NewInt() + for _, p := range pods { + ix := getCompletionIndex(p.Annotations) + // Succeeded Pod with valid index and, if tracking with finalizers, + // has a finalizer (meaning that it is not counted yet). + if p.Status.Phase == v1.PodSucceeded && ix != unknownCompletionIndex && ix < int(*job.Spec.Completions) && (!withFinalizers || hasJobTrackingFinalizer(p)) { + newSucceeded.Insert(ix) } - builder.WriteString(strconv.Itoa(last)) } + // List returns the items of the set in order. + result := prevIntervals.withOrderedIndexes(newSucceeded.List()) + return prevIntervals, result +} + +// withOrderedIndexes returns a new list of ordered intervals that contains +// the newIndexes, provided in increasing order. +func (oi orderedIntervals) withOrderedIndexes(newIndexes []int) orderedIntervals { + var result orderedIntervals + i := 0 + j := 0 + var lastInterval *interval + appendOrMergeWithLastInterval := func(thisInterval interval) { + if lastInterval == nil || thisInterval.First > lastInterval.Last+1 { + result = append(result, thisInterval) + lastInterval = &result[len(result)-1] + } else if lastInterval.Last < thisInterval.Last { + lastInterval.Last = thisInterval.Last + } + } + for i < len(oi) && j < len(newIndexes) { + if oi[i].First < newIndexes[j] { + appendOrMergeWithLastInterval(oi[i]) + i++ + } else { + appendOrMergeWithLastInterval(interval{newIndexes[j], newIndexes[j]}) + j++ + } + } + for i < len(oi) { + appendOrMergeWithLastInterval(oi[i]) + i++ + } + for j < len(newIndexes) { + appendOrMergeWithLastInterval(interval{newIndexes[j], newIndexes[j]}) + j++ + } + return result +} + +// total returns number of indexes contained in the intervals. 
+func (oi orderedIntervals) total() int { + var count int + for _, iv := range oi { + count += iv.Last - iv.First + 1 + } + return count +} + +func (oi orderedIntervals) String() string { + var builder strings.Builder + for _, v := range oi { + if builder.Len() > 0 { + builder.WriteRune(',') + } + builder.WriteString(strconv.Itoa(v.First)) + if v.Last > v.First { + if v.Last == v.First+1 { + builder.WriteRune(',') + } else { + builder.WriteRune('-') + } + builder.WriteString(strconv.Itoa(v.Last)) + } + } + return builder.String() +} + +func (oi orderedIntervals) has(ix int) bool { + lo := 0 + hi := len(oi) + // Invariant: oi[hi].Last >= ix + for hi > lo { + mid := lo + (hi-lo)/2 + if oi[mid].Last >= ix { + hi = mid + } else { + lo = mid + 1 + } + } + if hi == len(oi) { + return false + } + return oi[hi].First <= ix +} + +func succeededIndexesFromJob(job *batch.Job) orderedIntervals { + if job.Status.CompletedIndexes == "" { + return nil + } + var result orderedIntervals + var lastInterval *interval + completions := int(*job.Spec.Completions) + for _, intervalStr := range strings.Split(job.Status.CompletedIndexes, ",") { + limitsStr := strings.Split(intervalStr, "-") + var inter interval + var err error + inter.First, err = strconv.Atoi(limitsStr[0]) + if err != nil { + klog.InfoS("Corrupted completed indexes interval, ignoring", "job", klog.KObj(job), "interval", intervalStr, "err", err) + continue + } + if inter.First >= completions { + break + } + if len(limitsStr) > 1 { + inter.Last, err = strconv.Atoi(limitsStr[1]) + if err != nil { + klog.InfoS("Corrupted completed indexes interval, ignoring", "job", klog.KObj(job), "interval", intervalStr, "err", err) + continue + } + if inter.Last >= completions { + inter.Last = completions - 1 + } + } else { + inter.Last = inter.First + } + if lastInterval != nil && lastInterval.Last == inter.First-1 { + lastInterval.Last = inter.Last + } else { + result = append(result, inter) + lastInterval = &result[len(result)-1] + } + } + return result } // firstPendingIndexes returns `count` indexes less than `completions` that are -// not covered by running or succeeded pods. -func firstPendingIndexes(pods []*v1.Pod, count, completions int) []int { +// not covered by `activePods` or `succeededIndexes`. +func firstPendingIndexes(activePods []*v1.Pod, succeededIndexes orderedIntervals, count, completions int) []int { if count == 0 { return nil } - nonPending := sets.NewInt() - for _, p := range pods { - if p.Status.Phase == v1.PodSucceeded || controller.IsPodActive(p) { - ix := getCompletionIndex(p.Annotations) - if ix != unknownCompletionIndex { - nonPending.Insert(ix) - } + active := sets.NewInt() + for _, p := range activePods { + ix := getCompletionIndex(p.Annotations) + if ix != unknownCompletionIndex { + active.Insert(ix) } } result := make([]int, 0, count) - // The following algorithm is bounded by the number of non pending pods and - // parallelism. - // TODO(#99368): Convert the list of non-pending pods into a set of - // non-pending intervals from the job's .status.completedIndexes and active - // pods. + nonPending := succeededIndexes.withOrderedIndexes(active.List()) + // The following algorithm is bounded by len(nonPending) and count. 
candidate := 0 - for _, np := range nonPending.List() { - for ; candidate < np && candidate < completions; candidate++ { + for _, sInterval := range nonPending { + for ; candidate < completions && len(result) < count && candidate < sInterval.First; candidate++ { result = append(result, candidate) - if len(result) == count { - return result - } } - candidate = np + 1 + if candidate < sInterval.Last+1 { + candidate = sInterval.Last + 1 + } } for ; candidate < completions && len(result) < count; candidate++ { result = append(result, candidate) diff --git a/pkg/controller/job/indexed_job_utils_test.go b/pkg/controller/job/indexed_job_utils_test.go index cad45172d08..6825982b3be 100644 --- a/pkg/controller/job/indexed_job_utils_test.go +++ b/pkg/controller/job/indexed_job_utils_test.go @@ -22,22 +22,30 @@ import ( "github.com/google/go-cmp/cmp" batch "k8s.io/api/batch/v1" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apiserver/pkg/util/feature" + featuregatetesting "k8s.io/component-base/featuregate/testing" + "k8s.io/kubernetes/pkg/features" + "k8s.io/utils/pointer" ) const noIndex = "-" func TestCalculateSucceededIndexes(t *testing.T) { cases := map[string]struct { - pods []indexPhase - wantCount int32 - completions int32 + prevSucceeded string + pods []indexPhase + completions int32 + trackingWithFinalizers bool + wantStatusIntervals orderedIntervals + wantIntervals orderedIntervals }{ - "1": { - pods: []indexPhase{{"1", v1.PodSucceeded}}, - wantCount: 1, - completions: 2, + "one index": { + pods: []indexPhase{{"1", v1.PodSucceeded}}, + completions: 2, + wantIntervals: []interval{{1, 1}}, }, - "5,10": { + "two separate": { pods: []indexPhase{ {"2", v1.PodFailed}, {"5", v1.PodSucceeded}, @@ -45,10 +53,10 @@ func TestCalculateSucceededIndexes(t *testing.T) { {"10", v1.PodFailed}, {"10", v1.PodSucceeded}, }, - wantCount: 2, - completions: 11, + completions: 11, + wantIntervals: []interval{{5, 5}, {10, 10}}, }, - "2,3,5-7": { + "two intervals": { pods: []indexPhase{ {"0", v1.PodRunning}, {"1", v1.PodPending}, @@ -58,10 +66,11 @@ func TestCalculateSucceededIndexes(t *testing.T) { {"6", v1.PodSucceeded}, {"7", v1.PodSucceeded}, }, - wantCount: 5, - completions: 8, + completions: 8, + wantIntervals: []interval{{2, 3}, {5, 7}}, }, - "0-2": { + "one interval, ignore previous": { + prevSucceeded: "3-5", pods: []indexPhase{ {"0", v1.PodSucceeded}, {"1", v1.PodFailed}, @@ -70,10 +79,10 @@ func TestCalculateSucceededIndexes(t *testing.T) { {"2", v1.PodSucceeded}, {"3", v1.PodFailed}, }, - wantCount: 3, - completions: 4, + completions: 4, + wantIntervals: []interval{{0, 2}}, }, - "0,2-5": { + "one index and one interval": { pods: []indexPhase{ {"0", v1.PodSucceeded}, {"1", v1.PodFailed}, @@ -84,10 +93,10 @@ func TestCalculateSucceededIndexes(t *testing.T) { {noIndex, v1.PodSucceeded}, {"-2", v1.PodSucceeded}, }, - wantCount: 5, - completions: 6, + completions: 6, + wantIntervals: []interval{{0, 0}, {2, 5}}, }, - "0-2,4": { + "out of range": { pods: []indexPhase{ {"0", v1.PodSucceeded}, {"1", v1.PodSucceeded}, @@ -98,19 +107,184 @@ func TestCalculateSucceededIndexes(t *testing.T) { {noIndex, v1.PodSucceeded}, {"-2", v1.PodSucceeded}, }, - wantCount: 4, - completions: 5, + completions: 5, + wantIntervals: []interval{{0, 2}, {4, 4}}, + }, + "prev interval out of range": { + prevSucceeded: "0-5,8-10", + completions: 8, + trackingWithFinalizers: true, + wantStatusIntervals: []interval{{0, 5}}, + wantIntervals: []interval{{0, 5}}, + }, + "prev interval partially out of 
range": { + prevSucceeded: "0-5,8-10", + completions: 10, + trackingWithFinalizers: true, + wantStatusIntervals: []interval{{0, 5}, {8, 9}}, + wantIntervals: []interval{{0, 5}, {8, 9}}, + }, + "prev and new separate": { + prevSucceeded: "0,4,5,10-12", + pods: []indexPhase{ + {"2", v1.PodSucceeded}, + {"7", v1.PodSucceeded}, + {"8", v1.PodSucceeded}, + }, + completions: 13, + trackingWithFinalizers: true, + wantStatusIntervals: []interval{ + {0, 0}, + {4, 5}, + {10, 12}, + }, + wantIntervals: []interval{ + {0, 0}, + {2, 2}, + {4, 5}, + {7, 8}, + {10, 12}, + }, + }, + "prev between new": { + prevSucceeded: "3,4,6", + pods: []indexPhase{ + {"2", v1.PodSucceeded}, + {"7", v1.PodSucceeded}, + {"8", v1.PodSucceeded}, + }, + completions: 9, + trackingWithFinalizers: true, + wantStatusIntervals: []interval{ + {3, 4}, + {6, 6}, + }, + wantIntervals: []interval{ + {2, 4}, + {6, 8}, + }, + }, + "new between prev": { + prevSucceeded: "2,7,8", + pods: []indexPhase{ + {"3", v1.PodSucceeded}, + {"4", v1.PodSucceeded}, + {"6", v1.PodSucceeded}, + }, + completions: 9, + trackingWithFinalizers: true, + wantStatusIntervals: []interval{ + {2, 2}, + {7, 8}, + }, + wantIntervals: []interval{ + {2, 4}, + {6, 8}, + }, + }, + "new within prev": { + prevSucceeded: "2-7", + pods: []indexPhase{ + {"0", v1.PodSucceeded}, + {"3", v1.PodSucceeded}, + {"5", v1.PodSucceeded}, + {"9", v1.PodSucceeded}, + }, + completions: 10, + trackingWithFinalizers: true, + wantStatusIntervals: []interval{ + {2, 7}, + }, + wantIntervals: []interval{ + {0, 0}, + {2, 7}, + {9, 9}, + }, + }, + "corrupted interval": { + prevSucceeded: "0,1-foo,bar", + pods: []indexPhase{ + {"3", v1.PodSucceeded}, + }, + completions: 4, + trackingWithFinalizers: true, + wantStatusIntervals: []interval{ + {0, 0}, + }, + wantIntervals: []interval{ + {0, 0}, + {3, 3}, + }, }, } - for want, tc := range cases { - t.Run(want, func(t *testing.T) { + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, tc.trackingWithFinalizers)() + job := &batch.Job{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + batch.JobTrackingFinalizer: "", + }, + }, + Status: batch.JobStatus{ + CompletedIndexes: tc.prevSucceeded, + }, + Spec: batch.JobSpec{ + Completions: pointer.Int32Ptr(tc.completions), + }, + } pods := hollowPodsWithIndexPhase(tc.pods) - gotStr, gotCnt := calculateSucceededIndexes(pods, tc.completions) - if diff := cmp.Diff(want, gotStr); diff != "" { + for _, p := range pods { + p.Finalizers = append(p.Finalizers, batch.JobTrackingFinalizer) + } + gotStatusIntervals, gotIntervals := calculateSucceededIndexes(job, pods) + if diff := cmp.Diff(tc.wantStatusIntervals, gotStatusIntervals); diff != "" { + t.Errorf("Unexpected completed indexes from status (-want,+got):\n%s", diff) + } + if diff := cmp.Diff(tc.wantIntervals, gotIntervals); diff != "" { t.Errorf("Unexpected completed indexes (-want,+got):\n%s", diff) } - if gotCnt != tc.wantCount { - t.Errorf("Got number of completed indexes %d, want %d", gotCnt, tc.wantCount) + }) + } +} + +func TestIntervalsHaveIndex(t *testing.T) { + cases := map[string]struct { + intervals orderedIntervals + index int + wantHas bool + }{ + "empty": { + index: 4, + }, + "before all": { + index: 1, + intervals: []interval{{2, 4}, {5, 7}}, + }, + "after all": { + index: 9, + intervals: []interval{{2, 4}, {6, 8}}, + }, + "in between": { + index: 5, + intervals: []interval{{2, 4}, {6, 8}}, + }, + 
"in first": { + index: 2, + intervals: []interval{{2, 4}, {6, 8}}, + wantHas: true, + }, + "in second": { + index: 8, + intervals: []interval{{2, 4}, {6, 8}}, + wantHas: true, + }, + } + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + has := tc.intervals.has(tc.index) + if has != tc.wantHas { + t.Errorf("intervalsHaveIndex(_, _) = %t, want %t", has, tc.wantHas) } }) } @@ -118,10 +292,11 @@ func TestCalculateSucceededIndexes(t *testing.T) { func TestFirstPendingIndexes(t *testing.T) { cases := map[string]struct { - cnt int - completions int - pods []indexPhase - want []int + cnt int + completions int + activePods []indexPhase + succeededIndexes []interval + want []int }{ "cnt greater than completions": { cnt: 5, @@ -133,46 +308,42 @@ func TestFirstPendingIndexes(t *testing.T) { completions: 5, want: []int{0, 1}, }, - "first pods running or succeeded": { - pods: []indexPhase{ + "first pods active": { + activePods: []indexPhase{ {"0", v1.PodRunning}, {"1", v1.PodPending}, - {"2", v1.PodFailed}, }, cnt: 3, completions: 10, want: []int{2, 3, 4}, }, - "last pods running or succeeded": { - pods: []indexPhase{ - {"4", v1.PodFailed}, - {"5", v1.PodSucceeded}, + "last pods active or succeeded": { + activePods: []indexPhase{ {"6", v1.PodPending}, }, - cnt: 6, - completions: 6, - want: []int{0, 1, 2, 3, 4}, + succeededIndexes: []interval{{4, 5}}, + cnt: 6, + completions: 6, + want: []int{0, 1, 2, 3}, }, "mixed": { - pods: []indexPhase{ - {"1", v1.PodFailed}, - {"2", v1.PodSucceeded}, + activePods: []indexPhase{ {"3", v1.PodPending}, - {"5", v1.PodFailed}, {"5", v1.PodRunning}, {"8", v1.PodPending}, {noIndex, v1.PodRunning}, {"-3", v1.PodRunning}, }, - cnt: 5, - completions: 10, - want: []int{0, 1, 4, 6, 7}, + succeededIndexes: []interval{{2, 4}, {9, 9}}, + cnt: 5, + completions: 20, + want: []int{0, 1, 6, 7, 10}, }, } for name, tc := range cases { t.Run(name, func(t *testing.T) { - pods := hollowPodsWithIndexPhase(tc.pods) - got := firstPendingIndexes(pods, tc.cnt, tc.completions) + pods := hollowPodsWithIndexPhase(tc.activePods) + got := firstPendingIndexes(pods, tc.succeededIndexes, tc.cnt, tc.completions) if diff := cmp.Diff(tc.want, got); diff != "" { t.Errorf("Wrong first pending indexes (-want,+got):\n%s", diff) } diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go index a69e1410cf3..551c5e58e2c 100644 --- a/pkg/controller/job/job_controller.go +++ b/pkg/controller/job/job_controller.go @@ -31,9 +31,12 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/json" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" - utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/apiserver/pkg/util/feature" batchinformers "k8s.io/client-go/informers/batch/v1" coreinformers "k8s.io/client-go/informers/core/v1" clientset "k8s.io/client-go/kubernetes" @@ -52,7 +55,14 @@ import ( "k8s.io/utils/integer" ) -const statusUpdateRetries = 3 +const ( + statusUpdateRetries = 3 + + // maxUncountedPods is the maximum size the slices in + // .status.uncountedTerminatedPods should have to keep their representation + // roughly below 20 KB. + maxUncountedPods = 500 +) // controllerKind contains the schema.GroupVersionKind for this controller type. 
var controllerKind = batch.SchemeGroupVersion.WithKind("Job") @@ -71,9 +81,11 @@ type Controller struct { kubeClient clientset.Interface podControl controller.PodControlInterface - // To allow injection of updateJobStatus for testing. - updateHandler func(job *batch.Job) error - syncHandler func(jobKey string) (bool, error) + // To allow injection of the following for testing. + updateStatusHandler func(job *batch.Job) error + patchJobHandler func(job *batch.Job, patch []byte) error + syncHandler func(jobKey string) (bool, error) + // podStoreSynced returns true if the pod store has been synced at least once. // Added as a member to the struct to allow injection for testing. podStoreSynced cache.InformerSynced @@ -93,6 +105,9 @@ type Controller struct { // Jobs that need to be updated queue workqueue.RateLimitingInterface + // Orphan deleted pods that still have a Job tracking finalizer to be removed + orphanQueue workqueue.RateLimitingInterface + recorder record.EventRecorder } @@ -115,6 +130,7 @@ func NewController(podInformer coreinformers.PodInformer, jobInformer batchinfor }, expectations: controller.NewControllerExpectations(), queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff), "job"), + orphanQueue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff), "job_orphan_pod"), recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}), } @@ -138,7 +154,8 @@ func NewController(podInformer coreinformers.PodInformer, jobInformer batchinfor jm.podStore = podInformer.Lister() jm.podStoreSynced = podInformer.Informer().HasSynced - jm.updateHandler = jm.updateJobStatus + jm.updateStatusHandler = jm.updateJobStatus + jm.patchJobHandler = jm.patchJob jm.syncHandler = jm.syncJob metrics.Register() @@ -150,6 +167,7 @@ func NewController(podInformer coreinformers.PodInformer, jobInformer batchinfor func (jm *Controller) Run(workers int, stopCh <-chan struct{}) { defer utilruntime.HandleCrash() defer jm.queue.ShutDown() + defer jm.orphanQueue.ShutDown() klog.Infof("Starting job controller") defer klog.Infof("Shutting down job controller") @@ -162,6 +180,8 @@ func (jm *Controller) Run(workers int, stopCh <-chan struct{}) { go wait.Until(jm.worker, time.Second, stopCh) } + go wait.Until(jm.orphanWorker, time.Second, stopCh) + <-stopCh } @@ -292,7 +312,7 @@ func (jm *Controller) updatePod(old, cur interface{}) { } // When a pod is deleted, enqueue the job that manages the pod and update its expectations. -// obj could be an *v1.Pod, or a DeletionFinalStateUnknown marker item. +// obj could be an *v1.Pod, or a DeleteFinalStateUnknown marker item. func (jm *Controller) deletePod(obj interface{}) { pod, ok := obj.(*v1.Pod) @@ -320,6 +340,9 @@ func (jm *Controller) deletePod(obj interface{}) { } job := jm.resolveControllerRef(pod.Namespace, controllerRef) if job == nil { + if hasJobTrackingFinalizer(pod) { + jm.enqueueOrphanPod(pod) + } return } jobKey, err := controller.KeyFunc(job) @@ -380,9 +403,19 @@ func (jm *Controller) enqueueController(obj interface{}, immediate bool) { // all controllers there will still be some replica instability. One way to handle this is // by querying the store for all controllers that this rc overlaps, as well as all // controllers that overlap this rc, and sorting them. 
+ klog.Infof("enqueueing job %s", key) jm.queue.AddAfter(key, backoff) } +func (jm *Controller) enqueueOrphanPod(obj *v1.Pod) { + key, err := controller.KeyFunc(obj) + if err != nil { + utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err)) + return + } + jm.orphanQueue.Add(key) +} + // worker runs a worker thread that just dequeues items, processes them, and marks them done. // It enforces that the syncHandler is never invoked concurrently with the same key. func (jm *Controller) worker() { @@ -411,10 +444,61 @@ func (jm *Controller) processNextWorkItem() bool { return true } +func (jm *Controller) orphanWorker() { + for jm.processNextOrphanPod() { + } +} + +func (jm Controller) processNextOrphanPod() bool { + key, quit := jm.orphanQueue.Get() + if quit { + return false + } + defer jm.orphanQueue.Done(key) + err := jm.syncOrphanPod(key.(string)) + if err != nil { + utilruntime.HandleError(fmt.Errorf("Error syncing orphan pod: %v", err)) + jm.orphanQueue.AddRateLimited(key) + } else { + jm.orphanQueue.Forget(key) + } + + return true +} + +// syncOrphanPod removes the tracking finalizer from an orphan pod if found. +func (jm Controller) syncOrphanPod(key string) error { + startTime := time.Now() + defer func() { + klog.V(4).Infof("Finished syncing orphan pod %q (%v)", key, time.Since(startTime)) + }() + + ns, name, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + return err + } + + sharedPod, err := jm.podStore.Pods(ns).Get(name) + if err != nil { + if apierrors.IsNotFound(err) { + klog.V(4).Infof("Orphan pod has been deleted: %v", key) + return nil + } + return err + } + if patch := removeTrackingFinalizerPatch(sharedPod); patch != nil { + if err := jm.podControl.PatchPod(ns, name, patch); err != nil && !apierrors.IsNotFound(err) { + return err + } + } + return nil +} + // getPodsForJob returns the set of pods that this Job should manage. -// It also reconciles ControllerRef by adopting/orphaning. +// It also reconciles ControllerRef by adopting/orphaning, adding tracking +// finalizers, if enabled. // Note that the returned Pods are pointers into the cache. -func (jm *Controller) getPodsForJob(j *batch.Job) ([]*v1.Pod, error) { +func (jm *Controller) getPodsForJob(j *batch.Job, withFinalizers bool) ([]*v1.Pod, error) { selector, err := metav1.LabelSelectorAsSelector(j.Spec.Selector) if err != nil { return nil, fmt.Errorf("couldn't convert Job selector: %v", err) @@ -437,8 +521,31 @@ func (jm *Controller) getPodsForJob(j *batch.Job) ([]*v1.Pod, error) { } return fresh, nil }) - cm := controller.NewPodControllerRefManager(jm.podControl, j, selector, controllerKind, canAdoptFunc) - return cm.ClaimPods(pods) + var finalizers []string + if withFinalizers { + finalizers = append(finalizers, batch.JobTrackingFinalizer) + } + cm := controller.NewPodControllerRefManager(jm.podControl, j, selector, controllerKind, canAdoptFunc, finalizers...) + // When adopting Pods, this operation adds an ownerRef and finalizers. + pods, err = cm.ClaimPods(pods) + if err != nil || !withFinalizers { + return pods, err + } + // Set finalizer on adopted pods for the remaining calculations. 
+ for i, p := range pods { + adopted := true + for _, r := range p.OwnerReferences { + if r.UID == j.UID { + adopted = false + break + } + } + if adopted && !hasJobTrackingFinalizer(p) { + pods[i] = p.DeepCopy() + pods[i].Finalizers = append(p.Finalizers, batch.JobTrackingFinalizer) + } + } + return pods, err } // syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning @@ -475,7 +582,7 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) { } // Cannot create Pods if this is an Indexed Job and the feature is disabled. - if !utilfeature.DefaultFeatureGate.Enabled(features.IndexedJob) && isIndexedJob(&job) { + if !feature.DefaultFeatureGate.Enabled(features.IndexedJob) && isIndexedJob(&job) { jm.recorder.Event(&job, v1.EventTypeWarning, "IndexedJobDisabled", "Skipped Indexed Job sync because feature is disabled.") return false, nil } @@ -500,18 +607,32 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) { metrics.JobSyncNum.WithLabelValues(completionMode, result, action).Inc() }() + var uncounted *uncountedTerminatedPods + if trackingUncountedPods(&job) { + klog.V(4).InfoS("Tracking uncounted Pods with pod finalizers", "job", klog.KObj(&job)) + if job.Status.UncountedTerminatedPods == nil { + job.Status.UncountedTerminatedPods = &batch.UncountedTerminatedPods{} + } + uncounted = newUncountedTerminatedPods(*job.Status.UncountedTerminatedPods) + } else if patch := removeTrackingAnnotationPatch(&job); patch != nil { + if err := jm.patchJobHandler(&job, patch); err != nil { + return false, fmt.Errorf("removing tracking finalizer from job %s: %w", key, err) + } + } + // Check the expectations of the job before counting active pods, otherwise a new pod can sneak in // and update the expectations after we've retrieved active pods from the store. If a new pod enters // the store after we've checked the expectation, the job sync is just deferred till the next relist. jobNeedsSync := jm.expectations.SatisfiedExpectations(key) - pods, err := jm.getPodsForJob(&job) + pods, err := jm.getPodsForJob(&job, uncounted != nil) if err != nil { return false, err } + activePods := controller.FilterActivePods(pods) active := int32(len(activePods)) - succeeded, failed := getStatus(&job, pods) + succeeded, failed := getStatus(&job, pods, uncounted) // Job first start. Set StartTime and start the ActiveDeadlineSeconds timer // only if the job is not in the suspended state. 
if job.Status.StartTime == nil && !jobSuspended(&job) { @@ -526,53 +647,49 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) { } var manageJobErr error - jobFailed := false - var failureReason string - var failureMessage string + var finishedCondition *batch.JobCondition - jobHaveNewFailure := failed > job.Status.Failed + jobHasNewFailure := failed > job.Status.Failed // new failures happen when status does not reflect the failures and active // is different than parallelism, otherwise the previous controller loop // failed updating status so even if we pick up failure it is not a new one - exceedsBackoffLimit := jobHaveNewFailure && (active != *job.Spec.Parallelism) && + exceedsBackoffLimit := jobHasNewFailure && (active != *job.Spec.Parallelism) && (failed > *job.Spec.BackoffLimit) if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) { // check if the number of pod restart exceeds backoff (for restart OnFailure only) // OR if the number of failed jobs increased since the last syncJob - jobFailed = true - failureReason = "BackoffLimitExceeded" - failureMessage = "Job has reached the specified backoff limit" + finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "BackoffLimitExceeded", "Job has reached the specified backoff limit") } else if pastActiveDeadline(&job) { - jobFailed = true - failureReason = "DeadlineExceeded" - failureMessage = "Job was active longer than specified deadline" + finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "DeadlineExceeded", "Job was active longer than specified deadline") } - var succeededIndexes string + var prevSucceededIndexes, succeededIndexes orderedIntervals if isIndexedJob(&job) { - succeededIndexes, succeeded = calculateSucceededIndexes(pods, *job.Spec.Completions) + prevSucceededIndexes, succeededIndexes = calculateSucceededIndexes(&job, pods) + succeeded = int32(succeededIndexes.total()) } - jobConditionsChanged := false - manageJobCalled := false - if jobFailed { - // TODO(#28486): Account for pod failures in status once we can track - // completions without lingering pods. - _, manageJobErr = jm.deleteJobPods(&job, "", activePods) - - // update status values accordingly - failed += active - active = 0 - job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, v1.ConditionTrue, failureReason, failureMessage)) - jobConditionsChanged = true - jm.recorder.Event(&job, v1.EventTypeWarning, failureReason, failureMessage) - metrics.JobFinishedNum.WithLabelValues(completionMode, "failed").Inc() + suspendCondChanged := false + // Remove active pods if Job failed. + if finishedCondition != nil { + deleted, err := jm.deleteActivePods(&job, activePods) + if uncounted == nil { + // Legacy behavior: pretend all active pods were successfully removed. + deleted = active + } else if deleted != active { + // Can't declare the Job as finished yet, as there might be remaining + // pod finalizers. + finishedCondition = nil + } + active -= deleted + failed += deleted + manageJobErr = err } else { + manageJobCalled := false if jobNeedsSync && job.DeletionTimestamp == nil { - active, action, manageJobErr = jm.manageJob(&job, activePods, succeeded, pods) + active, action, manageJobErr = jm.manageJob(&job, activePods, succeeded, succeededIndexes) manageJobCalled = true } - completions := succeeded complete := false if job.Spec.Completions == nil { // This type of job is complete when any pod exits with success. 
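The completion check that starts above and continues below reduces to a simple predicate; a minimal restatement as a sketch (not code from this patch):

// jobIsComplete mirrors the two rules in the surrounding branches: with no
// completion count, any success finishes the Job once nothing is active;
// with a count, the Job finishes when enough completions have succeeded.
func jobIsComplete(completions *int32, succeeded, active int32) bool {
	if completions == nil {
		return succeeded > 0 && active == 0
	}
	return succeeded >= *completions && active == 0
}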
@@ -581,29 +698,17 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) { // not expected to fail, but if they do, the failure is ignored. Once any // pod succeeds, the controller waits for remaining pods to finish, and // then the job is complete. - if succeeded > 0 && active == 0 { - complete = true - } + complete = succeeded > 0 && active == 0 } else { // Job specifies a number of completions. This type of job signals // success by having that number of successes. Since we do not // start more pods than there are remaining completions, there should // not be any remaining active pods once this count is reached. - if completions >= *job.Spec.Completions && active == 0 { - complete = true - if completions > *job.Spec.Completions { - jm.recorder.Event(&job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached") - } - } + complete = succeeded >= *job.Spec.Completions && active == 0 } if complete { - job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, v1.ConditionTrue, "", "")) - jobConditionsChanged = true - now := metav1.Now() - job.Status.CompletionTime = &now - jm.recorder.Event(&job, v1.EventTypeNormal, "Completed", "Job completed") - metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded").Inc() - } else if utilfeature.DefaultFeatureGate.Enabled(features.SuspendJob) && manageJobCalled { + finishedCondition = newCondition(batch.JobComplete, v1.ConditionTrue, "", "") + } else if feature.DefaultFeatureGate.Enabled(features.SuspendJob) && manageJobCalled { // Update the conditions / emit events only if manageJob was called in // this syncJob. Otherwise wait for the right syncJob call to make // updates. @@ -612,7 +717,7 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) { var isUpdated bool job.Status.Conditions, isUpdated = ensureJobConditionStatus(job.Status.Conditions, batch.JobSuspended, v1.ConditionTrue, "JobSuspended", "Job suspended") if isUpdated { - jobConditionsChanged = true + suspendCondChanged = true jm.recorder.Event(&job, v1.EventTypeNormal, "Suspended", "Job suspended") } } else { @@ -620,7 +725,7 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) { var isUpdated bool job.Status.Conditions, isUpdated = ensureJobConditionStatus(job.Status.Conditions, batch.JobSuspended, v1.ConditionFalse, "JobResumed", "Job resumed") if isUpdated { - jobConditionsChanged = true + suspendCondChanged = true jm.recorder.Event(&job, v1.EventTypeNormal, "Resumed", "Job resumed") // Resumed jobs will always reset StartTime to current time. This is // done because the ActiveDeadlineSeconds timer shouldn't go off @@ -644,20 +749,44 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) { forget = true } + if uncounted != nil { + needsStatusUpdate := suspendCondChanged || active != job.Status.Active + job.Status.Active = active + err = jm.trackJobStatusAndRemoveFinalizers(&job, pods, prevSucceededIndexes, *uncounted, finishedCondition, needsStatusUpdate) + if err != nil { + return false, err + } + jobFinished := IsJobFinished(&job) + if jobHasNewFailure && !jobFinished { + // returning an error will re-enqueue Job after the backoff period + return forget, fmt.Errorf("failed pod(s) detected for job key %q", key) + } + forget = true + return forget, manageJobErr + } + // Legacy path: tracking without finalizers. + + // Ensure that there are no leftover tracking finalizers. 
+ if err := jm.removeTrackingFinalizersFromAllPods(pods); err != nil { + return false, fmt.Errorf("removing disabled finalizers from job pods %s: %w", key, err) + } + // no need to update the job if the status hasn't changed since last time - if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || jobConditionsChanged { + if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || suspendCondChanged || finishedCondition != nil { job.Status.Active = active job.Status.Succeeded = succeeded job.Status.Failed = failed if isIndexedJob(&job) { - job.Status.CompletedIndexes = succeededIndexes + job.Status.CompletedIndexes = succeededIndexes.String() } + job.Status.UncountedTerminatedPods = nil + jm.enactJobFinished(&job, finishedCondition) - if err := jm.updateHandler(&job); err != nil { + if err := jm.updateStatusHandler(&job); err != nil { return forget, err } - if jobHaveNewFailure && !IsJobFinished(&job) { + if jobHasNewFailure && !IsJobFinished(&job) { // returning an error will re-enqueue Job after the backoff period return forget, fmt.Errorf("failed pod(s) detected for job key %q", key) } @@ -668,9 +797,12 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) { return forget, manageJobErr } -// deleteJobPods deletes the pods, returns the number of successful removals -// and any error. -func (jm *Controller) deleteJobPods(job *batch.Job, jobKey string, pods []*v1.Pod) (int32, error) { +// deleteActivePods issues deletion for active Pods, preserving finalizers. +// This is done through DELETE calls that set deletion timestamps. +// The method trackJobStatusAndRemoveFinalizers removes the finalizers, after +// which the objects can actually be deleted. +// Returns number of successfully deletions issued. +func (jm *Controller) deleteActivePods(job *batch.Job, pods []*v1.Pod) (int32, error) { errCh := make(chan error, len(pods)) successfulDeletes := int32(len(pods)) wg := sync.WaitGroup{} @@ -678,17 +810,10 @@ func (jm *Controller) deleteJobPods(job *batch.Job, jobKey string, pods []*v1.Po for i := range pods { go func(pod *v1.Pod) { defer wg.Done() - if err := jm.podControl.DeletePod(job.Namespace, pod.Name, job); err != nil { - // Decrement the expected number of deletes because the informer won't observe this deletion - if jobKey != "" { - jm.expectations.DeletionObserved(jobKey) - } - if !apierrors.IsNotFound(err) { - klog.V(2).Infof("Failed to delete Pod", "job", klog.KObj(job), "pod", klog.KObj(pod), "err", err) - atomic.AddInt32(&successfulDeletes, -1) - errCh <- err - utilruntime.HandleError(err) - } + if err := jm.podControl.DeletePod(job.Namespace, pod.Name, job); err != nil && !apierrors.IsNotFound(err) { + atomic.AddInt32(&successfulDeletes, -1) + errCh <- err + utilruntime.HandleError(err) } }(pods[i]) } @@ -696,6 +821,267 @@ func (jm *Controller) deleteJobPods(job *batch.Job, jobKey string, pods []*v1.Po return successfulDeletes, errorFromChannel(errCh) } +// deleteJobPods deletes the pods, returns the number of successful removals +// and any error. 
+func (jm *Controller) deleteJobPods(job *batch.Job, jobKey string, pods []*v1.Pod) (int32, error) { + errCh := make(chan error, len(pods)) + successfulDeletes := int32(len(pods)) + + failDelete := func(pod *v1.Pod, err error) { + // Decrement the expected number of deletes because the informer won't observe this deletion + jm.expectations.DeletionObserved(jobKey) + if !apierrors.IsNotFound(err) { + klog.V(2).Infof("Failed to delete Pod", "job", klog.KObj(job), "pod", klog.KObj(pod), "err", err) + atomic.AddInt32(&successfulDeletes, -1) + errCh <- err + utilruntime.HandleError(err) + } + } + + wg := sync.WaitGroup{} + wg.Add(len(pods)) + for i := range pods { + go func(pod *v1.Pod) { + defer wg.Done() + if patch := removeTrackingFinalizerPatch(pod); patch != nil { + if err := jm.podControl.PatchPod(pod.Namespace, pod.Name, patch); err != nil { + failDelete(pod, fmt.Errorf("removing completion finalizer: %w", err)) + return + } + } + if err := jm.podControl.DeletePod(job.Namespace, pod.Name, job); err != nil { + failDelete(pod, err) + } + }(pods[i]) + } + wg.Wait() + return successfulDeletes, errorFromChannel(errCh) +} + +// removeTrackingFinalizersFromAllPods removes finalizers from any Job Pod. This is called +// when Job tracking with finalizers is disabled. +func (jm *Controller) removeTrackingFinalizersFromAllPods(pods []*v1.Pod) error { + var podsWithFinalizer []*v1.Pod + for _, pod := range pods { + if hasJobTrackingFinalizer(pod) { + podsWithFinalizer = append(podsWithFinalizer, pod) + } + } + if len(podsWithFinalizer) == 0 { + return nil + } + _, err := jm.removeTrackingFinalizerFromPods(podsWithFinalizer) + return err +} + +// trackJobStatusAndRemoveFinalizers does: +// 1. Add finished Pods to .status.uncountedTerminatedPods +// 2. Remove the finalizers from the Pods if they completed or were removed +// or the job was removed. +// 3. Increment job counters for pods that no longer have a finalizer. +// 4. Add Complete condition if satisfied with current counters. +// It does this in a controlled way such that the size of .status doesn't grow +// too much. +func (jm *Controller) trackJobStatusAndRemoveFinalizers(job *batch.Job, pods []*v1.Pod, succeededIndexes orderedIntervals, uncounted uncountedTerminatedPods, finishedCond *batch.JobCondition, needsFlush bool) error { + isIndexed := isIndexedJob(job) + var podsToRemoveFinalizer []*v1.Pod + uncountedStatus := job.Status.UncountedTerminatedPods + var newSucceededIndexes []int + if isIndexed { + // Sort to introduce completed Indexes First. + sort.Sort(byCompletionIndex(pods)) + } + for _, pod := range pods { + if !hasJobTrackingFinalizer(pod) { + continue + } + podFinished := pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed + // Terminating pods are counted as failed. This guarantees that orphan Pods + // count as failures. + // Active pods are terminated when the job has completed, thus they count as + // failures as well. + podTerminating := pod.DeletionTimestamp != nil || finishedCond != nil + if podFinished || podTerminating || job.DeletionTimestamp != nil { + podsToRemoveFinalizer = append(podsToRemoveFinalizer, pod) + } + if pod.Status.Phase == v1.PodSucceeded { + if isIndexed { + // The completion index is enough to avoid recounting succeeded pods. + // No need to track UIDs. 
+ ix := getCompletionIndex(pod.Annotations) + if ix != unknownCompletionIndex && ix < int(*job.Spec.Completions) && !succeededIndexes.has(ix) { + newSucceededIndexes = append(newSucceededIndexes, ix) + needsFlush = true + } + } else if !uncounted.succeeded.Has(string(pod.UID)) { + needsFlush = true + uncountedStatus.Succeeded = append(uncountedStatus.Succeeded, pod.UID) + } + } else if pod.Status.Phase == v1.PodFailed || podTerminating { + ix := getCompletionIndex(pod.Annotations) + if !uncounted.failed.Has(string(pod.UID)) && (!isIndexed || (ix != unknownCompletionIndex && ix < int(*job.Spec.Completions))) { + needsFlush = true + uncountedStatus.Failed = append(uncountedStatus.Failed, pod.UID) + } + } + if len(uncountedStatus.Succeeded)+len(uncountedStatus.Failed) >= maxUncountedPods { + if len(newSucceededIndexes) > 0 { + succeededIndexes = succeededIndexes.withOrderedIndexes(newSucceededIndexes) + job.Status.Succeeded = int32(succeededIndexes.total()) + job.Status.CompletedIndexes = succeededIndexes.String() + } + var err error + if needsFlush, err = jm.flushUncountedAndRemoveFinalizers(job, podsToRemoveFinalizer, needsFlush); err != nil { + return err + } + podsToRemoveFinalizer = nil + newSucceededIndexes = nil + } + } + if len(newSucceededIndexes) > 0 { + succeededIndexes = succeededIndexes.withOrderedIndexes(newSucceededIndexes) + job.Status.Succeeded = int32(succeededIndexes.total()) + job.Status.CompletedIndexes = succeededIndexes.String() + } + var err error + if needsFlush, err = jm.flushUncountedAndRemoveFinalizers(job, podsToRemoveFinalizer, needsFlush); err != nil { + return err + } + if jm.enactJobFinished(job, finishedCond) { + needsFlush = true + } + if needsFlush { + if err := jm.updateStatusHandler(job); err != nil { + return fmt.Errorf("removing uncounted pods from status: %w", err) + } + } + return nil +} + +// flushUncountedAndRemoveFinalizers does: +// 1. flush the Job status that might include new uncounted Pod UIDs. +// 2. perform the removal of finalizers from Pods which are in the uncounted +// lists. +// 3. update the counters based on the Pods for which it successfully removed +// the finalizers. +// 4. (if not all removals succeeded) flush Job status again. +// Returns whether there are pending changes in the Job status that need to be +// flushed in subsequent calls. 
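A toy walk-through of steps 2-4 above (all identifiers and values are invented; it only mimics the bookkeeping, not the API calls):

package main

import "fmt"

func main() {
	// Step 1 already persisted these UIDs in status.uncountedTerminatedPods.succeeded.
	uncountedSucceeded := []string{"uid-a", "uid-b", "uid-c"}
	// Step 2: suppose finalizer removal failed only for uid-b.
	stillHasFinalizer := map[string]bool{"uid-b": true}

	// Step 3: promote the UIDs whose finalizer is gone into the aggregate counter;
	// the rest stay uncounted and force another flush (step 4).
	var remaining []string
	promoted := 0
	for _, uid := range uncountedSucceeded {
		if stillHasFinalizer[uid] {
			remaining = append(remaining, uid)
			continue
		}
		promoted++
	}
	fmt.Println(promoted, remaining) // 2 [uid-b]: status.succeeded += 2, uid-b retried next sync
}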
+func (jm *Controller) flushUncountedAndRemoveFinalizers(job *batch.Job, podsToRemoveFinalizer []*v1.Pod, needsFlush bool) (bool, error) { + if needsFlush { + if err := jm.updateStatusHandler(job); err != nil { + return needsFlush, fmt.Errorf("adding uncounted pods to status: %w", err) + } + needsFlush = false + } + var failedToRm []*v1.Pod + var rmErr error + if len(podsToRemoveFinalizer) > 0 { + failedToRm, rmErr = jm.removeTrackingFinalizerFromPods(podsToRemoveFinalizer) + } + uncountedStatus := job.Status.UncountedTerminatedPods + if rmErr == nil { + needsFlush = len(uncountedStatus.Succeeded) > 0 || len(uncountedStatus.Failed) > 0 + job.Status.Succeeded += int32(len(uncountedStatus.Succeeded)) + uncountedStatus.Succeeded = nil + job.Status.Failed += int32(len(uncountedStatus.Failed)) + uncountedStatus.Failed = nil + return needsFlush, nil + } + uidsWithFinalizer := make(sets.String, len(failedToRm)) + for _, p := range failedToRm { + uidsWithFinalizer.Insert(string(p.UID)) + } + newUncounted := uncountedWithFailedFinalizerRemovals(uncountedStatus.Succeeded, uidsWithFinalizer) + if len(newUncounted) != len(uncountedStatus.Succeeded) { + needsFlush = true + job.Status.Succeeded += int32(len(uncountedStatus.Succeeded) - len(newUncounted)) + uncountedStatus.Succeeded = newUncounted + } + newUncounted = uncountedWithFailedFinalizerRemovals(uncountedStatus.Failed, uidsWithFinalizer) + if len(newUncounted) != len(uncountedStatus.Failed) { + needsFlush = true + job.Status.Failed += int32(len(uncountedStatus.Failed) - len(newUncounted)) + uncountedStatus.Failed = newUncounted + } + if needsFlush { + if err := jm.updateStatusHandler(job); err != nil { + return needsFlush, fmt.Errorf("removing uncounted pods from status: %w", err) + } + } + return needsFlush, rmErr +} + +// removeTrackingFinalizerFromPods removes tracking finalizers from Pods and +// returns the pod for which the operation failed (if the pod was deleted when +// this function was called, it's considered as the finalizer was removed +// successfully). +func (jm *Controller) removeTrackingFinalizerFromPods(pods []*v1.Pod) ([]*v1.Pod, error) { + errCh := make(chan error, len(pods)) + var failed []*v1.Pod + var lock sync.Mutex + wg := sync.WaitGroup{} + wg.Add(len(pods)) + for i := range pods { + go func(i int) { + pod := pods[i] + defer wg.Done() + if patch := removeTrackingFinalizerPatch(pod); patch != nil { + if err := jm.podControl.PatchPod(pod.Namespace, pod.Name, patch); err != nil && !apierrors.IsNotFound(err) { + errCh <- err + utilruntime.HandleError(err) + lock.Lock() + failed = append(failed, pod) + lock.Unlock() + return + } + } + }(i) + } + wg.Wait() + return failed, errorFromChannel(errCh) +} + +// enactJobFinished adds the Complete or Failed condition and records events. +// Returns whether the Job was considered finished. 
+func (jm *Controller) enactJobFinished(job *batch.Job, finishedCond *batch.JobCondition) bool { + if finishedCond == nil { + return false + } + if uncounted := job.Status.UncountedTerminatedPods; uncounted != nil { + if len(uncounted.Succeeded) > 0 || len(uncounted.Failed) > 0 { + return false + } + } + completionMode := string(batch.NonIndexedCompletion) + if isIndexedJob(job) { + completionMode = string(*job.Spec.CompletionMode) + } + job.Status.Conditions = append(job.Status.Conditions, *finishedCond) + if finishedCond.Type == batch.JobComplete { + if job.Spec.Completions != nil && job.Status.Succeeded > *job.Spec.Completions { + jm.recorder.Event(job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached") + } + job.Status.CompletionTime = &finishedCond.LastTransitionTime + jm.recorder.Event(job, v1.EventTypeNormal, "Completed", "Job completed") + metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded").Inc() + } else { + jm.recorder.Event(job, v1.EventTypeWarning, finishedCond.Reason, finishedCond.Message) + metrics.JobFinishedNum.WithLabelValues(completionMode, "failed").Inc() + } + return true +} + +func uncountedWithFailedFinalizerRemovals(uncounted []types.UID, uidsWithFinalizer sets.String) []types.UID { + var newUncounted []types.UID + for _, uid := range uncounted { + if uidsWithFinalizer.Has(string(uid)) { + newUncounted = append(newUncounted, uid) + } + } + return newUncounted +} + // pastBackoffLimitOnFailure checks if container restartCounts sum exceeds BackoffLimit // this method applies only to pods with restartPolicy == OnFailure func pastBackoffLimitOnFailure(job *batch.Job, pods []*v1.Pod) bool { @@ -736,8 +1122,8 @@ func pastActiveDeadline(job *batch.Job) bool { return duration >= allowedDuration } -func newCondition(conditionType batch.JobConditionType, status v1.ConditionStatus, reason, message string) batch.JobCondition { - return batch.JobCondition{ +func newCondition(conditionType batch.JobConditionType, status v1.ConditionStatus, reason, message string) *batch.JobCondition { + return &batch.JobCondition{ Type: conditionType, Status: status, LastProbeTime: metav1.Now(), @@ -747,23 +1133,33 @@ func newCondition(conditionType batch.JobConditionType, status v1.ConditionStatu } } -// getStatus returns no of succeeded and failed pods running a job -func getStatus(job *batch.Job, pods []*v1.Pod) (succeeded, failed int32) { - succeeded = int32(countPodsByPhase(job, pods, v1.PodSucceeded)) - failed = int32(countPodsByPhase(job, pods, v1.PodFailed)) - return +// getStatus returns number of succeeded and failed pods running a job +func getStatus(job *batch.Job, pods []*v1.Pod, uncounted *uncountedTerminatedPods) (succeeded, failed int32) { + if uncounted != nil { + succeeded = job.Status.Succeeded + failed = job.Status.Failed + } + succeeded += int32(countValidPodsWithFilter(job, pods, uncounted.Succeeded(), func(p *v1.Pod) bool { + return p.Status.Phase == v1.PodSucceeded + })) + failed += int32(countValidPodsWithFilter(job, pods, uncounted.Failed(), func(p *v1.Pod) bool { + // Counting deleted Pods as failures to account for orphan Pods that never + // have a chance to reach the Failed phase. + return p.Status.Phase == v1.PodFailed || (p.DeletionTimestamp != nil && p.Status.Phase != v1.PodSucceeded) + })) + return succeeded, failed } // jobSuspended returns whether a Job is suspended while taking the feature // gate into account. 
func jobSuspended(job *batch.Job) bool { - return utilfeature.DefaultFeatureGate.Enabled(features.SuspendJob) && job.Spec.Suspend != nil && *job.Spec.Suspend + return feature.DefaultFeatureGate.Enabled(features.SuspendJob) && job.Spec.Suspend != nil && *job.Spec.Suspend } // manageJob is the core method responsible for managing the number of running // pods according to what is specified in the job.Spec. // Does NOT modify . -func (jm *Controller) manageJob(job *batch.Job, activePods []*v1.Pod, succeeded int32, allPods []*v1.Pod) (int32, string, error) { +func (jm *Controller) manageJob(job *batch.Job, activePods []*v1.Pod, succeeded int32, succeededIndexes []interval) (int32, string, error) { active := int32(len(activePods)) parallelism := *job.Spec.Parallelism jobKey, err := controller.KeyFunc(job) @@ -798,6 +1194,9 @@ func (jm *Controller) manageJob(job *batch.Job, activePods []*v1.Pod, succeeded if wantActive > parallelism { wantActive = parallelism } + if wantActive < 0 { + wantActive = 0 + } } rmAtLeast := active - wantActive @@ -834,7 +1233,7 @@ func (jm *Controller) manageJob(job *batch.Job, activePods []*v1.Pod, succeeded var indexesToAdd []int if isIndexedJob(job) { - indexesToAdd = firstPendingIndexes(allPods, int(diff), int(*job.Spec.Completions)) + indexesToAdd = firstPendingIndexes(activePods, succeededIndexes, int(diff), int(*job.Spec.Completions)) diff = int32(len(indexesToAdd)) } active += diff @@ -843,6 +1242,9 @@ func (jm *Controller) manageJob(job *batch.Job, activePods []*v1.Pod, succeeded if isIndexedJob(job) { addCompletionIndexEnvVariables(podTemplate) } + if trackingUncountedPods(job) { + podTemplate.Finalizers = appendJobCompletionFinalizerIfNotFound(podTemplate.Finalizers) + } // Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize // and double with each successful iteration in a kind of "slow start". @@ -857,7 +1259,7 @@ func (jm *Controller) manageJob(job *batch.Job, activePods []*v1.Pod, succeeded wait.Add(int(batchSize)) for i := int32(0); i < batchSize; i++ { completionIndex := unknownCompletionIndex - if indexesToAdd != nil { + if len(indexesToAdd) > 0 { completionIndex = indexesToAdd[0] indexesToAdd = indexesToAdd[1:] } @@ -952,6 +1354,12 @@ func (jm *Controller) updateJobStatus(job *batch.Job) error { return err } +func (jm *Controller) patchJob(job *batch.Job, data []byte) error { + _, err := jm.kubeClient.BatchV1().Jobs(job.Namespace).Patch( + context.TODO(), job.Name, types.StrategicMergePatchType, data, metav1.PatchOptions{}) + return err +} + func getBackoff(queue workqueue.RateLimitingInterface, key interface{}) time.Duration { exp := queue.NumRequeues(key) @@ -972,18 +1380,121 @@ func getBackoff(queue workqueue.RateLimitingInterface, key interface{}) time.Dur return calculated } -// countPodsByPhase returns pods based on their phase. -func countPodsByPhase(job *batch.Job, pods []*v1.Pod, phase v1.PodPhase) int { - result := 0 +// countValidPodsWithFilter returns number of valid pods that pass the filter. +// Pods are valid if they have a finalizer and, for Indexed Jobs, a valid +// completion index. 
+func countValidPodsWithFilter(job *batch.Job, pods []*v1.Pod, uncounted sets.String, filter func(*v1.Pod) bool) int { + result := len(uncounted) for _, p := range pods { - idx := getCompletionIndex(p.Annotations) - if phase == p.Status.Phase && (!isIndexedJob(job) || (idx != unknownCompletionIndex && idx < int(*job.Spec.Completions))) { + // Pods that don't have a completion finalizer are in the uncounted set or + // have already been accounted for in the Job status. + if uncounted != nil && (!hasJobTrackingFinalizer(p) || uncounted.Has(string(p.UID))) { + continue + } + if isIndexedJob(job) { + idx := getCompletionIndex(p.Annotations) + if idx == unknownCompletionIndex || idx >= int(*job.Spec.Completions) { + continue + } + } + if filter(p) { result++ } } return result } +func trackingUncountedPods(job *batch.Job) bool { + return feature.DefaultFeatureGate.Enabled(features.JobTrackingWithFinalizers) && hasJobTrackingAnnotation(job) +} + +func hasJobTrackingAnnotation(job *batch.Job) bool { + if job.Annotations == nil { + return false + } + _, ok := job.Annotations[batch.JobTrackingFinalizer] + return ok +} + +func appendJobCompletionFinalizerIfNotFound(finalizers []string) []string { + for _, fin := range finalizers { + if fin == batch.JobTrackingFinalizer { + return finalizers + } + } + return append(finalizers, batch.JobTrackingFinalizer) +} + +func removeTrackingFinalizerPatch(pod *v1.Pod) []byte { + if !hasJobTrackingFinalizer(pod) { + return nil + } + patch := map[string]interface{}{ + "metadata": map[string]interface{}{ + "$deleteFromPrimitiveList/finalizers": []string{batch.JobTrackingFinalizer}, + }, + } + patchBytes, _ := json.Marshal(patch) + return patchBytes +} + +func removeTrackingAnnotationPatch(job *batch.Job) []byte { + if !hasJobTrackingAnnotation(job) { + return nil + } + patch := map[string]interface{}{ + "metadata": map[string]interface{}{ + "annotations": map[string]interface{}{ + batch.JobTrackingFinalizer: nil, + }, + }, + } + patchBytes, _ := json.Marshal(patch) + return patchBytes +} + +func hasJobTrackingFinalizer(pod *v1.Pod) bool { + for _, fin := range pod.Finalizers { + if fin == batch.JobTrackingFinalizer { + return true + } + } + return false +} + +type uncountedTerminatedPods struct { + succeeded sets.String + failed sets.String +} + +func newUncountedTerminatedPods(in batch.UncountedTerminatedPods) *uncountedTerminatedPods { + obj := uncountedTerminatedPods{ + succeeded: make(sets.String, len(in.Succeeded)), + failed: make(sets.String, len(in.Failed)), + } + for _, v := range in.Succeeded { + obj.succeeded.Insert(string(v)) + } + for _, v := range in.Failed { + obj.failed.Insert(string(v)) + } + return &obj +} + +func (u *uncountedTerminatedPods) Succeeded() sets.String { + if u == nil { + return nil + } + return u.succeeded +} + +func (u *uncountedTerminatedPods) Failed() sets.String { + if u == nil { + return nil + } + return u.failed +} + func errorFromChannel(errCh <-chan error) error { select { case err := <-errCh: @@ -1014,7 +1525,7 @@ func ensureJobConditionStatus(list []batch.JobCondition, cType batch.JobConditio } // A condition with that type doesn't exist in the list. 
if status != v1.ConditionFalse { - return append(list, newCondition(cType, status, reason, message)), true + return append(list, *newCondition(cType, status, reason, message)), true } return list, false } diff --git a/pkg/controller/job/job_controller_test.go b/pkg/controller/job/job_controller_test.go index e1962a47522..e19c855ada3 100644 --- a/pkg/controller/job/job_controller_test.go +++ b/pkg/controller/job/job_controller_test.go @@ -17,6 +17,8 @@ limitations under the License. package job import ( + "context" + "errors" "fmt" "sort" "strconv" @@ -29,6 +31,7 @@ import ( apiequality "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/rand" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/uuid" @@ -123,6 +126,9 @@ func newPodList(count int32, status v1.PodPhase, job *batch.Job) []v1.Pod { for i := int32(0); i < count; i++ { newPod := newPod(fmt.Sprintf("pod-%v", rand.String(10)), job) newPod.Status = v1.PodStatus{Phase: status} + if trackingUncountedPods(job) { + newPod.Finalizers = append(newPod.Finalizers, batch.JobTrackingFinalizer) + } pods = append(pods, *newPod) } return pods @@ -153,6 +159,9 @@ func setPodsStatusesWithIndexes(podIndexer cache.Indexer, job *batch.Job, status } p.Spec.Hostname = fmt.Sprintf("%s-%s", job.Name, s.Index) } + if trackingUncountedPods(job) { + p.Finalizers = append(p.Finalizers, batch.JobTrackingFinalizer) + } podIndexer.Add(p) } } @@ -195,6 +204,9 @@ func TestControllerSyncJob(t *testing.T) { expectedConditionReason string expectedCreatedIndexes sets.Int + // only applicable to tracking with finalizers + expectedPodPatches int + // features indexedJobEnabled bool suspendJobEnabled bool @@ -240,15 +252,16 @@ func TestControllerSyncJob(t *testing.T) { expectedActive: 2, }, "too few active pods": { - parallelism: 2, - completions: 5, - backoffLimit: 6, - jobKeyForget: true, - activePods: 1, - succeededPods: 1, - expectedCreations: 1, - expectedActive: 2, - expectedSucceeded: 1, + parallelism: 2, + completions: 5, + backoffLimit: 6, + jobKeyForget: true, + activePods: 1, + succeededPods: 1, + expectedCreations: 1, + expectedActive: 2, + expectedSucceeded: 1, + expectedPodPatches: 1, }, "too few active pods with a dynamic job": { parallelism: 2, @@ -272,13 +285,14 @@ func TestControllerSyncJob(t *testing.T) { expectedSucceeded: 1, }, "too many active pods": { - parallelism: 2, - completions: 5, - backoffLimit: 6, - jobKeyForget: true, - activePods: 3, - expectedDeletions: 1, - expectedActive: 2, + parallelism: 2, + completions: 5, + backoffLimit: 6, + jobKeyForget: true, + activePods: 3, + expectedDeletions: 1, + expectedActive: 2, + expectedPodPatches: 1, }, "too many active pods, with controller error": { parallelism: 2, @@ -305,14 +319,15 @@ func TestControllerSyncJob(t *testing.T) { expectedFailed: 1, }, "new failed pod": { - parallelism: 2, - completions: 5, - backoffLimit: 6, - activePods: 1, - failedPods: 1, - expectedCreations: 1, - expectedActive: 2, - expectedFailed: 1, + parallelism: 2, + completions: 5, + backoffLimit: 6, + activePods: 1, + failedPods: 1, + expectedCreations: 1, + expectedActive: 2, + expectedFailed: 1, + expectedPodPatches: 1, }, "only new failed pod with controller error": { parallelism: 2, @@ -334,16 +349,18 @@ func TestControllerSyncJob(t *testing.T) { expectedSucceeded: 5, expectedCondition: &jobConditionComplete, expectedConditionStatus: v1.ConditionTrue, + 
expectedPodPatches: 5, }, "WQ job finishing": { - parallelism: 2, - completions: -1, - backoffLimit: 6, - jobKeyForget: true, - activePods: 1, - succeededPods: 1, - expectedActive: 1, - expectedSucceeded: 1, + parallelism: 2, + completions: -1, + backoffLimit: 6, + jobKeyForget: true, + activePods: 1, + succeededPods: 1, + expectedActive: 1, + expectedSucceeded: 1, + expectedPodPatches: 1, }, "WQ job all finished": { parallelism: 2, @@ -354,6 +371,7 @@ func TestControllerSyncJob(t *testing.T) { expectedSucceeded: 2, expectedCondition: &jobConditionComplete, expectedConditionStatus: v1.ConditionTrue, + expectedPodPatches: 2, }, "WQ job all finished despite one failure": { parallelism: 2, @@ -366,48 +384,53 @@ func TestControllerSyncJob(t *testing.T) { expectedFailed: 1, expectedCondition: &jobConditionComplete, expectedConditionStatus: v1.ConditionTrue, + expectedPodPatches: 2, }, "more active pods than parallelism": { - parallelism: 2, - completions: 5, - backoffLimit: 6, - jobKeyForget: true, - activePods: 10, - expectedDeletions: 8, - expectedActive: 2, + parallelism: 2, + completions: 5, + backoffLimit: 6, + jobKeyForget: true, + activePods: 10, + expectedDeletions: 8, + expectedActive: 2, + expectedPodPatches: 8, }, "more active pods than remaining completions": { - parallelism: 3, - completions: 4, - backoffLimit: 6, - jobKeyForget: true, - activePods: 3, - succeededPods: 2, - expectedDeletions: 1, - expectedActive: 2, - expectedSucceeded: 2, + parallelism: 3, + completions: 4, + backoffLimit: 6, + jobKeyForget: true, + activePods: 3, + succeededPods: 2, + expectedDeletions: 1, + expectedActive: 2, + expectedSucceeded: 2, + expectedPodPatches: 3, }, "status change": { - parallelism: 2, - completions: 5, - backoffLimit: 6, - jobKeyForget: true, - activePods: 2, - succeededPods: 2, - expectedActive: 2, - expectedSucceeded: 2, + parallelism: 2, + completions: 5, + backoffLimit: 6, + jobKeyForget: true, + activePods: 2, + succeededPods: 2, + expectedActive: 2, + expectedSucceeded: 2, + expectedPodPatches: 2, }, "deleting job": { - parallelism: 2, - completions: 5, - backoffLimit: 6, - deleting: true, - jobKeyForget: true, - pendingPods: 1, - activePods: 1, - succeededPods: 1, - expectedActive: 2, - expectedSucceeded: 1, + parallelism: 2, + completions: 5, + backoffLimit: 6, + deleting: true, + jobKeyForget: true, + pendingPods: 1, + activePods: 1, + succeededPods: 1, + expectedActive: 2, + expectedSucceeded: 1, + expectedPodPatches: 3, }, "limited pods": { parallelism: 100, @@ -428,6 +451,7 @@ func TestControllerSyncJob(t *testing.T) { expectedCondition: &jobConditionFailed, expectedConditionStatus: v1.ConditionTrue, expectedConditionReason: "BackoffLimitExceeded", + expectedPodPatches: 1, }, "indexed job start": { parallelism: 2, @@ -457,6 +481,7 @@ func TestControllerSyncJob(t *testing.T) { expectedCompletedIdxs: "0-2", expectedCondition: &jobConditionComplete, expectedConditionStatus: v1.ConditionTrue, + expectedPodPatches: 4, indexedJobEnabled: true, }, "indexed job repeated completed index": { @@ -475,6 +500,7 @@ func TestControllerSyncJob(t *testing.T) { expectedSucceeded: 2, expectedCompletedIdxs: "0,1", expectedCreatedIndexes: sets.NewInt(2), + expectedPodPatches: 3, indexedJobEnabled: true, }, "indexed job some running and completed pods": { @@ -498,6 +524,7 @@ func TestControllerSyncJob(t *testing.T) { expectedSucceeded: 6, expectedCompletedIdxs: "2,4,5,7-9", expectedCreatedIndexes: sets.NewInt(1, 6, 10, 11, 12, 13), + expectedPodPatches: 6, indexedJobEnabled: true, }, "indexed 
job some failed pods": { @@ -514,6 +541,7 @@ func TestControllerSyncJob(t *testing.T) { expectedActive: 3, expectedFailed: 2, expectedCreatedIndexes: sets.NewInt(0, 2), + expectedPodPatches: 2, indexedJobEnabled: true, }, "indexed job some pods without index": { @@ -539,6 +567,7 @@ func TestControllerSyncJob(t *testing.T) { expectedSucceeded: 1, expectedFailed: 0, expectedCompletedIdxs: "0", + expectedPodPatches: 8, indexedJobEnabled: true, }, "indexed job repeated indexes": { @@ -561,6 +590,7 @@ func TestControllerSyncJob(t *testing.T) { expectedActive: 2, expectedSucceeded: 1, expectedCompletedIdxs: "0", + expectedPodPatches: 5, indexedJobEnabled: true, }, "indexed job with indexes outside of range": { @@ -582,6 +612,7 @@ func TestControllerSyncJob(t *testing.T) { expectedCompletedIdxs: "0", expectedActive: 0, expectedFailed: 0, + expectedPodPatches: 5, indexedJobEnabled: true, }, "indexed job feature disabled": { @@ -612,6 +643,7 @@ func TestControllerSyncJob(t *testing.T) { expectedCondition: &jobConditionSuspended, expectedConditionStatus: v1.ConditionTrue, expectedConditionReason: "JobSuspended", + expectedPodPatches: 2, }, "suspending a job with unsatisfied expectations": { // Unlike the previous test, we expect the controller to NOT suspend the @@ -650,158 +682,177 @@ func TestControllerSyncJob(t *testing.T) { // cases above), but since this job is being deleted, we don't expect // anything changed here from before the job was suspended. The // JobSuspended condition is also missing. - suspendJobEnabled: true, - suspend: true, - deleting: true, - parallelism: 2, - activePods: 2, // parallelism == active, expectations satisfied - completions: 4, - backoffLimit: 6, - jobKeyForget: true, - expectedCreations: 0, - expectedDeletions: 0, - expectedActive: 2, + suspendJobEnabled: true, + suspend: true, + deleting: true, + parallelism: 2, + activePods: 2, // parallelism == active, expectations satisfied + completions: 4, + backoffLimit: 6, + jobKeyForget: true, + expectedCreations: 0, + expectedDeletions: 0, + expectedActive: 2, + expectedPodPatches: 2, }, } for name, tc := range testCases { - t.Run(name, func(t *testing.T) { - defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.IndexedJob, tc.indexedJobEnabled)() - defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.SuspendJob, tc.suspendJobEnabled)() + for _, wFinalizers := range []bool{false, true} { + t.Run(fmt.Sprintf("%s, finalizers=%t", name, wFinalizers), func(t *testing.T) { + if wFinalizers && tc.podControllerError != nil { + t.Skip("Can't track status if finalizers can't be removed") + } + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.IndexedJob, tc.indexedJobEnabled)() + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.SuspendJob, tc.suspendJobEnabled)() + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, wFinalizers)() - // job manager setup - clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}}) - manager, sharedInformerFactory := newControllerFromClient(clientSet, controller.NoResyncPeriodFunc) - fakePodControl := controller.FakePodControl{Err: tc.podControllerError, CreateLimit: tc.podLimit} - manager.podControl = &fakePodControl - manager.podStoreSynced = alwaysReady - 
manager.jobStoreSynced = alwaysReady + // job manager setup + clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}}) + manager, sharedInformerFactory := newControllerFromClient(clientSet, controller.NoResyncPeriodFunc) + fakePodControl := controller.FakePodControl{Err: tc.podControllerError, CreateLimit: tc.podLimit} + manager.podControl = &fakePodControl + manager.podStoreSynced = alwaysReady + manager.jobStoreSynced = alwaysReady - // job & pods setup - job := newJob(tc.parallelism, tc.completions, tc.backoffLimit, tc.completionMode) - job.Spec.Suspend = pointer.BoolPtr(tc.suspend) - key, err := controller.KeyFunc(job) - if err != nil { - t.Errorf("Unexpected error getting job key: %v", err) - } - if tc.fakeExpectationAtCreation < 0 { - manager.expectations.ExpectDeletions(key, int(-tc.fakeExpectationAtCreation)) - } else if tc.fakeExpectationAtCreation > 0 { - manager.expectations.ExpectCreations(key, int(tc.fakeExpectationAtCreation)) - } - if tc.wasSuspended { - job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobSuspended, v1.ConditionTrue, "JobSuspended", "Job suspended")) - } - if tc.deleting { - now := metav1.Now() - job.DeletionTimestamp = &now - } - sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job) - podIndexer := sharedInformerFactory.Core().V1().Pods().Informer().GetIndexer() - setPodsStatuses(podIndexer, job, tc.pendingPods, tc.activePods, tc.succeededPods, tc.failedPods) - setPodsStatusesWithIndexes(podIndexer, job, tc.podsWithIndexes) + // job & pods setup + job := newJob(tc.parallelism, tc.completions, tc.backoffLimit, tc.completionMode) + job.Spec.Suspend = pointer.BoolPtr(tc.suspend) + key, err := controller.KeyFunc(job) + if err != nil { + t.Errorf("Unexpected error getting job key: %v", err) + } + if tc.fakeExpectationAtCreation < 0 { + manager.expectations.ExpectDeletions(key, int(-tc.fakeExpectationAtCreation)) + } else if tc.fakeExpectationAtCreation > 0 { + manager.expectations.ExpectCreations(key, int(tc.fakeExpectationAtCreation)) + } + if tc.wasSuspended { + job.Status.Conditions = append(job.Status.Conditions, *newCondition(batch.JobSuspended, v1.ConditionTrue, "JobSuspended", "Job suspended")) + } + if wFinalizers { + job.Annotations = map[string]string{ + batch.JobTrackingFinalizer: "", + } + } + if tc.deleting { + now := metav1.Now() + job.DeletionTimestamp = &now + } + sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job) + podIndexer := sharedInformerFactory.Core().V1().Pods().Informer().GetIndexer() + setPodsStatuses(podIndexer, job, tc.pendingPods, tc.activePods, tc.succeededPods, tc.failedPods) + setPodsStatusesWithIndexes(podIndexer, job, tc.podsWithIndexes) - actual := job - manager.updateHandler = func(job *batch.Job) error { - actual = job - return nil - } + actual := job + manager.updateStatusHandler = func(job *batch.Job) error { + actual = job + return nil + } - // run - forget, err := manager.syncJob(testutil.GetKey(job, t)) + // run + forget, err := manager.syncJob(testutil.GetKey(job, t)) - // We need requeue syncJob task if podController error - if tc.podControllerError != nil { - if err == nil { - t.Error("Syncing jobs expected to return error on podControl exception") + // We need requeue syncJob task if podController error + if tc.podControllerError != nil { + if err == nil { + t.Error("Syncing jobs expected to return error on podControl exception") + } 
+ } else if tc.expectedCondition == nil && (hasValidFailingPods(tc.podsWithIndexes, int(tc.completions)) || (tc.completionMode != batch.IndexedCompletion && tc.failedPods > 0)) { + if err == nil { + t.Error("Syncing jobs expected to return error when there are new failed pods and Job didn't finish") + } + } else if tc.podLimit != 0 && fakePodControl.CreateCallCount > tc.podLimit { + if err == nil { + t.Error("Syncing jobs expected to return error when reached the podControl limit") + } + } else if err != nil { + t.Errorf("Unexpected error when syncing jobs: %v", err) } - } else if tc.expectedCondition == nil && (hasValidFailingPods(tc.podsWithIndexes, int(tc.completions)) || (tc.completionMode != batch.IndexedCompletion && tc.failedPods > 0)) { - if err == nil { - t.Error("Syncing jobs expected to return error when there are new failed pods and Job didn't finish") + if forget != tc.jobKeyForget { + t.Errorf("Unexpected forget value. Expected %v, saw %v\n", tc.jobKeyForget, forget) } - } else if tc.podLimit != 0 && fakePodControl.CreateCallCount > tc.podLimit { - if err == nil { - t.Error("Syncing jobs expected to return error when reached the podControl limit") + // validate created/deleted pods + if int32(len(fakePodControl.Templates)) != tc.expectedCreations { + t.Errorf("Unexpected number of creates. Expected %d, saw %d\n", tc.expectedCreations, len(fakePodControl.Templates)) } - } else if err != nil { - t.Errorf("Unexpected error when syncing jobs: %v", err) - } - if forget != tc.jobKeyForget { - t.Errorf("Unexpected forget value. Expected %v, saw %v\n", tc.jobKeyForget, forget) - } - // validate created/deleted pods - if int32(len(fakePodControl.Templates)) != tc.expectedCreations { - t.Errorf("Unexpected number of creates. Expected %d, saw %d\n", tc.expectedCreations, len(fakePodControl.Templates)) - } - if tc.completionMode == batch.IndexedCompletion { - checkIndexedJobPods(t, &fakePodControl, tc.expectedCreatedIndexes, job.Name) - } - if int32(len(fakePodControl.DeletePodName)) != tc.expectedDeletions { - t.Errorf("Unexpected number of deletes. Expected %d, saw %d\n", tc.expectedDeletions, len(fakePodControl.DeletePodName)) - } - // Each create should have an accompanying ControllerRef. - if len(fakePodControl.ControllerRefs) != int(tc.expectedCreations) { - t.Errorf("Unexpected number of ControllerRefs. Expected %d, saw %d\n", tc.expectedCreations, len(fakePodControl.ControllerRefs)) - } - // Make sure the ControllerRefs are correct. - for _, controllerRef := range fakePodControl.ControllerRefs { - if got, want := controllerRef.APIVersion, "batch/v1"; got != want { - t.Errorf("controllerRef.APIVersion = %q, want %q", got, want) + if tc.completionMode == batch.IndexedCompletion { + checkIndexedJobPods(t, &fakePodControl, tc.expectedCreatedIndexes, job.Name) } - if got, want := controllerRef.Kind, "Job"; got != want { - t.Errorf("controllerRef.Kind = %q, want %q", got, want) + if int32(len(fakePodControl.DeletePodName)) != tc.expectedDeletions { + t.Errorf("Unexpected number of deletes. Expected %d, saw %d\n", tc.expectedDeletions, len(fakePodControl.DeletePodName)) } - if got, want := controllerRef.Name, job.Name; got != want { - t.Errorf("controllerRef.Name = %q, want %q", got, want) + // Each create should have an accompanying ControllerRef. + if len(fakePodControl.ControllerRefs) != int(tc.expectedCreations) { + t.Errorf("Unexpected number of ControllerRefs. 
Expected %d, saw %d\n", tc.expectedCreations, len(fakePodControl.ControllerRefs)) } - if got, want := controllerRef.UID, job.UID; got != want { - t.Errorf("controllerRef.UID = %q, want %q", got, want) + // Make sure the ControllerRefs are correct. + for _, controllerRef := range fakePodControl.ControllerRefs { + if got, want := controllerRef.APIVersion, "batch/v1"; got != want { + t.Errorf("controllerRef.APIVersion = %q, want %q", got, want) + } + if got, want := controllerRef.Kind, "Job"; got != want { + t.Errorf("controllerRef.Kind = %q, want %q", got, want) + } + if got, want := controllerRef.Name, job.Name; got != want { + t.Errorf("controllerRef.Name = %q, want %q", got, want) + } + if got, want := controllerRef.UID, job.UID; got != want { + t.Errorf("controllerRef.UID = %q, want %q", got, want) + } + if controllerRef.Controller == nil || *controllerRef.Controller != true { + t.Errorf("controllerRef.Controller is not set to true") + } } - if controllerRef.Controller == nil || *controllerRef.Controller != true { - t.Errorf("controllerRef.Controller is not set to true") + // validate status + if actual.Status.Active != tc.expectedActive { + t.Errorf("Unexpected number of active pods. Expected %d, saw %d\n", tc.expectedActive, actual.Status.Active) } - } - // validate status - if actual.Status.Active != tc.expectedActive { - t.Errorf("Unexpected number of active pods. Expected %d, saw %d\n", tc.expectedActive, actual.Status.Active) - } - if actual.Status.Succeeded != tc.expectedSucceeded { - t.Errorf("Unexpected number of succeeded pods. Expected %d, saw %d\n", tc.expectedSucceeded, actual.Status.Succeeded) - } - if diff := cmp.Diff(tc.expectedCompletedIdxs, actual.Status.CompletedIndexes); diff != "" { - t.Errorf("Unexpected completed indexes (-want,+got):\n%s", diff) - } - if actual.Status.Failed != tc.expectedFailed { - t.Errorf("Unexpected number of failed pods. Expected %d, saw %d\n", tc.expectedFailed, actual.Status.Failed) - } - if actual.Status.StartTime != nil && tc.suspend { - t.Error("Unexpected .status.startTime not nil when suspend is true") - } - if actual.Status.StartTime == nil && tc.indexedJobEnabled && !tc.suspend { - t.Error("Missing .status.startTime") - } - // validate conditions - if tc.expectedCondition != nil { - if !getCondition(actual, *tc.expectedCondition, tc.expectedConditionStatus, tc.expectedConditionReason) { - t.Errorf("Expected completion condition. Got %#v", actual.Status.Conditions) + if actual.Status.Succeeded != tc.expectedSucceeded { + t.Errorf("Unexpected number of succeeded pods. Expected %d, saw %d\n", tc.expectedSucceeded, actual.Status.Succeeded) } - } else { - if cond := hasTrueCondition(actual); cond != nil { - t.Errorf("Got condition %s, want none", *cond) + if diff := cmp.Diff(tc.expectedCompletedIdxs, actual.Status.CompletedIndexes); diff != "" { + t.Errorf("Unexpected completed indexes (-want,+got):\n%s", diff) } - } - if tc.expectedCondition == nil && tc.suspend && len(actual.Status.Conditions) != 0 { - t.Errorf("Unexpected conditions %v", actual.Status.Conditions) - } - // validate slow start - expectedLimit := 0 - for pass := uint8(0); expectedLimit <= tc.podLimit; pass++ { - expectedLimit += controller.SlowStartInitialBatchSize << pass - } - if tc.podLimit > 0 && fakePodControl.CreateCallCount > expectedLimit { - t.Errorf("Unexpected number of create calls. 
Expected <= %d, saw %d\n", fakePodControl.CreateLimit*2, fakePodControl.CreateCallCount) - } - }) + if actual.Status.Failed != tc.expectedFailed { + t.Errorf("Unexpected number of failed pods. Expected %d, saw %d\n", tc.expectedFailed, actual.Status.Failed) + } + if actual.Status.StartTime != nil && tc.suspend { + t.Error("Unexpected .status.startTime not nil when suspend is true") + } + if actual.Status.StartTime == nil && tc.indexedJobEnabled && !tc.suspend { + t.Error("Missing .status.startTime") + } + // validate conditions + if tc.expectedCondition != nil { + if !getCondition(actual, *tc.expectedCondition, tc.expectedConditionStatus, tc.expectedConditionReason) { + t.Errorf("Expected completion condition. Got %#v", actual.Status.Conditions) + } + } else { + if cond := hasTrueCondition(actual); cond != nil { + t.Errorf("Got condition %s, want none", *cond) + } + } + if tc.expectedCondition == nil && tc.suspend && len(actual.Status.Conditions) != 0 { + t.Errorf("Unexpected conditions %v", actual.Status.Conditions) + } + // validate slow start + expectedLimit := 0 + for pass := uint8(0); expectedLimit <= tc.podLimit; pass++ { + expectedLimit += controller.SlowStartInitialBatchSize << pass + } + if tc.podLimit > 0 && fakePodControl.CreateCallCount > expectedLimit { + t.Errorf("Unexpected number of create calls. Expected <= %d, saw %d\n", fakePodControl.CreateLimit*2, fakePodControl.CreateCallCount) + } + wantPodPatches := 0 + if wFinalizers { + wantPodPatches = tc.expectedPodPatches + } + if p := len(fakePodControl.Patches); p != wantPodPatches { + t.Errorf("Got %d pod patches, want %d", p, wantPodPatches) + } + }) + } } } @@ -826,6 +877,645 @@ func checkIndexedJobPods(t *testing.T, control *controller.FakePodControl, wantI } } +// TestSyncJobLegacyTracking makes sure that a Job is tracked with +// finalizers only when the feature is enabled and the job has the finalizer. +func TestSyncJobLegacyTracking(t *testing.T) { + cases := map[string]struct { + job batch.Job + trackingWithFinalizersEnabled bool + wantUncounted bool + wantPatches int + }{ + "no annotation": { + job: batch.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + Namespace: "ns", + }, + Spec: batch.JobSpec{ + Parallelism: pointer.Int32Ptr(1), + }, + }, + }, + "no annotation, feature enabled": { + job: batch.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + Namespace: "ns", + }, + Spec: batch.JobSpec{ + Parallelism: pointer.Int32Ptr(1), + }, + }, + trackingWithFinalizersEnabled: true, + }, + "tracking annotation, feature disabled": { + job: batch.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + Namespace: "ns", + Annotations: map[string]string{ + batch.JobTrackingFinalizer: "", + }, + }, + Spec: batch.JobSpec{ + Parallelism: pointer.Int32Ptr(1), + }, + }, + // Finalizer removed.
+ wantPatches: 1, + }, + "tracking annotation, feature enabled": { + job: batch.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + Namespace: "ns", + Annotations: map[string]string{ + batch.JobTrackingFinalizer: "", + }, + }, + Spec: batch.JobSpec{ + Parallelism: pointer.Int32Ptr(1), + }, + }, + trackingWithFinalizersEnabled: true, + wantUncounted: true, + }, + "different annotation, feature enabled": { + job: batch.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + Namespace: "ns", + Annotations: map[string]string{ + "foo": "bar", + }, + }, + Spec: batch.JobSpec{ + Parallelism: pointer.Int32Ptr(1), + }, + }, + trackingWithFinalizersEnabled: true, + }, + } + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, tc.trackingWithFinalizersEnabled)() + + // Job manager setup. + clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}}) + manager, sharedInformerFactory := newControllerFromClient(clientSet, controller.NoResyncPeriodFunc) + fakePodControl := controller.FakePodControl{} + manager.podControl = &fakePodControl + manager.podStoreSynced = alwaysReady + manager.jobStoreSynced = alwaysReady + jobPatches := 0 + manager.patchJobHandler = func(*batch.Job, []byte) error { + jobPatches++ + return nil + } + sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(&tc.job) + + var actual *batch.Job + manager.updateStatusHandler = func(job *batch.Job) error { + actual = job + return nil + } + + // Run. + _, err := manager.syncJob(testutil.GetKey(&tc.job, t)) + if err != nil { + t.Fatalf("Syncing job: %v", err) + } + + // Checks. 
+ if got := actual.Status.UncountedTerminatedPods != nil; got != tc.wantUncounted { + t.Errorf("Job got uncounted pods %t, want %t", got, tc.wantUncounted) + } + if jobPatches != tc.wantPatches { + t.Errorf("Sync did %d patches, want %d", jobPatches, tc.wantPatches) + } + }) + } +} + +func TestGetStatus(t *testing.T) { + cases := map[string]struct { + job batch.Job + pods []*v1.Pod + wantSucceeded int32 + wantFailed int32 + }{ + "without finalizers": { + job: batch.Job{ + Status: batch.JobStatus{ + Succeeded: 1, + Failed: 2, + }, + }, + pods: []*v1.Pod{ + buildPod().uid("a").phase(v1.PodSucceeded).Pod, + buildPod().uid("b").phase(v1.PodSucceeded).Pod, + buildPod().uid("c").phase(v1.PodFailed).Pod, + buildPod().uid("d").phase(v1.PodFailed).Pod, + buildPod().uid("e").phase(v1.PodFailed).Pod, + buildPod().uid("f").phase(v1.PodRunning).Pod, + }, + wantSucceeded: 2, + wantFailed: 3, + }, + "some counted": { + job: batch.Job{ + Status: batch.JobStatus{ + Succeeded: 2, + Failed: 1, + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + }, + }, + pods: []*v1.Pod{ + buildPod().uid("a").phase(v1.PodSucceeded).Pod, + buildPod().uid("b").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("c").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("d").phase(v1.PodFailed).Pod, + buildPod().uid("e").phase(v1.PodFailed).trackingFinalizer().Pod, + buildPod().uid("f").phase(v1.PodRunning).Pod, + }, + wantSucceeded: 4, + wantFailed: 2, + }, + "some uncounted": { + job: batch.Job{ + Status: batch.JobStatus{ + Succeeded: 1, + Failed: 1, + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"a", "c"}, + Failed: []types.UID{"e", "f"}, + }, + }, + }, + pods: []*v1.Pod{ + buildPod().uid("a").phase(v1.PodSucceeded).Pod, + buildPod().uid("b").phase(v1.PodSucceeded).Pod, + buildPod().uid("c").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("d").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("e").phase(v1.PodFailed).Pod, + buildPod().uid("f").phase(v1.PodFailed).trackingFinalizer().Pod, + buildPod().uid("g").phase(v1.PodFailed).trackingFinalizer().Pod, + }, + wantSucceeded: 4, + wantFailed: 4, + }, + "deleted pods": { + job: batch.Job{ + Status: batch.JobStatus{ + Succeeded: 1, + Failed: 1, + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + }, + }, + pods: []*v1.Pod{ + buildPod().uid("a").phase(v1.PodSucceeded).trackingFinalizer().deletionTimestamp().Pod, + buildPod().uid("b").phase(v1.PodFailed).trackingFinalizer().deletionTimestamp().Pod, + buildPod().uid("c").phase(v1.PodRunning).trackingFinalizer().deletionTimestamp().Pod, + buildPod().uid("d").phase(v1.PodPending).trackingFinalizer().deletionTimestamp().Pod, + buildPod().uid("e").phase(v1.PodRunning).deletionTimestamp().Pod, + buildPod().uid("f").phase(v1.PodPending).deletionTimestamp().Pod, + }, + wantSucceeded: 2, + wantFailed: 4, + }, + } + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + var uncounted *uncountedTerminatedPods + if tc.job.Status.UncountedTerminatedPods != nil { + uncounted = newUncountedTerminatedPods(*tc.job.Status.UncountedTerminatedPods) + } + succeeded, failed := getStatus(&tc.job, tc.pods, uncounted) + if succeeded != tc.wantSucceeded { + t.Errorf("getStatus reports %d succeeded pods, want %d", succeeded, tc.wantSucceeded) + } + if failed != tc.wantFailed { + t.Errorf("getStatus reports %d failed pods, want %d", failed, tc.wantFailed) + } + }) + } +} + +func TestTrackJobStatusAndRemoveFinalizers(t *testing.T)
{ + succeededCond := newCondition(batch.JobComplete, v1.ConditionTrue, "", "") + failedCond := newCondition(batch.JobFailed, v1.ConditionTrue, "", "") + indexedCompletion := batch.IndexedCompletion + mockErr := errors.New("mock error") + cases := map[string]struct { + job batch.Job + pods []*v1.Pod + finishedCond *batch.JobCondition + needsFlush bool + statusUpdateErr error + podControlErr error + wantErr error + wantRmFinalizers int + wantStatusUpdates []batch.JobStatus + }{ + "no updates": {}, + "new active": { + job: batch.Job{ + Status: batch.JobStatus{ + Active: 1, + }, + }, + needsFlush: true, + wantStatusUpdates: []batch.JobStatus{ + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + Active: 1, + }, + }, + }, + "track finished pods": { + pods: []*v1.Pod{ + buildPod().uid("a").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("b").phase(v1.PodFailed).trackingFinalizer().Pod, + buildPod().uid("c").phase(v1.PodSucceeded).trackingFinalizer().deletionTimestamp().Pod, + buildPod().uid("d").phase(v1.PodFailed).trackingFinalizer().deletionTimestamp().Pod, + buildPod().uid("e").phase(v1.PodPending).trackingFinalizer().deletionTimestamp().Pod, + buildPod().phase(v1.PodPending).trackingFinalizer().Pod, + buildPod().phase(v1.PodRunning).trackingFinalizer().Pod, + }, + wantRmFinalizers: 5, + wantStatusUpdates: []batch.JobStatus{ + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"a", "c"}, + Failed: []types.UID{"b", "d", "e"}, + }, + }, + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + Succeeded: 2, + Failed: 3, + }, + }, + }, + "past and new finished pods": { + job: batch.Job{ + Status: batch.JobStatus{ + Active: 1, + Succeeded: 2, + Failed: 3, + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"a", "e"}, + Failed: []types.UID{"b", "f"}, + }, + }, + }, + pods: []*v1.Pod{ + buildPod().phase(v1.PodSucceeded).Pod, + buildPod().phase(v1.PodFailed).Pod, + buildPod().phase(v1.PodPending).Pod, + buildPod().uid("a").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("b").phase(v1.PodFailed).trackingFinalizer().Pod, + buildPod().uid("c").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("d").phase(v1.PodFailed).trackingFinalizer().Pod, + }, + wantRmFinalizers: 4, + wantStatusUpdates: []batch.JobStatus{ + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"a", "e", "c"}, + Failed: []types.UID{"b", "f", "d"}, + }, + Active: 1, + Succeeded: 2, + Failed: 3, + }, + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + Active: 1, + Succeeded: 5, + Failed: 6, + }, + }, + }, + "succeeding job": { + pods: []*v1.Pod{ + buildPod().uid("a").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("b").phase(v1.PodFailed).trackingFinalizer().Pod, + }, + finishedCond: succeededCond, + wantRmFinalizers: 2, + wantStatusUpdates: []batch.JobStatus{ + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"a"}, + Failed: []types.UID{"b"}, + }, + }, + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + Succeeded: 1, + Failed: 1, + Conditions: []batch.JobCondition{*succeededCond}, + CompletionTime: &succeededCond.LastTransitionTime, + }, + }, + }, + "failing job": { + pods: []*v1.Pod{ + buildPod().uid("a").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("b").phase(v1.PodFailed).trackingFinalizer().Pod, + buildPod().uid("c").phase(v1.PodRunning).trackingFinalizer().Pod, 
+ }, + finishedCond: failedCond, + // Running pod counts as failed. + wantRmFinalizers: 3, + wantStatusUpdates: []batch.JobStatus{ + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"a"}, + Failed: []types.UID{"b", "c"}, + }, + }, + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + Succeeded: 1, + Failed: 2, + Conditions: []batch.JobCondition{*failedCond}, + }, + }, + }, + "deleted job": { + job: batch.Job{ + ObjectMeta: metav1.ObjectMeta{ + DeletionTimestamp: &metav1.Time{}, + }, + Status: batch.JobStatus{ + Active: 1, + }, + }, + pods: []*v1.Pod{ + buildPod().uid("a").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("b").phase(v1.PodFailed).trackingFinalizer().Pod, + buildPod().phase(v1.PodRunning).trackingFinalizer().Pod, + }, + // Removing finalizer from Running pod, but doesn't count as failed. + wantRmFinalizers: 3, + wantStatusUpdates: []batch.JobStatus{ + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"a"}, + Failed: []types.UID{"b"}, + }, + Active: 1, + }, + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + Active: 1, + Succeeded: 1, + Failed: 1, + }, + }, + }, + "status update error": { + pods: []*v1.Pod{ + buildPod().uid("a").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("b").phase(v1.PodFailed).trackingFinalizer().Pod, + }, + statusUpdateErr: mockErr, + wantErr: mockErr, + wantStatusUpdates: []batch.JobStatus{ + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"a"}, + Failed: []types.UID{"b"}, + }, + }, + }, + }, + "pod patch errors": { + pods: []*v1.Pod{ + buildPod().uid("a").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("b").phase(v1.PodFailed).trackingFinalizer().Pod, + }, + podControlErr: mockErr, + wantErr: mockErr, + wantRmFinalizers: 2, + wantStatusUpdates: []batch.JobStatus{ + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"a"}, + Failed: []types.UID{"b"}, + }, + }, + }, + }, + "pod patch errors with partial success": { + job: batch.Job{ + Status: batch.JobStatus{ + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"a"}, + Failed: []types.UID{"b"}, + }, + }, + }, + pods: []*v1.Pod{ + buildPod().uid("c").phase(v1.PodSucceeded).trackingFinalizer().Pod, + buildPod().uid("d").phase(v1.PodFailed).trackingFinalizer().Pod, + }, + podControlErr: mockErr, + wantErr: mockErr, + wantRmFinalizers: 2, + wantStatusUpdates: []batch.JobStatus{ + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"a", "c"}, + Failed: []types.UID{"b", "d"}, + }, + }, + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"c"}, + Failed: []types.UID{"d"}, + }, + Succeeded: 1, + Failed: 1, + }, + }, + }, + "indexed job new successful pods": { + job: batch.Job{ + Spec: batch.JobSpec{ + CompletionMode: &indexedCompletion, + Completions: pointer.Int32Ptr(6), + }, + Status: batch.JobStatus{ + Active: 1, + }, + }, + pods: []*v1.Pod{ + buildPod().phase(v1.PodSucceeded).trackingFinalizer().index("1").Pod, + buildPod().phase(v1.PodSucceeded).trackingFinalizer().index("3").Pod, + buildPod().phase(v1.PodSucceeded).trackingFinalizer().index("3").Pod, + buildPod().phase(v1.PodRunning).trackingFinalizer().index("5").Pod, + buildPod().phase(v1.PodSucceeded).trackingFinalizer().Pod, + }, + wantRmFinalizers: 4, + wantStatusUpdates: []batch.JobStatus{ + { + Active: 1, + Succeeded: 2, + 
CompletedIndexes: "1,3", + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + }, + }, + }, + "indexed job new failed pods": { + job: batch.Job{ + Spec: batch.JobSpec{ + CompletionMode: &indexedCompletion, + Completions: pointer.Int32Ptr(6), + }, + Status: batch.JobStatus{ + Active: 1, + }, + }, + pods: []*v1.Pod{ + buildPod().uid("a").phase(v1.PodFailed).trackingFinalizer().index("1").Pod, + buildPod().uid("b").phase(v1.PodFailed).trackingFinalizer().index("3").Pod, + buildPod().uid("c").phase(v1.PodFailed).trackingFinalizer().index("3").Pod, + buildPod().uid("d").phase(v1.PodRunning).trackingFinalizer().index("5").Pod, + buildPod().phase(v1.PodFailed).trackingFinalizer().Pod, + }, + wantRmFinalizers: 4, + wantStatusUpdates: []batch.JobStatus{ + { + Active: 1, + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Failed: []types.UID{"a", "b", "c"}, + }, + }, + { + Active: 1, + Failed: 3, + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + }, + }, + }, + "indexed job past and new pods": { + job: batch.Job{ + Spec: batch.JobSpec{ + CompletionMode: &indexedCompletion, + Completions: pointer.Int32Ptr(7), + }, + Status: batch.JobStatus{ + Failed: 2, + Succeeded: 5, + CompletedIndexes: "0-2,4,6,7", + }, + }, + pods: []*v1.Pod{ + buildPod().phase(v1.PodSucceeded).index("0").Pod, + buildPod().phase(v1.PodFailed).index("1").Pod, + buildPod().phase(v1.PodSucceeded).trackingFinalizer().index("1").Pod, + buildPod().phase(v1.PodSucceeded).trackingFinalizer().index("3").Pod, + buildPod().uid("a").phase(v1.PodFailed).trackingFinalizer().index("2").Pod, + buildPod().uid("b").phase(v1.PodFailed).trackingFinalizer().index("5").Pod, + }, + wantRmFinalizers: 4, + wantStatusUpdates: []batch.JobStatus{ + { + Succeeded: 6, + Failed: 2, + CompletedIndexes: "0-4,6", + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Failed: []types.UID{"a", "b"}, + }, + }, + { + Succeeded: 6, + Failed: 4, + CompletedIndexes: "0-4,6", + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + }, + }, + }, + "too many finished": { + job: batch.Job{ + Status: batch.JobStatus{ + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Failed: []types.UID{"a"}, + }, + }, + }, + pods: func() []*v1.Pod { + pods := make([]*v1.Pod, 501) + for i := range pods { + pods[i] = buildPod().uid(strconv.Itoa(i)).phase(v1.PodSucceeded).trackingFinalizer().Pod + } + return pods + }(), + wantRmFinalizers: 501, + wantStatusUpdates: []batch.JobStatus{ + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: func() []types.UID { + uids := make([]types.UID, 499) + for i := range uids { + uids[i] = types.UID(strconv.Itoa(i)) + } + return uids + }(), + Failed: []types.UID{"a"}, + }, + }, + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{ + Succeeded: []types.UID{"499", "500"}, + }, + Succeeded: 499, + Failed: 1, + }, + { + UncountedTerminatedPods: &batch.UncountedTerminatedPods{}, + Succeeded: 501, + Failed: 1, + }, + }, + }, + } + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}}) + manager, _ := newControllerFromClient(clientSet, controller.NoResyncPeriodFunc) + fakePodControl := controller.FakePodControl{Err: tc.podControlErr} + manager.podControl = &fakePodControl + var statusUpdates []batch.JobStatus + manager.updateStatusHandler = func(job *batch.Job) error { + statusUpdates = 
append(statusUpdates, *job.Status.DeepCopy()) + return tc.statusUpdateErr + } + + if tc.job.Status.UncountedTerminatedPods == nil { + tc.job.Status.UncountedTerminatedPods = &batch.UncountedTerminatedPods{} + } + uncounted := newUncountedTerminatedPods(*tc.job.Status.UncountedTerminatedPods) + succeededIndexes := succeededIndexesFromJob(&tc.job) + err := manager.trackJobStatusAndRemoveFinalizers(&tc.job, tc.pods, succeededIndexes, *uncounted, tc.finishedCond, tc.needsFlush) + if !errors.Is(err, tc.wantErr) { + t.Errorf("Got error %v, want %v", err, tc.wantErr) + } + if diff := cmp.Diff(tc.wantStatusUpdates, statusUpdates); diff != "" { + t.Errorf("Unexpected status updates (-want,+got):\n%s", diff) + } + rmFinalizers := len(fakePodControl.Patches) + if rmFinalizers != tc.wantRmFinalizers { + t.Errorf("Removed %d finalizers, want %d", rmFinalizers, tc.wantRmFinalizers) + } + }) + } +} + func TestSyncJobPastDeadline(t *testing.T) { testCases := map[string]struct { // job setup @@ -928,7 +1618,7 @@ func TestSyncJobPastDeadline(t *testing.T) { manager.podStoreSynced = alwaysReady manager.jobStoreSynced = alwaysReady var actual *batch.Job - manager.updateHandler = func(job *batch.Job) error { + manager.updateStatusHandler = func(job *batch.Job) error { actual = job return nil } @@ -1005,7 +1695,7 @@ func TestSyncPastDeadlineJobFinished(t *testing.T) { manager.podStoreSynced = alwaysReady manager.jobStoreSynced = alwaysReady var actual *batch.Job - manager.updateHandler = func(job *batch.Job) error { + manager.updateStatusHandler = func(job *batch.Job) error { actual = job return nil } @@ -1015,7 +1705,7 @@ func TestSyncPastDeadlineJobFinished(t *testing.T) { job.Spec.ActiveDeadlineSeconds = &activeDeadlineSeconds start := metav1.Unix(metav1.Now().Time.Unix()-15, 0) job.Status.StartTime = &start - job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, v1.ConditionTrue, "DeadlineExceeded", "Job was active longer than specified deadline")) + job.Status.Conditions = append(job.Status.Conditions, *newCondition(batch.JobFailed, v1.ConditionTrue, "DeadlineExceeded", "Job was active longer than specified deadline")) sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job) forget, err := manager.syncJob(testutil.GetKey(job, t)) if err != nil { @@ -1044,7 +1734,7 @@ func TestSyncJobComplete(t *testing.T) { manager.jobStoreSynced = alwaysReady job := newJob(1, 1, 6, batch.NonIndexedCompletion) - job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, v1.ConditionTrue, "", "")) + job.Status.Conditions = append(job.Status.Conditions, *newCondition(batch.JobComplete, v1.ConditionTrue, "", "")) sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job) forget, err := manager.syncJob(testutil.GetKey(job, t)) if err != nil { @@ -1070,7 +1760,7 @@ func TestSyncJobDeleted(t *testing.T) { manager.podControl = &fakePodControl manager.podStoreSynced = alwaysReady manager.jobStoreSynced = alwaysReady - manager.updateHandler = func(job *batch.Job) error { return nil } + manager.updateStatusHandler = func(job *batch.Job) error { return nil } job := newJob(2, 2, 6, batch.NonIndexedCompletion) forget, err := manager.syncJob(testutil.GetKey(job, t)) if err != nil { @@ -1096,7 +1786,7 @@ func TestSyncJobUpdateRequeue(t *testing.T) { manager.podStoreSynced = alwaysReady manager.jobStoreSynced = alwaysReady updateError := fmt.Errorf("update error") - manager.updateHandler = 
func(job *batch.Job) error { manager.queue.AddRateLimited(testutil.GetKey(job, t)) return updateError } @@ -1209,98 +1899,98 @@ func TestGetPodsForJob(t *testing.T) { jobDeletedInCache bool pods []*v1.Pod wantPods []string + // only applicable to tracking with finalizers + wantPodsFinalizer []string }{ "only matching": { pods: []*v1.Pod{ - newPod("pod1", job), - newPod("pod2", otherJob), - {ObjectMeta: metav1.ObjectMeta{Name: "pod3", Namespace: job.Namespace}}, - newPod("pod4", job), + buildPod().name("pod1").job(job).trackingFinalizer().Pod, + buildPod().name("pod2").job(otherJob).Pod, + buildPod().name("pod3").ns(job.Namespace).Pod, + buildPod().name("pod4").job(job).Pod, }, - wantPods: []string{"pod1", "pod4"}, + wantPods: []string{"pod1", "pod4"}, + wantPodsFinalizer: []string{"pod1"}, }, "adopt": { pods: []*v1.Pod{ - newPod("pod1", job), - func() *v1.Pod { - p := newPod("pod2", job) - p.OwnerReferences = nil - return p - }(), - newPod("pod3", otherJob), + buildPod().name("pod1").job(job).Pod, + buildPod().name("pod2").job(job).clearOwner().Pod, + buildPod().name("pod3").job(otherJob).Pod, }, - wantPods: []string{"pod1", "pod2"}, + wantPods: []string{"pod1", "pod2"}, + wantPodsFinalizer: []string{"pod2"}, }, "no adopt when deleting": { jobDeleted: true, jobDeletedInCache: true, pods: []*v1.Pod{ - newPod("pod1", job), - func() *v1.Pod { - p := newPod("pod2", job) - p.OwnerReferences = nil - return p - }(), + buildPod().name("pod1").job(job).Pod, + buildPod().name("pod2").job(job).clearOwner().Pod, }, wantPods: []string{"pod1"}, }, "no adopt when deleting race": { jobDeleted: true, pods: []*v1.Pod{ - newPod("pod1", job), - func() *v1.Pod { - p := newPod("pod2", job) - p.OwnerReferences = nil - return p - }(), + buildPod().name("pod1").job(job).Pod, + buildPod().name("pod2").job(job).clearOwner().Pod, }, wantPods: []string{"pod1"}, }, "release": { pods: []*v1.Pod{ - newPod("pod1", job), - func() *v1.Pod { - p := newPod("pod2", job) - p.Labels = nil - return p - }(), + buildPod().name("pod1").job(job).Pod, + buildPod().name("pod2").job(job).clearLabels().Pod, }, wantPods: []string{"pod1"}, }, } for name, tc := range cases { - t.Run(name, func(t *testing.T) { - job := job.DeepCopy() - if tc.jobDeleted { - job.DeletionTimestamp = &metav1.Time{} - } - clientSet := fake.NewSimpleClientset(job, otherJob) - jm, informer := newControllerFromClient(clientSet, controller.NoResyncPeriodFunc) - jm.podStoreSynced = alwaysReady - jm.jobStoreSynced = alwaysReady - cachedJob := job.DeepCopy() - if tc.jobDeletedInCache { - cachedJob.DeletionTimestamp = &metav1.Time{} - } - informer.Batch().V1().Jobs().Informer().GetIndexer().Add(cachedJob) - informer.Batch().V1().Jobs().Informer().GetIndexer().Add(otherJob) - for _, p := range tc.pods { - informer.Core().V1().Pods().Informer().GetIndexer().Add(p) - } + for _, wFinalizers := range []bool{false, true} { + t.Run(fmt.Sprintf("%s, finalizers=%t", name, wFinalizers), func(t *testing.T) { + job := job.DeepCopy() + if tc.jobDeleted { + job.DeletionTimestamp = &metav1.Time{} + } + clientSet := fake.NewSimpleClientset(job, otherJob) + jm, informer := newControllerFromClient(clientSet, controller.NoResyncPeriodFunc) + jm.podStoreSynced = alwaysReady + jm.jobStoreSynced = alwaysReady + cachedJob := job.DeepCopy() + if tc.jobDeletedInCache { + cachedJob.DeletionTimestamp = &metav1.Time{} + } + informer.Batch().V1().Jobs().Informer().GetIndexer().Add(cachedJob) + informer.Batch().V1().Jobs().Informer().GetIndexer().Add(otherJob) + for _, p := range tc.pods { + 
informer.Core().V1().Pods().Informer().GetIndexer().Add(p) + } - pods, err := jm.getPodsForJob(job) - if err != nil { - t.Fatalf("getPodsForJob() error: %v", err) - } - got := make([]string, len(pods)) - for i, p := range pods { - got[i] = p.Name - } - sort.Strings(got) - if diff := cmp.Diff(tc.wantPods, got); diff != "" { - t.Errorf("getPodsForJob() returned (-want,+got):\n%s", diff) - } - }) + pods, err := jm.getPodsForJob(job, wFinalizers) + if err != nil { + t.Fatalf("getPodsForJob() error: %v", err) + } + got := make([]string, len(pods)) + var gotFinalizer []string + for i, p := range pods { + got[i] = p.Name + if hasJobTrackingFinalizer(p) { + gotFinalizer = append(gotFinalizer, p.Name) + } + } + sort.Strings(got) + if diff := cmp.Diff(tc.wantPods, got); diff != "" { + t.Errorf("getPodsForJob() returned (-want,+got):\n%s", diff) + } + if wFinalizers { + sort.Strings(gotFinalizer) + if diff := cmp.Diff(tc.wantPodsFinalizer, gotFinalizer); diff != "" { + t.Errorf("Pods with finalizers (-want,+got):\n%s", diff) + } + } + }) + } } } @@ -1597,7 +2287,7 @@ func TestSyncJobExpectations(t *testing.T) { manager.podControl = &fakePodControl manager.podStoreSynced = alwaysReady manager.jobStoreSynced = alwaysReady - manager.updateHandler = func(job *batch.Job) error { return nil } + manager.updateStatusHandler = func(job *batch.Job) error { return nil } job := newJob(2, 2, 6, batch.NonIndexedCompletion) sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job) @@ -1702,7 +2392,7 @@ func TestWatchPods(t *testing.T) { stopCh := make(chan struct{}) defer close(stopCh) go sharedInformerFactory.Core().V1().Pods().Informer().Run(stopCh) - go wait.Until(manager.worker, 10*time.Millisecond, stopCh) + go manager.Run(1, stopCh) pods := newPodList(1, v1.PodRunning, testJob) testPod := pods[0] @@ -1713,6 +2403,47 @@ func TestWatchPods(t *testing.T) { <-received } +func TestWatchOrphanPods(t *testing.T) { + clientset := fake.NewSimpleClientset() + sharedInformers := informers.NewSharedInformerFactory(clientset, controller.NoResyncPeriodFunc()) + manager := NewController(sharedInformers.Core().V1().Pods(), sharedInformers.Batch().V1().Jobs(), clientset) + manager.podStoreSynced = alwaysReady + manager.jobStoreSynced = alwaysReady + + jobSynced := false + manager.syncHandler = func(jobKey string) (bool, error) { + jobSynced = true + return true, nil + } + + // Create job but don't add it to the store. 
+ testJob := newJob(2, 2, 6, batch.NonIndexedCompletion) + + stopCh := make(chan struct{}) + defer close(stopCh) + go sharedInformers.Core().V1().Pods().Informer().Run(stopCh) + go manager.Run(1, stopCh) + + orphanPod := buildPod().name("a").job(testJob).deletionTimestamp().trackingFinalizer().Pod + orphanPod, err := clientset.CoreV1().Pods("default").Create(context.Background(), orphanPod, metav1.CreateOptions{}) + if err != nil { + t.Fatalf("Creating orphan pod: %v", err) + } + + if err := wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout, func() (bool, error) { + p, err := clientset.CoreV1().Pods(orphanPod.Namespace).Get(context.Background(), orphanPod.Name, metav1.GetOptions{}) + if err != nil { + return false, err + } + return !hasJobTrackingFinalizer(p), nil + }); err != nil { + t.Errorf("Waiting for Pod to get the finalizer removed: %v", err) + } + if jobSynced { + t.Error("Tried to sync deleted job") + } +} + func bumpResourceVersion(obj metav1.Object) { ver, _ := strconv.ParseInt(obj.GetResourceVersion(), 10, 32) obj.SetResourceVersion(strconv.FormatInt(ver+1, 10)) @@ -1760,7 +2491,7 @@ func TestJobBackoffReset(t *testing.T) { manager.podStoreSynced = alwaysReady manager.jobStoreSynced = alwaysReady var actual *batch.Job - manager.updateHandler = func(job *batch.Job) error { + manager.updateStatusHandler = func(job *batch.Job) error { actual = job return nil } @@ -1940,7 +2671,7 @@ func TestJobBackoffForOnFailure(t *testing.T) { manager.podStoreSynced = alwaysReady manager.jobStoreSynced = alwaysReady var actual *batch.Job - manager.updateHandler = func(job *batch.Job) error { + manager.updateStatusHandler = func(job *batch.Job) error { actual = job return nil } @@ -2042,7 +2773,7 @@ func TestJobBackoffOnRestartPolicyNever(t *testing.T) { manager.podStoreSynced = alwaysReady manager.jobStoreSynced = alwaysReady var actual *batch.Job - manager.updateHandler = func(job *batch.Job) error { + manager.updateStatusHandler = func(job *batch.Job) error { actual = job return nil } @@ -2102,7 +2833,7 @@ func TestEnsureJobConditions(t *testing.T) { wantType: batch.JobSuspended, wantStatus: v1.ConditionTrue, wantReason: "foo", - expectList: []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, + expectList: []batch.JobCondition{*newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, expectUpdate: true, }, { @@ -2116,38 +2847,38 @@ func TestEnsureJobConditions(t *testing.T) { }, { name: "update true condition reason", - haveList: []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, + haveList: []batch.JobCondition{*newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, wantType: batch.JobSuspended, wantStatus: v1.ConditionTrue, wantReason: "bar", - expectList: []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "bar", "")}, + expectList: []batch.JobCondition{*newCondition(batch.JobSuspended, v1.ConditionTrue, "bar", "")}, expectUpdate: true, }, { name: "update true condition status", - haveList: []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, + haveList: []batch.JobCondition{*newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, wantType: batch.JobSuspended, wantStatus: v1.ConditionFalse, wantReason: "foo", - expectList: []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionFalse, "foo", "")}, + expectList: []batch.JobCondition{*newCondition(batch.JobSuspended, v1.ConditionFalse, "foo", "")}, expectUpdate: true, }, { name: "update 
false condition status", - haveList: []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionFalse, "foo", "")}, + haveList: []batch.JobCondition{*newCondition(batch.JobSuspended, v1.ConditionFalse, "foo", "")}, wantType: batch.JobSuspended, wantStatus: v1.ConditionTrue, wantReason: "foo", - expectList: []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, + expectList: []batch.JobCondition{*newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, expectUpdate: true, }, { name: "condition already exists", - haveList: []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, + haveList: []batch.JobCondition{*newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, wantType: batch.JobSuspended, wantStatus: v1.ConditionTrue, wantReason: "foo", - expectList: []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, + expectList: []batch.JobCondition{*newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")}, expectUpdate: false, }, } @@ -2212,3 +2943,75 @@ func hasValidFailingPods(status []indexPhase, completions int) bool { } return false } + +type podBuilder struct { + *v1.Pod +} + +func buildPod() podBuilder { + return podBuilder{Pod: &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(rand.String(5)), + }, + }} +} + +func (pb podBuilder) name(n string) podBuilder { + pb.Name = n + return pb +} + +func (pb podBuilder) ns(n string) podBuilder { + pb.Namespace = n + return pb +} + +func (pb podBuilder) uid(u string) podBuilder { + pb.UID = types.UID(u) + return pb +} + +func (pb podBuilder) job(j *batch.Job) podBuilder { + pb.Labels = j.Spec.Selector.MatchLabels + pb.Namespace = j.Namespace + pb.OwnerReferences = []metav1.OwnerReference{*metav1.NewControllerRef(j, controllerKind)} + return pb +} + +func (pb podBuilder) clearOwner() podBuilder { + pb.OwnerReferences = nil + return pb +} + +func (pb podBuilder) clearLabels() podBuilder { + pb.Labels = nil + return pb +} + +func (pb podBuilder) index(ix string) podBuilder { + if pb.Annotations == nil { + pb.Annotations = make(map[string]string) + } + pb.Annotations[batch.JobCompletionIndexAnnotation] = ix + return pb +} + +func (pb podBuilder) phase(p v1.PodPhase) podBuilder { + pb.Status.Phase = p + return pb +} + +func (pb podBuilder) trackingFinalizer() podBuilder { + for _, f := range pb.Finalizers { + if f == batch.JobTrackingFinalizer { + return pb + } + } + pb.Finalizers = append(pb.Finalizers, batch.JobTrackingFinalizer) + return pb +} + +func (pb podBuilder) deletionTimestamp() podBuilder { + pb.DeletionTimestamp = &metav1.Time{} + return pb +} diff --git a/pkg/controller/testutil/test_utils.go b/pkg/controller/testutil/test_utils.go index 190a2adb242..ea7e953df59 100644 --- a/pkg/controller/testutil/test_utils.go +++ b/pkg/controller/testutil/test_utils.go @@ -529,15 +529,13 @@ func GetKey(obj interface{}, t *testing.T) string { } val := reflect.ValueOf(obj).Elem() name := val.FieldByName("Name").String() - kind := val.FieldByName("Kind").String() - // Note kind is not always set in the tests, so ignoring that for now - if len(name) == 0 || len(kind) == 0 { + if len(name) == 0 { t.Errorf("Unexpected object %v", obj) } key, err := keyFunc(obj) if err != nil { - t.Errorf("Unexpected error getting key for %v %v: %v", kind, name, err) + t.Errorf("Unexpected error getting key for %T %v: %v", val.Interface(), name, err) return "" } return key diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go 
b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go index 3cd6390a02c..e2e3db36c07 100644 --- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go +++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go @@ -231,7 +231,7 @@ func buildControllerRoles() ([]rbacv1.ClusterRole, []rbacv1.ClusterRoleBinding) addControllerRole(&controllerRoles, &controllerRoleBindings, rbacv1.ClusterRole{ ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "job-controller"}, Rules: []rbacv1.PolicyRule{ - rbacv1helpers.NewRule("get", "list", "watch", "update").Groups(batchGroup).Resources("jobs").RuleOrDie(), + rbacv1helpers.NewRule("get", "list", "watch", "update", "patch").Groups(batchGroup).Resources("jobs").RuleOrDie(), rbacv1helpers.NewRule("update").Groups(batchGroup).Resources("jobs/status").RuleOrDie(), rbacv1helpers.NewRule("update").Groups(batchGroup).Resources("jobs/finalizers").RuleOrDie(), rbacv1helpers.NewRule("list", "watch", "create", "delete", "patch").Groups(legacyGroup).Resources("pods").RuleOrDie(), diff --git a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/controller-roles.yaml b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/controller-roles.yaml index 3baa1d50902..6c8491f3870 100644 --- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/controller-roles.yaml +++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/controller-roles.yaml @@ -791,6 +791,7 @@ items: verbs: - get - list + - patch - update - watch - apiGroups: diff --git a/test/integration/job/job_test.go b/test/integration/job/job_test.go index 32d0b1e1b9b..a3cdb9d161a 100644 --- a/test/integration/job/job_test.go +++ b/test/integration/job/job_test.go @@ -44,10 +44,277 @@ import ( "k8s.io/utils/pointer" ) +const waitInterval = 500 * time.Millisecond + // TestNonParallelJob tests that a Job that only executes one Pod. The test // recreates the Job controller at some points to make sure a new controller // is able to pickup. func TestNonParallelJob(t *testing.T) { + for _, wFinalizers := range []bool{false, true} { + t.Run(fmt.Sprintf("finalizers=%t", wFinalizers), func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, wFinalizers)() + + closeFn, restConfig, clientSet, ns := setup(t, "simple") + defer closeFn() + ctx, cancel := startJobController(restConfig, clientSet) + defer func() { + cancel() + }() + + jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &batchv1.Job{}) + if err != nil { + t.Fatalf("Failed to create Job: %v", err) + } + if got := hasJobTrackingAnnotation(jobObj); got != wFinalizers { + t.Errorf("apiserver created job with tracking annotation: %t, want %t", got, wFinalizers) + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 1, + }, wFinalizers) + + // Restarting controller. + cancel() + ctx, cancel = startJobController(restConfig, clientSet) + + // Failed Pod is replaced. + if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 1); err != nil { + t.Fatalf("Failed setting phase %s on Job Pod: %v", v1.PodFailed, err) + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 1, + Failed: 1, + }, wFinalizers) + + // Restarting controller. + cancel() + ctx, cancel = startJobController(restConfig, clientSet) + + // No more Pods are created after the Pod succeeds. 
+ if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 1); err != nil { + t.Fatalf("Failed setting phase %s on Job Pod: %v", v1.PodSucceeded, err) + } + validateJobSucceeded(ctx, t, clientSet, jobObj) + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Failed: 1, + Succeeded: 1, + }, false) + validateFinishedPodsNoFinalizer(ctx, t, clientSet, jobObj) + }) + } +} + +func TestParallelJob(t *testing.T) { + for _, wFinalizers := range []bool{false, true} { + t.Run(fmt.Sprintf("finalizers=%t", wFinalizers), func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, wFinalizers)() + + closeFn, restConfig, clientSet, ns := setup(t, "parallel") + defer closeFn() + ctx, cancel := startJobController(restConfig, clientSet) + defer cancel() + + jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &batchv1.Job{ + Spec: batchv1.JobSpec{ + Parallelism: pointer.Int32Ptr(5), + }, + }) + if err != nil { + t.Fatalf("Failed to create Job: %v", err) + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 5, + }, wFinalizers) + // Failed Pods are replaced. + if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 2); err != nil { + t.Fatalf("Failed setting phase %s on Job Pods: %v", v1.PodFailed, err) + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 5, + Failed: 2, + }, wFinalizers) + // Once one Pod succeeds, no more Pods are created, even if some fail. + if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 1); err != nil { + t.Fatalf("Failed setting phase %s on Job Pod: %v", v1.PodSucceeded, err) + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Failed: 2, + Succeeded: 1, + Active: 4, + }, wFinalizers) + if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 2); err != nil { + t.Fatalf("Failed setting phase %s on Job Pods: %v", v1.PodFailed, err) + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Failed: 4, + Succeeded: 1, + Active: 2, + }, wFinalizers) + // No more Pods are created after remaining Pods succeed. 
+ if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 2); err != nil { + t.Fatalf("Failed setting phase %s on Job Pods: %v", v1.PodSucceeded, err) + } + validateJobSucceeded(ctx, t, clientSet, jobObj) + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Failed: 4, + Succeeded: 3, + }, false) + validateFinishedPodsNoFinalizer(ctx, t, clientSet, jobObj) + }) + } +} + +func TestParallelJobWithCompletions(t *testing.T) { + for _, wFinalizers := range []bool{false, true} { + t.Run(fmt.Sprintf("finalizers=%t", wFinalizers), func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, wFinalizers)() + closeFn, restConfig, clientSet, ns := setup(t, "completions") + defer closeFn() + ctx, cancel := startJobController(restConfig, clientSet) + defer cancel() + + jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &batchv1.Job{ + Spec: batchv1.JobSpec{ + Parallelism: pointer.Int32Ptr(4), + Completions: pointer.Int32Ptr(6), + }, + }) + if err != nil { + t.Fatalf("Failed to create Job: %v", err) + } + if got := hasJobTrackingAnnotation(jobObj); got != wFinalizers { + t.Errorf("apiserver created job with tracking annotation: %t, want %t", got, wFinalizers) + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 4, + }, wFinalizers) + // Failed Pods are replaced. + if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 2); err != nil { + t.Fatalf("Failed setting phase %s on Job Pods: %v", v1.PodFailed, err) + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 4, + Failed: 2, + }, wFinalizers) + // Pods are created until the number of succeeded Pods equals completions. + if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 3); err != nil { + t.Fatalf("Failed setting phase %s on Job Pod: %v", v1.PodSucceeded, err) + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Failed: 2, + Succeeded: 3, + Active: 3, + }, wFinalizers) + // No more Pods are created after the Job completes. 
+ if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 3); err != nil { + t.Fatalf("Failed setting phase %s on Job Pods: %v", v1.PodSucceeded, err) + } + validateJobSucceeded(ctx, t, clientSet, jobObj) + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Failed: 2, + Succeeded: 6, + }, false) + validateFinishedPodsNoFinalizer(ctx, t, clientSet, jobObj) + }) + } +} + +func TestIndexedJob(t *testing.T) { + for _, wFinalizers := range []bool{false, true} { + t.Run(fmt.Sprintf("finalizers=%t", wFinalizers), func(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, wFinalizers)() + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.IndexedJob, true)() + + closeFn, restConfig, clientSet, ns := setup(t, "indexed") + defer closeFn() + ctx, cancel := startJobController(restConfig, clientSet) + defer func() { + cancel() + }() + + mode := batchv1.IndexedCompletion + jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &batchv1.Job{ + Spec: batchv1.JobSpec{ + Parallelism: pointer.Int32Ptr(3), + Completions: pointer.Int32Ptr(4), + CompletionMode: &mode, + }, + }) + if err != nil { + t.Fatalf("Failed to create Job: %v", err) + } + if got := hasJobTrackingAnnotation(jobObj); got != wFinalizers { + t.Errorf("apiserver created job with tracking annotation: %t, want %t", got, wFinalizers) + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 3, + }, wFinalizers) + validateIndexedJobPods(ctx, t, clientSet, jobObj, sets.NewInt(0, 1, 2), "") + + // One Pod succeeds. + if err := setJobPhaseForIndex(ctx, clientSet, jobObj, v1.PodSucceeded, 1); err != nil { + t.Fatal("Failed trying to succeed pod with index 1") + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 3, + Succeeded: 1, + }, wFinalizers) + validateIndexedJobPods(ctx, t, clientSet, jobObj, sets.NewInt(0, 2, 3), "1") + + // Disable feature gate and restart controller. + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.IndexedJob, false)() + cancel() + ctx, cancel = startJobController(restConfig, clientSet) + events, err := clientSet.EventsV1().Events(ns.Name).Watch(ctx, metav1.ListOptions{}) + if err != nil { + t.Fatal(err) + } + defer events.Stop() + + // One Pod fails, but no recreations happen because feature is disabled. + if err := setJobPhaseForIndex(ctx, clientSet, jobObj, v1.PodFailed, 2); err != nil { + t.Fatal("Failed trying to succeed pod with index 2") + } + if err := waitForEvent(events, jobObj.UID, "IndexedJobDisabled"); err != nil { + t.Errorf("Waiting for an event for IndexedJobDisabled: %v", err) + } + validateIndexedJobPods(ctx, t, clientSet, jobObj, sets.NewInt(0, 3), "1") + + // Re-enable feature gate and restart controller. Failed Pod should be recreated now. + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.IndexedJob, true)() + cancel() + ctx, cancel = startJobController(restConfig, clientSet) + + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 3, + Failed: 1, + Succeeded: 1, + }, wFinalizers) + validateIndexedJobPods(ctx, t, clientSet, jobObj, sets.NewInt(0, 2, 3), "1") + + // Remaining Pods succeed. 
+ if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 3); err != nil { + t.Fatal("Failed trying to succeed remaining pods") + } + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 0, + Failed: 1, + Succeeded: 4, + }, false) + validateIndexedJobPods(ctx, t, clientSet, jobObj, nil, "0-3") + validateJobSucceeded(ctx, t, clientSet, jobObj) + validateFinishedPodsNoFinalizer(ctx, t, clientSet, jobObj) + }) + } +} + +// TestDisableJobTrackingWithFinalizers ensures that when the +// JobTrackingWithFinalizers feature is disabled, tracking finalizers are +// removed from all pods, but Job continues to be tracked. +// This test can be removed once the feature graduates to GA. +func TestDisableJobTrackingWithFinalizers(t *testing.T) { + // Step 1: job created while feature is enabled. + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, true)() + closeFn, restConfig, clientSet, ns := setup(t, "simple") defer closeFn() ctx, cancel := startJobController(restConfig, clientSet) @@ -55,219 +322,123 @@ func TestNonParallelJob(t *testing.T) { cancel() }() - jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &batchv1.Job{}) + jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &batchv1.Job{ + Spec: batchv1.JobSpec{ + Parallelism: pointer.Int32Ptr(2), + }, + }) if err != nil { t.Fatalf("Failed to create Job: %v", err) } + if !hasJobTrackingAnnotation(jobObj) { + t.Error("apiserver didn't add the tracking annotation") + } validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Active: 1, - }) + Active: 2, + }, true) - // Restarting controller. + // Step 2: Disable tracking with finalizers. + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, false)() cancel() - ctx, cancel = startJobController(restConfig, clientSet) - // Failed Pod is replaced. - if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 1); err != nil { + // Fail a pod while Job controller is stopped. + if err := setJobPodsPhase(context.Background(), clientSet, jobObj, v1.PodFailed, 1); err != nil { t.Fatalf("Failed setting phase %s on Job Pod: %v", v1.PodFailed, err) } - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Active: 1, - Failed: 1, - }) - // Restarting controller. - cancel() + // Restart controller. ctx, cancel = startJobController(restConfig, clientSet) - // No more Pods are created after the Pod succeeds. - if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 1); err != nil { - t.Fatalf("Failed setting phase %s on Job Pod: %v", v1.PodSucceeded, err) - } - validateJobSucceeded(ctx, t, clientSet, jobObj) + // Ensure Job continues to be tracked and finalizers are removed. validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 2, + Failed: 1, + }, false) + + jobObj, err = clientSet.BatchV1().Jobs(jobObj.Namespace).Get(ctx, jobObj.Name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("Obtaining updated Job object: %v", err) + } + if hasJobTrackingAnnotation(jobObj) { + t.Error("controller didn't remove the tracking annotation") + } + + // Step 3: Reenable tracking with finalizers. + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, true)() + cancel() + + // Succeed a pod while Job controller is stopped. 
+ if err := setJobPodsPhase(context.Background(), clientSet, jobObj, v1.PodSucceeded, 1); err != nil { + t.Fatalf("Failed setting phase %s on Job Pod: %v", v1.PodSucceeded, err) + } + + // Restart controller. + ctx, cancel = startJobController(restConfig, clientSet) + + // Ensure Job continues to be tracked and finalizers are removed. + validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ + Active: 1, Failed: 1, Succeeded: 1, - }) + }, false) } -func TestParallelJob(t *testing.T) { - closeFn, restConfig, clientSet, ns := setup(t, "parallel") - defer closeFn() - ctx, cancel := startJobController(restConfig, clientSet) - defer cancel() +func TestOrphanPodsFinalizersCleared(t *testing.T) { + // Step 0: job created while feature is enabled. + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, true)() - jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &batchv1.Job{ - Spec: batchv1.JobSpec{ - Parallelism: pointer.Int32Ptr(5), - }, - }) - if err != nil { - t.Fatalf("Failed to create Job: %v", err) - } - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Active: 5, - }) - // Failed Pods are replaced. - if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 2); err != nil { - t.Fatalf("Failed setting phase %s on Job Pods: %v", v1.PodFailed, err) - } - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Active: 5, - Failed: 2, - }) - // Once one Pod succeeds, no more Pods are created, even if some fail. - if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 1); err != nil { - t.Fatalf("Failed setting phase %s on Job Pod: %v", v1.PodSucceeded, err) - } - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Failed: 2, - Succeeded: 1, - Active: 4, - }) - if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 2); err != nil { - t.Fatalf("Failed setting phase %s on Job Pods: %v", v1.PodFailed, err) - } - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Failed: 4, - Succeeded: 1, - Active: 2, - }) - // No more Pods are created after remaining Pods succeed. - if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 2); err != nil { - t.Fatalf("Failed setting phase %s on Job Pods: %v", v1.PodSucceeded, err) - } - validateJobSucceeded(ctx, t, clientSet, jobObj) - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Failed: 4, - Succeeded: 3, - }) -} - -func TestParallelJobWithCompletions(t *testing.T) { - closeFn, restConfig, clientSet, ns := setup(t, "completions") - defer closeFn() - ctx, cancel := startJobController(restConfig, clientSet) - defer cancel() - - jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &batchv1.Job{ - Spec: batchv1.JobSpec{ - Parallelism: pointer.Int32Ptr(4), - Completions: pointer.Int32Ptr(6), - }, - }) - if err != nil { - t.Fatalf("Failed to create Job: %v", err) - } - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Active: 4, - }) - // Failed Pods are replaced. - if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 2); err != nil { - t.Fatalf("Failed setting phase %s on Job Pods: %v", v1.PodFailed, err) - } - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Active: 4, - Failed: 2, - }) - // Pods are created until the number of succeeded Pods equals completions.
- if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 3); err != nil { - t.Fatalf("Failed setting phase %s on Job Pod: %v", v1.PodSucceeded, err) - } - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Failed: 2, - Succeeded: 3, - Active: 3, - }) - // No more Pods are created after the Job completes. - if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 3); err != nil { - t.Fatalf("Failed setting phase %s on Job Pods: %v", v1.PodSucceeded, err) - } - validateJobSucceeded(ctx, t, clientSet, jobObj) - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Failed: 2, - Succeeded: 6, - }) -} - -func TestIndexedJob(t *testing.T) { - defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.IndexedJob, true)() - - closeFn, restConfig, clientSet, ns := setup(t, "indexed") + closeFn, restConfig, clientSet, ns := setup(t, "simple") defer closeFn() ctx, cancel := startJobController(restConfig, clientSet) defer func() { cancel() }() - mode := batchv1.IndexedCompletion jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &batchv1.Job{ Spec: batchv1.JobSpec{ - Parallelism: pointer.Int32Ptr(3), - Completions: pointer.Int32Ptr(4), - CompletionMode: &mode, + Parallelism: pointer.Int32Ptr(1), }, }) if err != nil { t.Fatalf("Failed to create Job: %v", err) } - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Active: 3, - }) - validateIndexedJobPods(ctx, t, clientSet, jobObj, sets.NewInt(0, 1, 2), "") - - // One Pod succeeds. - if err := setJobPhaseForIndex(ctx, clientSet, jobObj, v1.PodSucceeded, 1); err != nil { - t.Fatal("Failed trying to succeed pod with index 1") + if !hasJobTrackingAnnotation(jobObj) { + t.Error("apiserver didn't add the tracking annotation") } validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Active: 3, - Succeeded: 1, - }) - validateIndexedJobPods(ctx, t, clientSet, jobObj, sets.NewInt(0, 2, 3), "1") + Active: 1, + }, true) - // Disable feature gate and restart controller. - defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.IndexedJob, false)() + // Step 2: Disable tracking with finalizers. + defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, false)() cancel() - ctx, cancel = startJobController(restConfig, clientSet) - events, err := clientSet.EventsV1().Events(ns.Name).Watch(ctx, metav1.ListOptions{}) + + // Delete the Job while controller is stopped. + err = clientSet.BatchV1().Jobs(jobObj.Namespace).Delete(context.Background(), jobObj.Name, metav1.DeleteOptions{}) if err != nil { - t.Fatal(err) + t.Fatalf("Failed to delete job: %v", err) } - defer events.Stop() - // One Pod fails, but no recreations happen because feature is disabled. - if err := setJobPhaseForIndex(ctx, clientSet, jobObj, v1.PodFailed, 2); err != nil { - t.Fatal("Failed trying to succeed pod with index 2") - } - if err := waitForEvent(events, jobObj.UID, "IndexedJobDisabled"); err != nil { - t.Errorf("Waiting for an event for IndexedJobDisabled: %v", err) - } - validateIndexedJobPods(ctx, t, clientSet, jobObj, sets.NewInt(0, 3), "1") - - // Re-enable feature gate and restart controller. Failed Pod should be recreated now. - defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.IndexedJob, true)() - cancel() + // Restart controller. 
ctx, cancel = startJobController(restConfig, clientSet) - - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Active: 3, - Failed: 1, - Succeeded: 1, - }) - validateIndexedJobPods(ctx, t, clientSet, jobObj, sets.NewInt(0, 2, 3), "1") - - // Remaining Pods succeed. - if err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodSucceeded, 3); err != nil { - t.Fatal("Failed trying to succeed remaining pods") + if err := wait.Poll(waitInterval, wait.ForeverTestTimeout, func() (done bool, err error) { + pods, err := clientSet.CoreV1().Pods(jobObj.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + t.Fatalf("Failed to list Job Pods: %v", err) + } + sawPods := false + for _, pod := range pods.Items { + if isPodOwnedByJob(&pod, jobObj) { + if hasJobTrackingFinalizer(&pod) { + return false, nil + } + sawPods = true + } + } + return sawPods, nil + }); err != nil { + t.Errorf("Waiting for finalizers to be removed: %v", err) } - validateJobPodsStatus(ctx, t, clientSet, jobObj, podsByStatus{ - Active: 0, - Failed: 1, - Succeeded: 4, - }) - validateIndexedJobPods(ctx, t, clientSet, jobObj, nil, "0-3") - validateJobSucceeded(ctx, t, clientSet, jobObj) } func TestSuspendJob(t *testing.T) { @@ -336,7 +507,7 @@ func TestSuspendJob(t *testing.T) { validate := func(s string, active int, status v1.ConditionStatus, reason string) { validateJobPodsStatus(ctx, t, clientSet, job, podsByStatus{ Active: active, - }) + }, false) job, err = clientSet.BatchV1().Jobs(ns.Name).Get(ctx, job.Name, metav1.GetOptions{}) if err != nil { t.Fatalf("Failed to get Job after %s: %v", s, err) @@ -382,7 +553,7 @@ func TestSuspendJobControllerRestart(t *testing.T) { } validateJobPodsStatus(ctx, t, clientSet, job, podsByStatus{ Active: 0, - }) + }, false) // Disable feature gate and restart controller to test that pods get created. defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.SuspendJob, false)() @@ -394,7 +565,7 @@ func TestSuspendJobControllerRestart(t *testing.T) { } validateJobPodsStatus(ctx, t, clientSet, job, podsByStatus{ Active: 2, - }) + }, false) } type podsByStatus struct { @@ -403,10 +574,10 @@ type podsByStatus struct { Succeeded int } -func validateJobPodsStatus(ctx context.Context, t *testing.T, clientSet clientset.Interface, jobObj *batchv1.Job, desired podsByStatus) { +func validateJobPodsStatus(ctx context.Context, t *testing.T, clientSet clientset.Interface, jobObj *batchv1.Job, desired podsByStatus, wFinalizer bool) { t.Helper() var actualCounts podsByStatus - if err := wait.Poll(time.Second, wait.ForeverTestTimeout, func() (bool, error) { + if err := wait.Poll(waitInterval, wait.ForeverTestTimeout, func() (bool, error) { updatedJob, err := clientSet.BatchV1().Jobs(jobObj.Namespace).Get(ctx, jobObj.Name, metav1.GetOptions{}) if err != nil { t.Fatalf("Failed to get updated Job: %v", err) @@ -419,24 +590,47 @@ func validateJobPodsStatus(ctx context.Context, t *testing.T, clientSet clientse return cmp.Equal(actualCounts, desired), nil }); err != nil { diff := cmp.Diff(desired, actualCounts) - t.Errorf("Waiting for Job Pods: %v\nPods (-want,+got):\n%s", err, diff) + t.Errorf("Waiting for Job Status: %v\nPods (-want,+got):\n%s", err, diff) } - // Verify active Pods. No need for another wait.Poll.
+ var active []*v1.Pod + if err := wait.PollImmediate(waitInterval, wait.ForeverTestTimeout, func() (bool, error) { + pods, err := clientSet.CoreV1().Pods(jobObj.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + t.Fatalf("Failed to list Job Pods: %v", err) + } + active = nil + for _, pod := range pods.Items { + phase := pod.Status.Phase + if isPodOwnedByJob(&pod, jobObj) && (phase == v1.PodPending || phase == v1.PodRunning) { + p := pod + active = append(active, &p) + } + } + return len(active) == desired.Active, nil + }); err != nil { + if len(active) != desired.Active { + t.Errorf("Found %d active Pods, want %d", len(active), desired.Active) + } + } + for _, p := range active { + if got := hasJobTrackingFinalizer(p); got != wFinalizer { + t.Errorf("Pod %s has tracking finalizer %t, want %t", p.Name, got, wFinalizer) + } + } +} + +func validateFinishedPodsNoFinalizer(ctx context.Context, t *testing.T, clientSet clientset.Interface, jobObj *batchv1.Job) { + t.Helper() pods, err := clientSet.CoreV1().Pods(jobObj.Namespace).List(ctx, metav1.ListOptions{}) if err != nil { t.Fatalf("Failed to list Job Pods: %v", err) } - active := 0 for _, pod := range pods.Items { - if isPodOwnedByJob(&pod, jobObj) { - if pod.Status.Phase == v1.PodPending || pod.Status.Phase == v1.PodRunning { - active++ - } + phase := pod.Status.Phase + if isPodOwnedByJob(&pod, jobObj) && (phase == v1.PodPending || phase == v1.PodRunning) && hasJobTrackingFinalizer(&pod) { + t.Errorf("Finished pod %s still has a tracking finalizer", pod.Name) } } - if active != desired.Active { - t.Errorf("Found %d active Pods, want %d", active, desired.Active) - } } // validateIndexedJobPods validates indexes and hostname of @@ -484,7 +678,7 @@ func waitForEvent(events watch.Interface, uid types.UID, reason string) error { if reason == "" { return nil } - return wait.Poll(time.Second, wait.ForeverTestTimeout, func() (bool, error) { + return wait.Poll(waitInterval, wait.ForeverTestTimeout, func() (bool, error) { for { var ev watch.Event select { @@ -515,7 +709,7 @@ func getJobConditionStatus(ctx context.Context, job *batchv1.Job, cType batchv1. func validateJobSucceeded(ctx context.Context, t *testing.T, clientSet clientset.Interface, jobObj *batchv1.Job) { t.Helper() - if err := wait.Poll(time.Second, wait.ForeverTestTimeout, func() (bool, error) { + if err := wait.Poll(waitInterval, wait.ForeverTestTimeout, func() (bool, error) { j, err := clientSet.BatchV1().Jobs(jobObj.Namespace).Get(ctx, jobObj.Name, metav1.GetOptions{}) if err != nil { t.Fatalf("Failed to obtain updated Job: %v", err) @@ -632,3 +826,20 @@ func startJobController(restConfig *restclient.Config, clientSet clientset.Inter go jc.Run(1, ctx.Done()) return ctx, cancel } + +func hasJobTrackingFinalizer(obj metav1.Object) bool { + for _, fin := range obj.GetFinalizers() { + if fin == batchv1.JobTrackingFinalizer { + return true + } + } + return false +} + +func hasJobTrackingAnnotation(job *batchv1.Job) bool { + if job.Annotations == nil { + return false + } + _, ok := job.Annotations[batchv1.JobTrackingFinalizer] + return ok +}
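Note on the podBuilder test helper added in the job controller unit tests: it is a small fluent builder over *v1.Pod that copies the Job's namespace, labels, and controller ownerRef, sets the completion-index annotation, and appends the tracking finalizer only if it is not already present. The sketch below shows how such a chain is typically assembled; it assumes it is called from the same test package (so buildPod, its methods, the package-level controllerKind, and the v1/batch imports of that file are in scope), and the function name newTrackedPodForJob and the someJob argument are illustrative, not part of this change.

// newTrackedPodForJob is a hypothetical helper showing how the builder chains.
// someJob must have Spec.Selector.MatchLabels populated, since job() copies it.
func newTrackedPodForJob(someJob *batch.Job) *v1.Pod {
	return buildPod().
		name("example-pod").  // metadata.name
		job(someJob).         // namespace, labels and controller ownerRef taken from the Job
		index("2").           // Job completion-index annotation
		phase(v1.PodRunning). // status.phase
		trackingFinalizer().  // appends batch.JobTrackingFinalizer if missing
		Pod
}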
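The finalizer checks in validateJobPodsStatus, validateFinishedPodsNoFinalizer, and TestOrphanPodsFinalizersCleared all reduce to listing the Job's Pods and inspecting metadata.finalizers on the ones owned by the Job. Below is a condensed sketch of that polling loop written only against published client-go and apimachinery APIs; the package name, helper name, 500ms interval, and one-minute timeout are illustrative choices, and metav1.IsControlledBy stands in for the test file's isPodOwnedByJob helper.

package jobtest // illustrative package, not part of this change

import (
	"context"
	"time"

	batchv1 "k8s.io/api/batch/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

// waitForTrackingFinalizersRemoved polls until no Pod controlled by jobObj
// still carries the batchv1.JobTrackingFinalizer finalizer.
func waitForTrackingFinalizersRemoved(ctx context.Context, c kubernetes.Interface, jobObj *batchv1.Job) error {
	return wait.PollImmediate(500*time.Millisecond, time.Minute, func() (bool, error) {
		pods, err := c.CoreV1().Pods(jobObj.Namespace).List(ctx, metav1.ListOptions{})
		if err != nil {
			return false, err
		}
		for i := range pods.Items {
			pod := &pods.Items[i]
			if !metav1.IsControlledBy(pod, jobObj) {
				continue // not owned by this Job
			}
			for _, fin := range pod.Finalizers {
				if fin == batchv1.JobTrackingFinalizer {
					return false, nil // a tracking finalizer remains; keep polling
				}
			}
		}
		return true, nil
	})
}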