mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 12:15:52 +00:00
Track expected removals of Pod finalizers
Add the UIDs of Pods for which we are removing finalizers to an in-memory cache. The controller removes UIDs from the cache as Pod updates or deletes come in. This avoids double counting finished Pods when Pod updates arrive after Job status updates. https://github.com/kubernetes/kubernetes/issues/105200
This commit is contained in:
parent
95c2a8024c
commit
5929ccd391
@ -93,6 +93,10 @@ type Controller struct {
|
||||
// A TTLCache of pod creates/deletes each rc expects to see
|
||||
expectations controller.ControllerExpectationsInterface
|
||||
|
||||
// finalizerExpectations tracks the Pod UIDs for which the controller
|
||||
// expects to observe the tracking finalizer removed.
|
||||
finalizerExpectations *uidTrackingExpectations
|
||||
|
||||
// A store of jobs
|
||||
jobLister batchv1listers.JobLister
|
||||
|
||||
@ -125,10 +129,11 @@ func NewController(podInformer coreinformers.PodInformer, jobInformer batchinfor
|
||||
KubeClient: kubeClient,
|
||||
Recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
|
||||
},
|
||||
expectations: controller.NewControllerExpectations(),
|
||||
queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff), "job"),
|
||||
orphanQueue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff), "job_orphan_pod"),
|
||||
recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
|
||||
expectations: controller.NewControllerExpectations(),
|
||||
finalizerExpectations: newUIDTrackingExpectations(),
|
||||
queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff), "job"),
|
||||
orphanQueue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff), "job_orphan_pod"),
|
||||
recorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
|
||||
}
|
||||
|
||||
jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
||||
@ -146,7 +151,9 @@ func NewController(podInformer coreinformers.PodInformer, jobInformer batchinfor
|
||||
podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
||||
AddFunc: jm.addPod,
|
||||
UpdateFunc: jm.updatePod,
|
||||
DeleteFunc: jm.deletePod,
|
||||
DeleteFunc: func(obj interface{}) {
|
||||
jm.deletePod(obj, true)
|
||||
},
|
||||
})
|
||||
jm.podStore = podInformer.Lister()
|
||||
jm.podStoreSynced = podInformer.Informer().HasSynced
|
||||
@ -227,7 +234,7 @@ func (jm *Controller) addPod(obj interface{}) {
|
||||
if pod.DeletionTimestamp != nil {
|
||||
// on a restart of the controller, it's possible a new pod shows up in a state that
|
||||
// is already pending deletion. Prevent the pod from being a creation observation.
|
||||
jm.deletePod(pod)
|
||||
jm.deletePod(pod, false)
|
||||
return
|
||||
}
|
||||
|
||||
@ -271,19 +278,31 @@ func (jm *Controller) updatePod(old, cur interface{}) {
|
||||
// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
|
||||
// for modification of the deletion timestamp and expect a job to create more pods asap, not wait
|
||||
// until the kubelet actually deletes the pod.
|
||||
jm.deletePod(curPod)
|
||||
jm.deletePod(curPod, false)
|
||||
return
|
||||
}
|
||||
|
||||
// the only time we want the backoff to kick-in, is when the pod failed
|
||||
immediate := curPod.Status.Phase != v1.PodFailed
|
||||
|
||||
// Don't check if oldPod has the finalizer, as during ownership transfer
|
||||
// finalizers might be re-added and removed again on behalf of the new owner.
|
||||
// If all those Pod updates collapse into a single event, the finalizer
|
||||
// might be removed in oldPod and curPod. We want to record the latest
|
||||
// state.
|
||||
finalizerRemoved := !hasJobTrackingFinalizer(curPod)
|
||||
curControllerRef := metav1.GetControllerOf(curPod)
|
||||
oldControllerRef := metav1.GetControllerOf(oldPod)
|
||||
controllerRefChanged := !reflect.DeepEqual(curControllerRef, oldControllerRef)
|
||||
if controllerRefChanged && oldControllerRef != nil {
|
||||
// The ControllerRef was changed. Sync the old controller, if any.
|
||||
if job := jm.resolveControllerRef(oldPod.Namespace, oldControllerRef); job != nil {
|
||||
if finalizerRemoved {
|
||||
key, err := controller.KeyFunc(job)
|
||||
if err == nil {
|
||||
jm.finalizerExpectations.finalizerRemovalObserved(key, string(curPod.UID))
|
||||
}
|
||||
}
|
||||
jm.enqueueController(job, immediate)
|
||||
}
|
||||
}
|
||||
@ -294,6 +313,12 @@ func (jm *Controller) updatePod(old, cur interface{}) {
|
||||
if job == nil {
|
||||
return
|
||||
}
|
||||
if finalizerRemoved {
|
||||
key, err := controller.KeyFunc(job)
|
||||
if err == nil {
|
||||
jm.finalizerExpectations.finalizerRemovalObserved(key, string(curPod.UID))
|
||||
}
|
||||
}
|
||||
jm.enqueueController(job, immediate)
|
||||
return
|
||||
}
|
||||
@ -310,7 +335,7 @@ func (jm *Controller) updatePod(old, cur interface{}) {
|
||||
|
||||
// When a pod is deleted, enqueue the job that manages the pod and update its expectations.
|
||||
// obj could be an *v1.Pod, or a DeleteFinalStateUnknown marker item.
|
||||
func (jm *Controller) deletePod(obj interface{}) {
|
||||
func (jm *Controller) deletePod(obj interface{}, final bool) {
|
||||
pod, ok := obj.(*v1.Pod)
|
||||
|
||||
// When a delete is dropped, the relist will notice a pod in the store not
|
||||
@ -347,6 +372,13 @@ func (jm *Controller) deletePod(obj interface{}) {
|
||||
return
|
||||
}
|
||||
jm.expectations.DeletionObserved(jobKey)
|
||||
|
||||
// Consider the finalizer removed if this is the final delete. Otherwise,
|
||||
// it's an update for the deletion timestamp, then check finalizer.
|
||||
if final || !hasJobTrackingFinalizer(pod) {
|
||||
jm.finalizerExpectations.finalizerRemovalObserved(jobKey, string(pod.UID))
|
||||
}
|
||||
|
||||
jm.enqueueController(job, true)
|
||||
}
|
||||
|
||||
@ -571,6 +603,7 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) {
|
||||
if apierrors.IsNotFound(err) {
|
||||
klog.V(4).Infof("Job has been deleted: %v", key)
|
||||
jm.expectations.DeleteExpectations(key)
|
||||
jm.finalizerExpectations.deleteExpectations(key)
|
||||
return true, nil
|
||||
}
|
||||
return false, err
|
||||
@ -609,6 +642,7 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) {
|
||||
metrics.JobSyncNum.WithLabelValues(completionMode, result, action).Inc()
|
||||
}()
|
||||
|
||||
var expectedRmFinalizers sets.String
|
||||
var uncounted *uncountedTerminatedPods
|
||||
if trackingUncountedPods(&job) {
|
||||
klog.V(4).InfoS("Tracking uncounted Pods with pod finalizers", "job", klog.KObj(&job))
|
||||
@ -616,6 +650,7 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) {
|
||||
job.Status.UncountedTerminatedPods = &batch.UncountedTerminatedPods{}
|
||||
}
|
||||
uncounted = newUncountedTerminatedPods(*job.Status.UncountedTerminatedPods)
|
||||
expectedRmFinalizers = jm.finalizerExpectations.getExpectedUIDs(key)
|
||||
} else if patch := removeTrackingAnnotationPatch(&job); patch != nil {
|
||||
if err := jm.patchJobHandler(&job, patch); err != nil {
|
||||
return false, fmt.Errorf("removing tracking finalizer from job %s: %w", key, err)
|
||||
@ -634,7 +669,7 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) {
|
||||
|
||||
activePods := controller.FilterActivePods(pods)
|
||||
active := int32(len(activePods))
|
||||
succeeded, failed := getStatus(&job, pods, uncounted)
|
||||
succeeded, failed := getStatus(&job, pods, uncounted, expectedRmFinalizers)
|
||||
// Job first start. Set StartTime and start the ActiveDeadlineSeconds timer
|
||||
// only if the job is not in the suspended state.
|
||||
if job.Status.StartTime == nil && !jobSuspended(&job) {
|
||||
@ -754,7 +789,7 @@ func (jm *Controller) syncJob(key string) (forget bool, rErr error) {
|
||||
if uncounted != nil {
|
||||
needsStatusUpdate := suspendCondChanged || active != job.Status.Active
|
||||
job.Status.Active = active
|
||||
err = jm.trackJobStatusAndRemoveFinalizers(&job, pods, prevSucceededIndexes, *uncounted, finishedCondition, needsStatusUpdate)
|
||||
err = jm.trackJobStatusAndRemoveFinalizers(&job, pods, prevSucceededIndexes, *uncounted, expectedRmFinalizers, finishedCondition, needsStatusUpdate)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("tracking status: %w", err)
|
||||
}
|
||||
@ -872,7 +907,8 @@ func (jm *Controller) removeTrackingFinalizersFromAllPods(pods []*v1.Pod) error
|
||||
if len(podsWithFinalizer) == 0 {
|
||||
return nil
|
||||
}
|
||||
_, err := jm.removeTrackingFinalizerFromPods(podsWithFinalizer)
|
||||
// Tracking with finalizers is disabled, no need to set expectations.
|
||||
_, err := jm.removeTrackingFinalizerFromPods("", podsWithFinalizer)
|
||||
return err
|
||||
}
|
||||
|
||||
@ -884,7 +920,7 @@ func (jm *Controller) removeTrackingFinalizersFromAllPods(pods []*v1.Pod) error
|
||||
// 4. Add Complete condition if satisfied with current counters.
|
||||
// It does this up to a limited number of Pods so that the size of .status
|
||||
// doesn't grow too much and this sync doesn't starve other Jobs.
|
||||
func (jm *Controller) trackJobStatusAndRemoveFinalizers(job *batch.Job, pods []*v1.Pod, succeededIndexes orderedIntervals, uncounted uncountedTerminatedPods, finishedCond *batch.JobCondition, needsFlush bool) error {
|
||||
func (jm *Controller) trackJobStatusAndRemoveFinalizers(job *batch.Job, pods []*v1.Pod, succeededIndexes orderedIntervals, uncounted uncountedTerminatedPods, expectedRmFinalizers sets.String, finishedCond *batch.JobCondition, needsFlush bool) error {
|
||||
isIndexed := isIndexedJob(job)
|
||||
var podsToRemoveFinalizer []*v1.Pod
|
||||
uncountedStatus := job.Status.UncountedTerminatedPods
|
||||
@ -895,8 +931,9 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(job *batch.Job, pods []*
|
||||
}
|
||||
uidsWithFinalizer := make(sets.String, len(pods))
|
||||
for _, p := range pods {
|
||||
if hasJobTrackingFinalizer(p) {
|
||||
uidsWithFinalizer.Insert(string(p.UID))
|
||||
uid := string(p.UID)
|
||||
if hasJobTrackingFinalizer(p) && !expectedRmFinalizers.Has(uid) {
|
||||
uidsWithFinalizer.Insert(uid)
|
||||
}
|
||||
}
|
||||
// Shallow copy, as it will only be used to detect changes in the counters.
|
||||
@ -905,7 +942,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(job *batch.Job, pods []*
|
||||
needsFlush = true
|
||||
}
|
||||
for _, pod := range pods {
|
||||
if !hasJobTrackingFinalizer(pod) {
|
||||
if !hasJobTrackingFinalizer(pod) || expectedRmFinalizers.Has(string(pod.UID)) {
|
||||
continue
|
||||
}
|
||||
podFinished := pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed
|
||||
@ -986,14 +1023,18 @@ func (jm *Controller) flushUncountedAndRemoveFinalizers(job *batch.Job, podsToRe
|
||||
return job, needsFlush, fmt.Errorf("adding uncounted pods to status: %w", err)
|
||||
}
|
||||
recordJobPodFinished(job, *oldCounters)
|
||||
// Shallow copy.
|
||||
// Shallow copy, as it will only be used to detect changes in the counters.
|
||||
*oldCounters = job.Status
|
||||
needsFlush = false
|
||||
}
|
||||
jobKey, err := controller.KeyFunc(job)
|
||||
if err != nil {
|
||||
return job, needsFlush, fmt.Errorf("getting job key: %w", err)
|
||||
}
|
||||
var rmErr error
|
||||
if len(podsToRemoveFinalizer) > 0 {
|
||||
var rmSucceded []bool
|
||||
rmSucceded, rmErr = jm.removeTrackingFinalizerFromPods(podsToRemoveFinalizer)
|
||||
rmSucceded, rmErr = jm.removeTrackingFinalizerFromPods(jobKey, podsToRemoveFinalizer)
|
||||
for i, p := range podsToRemoveFinalizer {
|
||||
if rmSucceded[i] {
|
||||
uidsWithFinalizer.Delete(string(p.UID))
|
||||
@ -1039,9 +1080,19 @@ func cleanUncountedPodsWithoutFinalizers(status *batch.JobStatus, uidsWithFinali
|
||||
// returns an array of booleans where the i-th value is true if the finalizer
|
||||
// of the i-th Pod was successfully removed (if the pod was deleted when this
|
||||
// function was called, it's considered as the finalizer was removed successfully).
|
||||
func (jm *Controller) removeTrackingFinalizerFromPods(pods []*v1.Pod) ([]bool, error) {
|
||||
func (jm *Controller) removeTrackingFinalizerFromPods(jobKey string, pods []*v1.Pod) ([]bool, error) {
|
||||
errCh := make(chan error, len(pods))
|
||||
succeeded := make([]bool, len(pods))
|
||||
uids := make([]string, len(pods))
|
||||
for i, p := range pods {
|
||||
uids[i] = string(p.UID)
|
||||
}
|
||||
if jobKey != "" {
|
||||
err := jm.finalizerExpectations.expectFinalizersRemoved(jobKey, uids)
|
||||
if err != nil {
|
||||
return succeeded, fmt.Errorf("setting expected removed finalizers: %w", err)
|
||||
}
|
||||
}
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(len(pods))
|
||||
for i := range pods {
|
||||
@ -1049,10 +1100,17 @@ func (jm *Controller) removeTrackingFinalizerFromPods(pods []*v1.Pod) ([]bool, e
|
||||
pod := pods[i]
|
||||
defer wg.Done()
|
||||
if patch := removeTrackingFinalizerPatch(pod); patch != nil {
|
||||
if err := jm.podControl.PatchPod(pod.Namespace, pod.Name, patch); err != nil && !apierrors.IsNotFound(err) {
|
||||
errCh <- err
|
||||
utilruntime.HandleError(err)
|
||||
return
|
||||
if err := jm.podControl.PatchPod(pod.Namespace, pod.Name, patch); err != nil {
|
||||
// In case of any failure, we don't expect a Pod update for the
|
||||
// finalizer removed. Clear expectation now.
|
||||
if jobKey != "" {
|
||||
jm.finalizerExpectations.finalizerRemovalObserved(jobKey, string(pod.UID))
|
||||
}
|
||||
if !apierrors.IsNotFound(err) {
|
||||
errCh <- err
|
||||
utilruntime.HandleError(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
succeeded[i] = true
|
||||
}
|
||||
@ -1155,15 +1213,15 @@ func newCondition(conditionType batch.JobConditionType, status v1.ConditionStatu
|
||||
}
|
||||
|
||||
// getStatus returns number of succeeded and failed pods running a job
|
||||
func getStatus(job *batch.Job, pods []*v1.Pod, uncounted *uncountedTerminatedPods) (succeeded, failed int32) {
|
||||
func getStatus(job *batch.Job, pods []*v1.Pod, uncounted *uncountedTerminatedPods, expectedRmFinalizers sets.String) (succeeded, failed int32) {
|
||||
if uncounted != nil {
|
||||
succeeded = job.Status.Succeeded
|
||||
failed = job.Status.Failed
|
||||
}
|
||||
succeeded += int32(countValidPodsWithFilter(job, pods, uncounted.Succeeded(), func(p *v1.Pod) bool {
|
||||
succeeded += int32(countValidPodsWithFilter(job, pods, uncounted.Succeeded(), expectedRmFinalizers, func(p *v1.Pod) bool {
|
||||
return p.Status.Phase == v1.PodSucceeded
|
||||
}))
|
||||
failed += int32(countValidPodsWithFilter(job, pods, uncounted.Failed(), func(p *v1.Pod) bool {
|
||||
failed += int32(countValidPodsWithFilter(job, pods, uncounted.Failed(), expectedRmFinalizers, func(p *v1.Pod) bool {
|
||||
if p.Status.Phase == v1.PodFailed {
|
||||
return true
|
||||
}
|
||||
@ -1395,12 +1453,13 @@ func getBackoff(queue workqueue.RateLimitingInterface, key interface{}) time.Dur
|
||||
// countValidPodsWithFilter returns number of valid pods that pass the filter.
|
||||
// Pods are valid if they have a finalizer and, for Indexed Jobs, a valid
|
||||
// completion index.
|
||||
func countValidPodsWithFilter(job *batch.Job, pods []*v1.Pod, uncounted sets.String, filter func(*v1.Pod) bool) int {
|
||||
func countValidPodsWithFilter(job *batch.Job, pods []*v1.Pod, uncounted sets.String, expectedRmFinalizers sets.String, filter func(*v1.Pod) bool) int {
|
||||
result := len(uncounted)
|
||||
for _, p := range pods {
|
||||
uid := string(p.UID)
|
||||
// Pods that don't have a completion finalizer are in the uncounted set or
|
||||
// have already been accounted for in the Job status.
|
||||
if uncounted != nil && (!hasJobTrackingFinalizer(p) || uncounted.Has(string(p.UID))) {
|
||||
if uncounted != nil && (!hasJobTrackingFinalizer(p) || uncounted.Has(uid) || expectedRmFinalizers.Has(uid)) {
|
||||
continue
|
||||
}
|
||||
if isIndexedJob(job) {
|
||||
|
@ -116,6 +116,7 @@ func newPod(name string, job *batch.Job) *v1.Pod {
|
||||
return &v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
UID: types.UID(name),
|
||||
Labels: job.Spec.Selector.MatchLabels,
|
||||
Namespace: job.Namespace,
|
||||
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(job, controllerKind)},
|
||||
@ -1003,10 +1004,11 @@ func TestSyncJobLegacyTracking(t *testing.T) {
|
||||
|
||||
func TestGetStatus(t *testing.T) {
|
||||
cases := map[string]struct {
|
||||
job batch.Job
|
||||
pods []*v1.Pod
|
||||
wantSucceeded int32
|
||||
wantFailed int32
|
||||
job batch.Job
|
||||
pods []*v1.Pod
|
||||
expectedRmFinalizers sets.String
|
||||
wantSucceeded int32
|
||||
wantFailed int32
|
||||
}{
|
||||
"without finalizers": {
|
||||
job: batch.Job{
|
||||
@ -1068,6 +1070,30 @@ func TestGetStatus(t *testing.T) {
|
||||
wantSucceeded: 4,
|
||||
wantFailed: 4,
|
||||
},
|
||||
"with expected removed finalizers": {
|
||||
job: batch.Job{
|
||||
Status: batch.JobStatus{
|
||||
Succeeded: 2,
|
||||
Failed: 2,
|
||||
UncountedTerminatedPods: &batch.UncountedTerminatedPods{
|
||||
Succeeded: []types.UID{"a"},
|
||||
Failed: []types.UID{"d"},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedRmFinalizers: sets.NewString("b", "f"),
|
||||
pods: []*v1.Pod{
|
||||
buildPod().uid("a").phase(v1.PodSucceeded).Pod,
|
||||
buildPod().uid("b").phase(v1.PodSucceeded).trackingFinalizer().Pod,
|
||||
buildPod().uid("c").phase(v1.PodSucceeded).trackingFinalizer().Pod,
|
||||
buildPod().uid("d").phase(v1.PodFailed).Pod,
|
||||
buildPod().uid("e").phase(v1.PodFailed).trackingFinalizer().Pod,
|
||||
buildPod().uid("f").phase(v1.PodFailed).trackingFinalizer().Pod,
|
||||
buildPod().uid("g").phase(v1.PodFailed).trackingFinalizer().Pod,
|
||||
},
|
||||
wantSucceeded: 4,
|
||||
wantFailed: 5,
|
||||
},
|
||||
"deleted pods": {
|
||||
pods: []*v1.Pod{
|
||||
buildPod().uid("a").phase(v1.PodSucceeded).deletionTimestamp().Pod,
|
||||
@ -1104,7 +1130,7 @@ func TestGetStatus(t *testing.T) {
|
||||
if tc.job.Status.UncountedTerminatedPods != nil {
|
||||
uncounted = newUncountedTerminatedPods(*tc.job.Status.UncountedTerminatedPods)
|
||||
}
|
||||
succeeded, failed := getStatus(&tc.job, tc.pods, uncounted)
|
||||
succeeded, failed := getStatus(&tc.job, tc.pods, uncounted, tc.expectedRmFinalizers)
|
||||
if succeeded != tc.wantSucceeded {
|
||||
t.Errorf("getStatus reports %d succeeded pods, want %d", succeeded, tc.wantSucceeded)
|
||||
}
|
||||
@ -1121,15 +1147,16 @@ func TestTrackJobStatusAndRemoveFinalizers(t *testing.T) {
|
||||
indexedCompletion := batch.IndexedCompletion
|
||||
mockErr := errors.New("mock error")
|
||||
cases := map[string]struct {
|
||||
job batch.Job
|
||||
pods []*v1.Pod
|
||||
finishedCond *batch.JobCondition
|
||||
needsFlush bool
|
||||
statusUpdateErr error
|
||||
podControlErr error
|
||||
wantErr error
|
||||
wantRmFinalizers int
|
||||
wantStatusUpdates []batch.JobStatus
|
||||
job batch.Job
|
||||
pods []*v1.Pod
|
||||
finishedCond *batch.JobCondition
|
||||
expectedRmFinalizers sets.String
|
||||
needsFlush bool
|
||||
statusUpdateErr error
|
||||
podControlErr error
|
||||
wantErr error
|
||||
wantRmFinalizers int
|
||||
wantStatusUpdates []batch.JobStatus
|
||||
}{
|
||||
"no updates": {},
|
||||
"new active": {
|
||||
@ -1211,6 +1238,45 @@ func TestTrackJobStatusAndRemoveFinalizers(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
"expecting removed finalizers": {
|
||||
job: batch.Job{
|
||||
Status: batch.JobStatus{
|
||||
Succeeded: 2,
|
||||
Failed: 3,
|
||||
UncountedTerminatedPods: &batch.UncountedTerminatedPods{
|
||||
Succeeded: []types.UID{"a", "g"},
|
||||
Failed: []types.UID{"b", "h"},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedRmFinalizers: sets.NewString("c", "d", "g", "h"),
|
||||
pods: []*v1.Pod{
|
||||
buildPod().uid("a").phase(v1.PodSucceeded).trackingFinalizer().Pod,
|
||||
buildPod().uid("b").phase(v1.PodFailed).trackingFinalizer().Pod,
|
||||
buildPod().uid("c").phase(v1.PodSucceeded).trackingFinalizer().Pod,
|
||||
buildPod().uid("d").phase(v1.PodFailed).trackingFinalizer().Pod,
|
||||
buildPod().uid("e").phase(v1.PodSucceeded).trackingFinalizer().Pod,
|
||||
buildPod().uid("f").phase(v1.PodFailed).trackingFinalizer().Pod,
|
||||
buildPod().uid("g").phase(v1.PodSucceeded).trackingFinalizer().Pod,
|
||||
buildPod().uid("h").phase(v1.PodFailed).trackingFinalizer().Pod,
|
||||
},
|
||||
wantRmFinalizers: 4,
|
||||
wantStatusUpdates: []batch.JobStatus{
|
||||
{
|
||||
UncountedTerminatedPods: &batch.UncountedTerminatedPods{
|
||||
Succeeded: []types.UID{"a", "e"},
|
||||
Failed: []types.UID{"b", "f"},
|
||||
},
|
||||
Succeeded: 3,
|
||||
Failed: 4,
|
||||
},
|
||||
{
|
||||
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
|
||||
Succeeded: 5,
|
||||
Failed: 6,
|
||||
},
|
||||
},
|
||||
},
|
||||
"succeeding job": {
|
||||
pods: []*v1.Pod{
|
||||
buildPod().uid("a").phase(v1.PodSucceeded).trackingFinalizer().Pod,
|
||||
@ -1530,7 +1596,7 @@ func TestTrackJobStatusAndRemoveFinalizers(t *testing.T) {
|
||||
}
|
||||
uncounted := newUncountedTerminatedPods(*job.Status.UncountedTerminatedPods)
|
||||
succeededIndexes := succeededIndexesFromJob(job)
|
||||
err := manager.trackJobStatusAndRemoveFinalizers(job, tc.pods, succeededIndexes, *uncounted, tc.finishedCond, tc.needsFlush)
|
||||
err := manager.trackJobStatusAndRemoveFinalizers(job, tc.pods, succeededIndexes, *uncounted, tc.expectedRmFinalizers, tc.finishedCond, tc.needsFlush)
|
||||
if !errors.Is(err, tc.wantErr) {
|
||||
t.Errorf("Got error %v, want %v", err, tc.wantErr)
|
||||
}
|
||||
@ -2285,7 +2351,7 @@ func TestDeletePod(t *testing.T) {
|
||||
informer.Core().V1().Pods().Informer().GetIndexer().Add(pod1)
|
||||
informer.Core().V1().Pods().Informer().GetIndexer().Add(pod2)
|
||||
|
||||
jm.deletePod(pod1)
|
||||
jm.deletePod(pod1, true)
|
||||
if got, want := jm.queue.Len(), 1; got != want {
|
||||
t.Fatalf("queue.Len() = %v, want %v", got, want)
|
||||
}
|
||||
@ -2298,7 +2364,7 @@ func TestDeletePod(t *testing.T) {
|
||||
t.Errorf("queue.Get() = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
jm.deletePod(pod2)
|
||||
jm.deletePod(pod2, true)
|
||||
if got, want := jm.queue.Len(), 1; got != want {
|
||||
t.Fatalf("queue.Len() = %v, want %v", got, want)
|
||||
}
|
||||
@ -2333,7 +2399,7 @@ func TestDeletePodOrphan(t *testing.T) {
|
||||
pod1.OwnerReferences = nil
|
||||
informer.Core().V1().Pods().Informer().GetIndexer().Add(pod1)
|
||||
|
||||
jm.deletePod(pod1)
|
||||
jm.deletePod(pod1, true)
|
||||
if got, want := jm.queue.Len(), 0; got != want {
|
||||
t.Fatalf("queue.Len() = %v, want %v", got, want)
|
||||
}
|
||||
@ -2977,6 +3043,105 @@ func TestEnsureJobConditions(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestFinalizersRemovedExpectations checks that syncJob records the Pod UIDs
// whose tracking finalizer removal it expects, and that those expectations are
// cleared as the corresponding Pod updates and deletions are observed.
func TestFinalizersRemovedExpectations(t *testing.T) {
|
||||
defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, true)()
|
||||
clientset := fake.NewSimpleClientset()
|
||||
sharedInformers := informers.NewSharedInformerFactory(clientset, controller.NoResyncPeriodFunc())
|
||||
manager := NewController(sharedInformers.Core().V1().Pods(), sharedInformers.Batch().V1().Jobs(), clientset)
|
||||
manager.podStoreSynced = alwaysReady
|
||||
manager.jobStoreSynced = alwaysReady
|
||||
manager.podControl = &controller.FakePodControl{Err: errors.New("fake pod controller error")}
|
||||
manager.updateStatusHandler = func(job *batch.Job) (*batch.Job, error) {
|
||||
return job, nil
|
||||
}
|
||||
|
||||
job := newJob(2, 2, 6, batch.NonIndexedCompletion)
|
||||
job.Annotations = map[string]string{
|
||||
batch.JobTrackingFinalizer: "",
|
||||
}
|
||||
sharedInformers.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
|
||||
pods := append(newPodList(2, v1.PodSucceeded, job), newPodList(2, v1.PodFailed, job)...)
|
||||
podInformer := sharedInformers.Core().V1().Pods().Informer()
|
||||
podIndexer := podInformer.GetIndexer()
|
||||
uids := sets.NewString()
|
||||
for i := range pods {
|
||||
clientset.Tracker().Add(&pods[i])
|
||||
podIndexer.Add(&pods[i])
|
||||
uids.Insert(string(pods[i].UID))
|
||||
}
|
||||
jobKey := testutil.GetKey(job, t)
|
||||
|
||||
// The fake pod control is configured with an error, so no expectations
// should remain after this sync.
manager.syncJob(jobKey)
|
||||
gotExpectedUIDs := manager.finalizerExpectations.getExpectedUIDs(jobKey)
|
||||
if len(gotExpectedUIDs) != 0 {
|
||||
t.Errorf("Got unwanted expectations for removed finalizers after first syncJob with client failures:\n%s", gotExpectedUIDs.List())
|
||||
}
|
||||
|
||||
// Clear the injected pod control error and re-sync: every Pod UID should now be expected.
|
||||
manager.podControl.(*controller.FakePodControl).Err = nil
|
||||
manager.syncJob(jobKey)
|
||||
gotExpectedUIDs = manager.finalizerExpectations.getExpectedUIDs(jobKey)
|
||||
if diff := cmp.Diff(uids, gotExpectedUIDs); diff != "" {
|
||||
t.Errorf("Different expectations for removed finalizers after syncJob (-want,+got):\n%s", diff)
|
||||
}
|
||||
|
||||
stopCh := make(chan struct{})
|
||||
defer close(stopCh)
|
||||
go sharedInformers.Core().V1().Pods().Informer().Run(stopCh)
|
||||
cache.WaitForCacheSync(stopCh, podInformer.HasSynced)
|
||||
|
||||
// Make sure the expectations set by syncJob are still in place after the pod informer cache has synced.
|
||||
gotExpectedUIDs = manager.finalizerExpectations.getExpectedUIDs(jobKey)
|
||||
if diff := cmp.Diff(uids, gotExpectedUIDs); diff != "" {
|
||||
t.Errorf("Different expectations for removed finalizers after syncJob and cacheSync (-want,+got):\n%s", diff)
|
||||
}
|
||||
|
||||
// Change pods in different ways.
|
||||
|
||||
podsResource := schema.GroupVersionResource{Version: "v1", Resource: "pods"}
|
||||
|
||||
// pods[0]: finalizers removed.
update := pods[0].DeepCopy()
|
||||
update.Finalizers = nil
|
||||
update.ResourceVersion = "1"
|
||||
err := clientset.Tracker().Update(podsResource, update, update.Namespace)
|
||||
if err != nil {
|
||||
t.Errorf("Removing finalizer: %v", err)
|
||||
}
|
||||
|
||||
// pods[1]: finalizers removed and deletion timestamp set.
update = pods[1].DeepCopy()
|
||||
update.Finalizers = nil
|
||||
update.DeletionTimestamp = &metav1.Time{Time: time.Now()}
|
||||
update.ResourceVersion = "1"
|
||||
err = clientset.Tracker().Update(podsResource, update, update.Namespace)
|
||||
if err != nil {
|
||||
t.Errorf("Removing finalizer and setting deletion timestamp: %v", err)
|
||||
}
|
||||
|
||||
// pods[2]: deletion timestamp set, finalizer preserved.
|
||||
update = pods[2].DeepCopy()
|
||||
update.DeletionTimestamp = &metav1.Time{Time: time.Now()}
|
||||
update.ResourceVersion = "1"
|
||||
err = clientset.Tracker().Update(podsResource, update, update.Namespace)
|
||||
if err != nil {
|
||||
t.Errorf("Setting deletion timestamp: %v", err)
|
||||
}
|
||||
|
||||
// pods[3]: deleted through the tracker.
err = clientset.Tracker().Delete(podsResource, pods[3].Namespace, pods[3].Name)
|
||||
if err != nil {
|
||||
t.Errorf("Deleting pod that had finalizer: %v", err)
|
||||
}
|
||||
|
||||
// Only the Pod that kept its finalizer should still be expected.
uids = sets.NewString(string(pods[2].UID))
|
||||
var diff string
|
||||
if err := wait.Poll(100*time.Millisecond, wait.ForeverTestTimeout, func() (bool, error) {
|
||||
gotExpectedUIDs = manager.finalizerExpectations.getExpectedUIDs(jobKey)
|
||||
diff = cmp.Diff(uids, gotExpectedUIDs)
|
||||
return diff == "", nil
|
||||
}); err != nil {
|
||||
t.Errorf("Timeout waiting for expectations (-want, +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func checkJobCompletionEnvVariable(t *testing.T, spec *v1.PodSpec) {
|
||||
t.Helper()
|
||||
want := []v1.EnvVar{
|
||||
|
117
pkg/controller/job/tracking_utils.go
Normal file
117
pkg/controller/job/tracking_utils.go
Normal file
@ -0,0 +1,117 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package job
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/tools/cache"
|
||||
"k8s.io/klog/v2"
|
||||
)
|
||||
|
||||
// uidSetKeyFunc to parse out the key from a uidSet.
|
||||
var uidSetKeyFunc = func(obj interface{}) (string, error) {
|
||||
if u, ok := obj.(*uidSet); ok {
|
||||
return u.key, nil
|
||||
}
|
||||
return "", fmt.Errorf("could not find key for obj %#v", obj)
|
||||
}
|
||||
|
||||
// uidSet pairs a key with a set of UIDs; the embedded RWMutex guards the set.
|
||||
// uidTrackingExpectations uses it to remember which UIDs it is still waiting for.
|
||||
type uidSet struct {
|
||||
sync.RWMutex
|
||||
set sets.String
|
||||
key string
|
||||
}
|
||||
|
||||
// uidTrackingExpectations tracks, per key, the UIDs of Pods for which the
|
||||
// controller is waiting to observe the removal of the tracking finalizer.
|
||||
type uidTrackingExpectations struct {
|
||||
store cache.Store
|
||||
}
|
||||
|
||||
// GetUIDs is a convenience method to avoid exposing the set of expected uids.
|
||||
// The returned set is not thread safe, all modifications must be made holding
|
||||
// the uidStoreLock.
|
||||
func (u *uidTrackingExpectations) getSet(controllerKey string) *uidSet {
|
||||
if obj, exists, err := u.store.GetByKey(controllerKey); err == nil && exists {
|
||||
return obj.(*uidSet)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (u *uidTrackingExpectations) getExpectedUIDs(controllerKey string) sets.String {
|
||||
uids := u.getSet(controllerKey)
|
||||
if uids == nil {
|
||||
return nil
|
||||
}
|
||||
uids.RLock()
|
||||
set := sets.NewString(uids.set.UnsortedList()...)
|
||||
uids.RUnlock()
|
||||
return set
|
||||
}
|
||||
|
||||
// ExpectDeletions records expectations for the given deleteKeys, against the
|
||||
// given job-key.
|
||||
// This is thread-safe across different job keys.
|
||||
func (u *uidTrackingExpectations) expectFinalizersRemoved(jobKey string, deletedKeys []string) error {
|
||||
klog.V(4).InfoS("Expecting tracking finalizers removed", "job", jobKey, "podUIDs", deletedKeys)
|
||||
|
||||
uids := u.getSet(jobKey)
|
||||
if uids == nil {
|
||||
uids = &uidSet{
|
||||
key: jobKey,
|
||||
set: sets.NewString(),
|
||||
}
|
||||
if err := u.store.Add(uids); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
uids.Lock()
|
||||
uids.set.Insert(deletedKeys...)
|
||||
uids.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// FinalizerRemovalObserved records the given deleteKey as a deletion, for the given job.
|
||||
func (u *uidTrackingExpectations) finalizerRemovalObserved(jobKey, deleteKey string) {
|
||||
uids := u.getSet(jobKey)
|
||||
if uids != nil {
|
||||
uids.Lock()
|
||||
if uids.set.Has(deleteKey) {
|
||||
klog.V(4).InfoS("Observed tracking finalizer removed", "job", jobKey, "podUID", deleteKey)
|
||||
uids.set.Delete(deleteKey)
|
||||
}
|
||||
uids.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// DeleteExpectations deletes the UID set.
|
||||
func (u *uidTrackingExpectations) deleteExpectations(jobKey string) {
|
||||
if err := u.store.Delete(jobKey); err != nil {
|
||||
klog.ErrorS(err, "deleting tracking annotation UID expectations", "job", jobKey)
|
||||
}
|
||||
}
|
||||
|
||||
// NewUIDTrackingControllerExpectations returns a wrapper around
|
||||
// ControllerExpectations that is aware of deleteKeys.
|
||||
func newUIDTrackingExpectations() *uidTrackingExpectations {
|
||||
return &uidTrackingExpectations{store: cache.NewStore(uidSetKeyFunc)}
|
||||
}
|
111
pkg/controller/job/tracking_utils_test.go
Normal file
111
pkg/controller/job/tracking_utils_test.go
Normal file
@ -0,0 +1,111 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package job
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
func TestUIDTrackingExpectations(t *testing.T) {
|
||||
tracks := []struct {
|
||||
job string
|
||||
firstRound []string
|
||||
secondRound []string
|
||||
}{
|
||||
{
|
||||
job: "foo",
|
||||
firstRound: []string{"a", "b", "c", "d"},
|
||||
secondRound: []string{"e", "f"},
|
||||
},
|
||||
{
|
||||
job: "bar",
|
||||
firstRound: []string{"x", "y", "z"},
|
||||
secondRound: []string{"u", "v", "w"},
|
||||
},
|
||||
{
|
||||
job: "baz",
|
||||
firstRound: []string{"w"},
|
||||
secondRound: []string{"a"},
|
||||
},
|
||||
}
|
||||
expectations := newUIDTrackingExpectations()
|
||||
|
||||
// Insert first round of keys in parallel.
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(tracks))
|
||||
errs := make([]error, len(tracks))
|
||||
for i := range tracks {
|
||||
track := tracks[i]
|
||||
go func(errID int) {
|
||||
errs[errID] = expectations.expectFinalizersRemoved(track.job, track.firstRound)
|
||||
wg.Done()
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
for i, err := range errs {
|
||||
if err != nil {
|
||||
t.Errorf("Failed adding first round of UIDs for job %s: %v", tracks[i].job, err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, track := range tracks {
|
||||
uids := expectations.getSet(track.job)
|
||||
if uids == nil {
|
||||
t.Errorf("Set of UIDs is empty for job %s", track.job)
|
||||
} else if diff := cmp.Diff(track.firstRound, uids.set.List()); diff != "" {
|
||||
t.Errorf("Unexpected keys for job %s (-want,+got):\n%s", track.job, diff)
|
||||
}
|
||||
}
|
||||
|
||||
// Delete the first round of keys and add the second round in parallel.
|
||||
|
||||
for i, track := range tracks {
|
||||
wg.Add(len(track.firstRound) + 1)
|
||||
track := track
|
||||
for _, uid := range track.firstRound {
|
||||
uid := uid
|
||||
go func() {
|
||||
expectations.finalizerRemovalObserved(track.job, uid)
|
||||
wg.Done()
|
||||
}()
|
||||
}
|
||||
go func(errID int) {
|
||||
errs[errID] = expectations.expectFinalizersRemoved(track.job, track.secondRound)
|
||||
wg.Done()
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
for i, err := range errs {
|
||||
if err != nil {
|
||||
t.Errorf("Failed adding second round of UIDs for job %s: %v", tracks[i].job, err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, track := range tracks {
|
||||
uids := expectations.getSet(track.job)
|
||||
if uids == nil {
|
||||
t.Errorf("Set of UIDs is empty for job %s", track.job)
|
||||
} else if diff := cmp.Diff(track.secondRound, uids.set.List()); diff != "" {
|
||||
t.Errorf("Unexpected keys for job %s (-want,+got):\n%s", track.job, diff)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user