Add a per-sync limit on pod create/delete operations in the job controller

This commit is contained in:
Mengxue Zhang 2021-04-26 14:55:32 +00:00
parent cda503fcc9
commit 5fd4ab3dc3

View File

@ -61,7 +61,8 @@ var (
// DefaultJobBackOff is the default backoff period, exported for the e2e test
DefaultJobBackOff = 10 * time.Second
// MaxJobBackOff is the max backoff period, exported for the e2e test
MaxJobBackOff = 360 * time.Second
MaxJobBackOff = 360 * time.Second
maxPodCreateDeletePerSync = 500
)
// Controller ensures that all Job objects have corresponding pods to
@ -803,6 +804,9 @@ func (jm *Controller) manageJob(job *batch.Job, activePods []*v1.Pod, succeeded
rmAtLeast = 0
}
podsToDelete := activePodsForRemoval(job, activePods, int(rmAtLeast))
if len(podsToDelete) > maxPodCreateDeletePerSync {
podsToDelete = podsToDelete[:maxPodCreateDeletePerSync]
}
if len(podsToDelete) > 0 {
jm.expectations.ExpectDeletions(jobKey, len(podsToDelete))
klog.V(4).InfoS("Too many pods running for job", "job", klog.KObj(job), "deleted", len(podsToDelete), "target", parallelism)
@ -823,6 +827,10 @@ func (jm *Controller) manageJob(job *batch.Job, activePods []*v1.Pod, succeeded
return active, nil
}
if diff > int32(maxPodCreateDeletePerSync) {
diff = int32(maxPodCreateDeletePerSync)
}
jm.expectations.ExpectCreations(jobKey, int(diff))
errCh := make(chan error, diff)
klog.V(4).Infof("Too few pods running job %q, need %d, creating %d", jobKey, wantActive, diff)