Lower the constants for the rate limiter in Job controller

Michal Wozniak 2023-06-16 16:04:28 +02:00
parent c51a422d78
commit 74c5ff97f1
3 changed files with 27 additions and 23 deletions
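
Note (illustration added for context, not part of the commit): the Job controller builds its work queues with client-go's workqueue.NewItemExponentialFailureRateLimiter, so the two constants passed to it set the base and the cap of a per-key doubling backoff. The sketch below (the key name is made up) shows the delay sequence produced by the new, lower API backoff constants introduced here (1s base, 60s cap), replacing the old 10s/360s pair for API retries.

package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

func main() {
	// New API backoff constants from this commit: base 1s, cap 60s.
	limiter := workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, 60*time.Second)

	// Repeated failures of the same key double the delay until the cap:
	// 1s 2s 4s 8s 16s 32s 1m0s 1m0s
	for i := 0; i < 8; i++ {
		fmt.Print(limiter.When("example-job-key"), " ")
	}
	fmt.Println()
}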

View File

@@ -69,10 +69,14 @@ const (
 var controllerKind = batch.SchemeGroupVersion.WithKind("Job")
 
 var (
-	// DefaultJobBackOff is the default backoff period. Exported for tests.
-	DefaultJobBackOff = 10 * time.Second
-	// MaxJobBackOff is the max backoff period. Exported for tests.
-	MaxJobBackOff = 360 * time.Second
+	// DefaultJobApiBackOff is the default backoff period. Exported for tests.
+	DefaultJobApiBackOff = 1 * time.Second
+	// MaxJobApiBackOff is the max backoff period. Exported for tests.
+	MaxJobApiBackOff = 60 * time.Second
+	// DefaultJobPodFailureBackOff is the default backoff period. Exported for tests.
+	DefaultJobPodFailureBackOff = 10 * time.Second
+	// MaxJobPodFailureBackOff is the max backoff period. Exported for tests.
+	MaxJobPodFailureBackOff = 360 * time.Second
 	// MaxUncountedPods is the maximum size the slices in
 	// .status.uncountedTerminatedPods should have to keep their representation
 	// roughly below 20 KB. Exported for tests
@@ -148,8 +152,8 @@ func newControllerWithClock(ctx context.Context, podInformer coreinformers.PodIn
 		},
 		expectations:          controller.NewControllerExpectations(),
 		finalizerExpectations: newUIDTrackingExpectations(),
-		queue:                 workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff)),
-		orphanQueue:           workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job_orphan_pod"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobBackOff, MaxJobBackOff)),
+		queue:                 workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobApiBackOff, MaxJobApiBackOff)),
+		orphanQueue:           workqueue.NewRateLimitingQueueWithDelayingInterface(workqueue.NewDelayingQueueWithCustomClock(clock, "job_orphan_pod"), workqueue.NewItemExponentialFailureRateLimiter(DefaultJobApiBackOff, MaxJobApiBackOff)),
 		broadcaster:           eventBroadcaster,
 		recorder:              eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "job-controller"}),
 		clock:                 clock,
@@ -1436,7 +1440,7 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, activePods
 	}
 	if active < wantActive {
-		remainingTime := newBackoffRecord.getRemainingTime(jm.clock, DefaultJobBackOff, MaxJobBackOff)
+		remainingTime := newBackoffRecord.getRemainingTime(jm.clock, DefaultJobPodFailureBackOff, MaxJobPodFailureBackOff)
 		if remainingTime > 0 {
 			jm.enqueueSyncJobWithDelay(logger, job, remainingTime)
 			return 0, metrics.JobSyncActionPodsCreated, nil
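
Note (illustration, not part of the commit): the hunks above split the single DefaultJobBackOff/MaxJobBackOff pair into an API pair (feeding the work queues) and a pod failure pair (passed to getRemainingTime to delay pod re-creation). The sketch below is a rough approximation of the pod failure delay, under the assumption that the delay doubles with each consecutive pod failure, which is the behavior the integration test further down asserts; it is not the controller's actual getRemainingTime implementation.

package main

import (
	"fmt"
	"time"
)

// podFailureDelay approximates the per-Job pod re-creation backoff: it starts
// at DefaultJobPodFailureBackOff and doubles with each consecutive pod
// failure, capped at MaxJobPodFailureBackOff.
func podFailureDelay(consecutiveFailures int) time.Duration {
	const (
		defaultJobPodFailureBackOff = 10 * time.Second
		maxJobPodFailureBackOff     = 360 * time.Second
	)
	if consecutiveFailures <= 0 {
		return 0
	}
	delay := defaultJobPodFailureBackOff
	for i := 1; i < consecutiveFailures && delay < maxJobPodFailureBackOff; i++ {
		delay *= 2
	}
	if delay > maxJobPodFailureBackOff {
		delay = maxJobPodFailureBackOff
	}
	return delay
}

func main() {
	// 10s 20s 40s 1m20s 2m40s 5m20s 6m0s 6m0s
	for failures := 1; failures <= 8; failures++ {
		fmt.Print(podFailureDelay(failures), " ")
	}
	fmt.Println()
}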

View File

@@ -3106,8 +3106,8 @@ func TestSyncJobWithJobPodFailurePolicy(t *testing.T) {
 func TestSyncJobUpdateRequeue(t *testing.T) {
 	_, ctx := ktesting.NewTestContext(t)
 	clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
-	defer func() { DefaultJobBackOff = 10 * time.Second }()
-	DefaultJobBackOff = time.Duration(0) // overwrite the default value for testing
+	defer func() { DefaultJobApiBackOff = 1 * time.Second }()
+	DefaultJobApiBackOff = time.Duration(0) // overwrite the default value for testing
 	cases := map[string]struct {
 		updateErr               error
 		wantRequeuedImmediately bool
@@ -3136,7 +3136,7 @@ func TestSyncJobUpdateRequeue(t *testing.T) {
 			sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
 			manager.queue.Add(testutil.GetKey(job, t))
 			manager.processNextWorkItem(context.TODO())
-			// With DefaultJobBackOff=0, the queueing is synchronous.
+			// With DefaultJobApiBackOff=0, the queueing is synchronous.
 			requeued := manager.queue.Len() > 0
 			if requeued != tc.wantRequeuedImmediately {
 				t.Errorf("Unexpected requeue, got %t, want %t", requeued, tc.wantRequeuedImmediately)
@@ -3934,8 +3934,8 @@ func TestJobBackoffReset(t *testing.T) {
 	for name, tc := range testCases {
 		clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
-		defer func() { DefaultJobBackOff = 10 * time.Second }()
-		DefaultJobBackOff = time.Duration(0) // overwrite the default value for testing
+		defer func() { DefaultJobApiBackOff = 1 * time.Second }()
+		DefaultJobApiBackOff = time.Duration(0) // overwrite the default value for testing
 		manager, sharedInformerFactory := newControllerFromClient(ctx, clientset, controller.NoResyncPeriodFunc)
 		fakePodControl := controller.FakePodControl{}
 		manager.podControl = &fakePodControl
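
Note (illustration, not part of the commit): the two test hunks above zero out DefaultJobApiBackOff so requeueing is effectively synchronous. With a zero base delay the exponential rate limiter always returns a zero delay (0 * 2^n = 0), so a failed item is back on the queue before the assertion runs. A minimal sketch, assuming the same client-go rate limiter the controller's queues are built with:

package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

func main() {
	// Mirrors the test override DefaultJobApiBackOff = 0.
	limiter := workqueue.NewItemExponentialFailureRateLimiter(0, 60*time.Second)

	// Every requeue delay is 0s, regardless of how many times the key failed,
	// which is what lets the tests check manager.queue.Len() synchronously.
	for i := 0; i < 3; i++ {
		fmt.Println(limiter.When("example-job-key")) // 0s
	}
}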

View File

@@ -1379,9 +1379,9 @@ func TestFinalizersClearedWhenBackoffLimitExceeded(t *testing.T) {
 func TestJobPodsCreatedWithExponentialBackoff(t *testing.T) {
 	// overwrite the default value for faster testing
-	oldBackoff := jobcontroller.DefaultJobBackOff
-	defer func() { jobcontroller.DefaultJobBackOff = oldBackoff }()
-	jobcontroller.DefaultJobBackOff = 2 * time.Second
+	oldBackoff := jobcontroller.DefaultJobPodFailureBackOff
+	defer func() { jobcontroller.DefaultJobPodFailureBackOff = oldBackoff }()
+	jobcontroller.DefaultJobPodFailureBackOff = 2 * time.Second
 
 	closeFn, restConfig, clientSet, ns := setup(t, "simple")
 	defer closeFn()
@@ -1441,25 +1441,25 @@ func TestJobPodsCreatedWithExponentialBackoff(t *testing.T) {
 		return finishTime[i].Before(finishTime[j])
 	})
 
-	if creationTime[1].Sub(finishTime[0]).Seconds() < jobcontroller.DefaultJobBackOff.Seconds() {
-		t.Fatalf("Second pod should be created at least %v seconds after the first pod", jobcontroller.DefaultJobBackOff)
+	if creationTime[1].Sub(finishTime[0]).Seconds() < jobcontroller.DefaultJobPodFailureBackOff.Seconds() {
+		t.Fatalf("Second pod should be created at least %v seconds after the first pod", jobcontroller.DefaultJobPodFailureBackOff)
 	}
-	if creationTime[1].Sub(finishTime[0]).Seconds() >= 2*jobcontroller.DefaultJobBackOff.Seconds() {
-		t.Fatalf("Second pod should be created before %v seconds after the first pod", 2*jobcontroller.DefaultJobBackOff)
+	if creationTime[1].Sub(finishTime[0]).Seconds() >= 2*jobcontroller.DefaultJobPodFailureBackOff.Seconds() {
+		t.Fatalf("Second pod should be created before %v seconds after the first pod", 2*jobcontroller.DefaultJobPodFailureBackOff)
 	}
 
 	diff := creationTime[2].Sub(finishTime[1]).Seconds()
 
 	// The third pod should not be created before 4 seconds
-	if diff < 2*jobcontroller.DefaultJobBackOff.Seconds() {
-		t.Fatalf("Third pod should be created at least %v seconds after the second pod", 2*jobcontroller.DefaultJobBackOff)
+	if diff < 2*jobcontroller.DefaultJobPodFailureBackOff.Seconds() {
+		t.Fatalf("Third pod should be created at least %v seconds after the second pod", 2*jobcontroller.DefaultJobPodFailureBackOff)
 	}
 
 	// The third pod should be created within 8 seconds
 	// This check rules out double counting
-	if diff >= 4*jobcontroller.DefaultJobBackOff.Seconds() {
-		t.Fatalf("Third pod should be created before %v seconds after the second pod", 4*jobcontroller.DefaultJobBackOff)
+	if diff >= 4*jobcontroller.DefaultJobPodFailureBackOff.Seconds() {
+		t.Fatalf("Third pod should be created before %v seconds after the second pod", 4*jobcontroller.DefaultJobPodFailureBackOff)
 	}
 }
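
Note (illustration, not part of the commit): with the test override of 2s, the timing windows asserted above follow directly from the doubling backoff; the snippet below just spells out the arithmetic.

package main

import (
	"fmt"
	"time"
)

func main() {
	// Test override from TestJobPodsCreatedWithExponentialBackoff.
	backoff := 2 * time.Second

	// Second pod: at least 1x and less than 2x the backoff after the first pod finished.
	fmt.Printf("second pod window: [%v, %v)\n", backoff, 2*backoff) // [2s, 4s)

	// Third pod: at least 2x and less than 4x the backoff after the second pod finished.
	fmt.Printf("third pod window:  [%v, %v)\n", 2*backoff, 4*backoff) // [4s, 8s)
}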