Merge pull request #110294 from harshanarayana/bug/git-110239/fix-activedeadline-enforcement

GIT-110239: fix activeDeadlineSeconds enforcement bug
Kubernetes Prow Robot 2022-06-14 08:03:33 -07:00 committed by GitHub
commit 03b18bf138
2 changed files with 67 additions and 39 deletions

pkg/controller/job/job_controller.go

@@ -746,17 +746,11 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (forget bool, rEr
 	if feature.DefaultFeatureGate.Enabled(features.JobReadyPods) {
 		ready = pointer.Int32(countReadyPods(activePods))
 	}
-	// Job first start. Set StartTime and start the ActiveDeadlineSeconds timer
-	// only if the job is not in the suspended state.
+	// Job first start. Set StartTime only if the job is not in the suspended state.
 	if job.Status.StartTime == nil && !jobSuspended(&job) {
 		now := metav1.Now()
 		job.Status.StartTime = &now
-		// enqueue a sync to check if job past ActiveDeadlineSeconds
-		if job.Spec.ActiveDeadlineSeconds != nil {
-			klog.V(4).Infof("Job %s has ActiveDeadlineSeconds will sync after %d seconds",
-				key, *job.Spec.ActiveDeadlineSeconds)
-			jm.queue.AddAfter(key, time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second)
-		}
 	}

 	var manageJobErr error
@@ -775,6 +769,10 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (forget bool, rEr
 		finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "BackoffLimitExceeded", "Job has reached the specified backoff limit")
 	} else if pastActiveDeadline(&job) {
 		finishedCondition = newCondition(batch.JobFailed, v1.ConditionTrue, "DeadlineExceeded", "Job was active longer than specified deadline")
+	} else if job.Spec.ActiveDeadlineSeconds != nil && !jobSuspended(&job) {
+		syncDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds)*time.Second - time.Since(job.Status.StartTime.Time)
+		klog.V(2).InfoS("Job has activeDeadlineSeconds configuration. Will sync this job again", "job", key, "nextSyncIn", syncDuration)
+		jm.queue.AddAfter(key, syncDuration)
 	}

 	var prevSucceededIndexes, succeededIndexes orderedIntervals
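The second hunk is the core of the fix: instead of arming a one-shot requeue only at the moment StartTime is first set, every sync of a non-suspended job with activeDeadlineSeconds configured re-enqueues the job for the time still left on its deadline. A minimal standalone sketch of that remaining-time arithmetic, with a hypothetical helper name and example values that are not part of the patch:

package main

import (
	"fmt"
	"time"
)

// remainingDeadline mirrors the syncDuration computation added above:
// re-enqueue after (activeDeadlineSeconds - time already elapsed since
// StartTime), so a re-synced job is checked when its deadline actually
// expires instead of a full activeDeadlineSeconds later.
// Illustrative helper only; not part of the Kubernetes code base.
func remainingDeadline(activeDeadlineSeconds int64, startTime, now time.Time) time.Duration {
	return time.Duration(activeDeadlineSeconds)*time.Second - now.Sub(startTime)
}

func main() {
	start := time.Now().Add(-40 * time.Second) // job has already been running for 40s
	d := remainingDeadline(60, start, time.Now())
	fmt.Printf("re-sync in roughly %v\n", d.Round(time.Second)) // ~20s, not another 60s
}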

pkg/controller/job/job_controller_test.go

@@ -59,11 +59,11 @@ import (
 var alwaysReady = func() bool { return true }

-func newJob(parallelism, completions, backoffLimit int32, completionMode batch.CompletionMode) *batch.Job {
+func newJobWithName(name string, parallelism, completions, backoffLimit int32, completionMode batch.CompletionMode) *batch.Job {
 	j := &batch.Job{
 		TypeMeta: metav1.TypeMeta{Kind: "Job"},
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      "foobar",
+			Name:      name,
 			UID:       uuid.NewUUID(),
 			Namespace: metav1.NamespaceDefault,
 		},
@@ -105,6 +105,10 @@ func newJob(parallelism, completions, backoffLimit int32, completionMode batch.C
 	return j
 }

+func newJob(parallelism, completions, backoffLimit int32, completionMode batch.CompletionMode) *batch.Job {
+	return newJobWithName("foobar", parallelism, completions, backoffLimit, completionMode)
+}
+
 func newControllerFromClient(kubeClient clientset.Interface, resyncPeriod controller.ResyncPeriodFunc) (*Controller, informers.SharedInformerFactory) {
 	sharedInformers := informers.NewSharedInformerFactory(kubeClient, resyncPeriod())
 	jm := NewController(sharedInformers.Core().V1().Pods(), sharedInformers.Batch().V1().Jobs(), kubeClient)
@@ -1828,40 +1832,66 @@ func hasTrueCondition(job *batch.Job) *batch.JobConditionType {
 }

 func TestSyncPastDeadlineJobFinished(t *testing.T) {
-	clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
+	clientset := fake.NewSimpleClientset()
 	manager, sharedInformerFactory := newControllerFromClient(clientset, controller.NoResyncPeriodFunc)
-	fakePodControl := controller.FakePodControl{}
-	manager.podControl = &fakePodControl
 	manager.podStoreSynced = alwaysReady
 	manager.jobStoreSynced = alwaysReady
-	var actual *batch.Job
-	manager.updateStatusHandler = func(ctx context.Context, job *batch.Job) (*batch.Job, error) {
-		actual = job
-		return job, nil
-	}
-	job := newJob(1, 1, 6, batch.NonIndexedCompletion)
-	activeDeadlineSeconds := int64(10)
-	job.Spec.ActiveDeadlineSeconds = &activeDeadlineSeconds
-	start := metav1.Unix(metav1.Now().Time.Unix()-15, 0)
-	job.Status.StartTime = &start
-	job.Status.Conditions = append(job.Status.Conditions, *newCondition(batch.JobFailed, v1.ConditionTrue, "DeadlineExceeded", "Job was active longer than specified deadline"))
-	sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
-	forget, err := manager.syncJob(context.TODO(), testutil.GetKey(job, t))
-	if err != nil {
-		t.Errorf("Unexpected error when syncing jobs %v", err)
-	}
-	if !forget {
-		t.Errorf("Unexpected forget value. Expected %v, saw %v\n", true, forget)
-	}
-	if len(fakePodControl.Templates) != 0 {
-		t.Errorf("Unexpected number of creates. Expected %d, saw %d\n", 0, len(fakePodControl.Templates))
-	}
-	if len(fakePodControl.DeletePodName) != 0 {
-		t.Errorf("Unexpected number of deletes. Expected %d, saw %d\n", 0, len(fakePodControl.DeletePodName))
-	}
-	if actual != nil {
-		t.Error("Unexpected job modification")
-	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	sharedInformerFactory.Start(ctx.Done())
+
+	go manager.Run(ctx, 1)
+
+	tests := []struct {
+		name         string
+		setStartTime bool
+		jobName      string
+	}{
+		{
+			name:         "New job created without start time being set",
+			setStartTime: false,
+			jobName:      "job1",
+		},
+		{
+			name:         "New job created with start time being set",
+			setStartTime: true,
+			jobName:      "job2",
+		},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			job := newJobWithName(tc.jobName, 1, 1, 6, batch.NonIndexedCompletion)
+			activeDeadlineSeconds := int64(1)
+			job.Spec.ActiveDeadlineSeconds = &activeDeadlineSeconds
+			if tc.setStartTime {
+				start := metav1.Unix(metav1.Now().Time.Unix()-1, 0)
+				job.Status.StartTime = &start
+			}
+
+			_, err := clientset.BatchV1().Jobs(job.GetNamespace()).Create(ctx, job, metav1.CreateOptions{})
+			if err != nil {
+				t.Errorf("Could not create Job: %v", err)
+			}
+			if err := sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job); err != nil {
+				t.Fatalf("Failed to insert job in index: %v", err)
+			}
+			var j *batch.Job
+			err = wait.Poll(200*time.Millisecond, 3*time.Second, func() (done bool, err error) {
+				j, err = clientset.BatchV1().Jobs(metav1.NamespaceDefault).Get(ctx, job.GetName(), metav1.GetOptions{})
+				if err != nil {
+					return false, nil
+				}
+				if len(j.Status.Conditions) == 1 && j.Status.Conditions[0].Reason == "DeadlineExceeded" {
+					return true, nil
+				}
+				return false, nil
+			})
+			if err != nil {
+				t.Errorf("Job failed to enforce activeDeadlineSeconds configuration. Expected condition with Reason 'DeadlineExceeded' was not found in %v", j.Status)
+			}
+		})
+	}
 }
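The rewritten test exercises the behavior end to end: it runs the controller against a fake clientset, creates Jobs with a one-second activeDeadlineSeconds (with and without a pre-set StartTime), and polls until the DeadlineExceeded condition appears. The polling idiom, reduced to dependency-free Go as a rough stand-in for the wait.Poll call above (pollUntil and the timings below are illustrative, not the real helper):

package main

import (
	"errors"
	"fmt"
	"time"
)

// pollUntil calls condition every interval until it reports true or the
// timeout elapses, roughly what wait.Poll does in the test above.
// Stand-in for illustration only.
func pollUntil(interval, timeout time.Duration, condition func() bool) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if condition() {
			return nil
		}
		time.Sleep(interval)
	}
	return errors.New("timed out waiting for condition")
}

func main() {
	// Pretend the controller marks the job failed ~700ms after creation.
	failAt := time.Now().Add(700 * time.Millisecond)
	err := pollUntil(200*time.Millisecond, 3*time.Second, func() bool {
		return time.Now().After(failAt) // stands in for "job has a DeadlineExceeded condition"
	})
	fmt.Println("poll result:", err)
}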