batch: add suspended job
Signed-off-by: Adhityaa Chandrasekar <adtac@google.com>
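For context: this change teaches the Job controller to honor the new `.spec.suspend` field behind the SuspendJob feature gate. A minimal client-go sketch of creating a Job in the suspended state (illustrative only, not part of this commit; assumes a v1.21+ API server with the SuspendJob gate enabled and a reachable kubeconfig):

```go
package main

import (
	"context"
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/utils/pointer"
)

func main() {
	// Build a client from the default kubeconfig location.
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(config)

	job := &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{Name: "suspended-example"},
		Spec: batchv1.JobSpec{
			// Created suspended: the controller creates no pods until resumed.
			Suspend: pointer.BoolPtr(true),
			Template: corev1.PodTemplateSpec{
				Spec: corev1.PodSpec{
					RestartPolicy: corev1.RestartPolicyNever,
					Containers: []corev1.Container{
						{Name: "main", Image: "busybox", Command: []string{"true"}},
					},
				},
			},
		},
	}
	created, err := client.BatchV1().Jobs("default").Create(context.TODO(), job, metav1.CreateOptions{})
	if err != nil {
		panic(err)
	}
	fmt.Println("created suspended job:", created.Name)
}
```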

pkg/controller/job/job_controller.go
@@ -487,9 +487,9 @@ func (jm *Controller) syncJob(key string) (bool, error) {
 	activePods := controller.FilterActivePods(pods)
 	active := int32(len(activePods))
 	succeeded, failed := getStatus(&job, pods)
-	conditions := len(job.Status.Conditions)
-	// job first start
-	if job.Status.StartTime == nil {
+	// Job first start. Set StartTime and start the ActiveDeadlineSeconds timer
+	// only if the job is not in the suspended state.
+	if job.Status.StartTime == nil && !jobSuspended(&job) {
 		now := metav1.Now()
 		job.Status.StartTime = &now
 		// enqueue a sync to check if job past ActiveDeadlineSeconds
@@ -524,6 +524,8 @@ func (jm *Controller) syncJob(key string) (bool, error) {
 		failureMessage = "Job was active longer than specified deadline"
 	}

+	jobConditionsChanged := false
+	manageJobCalled := false
 	if jobFailed {
 		// TODO(#28486): Account for pod failures in status once we can track
 		// completions without lingering pods.
@@ -532,11 +534,13 @@ func (jm *Controller) syncJob(key string) (bool, error) {
 		// update status values accordingly
 		failed += active
 		active = 0
-		job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, failureReason, failureMessage))
+		job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, v1.ConditionTrue, failureReason, failureMessage))
+		jobConditionsChanged = true
 		jm.recorder.Event(&job, v1.EventTypeWarning, failureReason, failureMessage)
 	} else {
 		if jobNeedsSync && job.DeletionTimestamp == nil {
 			active, manageJobErr = jm.manageJob(&job, activePods, succeeded, pods)
+			manageJobCalled = true
 		}
 		completions := succeeded
 		complete := false
@@ -566,10 +570,40 @@ func (jm *Controller) syncJob(key string) (bool, error) {
 			}
 		}
 		if complete {
-			job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, "", ""))
+			job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, v1.ConditionTrue, "", ""))
+			jobConditionsChanged = true
 			now := metav1.Now()
 			job.Status.CompletionTime = &now
 			jm.recorder.Event(&job, v1.EventTypeNormal, "Completed", "Job completed")
+		} else if utilfeature.DefaultFeatureGate.Enabled(features.SuspendJob) && manageJobCalled {
+			// Update the conditions / emit events only if manageJob was called in
+			// this syncJob. Otherwise wait for the right syncJob call to make
+			// updates.
+			if job.Spec.Suspend != nil && *job.Spec.Suspend {
+				// Job can be in the suspended state only if it is NOT completed.
+				var isUpdated bool
+				job.Status.Conditions, isUpdated = ensureJobConditionStatus(job.Status.Conditions, batch.JobSuspended, v1.ConditionTrue, "JobSuspended", "Job suspended")
+				if isUpdated {
+					jobConditionsChanged = true
+					jm.recorder.Event(&job, v1.EventTypeNormal, "Suspended", "Job suspended")
+				}
+			} else {
+				// Job not suspended.
+				var isUpdated bool
+				job.Status.Conditions, isUpdated = ensureJobConditionStatus(job.Status.Conditions, batch.JobSuspended, v1.ConditionFalse, "JobResumed", "Job resumed")
+				if isUpdated {
+					jobConditionsChanged = true
+					jm.recorder.Event(&job, v1.EventTypeNormal, "Resumed", "Job resumed")
+					// Resumed jobs will always reset StartTime to current time. This is
+					// done because the ActiveDeadlineSeconds timer shouldn't go off
+					// whilst the Job is still suspended and resetting StartTime is
+					// consistent with resuming a Job created in the suspended state.
+					// (ActiveDeadlineSeconds is interpreted as the number of seconds a
+					// Job is continuously active.)
+					now := metav1.Now()
+					job.Status.StartTime = &now
+				}
+			}
 		}
 	}

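The comment above explains why StartTime is reset on resume. A standalone sketch of the arithmetic with hypothetical numbers (not from the diff): measuring the deadline from creation would fire immediately for a Job that sat suspended, while measuring from the reset StartTime gives the Job its full budget of continuous activity.

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	const activeDeadlineSeconds = 60

	created := time.Now().Add(-10 * time.Minute) // Job created 10 min ago, then suspended
	resumed := time.Now()                        // StartTime reset at resume

	allowed := activeDeadlineSeconds * time.Second
	// Measured from creation, the deadline would already have fired...
	fmt.Println("expired from creation:", time.Since(created) >= allowed) // true
	// ...but measured from the reset StartTime the Job gets its full budget.
	fmt.Println("expired from resume:  ", time.Since(resumed) >= allowed) // false
}
```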
@@ -583,7 +617,7 @@ func (jm *Controller) syncJob(key string) (bool, error) {
 	}

 	// no need to update the job if the status hasn't changed since last time
-	if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || len(job.Status.Conditions) != conditions {
+	if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || jobConditionsChanged {
 		job.Status.Active = active
 		job.Status.Succeeded = succeeded
 		job.Status.Failed = failed
@@ -660,9 +694,11 @@ func pastBackoffLimitOnFailure(job *batch.Job, pods []*v1.Pod) bool {
 	return result >= *job.Spec.BackoffLimit
 }

-// pastActiveDeadline checks if job has ActiveDeadlineSeconds field set and if it is exceeded.
+// pastActiveDeadline checks if job has ActiveDeadlineSeconds field set and if
+// it is exceeded. If the job is currently suspended, the function will always
+// return false.
 func pastActiveDeadline(job *batch.Job) bool {
-	if job.Spec.ActiveDeadlineSeconds == nil || job.Status.StartTime == nil {
+	if job.Spec.ActiveDeadlineSeconds == nil || job.Status.StartTime == nil || jobSuspended(job) {
 		return false
 	}
 	now := metav1.Now()
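The diff elides the unchanged middle of pastActiveDeadline between this hunk and the next. A sketch of the whole function consistent with the context lines on either side (a reconstruction under that assumption, not part of the diff; jobSuspended is the helper added later in this commit):

```go
import (
	"time"

	batch "k8s.io/api/batch/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Sketch: deadline math measured from StartTime, short-circuited for
// unset fields and for suspended Jobs.
func pastActiveDeadlineSketch(job *batch.Job) bool {
	if job.Spec.ActiveDeadlineSeconds == nil || job.Status.StartTime == nil || jobSuspended(job) {
		return false
	}
	now := metav1.Now()
	start := job.Status.StartTime.Time
	duration := now.Time.Sub(start)
	allowedDuration := time.Duration(*job.Spec.ActiveDeadlineSeconds) * time.Second
	return duration >= allowedDuration
}
```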
@@ -672,10 +708,10 @@ func pastActiveDeadline(job *batch.Job) bool {
 	return duration >= allowedDuration
 }

-func newCondition(conditionType batch.JobConditionType, reason, message string) batch.JobCondition {
+func newCondition(conditionType batch.JobConditionType, status v1.ConditionStatus, reason, message string) batch.JobCondition {
 	return batch.JobCondition{
 		Type:               conditionType,
-		Status:             v1.ConditionTrue,
+		Status:             status,
 		LastProbeTime:      metav1.Now(),
 		LastTransitionTime: metav1.Now(),
 		Reason:             reason,
@@ -690,6 +726,12 @@ func getStatus(job *batch.Job, pods []*v1.Pod) (succeeded, failed int32) {
 	return
 }

+// jobSuspended returns whether a Job is suspended while taking the feature
+// gate into account.
+func jobSuspended(job *batch.Job) bool {
+	return utilfeature.DefaultFeatureGate.Enabled(features.SuspendJob) && job.Spec.Suspend != nil && *job.Spec.Suspend
+}
+
 // manageJob is the core method responsible for managing the number of running
 // pods according to what is specified in the job.Spec.
 // Does NOT modify <activePods>.
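A standalone model of the helper's three-way logic (sketch; the real code reads the SuspendJob gate via utilfeature, modeled here as a plain parameter):

```go
package main

import "fmt"

// Mirror of jobSuspended: a Job counts as suspended only when the gate is
// on AND the field is set AND it is true.
func jobSuspendedModel(gateEnabled bool, suspend *bool) bool {
	return gateEnabled && suspend != nil && *suspend
}

func main() {
	t, f := true, false
	fmt.Println(jobSuspendedModel(true, &t))  // true: gate on, spec.suspend=true
	fmt.Println(jobSuspendedModel(true, &f))  // false: explicitly not suspended
	fmt.Println(jobSuspendedModel(true, nil)) // false: unset field means not suspended
	fmt.Println(jobSuspendedModel(false, &t)) // false: gate off, field ignored
}
```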
@@ -702,6 +744,15 @@ func (jm *Controller) manageJob(job *batch.Job, activePods []*v1.Pod, succeeded
 		return 0, nil
 	}

+	if jobSuspended(job) {
+		klog.V(4).InfoS("Deleting all active pods in suspended job", "job", klog.KObj(job), "active", active)
+		podsToDelete := activePodsForRemoval(job, activePods, int(active))
+		jm.expectations.ExpectDeletions(jobKey, len(podsToDelete))
+		removed, err := jm.deleteJobPods(job, jobKey, podsToDelete)
+		active -= removed
+		return active, err
+	}
+
 	rmAtLeast := active - parallelism
 	if rmAtLeast < 0 {
 		rmAtLeast = 0
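The ExpectDeletions call above records the intended deletions before they are issued, so later syncs can distinguish in-flight work from a stable state. A toy model of that expectations handshake (not the real controller.ControllerExpectations implementation, just the pattern):

```go
package main

import "fmt"

// Minimal model: track how many deletions a controller still expects to
// observe per object key before it should trust its caches again.
type expectations struct{ pendingDeletions map[string]int }

func (e *expectations) ExpectDeletions(key string, n int) { e.pendingDeletions[key] += n }
func (e *expectations) DeletionObserved(key string) {
	if e.pendingDeletions[key] > 0 {
		e.pendingDeletions[key]--
	}
}
func (e *expectations) Satisfied(key string) bool { return e.pendingDeletions[key] == 0 }

func main() {
	exp := &expectations{pendingDeletions: map[string]int{}}
	exp.ExpectDeletions("default/myjob", 2)     // suspend: plan to delete 2 active pods
	fmt.Println(exp.Satisfied("default/myjob")) // false: wait for informer events
	exp.DeletionObserved("default/myjob")
	exp.DeletionObserved("default/myjob")
	fmt.Println(exp.Satisfied("default/myjob")) // true: safe to sync again
}
```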
@@ -709,7 +760,7 @@ func (jm *Controller) manageJob(job *batch.Job, activePods []*v1.Pod, succeeded
 	podsToDelete := activePodsForRemoval(job, activePods, int(rmAtLeast))
 	if len(podsToDelete) > 0 {
 		jm.expectations.ExpectDeletions(jobKey, len(podsToDelete))
-		klog.V(4).InfoS("Too many pods running for job", "job", klog.KObj(job), "deleted", len(podsToDelete), "target", parallelism)
+		klog.V(4).InfoS("Too many pods running for job", "job", klog.KObj(job), "deleted", rmAtLeast, "target", parallelism)
 		removed, err := jm.deleteJobPods(job, jobKey, podsToDelete)
 		active -= removed
 		if err != nil {
@@ -910,3 +961,29 @@ func errorFromChannel(errCh <-chan error) error {
 	}
 	return nil
 }
+
+// ensureJobConditionStatus appends or updates an existing job condition of the
+// given type with the given status value. Note that this function will not
+// append to the conditions list if the new condition's status is false
+// (because going from nothing to false is meaningless); it can, however,
+// update the status condition to false. The function returns a bool to let the
+// caller know if the list was changed (either appended or updated).
+func ensureJobConditionStatus(list []batch.JobCondition, cType batch.JobConditionType, status v1.ConditionStatus, reason, message string) ([]batch.JobCondition, bool) {
+	for i := range list {
+		if list[i].Type == cType {
+			if list[i].Status != status || list[i].Reason != reason || list[i].Message != message {
+				list[i].Status = status
+				list[i].LastTransitionTime = metav1.Now()
+				list[i].Reason = reason
+				list[i].Message = message
+				return list, true
+			}
+			return list, false
+		}
+	}
+	// A condition with that type doesn't exist in the list.
+	if status != v1.ConditionFalse {
+		return append(list, newCondition(cType, status, reason, message)), true
+	}
+	return list, false
+}
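A hypothetical usage sketch of the new helper from inside the controller package (fragment; `batch` and `v1` are the file's existing import aliases for the batch and core API groups):

```go
var conds []batch.JobCondition

// Suspending: appends JobSuspended=True and reports a change.
conds, changed := ensureJobConditionStatus(conds, batch.JobSuspended, v1.ConditionTrue, "JobSuspended", "Job suspended")
// changed == true, len(conds) == 1

// Resuming: flips the existing condition to False in place.
conds, changed = ensureJobConditionStatus(conds, batch.JobSuspended, v1.ConditionFalse, "JobResumed", "Job resumed")
// changed == true, the condition's status is now ConditionFalse

// From an empty list, status False is a no-op: "absent" already means
// "not suspended", so nothing is appended.
_, changed = ensureJobConditionStatus(nil, batch.JobSuspended, v1.ConditionFalse, "JobResumed", "Job resumed")
// changed == false
_ = changed
```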

pkg/controller/job/job_controller_test.go

@@ -47,6 +47,7 @@ import (
 	"k8s.io/kubernetes/pkg/controller"
 	"k8s.io/kubernetes/pkg/controller/testutil"
 	"k8s.io/kubernetes/pkg/features"
+	"k8s.io/utils/pointer"
 )

 var alwaysReady = func() bool { return true }
@@ -156,6 +157,7 @@ func setPodsStatusesWithIndexes(podIndexer cache.Indexer, job *batch.Job, status
 func TestControllerSyncJob(t *testing.T) {
 	jobConditionComplete := batch.JobComplete
 	jobConditionFailed := batch.JobFailed
+	jobConditionSuspended := batch.JobSuspended

 	testCases := map[string]struct {
 		// job setup
@@ -165,15 +167,18 @@ func TestControllerSyncJob(t *testing.T) {
 		deleting       bool
 		podLimit       int
 		completionMode batch.CompletionMode
+		wasSuspended   bool
+		suspend        bool

 		// pod setup
-		podControllerError error
-		jobKeyForget       bool
-		pendingPods        int32
-		activePods         int32
-		succeededPods      int32
-		failedPods         int32
-		podsWithIndexes    []indexPhase
+		podControllerError        error
+		jobKeyForget              bool
+		pendingPods               int32
+		activePods                int32
+		succeededPods             int32
+		failedPods                int32
+		podsWithIndexes           []indexPhase
+		fakeExpectationAtCreation int32 // negative: ExpectDeletions, positive: ExpectCreations

 		// expectations
 		expectedCreations int32
@@ -183,11 +188,13 @@ func TestControllerSyncJob(t *testing.T) {
 		expectedCompletedIdxs   string
 		expectedFailed          int32
 		expectedCondition       *batch.JobConditionType
+		expectedConditionStatus v1.ConditionStatus
 		expectedConditionReason string
 		expectedCreatedIndexes  sets.Int

 		// features
 		indexedJobEnabled bool
+		suspendJobEnabled bool
 	}{
 		"job start": {
 			parallelism: 2,
@@ -334,24 +341,26 @@ func TestControllerSyncJob(t *testing.T) {
 			expectedSucceeded: 1,
 		},
 		"WQ job all finished": {
-			parallelism:       2,
-			completions:       -1,
-			backoffLimit:      6,
-			jobKeyForget:      true,
-			succeededPods:     2,
-			expectedSucceeded: 2,
-			expectedCondition: &jobConditionComplete,
+			parallelism:             2,
+			completions:             -1,
+			backoffLimit:            6,
+			jobKeyForget:            true,
+			succeededPods:           2,
+			expectedSucceeded:       2,
+			expectedCondition:       &jobConditionComplete,
+			expectedConditionStatus: v1.ConditionTrue,
 		},
 		"WQ job all finished despite one failure": {
-			parallelism:       2,
-			completions:       -1,
-			backoffLimit:      6,
-			jobKeyForget:      true,
-			succeededPods:     1,
-			failedPods:        1,
-			expectedSucceeded: 1,
-			expectedFailed:    1,
-			expectedCondition: &jobConditionComplete,
+			parallelism:             2,
+			completions:             -1,
+			backoffLimit:            6,
+			jobKeyForget:            true,
+			succeededPods:           1,
+			failedPods:              1,
+			expectedSucceeded:       1,
+			expectedFailed:          1,
+			expectedCondition:       &jobConditionComplete,
+			expectedConditionStatus: v1.ConditionTrue,
 		},
 		"more active pods than completions": {
 			parallelism: 2,
@@ -401,6 +410,7 @@ func TestControllerSyncJob(t *testing.T) {
 			failedPods:              1,
 			expectedFailed:          1,
 			expectedCondition:       &jobConditionFailed,
+			expectedConditionStatus: v1.ConditionTrue,
 			expectedConditionReason: "BackoffLimitExceeded",
 		},
 		"indexed job start": {
@@ -510,11 +520,78 @@ func TestControllerSyncJob(t *testing.T) {
 			// No status updates.
 			indexedJobEnabled: false,
 		},
+		"suspending a job with satisfied expectations": {
+			// Suspended Job should delete active pods when expectations are
+			// satisfied.
+			suspendJobEnabled:       true,
+			suspend:                 true,
+			parallelism:             2,
+			activePods:              2, // parallelism == active, expectations satisfied
+			completions:             4,
+			backoffLimit:            6,
+			jobKeyForget:            true,
+			expectedCreations:       0,
+			expectedDeletions:       2,
+			expectedActive:          0,
+			expectedCondition:       &jobConditionSuspended,
+			expectedConditionStatus: v1.ConditionTrue,
+			expectedConditionReason: "JobSuspended",
+		},
+		"suspending a job with unsatisfied expectations": {
+			// Unlike the previous test, we expect the controller to NOT suspend the
+			// Job in the syncJob call because the controller will wait for
+			// expectations to be satisfied first. The next syncJob call (not tested
+			// here) will be the same as the previous test.
+			suspendJobEnabled:         true,
+			suspend:                   true,
+			parallelism:               2,
+			activePods:                3,  // active > parallelism, expectations unsatisfied
+			fakeExpectationAtCreation: -1, // the controller is expecting a deletion
+			completions:               4,
+			backoffLimit:              6,
+			jobKeyForget:              true,
+			expectedCreations:         0,
+			expectedDeletions:         0,
+			expectedActive:            3,
+		},
+		"resuming a suspended job": {
+			suspendJobEnabled:       true,
+			wasSuspended:            true,
+			suspend:                 false,
+			parallelism:             2,
+			completions:             4,
+			backoffLimit:            6,
+			jobKeyForget:            true,
+			expectedCreations:       2,
+			expectedDeletions:       0,
+			expectedActive:          2,
+			expectedCondition:       &jobConditionSuspended,
+			expectedConditionStatus: v1.ConditionFalse,
+			expectedConditionReason: "JobResumed",
+		},
+		"suspending a deleted job": {
+			// We would normally expect the active pods to be deleted (see a few test
+			// cases above), but since this job is being deleted, we don't expect
+			// anything changed here from before the job was suspended. The
+			// JobSuspended condition is also missing.
+			suspendJobEnabled:       true,
+			suspend:                 true,
+			deleting:                true,
+			parallelism:             2,
+			activePods:              2, // parallelism == active, expectations satisfied
+			completions:             4,
+			backoffLimit:            6,
+			jobKeyForget:            true,
+			expectedCreations:       0,
+			expectedDeletions:       0,
+			expectedActive:          2,
+		},
 	}

 	for name, tc := range testCases {
 		t.Run(name, func(t *testing.T) {
 			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.IndexedJob, tc.indexedJobEnabled)()
+			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.SuspendJob, tc.suspendJobEnabled)()

 			// job manager setup
 			clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
@@ -526,6 +603,19 @@ func TestControllerSyncJob(t *testing.T) {

 			// job & pods setup
 			job := newJob(tc.parallelism, tc.completions, tc.backoffLimit, tc.completionMode)
+			job.Spec.Suspend = pointer.BoolPtr(tc.suspend)
+			key, err := controller.KeyFunc(job)
+			if err != nil {
+				t.Errorf("Unexpected error getting job key: %v", err)
+			}
+			if tc.fakeExpectationAtCreation < 0 {
+				manager.expectations.ExpectDeletions(key, int(-tc.fakeExpectationAtCreation))
+			} else if tc.fakeExpectationAtCreation > 0 {
+				manager.expectations.ExpectCreations(key, int(tc.fakeExpectationAtCreation))
+			}
+			if tc.wasSuspended {
+				job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobSuspended, v1.ConditionTrue, "JobSuspended", "Job suspended"))
+			}
 			if tc.deleting {
 				now := metav1.Now()
 				job.DeletionTimestamp = &now
@@ -608,13 +698,19 @@ func TestControllerSyncJob(t *testing.T) {
 			if actual.Status.Failed != tc.expectedFailed {
 				t.Errorf("Unexpected number of failed pods. Expected %d, saw %d\n", tc.expectedFailed, actual.Status.Failed)
 			}
-			if actual.Status.StartTime == nil && tc.indexedJobEnabled {
+			if actual.Status.StartTime != nil && tc.suspend {
+				t.Error("Unexpected .status.startTime not nil when suspend is true")
+			}
+			if actual.Status.StartTime == nil && tc.indexedJobEnabled && !tc.suspend {
 				t.Error("Missing .status.startTime")
 			}
 			// validate conditions
-			if tc.expectedCondition != nil && !getCondition(actual, *tc.expectedCondition, tc.expectedConditionReason) {
+			if tc.expectedCondition != nil && !getCondition(actual, *tc.expectedCondition, tc.expectedConditionStatus, tc.expectedConditionReason) {
 				t.Errorf("Expected completion condition. Got %#v", actual.Status.Conditions)
 			}
+			if tc.expectedCondition == nil && tc.suspend && len(actual.Status.Conditions) != 0 {
+				t.Errorf("Unexpected conditions %v", actual.Status.Conditions)
+			}
 			// validate slow start
 			expectedLimit := 0
 			for pass := uint8(0); expectedLimit <= tc.podLimit; pass++ {
@@ -652,6 +748,7 @@ func TestSyncJobPastDeadline(t *testing.T) {
 		activeDeadlineSeconds int64
 		startTime             int64
 		backoffLimit          int32
+		suspend               bool

 		// pod setup
 		activePods int32
@@ -664,7 +761,11 @@ func TestSyncJobPastDeadline(t *testing.T) {
 		expectedActive          int32
 		expectedSucceeded       int32
 		expectedFailed          int32
+		expectedCondition       batch.JobConditionType
 		expectedConditionReason string
+
+		// features
+		suspendJobEnabled bool
 	}{
 		"activeDeadlineSeconds less than single pod execution": {
 			parallelism: 1,
@@ -676,6 +777,7 @@ func TestSyncJobPastDeadline(t *testing.T) {
 			expectedForGetKey:       true,
 			expectedDeletions:       1,
 			expectedFailed:          1,
+			expectedCondition:       batch.JobFailed,
 			expectedConditionReason: "DeadlineExceeded",
 		},
 		"activeDeadlineSeconds bigger than single pod execution": {
@@ -690,6 +792,7 @@ func TestSyncJobPastDeadline(t *testing.T) {
 			expectedDeletions:       1,
 			expectedSucceeded:       1,
 			expectedFailed:          1,
+			expectedCondition:       batch.JobFailed,
 			expectedConditionReason: "DeadlineExceeded",
 		},
 		"activeDeadlineSeconds times-out before any pod starts": {
@@ -699,6 +802,7 @@ func TestSyncJobPastDeadline(t *testing.T) {
 			startTime:               10,
 			backoffLimit:            6,
 			expectedForGetKey:       true,
+			expectedCondition:       batch.JobFailed,
 			expectedConditionReason: "DeadlineExceeded",
 		},
 		"activeDeadlineSeconds with backofflimit reach": {
@@ -709,12 +813,27 @@ func TestSyncJobPastDeadline(t *testing.T) {
 			failedPods:              1,
 			expectedForGetKey:       true,
 			expectedFailed:          1,
+			expectedCondition:       batch.JobFailed,
 			expectedConditionReason: "BackoffLimitExceeded",
 		},
+		"activeDeadlineSeconds is not triggered when Job is suspended": {
+			suspendJobEnabled:       true,
+			suspend:                 true,
+			parallelism:             1,
+			completions:             2,
+			activeDeadlineSeconds:   10,
+			startTime:               15,
+			backoffLimit:            6,
+			expectedForGetKey:       true,
+			expectedCondition:       batch.JobSuspended,
+			expectedConditionReason: "JobSuspended",
+		},
 	}

 	for name, tc := range testCases {
 		t.Run(name, func(t *testing.T) {
+			defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.SuspendJob, tc.suspendJobEnabled)()

 			// job manager setup
 			clientSet := clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}})
 			manager, sharedInformerFactory := newControllerFromClient(clientSet, controller.NoResyncPeriodFunc)
@@ -731,6 +850,7 @@ func TestSyncJobPastDeadline(t *testing.T) {
 			// job & pods setup
 			job := newJob(tc.parallelism, tc.completions, tc.backoffLimit, batch.NonIndexedCompletion)
 			job.Spec.ActiveDeadlineSeconds = &tc.activeDeadlineSeconds
+			job.Spec.Suspend = pointer.BoolPtr(tc.suspend)
 			start := metav1.Unix(metav1.Now().Time.Unix()-tc.startTime, 0)
 			job.Status.StartTime = &start
 			sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
@@ -766,16 +886,16 @@ func TestSyncJobPastDeadline(t *testing.T) {
 				t.Error("Missing .status.startTime")
 			}
 			// validate conditions
-			if !getCondition(actual, batch.JobFailed, tc.expectedConditionReason) {
+			if !getCondition(actual, tc.expectedCondition, v1.ConditionTrue, tc.expectedConditionReason) {
 				t.Errorf("Expected fail condition. Got %#v", actual.Status.Conditions)
 			}
 		})
 	}
 }

-func getCondition(job *batch.Job, condition batch.JobConditionType, reason string) bool {
+func getCondition(job *batch.Job, condition batch.JobConditionType, status v1.ConditionStatus, reason string) bool {
 	for _, v := range job.Status.Conditions {
-		if v.Type == condition && v.Status == v1.ConditionTrue && v.Reason == reason {
+		if v.Type == condition && v.Status == status && v.Reason == reason {
 			return true
 		}
 	}
@@ -800,7 +920,7 @@ func TestSyncPastDeadlineJobFinished(t *testing.T) {
 	job.Spec.ActiveDeadlineSeconds = &activeDeadlineSeconds
 	start := metav1.Unix(metav1.Now().Time.Unix()-15, 0)
 	job.Status.StartTime = &start
-	job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, "DeadlineExceeded", "Job was active longer than specified deadline"))
+	job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, v1.ConditionTrue, "DeadlineExceeded", "Job was active longer than specified deadline"))
 	sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
 	forget, err := manager.syncJob(testutil.GetKey(job, t))
 	if err != nil {
@@ -829,7 +949,7 @@ func TestSyncJobComplete(t *testing.T) {
 	manager.jobStoreSynced = alwaysReady

 	job := newJob(1, 1, 6, batch.NonIndexedCompletion)
-	job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, "", ""))
+	job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, v1.ConditionTrue, "", ""))
 	sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
 	forget, err := manager.syncJob(testutil.GetKey(job, t))
 	if err != nil {
@@ -1572,7 +1692,7 @@ func TestJobBackoffReset(t *testing.T) {
 		if retries != 0 {
 			t.Errorf("%s: expected exactly 0 retries, got %d", name, retries)
 		}
-		if getCondition(actual, batch.JobFailed, "BackoffLimitExceeded") {
+		if getCondition(actual, batch.JobFailed, v1.ConditionTrue, "BackoffLimitExceeded") {
 			t.Errorf("%s: unexpected job failure", name)
 		}
 	}
@@ -1760,7 +1880,7 @@ func TestJobBackoffForOnFailure(t *testing.T) {
 				t.Errorf("unexpected number of failed pods. Expected %d, saw %d\n", tc.expectedFailed, actual.Status.Failed)
 			}
 			// validate conditions
-			if tc.expectedCondition != nil && !getCondition(actual, *tc.expectedCondition, tc.expectedConditionReason) {
+			if tc.expectedCondition != nil && !getCondition(actual, *tc.expectedCondition, v1.ConditionTrue, tc.expectedConditionReason) {
 				t.Errorf("expected completion condition. Got %#v", actual.Status.Conditions)
 			}
 		})
@@ -1864,13 +1984,99 @@ func TestJobBackoffOnRestartPolicyNever(t *testing.T) {
 				t.Errorf("unexpected number of failed pods. Expected %d, saw %d\n", tc.expectedFailed, actual.Status.Failed)
 			}
 			// validate conditions
-			if tc.expectedCondition != nil && !getCondition(actual, *tc.expectedCondition, tc.expectedConditionReason) {
+			if tc.expectedCondition != nil && !getCondition(actual, *tc.expectedCondition, v1.ConditionTrue, tc.expectedConditionReason) {
 				t.Errorf("expected completion condition. Got %#v", actual.Status.Conditions)
 			}
 		})
 	}
 }

+func TestEnsureJobConditions(t *testing.T) {
+	testCases := []struct {
+		name         string
+		haveList     []batch.JobCondition
+		wantType     batch.JobConditionType
+		wantStatus   v1.ConditionStatus
+		wantReason   string
+		expectList   []batch.JobCondition
+		expectUpdate bool
+	}{
+		{
+			name:         "append true condition",
+			haveList:     []batch.JobCondition{},
+			wantType:     batch.JobSuspended,
+			wantStatus:   v1.ConditionTrue,
+			wantReason:   "foo",
+			expectList:   []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")},
+			expectUpdate: true,
+		},
+		{
+			name:         "append false condition",
+			haveList:     []batch.JobCondition{},
+			wantType:     batch.JobSuspended,
+			wantStatus:   v1.ConditionFalse,
+			wantReason:   "foo",
+			expectList:   []batch.JobCondition{},
+			expectUpdate: false,
+		},
+		{
+			name:         "update true condition reason",
+			haveList:     []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")},
+			wantType:     batch.JobSuspended,
+			wantStatus:   v1.ConditionTrue,
+			wantReason:   "bar",
+			expectList:   []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "bar", "")},
+			expectUpdate: true,
+		},
+		{
+			name:         "update true condition status",
+			haveList:     []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")},
+			wantType:     batch.JobSuspended,
+			wantStatus:   v1.ConditionFalse,
+			wantReason:   "foo",
+			expectList:   []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionFalse, "foo", "")},
+			expectUpdate: true,
+		},
+		{
+			name:         "update false condition status",
+			haveList:     []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionFalse, "foo", "")},
+			wantType:     batch.JobSuspended,
+			wantStatus:   v1.ConditionTrue,
+			wantReason:   "foo",
+			expectList:   []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")},
+			expectUpdate: true,
+		},
+		{
+			name:         "condition already exists",
+			haveList:     []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")},
+			wantType:     batch.JobSuspended,
+			wantStatus:   v1.ConditionTrue,
+			wantReason:   "foo",
+			expectList:   []batch.JobCondition{newCondition(batch.JobSuspended, v1.ConditionTrue, "foo", "")},
+			expectUpdate: false,
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			gotList, isUpdated := ensureJobConditionStatus(tc.haveList, tc.wantType, tc.wantStatus, tc.wantReason, "")
+			if isUpdated != tc.expectUpdate {
+				t.Errorf("Got isUpdated=%v, want %v", isUpdated, tc.expectUpdate)
+			}
+			if len(gotList) != len(tc.expectList) {
+				t.Errorf("got a list of length %d, want %d", len(gotList), len(tc.expectList))
+			}
+			for i := range gotList {
+				// Make timestamps the same before comparing the two lists.
+				gotList[i].LastProbeTime = tc.expectList[i].LastProbeTime
+				gotList[i].LastTransitionTime = tc.expectList[i].LastTransitionTime
+			}
+			if diff := cmp.Diff(tc.expectList, gotList); diff != "" {
+				t.Errorf("Unexpected JobCondition list: (-want,+got):\n%s", diff)
+			}
+		})
+	}
+}
+
 func checkJobCompletionEnvVariable(t *testing.T, spec *v1.PodSpec) {
 	t.Helper()
 	want := []v1.EnvVar{