diff --git a/pkg/controller/cronjob/utils.go b/pkg/controller/cronjob/utils.go index 9f5595f9d1d..7325b327be7 100644 --- a/pkg/controller/cronjob/utils.go +++ b/pkg/controller/cronjob/utils.go @@ -72,11 +72,10 @@ func deleteFromActiveList(cj *batchv1.CronJob, uid types.UID) { // mostRecentScheduleTime returns: // - the last schedule time or CronJob's creation time, // - the most recent time a Job should be created or nil, if that's after now, -// - very rough approximation of number of missed schedules (based on difference -// between two schedules), +// - boolean indicating an excessive number of missed schedules, // - error in an edge case where the schedule specification is grammatically correct, // but logically doesn't make sense (31st day for months with only 30 days, for example). -func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Schedule, includeStartingDeadlineSeconds bool) (time.Time, *time.Time, int64, error) { +func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Schedule, includeStartingDeadlineSeconds bool) (time.Time, *time.Time, bool, error) { earliestTime := cj.ObjectMeta.CreationTimestamp.Time if cj.Status.LastScheduleTime != nil { earliestTime = cj.Status.LastScheduleTime.Time @@ -94,10 +93,10 @@ func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Sc t2 := schedule.Next(t1) if now.Before(t1) { - return earliestTime, nil, 0, nil + return earliestTime, nil, false, nil } if now.Before(t2) { - return earliestTime, &t1, 1, nil + return earliestTime, &t1, false, nil } // It is possible for cron.ParseStandard("59 23 31 2 *") to return an invalid schedule @@ -105,7 +104,7 @@ func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Sc // In this case the timeBetweenTwoSchedules will be 0, and we error out the invalid schedule timeBetweenTwoSchedules := int64(t2.Sub(t1).Round(time.Second).Seconds()) if timeBetweenTwoSchedules < 1 { - return earliestTime, nil, 0, fmt.Errorf("time difference between two schedules is less than 1 second") + return earliestTime, nil, false, fmt.Errorf("time difference between two schedules is less than 1 second") } // this logic used for calculating number of missed schedules does a rough // approximation, by calculating a diff between two schedules (t1 and t2), @@ -127,10 +126,29 @@ func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Sc mostRecentTime = t } + // An object might miss several starts. For example, if + // controller gets wedged on friday at 5:01pm when everyone has + // gone home, and someone comes in on tuesday AM and discovers + // the problem and restarts the controller, then all the hourly + // jobs, more than 80 of them for one hourly cronJob, should + // all start running with no further intervention (if the cronJob + // allows concurrency and late starts). + // + // However, if there is a bug somewhere, or incorrect clock + // on controller's server or apiservers (for setting creationTimestamp) + // then there could be so many missed start times (it could be off + // by decades or more), that it would eat up all the CPU and memory + // of this controller. In that case, we want to not try to list + // all the missed start times. + // + // I've somewhat arbitrarily picked 100, as more than 80, + // but less than "lots". + tooManyMissed := numberOfMissedSchedules > 100 + if mostRecentTime.IsZero() { - return earliestTime, nil, numberOfMissedSchedules, nil + return earliestTime, nil, tooManyMissed, nil } - return earliestTime, &mostRecentTime, numberOfMissedSchedules, nil + return earliestTime, &mostRecentTime, tooManyMissed, nil } // nextScheduleTimeDuration returns the time duration to requeue based on @@ -156,33 +174,17 @@ func nextScheduleTimeDuration(cj *batchv1.CronJob, now time.Time, schedule cron. // and before now, or nil if no unmet schedule times, and an error. // If there are too many (>100) unstarted times, it will also record a warning. func nextScheduleTime(logger klog.Logger, cj *batchv1.CronJob, now time.Time, schedule cron.Schedule, recorder record.EventRecorder) (*time.Time, error) { - _, mostRecentTime, numberOfMissedSchedules, err := mostRecentScheduleTime(cj, now, schedule, true) + _, mostRecentTime, tooManyMissed, err := mostRecentScheduleTime(cj, now, schedule, true) if mostRecentTime == nil || mostRecentTime.After(now) { return nil, err } - if numberOfMissedSchedules > 100 { - // An object might miss several starts. For example, if - // controller gets wedged on friday at 5:01pm when everyone has - // gone home, and someone comes in on tuesday AM and discovers - // the problem and restarts the controller, then all the hourly - // jobs, more than 80 of them for one hourly cronJob, should - // all start running with no further intervention (if the cronJob - // allows concurrency and late starts). - // - // However, if there is a bug somewhere, or incorrect clock - // on controller's server or apiservers (for setting creationTimestamp) - // then there could be so many missed start times (it could be off - // by decades or more), that it would eat up all the CPU and memory - // of this controller. In that case, we want to not try to list - // all the missed start times. - // - // I've somewhat arbitrarily picked 100, as more than 80, - // but less than "lots". - recorder.Eventf(cj, corev1.EventTypeWarning, "TooManyMissedTimes", "too many missed start times: %d. Set or decrease .spec.startingDeadlineSeconds or check clock skew", numberOfMissedSchedules) - logger.Info("too many missed times", "cronjob", klog.KObj(cj), "missedTimes", numberOfMissedSchedules) + if tooManyMissed { + recorder.Eventf(cj, corev1.EventTypeWarning, "TooManyMissedTimes", "too many missed start times. Set or decrease .spec.startingDeadlineSeconds or check clock skew") + logger.Info("too many missed times", "cronjob", klog.KObj(cj)) } + return mostRecentTime, err } diff --git a/pkg/controller/cronjob/utils_test.go b/pkg/controller/cronjob/utils_test.go index 994fb388cd0..f04b37a4558 100644 --- a/pkg/controller/cronjob/utils_test.go +++ b/pkg/controller/cronjob/utils_test.go @@ -295,14 +295,14 @@ func TestMostRecentScheduleTime(t *testing.T) { tenSeconds := int64(10) tests := []struct { - name string - cj *batchv1.CronJob - includeSDS bool - now time.Time - expectedEarliestTime time.Time - expectedRecentTime *time.Time - expectedNumberOfMisses int64 - wantErr bool + name string + cj *batchv1.CronJob + includeSDS bool + now time.Time + expectedEarliestTime time.Time + expectedRecentTime *time.Time + expectedTooManyMissed bool + wantErr bool }{ { name: "now before next schedule", @@ -328,10 +328,9 @@ func TestMostRecentScheduleTime(t *testing.T) { Schedule: "0 * * * *", }, }, - now: topOfTheHour().Add(61 * time.Minute), - expectedRecentTime: deltaTimeAfterTopOfTheHour(60 * time.Minute), - expectedEarliestTime: *topOfTheHour(), - expectedNumberOfMisses: 1, + now: topOfTheHour().Add(61 * time.Minute), + expectedRecentTime: deltaTimeAfterTopOfTheHour(60 * time.Minute), + expectedEarliestTime: *topOfTheHour(), }, { name: "missed 5 schedules", @@ -343,10 +342,9 @@ func TestMostRecentScheduleTime(t *testing.T) { Schedule: "0 * * * *", }, }, - now: *deltaTimeAfterTopOfTheHour(301 * time.Minute), - expectedRecentTime: deltaTimeAfterTopOfTheHour(300 * time.Minute), - expectedEarliestTime: *deltaTimeAfterTopOfTheHour(10 * time.Second), - expectedNumberOfMisses: 5, + now: *deltaTimeAfterTopOfTheHour(301 * time.Minute), + expectedRecentTime: deltaTimeAfterTopOfTheHour(300 * time.Minute), + expectedEarliestTime: *deltaTimeAfterTopOfTheHour(10 * time.Second), }, { name: "complex schedule", @@ -361,10 +359,9 @@ func TestMostRecentScheduleTime(t *testing.T) { LastScheduleTime: &metav1HalfPastTheHour, }, }, - now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute), - expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute), - expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute), - expectedNumberOfMisses: 2, + now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute), + expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute), + expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute), }, { name: "another complex schedule", @@ -379,10 +376,9 @@ func TestMostRecentScheduleTime(t *testing.T) { LastScheduleTime: &metav1HalfPastTheHour, }, }, - now: *deltaTimeAfterTopOfTheHour(30*time.Hour + 30*time.Minute), - expectedRecentTime: nil, - expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute), - expectedNumberOfMisses: 30, + now: *deltaTimeAfterTopOfTheHour(30*time.Hour + 30*time.Minute), + expectedRecentTime: nil, + expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute), }, { name: "complex schedule with longer diff between executions", @@ -397,10 +393,9 @@ func TestMostRecentScheduleTime(t *testing.T) { LastScheduleTime: &metav1HalfPastTheHour, }, }, - now: *deltaTimeAfterTopOfTheHour(96*time.Hour + 31*time.Minute), - expectedRecentTime: deltaTimeAfterTopOfTheHour(96*time.Hour + 30*time.Minute), - expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute), - expectedNumberOfMisses: 6, + now: *deltaTimeAfterTopOfTheHour(96*time.Hour + 31*time.Minute), + expectedRecentTime: deltaTimeAfterTopOfTheHour(96*time.Hour + 30*time.Minute), + expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute), }, { name: "complex schedule with shorter diff between executions", @@ -412,10 +407,9 @@ func TestMostRecentScheduleTime(t *testing.T) { Schedule: "30 6-16/4 * * 1-5", }, }, - now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute), - expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute), - expectedEarliestTime: *topOfTheHour(), - expectedNumberOfMisses: 7, + now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute), + expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute), + expectedEarliestTime: *topOfTheHour(), }, { name: "@every schedule", @@ -431,10 +425,10 @@ func TestMostRecentScheduleTime(t *testing.T) { LastScheduleTime: &metav1MinuteAfterTopOfTheHour, }, }, - now: *deltaTimeAfterTopOfTheHour(7 * 24 * time.Hour), - expectedRecentTime: deltaTimeAfterTopOfTheHour((6 * 24 * time.Hour) + 23*time.Hour + 1*time.Minute), - expectedEarliestTime: *deltaTimeAfterTopOfTheHour(1 * time.Minute), - expectedNumberOfMisses: 167, + now: *deltaTimeAfterTopOfTheHour(7 * 24 * time.Hour), + expectedRecentTime: deltaTimeAfterTopOfTheHour((6 * 24 * time.Hour) + 23*time.Hour + 1*time.Minute), + expectedEarliestTime: *deltaTimeAfterTopOfTheHour(1 * time.Minute), + expectedTooManyMissed: true, }, { name: "rogue cronjob", @@ -446,10 +440,9 @@ func TestMostRecentScheduleTime(t *testing.T) { Schedule: "59 23 31 2 *", }, }, - now: *deltaTimeAfterTopOfTheHour(1 * time.Hour), - expectedRecentTime: nil, - expectedNumberOfMisses: 0, - wantErr: true, + now: *deltaTimeAfterTopOfTheHour(1 * time.Hour), + expectedRecentTime: nil, + wantErr: true, }, { name: "earliestTime being CreationTimestamp and LastScheduleTime", @@ -529,7 +522,7 @@ func TestMostRecentScheduleTime(t *testing.T) { if err != nil { t.Errorf("error setting up the test, %s", err) } - gotEarliestTime, gotRecentTime, gotNumberOfMisses, err := mostRecentScheduleTime(tt.cj, tt.now, sched, tt.includeSDS) + gotEarliestTime, gotRecentTime, gotTooManyMissed, err := mostRecentScheduleTime(tt.cj, tt.now, sched, tt.includeSDS) if tt.wantErr { if err == nil { t.Error("mostRecentScheduleTime() got no error when expected one") @@ -548,8 +541,8 @@ func TestMostRecentScheduleTime(t *testing.T) { if !reflect.DeepEqual(gotRecentTime, tt.expectedRecentTime) { t.Errorf("expectedRecentTime - got %v, want %v", gotRecentTime, tt.expectedRecentTime) } - if gotNumberOfMisses != tt.expectedNumberOfMisses { - t.Errorf("expectedNumberOfMisses - got %v, want %v", gotNumberOfMisses, tt.expectedNumberOfMisses) + if gotTooManyMissed != tt.expectedTooManyMissed { + t.Errorf("expectedNumberOfMisses - got %v, want %v", gotTooManyMissed, tt.expectedTooManyMissed) } }) }