Hide numberOfMissedSchedules as an algorithm internal number

This commit is contained in:
Maciej Szulik 2023-06-28 16:54:34 +02:00
parent ddbf3575a7
commit 1240a29af9
No known key found for this signature in database
GPG Key ID: F15E55D276FA84C4
2 changed files with 67 additions and 72 deletions

View File

@ -72,11 +72,10 @@ func deleteFromActiveList(cj *batchv1.CronJob, uid types.UID) {
// mostRecentScheduleTime returns: // mostRecentScheduleTime returns:
// - the last schedule time or CronJob's creation time, // - the last schedule time or CronJob's creation time,
// - the most recent time a Job should be created or nil, if that's after now, // - the most recent time a Job should be created or nil, if that's after now,
// - very rough approximation of number of missed schedules (based on difference // - boolean indicating an excessive number of missed schedules,
// between two schedules),
// - error in an edge case where the schedule specification is grammatically correct, // - error in an edge case where the schedule specification is grammatically correct,
// but logically doesn't make sense (31st day for months with only 30 days, for example). // but logically doesn't make sense (31st day for months with only 30 days, for example).
func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Schedule, includeStartingDeadlineSeconds bool) (time.Time, *time.Time, int64, error) { func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Schedule, includeStartingDeadlineSeconds bool) (time.Time, *time.Time, bool, error) {
earliestTime := cj.ObjectMeta.CreationTimestamp.Time earliestTime := cj.ObjectMeta.CreationTimestamp.Time
if cj.Status.LastScheduleTime != nil { if cj.Status.LastScheduleTime != nil {
earliestTime = cj.Status.LastScheduleTime.Time earliestTime = cj.Status.LastScheduleTime.Time
@ -94,10 +93,10 @@ func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Sc
t2 := schedule.Next(t1) t2 := schedule.Next(t1)
if now.Before(t1) { if now.Before(t1) {
return earliestTime, nil, 0, nil return earliestTime, nil, false, nil
} }
if now.Before(t2) { if now.Before(t2) {
return earliestTime, &t1, 1, nil return earliestTime, &t1, false, nil
} }
// It is possible for cron.ParseStandard("59 23 31 2 *") to return an invalid schedule // It is possible for cron.ParseStandard("59 23 31 2 *") to return an invalid schedule
@ -105,7 +104,7 @@ func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Sc
// In this case the timeBetweenTwoSchedules will be 0, and we error out the invalid schedule // In this case the timeBetweenTwoSchedules will be 0, and we error out the invalid schedule
timeBetweenTwoSchedules := int64(t2.Sub(t1).Round(time.Second).Seconds()) timeBetweenTwoSchedules := int64(t2.Sub(t1).Round(time.Second).Seconds())
if timeBetweenTwoSchedules < 1 { if timeBetweenTwoSchedules < 1 {
return earliestTime, nil, 0, fmt.Errorf("time difference between two schedules is less than 1 second") return earliestTime, nil, false, fmt.Errorf("time difference between two schedules is less than 1 second")
} }
// this logic used for calculating number of missed schedules does a rough // this logic used for calculating number of missed schedules does a rough
// approximation, by calculating a diff between two schedules (t1 and t2), // approximation, by calculating a diff between two schedules (t1 and t2),
@ -127,10 +126,29 @@ func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Sc
mostRecentTime = t mostRecentTime = t
} }
// An object might miss several starts. For example, if
// controller gets wedged on friday at 5:01pm when everyone has
// gone home, and someone comes in on tuesday AM and discovers
// the problem and restarts the controller, then all the hourly
// jobs, more than 80 of them for one hourly cronJob, should
// all start running with no further intervention (if the cronJob
// allows concurrency and late starts).
//
// However, if there is a bug somewhere, or incorrect clock
// on controller's server or apiservers (for setting creationTimestamp)
// then there could be so many missed start times (it could be off
// by decades or more), that it would eat up all the CPU and memory
// of this controller. In that case, we want to not try to list
// all the missed start times.
//
// I've somewhat arbitrarily picked 100, as more than 80,
// but less than "lots".
tooManyMissed := numberOfMissedSchedules > 100
if mostRecentTime.IsZero() { if mostRecentTime.IsZero() {
return earliestTime, nil, numberOfMissedSchedules, nil return earliestTime, nil, tooManyMissed, nil
} }
return earliestTime, &mostRecentTime, numberOfMissedSchedules, nil return earliestTime, &mostRecentTime, tooManyMissed, nil
} }
// nextScheduleTimeDuration returns the time duration to requeue based on // nextScheduleTimeDuration returns the time duration to requeue based on
@ -156,33 +174,17 @@ func nextScheduleTimeDuration(cj *batchv1.CronJob, now time.Time, schedule cron.
// and before now, or nil if no unmet schedule times, and an error. // and before now, or nil if no unmet schedule times, and an error.
// If there are too many (>100) unstarted times, it will also record a warning. // If there are too many (>100) unstarted times, it will also record a warning.
func nextScheduleTime(logger klog.Logger, cj *batchv1.CronJob, now time.Time, schedule cron.Schedule, recorder record.EventRecorder) (*time.Time, error) { func nextScheduleTime(logger klog.Logger, cj *batchv1.CronJob, now time.Time, schedule cron.Schedule, recorder record.EventRecorder) (*time.Time, error) {
_, mostRecentTime, numberOfMissedSchedules, err := mostRecentScheduleTime(cj, now, schedule, true) _, mostRecentTime, tooManyMissed, err := mostRecentScheduleTime(cj, now, schedule, true)
if mostRecentTime == nil || mostRecentTime.After(now) { if mostRecentTime == nil || mostRecentTime.After(now) {
return nil, err return nil, err
} }
if numberOfMissedSchedules > 100 { if tooManyMissed {
// An object might miss several starts. For example, if recorder.Eventf(cj, corev1.EventTypeWarning, "TooManyMissedTimes", "too many missed start times. Set or decrease .spec.startingDeadlineSeconds or check clock skew")
// controller gets wedged on friday at 5:01pm when everyone has logger.Info("too many missed times", "cronjob", klog.KObj(cj))
// gone home, and someone comes in on tuesday AM and discovers
// the problem and restarts the controller, then all the hourly
// jobs, more than 80 of them for one hourly cronJob, should
// all start running with no further intervention (if the cronJob
// allows concurrency and late starts).
//
// However, if there is a bug somewhere, or incorrect clock
// on controller's server or apiservers (for setting creationTimestamp)
// then there could be so many missed start times (it could be off
// by decades or more), that it would eat up all the CPU and memory
// of this controller. In that case, we want to not try to list
// all the missed start times.
//
// I've somewhat arbitrarily picked 100, as more than 80,
// but less than "lots".
recorder.Eventf(cj, corev1.EventTypeWarning, "TooManyMissedTimes", "too many missed start times: %d. Set or decrease .spec.startingDeadlineSeconds or check clock skew", numberOfMissedSchedules)
logger.Info("too many missed times", "cronjob", klog.KObj(cj), "missedTimes", numberOfMissedSchedules)
} }
return mostRecentTime, err return mostRecentTime, err
} }

View File

@ -295,14 +295,14 @@ func TestMostRecentScheduleTime(t *testing.T) {
tenSeconds := int64(10) tenSeconds := int64(10)
tests := []struct { tests := []struct {
name string name string
cj *batchv1.CronJob cj *batchv1.CronJob
includeSDS bool includeSDS bool
now time.Time now time.Time
expectedEarliestTime time.Time expectedEarliestTime time.Time
expectedRecentTime *time.Time expectedRecentTime *time.Time
expectedNumberOfMisses int64 expectedTooManyMissed bool
wantErr bool wantErr bool
}{ }{
{ {
name: "now before next schedule", name: "now before next schedule",
@ -328,10 +328,9 @@ func TestMostRecentScheduleTime(t *testing.T) {
Schedule: "0 * * * *", Schedule: "0 * * * *",
}, },
}, },
now: topOfTheHour().Add(61 * time.Minute), now: topOfTheHour().Add(61 * time.Minute),
expectedRecentTime: deltaTimeAfterTopOfTheHour(60 * time.Minute), expectedRecentTime: deltaTimeAfterTopOfTheHour(60 * time.Minute),
expectedEarliestTime: *topOfTheHour(), expectedEarliestTime: *topOfTheHour(),
expectedNumberOfMisses: 1,
}, },
{ {
name: "missed 5 schedules", name: "missed 5 schedules",
@ -343,10 +342,9 @@ func TestMostRecentScheduleTime(t *testing.T) {
Schedule: "0 * * * *", Schedule: "0 * * * *",
}, },
}, },
now: *deltaTimeAfterTopOfTheHour(301 * time.Minute), now: *deltaTimeAfterTopOfTheHour(301 * time.Minute),
expectedRecentTime: deltaTimeAfterTopOfTheHour(300 * time.Minute), expectedRecentTime: deltaTimeAfterTopOfTheHour(300 * time.Minute),
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(10 * time.Second), expectedEarliestTime: *deltaTimeAfterTopOfTheHour(10 * time.Second),
expectedNumberOfMisses: 5,
}, },
{ {
name: "complex schedule", name: "complex schedule",
@ -361,10 +359,9 @@ func TestMostRecentScheduleTime(t *testing.T) {
LastScheduleTime: &metav1HalfPastTheHour, LastScheduleTime: &metav1HalfPastTheHour,
}, },
}, },
now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute), now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute),
expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute), expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute),
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute), expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
expectedNumberOfMisses: 2,
}, },
{ {
name: "another complex schedule", name: "another complex schedule",
@ -379,10 +376,9 @@ func TestMostRecentScheduleTime(t *testing.T) {
LastScheduleTime: &metav1HalfPastTheHour, LastScheduleTime: &metav1HalfPastTheHour,
}, },
}, },
now: *deltaTimeAfterTopOfTheHour(30*time.Hour + 30*time.Minute), now: *deltaTimeAfterTopOfTheHour(30*time.Hour + 30*time.Minute),
expectedRecentTime: nil, expectedRecentTime: nil,
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute), expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
expectedNumberOfMisses: 30,
}, },
{ {
name: "complex schedule with longer diff between executions", name: "complex schedule with longer diff between executions",
@ -397,10 +393,9 @@ func TestMostRecentScheduleTime(t *testing.T) {
LastScheduleTime: &metav1HalfPastTheHour, LastScheduleTime: &metav1HalfPastTheHour,
}, },
}, },
now: *deltaTimeAfterTopOfTheHour(96*time.Hour + 31*time.Minute), now: *deltaTimeAfterTopOfTheHour(96*time.Hour + 31*time.Minute),
expectedRecentTime: deltaTimeAfterTopOfTheHour(96*time.Hour + 30*time.Minute), expectedRecentTime: deltaTimeAfterTopOfTheHour(96*time.Hour + 30*time.Minute),
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute), expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
expectedNumberOfMisses: 6,
}, },
{ {
name: "complex schedule with shorter diff between executions", name: "complex schedule with shorter diff between executions",
@ -412,10 +407,9 @@ func TestMostRecentScheduleTime(t *testing.T) {
Schedule: "30 6-16/4 * * 1-5", Schedule: "30 6-16/4 * * 1-5",
}, },
}, },
now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute), now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute),
expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute), expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute),
expectedEarliestTime: *topOfTheHour(), expectedEarliestTime: *topOfTheHour(),
expectedNumberOfMisses: 7,
}, },
{ {
name: "@every schedule", name: "@every schedule",
@ -431,10 +425,10 @@ func TestMostRecentScheduleTime(t *testing.T) {
LastScheduleTime: &metav1MinuteAfterTopOfTheHour, LastScheduleTime: &metav1MinuteAfterTopOfTheHour,
}, },
}, },
now: *deltaTimeAfterTopOfTheHour(7 * 24 * time.Hour), now: *deltaTimeAfterTopOfTheHour(7 * 24 * time.Hour),
expectedRecentTime: deltaTimeAfterTopOfTheHour((6 * 24 * time.Hour) + 23*time.Hour + 1*time.Minute), expectedRecentTime: deltaTimeAfterTopOfTheHour((6 * 24 * time.Hour) + 23*time.Hour + 1*time.Minute),
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(1 * time.Minute), expectedEarliestTime: *deltaTimeAfterTopOfTheHour(1 * time.Minute),
expectedNumberOfMisses: 167, expectedTooManyMissed: true,
}, },
{ {
name: "rogue cronjob", name: "rogue cronjob",
@ -446,10 +440,9 @@ func TestMostRecentScheduleTime(t *testing.T) {
Schedule: "59 23 31 2 *", Schedule: "59 23 31 2 *",
}, },
}, },
now: *deltaTimeAfterTopOfTheHour(1 * time.Hour), now: *deltaTimeAfterTopOfTheHour(1 * time.Hour),
expectedRecentTime: nil, expectedRecentTime: nil,
expectedNumberOfMisses: 0, wantErr: true,
wantErr: true,
}, },
{ {
name: "earliestTime being CreationTimestamp and LastScheduleTime", name: "earliestTime being CreationTimestamp and LastScheduleTime",
@ -529,7 +522,7 @@ func TestMostRecentScheduleTime(t *testing.T) {
if err != nil { if err != nil {
t.Errorf("error setting up the test, %s", err) t.Errorf("error setting up the test, %s", err)
} }
gotEarliestTime, gotRecentTime, gotNumberOfMisses, err := mostRecentScheduleTime(tt.cj, tt.now, sched, tt.includeSDS) gotEarliestTime, gotRecentTime, gotTooManyMissed, err := mostRecentScheduleTime(tt.cj, tt.now, sched, tt.includeSDS)
if tt.wantErr { if tt.wantErr {
if err == nil { if err == nil {
t.Error("mostRecentScheduleTime() got no error when expected one") t.Error("mostRecentScheduleTime() got no error when expected one")
@ -548,8 +541,8 @@ func TestMostRecentScheduleTime(t *testing.T) {
if !reflect.DeepEqual(gotRecentTime, tt.expectedRecentTime) { if !reflect.DeepEqual(gotRecentTime, tt.expectedRecentTime) {
t.Errorf("expectedRecentTime - got %v, want %v", gotRecentTime, tt.expectedRecentTime) t.Errorf("expectedRecentTime - got %v, want %v", gotRecentTime, tt.expectedRecentTime)
} }
if gotNumberOfMisses != tt.expectedNumberOfMisses { if gotTooManyMissed != tt.expectedTooManyMissed {
t.Errorf("expectedNumberOfMisses - got %v, want %v", gotNumberOfMisses, tt.expectedNumberOfMisses) t.Errorf("expectedNumberOfMisses - got %v, want %v", gotTooManyMissed, tt.expectedTooManyMissed)
} }
}) })
} }