Update schedule logic to properly calculate missed schedules

Before this change we've assumed a constant time between schedule runs,
which is not true for cases like "30 6-16/4 * * 1-5".
The fix is to calculate the potential next run using the fixed schedule
as the baseline, and then go back one schedule back and allow the cron
library to calculate the correct time.

This approach saves us from iterating multiple times between last
schedule time and now, if the cronjob for any reason wasn't running for
significant amount of time.
This commit is contained in:
Maciej Szulik 2023-06-17 12:36:56 +02:00
parent 9d50c0a025
commit af1c9e49c4
No known key found for this signature in database
GPG Key ID: F15E55D276FA84C4
2 changed files with 112 additions and 2 deletions

View File

@ -72,7 +72,8 @@ func deleteFromActiveList(cj *batchv1.CronJob, uid types.UID) {
// mostRecentScheduleTime returns:
// - the last schedule time or CronJob's creation time,
// - the most recent time a Job should be created or nil, if that's after now,
// - number of missed schedules
// - very rough approximation of number of missed schedules (based on difference
// between two schedules),
// - error in an edge case where the schedule specification is grammatically correct,
// but logically doesn't make sense (31st day for months with only 30 days, for example).
func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Schedule, includeStartingDeadlineSeconds bool) (time.Time, *time.Time, int64, error) {
@ -106,10 +107,29 @@ func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Sc
if timeBetweenTwoSchedules < 1 {
return earliestTime, nil, 0, fmt.Errorf("time difference between two schedules is less than 1 second")
}
// this logic used for calculating number of missed schedules does a rough
// approximation, by calculating a diff between two schedules (t1 and t2),
// and counting how many of these will fit in between last schedule and now
timeElapsed := int64(now.Sub(t1).Seconds())
numberOfMissedSchedules := (timeElapsed / timeBetweenTwoSchedules) + 1
mostRecentTime := time.Unix(t1.Unix()+((numberOfMissedSchedules-1)*timeBetweenTwoSchedules), 0).UTC()
var mostRecentTime time.Time
// to get the most recent time accurate for regular schedules and the ones
// specified with @every form, we first need to calculate the potential earliest
// time by multiplying the initial number of missed schedules by its interval,
// this is critical to ensure @every starts at the correct time, this explains
// the numberOfMissedSchedules-1, the additional -1 serves there to go back
// in time one more time unit, and let the cron library calculate a proper
// schedule, for case where the schedule is not consistent, for example
// something like 30 6-16/4 * * 1-5
potentialEarliest := t1.Add(time.Duration((numberOfMissedSchedules-1-1)*timeBetweenTwoSchedules) * time.Second)
for t := schedule.Next(potentialEarliest); !t.After(now); t = schedule.Next(t) {
mostRecentTime = t
}
if mostRecentTime.IsZero() {
return earliestTime, nil, numberOfMissedSchedules, nil
}
return earliestTime, &mostRecentTime, numberOfMissedSchedules, nil
}

View File

@ -290,7 +290,9 @@ func TestByJobStartTime(t *testing.T) {
func TestMostRecentScheduleTime(t *testing.T) {
metav1TopOfTheHour := metav1.NewTime(*topOfTheHour())
metav1HalfPastTheHour := metav1.NewTime(*deltaTimeAfterTopOfTheHour(30 * time.Minute))
metav1MinuteAfterTopOfTheHour := metav1.NewTime(*deltaTimeAfterTopOfTheHour(1 * time.Minute))
oneMinute := int64(60)
tenSeconds := int64(10)
tests := []struct {
name string
@ -346,6 +348,94 @@ func TestMostRecentScheduleTime(t *testing.T) {
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(10 * time.Second),
expectedNumberOfMisses: 5,
},
{
name: "complex schedule",
cj: &batchv1.CronJob{
ObjectMeta: metav1.ObjectMeta{
CreationTimestamp: metav1TopOfTheHour,
},
Spec: batchv1.CronJobSpec{
Schedule: "30 6-16/4 * * 1-5",
},
Status: batchv1.CronJobStatus{
LastScheduleTime: &metav1HalfPastTheHour,
},
},
now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute),
expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute),
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
expectedNumberOfMisses: 2,
},
{
name: "another complex schedule",
cj: &batchv1.CronJob{
ObjectMeta: metav1.ObjectMeta{
CreationTimestamp: metav1TopOfTheHour,
},
Spec: batchv1.CronJobSpec{
Schedule: "30 10,11,12 * * 1-5",
},
Status: batchv1.CronJobStatus{
LastScheduleTime: &metav1HalfPastTheHour,
},
},
now: *deltaTimeAfterTopOfTheHour(30*time.Hour + 30*time.Minute),
expectedRecentTime: nil,
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
expectedNumberOfMisses: 30,
},
{
name: "complex schedule with longer diff between executions",
cj: &batchv1.CronJob{
ObjectMeta: metav1.ObjectMeta{
CreationTimestamp: metav1TopOfTheHour,
},
Spec: batchv1.CronJobSpec{
Schedule: "30 6-16/4 * * 1-5",
},
Status: batchv1.CronJobStatus{
LastScheduleTime: &metav1HalfPastTheHour,
},
},
now: *deltaTimeAfterTopOfTheHour(96*time.Hour + 31*time.Minute),
expectedRecentTime: deltaTimeAfterTopOfTheHour(96*time.Hour + 30*time.Minute),
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
expectedNumberOfMisses: 6,
},
{
name: "complex schedule with shorter diff between executions",
cj: &batchv1.CronJob{
ObjectMeta: metav1.ObjectMeta{
CreationTimestamp: metav1TopOfTheHour,
},
Spec: batchv1.CronJobSpec{
Schedule: "30 6-16/4 * * 1-5",
},
},
now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute),
expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute),
expectedEarliestTime: *topOfTheHour(),
expectedNumberOfMisses: 7,
},
{
name: "@every schedule",
cj: &batchv1.CronJob{
ObjectMeta: metav1.ObjectMeta{
CreationTimestamp: metav1.NewTime(*deltaTimeAfterTopOfTheHour(-59 * time.Minute)),
},
Spec: batchv1.CronJobSpec{
Schedule: "@every 1h",
StartingDeadlineSeconds: &tenSeconds,
},
Status: batchv1.CronJobStatus{
LastScheduleTime: &metav1MinuteAfterTopOfTheHour,
},
},
now: *deltaTimeAfterTopOfTheHour(7 * 24 * time.Hour),
expectedRecentTime: deltaTimeAfterTopOfTheHour((6 * 24 * time.Hour) + 23*time.Hour + 1*time.Minute),
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(1 * time.Minute),
expectedNumberOfMisses: 167,
},
{
name: "rogue cronjob",
cj: &batchv1.CronJob{