scheduler: run Unreserve if Reserve fails

If a reserve plugin's Reserve method returns an error, plugins that
already completed Reserve successfully may be holding allocated
resources, which must be released by the corresponding Unreserve
operation. Since Unreserve operations are idempotent, this patch runs
the Unreserve operation of ALL reserve plugins when a Reserve operation
fails, rather than tracking which plugins had succeeded.
This commit is contained in:
Adhityaa Chandrasekar
2020-06-22 18:38:29 +00:00
parent 66ff1ae536
commit 1b223b861a
4 changed files with 27 additions and 11 deletions

View File

@@ -520,6 +520,8 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
if sts := prof.RunReservePluginsReserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "")
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
// trigger un-reserve to clean up state associated with the reserved Pod
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
return
}