mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-09-28 13:45:50 +00:00
Hold Pod in cache until all other cleanup work is completed
- Move "ForgetPod" after "RunReservePluginsUnreserve", so that the cache would hold the pod to avoid it's being retried simutaneously until Unreserve is completed. - Move "assume" ahead of "RunReservePluginsReserve". This is based on the fact that "ForgetPod" is the last step of failure path, so "assume" should be reversly treated as the first step. The current failure path is like this: assume -> reserve -> unreserve -> forgetPod -> recordingFailure - Make subtests of TestReservePluginUnreserve stateless
This commit is contained in:
@@ -501,16 +501,6 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
|
||||
// This allows us to keep scheduling without waiting on binding to occur.
|
||||
assumedPodInfo := podInfo.DeepCopy()
|
||||
assumedPod := assumedPodInfo.Pod
|
||||
|
||||
// Run the Reserve method of reserve plugins.
|
||||
if sts := prof.RunReservePluginsReserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "")
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
// trigger un-reserve to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
return
|
||||
}
|
||||
|
||||
// assume modifies `assumedPod` by setting NodeName=scheduleResult.SuggestedHost
|
||||
err = sched.assume(assumedPod, scheduleResult.SuggestedHost)
|
||||
if err != nil {
|
||||
@@ -521,7 +511,14 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
|
||||
// (otherwise this would cause an infinite loop).
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, err, SchedulerError, "")
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
// trigger un-reserve plugins to clean up state associated with the reserved Pod
|
||||
return
|
||||
}
|
||||
|
||||
// Run the Reserve method of reserve plugins.
|
||||
if sts := prof.RunReservePluginsReserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "")
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
// trigger un-reserve to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
return
|
||||
}
|
||||
@@ -537,11 +534,11 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
reason = SchedulerError
|
||||
}
|
||||
// One of the plugins returned status different than success or wait.
|
||||
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
|
||||
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
|
||||
}
|
||||
// One of the plugins returned status different than success or wait.
|
||||
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, runPermitStatus.AsError(), reason, "")
|
||||
return
|
||||
}
|
||||
@@ -563,11 +560,11 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
reason = SchedulerError
|
||||
}
|
||||
// trigger un-reserve plugins to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
|
||||
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
|
||||
}
|
||||
// trigger un-reserve plugins to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, waitOnPermitStatus.AsError(), reason, "")
|
||||
return
|
||||
}
|
||||
@@ -575,15 +572,13 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
|
||||
// Run "prebind" plugins.
|
||||
preBindStatus := prof.RunPreBindPlugins(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
if !preBindStatus.IsSuccess() {
|
||||
var reason string
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
reason = SchedulerError
|
||||
// trigger un-reserve plugins to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
|
||||
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
|
||||
}
|
||||
// trigger un-reserve plugins to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, preBindStatus.AsError(), reason, "")
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, preBindStatus.AsError(), SchedulerError, "")
|
||||
return
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user