mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-21 19:01:49 +00:00
Merge pull request #93511 from Huang-Wei/flake-reserve-plugin
Hold Pod in cache until all other cleanup work is completed
This commit is contained in:
commit
db28b0239a
@ -501,16 +501,6 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
|
||||
// This allows us to keep scheduling without waiting on binding to occur.
|
||||
assumedPodInfo := podInfo.DeepCopy()
|
||||
assumedPod := assumedPodInfo.Pod
|
||||
|
||||
// Run the Reserve method of reserve plugins.
|
||||
if sts := prof.RunReservePluginsReserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "")
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
// trigger un-reserve to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
return
|
||||
}
|
||||
|
||||
// assume modifies `assumedPod` by setting NodeName=scheduleResult.SuggestedHost
|
||||
err = sched.assume(assumedPod, scheduleResult.SuggestedHost)
|
||||
if err != nil {
|
||||
@ -521,7 +511,14 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
|
||||
// (otherwise this would cause an infinite loop).
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, err, SchedulerError, "")
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
// trigger un-reserve plugins to clean up state associated with the reserved Pod
|
||||
return
|
||||
}
|
||||
|
||||
// Run the Reserve method of reserve plugins.
|
||||
if sts := prof.RunReservePluginsReserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "")
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
// trigger un-reserve to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
return
|
||||
}
|
||||
@ -537,11 +534,11 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
reason = SchedulerError
|
||||
}
|
||||
// One of the plugins returned status different than success or wait.
|
||||
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
|
||||
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
|
||||
}
|
||||
// One of the plugins returned status different than success or wait.
|
||||
prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, runPermitStatus.AsError(), reason, "")
|
||||
return
|
||||
}
|
||||
@ -563,11 +560,11 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
reason = SchedulerError
|
||||
}
|
||||
// trigger un-reserve plugins to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
|
||||
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
|
||||
}
|
||||
// trigger un-reserve plugins to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, waitOnPermitStatus.AsError(), reason, "")
|
||||
return
|
||||
}
|
||||
@ -575,15 +572,13 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
|
||||
// Run "prebind" plugins.
|
||||
preBindStatus := prof.RunPreBindPlugins(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
if !preBindStatus.IsSuccess() {
|
||||
var reason string
|
||||
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
|
||||
reason = SchedulerError
|
||||
// trigger un-reserve plugins to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
|
||||
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
|
||||
}
|
||||
// trigger un-reserve plugins to clean up state associated with the reserved Pod
|
||||
prof.RunReservePluginsUnreserve(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, preBindStatus.AsError(), reason, "")
|
||||
sched.recordSchedulingFailure(prof, assumedPodInfo, preBindStatus.AsError(), SchedulerError, "")
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -971,6 +971,28 @@ func TestPrebindPlugin(t *testing.T) {
|
||||
// TestReservePluginUnreserve tests that the Unreserve operations are invoked in
|
||||
// the reverse order of the Reserve operations.
|
||||
func TestReservePluginUnreserve(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
failReserve bool
|
||||
failReserveIndex int
|
||||
failPreBind bool
|
||||
}{
|
||||
{
|
||||
name: "fail reserve",
|
||||
failReserve: true,
|
||||
failReserveIndex: 1,
|
||||
},
|
||||
{
|
||||
name: "fail preBind",
|
||||
failPreBind: true,
|
||||
},
|
||||
{
|
||||
name: "pass everything",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
numReservePlugins := 3
|
||||
pluginInvokeEventChan := make(chan pluginInvokeEvent, numReservePlugins)
|
||||
|
||||
@ -1022,28 +1044,6 @@ func TestReservePluginUnreserve(t *testing.T) {
|
||||
scheduler.WithFrameworkOutOfTreeRegistry(registry))
|
||||
defer testutils.CleanupTest(t, testCtx)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
failReserve bool
|
||||
failReserveIndex int
|
||||
failPreBind bool
|
||||
}{
|
||||
{
|
||||
name: "fail reserve",
|
||||
failReserve: true,
|
||||
failReserveIndex: 1,
|
||||
},
|
||||
{
|
||||
name: "fail preBind",
|
||||
failPreBind: true,
|
||||
},
|
||||
{
|
||||
name: "pass everything",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
preBindPlugin.failPreBind = test.failPreBind
|
||||
if test.failReserve {
|
||||
reservePlugins[test.failReserveIndex].failReserve = true
|
||||
@ -1080,11 +1080,6 @@ func TestReservePluginUnreserve(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
preBindPlugin.reset()
|
||||
for _, pl := range reservePlugins {
|
||||
pl.reset()
|
||||
}
|
||||
testutils.CleanupPods(testCtx.ClientSet, t, []*v1.Pod{pod})
|
||||
})
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user