diff --git a/pkg/scheduler/framework/runtime/framework.go b/pkg/scheduler/framework/runtime/framework.go index 65767f1c234..0cc09db631e 100644 --- a/pkg/scheduler/framework/runtime/framework.go +++ b/pkg/scheduler/framework/runtime/framework.go @@ -788,7 +788,8 @@ func (f *frameworkImpl) runPostBindPlugin(ctx context.Context, pl framework.Post // RunReservePluginsReserve runs the Reserve method in the set of configured // reserve plugins. If any of these plugins returns an error, it does not // continue running the remaining ones and returns the error. In such a case, -// the pod will not be scheduled. +// the pod will not be scheduled and the caller will be expected to call +// RunReservePluginsUnreserve. func (f *frameworkImpl) RunReservePluginsReserve(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (status *framework.Status) { startTime := time.Now() defer func() { diff --git a/pkg/scheduler/framework/v1alpha1/interface.go b/pkg/scheduler/framework/v1alpha1/interface.go index bce5b07a36d..8427f9b7f7f 100644 --- a/pkg/scheduler/framework/v1alpha1/interface.go +++ b/pkg/scheduler/framework/v1alpha1/interface.go @@ -333,11 +333,14 @@ type ScorePlugin interface { type ReservePlugin interface { Plugin // Reserve is called by the scheduling framework when the scheduler cache is - // updated. + // updated. If this method returns a failed Status, the scheduler will call + // the Unreserve method for all enabled ReservePlugins. Reserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) *Status // Unreserve is called by the scheduling framework when a reserved pod was // rejected, an error occurred during reservation of subsequent plugins, or - // in a later phase. The Unreserve method implementation must be idempotent. + // in a later phase. The Unreserve method implementation must be idempotent + // and may be called by the scheduler even if the corresponding Reserve + // method for the same plugin was not called. Unreserve(ctx context.Context, state *CycleState, p *v1.Pod, nodeName string) } diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 54b8fd27de4..7b0a2a452e1 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -520,6 +520,8 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) { if sts := prof.RunReservePluginsReserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() { sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "") metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start)) + // trigger un-reserve to clean up state associated with the reserved Pod + prof.RunReservePluginsUnreserve(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost) return } diff --git a/test/integration/scheduler/framework_test.go b/test/integration/scheduler/framework_test.go index a641915b700..0657a409d2c 100644 --- a/test/integration/scheduler/framework_test.go +++ b/test/integration/scheduler/framework_test.go @@ -269,6 +269,7 @@ func (rp *ReservePlugin) Unreserve(ctx context.Context, state *framework.CycleSt func (rp *ReservePlugin) reset() { rp.numReserveCalled = 0 rp.numUnreserveCalled = 0 + rp.failReserve = false } // Name returns name of the plugin. @@ -1006,22 +1007,31 @@ func TestReservePluginUnreserve(t *testing.T) { defer testutils.CleanupTest(t, testCtx) tests := []struct { - name string - preBindFail bool + name string + failReserve bool + failReserveIndex int + failPreBind bool }{ { - name: "fail preBind", - preBindFail: true, + name: "fail reserve", + failReserve: true, + failReserveIndex: 1, }, { - name: "pass preBind", - preBindFail: false, + name: "fail preBind", + failPreBind: true, + }, + { + name: "pass everything", }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - preBindPlugin.failPreBind = test.preBindFail + preBindPlugin.failPreBind = test.failPreBind + if test.failReserve { + reservePlugins[test.failReserveIndex].failReserve = true + } // Create a best effort pod. pod, err := createPausePod(testCtx.ClientSet, initPausePod(&pausePodConfig{Name: "test-pod", Namespace: testCtx.NS.Name})) @@ -1029,7 +1039,7 @@ func TestReservePluginUnreserve(t *testing.T) { t.Errorf("Error while creating a test pod: %v", err) } - if test.preBindFail { + if test.failPreBind || test.failReserve { if err = wait.Poll(10*time.Millisecond, 30*time.Second, podSchedulingError(testCtx.ClientSet, pod.Namespace, pod.Name)); err != nil { t.Errorf("Expected a scheduling error, but didn't get it: %v", err) }