From 2f30fae0e8bd9308e8ba1cd5f801c133a0dbef0e Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Tue, 1 Aug 2023 20:00:16 +0200 Subject: [PATCH] scheduler: fix data race after binding failure When binding has failed, `Done` gets called by `handleBindingCycleError`. Calling it again is at best redundant and, at worst, suffers from a data race: - the `assumedPodInfo` is placed in the backoff queue - an event causes the `Pod` pointer to get updated in it - reading `assumedPodInfo.Pod.UID` races with that write This race was found with `go test -race`. --- pkg/scheduler/schedule_one.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/scheduler/schedule_one.go b/pkg/scheduler/schedule_one.go index 02320d2300f..525e1af8632 100644 --- a/pkg/scheduler/schedule_one.go +++ b/pkg/scheduler/schedule_one.go @@ -121,6 +121,7 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) { status := sched.bindingCycle(bindingCycleCtx, state, fwk, scheduleResult, assumedPodInfo, start, podsToActivate) if !status.IsSuccess() { sched.handleBindingCycleError(bindingCycleCtx, state, fwk, assumedPodInfo, start, scheduleResult, status) + return } // Usually, DonePod is called inside the scheduling queue, // but in this case, we need to call it here because this Pod won't go back to the scheduling queue.