From 1be3f8961bd033c4891cb5d9c8b134e3906bfa3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20Skocze=C5=84?=
Date: Tue, 18 Mar 2025 09:18:48 +0000
Subject: [PATCH] Fix a race when closing activeQ

---
 pkg/scheduler/backend/queue/active_queue.go | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/pkg/scheduler/backend/queue/active_queue.go b/pkg/scheduler/backend/queue/active_queue.go
index f44b552d8ab..27293b39a76 100644
--- a/pkg/scheduler/backend/queue/active_queue.go
+++ b/pkg/scheduler/backend/queue/active_queue.go
@@ -402,6 +402,12 @@ func (aq *activeQueue) done(pod types.UID) {
 	aq.lock.Lock()
 	defer aq.lock.Unlock()
 
+	aq.unlockedDone(pod)
+}
+
+// unlockedDone is used by the activeQueue internally and doesn't take the lock itself.
+// It assumes the lock is already taken outside before the method is called.
+func (aq *activeQueue) unlockedDone(pod types.UID) {
 	inFlightPod, ok := aq.inFlightPods[pod]
 	if !ok {
 		// This Pod is already done()ed.
@@ -446,15 +452,15 @@ func (aq *activeQueue) done(pod types.UID) {
 
 // close closes the activeQueue.
 func (aq *activeQueue) close() {
+	aq.lock.Lock()
+	defer aq.lock.Unlock()
 	// We should call done() for all in-flight pods to clean up the inFlightEvents metrics.
 	// It's safe even if the binding cycle running asynchronously calls done() afterwards
 	// done() will just be a no-op.
 	for pod := range aq.inFlightPods {
-		aq.done(pod)
+		aq.unlockedDone(pod)
 	}
-	aq.lock.Lock()
 	aq.closed = true
-	aq.lock.Unlock()
 }
 
 // broadcast notifies the pop() operation that new pod(s) was added to the activeQueue.