From e6dd36759f07e5967f9f74cbda9c77a59a7977b1 Mon Sep 17 00:00:00 2001
From: Oleg Guba
Date: Thu, 29 Feb 2024 20:43:50 -0800
Subject: [PATCH 1/2] [kubernetes/scheduler] use lockless diagnosis collection in findNodesThatPassFilters

---
 pkg/scheduler/schedule_one.go | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/pkg/scheduler/schedule_one.go b/pkg/scheduler/schedule_one.go
index ae4be17bb63..42fd13eb38d 100644
--- a/pkg/scheduler/schedule_one.go
+++ b/pkg/scheduler/schedule_one.go
@@ -595,10 +595,15 @@ func (sched *Scheduler) findNodesThatPassFilters(
 	}
 
 	errCh := parallelize.NewErrorChannel()
-	var statusesLock sync.Mutex
 	var feasibleNodesLen int32
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
+
+	type nodeStatus struct {
+		node   string
+		status *framework.Status
+	}
+	result := make([]*nodeStatus, len(feasibleNodes))
 	checkNode := func(i int) {
 		// We check the nodes starting from where we left off in the previous scheduling cycle,
 		// this is to make sure all nodes have the same chance of being examined across pods.
@@ -617,12 +622,16 @@ func (sched *Scheduler) findNodesThatPassFilters(
 				feasibleNodes[length-1] = nodeInfo
 			}
 		} else {
-			statusesLock.Lock()
-			diagnosis.NodeToStatusMap[nodeInfo.Node().Name] = status
-			diagnosis.AddPluginStatus(status)
-			statusesLock.Unlock()
+			result[i] = &nodeStatus{node: nodeInfo.Node().Name, status: status}
 		}
 	}
+	for _, item := range result {
+		if item == nil {
+			continue
+		}
+		diagnosis.NodeToStatusMap[item.node] = item.status
+		diagnosis.AddPluginStatus(item.status)
+	}
 
 	beginCheckNode := time.Now()
 	statusCode := framework.Success

From ba525460e0519a47a6550c64e93f914a489610b4 Mon Sep 17 00:00:00 2001
From: Oleg Guba
Date: Fri, 1 Mar 2024 02:06:17 -0800
Subject: [PATCH 2/2] change result size to numAllNodes

---
 pkg/scheduler/schedule_one.go | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pkg/scheduler/schedule_one.go b/pkg/scheduler/schedule_one.go
index 42fd13eb38d..5b0032f312c 100644
--- a/pkg/scheduler/schedule_one.go
+++ b/pkg/scheduler/schedule_one.go
@@ -603,7 +603,7 @@ func (sched *Scheduler) findNodesThatPassFilters(
 		node   string
 		status *framework.Status
 	}
-	result := make([]*nodeStatus, len(feasibleNodes))
+	result := make([]*nodeStatus, numAllNodes)
 	checkNode := func(i int) {
 		// We check the nodes starting from where we left off in the previous scheduling cycle,
 		// this is to make sure all nodes have the same chance of being examined across pods.
@@ -625,13 +625,6 @@ func (sched *Scheduler) findNodesThatPassFilters(
 			result[i] = &nodeStatus{node: nodeInfo.Node().Name, status: status}
 		}
 	}
-	for _, item := range result {
-		if item == nil {
-			continue
-		}
-		diagnosis.NodeToStatusMap[item.node] = item.status
-		diagnosis.AddPluginStatus(item.status)
-	}
 
 	beginCheckNode := time.Now()
 	statusCode := framework.Success
@@ -646,6 +639,13 @@ func (sched *Scheduler) findNodesThatPassFilters(
 	// are found.
 	fwk.Parallelizer().Until(ctx, numAllNodes, checkNode, metrics.Filter)
 	feasibleNodes = feasibleNodes[:feasibleNodesLen]
+	for _, item := range result {
+		if item == nil {
+			continue
+		}
+		diagnosis.NodeToStatusMap[item.node] = item.status
+		diagnosis.AddPluginStatus(item.status)
+	}
 	if err := errCh.ReceiveError(); err != nil {
 		statusCode = framework.Error
 		return feasibleNodes, err
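
The pattern in this series is to drop the mutex around diagnosis.NodeToStatusMap by giving each parallel worker exclusive ownership of one slot in a pre-sized result slice, then merging the slice into the map in a single-threaded pass after the parallel phase. The sketch below is a minimal, self-contained illustration of that idea under stated assumptions; it is not the scheduler code, and the node names, status type, and fake filter predicate are stand-ins for *framework.Status and the real filter plugins.

package main

import (
	"fmt"
	"sync"
)

// status stands in for *framework.Status; the reasons below are made up.
type status struct{ reason string }

func main() {
	nodes := []string{"node-a", "node-b", "node-c", "node-d"}

	// One slot per work item: worker i writes only result[i], so workers
	// never contend and no mutex is needed while they run. This mirrors
	// sizing the scheduler's result slice to numAllNodes.
	result := make([]*status, len(nodes))

	var wg sync.WaitGroup
	for i := range nodes {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			// Pretend every odd-indexed node fails the filter.
			if i%2 == 1 {
				result[i] = &status{reason: "did not fit"}
			}
		}(i)
	}
	wg.Wait()

	// Single-threaded merge after the parallel phase, analogous to folding
	// result into diagnosis.NodeToStatusMap once Parallelizer().Until returns.
	nodeToStatus := make(map[string]*status)
	for i, st := range result {
		if st == nil {
			continue // node passed the filters; nothing to record
		}
		nodeToStatus[nodes[i]] = st
	}
	for name, st := range nodeToStatus {
		fmt.Printf("%s: %s\n", name, st.reason)
	}
}

Two details follow directly from the diffs: result must be sized by numAllNodes because checkNode writes result[i] with i ranging over all nodes (PATCH 2/2 corrects the original len(feasibleNodes) sizing), and the merge loop only makes sense after fwk.Parallelizer().Until has returned, which is why PATCH 2/2 also moves it below feasibleNodes = feasibleNodes[:feasibleNodesLen].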