From e660e84459b990239ed1a6f65627cd08617d21f6 Mon Sep 17 00:00:00 2001 From: Abdullah Gharaibeh Date: Wed, 8 May 2019 09:49:01 -0400 Subject: [PATCH] Faster scheduler. --- pkg/scheduler/core/generic_scheduler.go | 14 ++-- pkg/scheduler/internal/cache/cache_test.go | 6 +- pkg/scheduler/internal/cache/node_tree.go | 23 ++++++- .../internal/cache/node_tree_test.go | 68 +++++++++---------- 4 files changed, 67 insertions(+), 44 deletions(-) diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go index 88592446cff..8eb2a6b82a1 100644 --- a/pkg/scheduler/core/generic_scheduler.go +++ b/pkg/scheduler/core/generic_scheduler.go @@ -167,6 +167,7 @@ type genericScheduler struct { pvcLister corelisters.PersistentVolumeClaimLister pdbLister algorithm.PDBLister disablePreemption bool + lastIndex int percentageOfNodesToScore int32 } @@ -460,8 +461,8 @@ func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v if len(g.predicates) == 0 { filtered = nodes } else { - allNodes := int32(g.cache.NodeTree().NumNodes()) - numNodesToFind := g.numFeasibleNodesToFind(allNodes) + allNodes := g.cache.NodeTree().AllNodes() + numNodesToFind := g.numFeasibleNodesToFind(int32(len(allNodes))) // Create filtered list with enough space to avoid growing it // and allow assigning. @@ -477,8 +478,12 @@ func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v // We can use the same metadata producer for all nodes. meta := g.predicateMetaProducer(pod, g.nodeInfoSnapshot.NodeInfoMap) + processedNodes := int32(0) checkNode := func(i int) { - nodeName := g.cache.NodeTree().Next() + // We check the nodes starting from where we left off in the previous scheduling cycle, + // this is to make sure all nodes have the same chance of being examined across pods. + atomic.AddInt32(&processedNodes, 1) + nodeName := allNodes[(g.lastIndex+i)%len(allNodes)] fits, failedPredicates, err := podFitsOnNode( pod, meta, @@ -510,7 +515,8 @@ func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v // Stops searching for more nodes once the configured number of feasible nodes // are found. - workqueue.ParallelizeUntil(ctx, 16, int(allNodes), checkNode) + workqueue.ParallelizeUntil(ctx, 16, len(allNodes), checkNode) + g.lastIndex = (g.lastIndex + int(processedNodes)) % len(allNodes) filtered = filtered[:filteredLen] if len(errs) > 0 { diff --git a/pkg/scheduler/internal/cache/cache_test.go b/pkg/scheduler/internal/cache/cache_test.go index 70e2f9f9ca0..f68303ece77 100644 --- a/pkg/scheduler/internal/cache/cache_test.go +++ b/pkg/scheduler/internal/cache/cache_test.go @@ -1066,7 +1066,7 @@ func TestNodeOperators(t *testing.T) { if !found { t.Errorf("Failed to find node %v in internalcache.", node.Name) } - if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.Next() != node.Name { + if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.next() != node.Name { t.Errorf("cache.nodeTree is not updated correctly after adding node: %v", node.Name) } @@ -1109,7 +1109,7 @@ func TestNodeOperators(t *testing.T) { t.Errorf("Failed to update node in schedulernodeinfo:\n got: %+v \nexpected: %+v", got, expected) } // Check nodeTree after update - if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.Next() != node.Name { + if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.next() != node.Name { t.Errorf("unexpected cache.nodeTree after updating node: %v", node.Name) } @@ -1120,7 +1120,7 @@ func TestNodeOperators(t *testing.T) { } // Check nodeTree after remove. The node should be removed from the nodeTree even if there are // still pods on it. - if cache.nodeTree.NumNodes() != 0 || cache.nodeTree.Next() != "" { + if cache.nodeTree.NumNodes() != 0 || cache.nodeTree.next() != "" { t.Errorf("unexpected cache.nodeTree after removing node: %v", node.Name) } } diff --git a/pkg/scheduler/internal/cache/node_tree.go b/pkg/scheduler/internal/cache/node_tree.go index 1c7ef2c6ebf..ff770597165 100644 --- a/pkg/scheduler/internal/cache/node_tree.go +++ b/pkg/scheduler/internal/cache/node_tree.go @@ -32,6 +32,7 @@ type NodeTree struct { tree map[string]*nodeArray // a map from zone (region-zone) to an array of nodes in the zone. zones []string // a list of all the zones in the tree (keys) zoneIndex int + allNodes []string numNodes int mu sync.RWMutex } @@ -92,6 +93,7 @@ func (nt *NodeTree) addNode(n *v1.Node) { } klog.V(5).Infof("Added node %v in group %v to NodeTree", n.Name, zone) nt.numNodes++ + nt.recomputeAllNodes() } // RemoveNode removes a node from the NodeTree. @@ -112,6 +114,7 @@ func (nt *NodeTree) removeNode(n *v1.Node) error { } klog.V(5).Infof("Removed node %v in group %v from NodeTree", n.Name, zone) nt.numNodes-- + nt.recomputeAllNodes() return nil } } @@ -159,9 +162,7 @@ func (nt *NodeTree) resetExhausted() { // Next returns the name of the next node. NodeTree iterates over zones and in each zone iterates // over nodes in a round robin fashion. -func (nt *NodeTree) Next() string { - nt.mu.Lock() - defer nt.mu.Unlock() +func (nt *NodeTree) next() string { if len(nt.zones) == 0 { return "" } @@ -186,6 +187,22 @@ func (nt *NodeTree) Next() string { } } +func (nt *NodeTree) recomputeAllNodes() { + nt.allNodes = make([]string, 0, nt.numNodes) + nt.resetExhausted() + for i := 0; i < nt.numNodes; i++ { + nt.allNodes = append(nt.allNodes, nt.next()) + } +} + +// AllNodes returns the list of nodes as they would be iterated by +// Next() method. +func (nt *NodeTree) AllNodes() []string { + nt.mu.RLock() + defer nt.mu.RUnlock() + return nt.allNodes +} + // NumNodes returns the number of nodes. func (nt *NodeTree) NumNodes() int { nt.mu.RLock() diff --git a/pkg/scheduler/internal/cache/node_tree_test.go b/pkg/scheduler/internal/cache/node_tree_test.go index e8cb35ba78a..8a92423eea5 100644 --- a/pkg/scheduler/internal/cache/node_tree_test.go +++ b/pkg/scheduler/internal/cache/node_tree_test.go @@ -140,28 +140,28 @@ func TestNodeTree_AddNode(t *testing.T) { { name: "single node no labels", nodesToAdd: allNodes[:1], - expectedTree: map[string]*nodeArray{"": {[]string{"node-0"}, 0}}, + expectedTree: map[string]*nodeArray{"": {[]string{"node-0"}, 1}}, }, { name: "mix of nodes with and without proper labels", nodesToAdd: allNodes[:4], expectedTree: map[string]*nodeArray{ - "": {[]string{"node-0"}, 0}, - "region-1:\x00:": {[]string{"node-1"}, 0}, - ":\x00:zone-2": {[]string{"node-2"}, 0}, - "region-1:\x00:zone-2": {[]string{"node-3"}, 0}, + "": {[]string{"node-0"}, 1}, + "region-1:\x00:": {[]string{"node-1"}, 1}, + ":\x00:zone-2": {[]string{"node-2"}, 1}, + "region-1:\x00:zone-2": {[]string{"node-3"}, 1}, }, }, { name: "mix of nodes with and without proper labels and some zones with multiple nodes", nodesToAdd: allNodes[:7], expectedTree: map[string]*nodeArray{ - "": {[]string{"node-0"}, 0}, - "region-1:\x00:": {[]string{"node-1"}, 0}, - ":\x00:zone-2": {[]string{"node-2"}, 0}, - "region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0}, - "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, - "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, + "": {[]string{"node-0"}, 1}, + "region-1:\x00:": {[]string{"node-1"}, 1}, + ":\x00:zone-2": {[]string{"node-2"}, 1}, + "region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 2}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 1}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 1}, }, }, } @@ -190,11 +190,11 @@ func TestNodeTree_RemoveNode(t *testing.T) { existingNodes: allNodes[:7], nodesToRemove: allNodes[:1], expectedTree: map[string]*nodeArray{ - "region-1:\x00:": {[]string{"node-1"}, 0}, - ":\x00:zone-2": {[]string{"node-2"}, 0}, - "region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0}, - "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, - "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, + "region-1:\x00:": {[]string{"node-1"}, 1}, + ":\x00:zone-2": {[]string{"node-2"}, 1}, + "region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 2}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 1}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 1}, }, }, { @@ -202,10 +202,10 @@ func TestNodeTree_RemoveNode(t *testing.T) { existingNodes: allNodes[:7], nodesToRemove: allNodes[1:4], expectedTree: map[string]*nodeArray{ - "": {[]string{"node-0"}, 0}, - "region-1:\x00:zone-2": {[]string{"node-4"}, 0}, - "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, - "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, + "": {[]string{"node-0"}, 1}, + "region-1:\x00:zone-2": {[]string{"node-4"}, 1}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 1}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 1}, }, }, { @@ -257,11 +257,11 @@ func TestNodeTree_UpdateNode(t *testing.T) { }, }, expectedTree: map[string]*nodeArray{ - "region-1:\x00:": {[]string{"node-1"}, 0}, - ":\x00:zone-2": {[]string{"node-2"}, 0}, - "region-1:\x00:zone-2": {[]string{"node-3", "node-4", "node-0"}, 0}, - "region-1:\x00:zone-3": {[]string{"node-5"}, 0}, - "region-2:\x00:zone-2": {[]string{"node-6"}, 0}, + "region-1:\x00:": {[]string{"node-1"}, 1}, + ":\x00:zone-2": {[]string{"node-2"}, 1}, + "region-1:\x00:zone-2": {[]string{"node-3", "node-4", "node-0"}, 3}, + "region-1:\x00:zone-3": {[]string{"node-5"}, 1}, + "region-2:\x00:zone-2": {[]string{"node-6"}, 1}, }, }, { @@ -277,7 +277,7 @@ func TestNodeTree_UpdateNode(t *testing.T) { }, }, expectedTree: map[string]*nodeArray{ - "region-1:\x00:zone-2": {[]string{"node-0"}, 0}, + "region-1:\x00:zone-2": {[]string{"node-0"}, 1}, }, }, { @@ -293,8 +293,8 @@ func TestNodeTree_UpdateNode(t *testing.T) { }, }, expectedTree: map[string]*nodeArray{ - "": {[]string{"node-0"}, 0}, - "region-1:\x00:zone-2": {[]string{"node-new"}, 0}, + "": {[]string{"node-0"}, 1}, + "region-1:\x00:zone-2": {[]string{"node-new"}, 1}, }, }, } @@ -322,7 +322,7 @@ func TestNodeTree_Next(t *testing.T) { tests := []struct { name string nodesToAdd []*v1.Node - numRuns int // number of times to run Next() + numRuns int // number of times to run next() expectedOutput []string }{ { @@ -357,7 +357,7 @@ func TestNodeTree_Next(t *testing.T) { var output []string for i := 0; i < test.numRuns; i++ { - output = append(output, nt.Next()) + output = append(output, nt.next()) } if !reflect.DeepEqual(output, test.expectedOutput) { t.Errorf("unexpected output. Expected: %v, Got: %v", test.expectedOutput, output) @@ -399,15 +399,15 @@ func TestNodeTreeMultiOperations(t *testing.T) { name: "add more nodes to an exhausted zone", nodesToAdd: append(allNodes[4:9], allNodes[3]), nodesToRemove: nil, - operations: []string{"add", "add", "add", "add", "add", "next", "next", "next", "next", "add", "next", "next", "next"}, - expectedOutput: []string{"node-4", "node-5", "node-6", "node-7", "node-3", "node-8", "node-4"}, + operations: []string{"add", "add", "add", "add", "add", "next", "next", "next", "next", "next", "next", "next"}, + expectedOutput: []string{"node-4", "node-5", "node-6", "node-7", "node-8", "node-4", "node-5"}, }, { name: "remove zone and add new to ensure exhausted is reset correctly", nodesToAdd: append(allNodes[3:5], allNodes[6:8]...), nodesToRemove: allNodes[3:5], operations: []string{"add", "add", "next", "next", "remove", "add", "add", "next", "next", "remove", "next", "next"}, - expectedOutput: []string{"node-3", "node-4", "node-6", "node-7", "node-6", "node-7"}, + expectedOutput: []string{"node-3", "node-4", "node-4", "node-6", "node-6", "node-7"}, }, } @@ -434,7 +434,7 @@ func TestNodeTreeMultiOperations(t *testing.T) { removeIndex++ } case "next": - output = append(output, nt.Next()) + output = append(output, nt.next()) default: t.Errorf("unknow operation: %v", op) }