From c2ec8bedbcc9a675d0c4c0317e8be559bc5e3f63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ABl=20Kimmerlin?=
Date: Wed, 22 Jul 2020 23:03:35 +0300
Subject: [PATCH] Fix scheduler issue with nodeTree additions

When nodes are added in multiple zones at once, the nodeTree next()
function does not return a correct list of node names: because the
iterator keeps its position between snapshot updates, a full pass of
numNodes calls to next() can wrap around, repeating some nodes and
skipping others.

This commit resets the round-robin indexes before the snapshot update
starts calling next(), so that each node is returned exactly once.

Special thanks to igraecao for the help in finding the bug.

Co-authored-by: igraecao
---
 pkg/scheduler/internal/cache/cache.go      |   1 +
 pkg/scheduler/internal/cache/cache_test.go | 148 ++++++++++++++++++++++
 2 files changed, 149 insertions(+)

diff --git a/pkg/scheduler/internal/cache/cache.go b/pkg/scheduler/internal/cache/cache.go
index 58c83f68f1f..9f40b620825 100644
--- a/pkg/scheduler/internal/cache/cache.go
+++ b/pkg/scheduler/internal/cache/cache.go
@@ -280,6 +280,7 @@ func (cache *schedulerCache) updateNodeInfoSnapshotList(snapshot *Snapshot, upda
 	if updateAll {
 		// Take a snapshot of the nodes order in the tree
 		snapshot.nodeInfoList = make([]*framework.NodeInfo, 0, cache.nodeTree.numNodes)
+		cache.nodeTree.resetExhausted()
 		for i := 0; i < cache.nodeTree.numNodes; i++ {
 			nodeName := cache.nodeTree.next()
 			if n := snapshot.nodeInfoMap[nodeName]; n != nil {
diff --git a/pkg/scheduler/internal/cache/cache_test.go b/pkg/scheduler/internal/cache/cache_test.go
index 0f2cb080a68..5a8d3fb6f54 100644
--- a/pkg/scheduler/internal/cache/cache_test.go
+++ b/pkg/scheduler/internal/cache/cache_test.go
@@ -1539,6 +1539,154 @@ func compareCacheWithNodeInfoSnapshot(cache *schedulerCache, snapshot *Snapshot)
 	return nil
 }
 
+func TestSchedulerCache_updateNodeInfoSnapshotList(t *testing.T) {
+	// Create a few nodes to be used in tests.
+	nodes := []*v1.Node{}
+	i := 0
+	// Number of nodes per zone: zone 0 -> 2 nodes, zone 1 -> 6 nodes.
+	for zone, nb := range []int{2, 6} {
+		for j := 0; j < nb; j++ {
+			nodes = append(nodes, &v1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: fmt.Sprintf("node-%d", i),
+					Labels: map[string]string{
+						v1.LabelZoneRegion:        fmt.Sprintf("region-%d", zone),
+						v1.LabelZoneFailureDomain: fmt.Sprintf("zone-%d", zone),
+					},
+				},
+			})
+			i++
+		}
+	}
+
+	var cache *schedulerCache
+	var snapshot *Snapshot
+
+	addNode := func(t *testing.T, i int) {
+		if err := cache.AddNode(nodes[i]); err != nil {
+			t.Error(err)
+		}
+		_, ok := snapshot.nodeInfoMap[nodes[i].Name]
+		if !ok {
+			snapshot.nodeInfoMap[nodes[i].Name] = cache.nodes[nodes[i].Name].info
+		}
+	}
+
+	updateSnapshot := func(t *testing.T) {
+		cache.updateNodeInfoSnapshotList(snapshot, true)
+		if err := compareCacheWithNodeInfoSnapshot(cache, snapshot); err != nil {
+			t.Error(err)
+		}
+	}
+
+	tests := []struct {
+		name       string
+		operations func(t *testing.T)
+		expected   []string
+	}{
+		{
+			name:       "Empty cache",
+			operations: func(t *testing.T) {},
+			expected:   []string{},
+		},
+		{
+			name: "Single node",
+			operations: func(t *testing.T) {
+				addNode(t, 0)
+			},
+			expected: []string{"node-0"},
+		},
+		{
+			name: "Two nodes",
+			operations: func(t *testing.T) {
+				addNode(t, 0)
+				updateSnapshot(t)
+				addNode(t, 1)
+			},
+			expected: []string{"node-0", "node-1"},
+		},
+		{
+			name: "bug 91601, two nodes, update the snapshot and add two nodes in different zones",
+			operations: func(t *testing.T) {
+				addNode(t, 2)
+				addNode(t, 3)
+				updateSnapshot(t)
+				addNode(t, 4)
+				addNode(t, 0)
+			},
+			expected: []string{"node-2", "node-0", "node-3", "node-4"},
+		},
+		{
+			name: "bug 91601, 6 nodes, one in a different zone",
+			operations: func(t *testing.T) {
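+				// Per the node setup above, node-2 through node-6 are in
+				// zone-1 and node-0 is in zone-0, so the two additions after
+				// the snapshot span two zones, the pattern behind bug 91601.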
+				addNode(t, 2)
+				addNode(t, 3)
+				addNode(t, 4)
+				addNode(t, 5)
+				updateSnapshot(t)
+				addNode(t, 6)
+				addNode(t, 0)
+			},
+			expected: []string{"node-2", "node-0", "node-3", "node-4", "node-5", "node-6"},
+		},
+		{
+			name: "bug 91601, 7 nodes, two in a different zone",
+			operations: func(t *testing.T) {
+				addNode(t, 2)
+				updateSnapshot(t)
+				addNode(t, 3)
+				addNode(t, 4)
+				updateSnapshot(t)
+				addNode(t, 5)
+				addNode(t, 6)
+				addNode(t, 0)
+				addNode(t, 1)
+			},
+			expected: []string{"node-2", "node-0", "node-3", "node-1", "node-4", "node-5", "node-6"},
+		},
+		{
+			name: "bug 91601, 7 nodes, two in a different zone, different zone order",
+			operations: func(t *testing.T) {
+				addNode(t, 2)
+				addNode(t, 1)
+				updateSnapshot(t)
+				addNode(t, 3)
+				addNode(t, 4)
+				updateSnapshot(t)
+				addNode(t, 5)
+				addNode(t, 6)
+				addNode(t, 0)
+			},
+			expected: []string{"node-2", "node-1", "node-3", "node-0", "node-4", "node-5", "node-6"},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			cache = newSchedulerCache(time.Second, time.Second, nil)
+			snapshot = NewEmptySnapshot()
+
+			test.operations(t)
+
+			// Always update the snapshot at the end of operations and compare it.
+			cache.updateNodeInfoSnapshotList(snapshot, true)
+			if err := compareCacheWithNodeInfoSnapshot(cache, snapshot); err != nil {
+				t.Error(err)
+			}
+			nodeNames := make([]string, len(snapshot.nodeInfoList))
+			for i, nodeInfo := range snapshot.nodeInfoList {
+				nodeNames[i] = nodeInfo.Node().Name
+			}
+			if !reflect.DeepEqual(nodeNames, test.expected) {
+				t.Errorf("The nodeInfoList is incorrect. Expected %v, got %v", test.expected, nodeNames)
+			}
+		})
+	}
+}
+
 func BenchmarkUpdate1kNodes30kPods(b *testing.B) {
 	// Enable volumesOnNodeForBalancing to do balanced resource allocation
 	defer featuregatetesting.SetFeatureGateDuringTest(nil, utilfeature.DefaultFeatureGate, features.BalanceAttachedNodeVolumes, true)()