Merge pull request #63264 from bsalamat/monotonic_generation

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Increase scheduler cache generation number monotonically in order to avoid collision

**What this PR does / why we need it**:
Increments the scheduler cache generation number monotonically to avoid collision of the generation numbers. More context in #63262.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #63262.

**Special notes for your reviewer**:

**Release note**:

```release-note
Increase scheduler cache generation number monotonically in order to avoid collision and use of stale information in scheduler.
```
This commit is contained in:
Kubernetes Submit Queue 2018-05-08 03:45:57 -07:00 committed by GitHub
commit 8d064823bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 43 additions and 7 deletions

View File

@ -992,6 +992,8 @@ func TestNodeOperators(t *testing.T) {
t.Errorf("Failed to find node %v in schedulercache.", node.Name)
}
// Generations are globally unique. We check in our unit tests that they are incremented correctly.
expected.generation = got.generation
if !reflect.DeepEqual(got, expected) {
t.Errorf("Failed to add node into schedulercache:\n got: %+v \nexpected: %+v", got, expected)
}
@ -1003,6 +1005,7 @@ func TestNodeOperators(t *testing.T) {
if !found || len(cachedNodes) != 1 {
t.Errorf("failed to dump cached nodes:\n got: %v \nexpected: %v", cachedNodes, cache.nodes)
}
expected.generation = newNode.generation
if !reflect.DeepEqual(newNode, expected) {
t.Errorf("Failed to clone node:\n got: %+v, \n expected: %+v", newNode, expected)
}
@ -1010,12 +1013,15 @@ func TestNodeOperators(t *testing.T) {
// Case 3: update node attribute successfully.
node.Status.Allocatable[v1.ResourceMemory] = mem50m
expected.allocatableResource.Memory = mem50m.Value()
expected.generation++
cache.UpdateNode(nil, node)
got, found = cache.nodes[node.Name]
if !found {
t.Errorf("Failed to find node %v in schedulercache after UpdateNode.", node.Name)
}
if got.generation <= expected.generation {
t.Errorf("generation is not incremented. got: %v, expected: %v", got.generation, expected.generation)
}
expected.generation = got.generation
if !reflect.DeepEqual(got, expected) {
t.Errorf("Failed to update node in schedulercache:\n got: %+v \nexpected: %+v", got, expected)

View File

@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"sync"
"sync/atomic"
"github.com/golang/glog"
@ -30,7 +31,10 @@ import (
"k8s.io/kubernetes/pkg/scheduler/util"
)
var emptyResource = Resource{}
var (
	emptyResource = Resource{}
	// generation is the package-global cache generation counter. It is only
	// ever advanced through nextGeneration (via sync/atomic), which keeps the
	// values strictly increasing and safe to update from concurrent goroutines.
	generation int64
)
// NodeInfo is node level aggregated information.
type NodeInfo struct {
@ -74,6 +78,14 @@ func initializeNodeTransientInfo() nodeTransientInfo {
return nodeTransientInfo{AllocatableVolumesCount: 0, RequestedVolumes: 0}
}
// nextGeneration atomically increments the package-global generation counter
// and returns the new value. Because the counter only ever moves forward,
// generation numbers handed out here never collide — which matters when a
// node is deleted and then re-added under the same name, since a reused
// (stale) generation could otherwise be mistaken for fresh data.
// See issue #63262.
func nextGeneration() int64 {
	return atomic.AddInt64(&generation, 1)
}
// nodeTransientInfo contains transient node information while scheduling.
type nodeTransientInfo struct {
// AllocatableVolumesCount contains number of volumes that could be attached to node.
@ -212,7 +224,7 @@ func NewNodeInfo(pods ...*v1.Pod) *NodeInfo {
nonzeroRequest: &Resource{},
allocatableResource: &Resource{},
TransientInfo: newTransientSchedulerInfo(),
generation: 0,
generation: nextGeneration(),
usedPorts: make(util.HostPortInfo),
}
for _, pod := range pods {
@ -320,6 +332,7 @@ func (n *NodeInfo) AllocatableResource() Resource {
// SetAllocatableResource replaces the node's allocatableResource with the
// provided value and stamps the NodeInfo with a fresh, globally unique
// generation number so that cache consumers can observe the update.
func (n *NodeInfo) SetAllocatableResource(allocatableResource *Resource) {
	n.generation = nextGeneration()
	n.allocatableResource = allocatableResource
}
// Clone returns a copy of this node.
@ -391,7 +404,7 @@ func (n *NodeInfo) AddPod(pod *v1.Pod) {
// Consume ports when pods added.
n.updateUsedPorts(pod, true)
n.generation++
n.generation = nextGeneration()
}
// RemovePod subtracts pod information from this NodeInfo.
@ -442,7 +455,7 @@ func (n *NodeInfo) RemovePod(pod *v1.Pod) error {
// Release ports when remove Pods.
n.updateUsedPorts(pod, false)
n.generation++
n.generation = nextGeneration()
return nil
}
@ -499,7 +512,7 @@ func (n *NodeInfo) SetNode(node *v1.Node) error {
}
}
n.TransientInfo = newTransientSchedulerInfo()
n.generation++
n.generation = nextGeneration()
return nil
}
@ -515,7 +528,7 @@ func (n *NodeInfo) RemoveNode(node *v1.Node) error {
n.memoryPressureCondition = v1.ConditionUnknown
n.diskPressureCondition = v1.ConditionUnknown
n.pidPressureCondition = v1.ConditionUnknown
n.generation++
n.generation = nextGeneration()
return nil
}

View File

@ -274,7 +274,12 @@ func TestNewNodeInfo(t *testing.T) {
},
}
gen := generation
ni := NewNodeInfo(pods...)
if ni.generation <= gen {
t.Errorf("generation is not incremented. previous: %v, current: %v", gen, ni.generation)
}
expected.generation = ni.generation
if !reflect.DeepEqual(expected, ni) {
t.Errorf("expected: %#v, got: %#v", expected, ni)
}
@ -584,10 +589,16 @@ func TestNodeInfoAddPod(t *testing.T) {
}
ni := fakeNodeInfo()
gen := ni.generation
for _, pod := range pods {
ni.AddPod(pod)
if ni.generation <= gen {
t.Errorf("generation is not incremented. Prev: %v, current: %v", gen, ni.generation)
}
gen = ni.generation
}
expected.generation = ni.generation
if !reflect.DeepEqual(expected, ni) {
t.Errorf("expected: %#v, got: %#v", expected, ni)
}
@ -788,6 +799,7 @@ func TestNodeInfoRemovePod(t *testing.T) {
for _, test := range tests {
ni := fakeNodeInfo(pods...)
gen := ni.generation
err := ni.RemovePod(test.pod)
if err != nil {
if test.errExpected {
@ -798,8 +810,13 @@ func TestNodeInfoRemovePod(t *testing.T) {
} else {
t.Errorf("expected no error, got: %v", err)
}
} else {
if ni.generation <= gen {
t.Errorf("generation is not incremented. Prev: %v, current: %v", gen, ni.generation)
}
}
test.expectedNodeInfo.generation = ni.generation
if !reflect.DeepEqual(test.expectedNodeInfo, ni) {
t.Errorf("expected: %#v, got: %#v", test.expectedNodeInfo, ni)
}