From 3cfed68c7a865a1edbe7466164a335cc7ee992ed Mon Sep 17 00:00:00 2001 From: "Bobby (Babak) Salamat" Date: Fri, 27 Apr 2018 17:03:12 -0700 Subject: [PATCH] fixup! Make scheduler cache generation number monotonic to avoid collision --- pkg/scheduler/schedulercache/cache_test.go | 1 + pkg/scheduler/schedulercache/node_info.go | 25 ++++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pkg/scheduler/schedulercache/cache_test.go b/pkg/scheduler/schedulercache/cache_test.go index 38971cdaf55..517c9bccb8a 100644 --- a/pkg/scheduler/schedulercache/cache_test.go +++ b/pkg/scheduler/schedulercache/cache_test.go @@ -992,6 +992,7 @@ func TestNodeOperators(t *testing.T) { t.Errorf("Failed to find node %v in schedulercache.", node.Name) } + // Generations are globally unique. We check in our unit tests that they are incremented correctly. expected.generation = got.generation if !reflect.DeepEqual(got, expected) { t.Errorf("Failed to add node into schedulercache:\n got: %+v \nexpected: %+v", got, expected) diff --git a/pkg/scheduler/schedulercache/node_info.go b/pkg/scheduler/schedulercache/node_info.go index 37224f1bdea..8bf864b7e1f 100644 --- a/pkg/scheduler/schedulercache/node_info.go +++ b/pkg/scheduler/schedulercache/node_info.go @@ -32,8 +32,8 @@ import ( ) var ( - emptyResource = Resource{} - generation int64 = 0 + emptyResource = Resource{} + generation int64 ) // NodeInfo is node level aggregated information. @@ -78,9 +78,12 @@ func initializeNodeTransientInfo() nodeTransientInfo { return nodeTransientInfo{AllocatableVolumesCount: 0, RequestedVolumes: 0} } -func incrementGeneration() int64 { - atomic.AddInt64(&generation, 1) - return generation +// nextGeneration: Let's make sure history never forgets the name... +// Increments the generation number monotonically ensuring that generation numbers never collide. +// Collision of the generation numbers would be particularly problematic if a node was deleted and +// added back with the same name. See issue#63262. +func nextGeneration() int64 { + return atomic.AddInt64(&generation, 1) } // nodeTransientInfo contains transient node information while scheduling. @@ -221,7 +224,7 @@ func NewNodeInfo(pods ...*v1.Pod) *NodeInfo { nonzeroRequest: &Resource{}, allocatableResource: &Resource{}, TransientInfo: newTransientSchedulerInfo(), - generation: incrementGeneration(), + generation: nextGeneration(), usedPorts: make(util.HostPortInfo), } for _, pod := range pods { @@ -329,7 +332,7 @@ func (n *NodeInfo) AllocatableResource() Resource { // SetAllocatableResource sets the allocatableResource information of given node. func (n *NodeInfo) SetAllocatableResource(allocatableResource *Resource) { n.allocatableResource = allocatableResource - n.generation = incrementGeneration() + n.generation = nextGeneration() } // Clone returns a copy of this node. @@ -401,7 +404,7 @@ func (n *NodeInfo) AddPod(pod *v1.Pod) { // Consume ports when pods added. n.updateUsedPorts(pod, true) - n.generation = incrementGeneration() + n.generation = nextGeneration() } // RemovePod subtracts pod information from this NodeInfo. @@ -452,7 +455,7 @@ func (n *NodeInfo) RemovePod(pod *v1.Pod) error { // Release ports when remove Pods. n.updateUsedPorts(pod, false) - n.generation = incrementGeneration() + n.generation = nextGeneration() return nil } @@ -509,7 +512,7 @@ func (n *NodeInfo) SetNode(node *v1.Node) error { } } n.TransientInfo = newTransientSchedulerInfo() - n.generation = incrementGeneration() + n.generation = nextGeneration() return nil } @@ -525,7 +528,7 @@ func (n *NodeInfo) RemoveNode(node *v1.Node) error { n.memoryPressureCondition = v1.ConditionUnknown n.diskPressureCondition = v1.ConditionUnknown n.pidPressureCondition = v1.ConditionUnknown - n.generation = incrementGeneration() + n.generation = nextGeneration() return nil }