Ensure that a node's CIDR isn't released until the node is deleted

Fixes https://github.com/kubernetes/kubernetes/issues/127792

Fixes a bug where a node's PodCIDR was released as soon as the node was given a
deletion timestamp, even though the node object was still present because of a finalizer.
Adrian Moisey 2024-10-23 21:51:29 +02:00
parent 11d2b4d7ed
commit ea299acf97
2 changed files with 122 additions and 10 deletions
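
For context, the failure mode involves a Node that has been marked for deletion but is kept in the API by a finalizer: the API server sets DeletionTimestamp, yet the object is not removed until the finalizer list is empty, so the NodeIPAM controller's informer sees an Update, not a Delete. A minimal standalone sketch of such an object (the finalizer name is made up for illustration):

package main

import (
	"fmt"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// A Node marked for deletion but held by a finalizer. It still carries a
	// PodCIDR that is in use, so the allocator must not release it yet.
	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name:              "node0",
			DeletionTimestamp: &metav1.Time{Time: time.Now()},
			Finalizers:        []string{"example.com/keep-node"}, // hypothetical finalizer
		},
		Spec: v1.NodeSpec{
			PodCIDR:  "10.10.0.0/24",
			PodCIDRs: []string{"10.10.0.0/24"},
		},
	}

	// Before this fix, syncNode treated this state as "deleted" and released
	// the PodCIDR even though the node was still present.
	fmt.Printf("terminating=%v, PodCIDR still assigned: %s\n",
		!node.DeletionTimestamp.IsZero(), node.Spec.PodCIDR)
}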


@@ -145,16 +145,22 @@ func NewCIDRRangeAllocator(ctx context.Context, client clientset.Interface, node
 		DeleteFunc: func(obj interface{}) {
 			// The informer cache no longer has the object, and since Node doesn't have a finalizer,
 			// we don't see the Update with DeletionTimestamp != 0.
-			// TODO: instead of executing the operation directly in the handler, build a small cache with key node.Name
-			// and value PodCIDRs use ReleaseCIDR on the reconcile loop so we can retry on `ReleaseCIDR` failures.
-			if err := ra.ReleaseCIDR(logger, obj.(*v1.Node)); err != nil {
-				utilruntime.HandleError(fmt.Errorf("error while processing CIDR Release: %w", err))
+			node, ok := obj.(*v1.Node)
+			if !ok {
+				tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
+				if !ok {
+					utilruntime.HandleError(fmt.Errorf("unexpected object type: %v", obj))
+					return
+				}
+				node, ok = tombstone.Obj.(*v1.Node)
+				if !ok {
+					utilruntime.HandleError(fmt.Errorf("unexpected object types: %v", obj))
+					return
+				}
 			}
-			// IndexerInformer uses a delta nodeQueue, therefore for deletes we have to use this
-			// key function.
-			key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
-			if err == nil {
-				ra.queue.Add(key)
+			if err := ra.ReleaseCIDR(logger, node); err != nil {
+				utilruntime.HandleError(fmt.Errorf("error while processing CIDR Release: %w", err))
 			}
 		},
 	})
@@ -270,7 +276,7 @@ func (r *rangeAllocator) syncNode(ctx context.Context, key string) error {
 	// Check the DeletionTimestamp to determine if object is under deletion.
 	if !node.DeletionTimestamp.IsZero() {
 		logger.V(3).Info("node is being deleted", "node", key)
-		return r.ReleaseCIDR(logger, node)
+		return nil
 	}
 	return r.AllocateOrOccupyCIDR(ctx, node)
 }
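
The new delete handler relies on the standard client-go tombstone idiom: if the informer's watch missed the final delete event, the handler receives a cache.DeletedFinalStateUnknown wrapper instead of the object itself, and the last known Node has to be unwrapped from it. A standalone sketch of that unwrap (nodeFromDeleteEvent is a made-up helper, not part of this patch, which keeps the logic inline):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/cache"
)

// nodeFromDeleteEvent returns the deleted Node from a delete-handler argument,
// which is either the *v1.Node itself or a DeletedFinalStateUnknown tombstone
// carrying the last state the informer observed.
func nodeFromDeleteEvent(obj interface{}) (*v1.Node, error) {
	if node, ok := obj.(*v1.Node); ok {
		return node, nil
	}
	tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
	if !ok {
		return nil, fmt.Errorf("unexpected object type: %T", obj)
	}
	node, ok := tombstone.Obj.(*v1.Node)
	if !ok {
		return nil, fmt.Errorf("tombstone contained unexpected object type: %T", tombstone.Obj)
	}
	return node, nil
}

func main() {
	// Simulate the tombstone case: only the key and last observed state remain.
	tomb := cache.DeletedFinalStateUnknown{Key: "node0", Obj: &v1.Node{}}
	node, err := nodeFromDeleteEvent(tomb)
	fmt.Println(node != nil, err) // true <nil>
}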


@@ -841,3 +841,109 @@ func TestReleaseCIDRSuccess(t *testing.T) {
testFunc(tc)
}
}
func TestNodeDeletionReleaseCIDR(t *testing.T) {
_, clusterCIDRv4, err := netutils.ParseCIDRSloppy("10.10.0.0/16")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
_, allocatedCIDR, err := netutils.ParseCIDRSloppy("10.10.0.0/24")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
testCases := []struct {
description string
nodeKey string
existingNodes []*v1.Node
shouldReleaseCIDR bool
}{
{
description: "Regular node not under deletion",
nodeKey: "node0",
existingNodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
},
Spec: v1.NodeSpec{
PodCIDR: allocatedCIDR.String(),
PodCIDRs: []string{allocatedCIDR.String()}},
},
},
shouldReleaseCIDR: false,
},
{
description: "Node under deletion",
nodeKey: "node0",
existingNodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
DeletionTimestamp: &metav1.Time{Time: time.Now()},
},
Spec: v1.NodeSpec{
PodCIDR: allocatedCIDR.String(),
PodCIDRs: []string{allocatedCIDR.String()}},
},
},
shouldReleaseCIDR: false,
},
{
description: "Node deleted",
nodeKey: "node0",
existingNodes: []*v1.Node{},
shouldReleaseCIDR: true,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
allocatorParams := CIDRAllocatorParams{
ClusterCIDRs: []*net.IPNet{clusterCIDRv4}, ServiceCIDR: nil,
SecondaryServiceCIDR: nil,
NodeCIDRMaskSizes: []int{24},
}
fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: tc.existingNodes,
Clientset: fake.NewSimpleClientset(),
}
_, tCtx := ktesting.NewTestContext(t)
fakeNodeInformer := test.FakeNodeInformer(fakeNodeHandler)
nodeList, err := fakeNodeHandler.List(tCtx, metav1.ListOptions{})
if err != nil {
t.Fatalf("Failed to get list of nodes %v", err)
}
allocator, err := NewCIDRRangeAllocator(tCtx, fakeNodeHandler, fakeNodeInformer, allocatorParams, nodeList)
if err != nil {
t.Fatalf("failed to create CIDRRangeAllocator: %v", err)
}
rangeAllocator, ok := allocator.(*rangeAllocator)
if !ok {
t.Fatalf("found non-default implementation of CIDRAllocator")
}
rangeAllocator.nodesSynced = test.AlwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder()
rangeAllocator.syncNode(tCtx, tc.nodeKey)
if len(rangeAllocator.cidrSets) != 1 {
t.Fatalf("Expected a single cidrSet, but got %d", len(rangeAllocator.cidrSets))
}
// if the allocated CIDR was released, we expect the next allocated CIDR to be the same one
nextCIDR, err := rangeAllocator.cidrSets[0].AllocateNext()
if err != nil {
t.Fatalf("unexpected error trying to allocate next CIDR: %v", err)
}
expectedCIDR := "10.10.1.0/24" // existing allocated CIDR is 10.10.0.0/24
if tc.shouldReleaseCIDR {
expectedCIDR = allocatedCIDR.String() // if the CIDR was released we expect to reuse it, i.e. 10.10.0.0/24
}
if nextCIDR.String() != expectedCIDR {
t.Fatalf("Expected CIDR %s to be allocated next, but got: %v", expectedCIDR, nextCIDR.String())
}
})
}
}
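
The test's assertions hinge on how the allocator's CIDR set hands out ranges: while 10.10.0.0/24 is still occupied, the next allocation skips ahead to 10.10.1.0/24; if it was never occupied (the node was already gone when the allocator started), the first allocation returns 10.10.0.0/24 again. A rough standalone sketch of that behaviour, assuming the cidrset package's NewCIDRSet/Occupy/AllocateNext API used by the range allocator (signatures from memory, not from this diff):

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/cidrset"
	netutils "k8s.io/utils/net"
)

func main() {
	_, clusterCIDR, _ := netutils.ParseCIDRSloppy("10.10.0.0/16")
	_, nodeCIDR, _ := netutils.ParseCIDRSloppy("10.10.0.0/24")

	// Case 1: the node still exists (or is only marked for deletion), so its
	// CIDR stays occupied and the next allocation moves on.
	occupied, err := cidrset.NewCIDRSet(clusterCIDR, 24)
	if err != nil {
		panic(err)
	}
	if err := occupied.Occupy(nodeCIDR); err != nil {
		panic(err)
	}
	next, _ := occupied.AllocateNext()
	fmt.Println(next) // 10.10.1.0/24

	// Case 2: the node was deleted before the allocator saw it, so nothing is
	// occupied and the first allocation hands out 10.10.0.0/24 again.
	fresh, err := cidrset.NewCIDRSet(clusterCIDR, 24)
	if err != nil {
		panic(err)
	}
	next, _ = fresh.AllocateNext()
	fmt.Println(next) // 10.10.0.0/24
}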